Skip to main content

datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::fmt::Write;
30use std::hash::Hash;
31use std::hash::Hasher;
32use std::iter::repeat_n;
33use std::mem::{size_of, size_of_val};
34use std::str::FromStr;
35use std::sync::Arc;
36
37use crate::assert_or_internal_err;
38use crate::cast::{
39    as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
40    as_date64_array, as_decimal32_array, as_decimal64_array, as_decimal128_array,
41    as_decimal256_array, as_dictionary_array, as_duration_microsecond_array,
42    as_duration_millisecond_array, as_duration_nanosecond_array,
43    as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
44    as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array,
45    as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array,
46    as_interval_ym_array, as_large_binary_array, as_large_list_array,
47    as_large_list_view_array, as_large_string_array, as_list_view_array, as_run_array,
48    as_string_array, as_string_view_array, as_time32_millisecond_array,
49    as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array,
50    as_timestamp_microsecond_array, as_timestamp_millisecond_array,
51    as_timestamp_nanosecond_array, as_timestamp_second_array, as_uint8_array,
52    as_uint16_array, as_uint32_array, as_uint64_array, as_union_array,
53};
54use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result};
55use crate::format::DEFAULT_CAST_OPTIONS;
56use crate::hash_utils::create_hashes;
57use crate::utils::SingleRowListArrayBuilder;
58use crate::{_internal_datafusion_err, arrow_datafusion_err};
59use arrow::array::{
60    Array, ArrayData, ArrayDataBuilder, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType,
61    AsArray, BinaryArray, BinaryViewArray, BinaryViewBuilder, BooleanArray, Date32Array,
62    Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
63    DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
64    DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
65    FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
66    GenericListViewArray, Int8Array, Int16Array, Int32Array, Int64Array,
67    IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
68    LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray,
69    ListViewArray, MapArray, MutableArrayData, PrimitiveArray, RunArray, Scalar,
70    StringArray, StringViewArray, StringViewBuilder, StructArray, Time32MillisecondArray,
71    Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
72    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
73    TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray,
74    downcast_run_array, new_empty_array, new_null_array,
75};
76use arrow::buffer::{BooleanBuffer, ScalarBuffer};
77use arrow::compute::kernels::cast::{CastOptions, cast_with_options};
78use arrow::compute::kernels::numeric::{
79    add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
80};
81use arrow::datatypes::{
82    ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, Date32Type,
83    Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, Field,
84    FieldRef, Float32Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTime,
85    IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit,
86    IntervalYearMonthType, RunEndIndexType, TimeUnit, TimestampMicrosecondType,
87    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type,
88    UInt16Type, UInt32Type, UInt64Type, UnionFields, UnionMode, i256,
89    validate_decimal_precision_and_scale,
90};
91use arrow::util::display::{ArrayFormatter, FormatOptions, array_value_to_string};
92use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
93use chrono::{Duration, NaiveDate};
94use half::f16;
95pub use struct_builder::ScalarStructBuilder;
96
/// Number of seconds in one day.
const SECONDS_PER_DAY: i64 = 86_400;
/// Number of milliseconds in one day.
const MILLIS_PER_DAY: i64 = SECONDS_PER_DAY * 1_000;
/// Number of microseconds in one day.
const MICROS_PER_DAY: i64 = MILLIS_PER_DAY * 1_000;
/// Number of nanoseconds in one day.
const NANOS_PER_DAY: i64 = MICROS_PER_DAY * 1_000;
/// Number of microseconds in one millisecond.
const MICROS_PER_MILLISECOND: i64 = 1_000;
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLISECOND: i64 = 1_000_000;
103
104/// Returns the multiplier that converts the input date representation into the
105/// desired timestamp unit, if the conversion requires a multiplication that can
106/// overflow an `i64`.
107pub fn date_to_timestamp_multiplier(
108    source_type: &DataType,
109    target_type: &DataType,
110) -> Option<i64> {
111    let DataType::Timestamp(target_unit, _) = target_type else {
112        return None;
113    };
114
115    // Only `Timestamp` target types have a time unit; otherwise no
116    // multiplier applies (handled above). The function returns `Some(m)`
117    // when converting the `source_type` to `target_type` requires a
118    // multiplication that could overflow `i64`. It returns `None` when
119    // the conversion is a division or otherwise doesn't require a
120    // multiplication (e.g. Date64 -> Second).
121    match source_type {
122        // Date32 stores days since epoch. Converting to any timestamp
123        // unit requires multiplying by the per-day factor (seconds,
124        // milliseconds, microseconds, nanoseconds).
125        DataType::Date32 => Some(match target_unit {
126            TimeUnit::Second => SECONDS_PER_DAY,
127            TimeUnit::Millisecond => MILLIS_PER_DAY,
128            TimeUnit::Microsecond => MICROS_PER_DAY,
129            TimeUnit::Nanosecond => NANOS_PER_DAY,
130        }),
131
132        // Date64 stores milliseconds since epoch. Converting to
133        // seconds is a division (no multiplication), so return `None`.
134        // Converting to milliseconds is 1:1 (multiplier 1). Converting
135        // to micro/nano requires multiplying by 1_000 / 1_000_000.
136        DataType::Date64 => match target_unit {
137            TimeUnit::Second => None,
138            // Converting Date64 (ms since epoch) to millisecond timestamps
139            // is an identity conversion and does not require multiplication.
140            // Returning `None` indicates no multiplication-based overflow
141            // check is necessary.
142            TimeUnit::Millisecond => None,
143            TimeUnit::Microsecond => Some(MICROS_PER_MILLISECOND),
144            TimeUnit::Nanosecond => Some(NANOS_PER_MILLISECOND),
145        },
146
147        _ => None,
148    }
149}
150
151/// Ensures the provided value can be represented as a timestamp with the given
152/// multiplier. Returns an [`DataFusionError::Execution`] when the converted
153/// value would overflow the timestamp range.
154pub fn ensure_timestamp_in_bounds(
155    value: i64,
156    multiplier: i64,
157    source_type: &DataType,
158    target_type: &DataType,
159) -> Result<()> {
160    if multiplier <= 1 {
161        return Ok(());
162    }
163
164    if value.checked_mul(multiplier).is_none() {
165        let target = format_timestamp_type_for_error(target_type);
166        _exec_err!(
167            "Cannot cast {} value {} to {}: converted value exceeds the representable i64 range",
168            source_type,
169            value,
170            target
171        )
172    } else {
173        Ok(())
174    }
175}
176
177/// Format a `DataType::Timestamp` into a short, stable string used in
178/// user-facing error messages.
179pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String {
180    match target_type {
181        DataType::Timestamp(unit, _) => {
182            let s = match unit {
183                TimeUnit::Second => "s",
184                TimeUnit::Millisecond => "ms",
185                TimeUnit::Microsecond => "us",
186                TimeUnit::Nanosecond => "ns",
187            };
188            format!("Timestamp({s})")
189        }
190        other => format!("{other}"),
191    }
192}
193
/// A dynamically typed, nullable single value.
///
/// While an arrow [`Array`] stores one or more values of the same type, in a
/// single column, a `ScalarValue` stores a single value of a single type, the
/// equivalent of 1 row and one column.
///
/// ```text
///  ┌────────┐
///  │ value1 │
///  │ value2 │                  ┌────────┐
///  │ value3 │                  │ value2 │
///  │  ...   │                  └────────┘
///  │ valueN │
///  └────────┘
///
///    Array                     ScalarValue
///
/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
/// the same type
/// ```
///
/// # Performance
///
/// In general, performance will be better using arrow [`Array`]s rather than
/// [`ScalarValue`], as it is far more efficient to process multiple values at
/// once (vectorized processing).
///
/// # Example
/// ```
/// # use datafusion_common::ScalarValue;
/// // Create single scalar value for an Int32 value
/// let s1 = ScalarValue::Int32(Some(10));
///
/// // You can also create values using the From impl:
/// let s2 = ScalarValue::from(10i32);
/// assert_eq!(s1, s2);
/// ```
///
/// # Null Handling
///
/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
/// "typed" in the sense that a null value in an [`Int32Array`] is different
/// from a null value in a [`Float64Array`], and is different from the values in
/// a [`NullArray`].
///
/// ```
/// # fn main() -> datafusion_common::Result<()> {
/// # use std::collections::hash_set::Difference;
/// # use datafusion_common::ScalarValue;
/// # use arrow::datatypes::DataType;
/// // You can create a 'null' Int32 value directly:
/// let s1 = ScalarValue::Int32(None);
///
/// // You can also create a null value for a given datatype:
/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
/// assert_eq!(s1, s2);
///
/// // Note that this is DIFFERENT than a `ScalarValue::Null`
/// let s3 = ScalarValue::Null;
/// assert_ne!(s1, s3);
/// # Ok(())
/// # }
/// ```
///
/// # Nested Types
///
/// `List` / `LargeList` / `FixedSizeList` / `ListView` / `LargeListView` / `Struct` / `Map`
/// are represented as a single element array of the corresponding type.
///
/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct like: {a: 1, b: "foo"}
/// let field_a = Field::new("a", DataType::Int32, false);
/// let field_b = Field::new("b", DataType::Utf8, false);
///
/// let s1 = ScalarStructBuilder::new()
///     .with_scalar(field_a, ScalarValue::from(1i32))
///     .with_scalar(field_b, ScalarValue::from("foo"))
///     .build();
/// ```
///
/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct representing a NULL value
/// let fields = vec![
///     Field::new("a", DataType::Int32, false),
///     Field::new("b", DataType::Utf8, false),
/// ];
///
/// let s1 = ScalarStructBuilder::new_null(fields);
/// ```
///
/// ## Example: Creating [`ScalarValue::Struct`] directly
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field, Fields};
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use datafusion_common::ScalarValue;
/// // Build a struct like: {a: 1, b: "foo"}
/// // Field description
/// let fields = Fields::from(vec![
///     Field::new("a", DataType::Int32, false),
///     Field::new("b", DataType::Utf8, false),
/// ]);
/// // one row arrays for each field
/// let arrays: Vec<ArrayRef> = vec![
///     Arc::new(Int32Array::from(vec![1])),
///     Arc::new(StringArray::from(vec!["foo"])),
/// ];
/// // no nulls for this array
/// let nulls = None;
/// let arr = StructArray::new(fields, arrays, nulls);
///
/// // Create a ScalarValue::Struct directly
/// let s1 = ScalarValue::Struct(Arc::new(arr));
/// ```
///
///
/// # Further Reading
/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
/// for the definitive reference.
///
/// [`NullArray`]: arrow::array::NullArray
#[derive(Clone)]
pub enum ScalarValue {
    /// represents `DataType::Null` (castable to/from any other type)
    Null,
    /// true or false value
    Boolean(Option<bool>),
    /// 16bit float
    Float16(Option<f16>),
    /// 32bit float
    Float32(Option<f32>),
    /// 64bit float
    Float64(Option<f64>),
    /// 32bit decimal: the value stored as an `i32`, followed by precision and scale
    Decimal32(Option<i32>, u8, i8),
    /// 64bit decimal: the value stored as an `i64`, followed by precision and scale
    Decimal64(Option<i64>, u8, i8),
    /// 128bit decimal: the value stored as an `i128`, followed by precision and scale
    Decimal128(Option<i128>, u8, i8),
    /// 256bit decimal: the value stored as an `i256`, followed by precision and scale
    Decimal256(Option<i256>, u8, i8),
    /// signed 8bit int
    Int8(Option<i8>),
    /// signed 16bit int
    Int16(Option<i16>),
    /// signed 32bit int
    Int32(Option<i32>),
    /// signed 64bit int
    Int64(Option<i64>),
    /// unsigned 8bit int
    UInt8(Option<u8>),
    /// unsigned 16bit int
    UInt16(Option<u16>),
    /// unsigned 32bit int
    UInt32(Option<u32>),
    /// unsigned 64bit int
    UInt64(Option<u64>),
    /// utf-8 encoded string.
    Utf8(Option<String>),
    /// utf-8 encoded string but from view types.
    Utf8View(Option<String>),
    /// utf-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// binary
    Binary(Option<Vec<u8>>),
    /// binary but from view types.
    BinaryView(Option<Vec<u8>>),
    /// fixed size binary: the size, followed by the bytes
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Represents a single element of a [`LargeListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element of a [`ListViewArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListViewArray with length 1.
    ListView(Arc<ListViewArray>),
    /// Represents a single element of a [`LargeListViewArray`] as an [`ArrayRef`]
    ///
    /// The array must be a LargeListViewArray with length 1.
    LargeListView(Arc<LargeListViewArray>),
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
    /// [`ScalarValue`] for examples of how to create instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
    Map(Arc<MapArray>),
    /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32bit int as seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32bit int as milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64bit int as microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64bit int as nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    /// Timestamp Second
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Milliseconds
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Microseconds
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Nanoseconds
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds)
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
    /// (run-ends field, value field, value)
    RunEndEncoded(FieldRef, FieldRef, Box<ScalarValue>),
}
445
446impl Hash for Fl<f16> {
447    fn hash<H: Hasher>(&self, state: &mut H) {
448        self.0.to_bits().hash(state);
449    }
450}
451
452// manual implementation of `PartialEq`
453impl PartialEq for ScalarValue {
454    fn eq(&self, other: &Self) -> bool {
455        use ScalarValue::*;
456        // This purposely doesn't have a catch-all "(_, _)" so that
457        // any newly added enum variant will require editing this list
458        // or else face a compile error
459        match (self, other) {
460            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
461                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
462            }
463            (Decimal32(_, _, _), _) => false,
464            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
465                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
466            }
467            (Decimal64(_, _, _), _) => false,
468            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
469                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
470            }
471            (Decimal128(_, _, _), _) => false,
472            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
473                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
474            }
475            (Decimal256(_, _, _), _) => false,
476            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
477            (Boolean(_), _) => false,
478            (Float32(v1), Float32(v2)) => match (v1, v2) {
479                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
480                _ => v1.eq(v2),
481            },
482            (Float16(v1), Float16(v2)) => match (v1, v2) {
483                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
484                _ => v1.eq(v2),
485            },
486            (Float32(_), _) => false,
487            (Float16(_), _) => false,
488            (Float64(v1), Float64(v2)) => match (v1, v2) {
489                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
490                _ => v1.eq(v2),
491            },
492            (Float64(_), _) => false,
493            (Int8(v1), Int8(v2)) => v1.eq(v2),
494            (Int8(_), _) => false,
495            (Int16(v1), Int16(v2)) => v1.eq(v2),
496            (Int16(_), _) => false,
497            (Int32(v1), Int32(v2)) => v1.eq(v2),
498            (Int32(_), _) => false,
499            (Int64(v1), Int64(v2)) => v1.eq(v2),
500            (Int64(_), _) => false,
501            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
502            (UInt8(_), _) => false,
503            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
504            (UInt16(_), _) => false,
505            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
506            (UInt32(_), _) => false,
507            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
508            (UInt64(_), _) => false,
509            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
510            (Utf8(_), _) => false,
511            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
512            (Utf8View(_), _) => false,
513            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
514            (LargeUtf8(_), _) => false,
515            (Binary(v1), Binary(v2)) => v1.eq(v2),
516            (Binary(_), _) => false,
517            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
518            (BinaryView(_), _) => false,
519            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
520            (FixedSizeBinary(_, _), _) => false,
521            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
522            (LargeBinary(_), _) => false,
523            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
524            (FixedSizeList(_), _) => false,
525            (List(v1), List(v2)) => v1.eq(v2),
526            (List(_), _) => false,
527            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
528            (LargeList(_), _) => false,
529            (ListView(v1), ListView(v2)) => v1.eq(v2),
530            (ListView(_), _) => false,
531            (LargeListView(v1), LargeListView(v2)) => v1.eq(v2),
532            (LargeListView(_), _) => false,
533            (Struct(v1), Struct(v2)) => v1.eq(v2),
534            (Struct(_), _) => false,
535            (Map(v1), Map(v2)) => v1.eq(v2),
536            (Map(_), _) => false,
537            (Date32(v1), Date32(v2)) => v1.eq(v2),
538            (Date32(_), _) => false,
539            (Date64(v1), Date64(v2)) => v1.eq(v2),
540            (Date64(_), _) => false,
541            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
542            (Time32Second(_), _) => false,
543            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
544            (Time32Millisecond(_), _) => false,
545            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
546            (Time64Microsecond(_), _) => false,
547            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
548            (Time64Nanosecond(_), _) => false,
549            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
550            (TimestampSecond(_, _), _) => false,
551            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
552            (TimestampMillisecond(_, _), _) => false,
553            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
554            (TimestampMicrosecond(_, _), _) => false,
555            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
556            (TimestampNanosecond(_, _), _) => false,
557            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
558            (DurationSecond(_), _) => false,
559            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
560            (DurationMillisecond(_), _) => false,
561            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
562            (DurationMicrosecond(_), _) => false,
563            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
564            (DurationNanosecond(_), _) => false,
565            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
566            (IntervalYearMonth(_), _) => false,
567            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
568            (IntervalDayTime(_), _) => false,
569            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
570            (IntervalMonthDayNano(_), _) => false,
571            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
572                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
573            }
574            (Union(_, _, _), _) => false,
575            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
576            (Dictionary(_, _), _) => false,
577            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
578                rf1.eq(rf2) && vf1.eq(vf2) && v1.eq(v2)
579            }
580            (RunEndEncoded(_, _, _), _) => false,
581            (Null, Null) => true,
582            (Null, _) => false,
583        }
584    }
585}
586
587// manual implementation of `PartialOrd`
588impl PartialOrd for ScalarValue {
589    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
590        use ScalarValue::*;
591        // This purposely doesn't have a catch-all "(_, _)" so that
592        // any newly added enum variant will require editing this list
593        // or else face a compile error
594        match (self, other) {
595            (Decimal32(v1, _, s1), Decimal32(v2, _, s2)) => {
596                if s1.eq(s2) {
597                    // Same scale means the underlying integer values share
598                    // a common interpretation regardless of declared
599                    // precision (arithmetic such as `add_checked` widens
600                    // precision by 1 but does not change the numeric
601                    // meaning).
602                    v1.partial_cmp(v2)
603                } else {
604                    None
605                }
606            }
607            (Decimal32(_, _, _), _) => None,
608            (Decimal64(v1, _, s1), Decimal64(v2, _, s2)) => {
609                if s1.eq(s2) {
610                    v1.partial_cmp(v2)
611                } else {
612                    None
613                }
614            }
615            (Decimal64(_, _, _), _) => None,
616            (Decimal128(v1, _, s1), Decimal128(v2, _, s2)) => {
617                if s1.eq(s2) {
618                    v1.partial_cmp(v2)
619                } else {
620                    None
621                }
622            }
623            (Decimal128(_, _, _), _) => None,
624            (Decimal256(v1, _, s1), Decimal256(v2, _, s2)) => {
625                if s1.eq(s2) {
626                    v1.partial_cmp(v2)
627                } else {
628                    None
629                }
630            }
631            (Decimal256(_, _, _), _) => None,
632            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
633            (Boolean(_), _) => None,
634            (Float32(v1), Float32(v2)) => match (v1, v2) {
635                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
636                _ => v1.partial_cmp(v2),
637            },
638            (Float16(v1), Float16(v2)) => match (v1, v2) {
639                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
640                _ => v1.partial_cmp(v2),
641            },
642            (Float32(_), _) => None,
643            (Float16(_), _) => None,
644            (Float64(v1), Float64(v2)) => match (v1, v2) {
645                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
646                _ => v1.partial_cmp(v2),
647            },
648            (Float64(_), _) => None,
649            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
650            (Int8(_), _) => None,
651            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
652            (Int16(_), _) => None,
653            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
654            (Int32(_), _) => None,
655            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
656            (Int64(_), _) => None,
657            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
658            (UInt8(_), _) => None,
659            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
660            (UInt16(_), _) => None,
661            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
662            (UInt32(_), _) => None,
663            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
664            (UInt64(_), _) => None,
665            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
666            (Utf8(_), _) => None,
667            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
668            (LargeUtf8(_), _) => None,
669            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
670            (Utf8View(_), _) => None,
671            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
672            (Binary(_), _) => None,
673            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
674            (BinaryView(_), _) => None,
675            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
676            (FixedSizeBinary(_, _), _) => None,
677            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
678            (LargeBinary(_), _) => None,
679            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList / ScalarValue::ListView / ScalarValue::LargeListView
680            // are guaranteed to have length 1
681            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
682            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
683                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
684            }
685            (LargeList(arr1), LargeList(arr2)) => {
686                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
687            }
688            (ListView(arr1), ListView(arr2)) => {
689                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
690            }
691            (LargeListView(arr1), LargeListView(arr2)) => {
692                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
693            }
694            (List(_), _)
695            | (LargeList(_), _)
696            | (FixedSizeList(_), _)
697            | (ListView(_), _)
698            | (LargeListView(_), _) => None,
699            (Struct(struct_arr1), Struct(struct_arr2)) => {
700                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
701            }
702            (Struct(_), _) => None,
703            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
704            (Map(_), _) => None,
705            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
706            (Date32(_), _) => None,
707            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
708            (Date64(_), _) => None,
709            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
710            (Time32Second(_), _) => None,
711            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
712            (Time32Millisecond(_), _) => None,
713            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
714            (Time64Microsecond(_), _) => None,
715            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
716            (Time64Nanosecond(_), _) => None,
717            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
718            (TimestampSecond(_, _), _) => None,
719            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
720                v1.partial_cmp(v2)
721            }
722            (TimestampMillisecond(_, _), _) => None,
723            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
724                v1.partial_cmp(v2)
725            }
726            (TimestampMicrosecond(_, _), _) => None,
727            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
728                v1.partial_cmp(v2)
729            }
730            (TimestampNanosecond(_, _), _) => None,
731            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
732            (IntervalYearMonth(_), _) => None,
733            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
734            (IntervalDayTime(_), _) => None,
735            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
736            (IntervalMonthDayNano(_), _) => None,
737            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
738            (DurationSecond(_), _) => None,
739            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
740            (DurationMillisecond(_), _) => None,
741            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
742            (DurationMicrosecond(_), _) => None,
743            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
744            (DurationNanosecond(_), _) => None,
745            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
746                if t1.eq(t2) && m1.eq(m2) {
747                    v1.partial_cmp(v2)
748                } else {
749                    None
750                }
751            }
752            (Union(_, _, _), _) => None,
753            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
754                // Don't compare if the key types don't match (it is effectively a different datatype)
755                if k1 == k2 { v1.partial_cmp(v2) } else { None }
756            }
757            (Dictionary(_, _), _) => None,
758            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
759                // Don't compare if the run ends fields don't match (it is effectively a different datatype)
760                if rf1 == rf2 && vf1 == vf2 {
761                    v1.partial_cmp(v2)
762                } else {
763                    None
764                }
765            }
766            (RunEndEncoded(_, _, _), _) => None,
767            (Null, Null) => Some(Ordering::Equal),
768            (Null, _) => None,
769        }
770    }
771}
772
773/// List/LargeList/FixedSizeList/ListView/LargeListView scalars always have a single element
774/// array. This function returns that array
775fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
776    assert_eq!(arr.len(), 1);
777    if let Some(arr) = arr.as_list_opt::<i32>() {
778        arr.value(0)
779    } else if let Some(arr) = arr.as_list_opt::<i64>() {
780        arr.value(0)
781    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
782        arr.value(0)
783    } else if let Some(arr) = arr.as_list_view_opt::<i32>() {
784        arr.value(0)
785    } else if let Some(arr) = arr.as_list_view_opt::<i64>() {
786        arr.value(0)
787    } else {
788        unreachable!(
789            "Since only List / LargeList / FixedSizeList / ListView / LargeListView are supported, this should never happen"
790        )
791    }
792}
793
794/// Compares two List/LargeList/FixedSizeList/ListView/LargeListView scalars
795fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
796    if arr1.data_type() != arr2.data_type() {
797        return None;
798    }
799    let arr1 = first_array_for_list(arr1);
800    let arr2 = first_array_for_list(arr2);
801
802    let min_length = arr1.len().min(arr2.len());
803    let arr1_trimmed = arr1.slice(0, min_length);
804    let arr2_trimmed = arr2.slice(0, min_length);
805
806    let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
807    let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
808
809    for j in 0..lt_res.len() {
810        // In Postgres, NULL values in lists are always considered to be greater than non-NULL values:
811        //
812        // $ SELECT ARRAY[NULL]::integer[] > ARRAY[1]
813        // true
814        //
815        // These next two if statements are introduced for replicating Postgres behavior, as
816        // arrow::compute does not account for this.
817        if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
818            return Some(Ordering::Greater);
819        }
820        if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
821            return Some(Ordering::Less);
822        }
823
824        if lt_res.is_valid(j) && lt_res.value(j) {
825            return Some(Ordering::Less);
826        }
827        if eq_res.is_valid(j) && !eq_res.value(j) {
828            return Some(Ordering::Greater);
829        }
830    }
831
832    Some(arr1.len().cmp(&arr2.len()))
833}
834
835fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
836    for i in 0..array.num_columns() {
837        let column = array.column(i);
838        if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
839            // If it's a nested struct, recursively expand
840            flatten(nested_struct, columns);
841        } else {
842            // If it's a primitive type, add directly
843            columns.push(column);
844        }
845    }
846}
847
848pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
849    if s1.len() != s2.len() {
850        return None;
851    }
852
853    if s1.data_type() != s2.data_type() {
854        return None;
855    }
856
857    let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
858    let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
859
860    flatten(s1, &mut expanded_columns1);
861    flatten(s2, &mut expanded_columns2);
862
863    for col_index in 0..expanded_columns1.len() {
864        let arr1 = expanded_columns1[col_index];
865        let arr2 = expanded_columns2[col_index];
866
867        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
868        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
869
870        for j in 0..lt_res.len() {
871            if lt_res.is_valid(j) && lt_res.value(j) {
872                return Some(Ordering::Less);
873            }
874            if eq_res.is_valid(j) && !eq_res.value(j) {
875                return Some(Ordering::Greater);
876            }
877        }
878    }
879    Some(Ordering::Equal)
880}
881
882fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
883    if m1.len() != m2.len() {
884        return None;
885    }
886
887    if m1.data_type() != m2.data_type() {
888        return None;
889    }
890
891    for col_index in 0..m1.len() {
892        let arr1 = m1.entries().column(col_index);
893        let arr2 = m2.entries().column(col_index);
894
895        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
896        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
897
898        for j in 0..lt_res.len() {
899            if lt_res.is_valid(j) && lt_res.value(j) {
900                return Some(Ordering::Less);
901            }
902            if eq_res.is_valid(j) && !eq_res.value(j) {
903                return Some(Ordering::Greater);
904            }
905        }
906    }
907    Some(Ordering::Equal)
908}
909
// Marker impl: `Eq` adds no methods, it only asserts that the `PartialEq`
// implementation for `ScalarValue` is a full equivalence relation.
// NOTE(review): this relies on that `PartialEq` impl being reflexive for
// every value (including float NaN) — confirm against the impl, which is
// outside this view.
impl Eq for ScalarValue {}
911
/// Newtype around a float so that it can be hashed: the primitive float
/// types do not implement `std::hash::Hash`, so hashing goes through this
/// wrapper instead.
struct Fl<T>(T);

// Implements `std::hash::Hash` for `Fl<$t>` for each `($t, $i)` pair, where
// `$t` is a float type and `$i` is the unsigned integer type of the same
// width. The float's bit pattern is fed to the hasher as native-endian
// bytes.
macro_rules! hash_float_value {
    ($(($t:ty, $i:ty)),+) => {
        $(impl std::hash::Hash for Fl<$t> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let bits: $i = self.0.to_bits();
                state.write(&bits.to_ne_bytes())
            }
        })+
    };
}

// Instantiate the impls for `Fl<f64>` and `Fl<f32>`.
hash_float_value!((f64, u64), (f32, u32));
927
928// manual implementation of `Hash`
929//
930// # Panics
931//
932// Panics if there is an error when creating hash values for rows
933impl Hash for ScalarValue {
934    fn hash<H: Hasher>(&self, state: &mut H) {
935        use ScalarValue::*;
936        match self {
937            Decimal32(v, p, s) => {
938                v.hash(state);
939                p.hash(state);
940                s.hash(state)
941            }
942            Decimal64(v, p, s) => {
943                v.hash(state);
944                p.hash(state);
945                s.hash(state)
946            }
947            Decimal128(v, p, s) => {
948                v.hash(state);
949                p.hash(state);
950                s.hash(state)
951            }
952            Decimal256(v, p, s) => {
953                v.hash(state);
954                p.hash(state);
955                s.hash(state)
956            }
957            Boolean(v) => v.hash(state),
958            Float16(v) => v.map(Fl).hash(state),
959            Float32(v) => v.map(Fl).hash(state),
960            Float64(v) => v.map(Fl).hash(state),
961            Int8(v) => v.hash(state),
962            Int16(v) => v.hash(state),
963            Int32(v) => v.hash(state),
964            Int64(v) => v.hash(state),
965            UInt8(v) => v.hash(state),
966            UInt16(v) => v.hash(state),
967            UInt32(v) => v.hash(state),
968            UInt64(v) => v.hash(state),
969            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
970            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
971                v.hash(state)
972            }
973            List(arr) => {
974                hash_nested_array(arr.to_owned() as ArrayRef, state);
975            }
976            LargeList(arr) => {
977                hash_nested_array(arr.to_owned() as ArrayRef, state);
978            }
979            FixedSizeList(arr) => {
980                hash_nested_array(arr.to_owned() as ArrayRef, state);
981            }
982            ListView(arr) => {
983                hash_nested_array(arr.to_owned() as ArrayRef, state);
984            }
985            LargeListView(arr) => {
986                hash_nested_array(arr.to_owned() as ArrayRef, state);
987            }
988            Struct(arr) => {
989                hash_nested_array(arr.to_owned() as ArrayRef, state);
990            }
991            Map(arr) => {
992                hash_nested_array(arr.to_owned() as ArrayRef, state);
993            }
994            Date32(v) => v.hash(state),
995            Date64(v) => v.hash(state),
996            Time32Second(v) => v.hash(state),
997            Time32Millisecond(v) => v.hash(state),
998            Time64Microsecond(v) => v.hash(state),
999            Time64Nanosecond(v) => v.hash(state),
1000            TimestampSecond(v, _) => v.hash(state),
1001            TimestampMillisecond(v, _) => v.hash(state),
1002            TimestampMicrosecond(v, _) => v.hash(state),
1003            TimestampNanosecond(v, _) => v.hash(state),
1004            DurationSecond(v) => v.hash(state),
1005            DurationMillisecond(v) => v.hash(state),
1006            DurationMicrosecond(v) => v.hash(state),
1007            DurationNanosecond(v) => v.hash(state),
1008            IntervalYearMonth(v) => v.hash(state),
1009            IntervalDayTime(v) => v.hash(state),
1010            IntervalMonthDayNano(v) => v.hash(state),
1011            Union(v, t, m) => {
1012                v.hash(state);
1013                t.hash(state);
1014                m.hash(state);
1015            }
1016            Dictionary(k, v) => {
1017                k.hash(state);
1018                v.hash(state);
1019            }
1020            RunEndEncoded(rf, vf, v) => {
1021                rf.hash(state);
1022                vf.hash(state);
1023                v.hash(state);
1024            }
1025            // stable hash for Null value
1026            Null => 1.hash(state),
1027        }
1028    }
1029}
1030
1031fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
1032    let len = arr.len();
1033    let hashes_buffer = &mut vec![0; len];
1034    let random_state = crate::hash_utils::RandomState::with_seed(0);
1035    let hashes = create_hashes(&[arr], &random_state, hashes_buffer)
1036        .expect("hash_nested_array: failed to create row hashes");
1037    // Hash back to std::hash::Hasher
1038    hashes.hash(state);
1039}
1040
1041/// Return a reference to the values array and the index into it for a
1042/// dictionary array
1043///
1044/// # Errors
1045///
1046/// Errors if the array cannot be downcasted to DictionaryArray
1047#[inline]
1048pub fn get_dict_value<K: ArrowDictionaryKeyType>(
1049    array: &dyn Array,
1050    index: usize,
1051) -> Result<(&ArrayRef, Option<usize>)> {
1052    let dict_array = as_dictionary_array::<K>(array)?;
1053    Ok((dict_array.values(), dict_array.key(index)))
1054}
1055
1056/// Create a dictionary array representing `value` repeated `size`
1057/// times
1058fn dict_from_scalar<K: ArrowDictionaryKeyType>(
1059    value: &ScalarValue,
1060    size: usize,
1061) -> Result<ArrayRef> {
1062    // values array is one element long (the value)
1063    let values_array = value.to_array_of_size(1)?;
1064
1065    // Create a key array with `size` elements, each of 0
1066    // Use cache to avoid repeated allocations for the same size
1067    let key_array: PrimitiveArray<K> =
1068        get_or_create_cached_key_array::<K>(size, value.is_null());
1069
1070    // create a new DictionaryArray
1071    //
1072    // Note: this path could be made faster by using the ArrayData
1073    // APIs and skipping validation, if it every comes up in
1074    // performance traces.
1075    Ok(Arc::new(
1076        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
1077    ))
1078}
1079
1080/// Create a `DictionaryArray` from the provided values array.
1081///
1082/// Each element gets a unique key (`0..N-1`), without deduplication.
1083/// Useful for wrapping arrays in dictionary form.
1084///
1085/// # Input
1086/// ["alice", "bob", "alice", null, "carol"]
1087///
1088/// # Output
1089/// `DictionaryArray<Int32>`
1090/// {
1091///   keys:   [0, 1, 2, 3, 4],
1092///   values: ["alice", "bob", "alice", null, "carol"]
1093/// }
1094pub fn dict_from_values<K: ArrowDictionaryKeyType>(
1095    values_array: ArrayRef,
1096) -> Result<ArrayRef> {
1097    // Create a key array with `size` elements of 0..array_len for all
1098    // non-null value elements
1099    let key_array: PrimitiveArray<K> = (0..values_array.len())
1100        .map(|index| {
1101            if values_array.is_valid(index) {
1102                let native_index = K::Native::from_usize(index).ok_or_else(|| {
1103                    _internal_datafusion_err!(
1104                        "Can not create index of type {} from value {index}",
1105                        K::DATA_TYPE
1106                    )
1107                })?;
1108                Ok(Some(native_index))
1109            } else {
1110                Ok(None)
1111            }
1112        })
1113        .collect::<Result<Vec<_>>>()?
1114        .into_iter()
1115        .collect();
1116
1117    // create a new DictionaryArray
1118    //
1119    // Note: this path could be made faster by using the ArrayData
1120    // APIs and skipping validation, if it every comes up in
1121    // performance traces.
1122    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
1123    Ok(Arc::new(dict_array))
1124}
1125
// Reads slot `$index` of `$array` (downcast with `$array_cast`) into the
// timezone-carrying scalar variant `ScalarValue::$SCALAR`. A null slot
// becomes `None`; otherwise the value is converted with `.into()`. `$TZ` is
// cloned into the scalar. The downcast error is propagated with `?`, so the
// macro must be used where `?` is allowed.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
1138
// Reads slot `$index` of `$array` (downcast with `$array_cast`) into the
// scalar variant `ScalarValue::$SCALAR`. A null slot becomes `None`;
// otherwise the value is converted with `.into()`. The downcast error is
// propagated with `?`, so the macro must be used where `?` is allowed.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
1150
// Builds an array of `$SIZE` elements from an optional value `$EXPR`:
// `None` gives an all-null array of the requested data type, `Some(v)` gives
// `$ARRAY_TYPE::from_value(*v, $SIZE)`.
//
// The first form is for data types without parameters
// (`DataType::$DATA_TYPE`); the second passes `$ENUM` as the data type's
// argument (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
            Some(v) => Arc::new($ARRAY_TYPE::from_value(*v, $SIZE)),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
            Some(v) => Arc::new($ARRAY_TYPE::from_value(*v, $SIZE)),
        }
    }};
}
1165
// Builds a timestamp array of `$SIZE` elements from an optional value
// `$EXPR`: `None` gives an all-null array of type
// `DataType::Timestamp($TIME_UNIT, $TZ)`, `Some(v)` gives
// `$ARRAY_TYPE::from_value(*v, $SIZE)` with the timezone `$TZ` attached.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
            Some(v) => {
                Arc::new($ARRAY_TYPE::from_value(*v, $SIZE).with_timezone_opt($TZ))
            }
        }
    };
}
1176
// Checks whether slot `$index` of `$array` (downcast with `$array_cast`)
// equals the optional value `$VALUE`: a `None` value matches only a null
// slot, a `Some` value matches only a valid slot holding an equal value.
// The downcast error is propagated with `?`, so the macro must be used
// where `?` is allowed.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let typed = $array_cast($array)?;
        let slot_is_valid = typed.is_valid($index);
        let matches = match $VALUE {
            None => !slot_is_valid,
            Some(expected) => slot_is_valid && &typed.value($index) == expected,
        };
        Ok::<bool, DataFusionError>(matches)
    }};
}
1187
1188impl ScalarValue {
1189    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
1190    ///
1191    /// # Panics
1192    ///
1193    /// Panics if d is not compatible with T
1194    pub fn new_primitive<T: ArrowPrimitiveType>(
1195        a: Option<T::Native>,
1196        d: &DataType,
1197    ) -> Result<Self> {
1198        match a {
1199            None => d.try_into(),
1200            Some(v) => {
1201                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1202                    .with_data_type(d.clone());
1203                Self::try_from_array(&array, 0)
1204            }
1205        }
1206    }
1207
1208    /// Create a decimal Scalar from value/precision and scale.
1209    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1210        Self::validate_decimal_or_internal_err::<Decimal128Type>(precision, scale)?;
1211        Ok(ScalarValue::Decimal128(Some(value), precision, scale))
1212    }
1213
1214    /// Create a Null instance of ScalarValue for this datatype
1215    ///
1216    /// Example
1217    /// ```
1218    /// use arrow::datatypes::DataType;
1219    /// use datafusion_common::ScalarValue;
1220    ///
1221    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
1222    /// assert_eq!(scalar.is_null(), true);
1223    /// assert_eq!(scalar.data_type(), DataType::Int32);
1224    /// ```
1225    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1226        Ok(match data_type {
1227            DataType::Boolean => ScalarValue::Boolean(None),
1228            DataType::Float16 => ScalarValue::Float16(None),
1229            DataType::Float64 => ScalarValue::Float64(None),
1230            DataType::Float32 => ScalarValue::Float32(None),
1231            DataType::Int8 => ScalarValue::Int8(None),
1232            DataType::Int16 => ScalarValue::Int16(None),
1233            DataType::Int32 => ScalarValue::Int32(None),
1234            DataType::Int64 => ScalarValue::Int64(None),
1235            DataType::UInt8 => ScalarValue::UInt8(None),
1236            DataType::UInt16 => ScalarValue::UInt16(None),
1237            DataType::UInt32 => ScalarValue::UInt32(None),
1238            DataType::UInt64 => ScalarValue::UInt64(None),
1239            DataType::Decimal32(precision, scale) => {
1240                ScalarValue::Decimal32(None, *precision, *scale)
1241            }
1242            DataType::Decimal64(precision, scale) => {
1243                ScalarValue::Decimal64(None, *precision, *scale)
1244            }
1245            DataType::Decimal128(precision, scale) => {
1246                ScalarValue::Decimal128(None, *precision, *scale)
1247            }
1248            DataType::Decimal256(precision, scale) => {
1249                ScalarValue::Decimal256(None, *precision, *scale)
1250            }
1251            DataType::Utf8 => ScalarValue::Utf8(None),
1252            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1253            DataType::Utf8View => ScalarValue::Utf8View(None),
1254            DataType::Binary => ScalarValue::Binary(None),
1255            DataType::BinaryView => ScalarValue::BinaryView(None),
1256            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1257            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1258            DataType::Date32 => ScalarValue::Date32(None),
1259            DataType::Date64 => ScalarValue::Date64(None),
1260            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1261            DataType::Time32(TimeUnit::Millisecond) => {
1262                ScalarValue::Time32Millisecond(None)
1263            }
1264            DataType::Time64(TimeUnit::Microsecond) => {
1265                ScalarValue::Time64Microsecond(None)
1266            }
1267            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1268            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1269                ScalarValue::TimestampSecond(None, tz_opt.clone())
1270            }
1271            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1272                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1273            }
1274            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1275                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1276            }
1277            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1278                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1279            }
1280            DataType::Interval(IntervalUnit::YearMonth) => {
1281                ScalarValue::IntervalYearMonth(None)
1282            }
1283            DataType::Interval(IntervalUnit::DayTime) => {
1284                ScalarValue::IntervalDayTime(None)
1285            }
1286            DataType::Interval(IntervalUnit::MonthDayNano) => {
1287                ScalarValue::IntervalMonthDayNano(None)
1288            }
1289            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1290            DataType::Duration(TimeUnit::Millisecond) => {
1291                ScalarValue::DurationMillisecond(None)
1292            }
1293            DataType::Duration(TimeUnit::Microsecond) => {
1294                ScalarValue::DurationMicrosecond(None)
1295            }
1296            DataType::Duration(TimeUnit::Nanosecond) => {
1297                ScalarValue::DurationNanosecond(None)
1298            }
1299            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1300                index_type.clone(),
1301                Box::new(value_type.as_ref().try_into()?),
1302            ),
1303            DataType::RunEndEncoded(run_ends_field, value_field) => {
1304                ScalarValue::RunEndEncoded(
1305                    Arc::clone(run_ends_field),
1306                    Arc::clone(value_field),
1307                    Box::new(value_field.data_type().try_into()?),
1308                )
1309            }
1310            // `ScalarValue::List` contains single element `ListArray`.
1311            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1312                GenericListArray::new_null(Arc::clone(field_ref), 1),
1313            )),
1314            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1315            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1316                GenericListArray::new_null(Arc::clone(field_ref), 1),
1317            )),
1318            // `ScalarValue::FixedSizeList` contains single element `FixedSizeList`.
1319            DataType::FixedSizeList(field_ref, fixed_length) => {
1320                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1321                    Arc::clone(field_ref),
1322                    *fixed_length,
1323                    1,
1324                )))
1325            }
1326            DataType::ListView(field_ref) => ScalarValue::ListView(Arc::new(
1327                GenericListViewArray::new_null(Arc::clone(field_ref), 1),
1328            )),
1329            DataType::LargeListView(field_ref) => ScalarValue::LargeListView(Arc::new(
1330                GenericListViewArray::new_null(Arc::clone(field_ref), 1),
1331            )),
1332            DataType::Struct(fields) => ScalarValue::Struct(
1333                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1334                    .as_struct()
1335                    .to_owned()
1336                    .into(),
1337            ),
1338            DataType::Map(fields, sorted) => ScalarValue::Map(
1339                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1340                    .as_map()
1341                    .to_owned()
1342                    .into(),
1343            ),
1344            DataType::Union(fields, mode) => {
1345                ScalarValue::Union(None, fields.clone(), *mode)
1346            }
1347            DataType::Null => ScalarValue::Null,
1348            _ => {
1349                return _not_impl_err!(
1350                    "Can't create a null scalar from data_type \"{data_type}\""
1351                );
1352            }
1353        })
1354    }
1355
1356    /// Returns a [`ScalarValue::Utf8`] representing `val`
1357    pub fn new_utf8(val: impl Into<String>) -> Self {
1358        ScalarValue::from(val.into())
1359    }
1360
1361    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1362    pub fn new_utf8view(val: impl Into<String>) -> Self {
1363        ScalarValue::Utf8View(Some(val.into()))
1364    }
1365
1366    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1367    /// `years` years and `months` months
1368    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1369        let val = IntervalYearMonthType::make_value(years, months);
1370        ScalarValue::IntervalYearMonth(Some(val))
1371    }
1372
1373    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1374    /// `days` days and `millis` milliseconds
1375    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1376        let val = IntervalDayTimeType::make_value(days, millis);
1377        Self::IntervalDayTime(Some(val))
1378    }
1379
1380    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1381    /// `months` months and `days` days, and `nanos` nanoseconds
1382    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1383        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1384        ScalarValue::IntervalMonthDayNano(Some(val))
1385    }
1386
1387    /// Returns a [`ScalarValue`] representing
1388    /// `value` and `tz_opt` timezone
1389    pub fn new_timestamp<T: ArrowTimestampType>(
1390        value: Option<i64>,
1391        tz_opt: Option<Arc<str>>,
1392    ) -> Self {
1393        match T::UNIT {
1394            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1395            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1396            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1397            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1398        }
1399    }
1400
1401    /// Returns a [`ScalarValue`] representing PI
1402    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1403        match datatype {
1404            DataType::Float16 => Ok(ScalarValue::from(f16::PI)),
1405            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1406            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1407            _ => _internal_err!("PI is not supported for data type: {}", datatype),
1408        }
1409    }
1410
1411    /// Returns a [`ScalarValue`] representing PI's upper bound
1412    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1413        match datatype {
1414            DataType::Float16 => Ok(ScalarValue::Float16(Some(consts::PI_UPPER_F16))),
1415            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1416            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1417            _ => {
1418                _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1419            }
1420        }
1421    }
1422
1423    /// Returns a [`ScalarValue`] representing -PI's lower bound
1424    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1425        match datatype {
1426            DataType::Float16 => {
1427                Ok(ScalarValue::Float16(Some(consts::NEGATIVE_PI_LOWER_F16)))
1428            }
1429            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1430            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1431            _ => {
1432                _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1433            }
1434        }
1435    }
1436
1437    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1438    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1439        match datatype {
1440            DataType::Float16 => {
1441                Ok(ScalarValue::Float16(Some(consts::FRAC_PI_2_UPPER_F16)))
1442            }
1443            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1444            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1445            _ => {
1446                _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1447            }
1448        }
1449    }
1450
1451    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1452    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1453        match datatype {
1454            DataType::Float16 => Ok(ScalarValue::Float16(Some(
1455                consts::NEGATIVE_FRAC_PI_2_LOWER_F16,
1456            ))),
1457            DataType::Float32 => {
1458                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1459            }
1460            DataType::Float64 => {
1461                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1462            }
1463            _ => {
1464                _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1465            }
1466        }
1467    }
1468
1469    /// Returns a [`ScalarValue`] representing -PI
1470    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1471        match datatype {
1472            DataType::Float16 => Ok(ScalarValue::from(-f16::PI)),
1473            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1474            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1475            _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1476        }
1477    }
1478
1479    /// Returns a [`ScalarValue`] representing PI/2
1480    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1481        match datatype {
1482            DataType::Float16 => Ok(ScalarValue::from(f16::FRAC_PI_2)),
1483            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1484            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1485            _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1486        }
1487    }
1488
1489    /// Returns a [`ScalarValue`] representing -PI/2
1490    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1491        match datatype {
1492            DataType::Float16 => Ok(ScalarValue::from(-f16::FRAC_PI_2)),
1493            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1494            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1495            _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1496        }
1497    }
1498
1499    /// Returns a [`ScalarValue`] representing infinity
1500    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1501        match datatype {
1502            DataType::Float16 => Ok(ScalarValue::from(f16::INFINITY)),
1503            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1504            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1505            _ => {
1506                _internal_err!("Infinity is not supported for data type: {}", datatype)
1507            }
1508        }
1509    }
1510
1511    /// Returns a [`ScalarValue`] representing negative infinity
1512    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1513        match datatype {
1514            DataType::Float16 => Ok(ScalarValue::from(f16::NEG_INFINITY)),
1515            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1516            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1517            _ => {
1518                _internal_err!(
1519                    "Negative Infinity is not supported for data type: {}",
1520                    datatype
1521                )
1522            }
1523        }
1524    }
1525
1526    /// Create a zero value in the given type.
1527    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1528        Ok(match datatype {
1529            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1530            DataType::Int8 => ScalarValue::Int8(Some(0)),
1531            DataType::Int16 => ScalarValue::Int16(Some(0)),
1532            DataType::Int32 => ScalarValue::Int32(Some(0)),
1533            DataType::Int64 => ScalarValue::Int64(Some(0)),
1534            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1535            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1536            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1537            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1538            DataType::Float16 => ScalarValue::Float16(Some(f16::ZERO)),
1539            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1540            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1541            DataType::Decimal32(precision, scale) => {
1542                ScalarValue::Decimal32(Some(0), *precision, *scale)
1543            }
1544            DataType::Decimal64(precision, scale) => {
1545                ScalarValue::Decimal64(Some(0), *precision, *scale)
1546            }
1547            DataType::Decimal128(precision, scale) => {
1548                ScalarValue::Decimal128(Some(0), *precision, *scale)
1549            }
1550            DataType::Decimal256(precision, scale) => {
1551                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1552            }
1553            DataType::Timestamp(TimeUnit::Second, tz) => {
1554                ScalarValue::TimestampSecond(Some(0), tz.clone())
1555            }
1556            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1557                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1558            }
1559            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1560                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1561            }
1562            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1563                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1564            }
1565            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1566            DataType::Time32(TimeUnit::Millisecond) => {
1567                ScalarValue::Time32Millisecond(Some(0))
1568            }
1569            DataType::Time64(TimeUnit::Microsecond) => {
1570                ScalarValue::Time64Microsecond(Some(0))
1571            }
1572            DataType::Time64(TimeUnit::Nanosecond) => {
1573                ScalarValue::Time64Nanosecond(Some(0))
1574            }
1575            DataType::Interval(IntervalUnit::YearMonth) => {
1576                ScalarValue::IntervalYearMonth(Some(0))
1577            }
1578            DataType::Interval(IntervalUnit::DayTime) => {
1579                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1580            }
1581            DataType::Interval(IntervalUnit::MonthDayNano) => {
1582                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1583            }
1584            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1585            DataType::Duration(TimeUnit::Millisecond) => {
1586                ScalarValue::DurationMillisecond(Some(0))
1587            }
1588            DataType::Duration(TimeUnit::Microsecond) => {
1589                ScalarValue::DurationMicrosecond(Some(0))
1590            }
1591            DataType::Duration(TimeUnit::Nanosecond) => {
1592                ScalarValue::DurationNanosecond(Some(0))
1593            }
1594            DataType::Date32 => ScalarValue::Date32(Some(0)),
1595            DataType::Date64 => ScalarValue::Date64(Some(0)),
1596            _ => {
1597                return _not_impl_err!(
1598                    "Can't create a zero scalar from data_type \"{datatype}\""
1599                );
1600            }
1601        })
1602    }
1603
1604    /// Returns a default value for the given `DataType`.
1605    ///
1606    /// This function is useful when you need to initialize a column with
1607    /// non-null values in a DataFrame or when you need a "zero" value
1608    /// for a specific data type.
1609    ///
1610    /// # Default Values
1611    ///
1612    /// - **Numeric types**: Returns zero (via [`new_zero`])
1613    /// - **String types**: Returns empty string (`""`)
1614    /// - **Binary types**: Returns empty byte array
1615    /// - **Temporal types**: Returns zero/epoch value
1616    /// - **List types**: Returns empty list
1617    /// - **Struct types**: Returns struct with all fields set to their defaults
1618    /// - **Dictionary types**: Returns dictionary with default value
1619    /// - **Map types**: Returns empty map
1620    /// - **Union types**: Returns first variant with default value
1621    ///
1622    /// # Errors
1623    ///
1624    /// Returns an error for data types that don't have a clear default value
1625    /// or are not yet supported (e.g., `RunEndEncoded`).
1626    ///
1627    /// [`new_zero`]: Self::new_zero
1628    pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1629        match datatype {
1630            // Null type
1631            DataType::Null => Ok(ScalarValue::Null),
1632
1633            // Numeric types
1634            DataType::Boolean
1635            | DataType::Int8
1636            | DataType::Int16
1637            | DataType::Int32
1638            | DataType::Int64
1639            | DataType::UInt8
1640            | DataType::UInt16
1641            | DataType::UInt32
1642            | DataType::UInt64
1643            | DataType::Float16
1644            | DataType::Float32
1645            | DataType::Float64
1646            | DataType::Decimal32(_, _)
1647            | DataType::Decimal64(_, _)
1648            | DataType::Decimal128(_, _)
1649            | DataType::Decimal256(_, _)
1650            | DataType::Timestamp(_, _)
1651            | DataType::Time32(_)
1652            | DataType::Time64(_)
1653            | DataType::Interval(_)
1654            | DataType::Duration(_)
1655            | DataType::Date32
1656            | DataType::Date64 => ScalarValue::new_zero(datatype),
1657
1658            // String types
1659            DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1660            DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1661            DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1662
1663            // Binary types
1664            DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1665            DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1666            DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1667
1668            // Fixed-size binary
1669            DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1670                *size,
1671                Some(vec![0; *size as usize]),
1672            )),
1673
1674            // List types
1675            DataType::List(field) => {
1676                let list =
1677                    ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1678                Ok(ScalarValue::List(list))
1679            }
1680            DataType::FixedSizeList(field, _size) => {
1681                let empty_arr = new_empty_array(field.data_type());
1682                let values = Arc::new(
1683                    SingleRowListArrayBuilder::new(empty_arr)
1684                        .with_field(field)
1685                        .build_fixed_size_list_array(0),
1686                );
1687                Ok(ScalarValue::FixedSizeList(values))
1688            }
1689            DataType::LargeList(field) => {
1690                let list = ScalarValue::new_large_list(&[], field.data_type());
1691                Ok(ScalarValue::LargeList(list))
1692            }
1693            DataType::ListView(field) => {
1694                let empty_arr = new_empty_array(field.data_type());
1695                let values = Arc::new(
1696                    SingleRowListArrayBuilder::new(empty_arr)
1697                        .with_field(field)
1698                        .build_list_view_array(),
1699                );
1700                Ok(ScalarValue::ListView(values))
1701            }
1702            DataType::LargeListView(field) => {
1703                let empty_arr = new_empty_array(field.data_type());
1704                let values = Arc::new(
1705                    SingleRowListArrayBuilder::new(empty_arr)
1706                        .with_field(field)
1707                        .build_large_list_view_array(),
1708                );
1709                Ok(ScalarValue::LargeListView(values))
1710            }
1711
1712            // Struct types
1713            DataType::Struct(fields) => {
1714                let values = fields
1715                    .iter()
1716                    .map(|f| ScalarValue::new_default(f.data_type()))
1717                    .collect::<Result<Vec<_>>>()?;
1718                Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1719                    fields.clone(),
1720                    values
1721                        .into_iter()
1722                        .map(|v| v.to_array())
1723                        .collect::<Result<_>>()?,
1724                    None,
1725                ))))
1726            }
1727
1728            // Dictionary types
1729            DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1730                key_type.clone(),
1731                Box::new(ScalarValue::new_default(value_type)?),
1732            )),
1733
1734            DataType::RunEndEncoded(run_ends_field, value_field) => {
1735                Ok(ScalarValue::RunEndEncoded(
1736                    Arc::clone(run_ends_field),
1737                    Arc::clone(value_field),
1738                    Box::new(ScalarValue::new_default(value_field.data_type())?),
1739                ))
1740            }
1741
1742            // Map types
1743            DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1744                ArrayData::new_empty(field.data_type()),
1745            )))),
1746
1747            // Union types - return first variant with default value
1748            DataType::Union(fields, mode) => {
1749                if let Some((type_id, field)) = fields.iter().next() {
1750                    let default_value = ScalarValue::new_default(field.data_type())?;
1751                    Ok(ScalarValue::Union(
1752                        Some((type_id, Box::new(default_value))),
1753                        fields.clone(),
1754                        *mode,
1755                    ))
1756                } else {
1757                    _internal_err!("Union type must have at least one field")
1758                }
1759            }
1760        }
1761    }
1762
1763    /// Create an one value in the given type.
1764    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1765        Ok(match datatype {
1766            DataType::Int8 => ScalarValue::Int8(Some(1)),
1767            DataType::Int16 => ScalarValue::Int16(Some(1)),
1768            DataType::Int32 => ScalarValue::Int32(Some(1)),
1769            DataType::Int64 => ScalarValue::Int64(Some(1)),
1770            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1771            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1772            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1773            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1774            DataType::Float16 => ScalarValue::Float16(Some(f16::ONE)),
1775            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1776            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1777            DataType::Decimal32(precision, scale) => {
1778                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1779                    *precision, *scale,
1780                )?;
1781                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1782                match 10_i32.checked_pow(*scale as u32) {
1783                    Some(value) => {
1784                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1785                    }
1786                    None => return _internal_err!("Unsupported scale {scale}"),
1787                }
1788            }
1789            DataType::Decimal64(precision, scale) => {
1790                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1791                    *precision, *scale,
1792                )?;
1793                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1794                match i64::from(10).checked_pow(*scale as u32) {
1795                    Some(value) => {
1796                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1797                    }
1798                    None => return _internal_err!("Unsupported scale {scale}"),
1799                }
1800            }
1801            DataType::Decimal128(precision, scale) => {
1802                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1803                    *precision, *scale,
1804                )?;
1805                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1806                match i128::from(10).checked_pow(*scale as u32) {
1807                    Some(value) => {
1808                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1809                    }
1810                    None => return _internal_err!("Unsupported scale {scale}"),
1811                }
1812            }
1813            DataType::Decimal256(precision, scale) => {
1814                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1815                    *precision, *scale,
1816                )?;
1817                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1818                match i256::from(10).checked_pow(*scale as u32) {
1819                    Some(value) => {
1820                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1821                    }
1822                    None => return _internal_err!("Unsupported scale {scale}"),
1823                }
1824            }
1825            _ => {
1826                return _not_impl_err!(
1827                    "Can't create an one scalar from data_type \"{datatype}\""
1828                );
1829            }
1830        })
1831    }
1832
1833    /// Create a negative one value in the given type.
1834    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1835        Ok(match datatype {
1836            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1837            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1838            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1839            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1840            DataType::Float16 => ScalarValue::Float16(Some(f16::NEG_ONE)),
1841            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1842            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1843            DataType::Decimal32(precision, scale) => {
1844                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1845                    *precision, *scale,
1846                )?;
1847                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1848                match 10_i32.checked_pow(*scale as u32) {
1849                    Some(value) => {
1850                        ScalarValue::Decimal32(Some(-value), *precision, *scale)
1851                    }
1852                    None => return _internal_err!("Unsupported scale {scale}"),
1853                }
1854            }
1855            DataType::Decimal64(precision, scale) => {
1856                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1857                    *precision, *scale,
1858                )?;
1859                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1860                match i64::from(10).checked_pow(*scale as u32) {
1861                    Some(value) => {
1862                        ScalarValue::Decimal64(Some(-value), *precision, *scale)
1863                    }
1864                    None => return _internal_err!("Unsupported scale {scale}"),
1865                }
1866            }
1867            DataType::Decimal128(precision, scale) => {
1868                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1869                    *precision, *scale,
1870                )?;
1871                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1872                match i128::from(10).checked_pow(*scale as u32) {
1873                    Some(value) => {
1874                        ScalarValue::Decimal128(Some(-value), *precision, *scale)
1875                    }
1876                    None => return _internal_err!("Unsupported scale {scale}"),
1877                }
1878            }
1879            DataType::Decimal256(precision, scale) => {
1880                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1881                    *precision, *scale,
1882                )?;
1883                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1884                match i256::from(10).checked_pow(*scale as u32) {
1885                    Some(value) => {
1886                        ScalarValue::Decimal256(Some(-value), *precision, *scale)
1887                    }
1888                    None => return _internal_err!("Unsupported scale {scale}"),
1889                }
1890            }
1891            _ => {
1892                return _not_impl_err!(
1893                    "Can't create a negative one scalar from data_type \"{datatype}\""
1894                );
1895            }
1896        })
1897    }
1898
1899    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1900        Ok(match datatype {
1901            DataType::Int8 => ScalarValue::Int8(Some(10)),
1902            DataType::Int16 => ScalarValue::Int16(Some(10)),
1903            DataType::Int32 => ScalarValue::Int32(Some(10)),
1904            DataType::Int64 => ScalarValue::Int64(Some(10)),
1905            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1906            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1907            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1908            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1909            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1910            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1911            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1912            DataType::Decimal32(precision, scale) => {
1913                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1914                    *precision, *scale,
1915                )?;
1916                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1917                match 10_i32.checked_pow((*scale + 1) as u32) {
1918                    Some(value) => {
1919                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1920                    }
1921                    None => return _internal_err!("Unsupported scale {scale}"),
1922                }
1923            }
1924            DataType::Decimal64(precision, scale) => {
1925                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1926                    *precision, *scale,
1927                )?;
1928                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1929                match i64::from(10).checked_pow((*scale + 1) as u32) {
1930                    Some(value) => {
1931                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1932                    }
1933                    None => return _internal_err!("Unsupported scale {scale}"),
1934                }
1935            }
1936            DataType::Decimal128(precision, scale) => {
1937                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1938                    *precision, *scale,
1939                )?;
1940                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1941                match i128::from(10).checked_pow((*scale + 1) as u32) {
1942                    Some(value) => {
1943                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1944                    }
1945                    None => return _internal_err!("Unsupported scale {scale}"),
1946                }
1947            }
1948            DataType::Decimal256(precision, scale) => {
1949                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1950                    *precision, *scale,
1951                )?;
1952                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1953                match i256::from(10).checked_pow((*scale + 1) as u32) {
1954                    Some(value) => {
1955                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1956                    }
1957                    None => return _internal_err!("Unsupported scale {scale}"),
1958                }
1959            }
1960            _ => {
1961                return _not_impl_err!(
1962                    "Can't create a ten scalar from data_type \"{datatype}\""
1963                );
1964            }
1965        })
1966    }
1967
1968    /// return the [`DataType`] of this `ScalarValue`
1969    pub fn data_type(&self) -> DataType {
1970        match self {
1971            ScalarValue::Boolean(_) => DataType::Boolean,
1972            ScalarValue::UInt8(_) => DataType::UInt8,
1973            ScalarValue::UInt16(_) => DataType::UInt16,
1974            ScalarValue::UInt32(_) => DataType::UInt32,
1975            ScalarValue::UInt64(_) => DataType::UInt64,
1976            ScalarValue::Int8(_) => DataType::Int8,
1977            ScalarValue::Int16(_) => DataType::Int16,
1978            ScalarValue::Int32(_) => DataType::Int32,
1979            ScalarValue::Int64(_) => DataType::Int64,
1980            ScalarValue::Decimal32(_, precision, scale) => {
1981                DataType::Decimal32(*precision, *scale)
1982            }
1983            ScalarValue::Decimal64(_, precision, scale) => {
1984                DataType::Decimal64(*precision, *scale)
1985            }
1986            ScalarValue::Decimal128(_, precision, scale) => {
1987                DataType::Decimal128(*precision, *scale)
1988            }
1989            ScalarValue::Decimal256(_, precision, scale) => {
1990                DataType::Decimal256(*precision, *scale)
1991            }
1992            ScalarValue::TimestampSecond(_, tz_opt) => {
1993                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1994            }
1995            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1996                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1997            }
1998            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1999                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
2000            }
2001            ScalarValue::TimestampNanosecond(_, tz_opt) => {
2002                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
2003            }
2004            ScalarValue::Float16(_) => DataType::Float16,
2005            ScalarValue::Float32(_) => DataType::Float32,
2006            ScalarValue::Float64(_) => DataType::Float64,
2007            ScalarValue::Utf8(_) => DataType::Utf8,
2008            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
2009            ScalarValue::Utf8View(_) => DataType::Utf8View,
2010            ScalarValue::Binary(_) => DataType::Binary,
2011            ScalarValue::BinaryView(_) => DataType::BinaryView,
2012            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
2013            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
2014            ScalarValue::List(arr) => arr.data_type().to_owned(),
2015            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
2016            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
2017            ScalarValue::ListView(arr) => arr.data_type().to_owned(),
2018            ScalarValue::LargeListView(arr) => arr.data_type().to_owned(),
2019            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
2020            ScalarValue::Map(arr) => arr.data_type().to_owned(),
2021            ScalarValue::Date32(_) => DataType::Date32,
2022            ScalarValue::Date64(_) => DataType::Date64,
2023            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
2024            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
2025            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
2026            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
2027            ScalarValue::IntervalYearMonth(_) => {
2028                DataType::Interval(IntervalUnit::YearMonth)
2029            }
2030            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
2031            ScalarValue::IntervalMonthDayNano(_) => {
2032                DataType::Interval(IntervalUnit::MonthDayNano)
2033            }
2034            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
2035            ScalarValue::DurationMillisecond(_) => {
2036                DataType::Duration(TimeUnit::Millisecond)
2037            }
2038            ScalarValue::DurationMicrosecond(_) => {
2039                DataType::Duration(TimeUnit::Microsecond)
2040            }
2041            ScalarValue::DurationNanosecond(_) => {
2042                DataType::Duration(TimeUnit::Nanosecond)
2043            }
2044            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
2045            ScalarValue::Dictionary(k, v) => {
2046                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
2047            }
2048            ScalarValue::RunEndEncoded(run_ends_field, value_field, _) => {
2049                DataType::RunEndEncoded(
2050                    Arc::clone(run_ends_field),
2051                    Arc::clone(value_field),
2052                )
2053            }
2054            ScalarValue::Null => DataType::Null,
2055        }
2056    }
2057
2058    #[inline]
2059    fn can_use_direct_add(lhs: &ScalarValue, rhs: &ScalarValue) -> bool {
2060        matches!(
2061            (lhs, rhs),
2062            (ScalarValue::Int8(_), ScalarValue::Int8(_))
2063                | (ScalarValue::Int16(_), ScalarValue::Int16(_))
2064                | (ScalarValue::Int32(_), ScalarValue::Int32(_))
2065                | (ScalarValue::Int64(_), ScalarValue::Int64(_))
2066                | (ScalarValue::UInt8(_), ScalarValue::UInt8(_))
2067                | (ScalarValue::UInt16(_), ScalarValue::UInt16(_))
2068                | (ScalarValue::UInt32(_), ScalarValue::UInt32(_))
2069                | (ScalarValue::UInt64(_), ScalarValue::UInt64(_))
2070                | (ScalarValue::Float16(_), ScalarValue::Float16(_))
2071                | (ScalarValue::Float32(_), ScalarValue::Float32(_))
2072                | (ScalarValue::Float64(_), ScalarValue::Float64(_))
2073                | (
2074                    ScalarValue::Decimal32(_, _, _),
2075                    ScalarValue::Decimal32(_, _, _)
2076                )
2077                | (
2078                    ScalarValue::Decimal64(_, _, _),
2079                    ScalarValue::Decimal64(_, _, _)
2080                )
2081                | (
2082                    ScalarValue::Decimal128(_, _, _),
2083                    ScalarValue::Decimal128(_, _, _),
2084                )
2085                | (
2086                    ScalarValue::Decimal256(_, _, _),
2087                    ScalarValue::Decimal256(_, _, _),
2088                )
2089        )
2090    }
2091
2092    #[inline]
2093    fn add_optional<T: ArrowNativeTypeOp>(
2094        lhs: &mut Option<T>,
2095        rhs: Option<T>,
2096        checked: bool,
2097    ) -> Result<()> {
2098        match rhs {
2099            Some(rhs) => {
2100                if let Some(lhs) = lhs.as_mut() {
2101                    *lhs = if checked {
2102                        lhs.add_checked(rhs).map_err(|e| arrow_datafusion_err!(e))?
2103                    } else {
2104                        lhs.add_wrapping(rhs)
2105                    };
2106                }
2107            }
2108            None => *lhs = None,
2109        }
2110        Ok(())
2111    }
2112
2113    #[inline]
2114    fn add_decimal_values<T: DecimalType>(
2115        lhs_value: &mut Option<T::Native>,
2116        lhs_precision: &mut u8,
2117        lhs_scale: &mut i8,
2118        rhs_value: Option<T::Native>,
2119        rhs_precision: u8,
2120        rhs_scale: i8,
2121    ) -> Result<()>
2122    where
2123        T::Native: ArrowNativeTypeOp,
2124    {
2125        Self::validate_decimal_or_internal_err::<T>(*lhs_precision, *lhs_scale)?;
2126        Self::validate_decimal_or_internal_err::<T>(rhs_precision, rhs_scale)?;
2127
2128        let result_scale = (*lhs_scale).max(rhs_scale);
2129        // Decimal scales can be negative, so use a wider signed type for the
2130        // intermediate precision arithmetic.
2131        let lhs_precision_delta = i16::from(*lhs_precision) - i16::from(*lhs_scale);
2132        let rhs_precision_delta = i16::from(rhs_precision) - i16::from(rhs_scale);
2133        let result_precision =
2134            (i16::from(result_scale) + lhs_precision_delta.max(rhs_precision_delta) + 1)
2135                .min(i16::from(T::MAX_PRECISION)) as u8;
2136
2137        Self::validate_decimal_or_internal_err::<T>(result_precision, result_scale)?;
2138
2139        let lhs_mul = T::Native::usize_as(10)
2140            .pow_checked((result_scale - *lhs_scale) as u32)
2141            .map_err(|e| arrow_datafusion_err!(e))?;
2142        let rhs_mul = T::Native::usize_as(10)
2143            .pow_checked((result_scale - rhs_scale) as u32)
2144            .map_err(|e| arrow_datafusion_err!(e))?;
2145
2146        let result_value = match (*lhs_value, rhs_value) {
2147            (Some(lhs_value), Some(rhs_value)) => Some(
2148                lhs_value
2149                    .mul_checked(lhs_mul)
2150                    .and_then(|lhs| {
2151                        rhs_value
2152                            .mul_checked(rhs_mul)
2153                            .and_then(|rhs| lhs.add_checked(rhs))
2154                    })
2155                    .map_err(|e| arrow_datafusion_err!(e))?,
2156            ),
2157            _ => None,
2158        };
2159
2160        *lhs_value = result_value;
2161        *lhs_precision = result_precision;
2162        *lhs_scale = result_scale;
2163
2164        Ok(())
2165    }
2166
2167    #[inline]
2168    fn try_add_in_place_impl(
2169        &mut self,
2170        other: &ScalarValue,
2171        checked: bool,
2172    ) -> Result<bool> {
2173        match (self, other) {
2174            (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => {
2175                Self::add_optional(lhs, *rhs, checked)?;
2176            }
2177            (ScalarValue::Int16(lhs), ScalarValue::Int16(rhs)) => {
2178                Self::add_optional(lhs, *rhs, checked)?;
2179            }
2180            (ScalarValue::Int32(lhs), ScalarValue::Int32(rhs)) => {
2181                Self::add_optional(lhs, *rhs, checked)?;
2182            }
2183            (ScalarValue::Int64(lhs), ScalarValue::Int64(rhs)) => {
2184                Self::add_optional(lhs, *rhs, checked)?;
2185            }
2186            (ScalarValue::UInt8(lhs), ScalarValue::UInt8(rhs)) => {
2187                Self::add_optional(lhs, *rhs, checked)?;
2188            }
2189            (ScalarValue::UInt16(lhs), ScalarValue::UInt16(rhs)) => {
2190                Self::add_optional(lhs, *rhs, checked)?;
2191            }
2192            (ScalarValue::UInt32(lhs), ScalarValue::UInt32(rhs)) => {
2193                Self::add_optional(lhs, *rhs, checked)?;
2194            }
2195            (ScalarValue::UInt64(lhs), ScalarValue::UInt64(rhs)) => {
2196                Self::add_optional(lhs, *rhs, checked)?;
2197            }
2198            (ScalarValue::Float16(lhs), ScalarValue::Float16(rhs)) => {
2199                Self::add_optional(lhs, *rhs, checked)?;
2200            }
2201            (ScalarValue::Float32(lhs), ScalarValue::Float32(rhs)) => {
2202                Self::add_optional(lhs, *rhs, checked)?;
2203            }
2204            (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
2205                Self::add_optional(lhs, *rhs, checked)?;
2206            }
2207            (
2208                ScalarValue::Decimal32(lhs, p, s),
2209                ScalarValue::Decimal32(rhs, rhs_p, rhs_s),
2210            ) => {
2211                Self::add_decimal_values::<Decimal32Type>(
2212                    lhs, p, s, *rhs, *rhs_p, *rhs_s,
2213                )?;
2214            }
2215            (
2216                ScalarValue::Decimal64(lhs, p, s),
2217                ScalarValue::Decimal64(rhs, rhs_p, rhs_s),
2218            ) => {
2219                Self::add_decimal_values::<Decimal64Type>(
2220                    lhs, p, s, *rhs, *rhs_p, *rhs_s,
2221                )?;
2222            }
2223            (
2224                ScalarValue::Decimal128(lhs, p, s),
2225                ScalarValue::Decimal128(rhs, rhs_p, rhs_s),
2226            ) => {
2227                Self::add_decimal_values::<Decimal128Type>(
2228                    lhs, p, s, *rhs, *rhs_p, *rhs_s,
2229                )?;
2230            }
2231            (
2232                ScalarValue::Decimal256(lhs, p, s),
2233                ScalarValue::Decimal256(rhs, rhs_p, rhs_s),
2234            ) => {
2235                Self::add_decimal_values::<Decimal256Type>(
2236                    lhs, p, s, *rhs, *rhs_p, *rhs_s,
2237                )?;
2238            }
2239            _ => return Ok(false),
2240        }
2241
2242        Ok(true)
2243    }
2244
2245    #[inline]
2246    pub(crate) fn try_add_wrapping_in_place(
2247        &mut self,
2248        other: &ScalarValue,
2249    ) -> Result<bool> {
2250        self.try_add_in_place_impl(other, false)
2251    }
2252
2253    #[inline]
2254    pub(crate) fn try_add_checked_in_place(
2255        &mut self,
2256        other: &ScalarValue,
2257    ) -> Result<bool> {
2258        self.try_add_in_place_impl(other, true)
2259    }
2260
2261    /// Calculate arithmetic negation for a scalar value
2262    pub fn arithmetic_negate(&self) -> Result<Self> {
2263        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
2264            v: T,
2265            ctx: impl Fn() -> String,
2266        ) -> Result<T> {
2267            v.neg_checked()
2268                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
2269        }
2270        match self {
2271            ScalarValue::Int8(None)
2272            | ScalarValue::Int16(None)
2273            | ScalarValue::Int32(None)
2274            | ScalarValue::Int64(None)
2275            | ScalarValue::Float16(None)
2276            | ScalarValue::Float32(None)
2277            | ScalarValue::Float64(None) => Ok(self.clone()),
2278            ScalarValue::Float16(Some(v)) => Ok(ScalarValue::Float16(Some(-v))),
2279            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
2280            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
2281            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
2282            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
2283            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
2284            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
2285            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
2286                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
2287                    format!("In negation of IntervalYearMonth({v})")
2288                })?)),
2289            ),
2290            ScalarValue::IntervalDayTime(Some(v)) => {
2291                let (days, ms) = IntervalDayTimeType::to_parts(*v);
2292                let val = IntervalDayTimeType::make_value(
2293                    neg_checked_with_ctx(days, || {
2294                        format!("In negation of days {days} in IntervalDayTime")
2295                    })?,
2296                    neg_checked_with_ctx(ms, || {
2297                        format!("In negation of milliseconds {ms} in IntervalDayTime")
2298                    })?,
2299                );
2300                Ok(ScalarValue::IntervalDayTime(Some(val)))
2301            }
2302            ScalarValue::IntervalMonthDayNano(Some(v)) => {
2303                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
2304                let val = IntervalMonthDayNanoType::make_value(
2305                    neg_checked_with_ctx(months, || {
2306                        format!("In negation of months {months} of IntervalMonthDayNano")
2307                    })?,
2308                    neg_checked_with_ctx(days, || {
2309                        format!("In negation of days {days} of IntervalMonthDayNano")
2310                    })?,
2311                    neg_checked_with_ctx(nanos, || {
2312                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
2313                    })?,
2314                );
2315                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
2316            }
2317            ScalarValue::Decimal32(Some(v), precision, scale) => {
2318                Ok(ScalarValue::Decimal32(
2319                    Some(neg_checked_with_ctx(*v, || {
2320                        format!("In negation of Decimal32({v}, {precision}, {scale})")
2321                    })?),
2322                    *precision,
2323                    *scale,
2324                ))
2325            }
2326            ScalarValue::Decimal64(Some(v), precision, scale) => {
2327                Ok(ScalarValue::Decimal64(
2328                    Some(neg_checked_with_ctx(*v, || {
2329                        format!("In negation of Decimal64({v}, {precision}, {scale})")
2330                    })?),
2331                    *precision,
2332                    *scale,
2333                ))
2334            }
2335            ScalarValue::Decimal128(Some(v), precision, scale) => {
2336                Ok(ScalarValue::Decimal128(
2337                    Some(neg_checked_with_ctx(*v, || {
2338                        format!("In negation of Decimal128({v}, {precision}, {scale})")
2339                    })?),
2340                    *precision,
2341                    *scale,
2342                ))
2343            }
2344            ScalarValue::Decimal256(Some(v), precision, scale) => {
2345                Ok(ScalarValue::Decimal256(
2346                    Some(neg_checked_with_ctx(*v, || {
2347                        format!("In negation of Decimal256({v}, {precision}, {scale})")
2348                    })?),
2349                    *precision,
2350                    *scale,
2351                ))
2352            }
2353            ScalarValue::TimestampSecond(Some(v), tz) => {
2354                Ok(ScalarValue::TimestampSecond(
2355                    Some(neg_checked_with_ctx(*v, || {
2356                        format!("In negation of TimestampSecond({v})")
2357                    })?),
2358                    tz.clone(),
2359                ))
2360            }
2361            ScalarValue::TimestampNanosecond(Some(v), tz) => {
2362                Ok(ScalarValue::TimestampNanosecond(
2363                    Some(neg_checked_with_ctx(*v, || {
2364                        format!("In negation of TimestampNanoSecond({v})")
2365                    })?),
2366                    tz.clone(),
2367                ))
2368            }
2369            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
2370                Ok(ScalarValue::TimestampMicrosecond(
2371                    Some(neg_checked_with_ctx(*v, || {
2372                        format!("In negation of TimestampMicroSecond({v})")
2373                    })?),
2374                    tz.clone(),
2375                ))
2376            }
2377            ScalarValue::TimestampMillisecond(Some(v), tz) => {
2378                Ok(ScalarValue::TimestampMillisecond(
2379                    Some(neg_checked_with_ctx(*v, || {
2380                        format!("In negation of TimestampMilliSecond({v})")
2381                    })?),
2382                    tz.clone(),
2383                ))
2384            }
2385            value => _internal_err!(
2386                "Can not run arithmetic negative on scalar value {value:?}"
2387            ),
2388        }
2389    }
2390
2391    /// Wrapping addition of `ScalarValue`
2392    ///
2393    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2394    /// should operate on Arrays directly, using vectorized array kernels
2395    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2396        let other = other.borrow();
2397        if Self::can_use_direct_add(self, other) {
2398            let mut result = self.clone();
2399            if result.try_add_wrapping_in_place(other)? {
2400                return Ok(result);
2401            }
2402            debug_assert!(false, "fast-path eligibility drifted from implementation");
2403        }
2404
2405        let r = add_wrapping(&self.to_scalar()?, &other.to_scalar()?)?;
2406        Self::try_from_array(r.as_ref(), 0)
2407    }
2408
2409    /// Checked addition of `ScalarValue`
2410    ///
2411    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2412    /// should operate on Arrays directly, using vectorized array kernels
2413    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2414        let other = other.borrow();
2415        if Self::can_use_direct_add(self, other) {
2416            let mut result = self.clone();
2417            if result.try_add_checked_in_place(other)? {
2418                return Ok(result);
2419            }
2420            debug_assert!(false, "fast-path eligibility drifted from implementation");
2421        }
2422
2423        let r = add(&self.to_scalar()?, &other.to_scalar()?)?;
2424        Self::try_from_array(r.as_ref(), 0)
2425    }
2426
2427    /// Wrapping subtraction of `ScalarValue`
2428    ///
2429    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2430    /// should operate on Arrays directly, using vectorized array kernels
2431    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2432        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2433        Self::try_from_array(r.as_ref(), 0)
2434    }
2435
2436    /// Checked subtraction of `ScalarValue`
2437    ///
2438    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2439    /// should operate on Arrays directly, using vectorized array kernels
2440    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2441        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2442        Self::try_from_array(r.as_ref(), 0)
2443    }
2444
2445    /// Wrapping multiplication of `ScalarValue`
2446    ///
2447    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2448    /// should operate on Arrays directly, using vectorized array kernels.
2449    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2450        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2451        Self::try_from_array(r.as_ref(), 0)
2452    }
2453
2454    /// Checked multiplication of `ScalarValue`
2455    ///
2456    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2457    /// should operate on Arrays directly, using vectorized array kernels.
2458    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2459        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2460        Self::try_from_array(r.as_ref(), 0)
2461    }
2462
2463    /// Performs `lhs / rhs`
2464    ///
2465    /// Overflow or division by zero will result in an error, with exception to
2466    /// floating point numbers, which instead follow the IEEE 754 rules.
2467    ///
2468    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2469    /// should operate on Arrays directly, using vectorized array kernels.
2470    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2471        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2472        Self::try_from_array(r.as_ref(), 0)
2473    }
2474
2475    /// Performs `lhs % rhs`
2476    ///
2477    /// Overflow or division by zero will result in an error, with exception to
2478    /// floating point numbers, which instead follow the IEEE 754 rules.
2479    ///
2480    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2481    /// should operate on Arrays directly, using vectorized array kernels.
2482    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2483        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2484        Self::try_from_array(r.as_ref(), 0)
2485    }
2486
2487    pub fn is_unsigned(&self) -> bool {
2488        matches!(
2489            self,
2490            ScalarValue::UInt8(_)
2491                | ScalarValue::UInt16(_)
2492                | ScalarValue::UInt32(_)
2493                | ScalarValue::UInt64(_)
2494        )
2495    }
2496
2497    /// whether this value is null or not.
2498    pub fn is_null(&self) -> bool {
2499        match self {
2500            ScalarValue::Boolean(v) => v.is_none(),
2501            ScalarValue::Null => true,
2502            ScalarValue::Float16(v) => v.is_none(),
2503            ScalarValue::Float32(v) => v.is_none(),
2504            ScalarValue::Float64(v) => v.is_none(),
2505            ScalarValue::Decimal32(v, _, _) => v.is_none(),
2506            ScalarValue::Decimal64(v, _, _) => v.is_none(),
2507            ScalarValue::Decimal128(v, _, _) => v.is_none(),
2508            ScalarValue::Decimal256(v, _, _) => v.is_none(),
2509            ScalarValue::Int8(v) => v.is_none(),
2510            ScalarValue::Int16(v) => v.is_none(),
2511            ScalarValue::Int32(v) => v.is_none(),
2512            ScalarValue::Int64(v) => v.is_none(),
2513            ScalarValue::UInt8(v) => v.is_none(),
2514            ScalarValue::UInt16(v) => v.is_none(),
2515            ScalarValue::UInt32(v) => v.is_none(),
2516            ScalarValue::UInt64(v) => v.is_none(),
2517            ScalarValue::Utf8(v)
2518            | ScalarValue::Utf8View(v)
2519            | ScalarValue::LargeUtf8(v) => v.is_none(),
2520            ScalarValue::Binary(v)
2521            | ScalarValue::BinaryView(v)
2522            | ScalarValue::FixedSizeBinary(_, v)
2523            | ScalarValue::LargeBinary(v) => v.is_none(),
2524            // arr.len() should be 1 for a list scalar, but we don't seem to
2525            // enforce that anywhere, so we still check against array length.
2526            ScalarValue::List(arr) => arr.len() == arr.null_count(),
2527            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2528            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2529            ScalarValue::ListView(arr) => arr.len() == arr.null_count(),
2530            ScalarValue::LargeListView(arr) => arr.len() == arr.null_count(),
2531            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2532            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2533            ScalarValue::Date32(v) => v.is_none(),
2534            ScalarValue::Date64(v) => v.is_none(),
2535            ScalarValue::Time32Second(v) => v.is_none(),
2536            ScalarValue::Time32Millisecond(v) => v.is_none(),
2537            ScalarValue::Time64Microsecond(v) => v.is_none(),
2538            ScalarValue::Time64Nanosecond(v) => v.is_none(),
2539            ScalarValue::TimestampSecond(v, _) => v.is_none(),
2540            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2541            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2542            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2543            ScalarValue::IntervalYearMonth(v) => v.is_none(),
2544            ScalarValue::IntervalDayTime(v) => v.is_none(),
2545            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2546            ScalarValue::DurationSecond(v) => v.is_none(),
2547            ScalarValue::DurationMillisecond(v) => v.is_none(),
2548            ScalarValue::DurationMicrosecond(v) => v.is_none(),
2549            ScalarValue::DurationNanosecond(v) => v.is_none(),
2550            ScalarValue::Union(v, _, _) => match v {
2551                Some((_, s)) => s.is_null(),
2552                None => true,
2553            },
2554            ScalarValue::Dictionary(_, v) => v.is_null(),
2555            ScalarValue::RunEndEncoded(_, _, v) => v.is_null(),
2556        }
2557    }
2558
2559    /// Absolute distance between two numeric values (of the same type). This method will return
2560    /// None if either one of the arguments are null. It might also return None if the resulting
2561    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
2562    /// rounded to the nearest integer.
2563    ///
2564    ///
2565    /// Note: the datatype itself must support subtraction.
2566    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2567        match (self, other) {
2568            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2569            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2570            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2571            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2572            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2573            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2574            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2575            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2576            // TODO: we might want to look into supporting ceil/floor here for floats.
2577            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2578                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2579            }
2580            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2581                Some((l - r).abs().round() as _)
2582            }
2583            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2584                Some((l - r).abs().round() as _)
2585            }
2586            (Self::Date32(Some(l)), Self::Date32(Some(r))) => Some(l.abs_diff(*r) as _),
2587            (Self::Date64(Some(l)), Self::Date64(Some(r))) => Some(l.abs_diff(*r) as _),
2588            // Timestamp values are stored as epoch ticks regardless of timezone
2589            // annotation, so the distance is tz-independent (tz is display metadata).
2590            (Self::TimestampSecond(Some(l), _), Self::TimestampSecond(Some(r), _)) => {
2591                Some(l.abs_diff(*r) as _)
2592            }
2593            (
2594                Self::TimestampMillisecond(Some(l), _),
2595                Self::TimestampMillisecond(Some(r), _),
2596            ) => Some(l.abs_diff(*r) as _),
2597            (
2598                Self::TimestampMicrosecond(Some(l), _),
2599                Self::TimestampMicrosecond(Some(r), _),
2600            ) => Some(l.abs_diff(*r) as _),
2601            (
2602                Self::TimestampNanosecond(Some(l), _),
2603                Self::TimestampNanosecond(Some(r), _),
2604            ) => Some(l.abs_diff(*r) as _),
2605            (
2606                Self::Decimal128(Some(l), lprecision, lscale),
2607                Self::Decimal128(Some(r), rprecision, rscale),
2608            ) => {
2609                if lprecision == rprecision && lscale == rscale {
2610                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2611                } else {
2612                    None
2613                }
2614            }
2615            (
2616                Self::Decimal256(Some(l), lprecision, lscale),
2617                Self::Decimal256(Some(r), rprecision, rscale),
2618            ) => {
2619                if lprecision == rprecision && lscale == rscale {
2620                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2621                } else {
2622                    None
2623                }
2624            }
2625            _ => None,
2626        }
2627    }
2628
2629    /// Converts a scalar value into an 1-row array.
2630    ///
2631    /// # Errors
2632    ///
2633    /// Errors if the ScalarValue cannot be converted into a 1-row array
2634    pub fn to_array(&self) -> Result<ArrayRef> {
2635        self.to_array_of_size(1)
2636    }
2637
2638    /// Converts a scalar into an arrow [`Scalar`] (which implements
2639    /// the [`Datum`] interface).
2640    ///
2641    /// This can be used to call arrow compute kernels such as `lt`
2642    ///
2643    /// # Errors
2644    ///
2645    /// Errors if the ScalarValue cannot be converted into a 1-row array
2646    ///
2647    /// # Example
2648    /// ```
2649    /// use arrow::array::{BooleanArray, Int32Array};
2650    /// use datafusion_common::ScalarValue;
2651    ///
2652    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
2653    /// let five = ScalarValue::Int32(Some(5));
2654    ///
2655    /// let result =
2656    ///     arrow::compute::kernels::cmp::lt(&arr, &five.to_scalar().unwrap()).unwrap();
2657    ///
2658    /// let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
2659    ///
2660    /// assert_eq!(&result, &expected);
2661    /// ```
2662    /// [`Datum`]: arrow::array::Datum
2663    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2664        Ok(Scalar::new(self.to_array_of_size(1)?))
2665    }
2666
2667    /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
2668    /// corresponding to those values. For example, an iterator of
2669    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
2670    ///
2671    /// Returns an error if the iterator is empty or if the
2672    /// [`ScalarValue`]s are not all the same type
2673    ///
2674    /// # Example
2675    /// ```
2676    /// use arrow::array::{ArrayRef, BooleanArray};
2677    /// use datafusion_common::ScalarValue;
2678    ///
2679    /// let scalars = vec![
2680    ///     ScalarValue::Boolean(Some(true)),
2681    ///     ScalarValue::Boolean(None),
2682    ///     ScalarValue::Boolean(Some(false)),
2683    /// ];
2684    ///
2685    /// // Build an Array from the list of ScalarValues
2686    /// let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
2687    ///
2688    /// let expected: ArrayRef =
2689    ///     std::sync::Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)]));
2690    ///
2691    /// assert_eq!(&array, &expected);
2692    /// ```
2693    pub fn iter_to_array(
2694        scalars: impl IntoIterator<Item = ScalarValue>,
2695    ) -> Result<ArrayRef> {
2696        let mut scalars = scalars.into_iter().peekable();
2697
2698        // figure out the type based on the first element
2699        let data_type = match scalars.peek() {
2700            None => {
2701                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
2702            }
2703            Some(sv) => sv.data_type(),
2704        };
2705
2706        /// Creates an array of $ARRAY_TY by unpacking values of
2707        /// SCALAR_TY for primitive types
2708        macro_rules! build_array_primitive {
2709            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2710                {
2711                    let array = scalars
2712                        .map(|sv| {
2713                            if let ScalarValue::$SCALAR_TY(v) = sv {
2714                                Ok(v)
2715                            } else {
2716                                _exec_err!(
2717                                    "Inconsistent types in ScalarValue::iter_to_array. \
2718                                    Expected {:?}, got {:?}",
2719                                    data_type,
2720                                    sv
2721                                )
2722                            }
2723                        })
2724                        .collect::<Result<$ARRAY_TY>>()?;
2725                    Arc::new(array)
2726                }
2727            }};
2728        }
2729
2730        macro_rules! build_array_primitive_tz {
2731            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
2732                {
2733                    let array = scalars
2734                        .map(|sv| {
2735                            if let ScalarValue::$SCALAR_TY(v, _) = sv {
2736                                Ok(v)
2737                            } else {
2738                                _exec_err!(
2739                                    "Inconsistent types in ScalarValue::iter_to_array. \
2740                                    Expected {:?}, got {:?}",
2741                                    data_type,
2742                                    sv
2743                                )
2744                            }
2745                        })
2746                        .collect::<Result<$ARRAY_TY>>()?;
2747                    Arc::new(array.with_timezone_opt($TZ.clone()))
2748                }
2749            }};
2750        }
2751
2752        /// Creates an array of $ARRAY_TY by unpacking values of
2753        /// SCALAR_TY for "string-like" types.
2754        macro_rules! build_array_string {
2755            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2756                {
2757                    let array = scalars
2758                        .map(|sv| {
2759                            if let ScalarValue::$SCALAR_TY(v) = sv {
2760                                Ok(v)
2761                            } else {
2762                                _exec_err!(
2763                                    "Inconsistent types in ScalarValue::iter_to_array. \
2764                                    Expected {:?}, got {:?}",
2765                                    data_type,
2766                                    sv
2767                                )
2768                            }
2769                        })
2770                        .collect::<Result<$ARRAY_TY>>()?;
2771                    Arc::new(array)
2772                }
2773            }};
2774        }
2775
2776        let array: ArrayRef = match &data_type {
2777            DataType::Decimal32(precision, scale) => {
2778                let decimal_array =
2779                    ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
2780                Arc::new(decimal_array)
2781            }
2782            DataType::Decimal64(precision, scale) => {
2783                let decimal_array =
2784                    ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
2785                Arc::new(decimal_array)
2786            }
2787            DataType::Decimal128(precision, scale) => {
2788                let decimal_array =
2789                    ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
2790                Arc::new(decimal_array)
2791            }
2792            DataType::Decimal256(precision, scale) => {
2793                let decimal_array =
2794                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
2795                Arc::new(decimal_array)
2796            }
2797            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
2798            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
2799            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
2800            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
2801            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
2802            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
2803            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
2804            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
2805            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
2806            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
2807            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
2808            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
2809            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
2810            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
2811            DataType::Utf8 => build_array_string!(StringArray, Utf8),
2812            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
2813            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
2814            DataType::Binary => build_array_string!(BinaryArray, Binary),
2815            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
2816            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
2817            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
2818            DataType::Time32(TimeUnit::Second) => {
2819                build_array_primitive!(Time32SecondArray, Time32Second)
2820            }
2821            DataType::Time32(TimeUnit::Millisecond) => {
2822                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
2823            }
2824            DataType::Time64(TimeUnit::Microsecond) => {
2825                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
2826            }
2827            DataType::Time64(TimeUnit::Nanosecond) => {
2828                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
2829            }
2830            DataType::Timestamp(TimeUnit::Second, tz) => {
2831                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
2832            }
2833            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
2834                build_array_primitive_tz!(
2835                    TimestampMillisecondArray,
2836                    TimestampMillisecond,
2837                    tz
2838                )
2839            }
2840            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2841                build_array_primitive_tz!(
2842                    TimestampMicrosecondArray,
2843                    TimestampMicrosecond,
2844                    tz
2845                )
2846            }
2847            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2848                build_array_primitive_tz!(
2849                    TimestampNanosecondArray,
2850                    TimestampNanosecond,
2851                    tz
2852                )
2853            }
2854            DataType::Duration(TimeUnit::Second) => {
2855                build_array_primitive!(DurationSecondArray, DurationSecond)
2856            }
2857            DataType::Duration(TimeUnit::Millisecond) => {
2858                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2859            }
2860            DataType::Duration(TimeUnit::Microsecond) => {
2861                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2862            }
2863            DataType::Duration(TimeUnit::Nanosecond) => {
2864                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2865            }
2866            DataType::Interval(IntervalUnit::DayTime) => {
2867                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2868            }
2869            DataType::Interval(IntervalUnit::YearMonth) => {
2870                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2871            }
2872            DataType::Interval(IntervalUnit::MonthDayNano) => {
2873                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2874            }
2875            DataType::FixedSizeList(_, _) => {
2876                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
2877                // The length of nulls here we got is 1, so we need to resize the length of nulls to
2878                // the length of non-nulls.
2879                let mut arrays =
2880                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2881                let first_non_null_data_type = arrays
2882                    .iter()
2883                    .find(|sv| !sv.is_null(0))
2884                    .map(|sv| sv.data_type().to_owned());
2885                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2886                    for array in arrays.iter_mut() {
2887                        if array.is_null(0) {
2888                            *array = Arc::new(FixedSizeListArray::new_null(
2889                                Arc::clone(&f),
2890                                l,
2891                                1,
2892                            ));
2893                        }
2894                    }
2895                }
2896                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2897                arrow::compute::concat(arrays.as_slice())?
2898            }
2899            DataType::List(_)
2900            | DataType::LargeList(_)
2901            | DataType::ListView(_)
2902            | DataType::LargeListView(_)
2903            | DataType::Map(_, _)
2904            | DataType::Struct(_)
2905            | DataType::Union(_, _) => {
2906                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2907                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2908                arrow::compute::concat(arrays.as_slice())?
2909            }
2910            DataType::Dictionary(key_type, value_type) => {
2911                // create the values array
2912                let value_scalars = scalars
2913                    .map(|scalar| match scalar {
2914                        ScalarValue::Dictionary(inner_key_type, scalar) => {
2915                            if &inner_key_type == key_type {
2916                                Ok(*scalar)
2917                            } else {
2918                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2919                            }
2920                        }
2921                        _ => {
2922                            _exec_err!(
2923                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2924                            )
2925                        }
2926                    })
2927                    .collect::<Result<Vec<_>>>()?;
2928
2929                let values = Self::iter_to_array(value_scalars)?;
2930                assert_eq!(values.data_type(), value_type.as_ref());
2931
2932                match key_type.as_ref() {
2933                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2934                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2935                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2936                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2937                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2938                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2939                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2940                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2941                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
2942                }
2943            }
2944            DataType::RunEndEncoded(run_ends_field, value_field) => {
2945                fn make_run_array<R: RunEndIndexType>(
2946                    scalars: impl IntoIterator<Item = ScalarValue>,
2947                    run_ends_field: &FieldRef,
2948                    values_field: &FieldRef,
2949                ) -> Result<ArrayRef> {
2950                    let mut scalars = scalars.into_iter();
2951
2952                    let mut run_ends = vec![];
2953                    let mut value_scalars = vec![];
2954
2955                    let mut len = R::Native::ONE;
2956                    let mut current =
2957                        if let Some(ScalarValue::RunEndEncoded(_, _, scalar)) =
2958                            scalars.next()
2959                        {
2960                            *scalar
2961                        } else {
2962                            // We are guaranteed to have one element of correct
2963                            // type because we peeked above
2964                            unreachable!()
2965                        };
2966                    for scalar in scalars {
2967                        let scalar = match scalar {
2968                            ScalarValue::RunEndEncoded(
2969                                inner_run_ends_field,
2970                                inner_value_field,
2971                                scalar,
2972                            ) if &inner_run_ends_field == run_ends_field
2973                                && &inner_value_field == values_field =>
2974                            {
2975                                *scalar
2976                            }
2977                            _ => {
2978                                return _exec_err!(
2979                                    "Expected RunEndEncoded scalar with run-ends field {run_ends_field} but got: {scalar:?}"
2980                                );
2981                            }
2982                        };
2983
2984                        // new run
2985                        if scalar != current {
2986                            run_ends.push(len);
2987                            value_scalars.push(current);
2988                            current = scalar;
2989                        }
2990
2991                        len = len.add_checked(R::Native::ONE).map_err(|_| {
2992                            DataFusionError::Execution(format!(
2993                                "Cannot construct RunArray: Overflows run-ends type {}",
2994                                run_ends_field.data_type()
2995                            ))
2996                        })?;
2997                    }
2998
2999                    run_ends.push(len);
3000                    value_scalars.push(current);
3001
3002                    let run_ends = PrimitiveArray::<R>::from_iter_values(run_ends);
3003                    let values = ScalarValue::iter_to_array(value_scalars)?;
3004
3005                    // Using ArrayDataBuilder so we can maintain the fields
3006                    let dt = DataType::RunEndEncoded(
3007                        Arc::clone(run_ends_field),
3008                        Arc::clone(values_field),
3009                    );
3010                    let builder = ArrayDataBuilder::new(dt)
3011                        .len(RunArray::logical_len(&run_ends))
3012                        .add_child_data(run_ends.to_data())
3013                        .add_child_data(values.to_data());
3014                    let run_array = RunArray::<R>::from(builder.build()?);
3015
3016                    Ok(Arc::new(run_array))
3017                }
3018
3019                match run_ends_field.data_type() {
3020                    DataType::Int16 => {
3021                        make_run_array::<Int16Type>(scalars, run_ends_field, value_field)?
3022                    }
3023                    DataType::Int32 => {
3024                        make_run_array::<Int32Type>(scalars, run_ends_field, value_field)?
3025                    }
3026                    DataType::Int64 => {
3027                        make_run_array::<Int64Type>(scalars, run_ends_field, value_field)?
3028                    }
3029                    dt => unreachable!("Invalid run-ends type: {dt}"),
3030                }
3031            }
3032            DataType::FixedSizeBinary(size) => {
3033                let array = scalars
3034                    .map(|sv| {
3035                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
3036                            Ok(v)
3037                        } else {
3038                            _exec_err!(
3039                                "Inconsistent types in ScalarValue::iter_to_array. \
3040                                Expected {data_type}, got {sv:?}"
3041                            )
3042                        }
3043                    })
3044                    .collect::<Result<Vec<_>>>()?;
3045                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
3046                    array.into_iter(),
3047                    *size,
3048                )?;
3049                Arc::new(array)
3050            }
3051            // explicitly enumerate unsupported types so newly added
3052            // types must be acknowledged, Time32 and Time64 types are
3053            // not supported if the TimeUnit is not valid (Time32 can
3054            // only be used with Second and Millisecond, Time64 only
3055            // with Microsecond and Nanosecond)
3056            DataType::Time32(TimeUnit::Microsecond)
3057            | DataType::Time32(TimeUnit::Nanosecond)
3058            | DataType::Time64(TimeUnit::Second)
3059            | DataType::Time64(TimeUnit::Millisecond) => {
3060                return _not_impl_err!(
3061                    "Unsupported creation of {:?} array from ScalarValue {:?}",
3062                    data_type,
3063                    scalars.peek()
3064                );
3065            }
3066        };
3067        Ok(array)
3068    }
3069
3070    fn iter_to_null_array(
3071        scalars: impl IntoIterator<Item = ScalarValue>,
3072    ) -> Result<ArrayRef> {
3073        let length = scalars.into_iter().try_fold(
3074            0usize,
3075            |r, element: ScalarValue| match element {
3076                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
3077                s => {
3078                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3079                }
3080            },
3081        )?;
3082        Ok(new_null_array(&DataType::Null, length))
3083    }
3084
3085    fn iter_to_decimal32_array(
3086        scalars: impl IntoIterator<Item = ScalarValue>,
3087        precision: u8,
3088        scale: i8,
3089    ) -> Result<Decimal32Array> {
3090        let array = scalars
3091            .into_iter()
3092            .map(|element: ScalarValue| match element {
3093                ScalarValue::Decimal32(v1, _, _) => Ok(v1),
3094                s => {
3095                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3096                }
3097            })
3098            .collect::<Result<Decimal32Array>>()?
3099            .with_precision_and_scale(precision, scale)?;
3100        Ok(array)
3101    }
3102
3103    fn iter_to_decimal64_array(
3104        scalars: impl IntoIterator<Item = ScalarValue>,
3105        precision: u8,
3106        scale: i8,
3107    ) -> Result<Decimal64Array> {
3108        let array = scalars
3109            .into_iter()
3110            .map(|element: ScalarValue| match element {
3111                ScalarValue::Decimal64(v1, _, _) => Ok(v1),
3112                s => {
3113                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3114                }
3115            })
3116            .collect::<Result<Decimal64Array>>()?
3117            .with_precision_and_scale(precision, scale)?;
3118        Ok(array)
3119    }
3120
3121    fn iter_to_decimal128_array(
3122        scalars: impl IntoIterator<Item = ScalarValue>,
3123        precision: u8,
3124        scale: i8,
3125    ) -> Result<Decimal128Array> {
3126        let array = scalars
3127            .into_iter()
3128            .map(|element: ScalarValue| match element {
3129                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
3130                s => {
3131                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3132                }
3133            })
3134            .collect::<Result<Decimal128Array>>()?
3135            .with_precision_and_scale(precision, scale)?;
3136        Ok(array)
3137    }
3138
3139    fn iter_to_decimal256_array(
3140        scalars: impl IntoIterator<Item = ScalarValue>,
3141        precision: u8,
3142        scale: i8,
3143    ) -> Result<Decimal256Array> {
3144        let array = scalars
3145            .into_iter()
3146            .map(|element: ScalarValue| match element {
3147                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
3148                s => {
3149                    _internal_err!(
3150                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
3151                    )
3152                }
3153            })
3154            .collect::<Result<Decimal256Array>>()?
3155            .with_precision_and_scale(precision, scale)?;
3156        Ok(array)
3157    }
3158
3159    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
3160    /// `data_type`, to a single element [`ListArray`].
3161    ///
3162    /// Example
3163    /// ```
3164    /// use arrow::array::{Int32Array, ListArray};
3165    /// use arrow::datatypes::{DataType, Int32Type};
3166    /// use datafusion_common::cast::as_list_array;
3167    /// use datafusion_common::ScalarValue;
3168    ///
3169    /// let scalars = vec![
3170    ///     ScalarValue::Int32(Some(1)),
3171    ///     ScalarValue::Int32(None),
3172    ///     ScalarValue::Int32(Some(2)),
3173    /// ];
3174    ///
3175    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
3176    ///
3177    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3178    ///     Some(1),
3179    ///     None,
3180    ///     Some(2),
3181    /// ])]);
3182    ///
3183    /// assert_eq!(*result, expected);
3184    /// ```
3185    pub fn new_list(
3186        values: &[ScalarValue],
3187        data_type: &DataType,
3188        nullable: bool,
3189    ) -> Arc<ListArray> {
3190        let values = if values.is_empty() {
3191            new_empty_array(data_type)
3192        } else {
3193            Self::iter_to_array(values.iter().cloned()).unwrap()
3194        };
3195        Arc::new(
3196            SingleRowListArrayBuilder::new(values)
3197                .with_nullable(nullable)
3198                .build_list_array(),
3199        )
3200    }
3201
3202    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
3203    pub fn new_list_nullable(
3204        values: &[ScalarValue],
3205        data_type: &DataType,
3206    ) -> Arc<ListArray> {
3207        Self::new_list(values, data_type, true)
3208    }
3209
3210    /// Create ListArray with Null with specific data type
3211    ///
3212    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
3213    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
3214        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
3215        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
3216            &data_type, null_len,
3217        ))))
3218    }
3219
3220    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
3221    /// `data_type`, to a [`ListArray`].
3222    ///
3223    /// Example
3224    /// ```
3225    /// use arrow::array::{Int32Array, ListArray};
3226    /// use arrow::datatypes::{DataType, Int32Type};
3227    /// use datafusion_common::cast::as_list_array;
3228    /// use datafusion_common::ScalarValue;
3229    ///
3230    /// let scalars = vec![
3231    ///     ScalarValue::Int32(Some(1)),
3232    ///     ScalarValue::Int32(None),
3233    ///     ScalarValue::Int32(Some(2)),
3234    /// ];
3235    ///
3236    /// let result =
3237    ///     ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
3238    ///
3239    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3240    ///     Some(1),
3241    ///     None,
3242    ///     Some(2),
3243    /// ])]);
3244    ///
3245    /// assert_eq!(*result, expected);
3246    /// ```
3247    pub fn new_list_from_iter(
3248        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
3249        data_type: &DataType,
3250        nullable: bool,
3251    ) -> Arc<ListArray> {
3252        let values = if values.len() == 0 {
3253            new_empty_array(data_type)
3254        } else {
3255            Self::iter_to_array(values).unwrap()
3256        };
3257        Arc::new(
3258            SingleRowListArrayBuilder::new(values)
3259                .with_nullable(nullable)
3260                .build_list_array(),
3261        )
3262    }
3263
3264    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
3265    /// `data_type`, to a [`LargeListArray`].
3266    ///
3267    /// Example
3268    /// ```
3269    /// use arrow::array::{Int32Array, LargeListArray};
3270    /// use arrow::datatypes::{DataType, Int32Type};
3271    /// use datafusion_common::cast::as_large_list_array;
3272    /// use datafusion_common::ScalarValue;
3273    ///
3274    /// let scalars = vec![
3275    ///     ScalarValue::Int32(Some(1)),
3276    ///     ScalarValue::Int32(None),
3277    ///     ScalarValue::Int32(Some(2)),
3278    /// ];
3279    ///
3280    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
3281    ///
3282    /// let expected =
3283    ///     LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3284    ///         Some(1),
3285    ///         None,
3286    ///         Some(2),
3287    ///     ])]);
3288    ///
3289    /// assert_eq!(*result, expected);
3290    /// ```
3291    pub fn new_large_list(
3292        values: &[ScalarValue],
3293        data_type: &DataType,
3294    ) -> Arc<LargeListArray> {
3295        let values = if values.is_empty() {
3296            new_empty_array(data_type)
3297        } else {
3298            Self::iter_to_array(values.iter().cloned()).unwrap()
3299        };
3300        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
3301    }
3302
3303    /// Converts a scalar value into an array of `size` rows.
3304    ///
3305    /// # Errors
3306    ///
3307    /// Errors if `self` is
    /// - a decimal that fails to be converted to a decimal array of size
3309    /// - a `FixedSizeList` that fails to be concatenated into an array of size
3310    /// - a `List` that fails to be concatenated into an array of size
    /// - a `Dictionary` that fails to be converted to a dictionary array of size
3312    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
3313        Ok(match self {
3314            ScalarValue::Decimal32(Some(e), precision, scale) => Arc::new(
3315                Decimal32Array::from_value(*e, size)
3316                    .with_precision_and_scale(*precision, *scale)?,
3317            ),
3318            ScalarValue::Decimal32(None, precision, scale) => {
3319                new_null_array(&DataType::Decimal32(*precision, *scale), size)
3320            }
3321            ScalarValue::Decimal64(Some(e), precision, scale) => Arc::new(
3322                Decimal64Array::from_value(*e, size)
3323                    .with_precision_and_scale(*precision, *scale)?,
3324            ),
3325            ScalarValue::Decimal64(None, precision, scale) => {
3326                new_null_array(&DataType::Decimal64(*precision, *scale), size)
3327            }
3328            ScalarValue::Decimal128(Some(e), precision, scale) => Arc::new(
3329                Decimal128Array::from_value(*e, size)
3330                    .with_precision_and_scale(*precision, *scale)?,
3331            ),
3332            ScalarValue::Decimal128(None, precision, scale) => {
3333                new_null_array(&DataType::Decimal128(*precision, *scale), size)
3334            }
3335            ScalarValue::Decimal256(Some(e), precision, scale) => Arc::new(
3336                Decimal256Array::from_value(*e, size)
3337                    .with_precision_and_scale(*precision, *scale)?,
3338            ),
3339            ScalarValue::Decimal256(None, precision, scale) => {
3340                new_null_array(&DataType::Decimal256(*precision, *scale), size)
3341            }
3342
3343            ScalarValue::Boolean(e) => match e {
3344                None => new_null_array(&DataType::Boolean, size),
3345                Some(true) => {
3346                    Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
3347                        as ArrayRef
3348                }
3349                Some(false) => {
3350                    Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
3351                        as ArrayRef
3352                }
3353            },
3354            ScalarValue::Float64(e) => {
3355                build_array_from_option!(Float64, Float64Array, e, size)
3356            }
3357            ScalarValue::Float32(e) => {
3358                build_array_from_option!(Float32, Float32Array, e, size)
3359            }
3360            ScalarValue::Float16(e) => {
3361                build_array_from_option!(Float16, Float16Array, e, size)
3362            }
3363            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
3364            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
3365            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
3366            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
3367            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
3368            ScalarValue::UInt16(e) => {
3369                build_array_from_option!(UInt16, UInt16Array, e, size)
3370            }
3371            ScalarValue::UInt32(e) => {
3372                build_array_from_option!(UInt32, UInt32Array, e, size)
3373            }
3374            ScalarValue::UInt64(e) => {
3375                build_array_from_option!(UInt64, UInt64Array, e, size)
3376            }
3377            ScalarValue::TimestampSecond(e, tz_opt) => {
3378                build_timestamp_array_from_option!(
3379                    TimeUnit::Second,
3380                    tz_opt.clone(),
3381                    TimestampSecondArray,
3382                    e,
3383                    size
3384                )
3385            }
3386            ScalarValue::TimestampMillisecond(e, tz_opt) => {
3387                build_timestamp_array_from_option!(
3388                    TimeUnit::Millisecond,
3389                    tz_opt.clone(),
3390                    TimestampMillisecondArray,
3391                    e,
3392                    size
3393                )
3394            }
3395
3396            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
3397                build_timestamp_array_from_option!(
3398                    TimeUnit::Microsecond,
3399                    tz_opt.clone(),
3400                    TimestampMicrosecondArray,
3401                    e,
3402                    size
3403                )
3404            }
3405            ScalarValue::TimestampNanosecond(e, tz_opt) => {
3406                build_timestamp_array_from_option!(
3407                    TimeUnit::Nanosecond,
3408                    tz_opt.clone(),
3409                    TimestampNanosecondArray,
3410                    e,
3411                    size
3412                )
3413            }
3414            ScalarValue::Utf8(e) => match e {
3415                Some(value) => Arc::new(StringArray::new_repeated(value, size)),
3416                None => new_null_array(&DataType::Utf8, size),
3417            },
3418            ScalarValue::Utf8View(e) => match e {
3419                Some(value) => {
3420                    let mut builder = StringViewBuilder::with_capacity(size);
3421                    builder.try_append_value_n(value, size)?;
3422                    let array = builder.finish();
3423                    Arc::new(array)
3424                }
3425                None => new_null_array(&DataType::Utf8View, size),
3426            },
3427            ScalarValue::LargeUtf8(e) => match e {
3428                Some(value) => Arc::new(LargeStringArray::new_repeated(value, size)),
3429                None => new_null_array(&DataType::LargeUtf8, size),
3430            },
3431            ScalarValue::Binary(e) => match e {
3432                Some(value) => {
3433                    Arc::new(BinaryArray::new_repeated(value.as_slice(), size))
3434                }
3435                None => new_null_array(&DataType::Binary, size),
3436            },
3437            ScalarValue::BinaryView(e) => match e {
3438                Some(value) => {
3439                    let mut builder = BinaryViewBuilder::with_capacity(size);
3440                    builder.try_append_value_n(value, size)?;
3441                    let array = builder.finish();
3442                    Arc::new(array)
3443                }
3444                None => new_null_array(&DataType::BinaryView, size),
3445            },
3446            ScalarValue::FixedSizeBinary(s, e) => match e {
3447                Some(value) => Arc::new(
3448                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
3449                        repeat_n(Some(value.as_slice()), size),
3450                        *s,
3451                    )
3452                    .unwrap(),
3453                ),
3454                None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
3455            },
3456            ScalarValue::LargeBinary(e) => match e {
3457                Some(value) => {
3458                    Arc::new(LargeBinaryArray::new_repeated(value.as_slice(), size))
3459                }
3460                None => new_null_array(&DataType::LargeBinary, size),
3461            },
3462            ScalarValue::List(arr) => {
3463                if size == 1 {
3464                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3465                }
3466                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3467            }
3468            ScalarValue::LargeList(arr) => {
3469                if size == 1 {
3470                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3471                }
3472                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3473            }
3474            ScalarValue::FixedSizeList(arr) => {
3475                if size == 1 {
3476                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3477                }
3478                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3479            }
3480            ScalarValue::ListView(arr) => {
3481                if size == 1 {
3482                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3483                }
3484                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3485            }
3486            ScalarValue::LargeListView(arr) => {
3487                if size == 1 {
3488                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3489                }
3490                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3491            }
3492            ScalarValue::Struct(arr) => {
3493                if size == 1 {
3494                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3495                }
3496                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3497            }
3498            ScalarValue::Map(arr) => {
3499                if size == 1 {
3500                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3501                }
3502                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3503            }
3504            ScalarValue::Date32(e) => {
3505                build_array_from_option!(Date32, Date32Array, e, size)
3506            }
3507            ScalarValue::Date64(e) => {
3508                build_array_from_option!(Date64, Date64Array, e, size)
3509            }
3510            ScalarValue::Time32Second(e) => {
3511                build_array_from_option!(
3512                    Time32,
3513                    TimeUnit::Second,
3514                    Time32SecondArray,
3515                    e,
3516                    size
3517                )
3518            }
3519            ScalarValue::Time32Millisecond(e) => {
3520                build_array_from_option!(
3521                    Time32,
3522                    TimeUnit::Millisecond,
3523                    Time32MillisecondArray,
3524                    e,
3525                    size
3526                )
3527            }
3528            ScalarValue::Time64Microsecond(e) => {
3529                build_array_from_option!(
3530                    Time64,
3531                    TimeUnit::Microsecond,
3532                    Time64MicrosecondArray,
3533                    e,
3534                    size
3535                )
3536            }
3537            ScalarValue::Time64Nanosecond(e) => {
3538                build_array_from_option!(
3539                    Time64,
3540                    TimeUnit::Nanosecond,
3541                    Time64NanosecondArray,
3542                    e,
3543                    size
3544                )
3545            }
3546            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3547                Interval,
3548                IntervalUnit::DayTime,
3549                IntervalDayTimeArray,
3550                e,
3551                size
3552            ),
3553            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3554                Interval,
3555                IntervalUnit::YearMonth,
3556                IntervalYearMonthArray,
3557                e,
3558                size
3559            ),
3560            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3561                Interval,
3562                IntervalUnit::MonthDayNano,
3563                IntervalMonthDayNanoArray,
3564                e,
3565                size
3566            ),
3567            ScalarValue::DurationSecond(e) => build_array_from_option!(
3568                Duration,
3569                TimeUnit::Second,
3570                DurationSecondArray,
3571                e,
3572                size
3573            ),
3574            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3575                Duration,
3576                TimeUnit::Millisecond,
3577                DurationMillisecondArray,
3578                e,
3579                size
3580            ),
3581            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3582                Duration,
3583                TimeUnit::Microsecond,
3584                DurationMicrosecondArray,
3585                e,
3586                size
3587            ),
3588            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3589                Duration,
3590                TimeUnit::Nanosecond,
3591                DurationNanosecondArray,
3592                e,
3593                size
3594            ),
3595            ScalarValue::Union(value, fields, mode) => match value {
3596                Some((v_id, value)) => {
3597                    let mut new_fields = Vec::with_capacity(fields.len());
3598                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3599                    for (f_id, field) in fields.iter() {
3600                        let ar = if f_id == *v_id {
3601                            value.to_array_of_size(size)?
3602                        } else {
3603                            let dt = field.data_type();
3604                            match mode {
3605                                UnionMode::Sparse => new_null_array(dt, size),
3606                                // In a dense union, only the child with values needs to be
3607                                // allocated
3608                                UnionMode::Dense => new_null_array(dt, 0),
3609                            }
3610                        };
3611                        let field = (**field).clone();
3612                        child_arrays.push(ar);
3613                        new_fields.push(field.clone());
3614                    }
3615                    let type_ids = repeat_n(*v_id, size);
3616                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3617                    let value_offsets = match mode {
3618                        UnionMode::Sparse => None,
3619                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3620                    };
3621                    let ar = UnionArray::try_new(
3622                        fields.clone(),
3623                        type_ids,
3624                        value_offsets,
3625                        child_arrays,
3626                    )
3627                    .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3628                    Arc::new(ar)
3629                }
3630                None => new_null_array(&DataType::Union(fields.clone(), *mode), size),
3631            },
3632            ScalarValue::Dictionary(key_type, v) => {
3633                // values array is one element long (the value)
3634                match key_type.as_ref() {
3635                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3636                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3637                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3638                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3639                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3640                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3641                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3642                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3643                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3644                }
3645            }
3646            ScalarValue::RunEndEncoded(run_ends_field, values_field, value) => {
3647                fn make_run_array<R: RunEndIndexType>(
3648                    run_ends_field: &Arc<Field>,
3649                    values_field: &Arc<Field>,
3650                    value: &ScalarValue,
3651                    size: usize,
3652                ) -> Result<ArrayRef> {
3653                    let size_native = R::Native::from_usize(size)
3654                        .ok_or_else(|| DataFusionError::Execution(format!("Cannot construct RunArray of size {size}: Overflows run-ends type {}", R::DATA_TYPE)))?;
3655                    let values = value.to_array_of_size(1)?;
3656                    let run_ends =
3657                        PrimitiveArray::<R>::new(vec![size_native].into(), None);
3658
3659                    // Using ArrayDataBuilder so we can maintain the fields
3660                    let dt = DataType::RunEndEncoded(
3661                        Arc::clone(run_ends_field),
3662                        Arc::clone(values_field),
3663                    );
3664                    let builder = ArrayDataBuilder::new(dt)
3665                        .len(size)
3666                        .add_child_data(run_ends.to_data())
3667                        .add_child_data(values.to_data());
3668                    let run_array = RunArray::<R>::from(builder.build()?);
3669
3670                    Ok(Arc::new(run_array))
3671                }
3672                match run_ends_field.data_type() {
3673                    DataType::Int16 => make_run_array::<Int16Type>(
3674                        run_ends_field,
3675                        values_field,
3676                        value,
3677                        size,
3678                    )?,
3679                    DataType::Int32 => make_run_array::<Int32Type>(
3680                        run_ends_field,
3681                        values_field,
3682                        value,
3683                        size,
3684                    )?,
3685                    DataType::Int64 => make_run_array::<Int64Type>(
3686                        run_ends_field,
3687                        values_field,
3688                        value,
3689                        size,
3690                    )?,
3691                    dt => unreachable!("Invalid run-ends type: {dt}"),
3692                }
3693            }
3694            ScalarValue::Null => get_or_create_cached_null_array(size),
3695        })
3696    }
3697
3698    fn get_decimal_value_from_array(
3699        array: &dyn Array,
3700        index: usize,
3701        precision: u8,
3702        scale: i8,
3703    ) -> Result<ScalarValue> {
3704        match array.data_type() {
3705            DataType::Decimal32(_, _) => {
3706                let array = as_decimal32_array(array)?;
3707                if array.is_null(index) {
3708                    Ok(ScalarValue::Decimal32(None, precision, scale))
3709                } else {
3710                    let value = array.value(index);
3711                    Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3712                }
3713            }
3714            DataType::Decimal64(_, _) => {
3715                let array = as_decimal64_array(array)?;
3716                if array.is_null(index) {
3717                    Ok(ScalarValue::Decimal64(None, precision, scale))
3718                } else {
3719                    let value = array.value(index);
3720                    Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3721                }
3722            }
3723            DataType::Decimal128(_, _) => {
3724                let array = as_decimal128_array(array)?;
3725                if array.is_null(index) {
3726                    Ok(ScalarValue::Decimal128(None, precision, scale))
3727                } else {
3728                    let value = array.value(index);
3729                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3730                }
3731            }
3732            DataType::Decimal256(_, _) => {
3733                let array = as_decimal256_array(array)?;
3734                if array.is_null(index) {
3735                    Ok(ScalarValue::Decimal256(None, precision, scale))
3736                } else {
3737                    let value = array.value(index);
3738                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3739                }
3740            }
3741            other => {
3742                unreachable!("Invalid type isn't decimal: {other:?}")
3743            }
3744        }
3745    }
3746
3747    /// Repeats the rows of `arr` `size` times, producing an array with
3748    /// `arr.len() * size` total rows.
3749    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3750        if size == 0 {
3751            return Ok(arr.slice(0, 0));
3752        }
3753
3754        // Examples: given `arr = [[A, B, C]]` and `size = 3`, `indices = [0, 0, 0]` and
3755        // the result is `[[A, B, C], [A, B, C], [A, B, C]]`.
3756        //
3757        // Given `arr = [[A, B], [C]]` and `size = 2`, `indices = [0, 1, 0, 1]` and the
3758        // result is `[[A, B], [C], [A, B], [C]]`. (But in practice, we are always called
3759        // with `arr.len() == 1`.)
3760        let n = arr.len() as u32;
3761        let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
3762        Ok(arrow::compute::take(arr, &indices, None)?)
3763    }
3764
3765    /// Retrieve ScalarValue for each row in `array`
3766    ///
3767    /// Elements in `array` may be NULL, in which case the corresponding element in the returned vector is None.
3768    ///
3769    /// Example 1: Array (ScalarValue::Int32)
3770    /// ```
3771    /// use arrow::array::ListArray;
3772    /// use arrow::datatypes::{DataType, Int32Type};
3773    /// use datafusion_common::ScalarValue;
3774    ///
3775    /// // Equivalent to [[1,2,3], [4,5]]
3776    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3777    ///     Some(vec![Some(1), Some(2), Some(3)]),
3778    ///     Some(vec![Some(4), Some(5)]),
3779    /// ]);
3780    ///
3781    /// // Convert the array into Scalar Values for each row
3782    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3783    ///
3784    /// let expected = vec![
3785    ///     Some(vec![
3786    ///         ScalarValue::Int32(Some(1)),
3787    ///         ScalarValue::Int32(Some(2)),
3788    ///         ScalarValue::Int32(Some(3)),
3789    ///     ]),
3790    ///     Some(vec![
3791    ///         ScalarValue::Int32(Some(4)),
3792    ///         ScalarValue::Int32(Some(5)),
3793    ///     ]),
3794    /// ];
3795    ///
3796    /// assert_eq!(scalar_vec, expected);
3797    /// ```
3798    ///
3799    /// Example 2: Nested array (ScalarValue::List)
3800    /// ```
3801    /// use arrow::array::ListArray;
3802    /// use arrow::datatypes::{DataType, Int32Type};
3803    /// use datafusion_common::utils::SingleRowListArrayBuilder;
3804    /// use datafusion_common::ScalarValue;
3805    /// use std::sync::Arc;
3806    ///
3807    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3808    ///     Some(vec![Some(1), Some(2), Some(3)]),
3809    ///     Some(vec![Some(4), Some(5)]),
3810    /// ]);
3811    ///
3812    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
3813    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
3814    ///
3815    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
3816    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3817    ///
3818    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3819    ///     Some(1),
3820    ///     Some(2),
3821    ///     Some(3),
3822    /// ])]);
3823    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3824    ///     Some(4),
3825    ///     Some(5),
3826    /// ])]);
3827    ///
3828    /// let expected = vec![Some(vec![
3829    ///     ScalarValue::List(Arc::new(l1)),
3830    ///     ScalarValue::List(Arc::new(l2)),
3831    /// ])];
3832    ///
3833    /// assert_eq!(scalar_vec, expected);
3834    /// ```
3835    ///
3836    /// Example 3: Nullable array
3837    /// ```
3838    /// use arrow::array::ListArray;
3839    /// use arrow::datatypes::{DataType, Int32Type};
3840    /// use datafusion_common::ScalarValue;
3841    ///
3842    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3843    ///     Some(vec![Some(1), Some(2), Some(3)]),
3844    ///     None,
3845    ///     Some(vec![Some(4), Some(5)]),
3846    /// ]);
3847    ///
3848    /// // Convert the array into Scalar Values for each row
3849    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3850    ///
3851    /// let expected = vec![
3852    ///     Some(vec![
3853    ///         ScalarValue::Int32(Some(1)),
3854    ///         ScalarValue::Int32(Some(2)),
3855    ///         ScalarValue::Int32(Some(3)),
3856    ///     ]),
3857    ///     None,
3858    ///     Some(vec![
3859    ///         ScalarValue::Int32(Some(4)),
3860    ///         ScalarValue::Int32(Some(5)),
3861    ///     ]),
3862    /// ];
3863    ///
3864    /// assert_eq!(scalar_vec, expected);
3865    /// ```
3866    pub fn convert_array_to_scalar_vec(
3867        array: &dyn Array,
3868    ) -> Result<Vec<Option<Vec<Self>>>> {
3869        fn map_element(
3870            nested_array: Option<ArrayRef>,
3871        ) -> Result<Option<Vec<ScalarValue>>> {
3872            nested_array
3873                .map(|array| {
3874                    (0..array.len())
3875                        .map(|i| ScalarValue::try_from_array(&array, i))
3876                        .collect::<Result<Vec<_>>>()
3877                })
3878                .transpose()
3879        }
3880
3881        match array.data_type() {
3882            DataType::List(_) => array.as_list::<i32>().iter().map(map_element).collect(),
3883            DataType::LargeList(_) => {
3884                array.as_list::<i64>().iter().map(map_element).collect()
3885            }
3886            DataType::ListView(_) => array
3887                .as_list_view::<i32>()
3888                .iter()
3889                .map(map_element)
3890                .collect(),
3891            DataType::LargeListView(_) => array
3892                .as_list_view::<i64>()
3893                .iter()
3894                .map(map_element)
3895                .collect(),
3896            _ => _internal_err!(
3897                "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList/ListView/LargeListView type"
3898            ),
3899        }
3900    }
3901
3902    #[deprecated(
3903        since = "46.0.0",
3904        note = "This function is obsolete. Use `to_array` instead"
3905    )]
3906    pub fn raw_data(&self) -> Result<ArrayRef> {
3907        match self {
3908            ScalarValue::List(arr) => Ok(arr.to_owned()),
3909            _ => _internal_err!("ScalarValue is not a list"),
3910        }
3911    }
3912
3913    /// Converts a value in `array` at `index` into a ScalarValue
3914    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3915        // handle NULL value
3916        if array.is_null(index) {
3917            return array.data_type().try_into();
3918        }
3919
3920        Ok(match array.data_type() {
3921            DataType::Null => ScalarValue::Null,
3922            DataType::Decimal32(precision, scale) => {
3923                ScalarValue::get_decimal_value_from_array(
3924                    array, index, *precision, *scale,
3925                )?
3926            }
3927            DataType::Decimal64(precision, scale) => {
3928                ScalarValue::get_decimal_value_from_array(
3929                    array, index, *precision, *scale,
3930                )?
3931            }
3932            DataType::Decimal128(precision, scale) => {
3933                ScalarValue::get_decimal_value_from_array(
3934                    array, index, *precision, *scale,
3935                )?
3936            }
3937            DataType::Decimal256(precision, scale) => {
3938                ScalarValue::get_decimal_value_from_array(
3939                    array, index, *precision, *scale,
3940                )?
3941            }
3942            DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3943            DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3944            DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3945            DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3946            DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3947            DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3948            DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3949            DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3950            DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3951            DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3952            DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3953            DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3954            DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3955            DataType::LargeBinary => {
3956                typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3957            }
3958            DataType::BinaryView => {
3959                typed_cast!(array, index, as_binary_view_array, BinaryView)?
3960            }
3961            DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3962            DataType::LargeUtf8 => {
3963                typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3964            }
3965            DataType::Utf8View => {
3966                typed_cast!(array, index, as_string_view_array, Utf8View)?
3967            }
3968            DataType::List(field) => {
3969                let list_array = array.as_list::<i32>();
3970                let nested_array = list_array.value(index);
3971                // Produces a single element `ListArray` with the value at `index`.
3972                SingleRowListArrayBuilder::new(nested_array)
3973                    .with_field(field)
3974                    .build_list_scalar()
3975            }
3976            DataType::LargeList(field) => {
3977                let list_array = as_large_list_array(array)?;
3978                let nested_array = list_array.value(index);
3979                // Produces a single element `LargeListArray` with the value at `index`.
3980                SingleRowListArrayBuilder::new(nested_array)
3981                    .with_field(field)
3982                    .build_large_list_scalar()
3983            }
3984            // TODO: There is no test for FixedSizeList now, add it later
3985            DataType::FixedSizeList(field, _) => {
3986                let list_array = as_fixed_size_list_array(array)?;
3987                let nested_array = list_array.value(index);
3988                // Produces a single element `FixedSizeListArray` with the value at `index`.
3989                let list_size = nested_array.len();
3990                SingleRowListArrayBuilder::new(nested_array)
3991                    .with_field(field)
3992                    .build_fixed_size_list_scalar(list_size)
3993            }
3994            DataType::ListView(field) => {
3995                let list_array = as_list_view_array(array)?;
3996                let nested_array = list_array.value(index);
3997                // Produces a single element `ListViewArray` with the value at `index`.
3998                SingleRowListArrayBuilder::new(nested_array)
3999                    .with_field(field)
4000                    .build_list_view_scalar()
4001            }
4002            DataType::LargeListView(field) => {
4003                let list_array = as_large_list_view_array(array)?;
4004                let nested_array = list_array.value(index);
4005                // Produces a single element `LargeListViewArray` with the value at `index`.
4006                SingleRowListArrayBuilder::new(nested_array)
4007                    .with_field(field)
4008                    .build_large_list_view_scalar()
4009            }
4010            DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
4011            DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
4012            DataType::Time32(TimeUnit::Second) => {
4013                typed_cast!(array, index, as_time32_second_array, Time32Second)?
4014            }
4015            DataType::Time32(TimeUnit::Millisecond) => {
4016                typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
4017            }
4018            DataType::Time64(TimeUnit::Microsecond) => {
4019                typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
4020            }
4021            DataType::Time64(TimeUnit::Nanosecond) => {
4022                typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
4023            }
4024            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
4025                array,
4026                index,
4027                as_timestamp_second_array,
4028                TimestampSecond,
4029                tz_opt
4030            )?,
4031            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
4032                array,
4033                index,
4034                as_timestamp_millisecond_array,
4035                TimestampMillisecond,
4036                tz_opt
4037            )?,
4038            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
4039                array,
4040                index,
4041                as_timestamp_microsecond_array,
4042                TimestampMicrosecond,
4043                tz_opt
4044            )?,
4045            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
4046                array,
4047                index,
4048                as_timestamp_nanosecond_array,
4049                TimestampNanosecond,
4050                tz_opt
4051            )?,
4052            DataType::Dictionary(key_type, _) => {
4053                let (values_array, values_index) = match key_type.as_ref() {
4054                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4055                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4056                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4057                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4058                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4059                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4060                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4061                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4062                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4063                };
4064                // look up the index in the values dictionary
4065                let value = match values_index {
4066                    Some(values_index) => {
4067                        ScalarValue::try_from_array(values_array, values_index)
4068                    }
4069                    // else entry was null, so return null
4070                    None => values_array.data_type().try_into(),
4071                }?;
4072
4073                Self::Dictionary(key_type.clone(), Box::new(value))
4074            }
4075            DataType::RunEndEncoded(run_ends_field, value_field) => {
4076                // Explicitly check length here since get_physical_index() doesn't
4077                // bound check for us
4078                if index > array.len() {
4079                    return _exec_err!(
4080                        "Index {index} out of bounds for array of length {}",
4081                        array.len()
4082                    );
4083                }
4084                let scalar = downcast_run_array!(
4085                    array => {
4086                        let index = array.get_physical_index(index);
4087                        ScalarValue::try_from_array(array.values(), index)?
4088                    },
4089                    dt => unreachable!("Invalid run-ends type: {dt}")
4090                );
4091                Self::RunEndEncoded(
4092                    Arc::clone(run_ends_field),
4093                    Arc::clone(value_field),
4094                    Box::new(scalar),
4095                )
4096            }
4097            DataType::Struct(_) => {
4098                let a = array.slice(index, 1);
4099                Self::Struct(Arc::new(a.as_struct().to_owned()))
4100            }
4101            DataType::FixedSizeBinary(_) => {
4102                let array = as_fixed_size_binary_array(array)?;
4103                let size = match array.data_type() {
4104                    DataType::FixedSizeBinary(size) => *size,
4105                    _ => unreachable!(),
4106                };
4107                ScalarValue::FixedSizeBinary(
4108                    size,
4109                    match array.is_null(index) {
4110                        true => None,
4111                        false => Some(array.value(index).into()),
4112                    },
4113                )
4114            }
4115            DataType::Interval(IntervalUnit::DayTime) => {
4116                typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
4117            }
4118            DataType::Interval(IntervalUnit::YearMonth) => {
4119                typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
4120            }
4121            DataType::Interval(IntervalUnit::MonthDayNano) => {
4122                typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
4123            }
4124
4125            DataType::Duration(TimeUnit::Second) => {
4126                typed_cast!(array, index, as_duration_second_array, DurationSecond)?
4127            }
4128            DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
4129                array,
4130                index,
4131                as_duration_millisecond_array,
4132                DurationMillisecond
4133            )?,
4134            DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
4135                array,
4136                index,
4137                as_duration_microsecond_array,
4138                DurationMicrosecond
4139            )?,
4140            DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
4141                array,
4142                index,
4143                as_duration_nanosecond_array,
4144                DurationNanosecond
4145            )?,
4146            DataType::Map(_, _) => {
4147                let a = array.slice(index, 1);
4148                Self::Map(Arc::new(a.as_map().to_owned()))
4149            }
4150            DataType::Union(fields, mode) => {
4151                let array = as_union_array(array)?;
4152                let ti = array.type_id(index);
4153                let index = array.value_offset(index);
4154                let value = ScalarValue::try_from_array(array.child(ti), index)?;
4155                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
4156            }
4157            other => {
4158                return _not_impl_err!(
4159                    "Can't create a scalar from array of type \"{other:?}\""
4160                );
4161            }
4162        })
4163    }
4164
4165    /// Try to parse `value` into a ScalarValue of type `target_type`
4166    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
4167        ScalarValue::from(value).cast_to(target_type)
4168    }
4169
4170    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
4171    ///
4172    /// Returns `None` if this `ScalarValue` is not a logical string type or the
4173    /// `ScalarValue` represents the `NULL` value.
4174    ///
4175    /// Note you can use [`Option::flatten`] to check for non null logical
4176    /// strings.
4177    ///
4178    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
4179    /// [`ScalarValue::Dictionary`] with a logical string value and store
4180    /// strings and can be accessed as `&str` using this method.
4181    ///
4182    /// # Example: logical strings
4183    /// ```
4184    /// # use datafusion_common::ScalarValue;
4185    /// /// non strings return None
4186    /// let scalar = ScalarValue::from(42);
4187    /// assert_eq!(scalar.try_as_str(), None);
4188    /// // Non null logical string returns Some(Some(&str))
4189    /// let scalar = ScalarValue::from("hello");
4190    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
4191    /// // Null logical string returns Some(None)
4192    /// let scalar = ScalarValue::Utf8(None);
4193    /// assert_eq!(scalar.try_as_str(), Some(None));
4194    /// ```
4195    ///
4196    /// # Example: use [`Option::flatten`] to check for non-null logical strings
4197    /// ```
4198    /// # use datafusion_common::ScalarValue;
4199    /// // Non null logical string returns Some(Some(&str))
4200    /// let scalar = ScalarValue::from("hello");
4201    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
4202    /// ```
4203    pub fn try_as_str(&self) -> Option<Option<&str>> {
4204        let v = match self {
4205            ScalarValue::Utf8(v) => v,
4206            ScalarValue::LargeUtf8(v) => v,
4207            ScalarValue::Utf8View(v) => v,
4208            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
4209            ScalarValue::RunEndEncoded(_, _, v) => return v.try_as_str(),
4210            _ => return None,
4211        };
4212        Some(v.as_ref().map(|v| v.as_str()))
4213    }
4214
4215    /// Try to cast this value to a ScalarValue of type `data_type`
4216    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
4217        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
4218    }
4219
4220    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
4221    pub fn cast_to_with_options(
4222        &self,
4223        target_type: &DataType,
4224        cast_options: &CastOptions<'static>,
4225    ) -> Result<Self> {
4226        let source_type = self.data_type();
4227        if let Some(multiplier) = date_to_timestamp_multiplier(&source_type, target_type)
4228            && let Some(value) = self.date_scalar_value_as_i64()
4229        {
4230            ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?;
4231        }
4232
4233        let scalar_array = self.to_array()?;
4234
4235        // For types that contain structs (including nested inside Lists, Dictionaries,
4236        // etc.), use name-based casting logic that matches struct fields by name and
4237        // recursively casts nested structs.
4238        let cast_arr = if crate::nested_struct::requires_nested_struct_cast(
4239            scalar_array.data_type(),
4240            target_type,
4241        ) {
4242            crate::nested_struct::cast_column(&scalar_array, target_type, cast_options)?
4243        } else {
4244            cast_with_options(&scalar_array, target_type, cast_options)?
4245        };
4246
4247        ScalarValue::try_from_array(&cast_arr, 0)
4248    }
4249
4250    fn date_scalar_value_as_i64(&self) -> Option<i64> {
4251        match self {
4252            ScalarValue::Date32(Some(value)) => Some(i64::from(*value)),
4253            ScalarValue::Date64(Some(value)) => Some(*value),
4254            _ => None,
4255        }
4256    }
4257
4258    fn eq_array_decimal32(
4259        array: &ArrayRef,
4260        index: usize,
4261        value: Option<&i32>,
4262        precision: u8,
4263        scale: i8,
4264    ) -> Result<bool> {
4265        let array = as_decimal32_array(array)?;
4266        if array.precision() != precision || array.scale() != scale {
4267            return Ok(false);
4268        }
4269        let is_null = array.is_null(index);
4270        if let Some(v) = value {
4271            Ok(!array.is_null(index) && array.value(index) == *v)
4272        } else {
4273            Ok(is_null)
4274        }
4275    }
4276
4277    fn eq_array_decimal64(
4278        array: &ArrayRef,
4279        index: usize,
4280        value: Option<&i64>,
4281        precision: u8,
4282        scale: i8,
4283    ) -> Result<bool> {
4284        let array = as_decimal64_array(array)?;
4285        if array.precision() != precision || array.scale() != scale {
4286            return Ok(false);
4287        }
4288        let is_null = array.is_null(index);
4289        if let Some(v) = value {
4290            Ok(!array.is_null(index) && array.value(index) == *v)
4291        } else {
4292            Ok(is_null)
4293        }
4294    }
4295
4296    fn eq_array_decimal(
4297        array: &ArrayRef,
4298        index: usize,
4299        value: Option<&i128>,
4300        precision: u8,
4301        scale: i8,
4302    ) -> Result<bool> {
4303        let array = as_decimal128_array(array)?;
4304        if array.precision() != precision || array.scale() != scale {
4305            return Ok(false);
4306        }
4307        let is_null = array.is_null(index);
4308        if let Some(v) = value {
4309            Ok(!array.is_null(index) && array.value(index) == *v)
4310        } else {
4311            Ok(is_null)
4312        }
4313    }
4314
4315    fn eq_array_decimal256(
4316        array: &ArrayRef,
4317        index: usize,
4318        value: Option<&i256>,
4319        precision: u8,
4320        scale: i8,
4321    ) -> Result<bool> {
4322        let array = as_decimal256_array(array)?;
4323        if array.precision() != precision || array.scale() != scale {
4324            return Ok(false);
4325        }
4326        let is_null = array.is_null(index);
4327        if let Some(v) = value {
4328            Ok(!array.is_null(index) && array.value(index) == *v)
4329        } else {
4330            Ok(is_null)
4331        }
4332    }
4333
4334    /// Compares a single row of array @ index for equality with self,
4335    /// in an optimized fashion.
4336    ///
4337    /// This method implements an optimized version of:
4338    ///
4339    /// ```text
4340    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
4341    ///     arr_scalar.eq(self)
4342    /// ```
4343    ///
4344    /// *Performance note*: the arrow compute kernels should be
4345    /// preferred over this function if at all possible as they can be
4346    /// vectorized and are generally much faster.
4347    ///
4348    /// This function has a few narrow use cases such as hash table key
4349    /// comparisons where comparing a single row at a time is necessary.
4350    ///
4351    /// # Errors
4352    ///
4353    /// Errors if
4354    /// - it fails to downcast `array` to the data type of `self`
4355    /// - `self` is a `Struct`
4356    ///
4357    /// # Panics
4358    ///
4359    /// Panics if `self` is a dictionary with invalid key type
4360    #[inline]
4361    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
4362        Ok(match self {
4363            ScalarValue::Decimal32(v, precision, scale) => {
4364                ScalarValue::eq_array_decimal32(
4365                    array,
4366                    index,
4367                    v.as_ref(),
4368                    *precision,
4369                    *scale,
4370                )?
4371            }
4372            ScalarValue::Decimal64(v, precision, scale) => {
4373                ScalarValue::eq_array_decimal64(
4374                    array,
4375                    index,
4376                    v.as_ref(),
4377                    *precision,
4378                    *scale,
4379                )?
4380            }
4381            ScalarValue::Decimal128(v, precision, scale) => {
4382                ScalarValue::eq_array_decimal(
4383                    array,
4384                    index,
4385                    v.as_ref(),
4386                    *precision,
4387                    *scale,
4388                )?
4389            }
4390            ScalarValue::Decimal256(v, precision, scale) => {
4391                ScalarValue::eq_array_decimal256(
4392                    array,
4393                    index,
4394                    v.as_ref(),
4395                    *precision,
4396                    *scale,
4397                )?
4398            }
4399            ScalarValue::Boolean(val) => {
4400                eq_array_primitive!(array, index, as_boolean_array, val)?
4401            }
4402            ScalarValue::Float16(val) => {
4403                eq_array_primitive!(array, index, as_float16_array, val)?
4404            }
4405            ScalarValue::Float32(val) => {
4406                eq_array_primitive!(array, index, as_float32_array, val)?
4407            }
4408            ScalarValue::Float64(val) => {
4409                eq_array_primitive!(array, index, as_float64_array, val)?
4410            }
4411            ScalarValue::Int8(val) => {
4412                eq_array_primitive!(array, index, as_int8_array, val)?
4413            }
4414            ScalarValue::Int16(val) => {
4415                eq_array_primitive!(array, index, as_int16_array, val)?
4416            }
4417            ScalarValue::Int32(val) => {
4418                eq_array_primitive!(array, index, as_int32_array, val)?
4419            }
4420            ScalarValue::Int64(val) => {
4421                eq_array_primitive!(array, index, as_int64_array, val)?
4422            }
4423            ScalarValue::UInt8(val) => {
4424                eq_array_primitive!(array, index, as_uint8_array, val)?
4425            }
4426            ScalarValue::UInt16(val) => {
4427                eq_array_primitive!(array, index, as_uint16_array, val)?
4428            }
4429            ScalarValue::UInt32(val) => {
4430                eq_array_primitive!(array, index, as_uint32_array, val)?
4431            }
4432            ScalarValue::UInt64(val) => {
4433                eq_array_primitive!(array, index, as_uint64_array, val)?
4434            }
4435            ScalarValue::Utf8(val) => {
4436                eq_array_primitive!(array, index, as_string_array, val)?
4437            }
4438            ScalarValue::Utf8View(val) => {
4439                eq_array_primitive!(array, index, as_string_view_array, val)?
4440            }
4441            ScalarValue::LargeUtf8(val) => {
4442                eq_array_primitive!(array, index, as_large_string_array, val)?
4443            }
4444            ScalarValue::Binary(val) => {
4445                eq_array_primitive!(array, index, as_binary_array, val)?
4446            }
4447            ScalarValue::BinaryView(val) => {
4448                eq_array_primitive!(array, index, as_binary_view_array, val)?
4449            }
4450            ScalarValue::FixedSizeBinary(_, val) => {
4451                eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
4452            }
4453            ScalarValue::LargeBinary(val) => {
4454                eq_array_primitive!(array, index, as_large_binary_array, val)?
4455            }
4456            ScalarValue::List(arr) => {
4457                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4458            }
4459            ScalarValue::LargeList(arr) => {
4460                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4461            }
4462            ScalarValue::FixedSizeList(arr) => {
4463                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4464            }
4465            ScalarValue::ListView(arr) => {
4466                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4467            }
4468            ScalarValue::LargeListView(arr) => {
4469                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4470            }
4471            ScalarValue::Struct(arr) => {
4472                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4473            }
4474            ScalarValue::Map(arr) => {
4475                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4476            }
4477            ScalarValue::Date32(val) => {
4478                eq_array_primitive!(array, index, as_date32_array, val)?
4479            }
4480            ScalarValue::Date64(val) => {
4481                eq_array_primitive!(array, index, as_date64_array, val)?
4482            }
4483            ScalarValue::Time32Second(val) => {
4484                eq_array_primitive!(array, index, as_time32_second_array, val)?
4485            }
4486            ScalarValue::Time32Millisecond(val) => {
4487                eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
4488            }
4489            ScalarValue::Time64Microsecond(val) => {
4490                eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
4491            }
4492            ScalarValue::Time64Nanosecond(val) => {
4493                eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
4494            }
4495            ScalarValue::TimestampSecond(val, _) => {
4496                eq_array_primitive!(array, index, as_timestamp_second_array, val)?
4497            }
4498            ScalarValue::TimestampMillisecond(val, _) => {
4499                eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
4500            }
4501            ScalarValue::TimestampMicrosecond(val, _) => {
4502                eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
4503            }
4504            ScalarValue::TimestampNanosecond(val, _) => {
4505                eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
4506            }
4507            ScalarValue::IntervalYearMonth(val) => {
4508                eq_array_primitive!(array, index, as_interval_ym_array, val)?
4509            }
4510            ScalarValue::IntervalDayTime(val) => {
4511                eq_array_primitive!(array, index, as_interval_dt_array, val)?
4512            }
4513            ScalarValue::IntervalMonthDayNano(val) => {
4514                eq_array_primitive!(array, index, as_interval_mdn_array, val)?
4515            }
4516            ScalarValue::DurationSecond(val) => {
4517                eq_array_primitive!(array, index, as_duration_second_array, val)?
4518            }
4519            ScalarValue::DurationMillisecond(val) => {
4520                eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
4521            }
4522            ScalarValue::DurationMicrosecond(val) => {
4523                eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
4524            }
4525            ScalarValue::DurationNanosecond(val) => {
4526                eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
4527            }
4528            ScalarValue::Union(value, _, _) => {
4529                let array = as_union_array(array)?;
4530                let ti = array.type_id(index);
4531                let index = array.value_offset(index);
4532                if let Some((ti_v, value)) = value {
4533                    ti_v == &ti && value.eq_array(array.child(ti), index)?
4534                } else {
4535                    array.child(ti).is_null(index)
4536                }
4537            }
4538            ScalarValue::Dictionary(key_type, v) => {
4539                let (values_array, values_index) = match key_type.as_ref() {
4540                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4541                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4542                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4543                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4544                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4545                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4546                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4547                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4548                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4549                };
4550                // was the value in the array non null?
4551                match values_index {
4552                    Some(values_index) => v.eq_array(values_array, values_index)?,
4553                    None => v.is_null(),
4554                }
4555            }
4556            ScalarValue::RunEndEncoded(run_ends_field, _, value) => {
4557                // Explicitly check length here since get_physical_index() doesn't
4558                // bound check for us
4559                if index > array.len() {
4560                    return _exec_err!(
4561                        "Index {index} out of bounds for array of length {}",
4562                        array.len()
4563                    );
4564                }
4565                match run_ends_field.data_type() {
4566                    DataType::Int16 => {
4567                        let array = as_run_array::<Int16Type>(array)?;
4568                        let index = array.get_physical_index(index);
4569                        value.eq_array(array.values(), index)?
4570                    }
4571                    DataType::Int32 => {
4572                        let array = as_run_array::<Int32Type>(array)?;
4573                        let index = array.get_physical_index(index);
4574                        value.eq_array(array.values(), index)?
4575                    }
4576                    DataType::Int64 => {
4577                        let array = as_run_array::<Int64Type>(array)?;
4578                        let index = array.get_physical_index(index);
4579                        value.eq_array(array.values(), index)?
4580                    }
4581                    dt => unreachable!("Invalid run-ends type: {dt}"),
4582                }
4583            }
4584            ScalarValue::Null => array.is_null(index),
4585        })
4586    }
4587
4588    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
4589        let right = arr2.slice(index, 1);
4590        arr1 == &right
4591    }
4592
4593    /// Compare `self` with `other` and return an `Ordering`.
4594    ///
4595    /// This is the same as [`PartialOrd`] except that it returns
4596    /// `Err` if the values cannot be compared, e.g., they have incompatible data types.
4597    pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
4598        self.partial_cmp(other).ok_or_else(|| {
4599            _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
4600        })
4601    }
4602
4603    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
4604    /// includes the allocated size (`capacity`) rather than the current length (`len`)
4605    pub fn size(&self) -> usize {
4606        size_of_val(self)
4607            + match self {
4608                ScalarValue::Null
4609                | ScalarValue::Boolean(_)
4610                | ScalarValue::Float16(_)
4611                | ScalarValue::Float32(_)
4612                | ScalarValue::Float64(_)
4613                | ScalarValue::Decimal32(_, _, _)
4614                | ScalarValue::Decimal64(_, _, _)
4615                | ScalarValue::Decimal128(_, _, _)
4616                | ScalarValue::Decimal256(_, _, _)
4617                | ScalarValue::Int8(_)
4618                | ScalarValue::Int16(_)
4619                | ScalarValue::Int32(_)
4620                | ScalarValue::Int64(_)
4621                | ScalarValue::UInt8(_)
4622                | ScalarValue::UInt16(_)
4623                | ScalarValue::UInt32(_)
4624                | ScalarValue::UInt64(_)
4625                | ScalarValue::Date32(_)
4626                | ScalarValue::Date64(_)
4627                | ScalarValue::Time32Second(_)
4628                | ScalarValue::Time32Millisecond(_)
4629                | ScalarValue::Time64Microsecond(_)
4630                | ScalarValue::Time64Nanosecond(_)
4631                | ScalarValue::IntervalYearMonth(_)
4632                | ScalarValue::IntervalDayTime(_)
4633                | ScalarValue::IntervalMonthDayNano(_)
4634                | ScalarValue::DurationSecond(_)
4635                | ScalarValue::DurationMillisecond(_)
4636                | ScalarValue::DurationMicrosecond(_)
4637                | ScalarValue::DurationNanosecond(_) => 0,
4638                ScalarValue::Utf8(s)
4639                | ScalarValue::LargeUtf8(s)
4640                | ScalarValue::Utf8View(s) => {
4641                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4642                }
4643                ScalarValue::TimestampSecond(_, s)
4644                | ScalarValue::TimestampMillisecond(_, s)
4645                | ScalarValue::TimestampMicrosecond(_, s)
4646                | ScalarValue::TimestampNanosecond(_, s) => {
4647                    s.as_ref().map(|s| s.len()).unwrap_or_default()
4648                }
4649                ScalarValue::Binary(b)
4650                | ScalarValue::FixedSizeBinary(_, b)
4651                | ScalarValue::LargeBinary(b)
4652                | ScalarValue::BinaryView(b) => {
4653                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4654                }
4655                ScalarValue::List(arr) => arr.get_array_memory_size(),
4656                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4657                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4658                ScalarValue::ListView(arr) => arr.get_array_memory_size(),
4659                ScalarValue::LargeListView(arr) => arr.get_array_memory_size(),
4660                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4661                ScalarValue::Map(arr) => arr.get_array_memory_size(),
4662                ScalarValue::Union(vals, fields, _mode) => {
4663                    vals.as_ref()
4664                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
4665                        .unwrap_or_default()
4666                        // `fields` is boxed, so it is NOT already included in `self`
4667                        + size_of_val(fields)
4668                        + (size_of::<Field>() * fields.len())
4669                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4670                }
4671                ScalarValue::Dictionary(dt, sv) => {
4672                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
4673                    dt.size() + sv.size()
4674                }
4675                ScalarValue::RunEndEncoded(rf, vf, v) => rf.size() + vf.size() + v.size(),
4676            }
4677    }
4678
4679    /// Estimates [size](Self::size) of [`Vec`] in bytes.
4680    ///
4681    /// Includes the size of the [`Vec`] container itself.
4682    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4683        size_of_val(vec)
4684            + (size_of::<ScalarValue>() * vec.capacity())
4685            + vec
4686                .iter()
4687                .map(|sv| sv.size() - size_of_val(sv))
4688                .sum::<usize>()
4689    }
4690
4691    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
4692    ///
4693    /// Includes the size of the [`VecDeque`] container itself.
4694    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4695        size_of_val(vec_deque)
4696            + (size_of::<ScalarValue>() * vec_deque.capacity())
4697            + vec_deque
4698                .iter()
4699                .map(|sv| sv.size() - size_of_val(sv))
4700                .sum::<usize>()
4701    }
4702
4703    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
4704    ///
4705    /// Includes the size of the [`HashSet`] container itself.
4706    #[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
4707    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4708        size_of_val(set)
4709            + (size_of::<ScalarValue>() * set.capacity())
4710            + set
4711                .iter()
4712                .map(|sv| sv.size() - size_of_val(sv))
4713                .sum::<usize>()
4714    }
4715
4716    /// Compacts the allocation referenced by `self` to the minimum, copying the data if
4717    /// necessary.
4718    ///
4719    /// This can be relevant when `self` is a list or contains a list as a nested value, as
4720    /// a single list holds an Arc to its entire original array buffer.
4721    pub fn compact(&mut self) {
4722        // copy_array_data + compact_view_buffers + downcast back, all in one step.
4723        macro_rules! compact_array {
4724            ($arr:expr, $from_type:ty, $($as_method:tt)+) => {
4725                *Arc::make_mut($arr) = ScalarValue::compact_view_buffers(
4726                    Arc::new(<$from_type>::from(copy_array_data(&$arr.to_data()))) as ArrayRef,
4727                ).$($as_method)+.clone()
4728            };
4729        }
4730        match self {
4731            ScalarValue::Null
4732            | ScalarValue::Boolean(_)
4733            | ScalarValue::Float16(_)
4734            | ScalarValue::Float32(_)
4735            | ScalarValue::Float64(_)
4736            | ScalarValue::Decimal32(_, _, _)
4737            | ScalarValue::Decimal64(_, _, _)
4738            | ScalarValue::Decimal128(_, _, _)
4739            | ScalarValue::Decimal256(_, _, _)
4740            | ScalarValue::Int8(_)
4741            | ScalarValue::Int16(_)
4742            | ScalarValue::Int32(_)
4743            | ScalarValue::Int64(_)
4744            | ScalarValue::UInt8(_)
4745            | ScalarValue::UInt16(_)
4746            | ScalarValue::UInt32(_)
4747            | ScalarValue::UInt64(_)
4748            | ScalarValue::Date32(_)
4749            | ScalarValue::Date64(_)
4750            | ScalarValue::Time32Second(_)
4751            | ScalarValue::Time32Millisecond(_)
4752            | ScalarValue::Time64Microsecond(_)
4753            | ScalarValue::Time64Nanosecond(_)
4754            | ScalarValue::IntervalYearMonth(_)
4755            | ScalarValue::IntervalDayTime(_)
4756            | ScalarValue::IntervalMonthDayNano(_)
4757            | ScalarValue::DurationSecond(_)
4758            | ScalarValue::DurationMillisecond(_)
4759            | ScalarValue::DurationMicrosecond(_)
4760            | ScalarValue::DurationNanosecond(_)
4761            | ScalarValue::Utf8(_)
4762            | ScalarValue::LargeUtf8(_)
4763            | ScalarValue::Utf8View(_)
4764            | ScalarValue::TimestampSecond(_, _)
4765            | ScalarValue::TimestampMillisecond(_, _)
4766            | ScalarValue::TimestampMicrosecond(_, _)
4767            | ScalarValue::TimestampNanosecond(_, _)
4768            | ScalarValue::Binary(_)
4769            | ScalarValue::FixedSizeBinary(_, _)
4770            | ScalarValue::LargeBinary(_)
4771            | ScalarValue::BinaryView(_) => (),
4772            ScalarValue::FixedSizeList(arr) => {
4773                compact_array!(arr, FixedSizeListArray, as_fixed_size_list())
4774            }
4775            ScalarValue::List(arr) => compact_array!(arr, ListArray, as_list::<i32>()),
4776            ScalarValue::LargeList(arr) => {
4777                compact_array!(arr, LargeListArray, as_list::<i64>())
4778            }
4779            ScalarValue::ListView(arr) => {
4780                compact_array!(arr, ListViewArray, as_list_view::<i32>())
4781            }
4782            ScalarValue::LargeListView(arr) => {
4783                compact_array!(arr, LargeListViewArray, as_list_view::<i64>())
4784            }
4785            ScalarValue::Struct(arr) => compact_array!(arr, StructArray, as_struct()),
4786            ScalarValue::Map(arr) => compact_array!(arr, MapArray, as_map()),
4787            ScalarValue::Union(val, _, _) => {
4788                if let Some((_, value)) = val.as_mut() {
4789                    value.compact();
4790                }
4791            }
4792            ScalarValue::Dictionary(_, value) => {
4793                value.compact();
4794            }
4795            ScalarValue::RunEndEncoded(_, _, value) => {
4796                value.compact();
4797            }
4798        }
4799    }
4800
4801    /// Compacts ([ScalarValue::compact]) the current [ScalarValue] and returns it.
4802    pub fn compacted(mut self) -> Self {
4803        self.compact();
4804        self
4805    }
4806
4807    /// Recursively compacts the backing buffers of any [`StringViewArray`] or
4808    /// [`BinaryViewArray`] nested within `array`.
4809    ///
4810    /// View-typed arrays keep an `Arc` reference to their original backing
4811    /// buffers, so a single scalar extracted from a large batch still retains
4812    /// the entire buffer.  Calling [`.gc()`][StringViewArray::gc] copies only
4813    /// the bytes that are actually referenced by the surviving views, releasing
4814    /// the rest.
4815    ///
4816    /// Container types (`List`, `LargeList`, `FixedSizeList`, `ListView`,
4817    /// `LargeListView`, `Struct`, `Map`) are handled by recursing into their
4818    /// child / values arrays and reconstructing the parent with the compacted
4819    /// children.  All other types are returned unchanged.
4820    fn compact_view_buffers(array: ArrayRef) -> ArrayRef {
4821        // Macro for the i32/i64-offset list pair (List / LargeList).
4822        macro_rules! gc_list {
4823            ($field:expr, $offset_type:ty, $array_type:ty) => {{
4824                let list = array.as_list::<$offset_type>();
4825                Arc::new(<$array_type>::new(
4826                    Arc::clone($field),
4827                    list.offsets().clone(),
4828                    ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4829                    list.nulls().cloned(),
4830                )) as ArrayRef
4831            }};
4832        }
4833        // Macro for the i32/i64-offset list-view pair (ListView / LargeListView).
4834        macro_rules! gc_list_view {
4835            ($field:expr, $offset_type:ty, $array_type:ty) => {{
4836                let list = array.as_list_view::<$offset_type>();
4837                Arc::new(<$array_type>::new(
4838                    Arc::clone($field),
4839                    list.offsets().clone(),
4840                    list.sizes().clone(),
4841                    ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4842                    list.nulls().cloned(),
4843                )) as ArrayRef
4844            }};
4845        }
4846
4847        match array.data_type() {
4848            DataType::Utf8View => Arc::new(array.as_string_view().gc()),
4849            DataType::BinaryView => Arc::new(array.as_binary_view().gc()),
4850            DataType::Struct(_) => {
4851                let s = array.as_struct();
4852                let columns = s
4853                    .columns()
4854                    .iter()
4855                    .map(|c| ScalarValue::compact_view_buffers(Arc::clone(c)))
4856                    .collect();
4857                Arc::new(StructArray::new(
4858                    s.fields().clone(),
4859                    columns,
4860                    s.nulls().cloned(),
4861                ))
4862            }
4863            DataType::List(field) => gc_list!(field, i32, ListArray),
4864            DataType::LargeList(field) => gc_list!(field, i64, LargeListArray),
4865            DataType::FixedSizeList(field, size) => {
4866                let list = array.as_fixed_size_list();
4867                Arc::new(FixedSizeListArray::new(
4868                    Arc::clone(field),
4869                    *size,
4870                    ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4871                    list.nulls().cloned(),
4872                ))
4873            }
4874            DataType::ListView(field) => gc_list_view!(field, i32, ListViewArray),
4875            DataType::LargeListView(field) => {
4876                gc_list_view!(field, i64, LargeListViewArray)
4877            }
4878            DataType::Map(field, ordered) => {
4879                let map = array.as_map();
4880                let entries = ScalarValue::compact_view_buffers(Arc::new(
4881                    map.entries().clone(),
4882                )
4883                    as ArrayRef);
4884                Arc::new(MapArray::new(
4885                    Arc::clone(field),
4886                    map.offsets().clone(),
4887                    entries.as_struct().clone(),
4888                    map.nulls().cloned(),
4889                    *ordered,
4890                ))
4891            }
4892            _ => array,
4893        }
4894    }
4895
4896    /// Returns the minimum value for the given numeric `DataType`.
4897    ///
4898    /// This function returns the smallest representable value for numeric
4899    /// and temporal data types. For non-numeric types, it returns `None`.
4900    ///
4901    /// # Supported Types
4902    ///
4903    /// - **Integer types**: `i8::MIN`, `i16::MIN`, etc.
4904    /// - **Unsigned types**: Always 0 (`u8::MIN`, `u16::MIN`, etc.)
4905    /// - **Float types**: Negative infinity (IEEE 754)
4906    /// - **Decimal types**: Smallest value based on precision
4907    /// - **Temporal types**: Minimum timestamp/date values
4908    /// - **Time types**: 0 (midnight)
4909    /// - **Duration types**: `i64::MIN`
4910    pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4911        match datatype {
4912            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4913            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4914            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4915            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4916            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4917            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4918            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4919            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4920            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4921            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4922            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4923            DataType::Decimal128(precision, scale) => {
4924                // For decimal, min is -10^(precision-scale) + 10^(-scale)
4925                // But for simplicity, we use the minimum i128 value that fits the precision
4926                let max_digits = 10_i128.pow(*precision as u32) - 1;
4927                Some(ScalarValue::Decimal128(
4928                    Some(-max_digits),
4929                    *precision,
4930                    *scale,
4931                ))
4932            }
4933            DataType::Decimal256(precision, scale) => {
4934                // Similar to Decimal128 but with i256
4935                // For now, use a large negative value
4936                let max_digits = i256::from_i128(10_i128)
4937                    .checked_pow(*precision as u32)
4938                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
4939                    .unwrap_or(i256::MAX);
4940                Some(ScalarValue::Decimal256(
4941                    Some(max_digits.neg_wrapping()),
4942                    *precision,
4943                    *scale,
4944                ))
4945            }
4946            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4947            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4948            DataType::Time32(TimeUnit::Second) => {
4949                Some(ScalarValue::Time32Second(Some(0)))
4950            }
4951            DataType::Time32(TimeUnit::Millisecond) => {
4952                Some(ScalarValue::Time32Millisecond(Some(0)))
4953            }
4954            DataType::Time64(TimeUnit::Microsecond) => {
4955                Some(ScalarValue::Time64Microsecond(Some(0)))
4956            }
4957            DataType::Time64(TimeUnit::Nanosecond) => {
4958                Some(ScalarValue::Time64Nanosecond(Some(0)))
4959            }
4960            DataType::Timestamp(unit, tz) => match unit {
4961                TimeUnit::Second => {
4962                    Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4963                }
4964                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4965                    Some(i64::MIN),
4966                    tz.clone(),
4967                )),
4968                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4969                    Some(i64::MIN),
4970                    tz.clone(),
4971                )),
4972                TimeUnit::Nanosecond => {
4973                    Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4974                }
4975            },
4976            DataType::Duration(unit) => match unit {
4977                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4978                TimeUnit::Millisecond => {
4979                    Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4980                }
4981                TimeUnit::Microsecond => {
4982                    Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4983                }
4984                TimeUnit::Nanosecond => {
4985                    Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4986                }
4987            },
4988            _ => None,
4989        }
4990    }
4991
4992    /// Returns the maximum value for the given numeric `DataType`.
4993    ///
4994    /// This function returns the largest representable value for numeric
4995    /// and temporal data types. For non-numeric types, it returns `None`.
4996    ///
4997    /// # Supported Types
4998    ///
4999    /// - **Integer types**: `i8::MAX`, `i16::MAX`, etc.
5000    /// - **Unsigned types**: `u8::MAX`, `u16::MAX`, etc.
5001    /// - **Float types**: Positive infinity (IEEE 754)
5002    /// - **Decimal types**: Largest value based on precision
5003    /// - **Temporal types**: Maximum timestamp/date values
5004    /// - **Time types**: Maximum time in the day (1 day - 1 unit)
5005    /// - **Duration types**: `i64::MAX`
5006    pub fn max(datatype: &DataType) -> Option<ScalarValue> {
5007        match datatype {
5008            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
5009            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
5010            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
5011            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
5012            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
5013            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
5014            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
5015            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
5016            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
5017            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
5018            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
5019            DataType::Decimal128(precision, scale) => {
5020                // For decimal, max is 10^(precision-scale) - 10^(-scale)
5021                // But for simplicity, we use the maximum i128 value that fits the precision
5022                let max_digits = 10_i128.pow(*precision as u32) - 1;
5023                Some(ScalarValue::Decimal128(
5024                    Some(max_digits),
5025                    *precision,
5026                    *scale,
5027                ))
5028            }
5029            DataType::Decimal256(precision, scale) => {
5030                // Similar to Decimal128 but with i256
5031                let max_digits = i256::from_i128(10_i128)
5032                    .checked_pow(*precision as u32)
5033                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
5034                    .unwrap_or(i256::MAX);
5035                Some(ScalarValue::Decimal256(
5036                    Some(max_digits),
5037                    *precision,
5038                    *scale,
5039                ))
5040            }
5041            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
5042            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
5043            DataType::Time32(TimeUnit::Second) => {
5044                // 86399 seconds = 23:59:59
5045                Some(ScalarValue::Time32Second(Some(86_399)))
5046            }
5047            DataType::Time32(TimeUnit::Millisecond) => {
5048                // 86_399_999 milliseconds = 23:59:59.999
5049                Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
5050            }
5051            DataType::Time64(TimeUnit::Microsecond) => {
5052                // 86_399_999_999 microseconds = 23:59:59.999999
5053                Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
5054            }
5055            DataType::Time64(TimeUnit::Nanosecond) => {
5056                // 86_399_999_999_999 nanoseconds = 23:59:59.999999999
5057                Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
5058            }
5059            DataType::Timestamp(unit, tz) => match unit {
5060                TimeUnit::Second => {
5061                    Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
5062                }
5063                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
5064                    Some(i64::MAX),
5065                    tz.clone(),
5066                )),
5067                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
5068                    Some(i64::MAX),
5069                    tz.clone(),
5070                )),
5071                TimeUnit::Nanosecond => {
5072                    Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
5073                }
5074            },
5075            DataType::Duration(unit) => match unit {
5076                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
5077                TimeUnit::Millisecond => {
5078                    Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
5079                }
5080                TimeUnit::Microsecond => {
5081                    Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
5082                }
5083                TimeUnit::Nanosecond => {
5084                    Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
5085                }
5086            },
5087            _ => None,
5088        }
5089    }
5090
5091    /// A thin wrapper on Arrow's validation that throws internal error if validation
5092    /// fails.
5093    fn validate_decimal_or_internal_err<T: DecimalType>(
5094        precision: u8,
5095        scale: i8,
5096    ) -> Result<()> {
5097        validate_decimal_precision_and_scale::<T>(precision, scale).map_err(|err| {
5098            _internal_datafusion_err!(
5099                "Decimal precision/scale invariant violated \
5100                 (precision={precision}, scale={scale}): {err}"
5101            )
5102        })
5103    }
5104}
5105
5106/// Compacts the data of an `ArrayData` into a new `ArrayData`.
5107///
5108/// This is useful when you want to minimize the memory footprint of an
5109/// `ArrayData`. For example, the value returned by [`Array::slice`] still
5110/// points at the same underlying data buffers as the original array, which may
5111/// hold many more values. Calling `copy_array_data` on the sliced array will
5112/// create a new, smaller, `ArrayData` that only contains the data for the
5113/// sliced array.
5114///
5115/// # Example
5116/// ```
5117/// # use arrow::array::{make_array, Array, Int32Array};
5118/// use datafusion_common::scalar::copy_array_data;
5119/// let array = Int32Array::from_iter_values(0..8192);
5120/// // Take only the first 2 elements
5121/// let sliced_array = array.slice(0, 2);
5122/// // The memory footprint of `sliced_array` is close to 8192 * 4 bytes
5123/// assert_eq!(32864, sliced_array.get_array_memory_size());
5124/// // however, we can copy the data to a new `ArrayData`
5125/// let new_array = make_array(copy_array_data(&sliced_array.into_data()));
5126/// // The memory footprint of `new_array` is now only 2 * 4 bytes
5127/// // and overhead:
5128/// assert_eq!(160, new_array.get_array_memory_size());
5129/// ```
5130///
5131/// See also [`ScalarValue::compact`] which applies to `ScalarValue` instances
5132/// as necessary.
5133pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
5134    let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
5135    copy.extend(0, 0, src_data.len());
5136    copy.freeze()
5137}
5138
5139macro_rules! impl_scalar {
5140    ($ty:ty, $scalar:tt) => {
5141        impl From<$ty> for ScalarValue {
5142            fn from(value: $ty) -> Self {
5143                ScalarValue::$scalar(Some(value))
5144            }
5145        }
5146
5147        impl From<Option<$ty>> for ScalarValue {
5148            fn from(value: Option<$ty>) -> Self {
5149                ScalarValue::$scalar(value)
5150            }
5151        }
5152    };
5153}
5154
5155impl_scalar!(f64, Float64);
5156impl_scalar!(f32, Float32);
5157impl_scalar!(f16, Float16);
5158impl_scalar!(i8, Int8);
5159impl_scalar!(i16, Int16);
5160impl_scalar!(i32, Int32);
5161impl_scalar!(i64, Int64);
5162impl_scalar!(bool, Boolean);
5163impl_scalar!(u8, UInt8);
5164impl_scalar!(u16, UInt16);
5165impl_scalar!(u32, UInt32);
5166impl_scalar!(u64, UInt64);
5167
5168impl From<&str> for ScalarValue {
5169    fn from(value: &str) -> Self {
5170        Some(value).into()
5171    }
5172}
5173
5174impl From<Option<&str>> for ScalarValue {
5175    fn from(value: Option<&str>) -> Self {
5176        let value = value.map(|s| s.to_string());
5177        value.into()
5178    }
5179}
5180
5181/// Wrapper to create ScalarValue::Struct for convenience
5182impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
5183    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
5184        value
5185            .into_iter()
5186            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
5187                builder.with_name_and_scalar(name, value)
5188            })
5189            .build()
5190            .unwrap()
5191    }
5192}
5193
5194impl FromStr for ScalarValue {
5195    type Err = Infallible;
5196
5197    fn from_str(s: &str) -> Result<Self, Self::Err> {
5198        Ok(s.into())
5199    }
5200}
5201
5202impl From<String> for ScalarValue {
5203    fn from(value: String) -> Self {
5204        Some(value).into()
5205    }
5206}
5207
5208impl From<Option<String>> for ScalarValue {
5209    fn from(value: Option<String>) -> Self {
5210        ScalarValue::Utf8(value)
5211    }
5212}
5213
5214macro_rules! impl_try_from {
5215    ($SCALAR:ident, $NATIVE:ident) => {
5216        impl TryFrom<ScalarValue> for $NATIVE {
5217            type Error = DataFusionError;
5218
5219            fn try_from(value: ScalarValue) -> Result<Self> {
5220                match value {
5221                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
5222                    _ => _internal_err!(
5223                        "Cannot convert {:?} to {}",
5224                        value,
5225                        std::any::type_name::<Self>()
5226                    ),
5227                }
5228            }
5229        }
5230    };
5231}
5232
5233impl_try_from!(Int8, i8);
5234impl_try_from!(Int16, i16);
5235
5236// special implementation for i32 because of Date32 and Time32
5237impl TryFrom<ScalarValue> for i32 {
5238    type Error = DataFusionError;
5239
5240    fn try_from(value: ScalarValue) -> Result<Self> {
5241        match value {
5242            ScalarValue::Int32(Some(inner_value))
5243            | ScalarValue::Date32(Some(inner_value))
5244            | ScalarValue::Time32Second(Some(inner_value))
5245            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
5246            _ => _internal_err!(
5247                "Cannot convert {:?} to {}",
5248                value,
5249                std::any::type_name::<Self>()
5250            ),
5251        }
5252    }
5253}
5254
5255// special implementation for i64 because of Date64, Time64 and Timestamp
5256impl TryFrom<ScalarValue> for i64 {
5257    type Error = DataFusionError;
5258
5259    fn try_from(value: ScalarValue) -> Result<Self> {
5260        match value {
5261            ScalarValue::Int64(Some(inner_value))
5262            | ScalarValue::Date64(Some(inner_value))
5263            | ScalarValue::Time64Microsecond(Some(inner_value))
5264            | ScalarValue::Time64Nanosecond(Some(inner_value))
5265            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
5266            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
5267            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
5268            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
5269            _ => _internal_err!(
5270                "Cannot convert {:?} to {}",
5271                value,
5272                std::any::type_name::<Self>()
5273            ),
5274        }
5275    }
5276}
5277
5278// special implementation for i128 because of Decimal128
5279impl TryFrom<ScalarValue> for i128 {
5280    type Error = DataFusionError;
5281
5282    fn try_from(value: ScalarValue) -> Result<Self> {
5283        match value {
5284            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
5285            _ => _internal_err!(
5286                "Cannot convert {:?} to {}",
5287                value,
5288                std::any::type_name::<Self>()
5289            ),
5290        }
5291    }
5292}
5293
5294// special implementation for i256 because of Decimal128
5295impl TryFrom<ScalarValue> for i256 {
5296    type Error = DataFusionError;
5297
5298    fn try_from(value: ScalarValue) -> Result<Self> {
5299        match value {
5300            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
5301            _ => _internal_err!(
5302                "Cannot convert {:?} to {}",
5303                value,
5304                std::any::type_name::<Self>()
5305            ),
5306        }
5307    }
5308}
5309
// The remaining native types each correspond to a single `ScalarValue`
// variant, so the generic `impl_try_from!` expansion is used for them.
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float16, f16);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
5318
5319impl TryFrom<DataType> for ScalarValue {
5320    type Error = DataFusionError;
5321
5322    /// Create a Null instance of ScalarValue for this datatype
5323    fn try_from(datatype: DataType) -> Result<Self> {
5324        (&datatype).try_into()
5325    }
5326}
5327
5328impl TryFrom<&DataType> for ScalarValue {
5329    type Error = DataFusionError;
5330
5331    /// Create a Null instance of ScalarValue for this datatype
5332    fn try_from(data_type: &DataType) -> Result<Self> {
5333        Self::try_new_null(data_type)
5334    }
5335}
5336
// Writes an `Option` to the formatter `$F`: the `Display` form of the inner
// value when present, the literal `NULL` otherwise. Evaluates to the
// `fmt::Result` of the write.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        match $EXPR {
            None => write!($F, "NULL"),
            Some(inner) => write!($F, "{inner}"),
        }
    }};
}
5345
5346// Implement Display trait for ScalarValue
5347//
5348// # Panics
5349//
5350// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
5351impl fmt::Display for ScalarValue {
5352    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5353        match self {
5354            ScalarValue::Decimal32(v, p, s) => {
5355                write!(f, "{v:?},{p:?},{s:?}")?;
5356            }
5357            ScalarValue::Decimal64(v, p, s) => {
5358                write!(f, "{v:?},{p:?},{s:?}")?;
5359            }
5360            ScalarValue::Decimal128(v, p, s) => {
5361                write!(f, "{v:?},{p:?},{s:?}")?;
5362            }
5363            ScalarValue::Decimal256(v, p, s) => {
5364                write!(f, "{v:?},{p:?},{s:?}")?;
5365            }
5366            ScalarValue::Boolean(e) => format_option!(f, e)?,
5367            ScalarValue::Float16(e) => format_option!(f, e)?,
5368            ScalarValue::Float32(e) => format_option!(f, e)?,
5369            ScalarValue::Float64(e) => format_option!(f, e)?,
5370            ScalarValue::Int8(e) => format_option!(f, e)?,
5371            ScalarValue::Int16(e) => format_option!(f, e)?,
5372            ScalarValue::Int32(e) => format_option!(f, e)?,
5373            ScalarValue::Int64(e) => format_option!(f, e)?,
5374            ScalarValue::UInt8(e) => format_option!(f, e)?,
5375            ScalarValue::UInt16(e) => format_option!(f, e)?,
5376            ScalarValue::UInt32(e) => format_option!(f, e)?,
5377            ScalarValue::UInt64(e) => format_option!(f, e)?,
5378            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
5379            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
5380            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
5381            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
5382            ScalarValue::Utf8(e)
5383            | ScalarValue::LargeUtf8(e)
5384            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
5385            ScalarValue::Binary(e)
5386            | ScalarValue::FixedSizeBinary(_, e)
5387            | ScalarValue::LargeBinary(e)
5388            | ScalarValue::BinaryView(e) => match e {
5389                Some(bytes) => {
5390                    // print up to first 10 bytes, with trailing ... if needed
5391                    const HEX_CHARS_UPPER: &[u8; 16] = b"0123456789ABCDEF";
5392                    for b in bytes.iter().take(10) {
5393                        f.write_char(HEX_CHARS_UPPER[(b >> 4) as usize] as char)?;
5394                        f.write_char(HEX_CHARS_UPPER[(b & 0x0f) as usize] as char)?;
5395                    }
5396                    if bytes.len() > 10 {
5397                        write!(f, "...")?;
5398                    }
5399                }
5400                None => write!(f, "NULL")?,
5401            },
5402            ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?,
5403            ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?,
5404            ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?,
5405            ScalarValue::ListView(arr) => fmt_list(arr.as_ref(), f)?,
5406            ScalarValue::LargeListView(arr) => fmt_list(arr.as_ref(), f)?,
5407            ScalarValue::Date32(e) => format_option!(
5408                f,
5409                e.map(|v| {
5410                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
5411                    match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
5412                    {
5413                        Some(date) => date.to_string(),
5414                        None => "".to_string(),
5415                    }
5416                })
5417            )?,
5418            ScalarValue::Date64(e) => format_option!(
5419                f,
5420                e.map(|v| {
5421                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
5422                    match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
5423                    {
5424                        Some(date) => date.to_string(),
5425                        None => "".to_string(),
5426                    }
5427                })
5428            )?,
5429            ScalarValue::Time32Second(e) => format_option!(f, e)?,
5430            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
5431            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
5432            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
5433            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
5434            ScalarValue::IntervalMonthDayNano(e) => {
5435                format_option!(f, e.map(|v| format!("{v:?}")))?
5436            }
5437            ScalarValue::IntervalDayTime(e) => {
5438                format_option!(f, e.map(|v| format!("{v:?}")))?;
5439            }
5440            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
5441            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
5442            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
5443            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
5444            ScalarValue::Struct(struct_arr) => {
5445                // ScalarValue Struct should always have a single element
5446                assert_eq!(struct_arr.len(), 1);
5447
5448                if struct_arr.null_count() == struct_arr.len() {
5449                    write!(f, "NULL")?;
5450                    return Ok(());
5451                }
5452
5453                let columns = struct_arr.columns();
5454                let fields = struct_arr.fields();
5455                let nulls = struct_arr.nulls();
5456
5457                write!(
5458                    f,
5459                    "{{{}}}",
5460                    columns
5461                        .iter()
5462                        .zip(fields.iter())
5463                        .map(|(column, field)| {
5464                            if nulls.is_some_and(|b| b.is_null(0)) {
5465                                format!("{}:NULL", field.name())
5466                            } else if let DataType::Struct(_) = field.data_type() {
5467                                let sv = ScalarValue::Struct(Arc::new(
5468                                    column.as_struct().to_owned(),
5469                                ));
5470                                format!("{}:{sv}", field.name())
5471                            } else {
5472                                let sv = array_value_to_string(column, 0).unwrap();
5473                                format!("{}:{sv}", field.name())
5474                            }
5475                        })
5476                        .collect::<Vec<_>>()
5477                        .join(",")
5478                )?
5479            }
5480            ScalarValue::Map(map_arr) => {
5481                if map_arr.null_count() == map_arr.len() {
5482                    write!(f, "NULL")?;
5483                    return Ok(());
5484                }
5485
5486                write!(
5487                    f,
5488                    "[{}]",
5489                    map_arr
5490                        .iter()
5491                        .map(|struct_array| {
5492                            if let Some(arr) = struct_array {
5493                                let mut buffer = VecDeque::new();
5494                                for i in 0..arr.len() {
5495                                    let key =
5496                                        array_value_to_string(arr.column(0), i).unwrap();
5497                                    let value =
5498                                        array_value_to_string(arr.column(1), i).unwrap();
5499                                    buffer.push_back(format!("{key}:{value}"));
5500                                }
5501                                format!(
5502                                    "{{{}}}",
5503                                    buffer
5504                                        .into_iter()
5505                                        .collect::<Vec<_>>()
5506                                        .join(",")
5507                                        .as_str()
5508                                )
5509                            } else {
5510                                "NULL".to_string()
5511                            }
5512                        })
5513                        .collect::<Vec<_>>()
5514                        .join(",")
5515                )?
5516            }
5517            ScalarValue::Union(val, _fields, _mode) => match val {
5518                Some((id, val)) => write!(f, "{id}:{val}")?,
5519                None => write!(f, "NULL")?,
5520            },
5521            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
5522            ScalarValue::RunEndEncoded(_, _, v) => write!(f, "{v}")?,
5523            ScalarValue::Null => write!(f, "NULL")?,
5524        };
5525        Ok(())
5526    }
5527}
5528
5529fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result {
5530    // ScalarValue List, LargeList, FixedSizeList, ListView, LargeListView should always have a single element
5531    assert_eq!(arr.len(), 1);
5532    let options = FormatOptions::default().with_display_error(true);
5533    let formatter = ArrayFormatter::try_new(arr, &options).unwrap();
5534    let value_formatter = formatter.value(0);
5535    write!(f, "{value_formatter}")
5536}
5537
/// Writes `data` to the formatter as comma-separated decimal byte values,
/// e.g. `[1, 2, 3]` ==> `"1,2,3"`. An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (position, byte) in data.iter().enumerate() {
        // A separator goes before every byte except the first.
        if position > 0 {
            f.write_str(",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
5549
5550impl fmt::Debug for ScalarValue {
5551    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5552        match self {
5553            ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
5554            ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
5555            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
5556            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
5557            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
5558            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
5559            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
5560            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
5561            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
5562            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
5563            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
5564            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
5565            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
5566            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
5567            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
5568            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
5569            ScalarValue::TimestampSecond(_, tz_opt) => {
5570                write!(f, "TimestampSecond({self}, {tz_opt:?})")
5571            }
5572            ScalarValue::TimestampMillisecond(_, tz_opt) => {
5573                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
5574            }
5575            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
5576                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
5577            }
5578            ScalarValue::TimestampNanosecond(_, tz_opt) => {
5579                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
5580            }
5581            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
5582            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
5583            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
5584            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
5585            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
5586            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
5587            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
5588            ScalarValue::Binary(Some(b)) => {
5589                write!(f, "Binary(\"")?;
5590                fmt_binary(b.as_slice(), f)?;
5591                write!(f, "\")")
5592            }
5593            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
5594            ScalarValue::BinaryView(Some(b)) => {
5595                write!(f, "BinaryView(\"")?;
5596                fmt_binary(b.as_slice(), f)?;
5597                write!(f, "\")")
5598            }
5599            ScalarValue::FixedSizeBinary(size, None) => {
5600                write!(f, "FixedSizeBinary({size}, {self})")
5601            }
5602            ScalarValue::FixedSizeBinary(size, Some(b)) => {
5603                write!(f, "FixedSizeBinary({size}, \"")?;
5604                fmt_binary(b.as_slice(), f)?;
5605                write!(f, "\")")
5606            }
5607            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
5608            ScalarValue::LargeBinary(Some(b)) => {
5609                write!(f, "LargeBinary(\"")?;
5610                fmt_binary(b.as_slice(), f)?;
5611                write!(f, "\")")
5612            }
5613            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
5614            ScalarValue::List(_) => write!(f, "List({self})"),
5615            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
5616            ScalarValue::ListView(_) => write!(f, "ListView({self})"),
5617            ScalarValue::LargeListView(_) => write!(f, "LargeListView({self})"),
5618            ScalarValue::Struct(struct_arr) => {
5619                // ScalarValue Struct should always have a single element
5620                assert_eq!(struct_arr.len(), 1);
5621
5622                let columns = struct_arr.columns();
5623                let fields = struct_arr.fields();
5624
5625                write!(
5626                    f,
5627                    "Struct({{{}}})",
5628                    columns
5629                        .iter()
5630                        .zip(fields.iter())
5631                        .map(|(column, field)| {
5632                            let sv = array_value_to_string(column, 0).unwrap();
5633                            let name = field.name();
5634                            format!("{name}:{sv}")
5635                        })
5636                        .collect::<Vec<_>>()
5637                        .join(",")
5638                )
5639            }
5640            ScalarValue::Map(map_arr) => {
5641                write!(
5642                    f,
5643                    "Map([{}])",
5644                    map_arr
5645                        .iter()
5646                        .map(|struct_array| {
5647                            if let Some(arr) = struct_array {
5648                                let buffer: Vec<String> = (0..arr.len())
5649                                    .map(|i| {
5650                                        let key = array_value_to_string(arr.column(0), i)
5651                                            .unwrap();
5652                                        let value =
5653                                            array_value_to_string(arr.column(1), i)
5654                                                .unwrap();
5655                                        format!("{key:?}:{value:?}")
5656                                    })
5657                                    .collect();
5658                                format!("{{{}}}", buffer.join(","))
5659                            } else {
5660                                "NULL".to_string()
5661                            }
5662                        })
5663                        .collect::<Vec<_>>()
5664                        .join(",")
5665                )
5666            }
5667            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
5668            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
5669            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
5670            ScalarValue::Time32Millisecond(_) => {
5671                write!(f, "Time32Millisecond(\"{self}\")")
5672            }
5673            ScalarValue::Time64Microsecond(_) => {
5674                write!(f, "Time64Microsecond(\"{self}\")")
5675            }
5676            ScalarValue::Time64Nanosecond(_) => {
5677                write!(f, "Time64Nanosecond(\"{self}\")")
5678            }
5679            ScalarValue::IntervalDayTime(_) => {
5680                write!(f, "IntervalDayTime(\"{self}\")")
5681            }
5682            ScalarValue::IntervalYearMonth(_) => {
5683                write!(f, "IntervalYearMonth(\"{self}\")")
5684            }
5685            ScalarValue::IntervalMonthDayNano(_) => {
5686                write!(f, "IntervalMonthDayNano(\"{self}\")")
5687            }
5688            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
5689            ScalarValue::DurationMillisecond(_) => {
5690                write!(f, "DurationMillisecond(\"{self}\")")
5691            }
5692            ScalarValue::DurationMicrosecond(_) => {
5693                write!(f, "DurationMicrosecond(\"{self}\")")
5694            }
5695            ScalarValue::DurationNanosecond(_) => {
5696                write!(f, "DurationNanosecond(\"{self}\")")
5697            }
5698            ScalarValue::Union(val, _fields, _mode) => match val {
5699                Some((id, val)) => write!(f, "Union {id}:{val}"),
5700                None => write!(f, "Union(NULL)"),
5701            },
5702            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
5703            ScalarValue::RunEndEncoded(rf, vf, v) => {
5704                write!(f, "RunEndEncoded({rf:?}, {vf:?}, {v:?})")
5705            }
5706            ScalarValue::Null => write!(f, "NULL"),
5707        }
5708    }
5709}
5710
5711/// Trait used to map a NativeType to a ScalarValue
5712pub trait ScalarType<T: ArrowNativeType> {
5713    /// returns a scalar from an optional T
5714    fn scalar(r: Option<T>) -> ScalarValue;
5715}
5716
5717impl ScalarType<f32> for Float32Type {
5718    fn scalar(r: Option<f32>) -> ScalarValue {
5719        ScalarValue::Float32(r)
5720    }
5721}
5722
5723impl ScalarType<i64> for TimestampSecondType {
5724    fn scalar(r: Option<i64>) -> ScalarValue {
5725        ScalarValue::TimestampSecond(r, None)
5726    }
5727}
5728
5729impl ScalarType<i64> for TimestampMillisecondType {
5730    fn scalar(r: Option<i64>) -> ScalarValue {
5731        ScalarValue::TimestampMillisecond(r, None)
5732    }
5733}
5734
5735impl ScalarType<i64> for TimestampMicrosecondType {
5736    fn scalar(r: Option<i64>) -> ScalarValue {
5737        ScalarValue::TimestampMicrosecond(r, None)
5738    }
5739}
5740
5741impl ScalarType<i64> for TimestampNanosecondType {
5742    fn scalar(r: Option<i64>) -> ScalarValue {
5743        ScalarValue::TimestampNanosecond(r, None)
5744    }
5745}
5746
5747impl ScalarType<i32> for Date32Type {
5748    fn scalar(r: Option<i32>) -> ScalarValue {
5749        ScalarValue::Date32(r)
5750    }
5751}
5752
5753#[cfg(test)]
5754mod tests {
5755
5756    use super::*;
5757    use crate::cast::{
5758        as_large_list_view_array, as_list_array, as_map_array, as_struct_array,
5759    };
5760    use crate::test_util::batches_to_string;
5761    use arrow::array::{
5762        FixedSizeListBuilder, Int32Builder, LargeListBuilder, LargeListViewBuilder,
5763        ListBuilder, ListViewBuilder, MapBuilder, NullArray, NullBufferBuilder,
5764        OffsetSizeTrait, PrimitiveBuilder, RecordBatch, StringBuilder,
5765        StringDictionaryBuilder, StructBuilder, UnionBuilder,
5766    };
5767    use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5768    use arrow::compute::{is_null, kernels};
5769    use arrow::datatypes::{
5770        ArrowNumericType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, Fields,
5771        Float64Type, TimeUnit,
5772    };
5773    use arrow::error::ArrowError;
5774    use arrow::util::pretty::pretty_format_columns;
5775    use insta::assert_snapshot;
5776    use rand::Rng;
5777
5778    #[test]
5779    fn test_scalar_value_from_for_map() {
5780        let string_builder = StringBuilder::new();
5781        let int_builder = Int32Builder::with_capacity(4);
5782        let mut builder = MapBuilder::new(None, string_builder, int_builder);
5783        builder.keys().append_value("joe");
5784        builder.values().append_value(1);
5785        builder.append(true).unwrap();
5786
5787        builder.keys().append_value("blogs");
5788        builder.values().append_value(2);
5789        builder.keys().append_value("foo");
5790        builder.values().append_value(4);
5791        builder.append(true).unwrap();
5792        builder.append(true).unwrap();
5793        builder.append(false).unwrap();
5794
5795        let expected = builder.finish();
5796
5797        let sv = ScalarValue::Map(Arc::new(expected.clone()));
5798        let map_arr = sv.to_array().unwrap();
5799        let actual = as_map_array(&map_arr).unwrap();
5800        assert_eq!(actual, &expected);
5801    }
5802
5803    #[test]
5804    fn test_format_timestamp_type_for_error_and_bounds() {
5805        // format helper
5806        let ts_ns = format_timestamp_type_for_error(&DataType::Timestamp(
5807            TimeUnit::Nanosecond,
5808            None,
5809        ));
5810        assert_eq!(ts_ns, "Timestamp(ns)");
5811
5812        let ts_us = format_timestamp_type_for_error(&DataType::Timestamp(
5813            TimeUnit::Microsecond,
5814            None,
5815        ));
5816        assert_eq!(ts_us, "Timestamp(us)");
5817
5818        // ensure_timestamp_in_bounds: Date32 non-overflow
5819        let ok = ensure_timestamp_in_bounds(
5820            1000,
5821            NANOS_PER_DAY,
5822            &DataType::Date32,
5823            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5824        );
5825        assert!(ok.is_ok());
5826
5827        // Date32 overflow -- known large day value (9999-12-31 -> 2932896)
5828        let err = ensure_timestamp_in_bounds(
5829            2932896,
5830            NANOS_PER_DAY,
5831            &DataType::Date32,
5832            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5833        );
5834        assert!(err.is_err());
5835        let msg = err.unwrap_err().to_string();
5836        assert!(msg.contains("Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range"));
5837
5838        // Date64 overflow for ns (millis * 1_000_000)
5839        let overflow_millis: i64 = (i64::MAX / NANOS_PER_MILLISECOND) + 1;
5840        let err2 = ensure_timestamp_in_bounds(
5841            overflow_millis,
5842            NANOS_PER_MILLISECOND,
5843            &DataType::Date64,
5844            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5845        );
5846        assert!(err2.is_err());
5847    }
5848
5849    #[test]
5850    fn test_scalar_value_from_for_struct() {
5851        let boolean = Arc::new(BooleanArray::from(vec![false]));
5852        let int = Arc::new(Int32Array::from(vec![42]));
5853
5854        let expected = StructArray::from(vec![
5855            (
5856                Arc::new(Field::new("b", DataType::Boolean, false)),
5857                Arc::clone(&boolean) as ArrayRef,
5858            ),
5859            (
5860                Arc::new(Field::new("c", DataType::Int32, false)),
5861                Arc::clone(&int) as ArrayRef,
5862            ),
5863        ]);
5864
5865        let sv = ScalarStructBuilder::new()
5866            .with_array(Field::new("b", DataType::Boolean, false), boolean)
5867            .with_array(Field::new("c", DataType::Int32, false), int)
5868            .build()
5869            .unwrap();
5870
5871        let struct_arr = sv.to_array().unwrap();
5872        let actual = as_struct_array(&struct_arr).unwrap();
5873        assert_eq!(actual, &expected);
5874    }
5875
5876    #[test]
5877    #[should_panic(
5878        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
5879    )]
5880    fn test_scalar_value_from_for_struct_should_panic() {
5881        let _ = ScalarStructBuilder::new()
5882            .with_array(
5883                Field::new("bool", DataType::Boolean, false),
5884                Arc::new(BooleanArray::from(vec![false, true, false, false])),
5885            )
5886            .with_array(
5887                Field::new("i32", DataType::Int32, false),
5888                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
5889            )
5890            .build()
5891            .unwrap();
5892    }
5893
5894    #[test]
5895    fn test_to_array_of_size_for_nested() {
5896        // Struct
5897        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
5898        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
5899
5900        let struct_array = StructArray::from(vec![
5901            (
5902                Arc::new(Field::new("b", DataType::Boolean, false)),
5903                Arc::clone(&boolean) as ArrayRef,
5904            ),
5905            (
5906                Arc::new(Field::new("c", DataType::Int32, false)),
5907                Arc::clone(&int) as ArrayRef,
5908            ),
5909        ]);
5910        let sv = ScalarValue::Struct(Arc::new(struct_array));
5911        let actual_arr = sv.to_array_of_size(2).unwrap();
5912
5913        let boolean = Arc::new(BooleanArray::from(vec![
5914            false, false, true, true, false, false, true, true,
5915        ]));
5916        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
5917
5918        let struct_array = StructArray::from(vec![
5919            (
5920                Arc::new(Field::new("b", DataType::Boolean, false)),
5921                Arc::clone(&boolean) as ArrayRef,
5922            ),
5923            (
5924                Arc::new(Field::new("c", DataType::Int32, false)),
5925                Arc::clone(&int) as ArrayRef,
5926            ),
5927        ]);
5928
5929        let actual = as_struct_array(&actual_arr).unwrap();
5930        assert_eq!(actual, &struct_array);
5931
5932        // List
5933        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5934            Some(1),
5935            None,
5936            Some(2),
5937        ])]);
5938
5939        let sv = ScalarValue::List(Arc::new(arr));
5940        let actual_arr = sv
5941            .to_array_of_size(2)
5942            .expect("Failed to convert to array of size");
5943        let actual_list_arr = actual_arr.as_list::<i32>();
5944
5945        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5946            Some(vec![Some(1), None, Some(2)]),
5947            Some(vec![Some(1), None, Some(2)]),
5948        ]);
5949
5950        assert_eq!(&arr, actual_list_arr);
5951
5952        // ListView
5953        let arr =
5954            ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5955                Some(1),
5956                None,
5957                Some(2),
5958            ])]);
5959
5960        let sv = ScalarValue::ListView(Arc::new(arr));
5961        let actual_arr = sv
5962            .to_array_of_size(2)
5963            .expect("Failed to convert to array of size");
5964        let actual_list_arr = actual_arr.as_list_view::<i32>();
5965
5966        let arr = ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![
5967            Some(vec![Some(1), None, Some(2)]),
5968            Some(vec![Some(1), None, Some(2)]),
5969        ]);
5970
5971        assert_eq!(&arr, actual_list_arr);
5972    }
5973
5974    #[test]
5975    fn test_to_array_of_size_for_fsl() {
5976        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
5977        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5978        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
5979        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
5980        let actual_arr = sv
5981            .to_array_of_size(2)
5982            .expect("Failed to convert to array of size");
5983
5984        let expected_values =
5985            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
5986        let expected_arr =
5987            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
5988
5989        assert_eq!(
5990            &expected_arr,
5991            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
5992        );
5993
5994        let empty_array = sv
5995            .to_array_of_size(0)
5996            .expect("Failed to convert to empty array");
5997
5998        assert_eq!(empty_array.len(), 0);
5999    }
6000
6001    #[test]
6002    fn test_to_array_of_size_list_size_one() {
6003        // size=1 takes the fast path (Arc::clone)
6004        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6005            Some(10),
6006            Some(20),
6007        ])]);
6008        let sv = ScalarValue::List(Arc::new(arr.clone()));
6009        let result = sv.to_array_of_size(1).unwrap();
6010        assert_eq!(result.as_list::<i32>(), &arr);
6011    }
6012
6013    #[test]
6014    fn test_to_array_of_size_list_empty_inner() {
6015        // A list scalar containing an empty list: [[]]
6016        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]);
6017        let sv = ScalarValue::List(Arc::new(arr));
6018        let result = sv.to_array_of_size(3).unwrap();
6019        let result_list = result.as_list::<i32>();
6020        assert_eq!(result_list.len(), 3);
6021        for i in 0..3 {
6022            assert_eq!(result_list.value(i).len(), 0);
6023        }
6024    }
6025
6026    #[test]
6027    fn test_to_array_of_size_large_list() {
6028        let arr =
6029            LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6030                Some(100),
6031                Some(200),
6032            ])]);
6033        let sv = ScalarValue::LargeList(Arc::new(arr));
6034        let result = sv.to_array_of_size(3).unwrap();
6035        let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6036            Some(vec![Some(100), Some(200)]),
6037            Some(vec![Some(100), Some(200)]),
6038            Some(vec![Some(100), Some(200)]),
6039        ]);
6040        assert_eq!(result.as_list::<i64>(), &expected);
6041    }
6042
6043    #[test]
6044    fn test_list_to_array_of_size_multi_row() {
6045        // Call list_to_array_of_size directly with arr.len() > 1
6046        let arr = Int32Array::from(vec![Some(10), None, Some(30)]);
6047        let result = ScalarValue::list_to_array_of_size(&arr, 3).unwrap();
6048        let result = result.as_primitive::<Int32Type>();
6049        assert_eq!(
6050            result.iter().collect::<Vec<_>>(),
6051            vec![
6052                Some(10),
6053                None,
6054                Some(30),
6055                Some(10),
6056                None,
6057                Some(30),
6058                Some(10),
6059                None,
6060                Some(30),
6061            ]
6062        );
6063    }
6064
6065    #[test]
6066    fn test_to_array_of_size_null_list() {
6067        let dt = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6068        let sv = ScalarValue::try_from(&dt).unwrap();
6069        let result = sv.to_array_of_size(3).unwrap();
6070        assert_eq!(result.len(), 3);
6071        assert_eq!(result.null_count(), 3);
6072    }
6073
6074    /// See https://github.com/apache/datafusion/issues/18870
6075    #[test]
6076    fn test_to_array_of_size_for_none_fsb() {
6077        let sv = ScalarValue::FixedSizeBinary(5, None);
6078        let result = sv
6079            .to_array_of_size(2)
6080            .expect("Failed to convert to array of size");
6081        assert_eq!(result.len(), 2);
6082        assert_eq!(result.null_count(), 2);
6083        assert_eq!(result.as_fixed_size_binary().values().len(), 10);
6084    }
6085
6086    #[test]
6087    fn test_list_to_array_string() {
6088        let scalars = vec![
6089            ScalarValue::from("rust"),
6090            ScalarValue::from("arrow"),
6091            ScalarValue::from("data-fusion"),
6092        ];
6093
6094        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
6095
6096        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
6097        assert_eq!(*result, expected);
6098    }
6099
6100    fn single_row_list_array(items: Vec<&str>) -> ListArray {
6101        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
6102            .build_list_array()
6103    }
6104
6105    fn build_list<O: OffsetSizeTrait>(
6106        values: Vec<Option<Vec<Option<i64>>>>,
6107    ) -> Vec<ScalarValue> {
6108        values
6109            .into_iter()
6110            .map(|v| {
6111                let arr = Arc::new(GenericListArray::<O>::from_iter_primitive::<
6112                    Int64Type,
6113                    _,
6114                    _,
6115                >(vec![v])) as ArrayRef;
6116
6117                if O::IS_LARGE {
6118                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
6119                } else {
6120                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
6121                }
6122            })
6123            .collect()
6124    }
6125
6126    fn build_list_view<O: OffsetSizeTrait>(
6127        values: Vec<Option<Vec<Option<i64>>>>,
6128    ) -> Vec<ScalarValue> {
6129        values
6130            .into_iter()
6131            .map(|v| {
6132                let arr = Arc::new(GenericListViewArray::<O>::from_iter_primitive::<
6133                    Int64Type,
6134                    _,
6135                    _,
6136                >(vec![v])) as ArrayRef;
6137
6138                if O::IS_LARGE {
6139                    ScalarValue::LargeListView(
6140                        arr.as_list_view::<i64>().to_owned().into(),
6141                    )
6142                } else {
6143                    ScalarValue::ListView(arr.as_list_view::<i32>().to_owned().into())
6144                }
6145            })
6146            .collect()
6147    }
6148
6149    #[test]
6150    fn test_iter_to_array_fixed_size_list() {
6151        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
6152        let f1 = Arc::new(FixedSizeListArray::new(
6153            Arc::clone(&field),
6154            3,
6155            Arc::new(Int32Array::from(vec![1, 2, 3])),
6156            None,
6157        ));
6158        let f2 = Arc::new(FixedSizeListArray::new(
6159            Arc::clone(&field),
6160            3,
6161            Arc::new(Int32Array::from(vec![4, 5, 6])),
6162            None,
6163        ));
6164        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
6165
6166        let scalars = vec![
6167            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
6168            ScalarValue::FixedSizeList(f1),
6169            ScalarValue::FixedSizeList(f2),
6170            ScalarValue::FixedSizeList(f_nulls),
6171        ];
6172
6173        let array = ScalarValue::iter_to_array(scalars).unwrap();
6174
6175        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
6176            vec![
6177                None,
6178                Some(vec![Some(1), Some(2), Some(3)]),
6179                Some(vec![Some(4), Some(5), Some(6)]),
6180                None,
6181            ],
6182            3,
6183        );
6184        assert_eq!(array.as_ref(), &expected);
6185    }
6186
6187    #[test]
6188    fn test_iter_to_array_struct() {
6189        let s1 = StructArray::from(vec![
6190            (
6191                Arc::new(Field::new("A", DataType::Boolean, false)),
6192                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
6193            ),
6194            (
6195                Arc::new(Field::new("B", DataType::Int32, false)),
6196                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
6197            ),
6198        ]);
6199
6200        let s2 = StructArray::from(vec![
6201            (
6202                Arc::new(Field::new("A", DataType::Boolean, false)),
6203                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
6204            ),
6205            (
6206                Arc::new(Field::new("B", DataType::Int32, false)),
6207                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
6208            ),
6209        ]);
6210
6211        let scalars = vec![
6212            ScalarValue::Struct(Arc::new(s1)),
6213            ScalarValue::Struct(Arc::new(s2)),
6214        ];
6215
6216        let array = ScalarValue::iter_to_array(scalars).unwrap();
6217
6218        let expected = StructArray::from(vec![
6219            (
6220                Arc::new(Field::new("A", DataType::Boolean, false)),
6221                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
6222            ),
6223            (
6224                Arc::new(Field::new("B", DataType::Int32, false)),
6225                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
6226            ),
6227        ]);
6228        assert_eq!(array.as_ref(), &expected);
6229    }
6230
6231    #[test]
6232    fn test_iter_to_array_struct_with_nulls() {
6233        // non-null
6234        let s1 = StructArray::from((
6235            vec![
6236                (
6237                    Arc::new(Field::new("A", DataType::Int32, false)),
6238                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
6239                ),
6240                (
6241                    Arc::new(Field::new("B", DataType::Int64, false)),
6242                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
6243                ),
6244            ],
6245            // Present the null mask, 1 is non-null, 0 is null
6246            Buffer::from(&[1]),
6247        ));
6248
6249        // null
6250        let s2 = StructArray::from((
6251            vec![
6252                (
6253                    Arc::new(Field::new("A", DataType::Int32, false)),
6254                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
6255                ),
6256                (
6257                    Arc::new(Field::new("B", DataType::Int64, false)),
6258                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
6259                ),
6260            ],
6261            Buffer::from(&[0]),
6262        ));
6263
6264        let scalars = vec![
6265            ScalarValue::Struct(Arc::new(s1)),
6266            ScalarValue::Struct(Arc::new(s2)),
6267        ];
6268
6269        let array = ScalarValue::iter_to_array(scalars).unwrap();
6270        let struct_array = array.as_struct();
6271        assert!(struct_array.is_valid(0));
6272        assert!(struct_array.is_null(1));
6273    }
6274
6275    #[test]
6276    fn iter_to_array_primitive_test() {
6277        // List
6278        // List[[1,2,3]], List[null], List[[4,5]]
6279        let scalars = build_list::<i32>(vec![
6280            Some(vec![Some(1), Some(2), Some(3)]),
6281            None,
6282            Some(vec![Some(4), Some(5)]),
6283        ]);
6284        let array = ScalarValue::iter_to_array(scalars).unwrap();
6285        let list_array = as_list_array(&array).unwrap();
6286        // List[[1,2,3], null, [4,5]]
6287        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
6288            Some(vec![Some(1), Some(2), Some(3)]),
6289            None,
6290            Some(vec![Some(4), Some(5)]),
6291        ]);
6292        assert_eq!(list_array, &expected);
6293
6294        // LargeList
6295        // List[[1,2,3]], List[null], List[[4,5]]
6296        let scalars = build_list::<i64>(vec![
6297            Some(vec![Some(1), Some(2), Some(3)]),
6298            None,
6299            Some(vec![Some(4), Some(5)]),
6300        ]);
6301        let array = ScalarValue::iter_to_array(scalars).unwrap();
6302        let large_list_array = as_large_list_array(&array).unwrap();
6303        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
6304            Some(vec![Some(1), Some(2), Some(3)]),
6305            None,
6306            Some(vec![Some(4), Some(5)]),
6307        ]);
6308        assert_eq!(large_list_array, &expected);
6309
6310        // ListView
6311        // ListView[[1,2,3]], ListView[null], ListView[[4,5]]
6312        let scalars = build_list_view::<i32>(vec![
6313            Some(vec![Some(1), Some(2), Some(3)]),
6314            None,
6315            Some(vec![Some(4), Some(5)]),
6316        ]);
6317
6318        let array = ScalarValue::iter_to_array(scalars).unwrap();
6319        let list_view_array = as_list_view_array(&array).unwrap();
6320        // ListView[[1,2,3], null, [4,5]]
6321        let expected = ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![
6322            Some(vec![Some(1), Some(2), Some(3)]),
6323            None,
6324            Some(vec![Some(4), Some(5)]),
6325        ]);
6326        assert_eq!(list_view_array, &expected);
6327
6328        // LargeListView
6329        // LargeListView[[1,2,3]], LargeListView[null], LargeListView[[4,5]]
6330        let scalars = build_list_view::<i64>(vec![
6331            Some(vec![Some(1), Some(2), Some(3)]),
6332            None,
6333            Some(vec![Some(4), Some(5)]),
6334        ]);
6335
6336        let array = ScalarValue::iter_to_array(scalars).unwrap();
6337        let large_list_view_array = as_large_list_view_array(&array).unwrap();
6338        // LargeListView[[1,2,3], null, [4,5]]
6339        let expected = LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![
6340            Some(vec![Some(1), Some(2), Some(3)]),
6341            None,
6342            Some(vec![Some(4), Some(5)]),
6343        ]);
6344        assert_eq!(large_list_view_array, &expected);
6345    }
6346
6347    #[test]
6348    fn iter_to_array_string_test() {
6349        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
6350        let arr2 = single_row_list_array(vec!["rust", "world"]);
6351
6352        let scalars = vec![
6353            ScalarValue::List(Arc::new(arr1)),
6354            ScalarValue::List(Arc::new(arr2)),
6355        ];
6356
6357        let array = ScalarValue::iter_to_array(scalars).unwrap();
6358        let result = array.as_list::<i32>();
6359
6360        // build expected array
6361        let string_builder = StringBuilder::with_capacity(5, 25);
6362        let mut list_of_string_builder = ListBuilder::new(string_builder);
6363
6364        list_of_string_builder.values().append_value("foo");
6365        list_of_string_builder.values().append_value("bar");
6366        list_of_string_builder.values().append_value("baz");
6367        list_of_string_builder.append(true);
6368
6369        list_of_string_builder.values().append_value("rust");
6370        list_of_string_builder.values().append_value("world");
6371        list_of_string_builder.append(true);
6372        let expected = list_of_string_builder.finish();
6373
6374        assert_eq!(result, &expected);
6375    }
6376
6377    #[test]
6378    fn test_list_scalar_eq_to_array() {
6379        let list_array: ArrayRef =
6380            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6381                Some(vec![Some(0), Some(1), Some(2)]),
6382                None,
6383                Some(vec![None, Some(5)]),
6384            ]));
6385
6386        let fsl_array: ArrayRef =
6387            Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
6388                vec![
6389                    Some(vec![Some(0), Some(1), Some(2)]),
6390                    None,
6391                    Some(vec![Some(3), None, Some(5)]),
6392                ],
6393                3,
6394            ));
6395        let list_view_array: ArrayRef =
6396            Arc::new(ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![
6397                Some(vec![Some(0), Some(1), Some(2)]),
6398                None,
6399                Some(vec![None, Some(5)]),
6400            ]));
6401
6402        for arr in [list_array, fsl_array, list_view_array] {
6403            for i in 0..arr.len() {
6404                let slice = arr.slice(i, 1);
6405                let scalar = match arr.data_type() {
6406                    DataType::List(_) => {
6407                        ScalarValue::List(slice.as_list::<i32>().to_owned().into())
6408                    }
6409                    DataType::FixedSizeList(_, _) => ScalarValue::FixedSizeList(
6410                        slice.as_fixed_size_list().to_owned().into(),
6411                    ),
6412                    DataType::ListView(_) => ScalarValue::ListView(
6413                        slice.as_list_view::<i32>().to_owned().into(),
6414                    ),
6415                    _ => unreachable!(),
6416                };
6417                assert!(scalar.eq_array(&arr, i).unwrap());
6418            }
6419        }
6420    }
6421
6422    #[test]
6423    fn test_eq_array_err_message() {
6424        assert_starts_with(
6425            ScalarValue::Utf8(Some("123".to_string()))
6426                .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0)
6427                .unwrap_err()
6428                .message(),
6429            "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
6430        );
6431    }
6432
6433    #[test]
6434    fn scalar_add_trait_test() -> Result<()> {
6435        let float_value = ScalarValue::Float64(Some(123.));
6436        let float_value_2 = ScalarValue::Float64(Some(123.));
6437        assert_eq!(
6438            (float_value.add(&float_value_2))?,
6439            ScalarValue::Float64(Some(246.))
6440        );
6441        assert_eq!(
6442            (float_value.add(float_value_2))?,
6443            ScalarValue::Float64(Some(246.))
6444        );
6445        Ok(())
6446    }
6447
6448    #[test]
6449    fn scalar_add_trait_null_test() -> Result<()> {
6450        let int_value = ScalarValue::Int32(Some(42));
6451
6452        assert_eq!(
6453            int_value.add(ScalarValue::Int32(None))?,
6454            ScalarValue::Int32(None)
6455        );
6456
6457        Ok(())
6458    }
6459
6460    #[test]
6461    fn scalar_add_trait_wrapping_overflow_test() -> Result<()> {
6462        let int_value = ScalarValue::Int32(Some(i32::MAX));
6463        let one = ScalarValue::Int32(Some(1));
6464
6465        assert_eq!(int_value.add(one)?, ScalarValue::Int32(Some(i32::MIN)));
6466
6467        Ok(())
6468    }
6469
6470    #[test]
6471    fn scalar_add_trait_decimal_scale_test() -> Result<()> {
6472        let decimal = ScalarValue::Decimal128(Some(123), 10, 2);
6473        let decimal_2 = ScalarValue::Decimal128(Some(4), 9, 1);
6474
6475        assert_eq!(
6476            decimal.add(decimal_2)?,
6477            ScalarValue::Decimal128(Some(163), 11, 2)
6478        );
6479
6480        Ok(())
6481    }
6482
6483    #[test]
6484    fn scalar_add_trait_decimal256_scale_test() -> Result<()> {
6485        let decimal = ScalarValue::Decimal256(Some(i256::from(123)), 10, 2);
6486        let decimal_2 = ScalarValue::Decimal256(Some(i256::from(4)), 9, 1);
6487
6488        assert_eq!(
6489            decimal.add(decimal_2)?,
6490            ScalarValue::Decimal256(Some(i256::from(163)), 11, 2)
6491        );
6492
6493        Ok(())
6494    }
6495
6496    #[test]
6497    fn scalar_add_trait_decimal_negative_scale_test() -> Result<()> {
6498        let decimal = ScalarValue::Decimal128(Some(1), DECIMAL128_MAX_PRECISION, i8::MIN);
6499        let decimal_2 =
6500            ScalarValue::Decimal128(Some(1), DECIMAL128_MAX_PRECISION, i8::MIN);
6501
6502        assert_eq!(
6503            decimal.add(decimal_2)?,
6504            ScalarValue::Decimal128(Some(2), DECIMAL128_MAX_PRECISION, i8::MIN)
6505        );
6506
6507        Ok(())
6508    }
6509
6510    #[test]
6511    fn scalar_sub_trait_test() -> Result<()> {
6512        let float_value = ScalarValue::Float64(Some(123.));
6513        let float_value_2 = ScalarValue::Float64(Some(123.));
6514        assert_eq!(
6515            float_value.sub(&float_value_2)?,
6516            ScalarValue::Float64(Some(0.))
6517        );
6518        assert_eq!(
6519            float_value.sub(float_value_2)?,
6520            ScalarValue::Float64(Some(0.))
6521        );
6522        Ok(())
6523    }
6524
6525    #[test]
6526    fn scalar_sub_trait_int32_test() -> Result<()> {
6527        let int_value = ScalarValue::Int32(Some(42));
6528        let int_value_2 = ScalarValue::Int32(Some(100));
6529        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
6530        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
6531        Ok(())
6532    }
6533
6534    #[test]
6535    fn scalar_sub_trait_int32_overflow_test() {
6536        let int_value = ScalarValue::Int32(Some(i32::MAX));
6537        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
6538        let err = int_value
6539            .sub_checked(&int_value_2)
6540            .unwrap_err()
6541            .strip_backtrace();
6542        assert_eq!(
6543            err,
6544            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
6545        )
6546    }
6547
6548    #[test]
6549    fn scalar_sub_trait_int64_test() -> Result<()> {
6550        let int_value = ScalarValue::Int64(Some(42));
6551        let int_value_2 = ScalarValue::Int64(Some(100));
6552        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
6553        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
6554        Ok(())
6555    }
6556
6557    #[test]
6558    fn scalar_sub_trait_int64_overflow_test() {
6559        let int_value = ScalarValue::Int64(Some(i64::MAX));
6560        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
6561        let err = int_value
6562            .sub_checked(&int_value_2)
6563            .unwrap_err()
6564            .strip_backtrace();
6565        assert_eq!(
6566            err,
6567            "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808"
6568        )
6569    }
6570
6571    #[test]
6572    fn scalar_add_overflow_test() -> Result<()> {
6573        check_scalar_add_overflow::<Int8Type>(
6574            ScalarValue::Int8(Some(i8::MAX)),
6575            ScalarValue::Int8(Some(i8::MAX)),
6576        );
6577        check_scalar_add_overflow::<UInt8Type>(
6578            ScalarValue::UInt8(Some(u8::MAX)),
6579            ScalarValue::UInt8(Some(u8::MAX)),
6580        );
6581        check_scalar_add_overflow::<Int16Type>(
6582            ScalarValue::Int16(Some(i16::MAX)),
6583            ScalarValue::Int16(Some(i16::MAX)),
6584        );
6585        check_scalar_add_overflow::<UInt16Type>(
6586            ScalarValue::UInt16(Some(u16::MAX)),
6587            ScalarValue::UInt16(Some(u16::MAX)),
6588        );
6589        check_scalar_add_overflow::<Int32Type>(
6590            ScalarValue::Int32(Some(i32::MAX)),
6591            ScalarValue::Int32(Some(i32::MAX)),
6592        );
6593        check_scalar_add_overflow::<UInt32Type>(
6594            ScalarValue::UInt32(Some(u32::MAX)),
6595            ScalarValue::UInt32(Some(u32::MAX)),
6596        );
6597        check_scalar_add_overflow::<Int64Type>(
6598            ScalarValue::Int64(Some(i64::MAX)),
6599            ScalarValue::Int64(Some(i64::MAX)),
6600        );
6601        check_scalar_add_overflow::<UInt64Type>(
6602            ScalarValue::UInt64(Some(u64::MAX)),
6603            ScalarValue::UInt64(Some(u64::MAX)),
6604        );
6605
6606        Ok(())
6607    }
6608
6609    #[test]
6610    fn scalar_decimal_add_overflow_test() {
6611        check_scalar_decimal_add_overflow::<Decimal128Type>(
6612            ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
6613            ScalarValue::Decimal128(Some(1), DECIMAL128_MAX_PRECISION, 0),
6614        );
6615        check_scalar_decimal_add_overflow::<Decimal256Type>(
6616            ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
6617            ScalarValue::Decimal256(Some(i256::ONE), DECIMAL256_MAX_PRECISION, 0),
6618        );
6619    }
6620
6621    #[test]
6622    fn scalar_decimal_in_place_add_error_preserves_lhs() {
6623        let mut lhs =
6624            ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0);
6625        let original = lhs.clone();
6626
6627        let err = lhs
6628            .try_add_checked_in_place(&ScalarValue::Decimal128(
6629                Some(1),
6630                DECIMAL128_MAX_PRECISION,
6631                0,
6632            ))
6633            .unwrap_err()
6634            .strip_backtrace();
6635
6636        assert_eq!(
6637            err,
6638            format!(
6639                "Arrow error: Arithmetic overflow: Overflow happened on: {} + 1",
6640                i128::MAX
6641            )
6642        );
6643        assert_eq!(lhs, original);
6644    }
6645
6646    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
6647    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6648    where
6649        T: ArrowNumericType,
6650    {
6651        let scalar_result = left.add_checked(&right);
6652
6653        let left_array = left.to_array().expect("Failed to convert to array");
6654        let right_array = right.to_array().expect("Failed to convert to array");
6655        let arrow_left_array = left_array.as_primitive::<T>();
6656        let arrow_right_array = right_array.as_primitive::<T>();
6657        let arrow_result = add(arrow_left_array, arrow_right_array);
6658
6659        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6660    }
6661
6662    // Verifies the decimal fast path preserves the same overflow behavior as Arrow kernels.
6663    fn check_scalar_decimal_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6664    where
6665        T: ArrowPrimitiveType,
6666    {
6667        let scalar_result = left.add(&right);
6668
6669        let left_array = left.to_array().expect("Failed to convert to array");
6670        let right_array = right.to_array().expect("Failed to convert to array");
6671        let arrow_left_array = left_array.as_primitive::<T>();
6672        let arrow_right_array = right_array.as_primitive::<T>();
6673        let arrow_result = add_wrapping(arrow_left_array, arrow_right_array);
6674
6675        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6676    }
6677
6678    #[test]
6679    fn test_interval_add_timestamp() -> Result<()> {
6680        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
6681            months: 1,
6682            days: 2,
6683            nanoseconds: 3,
6684        }));
6685        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6686        let result = interval.add(&timestamp)?;
6687        let expect = timestamp.add(&interval)?;
6688        assert_eq!(result, expect);
6689
6690        let interval = ScalarValue::IntervalYearMonth(Some(123));
6691        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6692        let result = interval.add(&timestamp)?;
6693        let expect = timestamp.add(&interval)?;
6694        assert_eq!(result, expect);
6695
6696        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
6697            days: 1,
6698            milliseconds: 23,
6699        }));
6700        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6701        let result = interval.add(&timestamp)?;
6702        let expect = timestamp.add(&interval)?;
6703        assert_eq!(result, expect);
6704        Ok(())
6705    }
6706
6707    #[test]
6708    fn test_try_cmp() {
6709        assert_eq!(
6710            ScalarValue::try_cmp(
6711                &ScalarValue::Int32(Some(1)),
6712                &ScalarValue::Int32(Some(2))
6713            )
6714            .unwrap(),
6715            Ordering::Less
6716        );
6717        assert_eq!(
6718            ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
6719                .unwrap(),
6720            Ordering::Less
6721        );
6722        assert_starts_with(
6723            ScalarValue::try_cmp(
6724                &ScalarValue::Int32(Some(1)),
6725                &ScalarValue::Int64(Some(2)),
6726            )
6727            .unwrap_err()
6728            .message(),
6729            "Uncomparable values: Int32(1), Int64(2)",
6730        );
6731    }
6732
6733    #[test]
6734    fn scalar_decimal_test() -> Result<()> {
6735        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
6736        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
6737        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
6738        assert_eq!(123_i128, try_into_value);
6739        assert!(!decimal_value.is_null());
6740        let neg_decimal_value = decimal_value.arithmetic_negate()?;
6741        match neg_decimal_value {
6742            ScalarValue::Decimal128(v, _, _) => {
6743                assert_eq!(-123, v.unwrap());
6744            }
6745            _ => {
6746                unreachable!();
6747            }
6748        }
6749
6750        // decimal scalar to array
6751        let array = decimal_value
6752            .to_array()
6753            .expect("Failed to convert to array");
6754        let array = as_decimal128_array(&array)?;
6755        assert_eq!(1, array.len());
6756        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
6757        assert_eq!(123i128, array.value(0));
6758
6759        // decimal scalar to array with size
6760        let array = decimal_value
6761            .to_array_of_size(10)
6762            .expect("Failed to convert to array of size");
6763        let array_decimal = as_decimal128_array(&array)?;
6764        assert_eq!(10, array.len());
6765        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
6766        assert_eq!(123i128, array_decimal.value(0));
6767        assert_eq!(123i128, array_decimal.value(9));
6768        // test eq array
6769        assert!(
6770            decimal_value
6771                .eq_array(&array, 1)
6772                .expect("Failed to compare arrays")
6773        );
6774        assert!(
6775            decimal_value
6776                .eq_array(&array, 5)
6777                .expect("Failed to compare arrays")
6778        );
6779        // test try from array
6780        assert_eq!(
6781            decimal_value,
6782            ScalarValue::try_from_array(&array, 5).unwrap()
6783        );
6784
6785        assert_eq!(
6786            decimal_value,
6787            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
6788        );
6789
6790        // test compare
6791        let left = ScalarValue::Decimal128(Some(123), 10, 2);
6792        let right = ScalarValue::Decimal128(Some(124), 10, 2);
6793        assert!(!left.eq(&right));
6794        let result = left < right;
6795        assert!(result);
6796        let result = left <= right;
6797        assert!(result);
6798        let right = ScalarValue::Decimal128(Some(124), 10, 3);
6799        // make sure that two decimals with diff datatype can't be compared.
6800        let result = left.partial_cmp(&right);
6801        assert_eq!(None, result);
6802
6803        let decimal_vec = vec![
6804            ScalarValue::Decimal128(Some(1), 10, 2),
6805            ScalarValue::Decimal128(Some(2), 10, 2),
6806            ScalarValue::Decimal128(Some(3), 10, 2),
6807        ];
6808        // convert the vec to decimal array and check the result
6809        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
6810        assert_eq!(3, array.len());
6811        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
6812
6813        let decimal_vec = vec![
6814            ScalarValue::Decimal128(Some(1), 10, 2),
6815            ScalarValue::Decimal128(Some(2), 10, 2),
6816            ScalarValue::Decimal128(Some(3), 10, 2),
6817            ScalarValue::Decimal128(None, 10, 2),
6818        ];
6819        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
6820        assert_eq!(4, array.len());
6821        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
6822
6823        assert!(
6824            ScalarValue::try_new_decimal128(1, 10, 2)
6825                .unwrap()
6826                .eq_array(&array, 0)
6827                .expect("Failed to compare arrays")
6828        );
6829        assert!(
6830            ScalarValue::try_new_decimal128(2, 10, 2)
6831                .unwrap()
6832                .eq_array(&array, 1)
6833                .expect("Failed to compare arrays")
6834        );
6835        assert!(
6836            ScalarValue::try_new_decimal128(3, 10, 2)
6837                .unwrap()
6838                .eq_array(&array, 2)
6839                .expect("Failed to compare arrays")
6840        );
6841        assert_eq!(
6842            ScalarValue::Decimal128(None, 10, 2),
6843            ScalarValue::try_from_array(&array, 3).unwrap()
6844        );
6845
6846        Ok(())
6847    }
6848
6849    #[test]
6850    fn test_new_one_decimal128() {
6851        assert_eq!(
6852            ScalarValue::new_one(&DataType::Decimal128(5, 0)).unwrap(),
6853            ScalarValue::Decimal128(Some(1), 5, 0)
6854        );
6855        assert_eq!(
6856            ScalarValue::new_one(&DataType::Decimal128(5, 1)).unwrap(),
6857            ScalarValue::Decimal128(Some(10), 5, 1)
6858        );
6859        assert_eq!(
6860            ScalarValue::new_one(&DataType::Decimal128(5, 2)).unwrap(),
6861            ScalarValue::Decimal128(Some(100), 5, 2)
6862        );
6863        // More precision
6864        assert_eq!(
6865            ScalarValue::new_one(&DataType::Decimal128(7, 2)).unwrap(),
6866            ScalarValue::Decimal128(Some(100), 7, 2)
6867        );
6868        // No negative scale
6869        assert!(ScalarValue::new_one(&DataType::Decimal128(5, -1)).is_err());
6870        // Invalid combination
6871        assert!(ScalarValue::new_one(&DataType::Decimal128(0, 2)).is_err());
6872        assert!(ScalarValue::new_one(&DataType::Decimal128(5, 7)).is_err());
6873    }
6874
6875    #[test]
6876    fn test_new_one_decimal256() {
6877        assert_eq!(
6878            ScalarValue::new_one(&DataType::Decimal256(5, 0)).unwrap(),
6879            ScalarValue::Decimal256(Some(1.into()), 5, 0)
6880        );
6881        assert_eq!(
6882            ScalarValue::new_one(&DataType::Decimal256(5, 1)).unwrap(),
6883            ScalarValue::Decimal256(Some(10.into()), 5, 1)
6884        );
6885        assert_eq!(
6886            ScalarValue::new_one(&DataType::Decimal256(5, 2)).unwrap(),
6887            ScalarValue::Decimal256(Some(100.into()), 5, 2)
6888        );
6889        // More precision
6890        assert_eq!(
6891            ScalarValue::new_one(&DataType::Decimal256(7, 2)).unwrap(),
6892            ScalarValue::Decimal256(Some(100.into()), 7, 2)
6893        );
6894        // No negative scale
6895        assert!(ScalarValue::new_one(&DataType::Decimal256(5, -1)).is_err());
6896        // Invalid combination
6897        assert!(ScalarValue::new_one(&DataType::Decimal256(0, 2)).is_err());
6898        assert!(ScalarValue::new_one(&DataType::Decimal256(5, 7)).is_err());
6899    }
6900
6901    #[test]
6902    fn test_new_ten_decimal128() {
6903        assert_eq!(
6904            ScalarValue::new_ten(&DataType::Decimal128(5, 1)).unwrap(),
6905            ScalarValue::Decimal128(Some(100), 5, 1)
6906        );
6907        assert_eq!(
6908            ScalarValue::new_ten(&DataType::Decimal128(5, 2)).unwrap(),
6909            ScalarValue::Decimal128(Some(1000), 5, 2)
6910        );
6911        // More precision
6912        assert_eq!(
6913            ScalarValue::new_ten(&DataType::Decimal128(7, 2)).unwrap(),
6914            ScalarValue::Decimal128(Some(1000), 7, 2)
6915        );
6916        // No negative scale
6917        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, -1)).is_err());
6918        // Invalid combination
6919        assert!(ScalarValue::new_ten(&DataType::Decimal128(0, 2)).is_err());
6920        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, 7)).is_err());
6921    }
6922
6923    #[test]
6924    fn test_new_ten_decimal256() {
6925        assert_eq!(
6926            ScalarValue::new_ten(&DataType::Decimal256(5, 1)).unwrap(),
6927            ScalarValue::Decimal256(Some(100.into()), 5, 1)
6928        );
6929        assert_eq!(
6930            ScalarValue::new_ten(&DataType::Decimal256(5, 2)).unwrap(),
6931            ScalarValue::Decimal256(Some(1000.into()), 5, 2)
6932        );
6933        // More precision
6934        assert_eq!(
6935            ScalarValue::new_ten(&DataType::Decimal256(7, 2)).unwrap(),
6936            ScalarValue::Decimal256(Some(1000.into()), 7, 2)
6937        );
6938        // No negative scale
6939        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, -1)).is_err());
6940        // Invalid combination
6941        assert!(ScalarValue::new_ten(&DataType::Decimal256(0, 2)).is_err());
6942        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, 7)).is_err());
6943    }
6944
6945    #[test]
6946    fn test_new_negative_one_decimal128() {
6947        assert_eq!(
6948            ScalarValue::new_negative_one(&DataType::Decimal128(5, 0)).unwrap(),
6949            ScalarValue::Decimal128(Some(-1), 5, 0)
6950        );
6951        assert_eq!(
6952            ScalarValue::new_negative_one(&DataType::Decimal128(5, 2)).unwrap(),
6953            ScalarValue::Decimal128(Some(-100), 5, 2)
6954        );
6955    }
6956
6957    #[test]
6958    fn test_list_partial_cmp() {
6959        let a =
6960            ScalarValue::List(Arc::new(
6961                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6962                    Some(1),
6963                    Some(2),
6964                    Some(3),
6965                ])]),
6966            ));
6967        let b =
6968            ScalarValue::List(Arc::new(
6969                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6970                    Some(1),
6971                    Some(2),
6972                    Some(3),
6973                ])]),
6974            ));
6975        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
6976
6977        let a =
6978            ScalarValue::List(Arc::new(
6979                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6980                    Some(10),
6981                    Some(2),
6982                    Some(3),
6983                ])]),
6984            ));
6985        let b =
6986            ScalarValue::List(Arc::new(
6987                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6988                    Some(1),
6989                    Some(2),
6990                    Some(30),
6991                ])]),
6992            ));
6993        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6994
6995        let a =
6996            ScalarValue::List(Arc::new(
6997                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6998                    Some(10),
6999                    Some(2),
7000                    Some(3),
7001                ])]),
7002            ));
7003        let b =
7004            ScalarValue::List(Arc::new(
7005                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7006                    Some(10),
7007                    Some(2),
7008                    Some(30),
7009                ])]),
7010            ));
7011        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
7012
7013        let a =
7014            ScalarValue::List(Arc::new(
7015                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7016                    Some(1),
7017                    Some(2),
7018                    Some(3),
7019                ])]),
7020            ));
7021        let b =
7022            ScalarValue::List(Arc::new(
7023                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7024                    Some(2),
7025                    Some(3),
7026                ])]),
7027            ));
7028        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
7029
7030        let a =
7031            ScalarValue::List(Arc::new(
7032                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7033                    Some(2),
7034                    Some(3),
7035                    Some(4),
7036                ])]),
7037            ));
7038        let b =
7039            ScalarValue::List(Arc::new(
7040                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7041                    Some(1),
7042                    Some(2),
7043                ])]),
7044            ));
7045        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7046
7047        let a =
7048            ScalarValue::List(Arc::new(
7049                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7050                    Some(1),
7051                    Some(2),
7052                    Some(3),
7053                ])]),
7054            ));
7055        let b =
7056            ScalarValue::List(Arc::new(
7057                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7058                    Some(1),
7059                    Some(2),
7060                ])]),
7061            ));
7062        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7063
7064        let a =
7065            ScalarValue::List(Arc::new(
7066                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7067                    None,
7068                    Some(2),
7069                    Some(3),
7070                ])]),
7071            ));
7072        let b =
7073            ScalarValue::List(Arc::new(
7074                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
7075                    Some(1),
7076                    Some(2),
7077                    Some(3),
7078                ])]),
7079            ));
7080        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7081
7082        let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
7083            Int64Type,
7084            _,
7085            _,
7086        >(vec![Some(vec![
7087            None,
7088            Some(2),
7089            Some(3),
7090        ])])));
7091        let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
7092            Int64Type,
7093            _,
7094            _,
7095        >(vec![Some(vec![
7096            Some(1),
7097            Some(2),
7098            Some(3),
7099        ])])));
7100        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7101
7102        let a = ScalarValue::FixedSizeList(Arc::new(
7103            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
7104                vec![Some(vec![None, Some(2), Some(3)])],
7105                3,
7106            ),
7107        ));
7108        let b = ScalarValue::FixedSizeList(Arc::new(
7109            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
7110                vec![Some(vec![Some(1), Some(2), Some(3)])],
7111                3,
7112            ),
7113        ));
7114        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7115
7116        let a = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::<
7117            Int64Type,
7118            _,
7119            _,
7120        >(vec![Some(vec![
7121            None,
7122            Some(2),
7123            Some(3),
7124        ])])));
7125        let b = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::<
7126            Int64Type,
7127            _,
7128            _,
7129        >(vec![Some(vec![
7130            Some(1),
7131            Some(2),
7132            Some(3),
7133        ])])));
7134        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7135
7136        let a =
7137            ScalarValue::LargeListView(Arc::new(
7138                LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
7139                    vec![None, Some(2), Some(3)],
7140                )]),
7141            ));
7142        let b =
7143            ScalarValue::LargeListView(Arc::new(
7144                LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
7145                    vec![Some(1), Some(2), Some(3)],
7146                )]),
7147            ));
7148        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
7149    }
7150
7151    #[test]
7152    fn scalar_value_to_array_u64() -> Result<()> {
7153        let value = ScalarValue::UInt64(Some(13u64));
7154        let array = value.to_array().expect("Failed to convert to array");
7155        let array = as_uint64_array(&array)?;
7156        assert_eq!(array.len(), 1);
7157        assert!(!array.is_null(0));
7158        assert_eq!(array.value(0), 13);
7159
7160        let value = ScalarValue::UInt64(None);
7161        let array = value.to_array().expect("Failed to convert to array");
7162        let array = as_uint64_array(&array)?;
7163        assert_eq!(array.len(), 1);
7164        assert!(array.is_null(0));
7165        Ok(())
7166    }
7167
7168    #[test]
7169    fn scalar_value_to_array_u32() -> Result<()> {
7170        let value = ScalarValue::UInt32(Some(13u32));
7171        let array = value.to_array().expect("Failed to convert to array");
7172        let array = as_uint32_array(&array)?;
7173        assert_eq!(array.len(), 1);
7174        assert!(!array.is_null(0));
7175        assert_eq!(array.value(0), 13);
7176
7177        let value = ScalarValue::UInt32(None);
7178        let array = value.to_array().expect("Failed to convert to array");
7179        let array = as_uint32_array(&array)?;
7180        assert_eq!(array.len(), 1);
7181        assert!(array.is_null(0));
7182        Ok(())
7183    }
7184
7185    #[test]
7186    fn scalar_list_null_to_array() {
7187        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
7188
7189        assert_eq!(list_array.len(), 1);
7190        assert_eq!(list_array.values().len(), 0);
7191    }
7192
7193    #[test]
7194    fn scalar_large_list_null_to_array() {
7195        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
7196
7197        assert_eq!(list_array.len(), 1);
7198        assert_eq!(list_array.values().len(), 0);
7199    }
7200
7201    #[test]
7202    fn scalar_list_to_array() -> Result<()> {
7203        let values = vec![
7204            ScalarValue::UInt64(Some(100)),
7205            ScalarValue::UInt64(None),
7206            ScalarValue::UInt64(Some(101)),
7207        ];
7208        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
7209        assert_eq!(list_array.len(), 1);
7210        assert_eq!(list_array.values().len(), 3);
7211
7212        let prim_array_ref = list_array.value(0);
7213        let prim_array = as_uint64_array(&prim_array_ref)?;
7214        assert_eq!(prim_array.len(), 3);
7215        assert_eq!(prim_array.value(0), 100);
7216        assert!(prim_array.is_null(1));
7217        assert_eq!(prim_array.value(2), 101);
7218        Ok(())
7219    }
7220
7221    #[test]
7222    fn scalar_large_list_to_array() -> Result<()> {
7223        let values = vec![
7224            ScalarValue::UInt64(Some(100)),
7225            ScalarValue::UInt64(None),
7226            ScalarValue::UInt64(Some(101)),
7227        ];
7228        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
7229        assert_eq!(list_array.len(), 1);
7230        assert_eq!(list_array.values().len(), 3);
7231
7232        let prim_array_ref = list_array.value(0);
7233        let prim_array = as_uint64_array(&prim_array_ref)?;
7234        assert_eq!(prim_array.len(), 3);
7235        assert_eq!(prim_array.value(0), 100);
7236        assert!(prim_array.is_null(1));
7237        assert_eq!(prim_array.value(2), 101);
7238        Ok(())
7239    }
7240
7241    /// Creates array directly and via ScalarValue and ensures they are the same
7242    macro_rules! check_scalar_iter {
7243        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
7244            let scalars: Vec<_> =
7245                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
7246
7247            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
7248
7249            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
7250
7251            assert_eq!(&array, &expected);
7252        }};
7253    }
7254
7255    /// Creates array directly and via ScalarValue and ensures they are the same
7256    /// but for variants that carry a timezone field.
7257    macro_rules! check_scalar_iter_tz {
7258        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
7259            let scalars: Vec<_> = $INPUT
7260                .iter()
7261                .map(|v| ScalarValue::$SCALAR_T(*v, None))
7262                .collect();
7263
7264            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
7265
7266            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
7267
7268            assert_eq!(&array, &expected);
7269        }};
7270    }
7271
7272    /// Creates array directly and via ScalarValue and ensures they
7273    /// are the same, for string  arrays
7274    macro_rules! check_scalar_iter_string {
7275        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
7276            let scalars: Vec<_> = $INPUT
7277                .iter()
7278                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
7279                .collect();
7280
7281            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
7282
7283            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
7284
7285            assert_eq!(&array, &expected);
7286        }};
7287    }
7288
7289    /// Creates array directly and via ScalarValue and ensures they
7290    /// are the same, for binary arrays
7291    macro_rules! check_scalar_iter_binary {
7292        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
7293            let scalars: Vec<_> = $INPUT
7294                .iter()
7295                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
7296                .collect();
7297
7298            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
7299
7300            let expected: $ARRAYTYPE =
7301                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
7302
7303            let expected: ArrayRef = Arc::new(expected);
7304
7305            assert_eq!(&array, &expected);
7306        }};
7307    }
7308
7309    #[test]
7310    fn scalar_iter_to_array_boolean() {
7311        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
7312        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
7313        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
7314
7315        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
7316        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
7317        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
7318        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
7319
7320        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
7321        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
7322        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
7323        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
7324
7325        check_scalar_iter_tz!(
7326            TimestampSecond,
7327            TimestampSecondArray,
7328            vec![Some(1), None, Some(3)]
7329        );
7330        check_scalar_iter_tz!(
7331            TimestampMillisecond,
7332            TimestampMillisecondArray,
7333            vec![Some(1), None, Some(3)]
7334        );
7335        check_scalar_iter_tz!(
7336            TimestampMicrosecond,
7337            TimestampMicrosecondArray,
7338            vec![Some(1), None, Some(3)]
7339        );
7340        check_scalar_iter_tz!(
7341            TimestampNanosecond,
7342            TimestampNanosecondArray,
7343            vec![Some(1), None, Some(3)]
7344        );
7345
7346        check_scalar_iter_string!(
7347            Utf8,
7348            StringArray,
7349            vec![Some("foo"), None, Some("bar")]
7350        );
7351        check_scalar_iter_string!(
7352            LargeUtf8,
7353            LargeStringArray,
7354            vec![Some("foo"), None, Some("bar")]
7355        );
7356        check_scalar_iter_binary!(
7357            Binary,
7358            BinaryArray,
7359            [Some(b"foo"), None, Some(b"bar")]
7360        );
7361        check_scalar_iter_binary!(
7362            LargeBinary,
7363            LargeBinaryArray,
7364            [Some(b"foo"), None, Some(b"bar")]
7365        );
7366    }
7367
7368    #[test]
7369    fn scalar_iter_to_array_empty() {
7370        let scalars = vec![] as Vec<ScalarValue>;
7371
7372        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
7373        assert!(
7374            result
7375                .to_string()
7376                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
7377            "{}",
7378            result
7379        );
7380    }
7381
7382    #[test]
7383    fn scalar_iter_to_dictionary() {
7384        fn make_val(v: Option<String>) -> ScalarValue {
7385            let key_type = DataType::Int32;
7386            let value = ScalarValue::Utf8(v);
7387            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
7388        }
7389
7390        let scalars = [
7391            make_val(Some("Foo".into())),
7392            make_val(None),
7393            make_val(Some("Bar".into())),
7394        ];
7395
7396        let array = ScalarValue::iter_to_array(scalars).unwrap();
7397        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
7398        let values_array = as_string_array(array.values()).unwrap();
7399
7400        let values = array
7401            .keys_iter()
7402            .map(|k| {
7403                k.map(|k| {
7404                    assert!(values_array.is_valid(k));
7405                    values_array.value(k)
7406                })
7407            })
7408            .collect::<Vec<_>>();
7409
7410        let expected = vec![Some("Foo"), None, Some("Bar")];
7411        assert_eq!(values, expected);
7412    }
7413
7414    #[test]
7415    fn scalar_iter_to_array_mismatched_types() {
7416        use ScalarValue::*;
7417        // If the scalar values are not all the correct type, error here
7418        let scalars = [Boolean(Some(true)), Int32(Some(5))];
7419
7420        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
7421        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
7422                "{}", result);
7423    }
7424
7425    #[test]
7426    fn scalar_try_from_array_null() {
7427        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
7428        let array: ArrayRef = Arc::new(array);
7429
7430        assert_eq!(
7431            ScalarValue::Int64(Some(33)),
7432            ScalarValue::try_from_array(&array, 0).unwrap()
7433        );
7434        assert_eq!(
7435            ScalarValue::Int64(None),
7436            ScalarValue::try_from_array(&array, 1).unwrap()
7437        );
7438    }
7439
7440    #[test]
7441    fn scalar_try_from_array_list_array_null() {
7442        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
7443            Some(vec![Some(1), Some(2)]),
7444            None,
7445        ]);
7446
7447        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
7448        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
7449
7450        let data_type =
7451            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
7452
7453        assert_eq!(non_null_list_scalar.data_type(), data_type);
7454        assert_eq!(null_list_scalar.data_type(), data_type);
7455    }
7456
7457    #[test]
7458    fn scalar_try_from_list_datatypes() {
7459        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
7460
7461        // Test for List
7462        let data_type = &DataType::List(Arc::clone(&inner_field));
7463        let scalar: ScalarValue = data_type.try_into().unwrap();
7464        let expected = ScalarValue::List(
7465            new_null_array(data_type, 1)
7466                .as_list::<i32>()
7467                .to_owned()
7468                .into(),
7469        );
7470        assert_eq!(expected, scalar);
7471        assert!(expected.is_null());
7472
7473        // Test for LargeList
7474        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
7475        let scalar: ScalarValue = data_type.try_into().unwrap();
7476        let expected = ScalarValue::LargeList(
7477            new_null_array(data_type, 1)
7478                .as_list::<i64>()
7479                .to_owned()
7480                .into(),
7481        );
7482        assert_eq!(expected, scalar);
7483        assert!(expected.is_null());
7484
7485        // Test for FixedSizeList(5)
7486        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
7487        let scalar: ScalarValue = data_type.try_into().unwrap();
7488        let expected = ScalarValue::FixedSizeList(
7489            new_null_array(data_type, 1)
7490                .as_fixed_size_list()
7491                .to_owned()
7492                .into(),
7493        );
7494        assert_eq!(expected, scalar);
7495        assert!(expected.is_null());
7496
7497        // Test for ListView
7498        let data_type = &DataType::ListView(Arc::clone(&inner_field));
7499        let scalar: ScalarValue = data_type.try_into().unwrap();
7500        let expected = ScalarValue::ListView(
7501            new_null_array(data_type, 1)
7502                .as_list_view::<i32>()
7503                .to_owned()
7504                .into(),
7505        );
7506        assert_eq!(expected, scalar);
7507        assert!(expected.is_null());
7508
7509        // Test for LargeListView
7510        let data_type = &DataType::LargeListView(Arc::clone(&inner_field));
7511        let scalar: ScalarValue = data_type.try_into().unwrap();
7512        let expected = ScalarValue::LargeListView(
7513            new_null_array(data_type, 1)
7514                .as_list_view::<i64>()
7515                .to_owned()
7516                .into(),
7517        );
7518        assert_eq!(expected, scalar);
7519        assert!(expected.is_null());
7520    }
7521
7522    #[test]
7523    fn scalar_try_from_list_of_list() {
7524        let data_type = DataType::List(Arc::new(Field::new_list_field(
7525            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7526            true,
7527        )));
7528        let data_type = &data_type;
7529        let scalar: ScalarValue = data_type.try_into().unwrap();
7530
7531        let expected = ScalarValue::List(
7532            new_null_array(
7533                &DataType::List(Arc::new(Field::new_list_field(
7534                    DataType::List(Arc::new(Field::new_list_field(
7535                        DataType::Int32,
7536                        true,
7537                    ))),
7538                    true,
7539                ))),
7540                1,
7541            )
7542            .as_list::<i32>()
7543            .to_owned()
7544            .into(),
7545        );
7546
7547        assert_eq!(expected, scalar)
7548    }
7549
7550    #[test]
7551    fn scalar_try_from_not_equal_list_nested_list() {
7552        let list_data_type =
7553            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
7554        let data_type = &list_data_type;
7555        let list_scalar: ScalarValue = data_type.try_into().unwrap();
7556
7557        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
7558            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7559            true,
7560        )));
7561        let data_type = &nested_list_data_type;
7562        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
7563
7564        assert_ne!(list_scalar, nested_list_scalar);
7565    }
7566
7567    #[test]
7568    fn scalar_try_from_dict_datatype() {
7569        let data_type =
7570            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
7571        let data_type = &data_type;
7572        let expected = ScalarValue::Dictionary(
7573            Box::new(DataType::Int8),
7574            Box::new(ScalarValue::Utf8(None)),
7575        );
7576        assert_eq!(expected, data_type.try_into().unwrap())
7577    }
7578
7579    #[test]
7580    fn size_of_scalar() {
7581        // Since ScalarValues are used in a non trivial number of places,
7582        // making it larger means significant more memory consumption
7583        // per distinct value.
7584        //
7585        // Thus this test ensures that no code change makes ScalarValue larger
7586        //
7587        // The alignment requirements differ across architectures and
7588        // thus the size of the enum appears to as well
7589
7590        // The value may also change depending on rust version
7591        assert_eq!(size_of::<ScalarValue>(), 64);
7592    }
7593
7594    #[test]
7595    fn memory_size() {
7596        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
7597        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
7598        let sv_size = sv.size();
7599
7600        let mut v = Vec::with_capacity(10);
7601        // do NOT clone `sv` here because this may shrink the vector capacity
7602        v.push(sv);
7603        assert_eq!(v.capacity(), 10);
7604        assert_eq!(
7605            ScalarValue::size_of_vec(&v),
7606            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
7607        );
7608
7609        #[allow(clippy::allow_attributes, clippy::mutable_key_type)]
7610        // ScalarValue has interior mutability but is intentionally used as hash key
7611        let mut s = HashSet::with_capacity(0);
7612        // do NOT clone `sv` here because this may shrink the vector capacity
7613        s.insert(v.pop().unwrap());
7614        // hashsets may easily grow during insert, so capacity is dynamic
7615        let s_capacity = s.capacity();
7616        assert_eq!(
7617            ScalarValue::size_of_hashset(&s),
7618            size_of::<HashSet<ScalarValue>>()
7619                + ((s_capacity - 1) * size_of::<ScalarValue>())
7620                + sv_size,
7621        );
7622    }
7623
7624    #[test]
7625    fn scalar_eq_array() {
7626        // Validate that eq_array has the same semantics as ScalarValue::eq
7627        macro_rules! make_typed_vec {
7628            ($INPUT:expr, $TYPE:ident) => {{
7629                $INPUT
7630                    .iter()
7631                    .map(|v| v.map(|v| v as $TYPE))
7632                    .collect::<Vec<_>>()
7633            }};
7634        }
7635
7636        let bool_vals = [Some(true), None, Some(false)];
7637        let f32_vals = [Some(-1.0), None, Some(1.0)];
7638        let f64_vals = make_typed_vec!(f32_vals, f64);
7639
7640        let i8_vals = [Some(-1), None, Some(1)];
7641        let i16_vals = make_typed_vec!(i8_vals, i16);
7642        let i32_vals = make_typed_vec!(i8_vals, i32);
7643        let i64_vals = make_typed_vec!(i8_vals, i64);
7644
7645        let u8_vals = [Some(0), None, Some(1)];
7646        let u16_vals = make_typed_vec!(u8_vals, u16);
7647        let u32_vals = make_typed_vec!(u8_vals, u32);
7648        let u64_vals = make_typed_vec!(u8_vals, u64);
7649
7650        let str_vals = [Some("foo"), None, Some("bar")];
7651
7652        let interval_dt_vals = [
7653            Some(IntervalDayTime::MINUS_ONE),
7654            None,
7655            Some(IntervalDayTime::ONE),
7656        ];
7657        let interval_mdn_vals = [
7658            Some(IntervalMonthDayNano::MINUS_ONE),
7659            None,
7660            Some(IntervalMonthDayNano::ONE),
7661        ];
7662
7663        /// Test each value in `scalar` with the corresponding element
7664        /// at `array`. Assumes each element is unique (aka not equal
7665        /// with all other indexes)
7666        #[derive(Debug)]
7667        struct TestCase {
7668            array: ArrayRef,
7669            scalars: Vec<ScalarValue>,
7670        }
7671
7672        /// Create a test case for casing the input to the specified array type
7673        macro_rules! make_test_case {
7674            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7675                TestCase {
7676                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
7677                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
7678                }
7679            }};
7680
7681            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
7682                let tz = $TZ;
7683                TestCase {
7684                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
7685                    scalars: $INPUT
7686                        .iter()
7687                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
7688                        .collect(),
7689                }
7690            }};
7691        }
7692
7693        macro_rules! make_str_test_case {
7694            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7695                TestCase {
7696                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7697                    scalars: $INPUT
7698                        .iter()
7699                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
7700                        .collect(),
7701                }
7702            }};
7703        }
7704
7705        macro_rules! make_binary_test_case {
7706            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7707                TestCase {
7708                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7709                    scalars: $INPUT
7710                        .iter()
7711                        .map(|v| {
7712                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
7713                        })
7714                        .collect(),
7715                }
7716            }};
7717        }
7718
7719        /// create a test case for DictionaryArray<$INDEX_TY>
7720        macro_rules! make_str_dict_test_case {
7721            ($INPUT:expr, $INDEX_TY:ident) => {{
7722                TestCase {
7723                    array: Arc::new(
7724                        $INPUT
7725                            .iter()
7726                            .cloned()
7727                            .collect::<DictionaryArray<$INDEX_TY>>(),
7728                    ),
7729                    scalars: $INPUT
7730                        .iter()
7731                        .map(|v| {
7732                            ScalarValue::Dictionary(
7733                                Box::new($INDEX_TY::DATA_TYPE),
7734                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
7735                            )
7736                        })
7737                        .collect(),
7738                }
7739            }};
7740        }
7741
7742        let cases = vec![
7743            make_test_case!(bool_vals, BooleanArray, Boolean),
7744            make_test_case!(f32_vals, Float32Array, Float32),
7745            make_test_case!(f64_vals, Float64Array, Float64),
7746            make_test_case!(i8_vals, Int8Array, Int8),
7747            make_test_case!(i16_vals, Int16Array, Int16),
7748            make_test_case!(i32_vals, Int32Array, Int32),
7749            make_test_case!(i64_vals, Int64Array, Int64),
7750            make_test_case!(u8_vals, UInt8Array, UInt8),
7751            make_test_case!(u16_vals, UInt16Array, UInt16),
7752            make_test_case!(u32_vals, UInt32Array, UInt32),
7753            make_test_case!(u64_vals, UInt64Array, UInt64),
7754            make_str_test_case!(str_vals, StringArray, Utf8),
7755            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
7756            make_binary_test_case!(str_vals, BinaryArray, Binary),
7757            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
7758            make_test_case!(i32_vals, Date32Array, Date32),
7759            make_test_case!(i64_vals, Date64Array, Date64),
7760            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
7761            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
7762            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
7763            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
7764            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
7765            make_test_case!(
7766                i64_vals,
7767                TimestampSecondArray,
7768                TimestampSecond,
7769                Some("UTC".into())
7770            ),
7771            make_test_case!(
7772                i64_vals,
7773                TimestampMillisecondArray,
7774                TimestampMillisecond,
7775                None
7776            ),
7777            make_test_case!(
7778                i64_vals,
7779                TimestampMillisecondArray,
7780                TimestampMillisecond,
7781                Some("UTC".into())
7782            ),
7783            make_test_case!(
7784                i64_vals,
7785                TimestampMicrosecondArray,
7786                TimestampMicrosecond,
7787                None
7788            ),
7789            make_test_case!(
7790                i64_vals,
7791                TimestampMicrosecondArray,
7792                TimestampMicrosecond,
7793                Some("UTC".into())
7794            ),
7795            make_test_case!(
7796                i64_vals,
7797                TimestampNanosecondArray,
7798                TimestampNanosecond,
7799                None
7800            ),
7801            make_test_case!(
7802                i64_vals,
7803                TimestampNanosecondArray,
7804                TimestampNanosecond,
7805                Some("UTC".into())
7806            ),
7807            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
7808            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
7809            make_test_case!(
7810                interval_mdn_vals,
7811                IntervalMonthDayNanoArray,
7812                IntervalMonthDayNano
7813            ),
7814            make_str_dict_test_case!(str_vals, Int8Type),
7815            make_str_dict_test_case!(str_vals, Int16Type),
7816            make_str_dict_test_case!(str_vals, Int32Type),
7817            make_str_dict_test_case!(str_vals, Int64Type),
7818            make_str_dict_test_case!(str_vals, UInt8Type),
7819            make_str_dict_test_case!(str_vals, UInt16Type),
7820            make_str_dict_test_case!(str_vals, UInt32Type),
7821            make_str_dict_test_case!(str_vals, UInt64Type),
7822        ];
7823
7824        for case in cases {
7825            println!("**** Test Case *****");
7826            let TestCase { array, scalars } = case;
7827            println!("Input array type: {}", array.data_type());
7828            println!("Input scalars: {scalars:#?}");
7829            assert_eq!(array.len(), scalars.len());
7830
7831            for (index, scalar) in scalars.into_iter().enumerate() {
7832                assert!(
7833                    scalar
7834                        .eq_array(&array, index)
7835                        .expect("Failed to compare arrays"),
7836                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
7837                );
7838
7839                // test that all other elements are *not* equal
7840                for other_index in 0..array.len() {
7841                    if index != other_index {
7842                        assert!(
7843                            !scalar
7844                                .eq_array(&array, other_index)
7845                                .expect("Failed to compare arrays"),
7846                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
7847                        );
7848                    }
7849                }
7850            }
7851        }
7852    }
7853
7854    #[test]
7855    fn scalar_partial_ordering() {
7856        use ScalarValue::*;
7857
7858        assert_eq!(
7859            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
7860            Some(Ordering::Greater)
7861        );
7862        assert_eq!(
7863            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
7864            Some(Ordering::Less)
7865        );
7866        assert_eq!(
7867            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
7868            Some(Ordering::Equal)
7869        );
7870        // For different data type, `partial_cmp` returns None.
7871        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
7872        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
7873
7874        assert_eq!(
7875            ScalarValue::from(vec![
7876                ("A", ScalarValue::from(1.0)),
7877                ("B", ScalarValue::from("Z")),
7878            ])
7879            .partial_cmp(&ScalarValue::from(vec![
7880                ("A", ScalarValue::from(2.0)),
7881                ("B", ScalarValue::from("A")),
7882            ])),
7883            Some(Ordering::Less)
7884        );
7885
7886        // For different struct fields, `partial_cmp` returns None.
7887        assert_eq!(
7888            ScalarValue::from(vec![
7889                ("A", ScalarValue::from(1.0)),
7890                ("B", ScalarValue::from("Z")),
7891            ])
7892            .partial_cmp(&ScalarValue::from(vec![
7893                ("a", ScalarValue::from(2.0)),
7894                ("b", ScalarValue::from("A")),
7895            ])),
7896            None
7897        );
7898    }
7899
7900    #[test]
7901    fn test_scalar_value_from_string() {
7902        let scalar = ScalarValue::from("foo");
7903        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7904        let scalar = ScalarValue::from("foo".to_string());
7905        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7906        let scalar = ScalarValue::from_str("foo").unwrap();
7907        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7908    }
7909
7910    #[test]
7911    fn test_scalar_struct() {
7912        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
7913        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
7914        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
7915
7916        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
7917        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
7918        let field_d = Arc::new(Field::new(
7919            "D",
7920            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
7921            false,
7922        ));
7923
7924        let struct_array = StructArray::from(vec![
7925            (
7926                Arc::clone(&field_e),
7927                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
7928            ),
7929            (
7930                Arc::clone(&field_f),
7931                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
7932            ),
7933        ]);
7934
7935        let struct_array = StructArray::from(vec![
7936            (
7937                Arc::clone(&field_a),
7938                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
7939            ),
7940            (
7941                Arc::clone(&field_b),
7942                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
7943            ),
7944            (
7945                Arc::clone(&field_c),
7946                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
7947            ),
7948            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
7949        ]);
7950        let scalar = ScalarValue::Struct(Arc::new(struct_array));
7951
7952        let array = scalar
7953            .to_array_of_size(2)
7954            .expect("Failed to convert to array of size");
7955
7956        let expected = Arc::new(StructArray::from(vec![
7957            (
7958                Arc::clone(&field_a),
7959                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
7960            ),
7961            (
7962                Arc::clone(&field_b),
7963                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
7964            ),
7965            (
7966                Arc::clone(&field_c),
7967                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
7968            ),
7969            (
7970                Arc::clone(&field_d),
7971                Arc::new(StructArray::from(vec![
7972                    (
7973                        Arc::clone(&field_e),
7974                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
7975                    ),
7976                    (
7977                        Arc::clone(&field_f),
7978                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
7979                    ),
7980                ])) as ArrayRef,
7981            ),
7982        ])) as ArrayRef;
7983
7984        assert_eq!(&array, &expected);
7985
7986        // Construct from second element of ArrayRef
7987        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
7988        assert_eq!(constructed, scalar);
7989
7990        // None version
7991        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
7992        assert!(none_scalar.is_null());
7993        assert_eq!(
7994            format!("{none_scalar:?}"),
7995            String::from("Struct({A:,B:,C:,D:})")
7996        );
7997
7998        // Construct with convenience From<Vec<(&str, ScalarValue)>>
7999        let constructed = ScalarValue::from(vec![
8000            ("A", ScalarValue::from(23)),
8001            ("B", ScalarValue::from(false)),
8002            ("C", ScalarValue::from("Hello")),
8003            (
8004                "D",
8005                ScalarValue::from(vec![
8006                    ("e", ScalarValue::from(2i16)),
8007                    ("f", ScalarValue::from(3i64)),
8008                ]),
8009            ),
8010        ]);
8011        assert_eq!(constructed, scalar);
8012
8013        // Build Array from Vec of structs
8014        let scalars = vec![
8015            ScalarValue::from(vec![
8016                ("A", ScalarValue::from(23)),
8017                ("B", ScalarValue::from(false)),
8018                ("C", ScalarValue::from("Hello")),
8019                (
8020                    "D",
8021                    ScalarValue::from(vec![
8022                        ("e", ScalarValue::from(2i16)),
8023                        ("f", ScalarValue::from(3i64)),
8024                    ]),
8025                ),
8026            ]),
8027            ScalarValue::from(vec![
8028                ("A", ScalarValue::from(7)),
8029                ("B", ScalarValue::from(true)),
8030                ("C", ScalarValue::from("World")),
8031                (
8032                    "D",
8033                    ScalarValue::from(vec![
8034                        ("e", ScalarValue::from(4i16)),
8035                        ("f", ScalarValue::from(5i64)),
8036                    ]),
8037                ),
8038            ]),
8039            ScalarValue::from(vec![
8040                ("A", ScalarValue::from(-1000)),
8041                ("B", ScalarValue::from(true)),
8042                ("C", ScalarValue::from("!!!!!")),
8043                (
8044                    "D",
8045                    ScalarValue::from(vec![
8046                        ("e", ScalarValue::from(6i16)),
8047                        ("f", ScalarValue::from(7i64)),
8048                    ]),
8049                ),
8050            ]),
8051        ];
8052        let array = ScalarValue::iter_to_array(scalars).unwrap();
8053
8054        let expected = Arc::new(StructArray::from(vec![
8055            (
8056                Arc::clone(&field_a),
8057                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
8058            ),
8059            (
8060                Arc::clone(&field_b),
8061                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
8062            ),
8063            (
8064                Arc::clone(&field_c),
8065                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
8066            ),
8067            (
8068                Arc::clone(&field_d),
8069                Arc::new(StructArray::from(vec![
8070                    (
8071                        Arc::clone(&field_e),
8072                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
8073                    ),
8074                    (
8075                        Arc::clone(&field_f),
8076                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
8077                    ),
8078                ])) as ArrayRef,
8079            ),
8080        ])) as ArrayRef;
8081
8082        assert_eq!(&array, &expected);
8083    }
8084
8085    #[test]
8086    fn round_trip() {
8087        // Each array type should be able to round tripped through a scalar
8088        let cases: Vec<ArrayRef> = vec![
8089            // int
8090            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
8091            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
8092            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
8093            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
8094            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
8095            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
8096            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
8097            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
8098            // bool
8099            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
8100            // float
8101            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
8102            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
8103            // string array
8104            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
8105            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
8106            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
8107            // string dictionary
8108            {
8109                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
8110                builder.append("foo").unwrap();
8111                builder.append_null();
8112                builder.append("bar").unwrap();
8113                Arc::new(builder.finish())
8114            },
8115            // binary array
8116            Arc::new(BinaryArray::from_iter(vec![
8117                Some(b"foo"),
8118                None,
8119                Some(b"bar"),
8120            ])),
8121            Arc::new(LargeBinaryArray::from_iter(vec![
8122                Some(b"foo"),
8123                None,
8124                Some(b"bar"),
8125            ])),
8126            Arc::new(BinaryViewArray::from_iter(vec![
8127                Some(b"foo"),
8128                None,
8129                Some(b"bar"),
8130            ])),
8131            // timestamp
8132            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
8133            Arc::new(TimestampMillisecondArray::from(vec![
8134                Some(1),
8135                None,
8136                Some(3),
8137            ])),
8138            Arc::new(TimestampMicrosecondArray::from(vec![
8139                Some(1),
8140                None,
8141                Some(3),
8142            ])),
8143            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
8144            // timestamp with timezone
8145            Arc::new(
8146                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
8147                    .with_timezone_opt(Some("UTC")),
8148            ),
8149            Arc::new(
8150                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
8151                    .with_timezone_opt(Some("UTC")),
8152            ),
8153            Arc::new(
8154                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
8155                    .with_timezone_opt(Some("UTC")),
8156            ),
8157            Arc::new(
8158                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
8159                    .with_timezone_opt(Some("UTC")),
8160            ),
8161            // date
8162            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
8163            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
8164            // time
8165            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
8166            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
8167            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
8168            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
8169            // null array
8170            Arc::new(NullArray::new(3)),
8171            // dense union
8172            {
8173                let mut builder = UnionBuilder::new_dense();
8174                builder.append::<Int32Type>("a", 1).unwrap();
8175                builder.append::<Float64Type>("b", 3.4).unwrap();
8176                Arc::new(builder.build().unwrap())
8177            },
8178            // sparse union
8179            {
8180                let mut builder = UnionBuilder::new_sparse();
8181                builder.append::<Int32Type>("a", 1).unwrap();
8182                builder.append::<Float64Type>("b", 3.4).unwrap();
8183                Arc::new(builder.build().unwrap())
8184            },
8185            // list array
8186            {
8187                let values_builder = StringBuilder::new();
8188                let mut builder = ListBuilder::new(values_builder);
8189                // [A, B]
8190                builder.values().append_value("A");
8191                builder.values().append_value("B");
8192                builder.append(true);
8193                // [ ] (empty list)
8194                builder.append(true);
8195                // Null
8196                builder.values().append_value("?"); // irrelevant
8197                builder.append(false);
8198                Arc::new(builder.finish())
8199            },
8200            // large list array
8201            {
8202                let values_builder = StringBuilder::new();
8203                let mut builder = LargeListBuilder::new(values_builder);
8204                // [A, B]
8205                builder.values().append_value("A");
8206                builder.values().append_value("B");
8207                builder.append(true);
8208                // [ ] (empty list)
8209                builder.append(true);
8210                // Null
8211                builder.append(false);
8212                Arc::new(builder.finish())
8213            },
8214            // fixed size list array
8215            {
8216                let values_builder = Int32Builder::new();
8217                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
8218
8219                //  [[0, 1, 2], null, [3, null, 5]
8220                builder.values().append_value(0);
8221                builder.values().append_value(1);
8222                builder.values().append_value(2);
8223                builder.append(true);
8224                builder.values().append_null();
8225                builder.values().append_null();
8226                builder.values().append_null();
8227                builder.append(false);
8228                builder.values().append_value(3);
8229                builder.values().append_null();
8230                builder.values().append_value(5);
8231                builder.append(true);
8232                Arc::new(builder.finish())
8233            },
8234            // list view array
8235            {
8236                let values_builder = StringBuilder::new();
8237                let mut builder = ListViewBuilder::new(values_builder);
8238                // [A, B]
8239                builder.values().append_value("A");
8240                builder.values().append_value("B");
8241                builder.append(true);
8242                // [ ] (empty list)
8243                builder.append(true);
8244                // Null
8245                builder.append(false);
8246                Arc::new(builder.finish())
8247            },
8248            // large list view array
8249            {
8250                let values_builder = StringBuilder::new();
8251                let mut builder = LargeListViewBuilder::new(values_builder);
8252                // [A, B]
8253                builder.values().append_value("A");
8254                builder.values().append_value("B");
8255                builder.append(true);
8256                // [ ] (empty list)
8257                builder.append(true);
8258                // Null
8259                builder.append(false);
8260                Arc::new(builder.finish())
8261            },
8262            // map
8263            {
8264                let string_builder = StringBuilder::new();
8265                let int_builder = Int32Builder::with_capacity(4);
8266
8267                let mut builder = MapBuilder::new(None, string_builder, int_builder);
8268                // {"joe": 1}
8269                builder.keys().append_value("joe");
8270                builder.values().append_value(1);
8271                builder.append(true).unwrap();
8272                // {}
8273                builder.append(true).unwrap();
8274                // null
8275                builder.append(false).unwrap();
8276
8277                Arc::new(builder.finish())
8278            },
8279        ];
8280
8281        for arr in cases {
8282            round_trip_through_scalar(arr);
8283        }
8284    }
8285
8286    /// for each row in `arr`:
8287    /// 1. convert to a `ScalarValue`
8288    /// 2. Convert `ScalarValue` back to an `ArrayRef`
8289    /// 3. Compare the original array (sliced) and new array for equality
8290    fn round_trip_through_scalar(arr: ArrayRef) {
8291        for i in 0..arr.len() {
8292            // convert Scalar --> Array
8293            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
8294            let array = scalar.to_array_of_size(1).unwrap();
8295            assert_eq!(array.len(), 1);
8296            assert_eq!(array.data_type(), arr.data_type());
8297            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
8298        }
8299    }
8300
8301    #[test]
8302    fn roundtrip_run_array() {
8303        // Comparison logic in round_trip_through_scalar doesn't work for RunArrays
8304        // so we have a custom test for them
8305        // TODO: https://github.com/apache/arrow-rs/pull/9213 might fix this ^
8306        let run_ends = Int16Array::from(vec![2, 3]);
8307        let values = Int64Array::from(vec![Some(1), None]);
8308        let run_array = RunArray::try_new(&run_ends, &values).unwrap();
8309        let run_array = run_array.downcast::<Int64Array>().unwrap();
8310
8311        let expected_values = run_array.into_iter().collect::<Vec<_>>();
8312
8313        for i in 0..run_array.len() {
8314            let scalar = ScalarValue::try_from_array(&run_array, i).unwrap();
8315            let array = scalar.to_array_of_size(1).unwrap();
8316            assert_eq!(array.data_type(), run_array.data_type());
8317            let array = array.as_run::<Int16Type>();
8318            let array = array.downcast::<Int64Array>().unwrap();
8319            assert_eq!(
8320                array.into_iter().collect::<Vec<_>>(),
8321                expected_values[i..i + 1]
8322            );
8323        }
8324    }
8325
8326    #[test]
8327    fn test_scalar_union_sparse() {
8328        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
8329        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
8330        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
8331        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
8332
8333        let mut values_a = vec![None; 6];
8334        values_a[0] = Some(42);
8335        let mut values_b = vec![None; 6];
8336        values_b[1] = Some(true);
8337        let mut values_c = vec![None; 6];
8338        values_c[2] = Some("foo");
8339        let children: Vec<ArrayRef> = vec![
8340            Arc::new(Int32Array::from(values_a)),
8341            Arc::new(BooleanArray::from(values_b)),
8342            Arc::new(StringArray::from(values_c)),
8343        ];
8344
8345        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
8346        let array: ArrayRef = Arc::new(
8347            UnionArray::try_new(fields.clone(), type_ids, None, children)
8348                .expect("UnionArray"),
8349        );
8350
8351        let expected = [
8352            (0, ScalarValue::from(42)),
8353            (1, ScalarValue::from(true)),
8354            (2, ScalarValue::from("foo")),
8355            (0, ScalarValue::Int32(None)),
8356            (1, ScalarValue::Boolean(None)),
8357            (2, ScalarValue::Utf8(None)),
8358        ];
8359
8360        for (i, (ti, value)) in expected.into_iter().enumerate() {
8361            let is_null = value.is_null();
8362            let value = Some((ti, Box::new(value)));
8363            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
8364            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
8365
8366            assert_eq!(
8367                actual, expected,
8368                "[{i}] {actual} was not equal to {expected}"
8369            );
8370
8371            assert!(
8372                expected.eq_array(&array, i).expect("eq_array"),
8373                "[{i}] {expected}.eq_array was false"
8374            );
8375
8376            if is_null {
8377                assert!(actual.is_null(), "[{i}] {actual} was not null")
8378            }
8379        }
8380    }
8381
8382    #[test]
8383    fn test_scalar_union_dense() {
8384        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
8385        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
8386        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
8387        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
8388        let children: Vec<ArrayRef> = vec![
8389            Arc::new(Int32Array::from(vec![Some(42), None])),
8390            Arc::new(BooleanArray::from(vec![Some(true), None])),
8391            Arc::new(StringArray::from(vec![Some("foo"), None])),
8392        ];
8393
8394        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
8395        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
8396        let array: ArrayRef = Arc::new(
8397            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
8398                .expect("UnionArray"),
8399        );
8400
8401        let expected = [
8402            (0, ScalarValue::from(42)),
8403            (1, ScalarValue::from(true)),
8404            (2, ScalarValue::from("foo")),
8405            (0, ScalarValue::Int32(None)),
8406            (1, ScalarValue::Boolean(None)),
8407            (2, ScalarValue::Utf8(None)),
8408        ];
8409
8410        for (i, (ti, value)) in expected.into_iter().enumerate() {
8411            let is_null = value.is_null();
8412            let value = Some((ti, Box::new(value)));
8413            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
8414            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
8415
8416            assert_eq!(
8417                actual, expected,
8418                "[{i}] {actual} was not equal to {expected}"
8419            );
8420
8421            assert!(
8422                expected.eq_array(&array, i).expect("eq_array"),
8423                "[{i}] {expected}.eq_array was false"
8424            );
8425
8426            if is_null {
8427                assert!(actual.is_null(), "[{i}] {actual} was not null")
8428            }
8429        }
8430    }
8431
8432    #[test]
8433    fn test_lists_in_struct() {
8434        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
8435        let field_primitive_list = Arc::new(Field::new(
8436            "primitive_list",
8437            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
8438            false,
8439        ));
8440
8441        // Define primitive list scalars
8442        let l0 =
8443            ScalarValue::List(Arc::new(
8444                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
8445                    Some(1),
8446                    Some(2),
8447                    Some(3),
8448                ])]),
8449            ));
8450        let l1 =
8451            ScalarValue::List(Arc::new(
8452                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
8453                    Some(4),
8454                    Some(5),
8455                ])]),
8456            ));
8457        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
8458            Int32Type,
8459            _,
8460            _,
8461        >(vec![Some(vec![Some(6)])])));
8462
8463        // Define struct scalars
8464        let s0 = ScalarValue::from(vec![
8465            ("A", ScalarValue::from("First")),
8466            ("primitive_list", l0),
8467        ]);
8468
8469        let s1 = ScalarValue::from(vec![
8470            ("A", ScalarValue::from("Second")),
8471            ("primitive_list", l1),
8472        ]);
8473
8474        let s2 = ScalarValue::from(vec![
8475            ("A", ScalarValue::from("Third")),
8476            ("primitive_list", l2),
8477        ]);
8478
8479        // iter_to_array for struct scalars
8480        let array =
8481            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
8482
8483        let array = as_struct_array(&array).unwrap();
8484        let expected = StructArray::from(vec![
8485            (
8486                Arc::clone(&field_a),
8487                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
8488            ),
8489            (
8490                Arc::clone(&field_primitive_list),
8491                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
8492                    Some(vec![Some(1), Some(2), Some(3)]),
8493                    Some(vec![Some(4), Some(5)]),
8494                    Some(vec![Some(6)]),
8495                ])),
8496            ),
8497        ]);
8498
8499        assert_eq!(array, &expected);
8500
8501        // Define list-of-structs scalars
8502
8503        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
8504        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
8505
8506        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
8507        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
8508
8509        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
8510        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
8511
8512        // iter_to_array for list-of-struct
8513        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
8514        let array = array.as_list::<i32>();
8515
8516        // Construct expected array with array builders
8517        let field_a_builder = StringBuilder::with_capacity(4, 1024);
8518        let primitive_value_builder = Int32Array::builder(8);
8519        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
8520
8521        let element_builder = StructBuilder::new(
8522            vec![field_a, field_primitive_list],
8523            vec![
8524                Box::new(field_a_builder),
8525                Box::new(field_primitive_list_builder),
8526            ],
8527        );
8528
8529        let mut list_builder = ListBuilder::new(element_builder);
8530
8531        list_builder
8532            .values()
8533            .field_builder::<StringBuilder>(0)
8534            .unwrap()
8535            .append_value("First");
8536        list_builder
8537            .values()
8538            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8539            .unwrap()
8540            .values()
8541            .append_value(1);
8542        list_builder
8543            .values()
8544            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8545            .unwrap()
8546            .values()
8547            .append_value(2);
8548        list_builder
8549            .values()
8550            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8551            .unwrap()
8552            .values()
8553            .append_value(3);
8554        list_builder
8555            .values()
8556            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8557            .unwrap()
8558            .append(true);
8559        list_builder.values().append(true);
8560
8561        list_builder
8562            .values()
8563            .field_builder::<StringBuilder>(0)
8564            .unwrap()
8565            .append_value("Second");
8566        list_builder
8567            .values()
8568            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8569            .unwrap()
8570            .values()
8571            .append_value(4);
8572        list_builder
8573            .values()
8574            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8575            .unwrap()
8576            .values()
8577            .append_value(5);
8578        list_builder
8579            .values()
8580            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8581            .unwrap()
8582            .append(true);
8583        list_builder.values().append(true);
8584        list_builder.append(true);
8585
8586        list_builder
8587            .values()
8588            .field_builder::<StringBuilder>(0)
8589            .unwrap()
8590            .append_value("Third");
8591        list_builder
8592            .values()
8593            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8594            .unwrap()
8595            .values()
8596            .append_value(6);
8597        list_builder
8598            .values()
8599            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8600            .unwrap()
8601            .append(true);
8602        list_builder.values().append(true);
8603        list_builder.append(true);
8604
8605        list_builder
8606            .values()
8607            .field_builder::<StringBuilder>(0)
8608            .unwrap()
8609            .append_value("Second");
8610        list_builder
8611            .values()
8612            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8613            .unwrap()
8614            .values()
8615            .append_value(4);
8616        list_builder
8617            .values()
8618            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8619            .unwrap()
8620            .values()
8621            .append_value(5);
8622        list_builder
8623            .values()
8624            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
8625            .unwrap()
8626            .append(true);
8627        list_builder.values().append(true);
8628        list_builder.append(true);
8629
8630        let expected = list_builder.finish();
8631
8632        assert_eq!(array, &expected);
8633    }
8634
8635    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
8636        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
8637        ListArray::new(
8638            Arc::new(Field::new_list_field(
8639                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
8640                true,
8641            )),
8642            OffsetBuffer::<i32>::from_lengths([1]),
8643            Arc::new(a1),
8644            None,
8645        )
8646    }
8647
8648    #[test]
8649    fn test_nested_lists() {
8650        // Define inner list scalars
8651        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
8652        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
8653        let arr3 = build_2d_list(vec![Some(6)]);
8654
8655        let array = ScalarValue::iter_to_array(vec![
8656            ScalarValue::List(Arc::new(arr1)),
8657            ScalarValue::List(Arc::new(arr2)),
8658            ScalarValue::List(Arc::new(arr3)),
8659        ])
8660        .unwrap();
8661        let array = array.as_list::<i32>();
8662
8663        // Construct expected array with array builders
8664        let inner_builder = Int32Array::builder(6);
8665        let middle_builder = ListBuilder::new(inner_builder);
8666        let mut outer_builder = ListBuilder::new(middle_builder);
8667
8668        outer_builder.values().values().append_value(1);
8669        outer_builder.values().values().append_value(2);
8670        outer_builder.values().values().append_value(3);
8671        outer_builder.values().append(true);
8672        outer_builder.append(true);
8673
8674        outer_builder.values().values().append_value(4);
8675        outer_builder.values().values().append_value(5);
8676        outer_builder.values().append(true);
8677        outer_builder.append(true);
8678
8679        outer_builder.values().values().append_value(6);
8680        outer_builder.values().append(true);
8681        outer_builder.append(true);
8682
8683        let expected = outer_builder.finish();
8684
8685        assert_eq!(array, &expected);
8686    }
8687
8688    #[test]
8689    fn scalar_timestamp_ns_utc_timezone() {
8690        let scalar = ScalarValue::TimestampNanosecond(
8691            Some(1599566400000000000),
8692            Some("UTC".into()),
8693        );
8694
8695        assert_eq!(
8696            scalar.data_type(),
8697            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
8698        );
8699
8700        let array = scalar.to_array().expect("Failed to convert to array");
8701        assert_eq!(array.len(), 1);
8702        assert_eq!(
8703            array.data_type(),
8704            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
8705        );
8706
8707        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
8708        assert_eq!(
8709            new_scalar.data_type(),
8710            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
8711        );
8712    }
8713
8714    #[test]
8715    fn cast_round_trip() {
8716        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
8717        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
8718
8719        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
8720
8721        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
8722
8723        check_scalar_cast(
8724            ScalarValue::from("foo"),
8725            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
8726        );
8727
8728        check_scalar_cast(
8729            ScalarValue::Utf8(None),
8730            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
8731        );
8732
8733        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
8734        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
8735        check_scalar_cast(
8736            ScalarValue::from("larger than 12 bytes string"),
8737            DataType::Utf8View,
8738        );
8739        check_scalar_cast(
8740            {
8741                let element_field =
8742                    Arc::new(Field::new("element", DataType::Int32, true));
8743
8744                let mut builder =
8745                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
8746                builder.append_value([Some(1)]);
8747                builder.append(true);
8748
8749                ScalarValue::List(Arc::new(builder.finish()))
8750            },
8751            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
8752        );
8753        check_scalar_cast(
8754            {
8755                let element_field =
8756                    Arc::new(Field::new("element", DataType::Int32, true));
8757
8758                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
8759                    .with_field(element_field);
8760                builder.values().append_value(1);
8761                builder.append(true);
8762
8763                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
8764            },
8765            DataType::FixedSizeList(
8766                Arc::new(Field::new("element", DataType::Int64, true)),
8767                1,
8768            ),
8769        );
8770        check_scalar_cast(
8771            {
8772                let element_field =
8773                    Arc::new(Field::new("element", DataType::Int32, true));
8774
8775                let mut builder =
8776                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
8777                builder.append_value([Some(1)]);
8778                builder.append(true);
8779
8780                ScalarValue::LargeList(Arc::new(builder.finish()))
8781            },
8782            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
8783        );
8784        check_scalar_cast(
8785            {
8786                let element_field =
8787                    Arc::new(Field::new("element", DataType::Int32, true));
8788
8789                let mut builder =
8790                    ListViewBuilder::new(Int32Builder::new()).with_field(element_field);
8791                builder.append_value([Some(1)]);
8792                builder.append(true);
8793
8794                ScalarValue::ListView(Arc::new(builder.finish()))
8795            },
8796            DataType::ListView(Arc::new(Field::new("element", DataType::Int64, true))),
8797        );
8798        check_scalar_cast(
8799            {
8800                let element_field =
8801                    Arc::new(Field::new("element", DataType::Int32, true));
8802
8803                let mut builder = LargeListViewBuilder::new(Int32Builder::new())
8804                    .with_field(element_field);
8805                builder.append_value([Some(1)]);
8806                builder.append(true);
8807
8808                ScalarValue::LargeListView(Arc::new(builder.finish()))
8809            },
8810            DataType::LargeListView(Arc::new(Field::new(
8811                "element",
8812                DataType::Int64,
8813                true,
8814            ))),
8815        );
8816    }
8817
8818    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
8819    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
8820        // convert from scalar --> Array to call cast
8821        let scalar_array = scalar.to_array().expect("Failed to convert to array");
8822        // cast the actual value
8823        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
8824
8825        // turn it back to a scalar
8826        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
8827        assert_eq!(cast_scalar.data_type(), desired_type);
8828
8829        // Some time later the "cast" scalar is turned back into an array:
8830        let array = cast_scalar
8831            .to_array_of_size(10)
8832            .expect("Failed to convert to array of size");
8833
8834        // The datatype should be "Dictionary" but is actually Utf8!!!
8835        assert_eq!(array.data_type(), &desired_type)
8836    }
8837
8838    #[test]
8839    fn test_scalar_negative() -> Result<()> {
8840        // positive test
8841        let value = ScalarValue::Int32(Some(12));
8842        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
8843        let value = ScalarValue::Int32(None);
8844        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
8845
8846        // negative test
8847        let value = ScalarValue::UInt8(Some(12));
8848        assert!(value.arithmetic_negate().is_err());
8849        let value = ScalarValue::Boolean(None);
8850        assert!(value.arithmetic_negate().is_err());
8851        Ok(())
8852    }
8853
8854    #[test]
8855    fn test_scalar_negative_overflows() -> Result<()> {
8856        macro_rules! test_overflow_on_value {
8857            ($($val:expr),* $(,)?) => {$(
8858                {
8859                    let value: ScalarValue = $val;
8860                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
8861                    let root_err = err.find_root();
8862                    match  root_err{
8863                        DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
8864                        _ => return Err(err),
8865                    };
8866                }
8867            )*};
8868        }
8869        test_overflow_on_value!(
8870            // the integers
8871            i8::MIN.into(),
8872            i16::MIN.into(),
8873            i32::MIN.into(),
8874            i64::MIN.into(),
8875            // for decimals, only value needs to be tested
8876            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
8877            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
8878            // interval, check all possible values
8879            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
8880            ScalarValue::new_interval_dt(i32::MIN, 999),
8881            ScalarValue::new_interval_dt(1, i32::MIN),
8882            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
8883            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
8884            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
8885            // tz doesn't matter when negating
8886            ScalarValue::TimestampSecond(Some(i64::MIN), None),
8887            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
8888            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
8889            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
8890        );
8891
8892        let float_cases = [
8893            (
8894                ScalarValue::Float16(Some(f16::MIN)),
8895                ScalarValue::Float16(Some(f16::MAX)),
8896            ),
8897            (
8898                ScalarValue::Float16(Some(f16::MAX)),
8899                ScalarValue::Float16(Some(f16::MIN)),
8900            ),
8901            (f32::MIN.into(), f32::MAX.into()),
8902            (f32::MAX.into(), f32::MIN.into()),
8903            (f64::MIN.into(), f64::MAX.into()),
8904            (f64::MAX.into(), f64::MIN.into()),
8905        ];
8906        // skip float 16 because they aren't supported
8907        for (test, expected) in float_cases.into_iter().skip(2) {
8908            assert_eq!(test.arithmetic_negate()?, expected);
8909        }
8910        Ok(())
8911    }
8912
8913    #[test]
8914    fn f16_test_overflow() {
8915        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
8916        let cases = [
8917            (
8918                ScalarValue::Float16(Some(f16::MIN)),
8919                ScalarValue::Float16(Some(f16::MAX)),
8920            ),
8921            (
8922                ScalarValue::Float16(Some(f16::MAX)),
8923                ScalarValue::Float16(Some(f16::MIN)),
8924            ),
8925        ];
8926
8927        for (test, expected) in cases {
8928            assert_eq!(test.arithmetic_negate().unwrap(), expected);
8929        }
8930    }
8931
8932    macro_rules! expect_operation_error {
8933        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
8934            #[test]
8935            fn $TEST_NAME() {
8936                let lhs = ScalarValue::UInt64(Some(12));
8937                let rhs = ScalarValue::Int32(Some(-3));
8938                match lhs.$FUNCTION(&rhs) {
8939                    Ok(_result) => {
8940                        panic!(
8941                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
8942                            lhs, rhs
8943                        );
8944                    }
8945                    Err(e) => {
8946                        let error_message = e.to_string();
8947                        assert!(
8948                            error_message.contains($EXPECTED_ERROR),
8949                            "Expected error '{}' not found in actual error '{}'",
8950                            $EXPECTED_ERROR,
8951                            error_message
8952                        );
8953                    }
8954                }
8955            }
8956        };
8957    }
8958
8959    expect_operation_error!(
8960        expect_add_error,
8961        add,
8962        "Invalid arithmetic operation: UInt64 + Int32"
8963    );
8964    expect_operation_error!(
8965        expect_sub_error,
8966        sub,
8967        "Invalid arithmetic operation: UInt64 - Int32"
8968    );
8969
8970    macro_rules! decimal_op_test_cases {
8971    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
8972            $(
8973
8974                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
8975                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
8976                let result = left.$OPERATION(&right).unwrap();
8977                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
8978
8979            )+
8980        };
8981    }
8982
    // Adds pairs of non-null `Decimal128` scalars and checks the value, precision and
    // scale of each sum.
    #[test]
    fn decimal_operations() {
        decimal_op_test_cases!(
            add,
            [
                // Row layout (see `decimal_op_test_cases!`):
                // left value/precision/scale, right value/precision/scale,
                // expected value/precision/scale.
                // same precision and scale on both sides
                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
                // test sum decimal with diff scale
                // (the scale-2 operand is multiplied by 10 to reach scale 3)
                [
                    Some(123),
                    10,
                    3,
                    Some(124),
                    10,
                    2,
                    Some(123 + 124 * 10_i128.pow(1)),
                    12,
                    3
                ],
                // diff precision and scale for decimal data type
                // (here the left operand is the one rescaled from scale 2 to scale 3)
                [
                    Some(123),
                    10,
                    2,
                    Some(124),
                    11,
                    3,
                    Some(123 * 10_i128.pow(3 - 2) + 124),
                    12,
                    3
                ]
            ]
        );
    }
9016
    // Adds `Decimal128` scalars where one operand is null: every expected value is
    // `None`, while the expected precision and scale are still checked.
    #[test]
    fn decimal_operations_with_nulls() {
        decimal_op_test_cases!(
            add,
            [
                // Row layout (see `decimal_op_test_cases!`):
                // left value/precision/scale, right value/precision/scale,
                // expected value/precision/scale.
                // NOTE(review): the `Case` / `Side` labels below presumably name
                // branches of the decimal add implementation - confirm against it.
                // Case: (None, Some, 0)
                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
                // Case: (Some, None, 0)
                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
                // Case: (Some, None, _) + Side=False
                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
                // Case: (None, Some, _) + Side=False
                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
                // Case: (Some, None, _) + Side=True
                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
                // Case: (None, Some, _) + Side=True
                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
            ]
        );
    }
9037
9038    #[test]
9039    fn test_scalar_distance() {
9040        let cases = [
9041            // scalar (lhs), scalar (rhs), expected distance
9042            // ---------------------------------------------
9043            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
9044            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
9045            (
9046                ScalarValue::Int16(Some(-5)),
9047                ScalarValue::Int16(Some(5)),
9048                10,
9049            ),
9050            (
9051                ScalarValue::Int16(Some(5)),
9052                ScalarValue::Int16(Some(-5)),
9053                10,
9054            ),
9055            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
9056            (
9057                ScalarValue::Int32(Some(-5)),
9058                ScalarValue::Int32(Some(-10)),
9059                5,
9060            ),
9061            (
9062                ScalarValue::Int64(Some(-10)),
9063                ScalarValue::Int64(Some(-5)),
9064                5,
9065            ),
9066            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
9067            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
9068            (
9069                ScalarValue::UInt16(Some(5)),
9070                ScalarValue::UInt16(Some(10)),
9071                5,
9072            ),
9073            (
9074                ScalarValue::UInt32(Some(10)),
9075                ScalarValue::UInt32(Some(5)),
9076                5,
9077            ),
9078            (
9079                ScalarValue::UInt64(Some(5)),
9080                ScalarValue::UInt64(Some(10)),
9081                5,
9082            ),
9083            (
9084                ScalarValue::Float16(Some(f16::from_f32(1.1))),
9085                ScalarValue::Float16(Some(f16::from_f32(1.9))),
9086                1,
9087            ),
9088            (
9089                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
9090                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
9091                4,
9092            ),
9093            (
9094                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
9095                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
9096                4,
9097            ),
9098            (
9099                ScalarValue::Float32(Some(1.0)),
9100                ScalarValue::Float32(Some(2.0)),
9101                1,
9102            ),
9103            (
9104                ScalarValue::Float32(Some(2.0)),
9105                ScalarValue::Float32(Some(1.0)),
9106                1,
9107            ),
9108            (
9109                ScalarValue::Float64(Some(0.0)),
9110                ScalarValue::Float64(Some(0.0)),
9111                0,
9112            ),
9113            (
9114                ScalarValue::Float64(Some(-5.0)),
9115                ScalarValue::Float64(Some(-10.0)),
9116                5,
9117            ),
9118            (
9119                ScalarValue::Float64(Some(-10.0)),
9120                ScalarValue::Float64(Some(-5.0)),
9121                5,
9122            ),
9123            // Floats are currently special cased to f64/f32 and the result is rounded
9124            // rather than ceiled/floored. In the future we might want to take a mode
9125            // which specified the rounding behavior.
9126            (
9127                ScalarValue::Float32(Some(1.2)),
9128                ScalarValue::Float32(Some(1.3)),
9129                0,
9130            ),
9131            (
9132                ScalarValue::Float32(Some(1.1)),
9133                ScalarValue::Float32(Some(1.9)),
9134                1,
9135            ),
9136            (
9137                ScalarValue::Float64(Some(-5.3)),
9138                ScalarValue::Float64(Some(-9.2)),
9139                4,
9140            ),
9141            (
9142                ScalarValue::Float64(Some(-5.3)),
9143                ScalarValue::Float64(Some(-9.7)),
9144                4,
9145            ),
9146            (
9147                ScalarValue::Float64(Some(-5.3)),
9148                ScalarValue::Float64(Some(-9.9)),
9149                5,
9150            ),
9151            (
9152                ScalarValue::Decimal128(Some(10), 1, 0),
9153                ScalarValue::Decimal128(Some(5), 1, 0),
9154                5,
9155            ),
9156            (
9157                ScalarValue::Decimal128(Some(5), 1, 0),
9158                ScalarValue::Decimal128(Some(10), 1, 0),
9159                5,
9160            ),
9161            (
9162                ScalarValue::Decimal256(Some(10.into()), 1, 0),
9163                ScalarValue::Decimal256(Some(5.into()), 1, 0),
9164                5,
9165            ),
9166            (
9167                ScalarValue::Decimal256(Some(5.into()), 1, 0),
9168                ScalarValue::Decimal256(Some(10.into()), 1, 0),
9169                5,
9170            ),
9171            // Temporal types
9172            (
9173                ScalarValue::Date32(Some(0)),
9174                ScalarValue::Date32(Some(10)),
9175                10,
9176            ),
9177            (
9178                ScalarValue::Date32(Some(10)),
9179                ScalarValue::Date32(Some(0)),
9180                10,
9181            ),
9182            (
9183                ScalarValue::Date64(Some(1000)),
9184                ScalarValue::Date64(Some(5000)),
9185                4000,
9186            ),
9187            (
9188                ScalarValue::TimestampSecond(Some(100), None),
9189                ScalarValue::TimestampSecond(Some(200), None),
9190                100,
9191            ),
9192            (
9193                ScalarValue::TimestampMillisecond(Some(1000), None),
9194                ScalarValue::TimestampMillisecond(Some(5000), None),
9195                4000,
9196            ),
9197            (
9198                ScalarValue::TimestampMicrosecond(Some(0), None),
9199                ScalarValue::TimestampMicrosecond(Some(1_000_000), None),
9200                1_000_000,
9201            ),
9202            (
9203                ScalarValue::TimestampNanosecond(Some(1_000_000_000), None),
9204                ScalarValue::TimestampNanosecond(Some(2_000_000_000), None),
9205                1_000_000_000,
9206            ),
9207        ];
9208        for (lhs, rhs, expected) in cases.iter() {
9209            let distance = lhs.distance(rhs).unwrap();
9210            assert_eq!(distance, *expected);
9211        }
9212    }
9213
9214    #[test]
9215    fn test_distance_none() {
9216        let cases = [
9217            (
9218                ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
9219                ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0),
9220            ),
9221            (
9222                ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
9223                ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0),
9224            ),
9225        ];
9226        for (lhs, rhs) in cases.iter() {
9227            let distance = lhs.distance(rhs);
9228            assert!(distance.is_none(), "{lhs} vs {rhs}");
9229        }
9230    }
9231
9232    #[test]
9233    fn test_scalar_distance_invalid() {
9234        let cases = [
9235            // scalar (lhs), scalar (rhs)
9236            // --------------------------
9237            // Same type but with nulls
9238            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
9239            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
9240            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
9241            // Different type
9242            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
9243            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
9244            (
9245                ScalarValue::Float16(Some(f16::from_f32(1.0))),
9246                ScalarValue::Float32(Some(1.0)),
9247            ),
9248            (
9249                ScalarValue::Float16(Some(f16::from_f32(1.0))),
9250                ScalarValue::Int32(Some(1)),
9251            ),
9252            (
9253                ScalarValue::Float64(Some(1.1)),
9254                ScalarValue::Float32(Some(2.2)),
9255            ),
9256            (
9257                ScalarValue::UInt64(Some(777)),
9258                ScalarValue::Int32(Some(111)),
9259            ),
9260            // Different types with nulls
9261            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
9262            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
9263            // Unsupported types
9264            (ScalarValue::from("foo"), ScalarValue::from("bar")),
9265            (
9266                ScalarValue::Boolean(Some(true)),
9267                ScalarValue::Boolean(Some(false)),
9268            ),
9269            (
9270                ScalarValue::Decimal128(Some(123), 5, 5),
9271                ScalarValue::Decimal128(Some(120), 5, 3),
9272            ),
9273            (
9274                ScalarValue::Decimal128(Some(123), 5, 5),
9275                ScalarValue::Decimal128(Some(120), 3, 5),
9276            ),
9277            (
9278                ScalarValue::Decimal256(Some(123.into()), 5, 5),
9279                ScalarValue::Decimal256(Some(120.into()), 3, 5),
9280            ),
9281            // Distance 2 * 2^50 is larger than usize
9282            (
9283                ScalarValue::Decimal256(
9284                    Some(i256::from_parts(0, 2_i64.pow(50).into())),
9285                    1,
9286                    0,
9287                ),
9288                ScalarValue::Decimal256(
9289                    Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
9290                    1,
9291                    0,
9292                ),
9293            ),
9294            // Distance overflow
9295            (
9296                ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
9297                ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
9298            ),
9299        ];
9300        for (lhs, rhs) in cases {
9301            let distance = lhs.distance(&rhs);
9302            assert!(distance.is_none());
9303        }
9304    }
9305
9306    #[test]
9307    fn test_scalar_interval_negate() {
9308        let cases = [
9309            (
9310                ScalarValue::new_interval_ym(1, 12),
9311                ScalarValue::new_interval_ym(-1, -12),
9312            ),
9313            (
9314                ScalarValue::new_interval_dt(1, 999),
9315                ScalarValue::new_interval_dt(-1, -999),
9316            ),
9317            (
9318                ScalarValue::new_interval_mdn(12, 15, 123_456),
9319                ScalarValue::new_interval_mdn(-12, -15, -123_456),
9320            ),
9321        ];
9322        for (expr, expected) in cases.iter() {
9323            let result = expr.arithmetic_negate().unwrap();
9324            assert_eq!(*expected, result, "-expr:{expr:?}");
9325        }
9326    }
9327
9328    #[test]
9329    fn test_scalar_interval_add() {
9330        let cases = [
9331            (
9332                ScalarValue::new_interval_ym(1, 12),
9333                ScalarValue::new_interval_ym(1, 12),
9334                ScalarValue::new_interval_ym(2, 24),
9335            ),
9336            (
9337                ScalarValue::new_interval_dt(1, 999),
9338                ScalarValue::new_interval_dt(1, 999),
9339                ScalarValue::new_interval_dt(2, 1998),
9340            ),
9341            (
9342                ScalarValue::new_interval_mdn(12, 15, 123_456),
9343                ScalarValue::new_interval_mdn(12, 15, 123_456),
9344                ScalarValue::new_interval_mdn(24, 30, 246_912),
9345            ),
9346        ];
9347        for (lhs, rhs, expected) in cases.iter() {
9348            let result = lhs.add(rhs).unwrap();
9349            let result_commute = rhs.add(lhs).unwrap();
9350            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
9351            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
9352        }
9353    }
9354
9355    #[test]
9356    fn test_scalar_interval_sub() {
9357        let cases = [
9358            (
9359                ScalarValue::new_interval_ym(1, 12),
9360                ScalarValue::new_interval_ym(1, 12),
9361                ScalarValue::new_interval_ym(0, 0),
9362            ),
9363            (
9364                ScalarValue::new_interval_dt(1, 999),
9365                ScalarValue::new_interval_dt(1, 999),
9366                ScalarValue::new_interval_dt(0, 0),
9367            ),
9368            (
9369                ScalarValue::new_interval_mdn(12, 15, 123_456),
9370                ScalarValue::new_interval_mdn(12, 15, 123_456),
9371                ScalarValue::new_interval_mdn(0, 0, 0),
9372            ),
9373        ];
9374        for (lhs, rhs, expected) in cases.iter() {
9375            let result = lhs.sub(rhs).unwrap();
9376            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
9377        }
9378    }
9379
9380    #[test]
9381    fn timestamp_op_random_tests() {
9382        // timestamp1 + (or -) interval = timestamp2
9383        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
9384        let sample_size = 1000;
9385        let timestamps1 = get_random_timestamps(sample_size);
9386        let intervals = get_random_intervals(sample_size);
9387        // ts(sec) + interval(ns) = ts(sec); however,
9388        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
9389        // timestamps are more precise than intervals in tests.
9390        for (idx, ts1) in timestamps1.iter().enumerate() {
9391            if idx % 2 == 0 {
9392                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
9393                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
9394                assert_eq!(ts1, &back);
9395            } else {
9396                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
9397                let back = timestamp2.add(intervals[idx].clone()).unwrap();
9398                assert_eq!(ts1, &back);
9399            };
9400        }
9401    }
9402
9403    #[test]
9404    fn test_struct_nulls() {
9405        let fields_b = Fields::from(vec![
9406            Field::new("ba", DataType::UInt64, true),
9407            Field::new("bb", DataType::UInt64, true),
9408        ]);
9409        let fields = Fields::from(vec![
9410            Field::new("a", DataType::UInt64, true),
9411            Field::new("b", DataType::Struct(fields_b.clone()), true),
9412        ]);
9413
9414        let struct_value = vec![
9415            (
9416                Arc::clone(&fields[0]),
9417                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
9418            ),
9419            (
9420                Arc::clone(&fields[1]),
9421                Arc::new(StructArray::from(vec![
9422                    (
9423                        Arc::clone(&fields_b[0]),
9424                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
9425                    ),
9426                    (
9427                        Arc::clone(&fields_b[1]),
9428                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
9429                    ),
9430                ])) as ArrayRef,
9431            ),
9432        ];
9433
9434        let struct_value_with_nulls = vec![
9435            (
9436                Arc::clone(&fields[0]),
9437                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
9438            ),
9439            (
9440                Arc::clone(&fields[1]),
9441                Arc::new(StructArray::from((
9442                    vec![
9443                        (
9444                            Arc::clone(&fields_b[0]),
9445                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
9446                        ),
9447                        (
9448                            Arc::clone(&fields_b[1]),
9449                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
9450                        ),
9451                    ],
9452                    Buffer::from(&[0]),
9453                ))) as ArrayRef,
9454            ),
9455        ];
9456
9457        let scalars = vec![
9458            // all null
9459            ScalarValue::Struct(Arc::new(StructArray::from((
9460                struct_value.clone(),
9461                Buffer::from(&[0]),
9462            )))),
9463            // field 1 valid, field 2 null
9464            ScalarValue::Struct(Arc::new(StructArray::from((
9465                struct_value_with_nulls.clone(),
9466                Buffer::from(&[1]),
9467            )))),
9468            // all valid
9469            ScalarValue::Struct(Arc::new(StructArray::from((
9470                struct_value.clone(),
9471                Buffer::from(&[1]),
9472            )))),
9473        ];
9474
9475        let check_array = |array: Arc<dyn Array>| {
9476            let is_null = is_null(&array).unwrap();
9477            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
9478
9479            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
9480            let formatted = formatted.split('\n').collect::<Vec<_>>();
9481            let expected = vec![
9482                "+---------------------------+",
9483                "| col                       |",
9484                "+---------------------------+",
9485                "|                           |",
9486                "| {a: 1, b: }               |",
9487                "| {a: 1, b: {ba: 2, bb: 3}} |",
9488                "+---------------------------+",
9489            ];
9490            assert_eq!(
9491                formatted, expected,
9492                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
9493            );
9494        };
9495
9496        // test `ScalarValue::iter_to_array`
9497        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
9498        check_array(array);
9499
9500        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
9501        let arrays = scalars
9502            .iter()
9503            .map(ScalarValue::to_array)
9504            .collect::<Result<Vec<_>>>()
9505            .expect("Failed to convert to array");
9506        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
9507        let array = arrow::compute::concat(&arrays).unwrap();
9508        check_array(array);
9509    }
9510
9511    #[test]
9512    fn test_struct_display() {
9513        let field_a = Field::new("a", DataType::Int32, true);
9514        let field_b = Field::new("b", DataType::Utf8, true);
9515
9516        let s = ScalarStructBuilder::new()
9517            .with_scalar(field_a, ScalarValue::from(1i32))
9518            .with_scalar(field_b, ScalarValue::Utf8(None))
9519            .build()
9520            .unwrap();
9521
9522        assert_eq!(s.to_string(), "{a:1,b:}");
9523        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
9524
9525        let ScalarValue::Struct(arr) = s else {
9526            panic!("Expected struct");
9527        };
9528
9529        //verify compared to arrow display
9530        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9531        assert_snapshot!(batches_to_string(&[batch]), @r"
9532        +-------------+
9533        | s           |
9534        +-------------+
9535        | {a: 1, b: } |
9536        +-------------+
9537        ");
9538    }
9539
9540    #[test]
9541    fn test_list_view_display() {
9542        let s = ScalarValue::ListView(
9543            ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
9544                Some(1),
9545                None,
9546                Some(3),
9547            ])])
9548            .into(),
9549        );
9550
9551        assert_eq!(s.to_string(), "[1, , 3]");
9552        assert_eq!(format!("{s:?}"), "ListView([1, , 3])");
9553    }
9554
9555    #[test]
9556    fn test_null_bug() {
9557        let field_a = Field::new("a", DataType::Int32, true);
9558        let field_b = Field::new("b", DataType::Int32, true);
9559        let fields = Fields::from(vec![field_a, field_b]);
9560
9561        let array_a = Arc::new(Int32Array::from_iter_values([1]));
9562        let array_b = Arc::new(Int32Array::from_iter_values([2]));
9563        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
9564
9565        let mut not_nulls = NullBufferBuilder::new(1);
9566
9567        not_nulls.append_non_null();
9568
9569        let ar = StructArray::new(fields, arrays, not_nulls.finish());
9570        let s = ScalarValue::Struct(Arc::new(ar));
9571
9572        assert_eq!(s.to_string(), "{a:1,b:2}");
9573        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
9574
9575        let ScalarValue::Struct(arr) = s else {
9576            panic!("Expected struct");
9577        };
9578
9579        //verify compared to arrow display
9580        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9581        assert_snapshot!(batches_to_string(&[batch]), @r"
9582        +--------------+
9583        | s            |
9584        +--------------+
9585        | {a: 1, b: 2} |
9586        +--------------+
9587        ");
9588    }
9589
9590    #[test]
9591    fn test_display_date64_large_values() {
9592        assert_eq!(
9593            format!("{}", ScalarValue::Date64(Some(790179464505))),
9594            "1995-01-15"
9595        );
9596        // This used to panic, see https://github.com/apache/arrow-rs/issues/7728
9597        assert_eq!(
9598            format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
9599            ""
9600        );
9601    }
9602
9603    #[test]
9604    fn test_struct_display_null() {
9605        let fields = vec![Field::new("a", DataType::Int32, false)];
9606        let s = ScalarStructBuilder::new_null(fields);
9607        assert_eq!(s.to_string(), "NULL");
9608
9609        let ScalarValue::Struct(arr) = s else {
9610            panic!("Expected struct");
9611        };
9612
9613        //verify compared to arrow display
9614        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9615
9616        assert_snapshot!(batches_to_string(&[batch]), @r"
9617        +---+
9618        | s |
9619        +---+
9620        |   |
9621        +---+
9622        ");
9623    }
9624
9625    #[test]
9626    fn test_map_display_and_debug() {
9627        let string_builder = StringBuilder::new();
9628        let int_builder = Int32Builder::with_capacity(4);
9629        let mut builder = MapBuilder::new(None, string_builder, int_builder);
9630        builder.keys().append_value("joe");
9631        builder.values().append_value(1);
9632        builder.append(true).unwrap();
9633
9634        builder.keys().append_value("blogs");
9635        builder.values().append_value(2);
9636        builder.keys().append_value("foo");
9637        builder.values().append_value(4);
9638        builder.append(true).unwrap();
9639        builder.append(true).unwrap();
9640        builder.append(false).unwrap();
9641
9642        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
9643
9644        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
9645        assert_eq!(
9646            format!("{map_value:?}"),
9647            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
9648        );
9649
9650        let ScalarValue::Map(arr) = map_value else {
9651            panic!("Expected map");
9652        };
9653
9654        //verify compared to arrow display
9655        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
9656        assert_snapshot!(batches_to_string(&[batch]), @r"
9657        +--------------------+
9658        | m                  |
9659        +--------------------+
9660        | {joe: 1}           |
9661        | {blogs: 2, foo: 4} |
9662        | {}                 |
9663        |                    |
9664        +--------------------+
9665        ");
9666    }
9667
9668    #[test]
9669    fn test_binary_display() {
9670        let no_binary_value = ScalarValue::Binary(None);
9671        assert_eq!(format!("{no_binary_value}"), "NULL");
9672        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
9673        assert_eq!(format!("{single_binary_value}"), "2A");
9674        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
9675        assert_eq!(format!("{small_binary_value}"), "010203");
9676        let large_binary_value =
9677            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9678        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9679
9680        let no_binary_value = ScalarValue::BinaryView(None);
9681        assert_eq!(format!("{no_binary_value}"), "NULL");
9682        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
9683        assert_eq!(format!("{small_binary_value}"), "010203");
9684        let large_binary_value =
9685            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9686        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9687
9688        let no_binary_value = ScalarValue::LargeBinary(None);
9689        assert_eq!(format!("{no_binary_value}"), "NULL");
9690        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
9691        assert_eq!(format!("{small_binary_value}"), "010203");
9692        let large_binary_value =
9693            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9694        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9695
9696        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
9697        assert_eq!(format!("{no_binary_value}"), "NULL");
9698        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
9699        assert_eq!(format!("{small_binary_value}"), "010203");
9700        let large_binary_value = ScalarValue::FixedSizeBinary(
9701            11,
9702            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
9703        );
9704        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9705    }
9706
9707    #[test]
9708    fn test_binary_debug() {
9709        let no_binary_value = ScalarValue::Binary(None);
9710        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
9711        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
9712        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
9713        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
9714        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
9715        let large_binary_value =
9716            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9717        assert_eq!(
9718            format!("{large_binary_value:?}"),
9719            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
9720        );
9721
9722        let no_binary_value = ScalarValue::BinaryView(None);
9723        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
9724        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
9725        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
9726        let large_binary_value =
9727            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9728        assert_eq!(
9729            format!("{large_binary_value:?}"),
9730            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
9731        );
9732
9733        let no_binary_value = ScalarValue::LargeBinary(None);
9734        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
9735        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
9736        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
9737        let large_binary_value =
9738            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9739        assert_eq!(
9740            format!("{large_binary_value:?}"),
9741            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
9742        );
9743
9744        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
9745        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
9746        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
9747        assert_eq!(
9748            format!("{small_binary_value:?}"),
9749            "FixedSizeBinary(3, \"1,2,3\")"
9750        );
9751        let large_binary_value = ScalarValue::FixedSizeBinary(
9752            11,
9753            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
9754        );
9755        assert_eq!(
9756            format!("{large_binary_value:?}"),
9757            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
9758        );
9759    }
9760
9761    #[test]
9762    fn test_build_timestamp_millisecond_list() {
9763        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
9764        let arr = ScalarValue::new_list_nullable(
9765            &values,
9766            &DataType::Timestamp(TimeUnit::Millisecond, None),
9767        );
9768        assert_eq!(1, arr.len());
9769    }
9770
9771    #[test]
9772    fn test_newlist_timestamp_zone() {
9773        let s: &'static str = "UTC";
9774        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
9775        let arr = ScalarValue::new_list_nullable(
9776            &values,
9777            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
9778        );
9779        assert_eq!(1, arr.len());
9780        assert_eq!(
9781            arr.data_type(),
9782            &DataType::List(Arc::new(Field::new_list_field(
9783                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
9784                true,
9785            )))
9786        );
9787    }
9788
9789    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
9790        let vector_size = sample_size;
9791        let mut timestamp = vec![];
9792        let mut rng = rand::rng();
9793        for i in 0..vector_size {
9794            let year = rng.random_range(1995..=2050);
9795            let month = rng.random_range(1..=12);
9796            let day = rng.random_range(1..=28); // to exclude invalid dates
9797            let hour = rng.random_range(0..=23);
9798            let minute = rng.random_range(0..=59);
9799            let second = rng.random_range(0..=59);
9800            if i % 4 == 0 {
9801                timestamp.push(ScalarValue::TimestampSecond(
9802                    Some(
9803                        NaiveDate::from_ymd_opt(year, month, day)
9804                            .unwrap()
9805                            .and_hms_opt(hour, minute, second)
9806                            .unwrap()
9807                            .and_utc()
9808                            .timestamp(),
9809                    ),
9810                    None,
9811                ))
9812            } else if i % 4 == 1 {
9813                let millisec = rng.random_range(0..=999);
9814                timestamp.push(ScalarValue::TimestampMillisecond(
9815                    Some(
9816                        NaiveDate::from_ymd_opt(year, month, day)
9817                            .unwrap()
9818                            .and_hms_milli_opt(hour, minute, second, millisec)
9819                            .unwrap()
9820                            .and_utc()
9821                            .timestamp_millis(),
9822                    ),
9823                    None,
9824                ))
9825            } else if i % 4 == 2 {
9826                let microsec = rng.random_range(0..=999_999);
9827                timestamp.push(ScalarValue::TimestampMicrosecond(
9828                    Some(
9829                        NaiveDate::from_ymd_opt(year, month, day)
9830                            .unwrap()
9831                            .and_hms_micro_opt(hour, minute, second, microsec)
9832                            .unwrap()
9833                            .and_utc()
9834                            .timestamp_micros(),
9835                    ),
9836                    None,
9837                ))
9838            } else if i % 4 == 3 {
9839                let nanosec = rng.random_range(0..=999_999_999);
9840                timestamp.push(ScalarValue::TimestampNanosecond(
9841                    Some(
9842                        NaiveDate::from_ymd_opt(year, month, day)
9843                            .unwrap()
9844                            .and_hms_nano_opt(hour, minute, second, nanosec)
9845                            .unwrap()
9846                            .and_utc()
9847                            .timestamp_nanos_opt()
9848                            .unwrap(),
9849                    ),
9850                    None,
9851                ))
9852            }
9853        }
9854        timestamp
9855    }
9856
9857    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
9858        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
9859        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
9860
9861        let vector_size = sample_size;
9862        let mut intervals = vec![];
9863        let mut rng = rand::rng();
9864        const SECS_IN_ONE_DAY: i32 = 86_400;
9865        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
9866        for i in 0..vector_size {
9867            if i % 4 == 0 {
9868                let days = rng.random_range(0..5000);
9869                // to not break second precision
9870                let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
9871                intervals.push(ScalarValue::new_interval_dt(days, millis));
9872            } else if i % 4 == 1 {
9873                let days = rng.random_range(0..5000);
9874                let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
9875                intervals.push(ScalarValue::new_interval_dt(days, millisec));
9876            } else if i % 4 == 2 {
9877                let days = rng.random_range(0..5000);
9878                // to not break microsec precision
9879                let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
9880                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9881            } else {
9882                let days = rng.random_range(0..5000);
9883                let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
9884                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9885            }
9886        }
9887        intervals
9888    }
9889
9890    fn union_fields() -> UnionFields {
9891        [
9892            (0, Arc::new(Field::new("A", DataType::Int32, true))),
9893            (1, Arc::new(Field::new("B", DataType::Float64, true))),
9894        ]
9895        .into_iter()
9896        .collect()
9897    }
9898
9899    #[test]
9900    fn sparse_scalar_union_is_null() {
9901        let sparse_scalar = ScalarValue::Union(
9902            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9903            union_fields(),
9904            UnionMode::Sparse,
9905        );
9906        assert!(sparse_scalar.is_null());
9907    }
9908
9909    #[test]
9910    fn dense_scalar_union_is_null() {
9911        let dense_scalar = ScalarValue::Union(
9912            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9913            union_fields(),
9914            UnionMode::Dense,
9915        );
9916        assert!(dense_scalar.is_null());
9917    }
9918
9919    #[test]
9920    fn cast_date_to_timestamp_overflow_returns_error() {
9921        let scalar = ScalarValue::Date32(Some(i32::MAX));
9922        let err = scalar
9923            .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9924            .expect_err("expected cast to fail");
9925        assert!(
9926            err.to_string()
9927                .contains("converted value exceeds the representable i64 range"),
9928            "unexpected error: {err}"
9929        );
9930    }
9931
9932    #[test]
9933    fn null_dictionary_scalar_produces_null_dictionary_array() {
9934        let dictionary_scalar = ScalarValue::Dictionary(
9935            Box::new(DataType::Int32),
9936            Box::new(ScalarValue::Null),
9937        );
9938        assert!(dictionary_scalar.is_null());
9939        let dictionary_array = dictionary_scalar.to_array().unwrap();
9940        assert!(dictionary_array.is_null(0));
9941    }
9942
9943    #[test]
9944    fn test_scalar_value_try_new_null() {
9945        let scalars = vec![
9946            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
9947            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
9948            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
9949            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
9950            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
9951            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
9952            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
9953            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
9954            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
9955            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
9956            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
9957            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
9958            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
9959            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
9960            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
9961            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
9962            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
9963            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
9964            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
9965            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
9966            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
9967            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
9968            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
9969            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
9970            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
9971            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
9972            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
9973            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
9974                .unwrap(),
9975            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
9976                .unwrap(),
9977            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
9978                .unwrap(),
9979            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9980                .unwrap(),
9981            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
9982                .unwrap(),
9983            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
9984                .unwrap(),
9985            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
9986                .unwrap(),
9987            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
9988            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
9989                .unwrap(),
9990            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
9991            ScalarValue::try_new_null(&DataType::Null).unwrap(),
9992        ];
9993        assert!(scalars.iter().all(|s| s.is_null()));
9994
9995        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
9996        let map_field_ref = Arc::new(Field::new(
9997            "foo",
9998            DataType::Struct(Fields::from(vec![
9999                Field::new("bar", DataType::Utf8, true),
10000                Field::new("baz", DataType::Int32, true),
10001            ])),
10002            true,
10003        ));
10004        let scalars = [
10005            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
10006            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
10007                .unwrap(),
10008            ScalarValue::try_new_null(&DataType::FixedSizeList(
10009                Arc::clone(&field_ref),
10010                42,
10011            ))
10012            .unwrap(),
10013            ScalarValue::try_new_null(&DataType::ListView(Arc::clone(&field_ref)))
10014                .unwrap(),
10015            ScalarValue::try_new_null(&DataType::LargeListView(Arc::clone(&field_ref)))
10016                .unwrap(),
10017            ScalarValue::try_new_null(&DataType::Struct(
10018                vec![Arc::clone(&field_ref)].into(),
10019            ))
10020            .unwrap(),
10021            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
10022            ScalarValue::try_new_null(&DataType::Union(
10023                UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
10024                UnionMode::Dense,
10025            ))
10026            .unwrap(),
10027        ];
10028        assert!(scalars.iter().all(|s| s.is_null()));
10029    }
10030
10031    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
10032    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
10033    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
10034        let actual = actual.as_ref();
10035        let expected_prefix = expected_prefix.as_ref();
10036        assert!(
10037            actual.starts_with(expected_prefix),
10038            "Expected '{actual}' to start with '{expected_prefix}'"
10039        );
10040    }
10041
10042    #[test]
10043    fn test_new_default() {
10044        // Test numeric types
10045        assert_eq!(
10046            ScalarValue::new_default(&DataType::Int32).unwrap(),
10047            ScalarValue::Int32(Some(0))
10048        );
10049        assert_eq!(
10050            ScalarValue::new_default(&DataType::Float64).unwrap(),
10051            ScalarValue::Float64(Some(0.0))
10052        );
10053        assert_eq!(
10054            ScalarValue::new_default(&DataType::Boolean).unwrap(),
10055            ScalarValue::Boolean(Some(false))
10056        );
10057
10058        // Test string types
10059        assert_eq!(
10060            ScalarValue::new_default(&DataType::Utf8).unwrap(),
10061            ScalarValue::Utf8(Some("".to_string()))
10062        );
10063        assert_eq!(
10064            ScalarValue::new_default(&DataType::LargeUtf8).unwrap(),
10065            ScalarValue::LargeUtf8(Some("".to_string()))
10066        );
10067
10068        // Test binary types
10069        assert_eq!(
10070            ScalarValue::new_default(&DataType::Binary).unwrap(),
10071            ScalarValue::Binary(Some(vec![]))
10072        );
10073
10074        // Test fixed size binary
10075        assert_eq!(
10076            ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(),
10077            ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0]))
10078        );
10079
10080        // Test temporal types
10081        assert_eq!(
10082            ScalarValue::new_default(&DataType::Date32).unwrap(),
10083            ScalarValue::Date32(Some(0))
10084        );
10085        assert_eq!(
10086            ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(),
10087            ScalarValue::Time32Second(Some(0))
10088        );
10089
10090        // Test decimal types
10091        assert_eq!(
10092            ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(),
10093            ScalarValue::Decimal128(Some(0), 10, 2)
10094        );
10095
10096        // Test list type
10097        let list_field = Field::new_list_field(DataType::Int32, true);
10098        let list_result =
10099            ScalarValue::new_default(&DataType::List(Arc::new(list_field.clone())))
10100                .unwrap();
10101        match list_result {
10102            ScalarValue::List(arr) => {
10103                assert_eq!(arr.len(), 1);
10104                assert_eq!(arr.value_length(0), 0); // empty list
10105            }
10106            _ => panic!("Expected List"),
10107        }
10108
10109        let list_field = Field::new_list_field(DataType::Int32, true);
10110        let list_result =
10111            ScalarValue::new_default(&DataType::LargeList(Arc::new(list_field.clone())))
10112                .unwrap();
10113        match list_result {
10114            ScalarValue::LargeList(arr) => {
10115                assert_eq!(arr.len(), 1);
10116                assert_eq!(arr.value_length(0), 0); // empty list
10117            }
10118            _ => panic!("Expected LargeList"),
10119        }
10120
10121        let list_result =
10122            ScalarValue::new_default(&DataType::ListView(Arc::new(list_field.clone())))
10123                .unwrap();
10124        match list_result {
10125            ScalarValue::ListView(arr) => {
10126                assert_eq!(arr.len(), 1);
10127                assert_eq!(arr.value_size(0), 0); // empty list
10128            }
10129            _ => panic!("Expected ListView"),
10130        }
10131
10132        let list_result = ScalarValue::new_default(&DataType::LargeListView(Arc::new(
10133            list_field.clone(),
10134        )))
10135        .unwrap();
10136        match list_result {
10137            ScalarValue::LargeListView(arr) => {
10138                assert_eq!(arr.len(), 1);
10139                assert_eq!(arr.value_size(0), 0); // empty list
10140            }
10141            _ => panic!("Expected LargeListView"),
10142        }
10143
10144        // Test struct type
10145        let struct_fields = Fields::from(vec![
10146            Field::new("a", DataType::Int32, false),
10147            Field::new("b", DataType::Utf8, false),
10148        ]);
10149        let struct_result =
10150            ScalarValue::new_default(&DataType::Struct(struct_fields.clone())).unwrap();
10151        match struct_result {
10152            ScalarValue::Struct(arr) => {
10153                assert_eq!(arr.len(), 1);
10154                assert_eq!(arr.column(0).as_primitive::<Int32Type>().value(0), 0);
10155                assert_eq!(arr.column(1).as_string::<i32>().value(0), "");
10156            }
10157            _ => panic!("Expected Struct"),
10158        }
10159
10160        // Test union type
10161        let union_fields = UnionFields::try_new(
10162            vec![0, 1],
10163            vec![
10164                Field::new("i32", DataType::Int32, false),
10165                Field::new("f64", DataType::Float64, false),
10166            ],
10167        )
10168        .unwrap();
10169        let union_result = ScalarValue::new_default(&DataType::Union(
10170            union_fields.clone(),
10171            UnionMode::Sparse,
10172        ))
10173        .unwrap();
10174        match union_result {
10175            ScalarValue::Union(Some((type_id, value)), _, _) => {
10176                assert_eq!(type_id, 0);
10177                assert_eq!(*value, ScalarValue::Int32(Some(0)));
10178            }
10179            _ => panic!("Expected Union"),
10180        }
10181    }
10182
10183    #[test]
10184    fn test_scalar_min() {
10185        // Test integer types
10186        assert_eq!(
10187            ScalarValue::min(&DataType::Int8),
10188            Some(ScalarValue::Int8(Some(i8::MIN)))
10189        );
10190        assert_eq!(
10191            ScalarValue::min(&DataType::Int32),
10192            Some(ScalarValue::Int32(Some(i32::MIN)))
10193        );
10194        assert_eq!(
10195            ScalarValue::min(&DataType::UInt8),
10196            Some(ScalarValue::UInt8(Some(0)))
10197        );
10198        assert_eq!(
10199            ScalarValue::min(&DataType::UInt64),
10200            Some(ScalarValue::UInt64(Some(0)))
10201        );
10202
10203        // Test float types
10204        assert_eq!(
10205            ScalarValue::min(&DataType::Float32),
10206            Some(ScalarValue::Float32(Some(f32::NEG_INFINITY)))
10207        );
10208        assert_eq!(
10209            ScalarValue::min(&DataType::Float64),
10210            Some(ScalarValue::Float64(Some(f64::NEG_INFINITY)))
10211        );
10212
10213        // Test decimal types
10214        let decimal_min = ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap();
10215        match decimal_min {
10216            ScalarValue::Decimal128(Some(val), 5, 2) => {
10217                assert_eq!(val, -99999); // -999.99 with scale 2
10218            }
10219            _ => panic!("Expected Decimal128"),
10220        }
10221
10222        // Test temporal types
10223        assert_eq!(
10224            ScalarValue::min(&DataType::Date32),
10225            Some(ScalarValue::Date32(Some(i32::MIN)))
10226        );
10227        assert_eq!(
10228            ScalarValue::min(&DataType::Time32(TimeUnit::Second)),
10229            Some(ScalarValue::Time32Second(Some(0)))
10230        );
10231        assert_eq!(
10232            ScalarValue::min(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
10233            Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), None))
10234        );
10235
10236        // Test duration types
10237        assert_eq!(
10238            ScalarValue::min(&DataType::Duration(TimeUnit::Second)),
10239            Some(ScalarValue::DurationSecond(Some(i64::MIN)))
10240        );
10241
10242        // Test unsupported types
10243        assert_eq!(ScalarValue::min(&DataType::Utf8), None);
10244        assert_eq!(ScalarValue::min(&DataType::Binary), None);
10245        assert_eq!(
10246            ScalarValue::min(&DataType::List(Arc::new(Field::new(
10247                "item",
10248                DataType::Int32,
10249                true
10250            )))),
10251            None
10252        );
10253        assert_eq!(
10254            ScalarValue::min(&DataType::LargeList(Arc::new(Field::new(
10255                "item",
10256                DataType::Int32,
10257                true
10258            )))),
10259            None
10260        );
10261        assert_eq!(
10262            ScalarValue::min(&DataType::ListView(Arc::new(Field::new(
10263                "item",
10264                DataType::Int32,
10265                true
10266            )))),
10267            None
10268        );
10269        assert_eq!(
10270            ScalarValue::min(&DataType::LargeListView(Arc::new(Field::new(
10271                "item",
10272                DataType::Int32,
10273                true
10274            )))),
10275            None
10276        );
10277    }
10278
10279    #[test]
10280    fn test_scalar_max() {
10281        // Test integer types
10282        assert_eq!(
10283            ScalarValue::max(&DataType::Int8),
10284            Some(ScalarValue::Int8(Some(i8::MAX)))
10285        );
10286        assert_eq!(
10287            ScalarValue::max(&DataType::Int32),
10288            Some(ScalarValue::Int32(Some(i32::MAX)))
10289        );
10290        assert_eq!(
10291            ScalarValue::max(&DataType::UInt8),
10292            Some(ScalarValue::UInt8(Some(u8::MAX)))
10293        );
10294        assert_eq!(
10295            ScalarValue::max(&DataType::UInt64),
10296            Some(ScalarValue::UInt64(Some(u64::MAX)))
10297        );
10298
10299        // Test float types
10300        assert_eq!(
10301            ScalarValue::max(&DataType::Float32),
10302            Some(ScalarValue::Float32(Some(f32::INFINITY)))
10303        );
10304        assert_eq!(
10305            ScalarValue::max(&DataType::Float64),
10306            Some(ScalarValue::Float64(Some(f64::INFINITY)))
10307        );
10308
10309        // Test decimal types
10310        let decimal_max = ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap();
10311        match decimal_max {
10312            ScalarValue::Decimal128(Some(val), 5, 2) => {
10313                assert_eq!(val, 99999); // 999.99 with scale 2
10314            }
10315            _ => panic!("Expected Decimal128"),
10316        }
10317
10318        // Test temporal types
10319        assert_eq!(
10320            ScalarValue::max(&DataType::Date32),
10321            Some(ScalarValue::Date32(Some(i32::MAX)))
10322        );
10323        assert_eq!(
10324            ScalarValue::max(&DataType::Time32(TimeUnit::Second)),
10325            Some(ScalarValue::Time32Second(Some(86_399))) // 23:59:59
10326        );
10327        assert_eq!(
10328            ScalarValue::max(&DataType::Time64(TimeUnit::Microsecond)),
10329            Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))) // 23:59:59.999999
10330        );
10331        assert_eq!(
10332            ScalarValue::max(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
10333            Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), None))
10334        );
10335
10336        // Test duration types
10337        assert_eq!(
10338            ScalarValue::max(&DataType::Duration(TimeUnit::Millisecond)),
10339            Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
10340        );
10341
10342        // Test unsupported types
10343        assert_eq!(ScalarValue::max(&DataType::Utf8), None);
10344        assert_eq!(ScalarValue::max(&DataType::Binary), None);
10345        assert_eq!(
10346            ScalarValue::max(&DataType::Struct(Fields::from(vec![Field::new(
10347                "field",
10348                DataType::Int32,
10349                true
10350            )]))),
10351            None
10352        );
10353        assert_eq!(
10354            ScalarValue::max(&DataType::ListView(Arc::new(Field::new(
10355                "item",
10356                DataType::Int32,
10357                true
10358            )))),
10359            None
10360        );
10361        assert_eq!(
10362            ScalarValue::max(&DataType::LargeListView(Arc::new(Field::new(
10363                "item",
10364                DataType::Int32,
10365                true
10366            )))),
10367            None
10368        );
10369    }
10370
10371    #[test]
10372    fn test_min_max_float16() {
10373        // Test Float16 min and max
10374        let min_f16 = ScalarValue::min(&DataType::Float16).unwrap();
10375        match min_f16 {
10376            ScalarValue::Float16(Some(val)) => {
10377                assert_eq!(val, f16::NEG_INFINITY);
10378            }
10379            _ => panic!("Expected Float16"),
10380        }
10381
10382        let max_f16 = ScalarValue::max(&DataType::Float16).unwrap();
10383        match max_f16 {
10384            ScalarValue::Float16(Some(val)) => {
10385                assert_eq!(val, f16::INFINITY);
10386            }
10387            _ => panic!("Expected Float16"),
10388        }
10389    }
10390
10391    #[test]
10392    fn test_new_default_interval() {
10393        // Test all interval types
10394        assert_eq!(
10395            ScalarValue::new_default(&DataType::Interval(IntervalUnit::YearMonth))
10396                .unwrap(),
10397            ScalarValue::IntervalYearMonth(Some(0))
10398        );
10399        assert_eq!(
10400            ScalarValue::new_default(&DataType::Interval(IntervalUnit::DayTime)).unwrap(),
10401            ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
10402        );
10403        assert_eq!(
10404            ScalarValue::new_default(&DataType::Interval(IntervalUnit::MonthDayNano))
10405                .unwrap(),
10406            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
10407        );
10408    }
10409
10410    #[test]
10411    fn test_min_max_with_timezone() {
10412        let tz = Some(Arc::from("UTC"));
10413
10414        // Test timestamp with timezone
10415        let min_ts =
10416            ScalarValue::min(&DataType::Timestamp(TimeUnit::Second, tz.clone())).unwrap();
10417        match min_ts {
10418            ScalarValue::TimestampSecond(Some(val), Some(tz_str)) => {
10419                assert_eq!(val, i64::MIN);
10420                assert_eq!(tz_str.as_ref(), "UTC");
10421            }
10422            _ => panic!("Expected TimestampSecond with timezone"),
10423        }
10424
10425        let max_ts =
10426            ScalarValue::max(&DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))
10427                .unwrap();
10428        match max_ts {
10429            ScalarValue::TimestampMillisecond(Some(val), Some(tz_str)) => {
10430                assert_eq!(val, i64::MAX);
10431                assert_eq!(tz_str.as_ref(), "UTC");
10432            }
10433            _ => panic!("Expected TimestampMillisecond with timezone"),
10434        }
10435    }
10436
10437    #[test]
10438    fn test_views_minimize_memory() {
10439        let value = "this string is longer than 12 bytes".to_string();
10440
10441        let scalar = ScalarValue::Utf8View(Some(value.clone()));
10442        let array = scalar.to_array_of_size(10).unwrap();
10443        let array = array.as_string_view();
10444        let buffers = array.data_buffers();
10445        assert_eq!(1, buffers.len());
10446        // Ensure we only have a single copy of the value string
10447        assert_eq!(value.len(), buffers[0].len());
10448
10449        // Same but for BinaryView
10450        let scalar = ScalarValue::BinaryView(Some(value.bytes().collect()));
10451        let array = scalar.to_array_of_size(10).unwrap();
10452        let array = array.as_binary_view();
10453        let buffers = array.data_buffers();
10454        assert_eq!(1, buffers.len());
10455        assert_eq!(value.len(), buffers[0].len());
10456    }
10457
10458    #[test]
10459    fn test_to_array_of_size_run_end_encoded() {
10460        fn run_test<R: RunEndIndexType>() {
10461            let value = Box::new(ScalarValue::Float32(Some(1.0)));
10462            let size = 5;
10463            let scalar = ScalarValue::RunEndEncoded(
10464                Field::new("run_ends", R::DATA_TYPE, false).into(),
10465                Field::new("values", DataType::Float32, true).into(),
10466                value.clone(),
10467            );
10468            let array = scalar.to_array_of_size(size).unwrap();
10469            let array = array.as_run::<R>();
10470            let array = array.downcast::<Float32Array>().unwrap();
10471            assert_eq!(vec![Some(1.0); size], array.into_iter().collect::<Vec<_>>());
10472            assert_eq!(1, array.values().len());
10473        }
10474
10475        run_test::<Int16Type>();
10476        run_test::<Int32Type>();
10477        run_test::<Int64Type>();
10478
10479        let scalar = ScalarValue::RunEndEncoded(
10480            Field::new("run_ends", DataType::Int16, false).into(),
10481            Field::new("values", DataType::Float32, true).into(),
10482            Box::new(ScalarValue::Float32(Some(1.0))),
10483        );
10484        let err = scalar.to_array_of_size(i16::MAX as usize + 10).unwrap_err();
10485        assert_eq!(
10486            "Execution error: Cannot construct RunArray of size 32777: Overflows run-ends type Int16",
10487            err.to_string()
10488        )
10489    }
10490
10491    #[test]
10492    fn test_eq_array_run_end_encoded() {
10493        let run_ends = Int16Array::from(vec![1, 3]);
10494        let values = Float32Array::from(vec![None, Some(1.0)]);
10495        let run_array =
10496            Arc::new(RunArray::try_new(&run_ends, &values).unwrap()) as ArrayRef;
10497
10498        let scalar = ScalarValue::RunEndEncoded(
10499            Field::new("run_ends", DataType::Int16, false).into(),
10500            Field::new("values", DataType::Float32, true).into(),
10501            Box::new(ScalarValue::Float32(None)),
10502        );
10503        assert!(scalar.eq_array(&run_array, 0).unwrap());
10504
10505        let scalar = ScalarValue::RunEndEncoded(
10506            Field::new("run_ends", DataType::Int16, false).into(),
10507            Field::new("values", DataType::Float32, true).into(),
10508            Box::new(ScalarValue::Float32(Some(1.0))),
10509        );
10510        assert!(scalar.eq_array(&run_array, 1).unwrap());
10511        assert!(scalar.eq_array(&run_array, 2).unwrap());
10512
10513        // value types must match
10514        let scalar = ScalarValue::RunEndEncoded(
10515            Field::new("run_ends", DataType::Int16, false).into(),
10516            Field::new("values", DataType::Float64, true).into(),
10517            Box::new(ScalarValue::Float64(Some(1.0))),
10518        );
10519        let err = scalar.eq_array(&run_array, 1).unwrap_err();
10520        let expected = "Internal error: could not cast array of type Float32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::Float64Type>";
10521        assert!(err.to_string().starts_with(expected));
10522
10523        // run ends type must match
10524        let scalar = ScalarValue::RunEndEncoded(
10525            Field::new("run_ends", DataType::Int32, false).into(),
10526            Field::new("values", DataType::Float32, true).into(),
10527            Box::new(ScalarValue::Float32(None)),
10528        );
10529        let err = scalar.eq_array(&run_array, 0).unwrap_err();
10530        let expected = "Internal error: could not cast array of type RunEndEncoded(\"run_ends\": non-null Int16, \"values\": Float32) to arrow_array::array::run_array::RunArray<arrow_array::types::Int32Type>";
10531        assert!(err.to_string().starts_with(expected));
10532    }
10533
10534    #[test]
10535    fn test_iter_to_array_run_end_encoded() {
10536        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int16, false));
10537        let values_field = Arc::new(Field::new("values", DataType::Int64, true));
10538        let scalars = vec![
10539            ScalarValue::RunEndEncoded(
10540                Arc::clone(&run_ends_field),
10541                Arc::clone(&values_field),
10542                Box::new(ScalarValue::Int64(Some(1))),
10543            ),
10544            ScalarValue::RunEndEncoded(
10545                Arc::clone(&run_ends_field),
10546                Arc::clone(&values_field),
10547                Box::new(ScalarValue::Int64(Some(1))),
10548            ),
10549            ScalarValue::RunEndEncoded(
10550                Arc::clone(&run_ends_field),
10551                Arc::clone(&values_field),
10552                Box::new(ScalarValue::Int64(None)),
10553            ),
10554            ScalarValue::RunEndEncoded(
10555                Arc::clone(&run_ends_field),
10556                Arc::clone(&values_field),
10557                Box::new(ScalarValue::Int64(Some(2))),
10558            ),
10559            ScalarValue::RunEndEncoded(
10560                Arc::clone(&run_ends_field),
10561                Arc::clone(&values_field),
10562                Box::new(ScalarValue::Int64(Some(2))),
10563            ),
10564            ScalarValue::RunEndEncoded(
10565                Arc::clone(&run_ends_field),
10566                Arc::clone(&values_field),
10567                Box::new(ScalarValue::Int64(Some(2))),
10568            ),
10569        ];
10570
10571        let run_array = ScalarValue::iter_to_array(scalars).unwrap();
10572        let expected = RunArray::try_new(
10573            &Int16Array::from(vec![2, 3, 6]),
10574            &Int64Array::from(vec![Some(1), None, Some(2)]),
10575        )
10576        .unwrap();
10577        assert_eq!(&expected as &dyn Array, run_array.as_ref());
10578
10579        // inconsistent run-ends type
10580        let scalars = vec![
10581            ScalarValue::RunEndEncoded(
10582                Arc::clone(&run_ends_field),
10583                Arc::clone(&values_field),
10584                Box::new(ScalarValue::Int64(Some(1))),
10585            ),
10586            ScalarValue::RunEndEncoded(
10587                Field::new("run_ends", DataType::Int32, false).into(),
10588                Arc::clone(&values_field),
10589                Box::new(ScalarValue::Int64(Some(1))),
10590            ),
10591        ];
10592        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
10593        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int32 }, Field { name: \"values\", data_type: Int64, nullable: true }, Int64(1))";
10594        assert!(err.to_string().starts_with(expected));
10595
10596        // inconsistent value type
10597        let scalars = vec![
10598            ScalarValue::RunEndEncoded(
10599                Arc::clone(&run_ends_field),
10600                Arc::clone(&values_field),
10601                Box::new(ScalarValue::Int64(Some(1))),
10602            ),
10603            ScalarValue::RunEndEncoded(
10604                Arc::clone(&run_ends_field),
10605                Field::new("values", DataType::Int32, true).into(),
10606                Box::new(ScalarValue::Int32(Some(1))),
10607            ),
10608        ];
10609        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
10610        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int16 }, Field { name: \"values\", data_type: Int32, nullable: true }, Int32(1))";
10611        assert!(err.to_string().starts_with(expected));
10612
10613        // inconsistent scalars type
10614        let scalars = vec![
10615            ScalarValue::RunEndEncoded(
10616                Arc::clone(&run_ends_field),
10617                Arc::clone(&values_field),
10618                Box::new(ScalarValue::Int64(Some(1))),
10619            ),
10620            ScalarValue::Int64(Some(1)),
10621        ];
10622        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
10623        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: Int64(1)";
10624        assert!(err.to_string().starts_with(expected));
10625    }
10626
10627    #[test]
10628    fn test_convert_array_to_scalar_vec() {
10629        // 1: Regular ListArray
10630        let list = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
10631            Some(vec![Some(1), Some(2)]),
10632            None,
10633            Some(vec![Some(3), None, Some(4)]),
10634        ]);
10635        let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap();
10636        assert_eq!(
10637            converted,
10638            vec![
10639                Some(vec![
10640                    ScalarValue::Int64(Some(1)),
10641                    ScalarValue::Int64(Some(2))
10642                ]),
10643                None,
10644                Some(vec![
10645                    ScalarValue::Int64(Some(3)),
10646                    ScalarValue::Int64(None),
10647                    ScalarValue::Int64(Some(4))
10648                ]),
10649            ]
10650        );
10651
10652        // 2: Regular LargeListArray
10653        let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
10654            Some(vec![Some(1), Some(2)]),
10655            None,
10656            Some(vec![Some(3), None, Some(4)]),
10657        ]);
10658        let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap();
10659        assert_eq!(
10660            converted,
10661            vec![
10662                Some(vec![
10663                    ScalarValue::Int64(Some(1)),
10664                    ScalarValue::Int64(Some(2))
10665                ]),
10666                None,
10667                Some(vec![
10668                    ScalarValue::Int64(Some(3)),
10669                    ScalarValue::Int64(None),
10670                    ScalarValue::Int64(Some(4))
10671                ]),
10672            ]
10673        );
10674
10675        // 3: Funky (null slot has non-zero list offsets)
10676        // Offsets + Values looks like this: [[1, 2], [3, 4], [5]]
10677        // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
10678        let funky = ListArray::new(
10679            Field::new_list_field(DataType::Int64, true).into(),
10680            OffsetBuffer::new(vec![0, 2, 4, 5].into()),
10681            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
10682            Some(NullBuffer::from(vec![true, false, true])),
10683        );
10684        let converted = ScalarValue::convert_array_to_scalar_vec(&funky).unwrap();
10685        assert_eq!(
10686            converted,
10687            vec![
10688                Some(vec![
10689                    ScalarValue::Int64(Some(1)),
10690                    ScalarValue::Int64(Some(2))
10691                ]),
10692                None,
10693                Some(vec![ScalarValue::Int64(Some(5))]),
10694            ]
10695        );
10696
10697        // 4: Offsets + Values looks like this: [[1, 2], [], [5]]
10698        // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
10699        // The converted result is: [[1, 2], None, [5]]
10700        let array4 = ListArray::new(
10701            Field::new_list_field(DataType::Int64, true).into(),
10702            OffsetBuffer::new(vec![0, 2, 2, 5].into()),
10703            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
10704            Some(NullBuffer::from(vec![true, false, true])),
10705        );
10706        let converted = ScalarValue::convert_array_to_scalar_vec(&array4).unwrap();
10707        assert_eq!(
10708            converted,
10709            vec![
10710                Some(vec![
10711                    ScalarValue::Int64(Some(1)),
10712                    ScalarValue::Int64(Some(2))
10713                ]),
10714                None,
10715                Some(vec![
10716                    ScalarValue::Int64(Some(3)),
10717                    ScalarValue::Int64(Some(4)),
10718                    ScalarValue::Int64(Some(5)),
10719                ]),
10720            ]
10721        );
10722
10723        // 5: Offsets + Values looks like this: [[1, 2], [], [5]]
10724        // Same as 4, but the middle array is not null, so after conversion it's empty.
10725        let array5 = ListArray::new(
10726            Field::new_list_field(DataType::Int64, true).into(),
10727            OffsetBuffer::new(vec![0, 2, 2, 5].into()),
10728            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
10729            Some(NullBuffer::from(vec![true, true, true])),
10730        );
10731        let converted = ScalarValue::convert_array_to_scalar_vec(&array5).unwrap();
10732        assert_eq!(
10733            converted,
10734            vec![
10735                Some(vec![
10736                    ScalarValue::Int64(Some(1)),
10737                    ScalarValue::Int64(Some(2))
10738                ]),
10739                Some(vec![]),
10740                Some(vec![
10741                    ScalarValue::Int64(Some(3)),
10742                    ScalarValue::Int64(Some(4)),
10743                    ScalarValue::Int64(Some(5)),
10744                ]),
10745            ]
10746        );
10747
10748        // 6: Regular ListViewArray
10749        let list = ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![
10750            Some(vec![Some(1), Some(2)]),
10751            None,
10752            Some(vec![Some(3), None, Some(4)]),
10753        ]);
10754        let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap();
10755        assert_eq!(
10756            converted,
10757            vec![
10758                Some(vec![
10759                    ScalarValue::Int64(Some(1)),
10760                    ScalarValue::Int64(Some(2))
10761                ]),
10762                None,
10763                Some(vec![
10764                    ScalarValue::Int64(Some(3)),
10765                    ScalarValue::Int64(None),
10766                    ScalarValue::Int64(Some(4))
10767                ]),
10768            ]
10769        );
10770
10771        // 7: Regular LargeListViewArray
10772        let large_list =
10773            LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![
10774                Some(vec![Some(1), Some(2)]),
10775                None,
10776                Some(vec![Some(3), None, Some(4)]),
10777            ]);
10778        let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap();
10779        assert_eq!(
10780            converted,
10781            vec![
10782                Some(vec![
10783                    ScalarValue::Int64(Some(1)),
10784                    ScalarValue::Int64(Some(2))
10785                ]),
10786                None,
10787                Some(vec![
10788                    ScalarValue::Int64(Some(3)),
10789                    ScalarValue::Int64(None),
10790                    ScalarValue::Int64(Some(4))
10791                ]),
10792            ]
10793        );
10794    }
10795
10796    // ── compact / compact_view_buffers ───────────────────────────────────────
10797
10798    /// Builds a `StringViewArray` with `n` strings that are all longer than
10799    /// 12 bytes so they are stored in backing buffers rather than inline.
10800    fn make_long_strings(n: usize) -> StringViewArray {
10801        let mut b = StringViewBuilder::new();
10802        for i in 0..n {
10803            b.append_value(format!("long_string_value_pad_{i:04}"));
10804        }
10805        b.finish()
10806    }
10807
10808    /// Total bytes across all backing buffers of a `StringViewArray`.
10809    fn utf8view_buffer_bytes(a: &StringViewArray) -> usize {
10810        a.data_buffers().iter().map(|b| b.len()).sum()
10811    }
10812
10813    #[test]
10814    fn test_compact_list_utf8view() {
10815        const N: usize = 50;
10816        let strings = make_long_strings(N);
10817        let one_len = strings.value(0).len();
10818        assert!(utf8view_buffer_bytes(&strings) >= N * one_len);
10819
10820        let single_row_list_array =
10821            SingleRowListArrayBuilder::new(Arc::new(strings.slice(0, 1)) as ArrayRef)
10822                .build_list_array();
10823        let mut scalar = ScalarValue::List(Arc::new(single_row_list_array));
10824        scalar.compact();
10825
10826        let ScalarValue::List(arr) = &scalar else {
10827            panic!("expected List")
10828        };
10829        assert_eq!(
10830            utf8view_buffer_bytes(arr.values().as_string_view()),
10831            one_len
10832        );
10833        assert_eq!(arr.values().as_string_view().value(0), strings.value(0));
10834    }
10835
10836    #[test]
10837    fn test_compact_large_list_utf8view() {
10838        const N: usize = 50;
10839        let strings = make_long_strings(N);
10840        let one_len = strings.value(0).len();
10841        assert!(utf8view_buffer_bytes(&strings) >= N * one_len);
10842
10843        let single_row_list_array =
10844            SingleRowListArrayBuilder::new(Arc::new(strings.slice(0, 1)) as ArrayRef)
10845                .build_large_list_array();
10846        let mut scalar = ScalarValue::LargeList(Arc::new(single_row_list_array));
10847        scalar.compact();
10848
10849        let ScalarValue::LargeList(arr) = &scalar else {
10850            panic!("expected LargeList")
10851        };
10852        assert_eq!(
10853            utf8view_buffer_bytes(arr.values().as_string_view()),
10854            one_len
10855        );
10856        assert_eq!(arr.values().as_string_view().value(0), strings.value(0));
10857    }
10858
10859    #[test]
10860    fn test_compact_fixed_size_list_utf8view() {
10861        const N: usize = 50;
10862        let strings = make_long_strings(N);
10863        let one_len = strings.value(0).len();
10864        assert!(utf8view_buffer_bytes(&strings) >= N * one_len);
10865
10866        let single_row_list_array =
10867            SingleRowListArrayBuilder::new(Arc::new(strings.slice(0, 1)) as ArrayRef)
10868                .build_fixed_size_list_array(1);
10869        let mut scalar = ScalarValue::FixedSizeList(Arc::new(single_row_list_array));
10870        scalar.compact();
10871
10872        let ScalarValue::FixedSizeList(arr) = &scalar else {
10873            panic!("expected FixedSizeList")
10874        };
10875        assert_eq!(
10876            utf8view_buffer_bytes(arr.values().as_string_view()),
10877            one_len
10878        );
10879        assert_eq!(arr.values().as_string_view().value(0), strings.value(0));
10880    }
10881
10882    #[test]
10883    fn test_compact_list_view_utf8view() {
10884        const N: usize = 50;
10885        let strings = make_long_strings(N);
10886        let one_len = strings.value(0).len();
10887        assert!(utf8view_buffer_bytes(&strings) >= N * one_len);
10888
10889        let single_row_list_array =
10890            SingleRowListArrayBuilder::new(Arc::new(strings.slice(0, 1)) as ArrayRef)
10891                .build_list_view_array();
10892        let mut scalar = ScalarValue::ListView(Arc::new(single_row_list_array));
10893        scalar.compact();
10894
10895        let ScalarValue::ListView(arr) = &scalar else {
10896            panic!("expected ListView")
10897        };
10898        assert_eq!(
10899            utf8view_buffer_bytes(arr.values().as_string_view()),
10900            one_len
10901        );
10902        assert_eq!(arr.values().as_string_view().value(0), strings.value(0));
10903    }
10904
10905    #[test]
10906    fn test_compact_large_list_view_utf8view() {
10907        const N: usize = 50;
10908        let strings = make_long_strings(N);
10909        let one_len = strings.value(0).len();
10910        assert!(utf8view_buffer_bytes(&strings) >= N * one_len);
10911
10912        let single_row_list_array =
10913            SingleRowListArrayBuilder::new(Arc::new(strings.slice(0, 1)) as ArrayRef)
10914                .build_large_list_view_array();
10915        let mut scalar = ScalarValue::LargeListView(Arc::new(single_row_list_array));
10916        scalar.compact();
10917
10918        let ScalarValue::LargeListView(arr) = &scalar else {
10919            panic!("expected LargeListView")
10920        };
10921        assert_eq!(
10922            utf8view_buffer_bytes(arr.values().as_string_view()),
10923            one_len
10924        );
10925        assert_eq!(arr.values().as_string_view().value(0), strings.value(0));
10926    }
10927
10928    #[test]
10929    fn test_compact_struct_utf8view() {
10930        const N: usize = 50;
10931        let strings = make_long_strings(N);
10932        let one_len = strings.value(0).len();
10933
10934        let field = Arc::new(Field::new("name", DataType::Utf8View, true));
10935        let struct_arr = StructArray::new(
10936            Fields::from(vec![Arc::clone(&field)]),
10937            vec![Arc::new(strings.slice(0, 1)) as ArrayRef],
10938            None,
10939        );
10940
10941        let mut scalar = ScalarValue::Struct(Arc::new(struct_arr));
10942        scalar.compact();
10943
10944        let ScalarValue::Struct(arr) = &scalar else {
10945            panic!("expected Struct")
10946        };
10947        let col = arr.column(0).as_string_view();
10948        assert_eq!(utf8view_buffer_bytes(col), one_len);
10949        assert_eq!(col.value(0), strings.value(0));
10950    }
10951
10952    #[test]
10953    fn test_compact_map_utf8view() {
10954        const N: usize = 50;
10955        let strings = make_long_strings(N);
10956        let one_len = strings.value(0).len();
10957
10958        let key_field = Arc::new(Field::new("key", DataType::Utf8View, false));
10959        let val_field = Arc::new(Field::new("value", DataType::Int32, true));
10960        let entries = StructArray::new(
10961            Fields::from(vec![Arc::clone(&key_field), Arc::clone(&val_field)]),
10962            vec![
10963                Arc::new(strings.slice(0, 1)) as ArrayRef,
10964                Arc::new(Int32Array::from(vec![1i32])) as ArrayRef,
10965            ],
10966            None,
10967        );
10968        let entries_field = Arc::new(Field::new(
10969            "entries",
10970            DataType::Struct(Fields::from(vec![key_field, val_field])),
10971            false,
10972        ));
10973        let map = MapArray::new(
10974            entries_field,
10975            OffsetBuffer::new(vec![0i32, 1].into()),
10976            entries,
10977            None,
10978            false,
10979        );
10980
10981        let mut scalar = ScalarValue::Map(Arc::new(map));
10982        scalar.compact();
10983
10984        let ScalarValue::Map(arr) = &scalar else {
10985            panic!("expected Map")
10986        };
10987        let keys = arr.entries().column(0).as_string_view();
10988        assert_eq!(utf8view_buffer_bytes(keys), one_len);
10989        assert_eq!(keys.value(0), strings.value(0));
10990    }
10991}