1mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::fmt::Write;
30use std::hash::Hash;
31use std::hash::Hasher;
32use std::iter::repeat_n;
33use std::mem::{size_of, size_of_val};
34use std::str::FromStr;
35use std::sync::Arc;
36
37use crate::assert_or_internal_err;
38use crate::cast::{
39 as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
40 as_date64_array, as_decimal32_array, as_decimal64_array, as_decimal128_array,
41 as_decimal256_array, as_dictionary_array, as_duration_microsecond_array,
42 as_duration_millisecond_array, as_duration_nanosecond_array,
43 as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
44 as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array,
45 as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array,
46 as_interval_ym_array, as_large_binary_array, as_large_list_array,
47 as_large_list_view_array, as_large_string_array, as_list_view_array, as_run_array,
48 as_string_array, as_string_view_array, as_time32_millisecond_array,
49 as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array,
50 as_timestamp_microsecond_array, as_timestamp_millisecond_array,
51 as_timestamp_nanosecond_array, as_timestamp_second_array, as_uint8_array,
52 as_uint16_array, as_uint32_array, as_uint64_array, as_union_array,
53};
54use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result};
55use crate::format::DEFAULT_CAST_OPTIONS;
56use crate::hash_utils::create_hashes;
57use crate::utils::SingleRowListArrayBuilder;
58use crate::{_internal_datafusion_err, arrow_datafusion_err};
59use arrow::array::{
60 Array, ArrayData, ArrayDataBuilder, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType,
61 AsArray, BinaryArray, BinaryViewArray, BinaryViewBuilder, BooleanArray, Date32Array,
62 Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
63 DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
64 DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
65 FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
66 GenericListViewArray, Int8Array, Int16Array, Int32Array, Int64Array,
67 IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
68 LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray,
69 ListViewArray, MapArray, MutableArrayData, PrimitiveArray, RunArray, Scalar,
70 StringArray, StringViewArray, StringViewBuilder, StructArray, Time32MillisecondArray,
71 Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
72 TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
73 TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray,
74 downcast_run_array, new_empty_array, new_null_array,
75};
76use arrow::buffer::{BooleanBuffer, ScalarBuffer};
77use arrow::compute::kernels::cast::{CastOptions, cast_with_options};
78use arrow::compute::kernels::numeric::{
79 add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
80};
81use arrow::datatypes::{
82 ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, Date32Type,
83 Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, Field,
84 FieldRef, Float32Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTime,
85 IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit,
86 IntervalYearMonthType, RunEndIndexType, TimeUnit, TimestampMicrosecondType,
87 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type,
88 UInt16Type, UInt32Type, UInt64Type, UnionFields, UnionMode, i256,
89 validate_decimal_precision_and_scale,
90};
91use arrow::util::display::{ArrayFormatter, FormatOptions, array_value_to_string};
92use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
93use chrono::{Duration, NaiveDate};
94use half::f16;
95pub use struct_builder::ScalarStructBuilder;
96
// Conversion factors used when scaling date values to timestamp units.

/// Number of seconds in one day.
const SECONDS_PER_DAY: i64 = 86_400;
/// Number of milliseconds in one day.
const MILLIS_PER_DAY: i64 = SECONDS_PER_DAY * 1_000;
/// Number of microseconds in one day.
const MICROS_PER_DAY: i64 = MILLIS_PER_DAY * 1_000;
/// Number of nanoseconds in one day.
const NANOS_PER_DAY: i64 = MICROS_PER_DAY * 1_000;
/// Number of microseconds in one millisecond.
const MICROS_PER_MILLISECOND: i64 = 1_000;
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLISECOND: i64 = 1_000_000;
103
104pub fn date_to_timestamp_multiplier(
108 source_type: &DataType,
109 target_type: &DataType,
110) -> Option<i64> {
111 let DataType::Timestamp(target_unit, _) = target_type else {
112 return None;
113 };
114
115 match source_type {
122 DataType::Date32 => Some(match target_unit {
126 TimeUnit::Second => SECONDS_PER_DAY,
127 TimeUnit::Millisecond => MILLIS_PER_DAY,
128 TimeUnit::Microsecond => MICROS_PER_DAY,
129 TimeUnit::Nanosecond => NANOS_PER_DAY,
130 }),
131
132 DataType::Date64 => match target_unit {
137 TimeUnit::Second => None,
138 TimeUnit::Millisecond => None,
143 TimeUnit::Microsecond => Some(MICROS_PER_MILLISECOND),
144 TimeUnit::Nanosecond => Some(NANOS_PER_MILLISECOND),
145 },
146
147 _ => None,
148 }
149}
150
151pub fn ensure_timestamp_in_bounds(
155 value: i64,
156 multiplier: i64,
157 source_type: &DataType,
158 target_type: &DataType,
159) -> Result<()> {
160 if multiplier <= 1 {
161 return Ok(());
162 }
163
164 if value.checked_mul(multiplier).is_none() {
165 let target = format_timestamp_type_for_error(target_type);
166 _exec_err!(
167 "Cannot cast {} value {} to {}: converted value exceeds the representable i64 range",
168 source_type,
169 value,
170 target
171 )
172 } else {
173 Ok(())
174 }
175}
176
177pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String {
180 match target_type {
181 DataType::Timestamp(unit, _) => {
182 let s = match unit {
183 TimeUnit::Second => "s",
184 TimeUnit::Millisecond => "ms",
185 TimeUnit::Microsecond => "us",
186 TimeUnit::Nanosecond => "ns",
187 };
188 format!("Timestamp({s})")
189 }
190 other => format!("{other}"),
191 }
192}
193
/// A single dynamically typed value, with one variant per supported data type.
///
/// Most variants wrap an `Option`; the typed-null constructor in this file
/// builds them with `None`. Nested variants hold an `Arc` to an array instead
/// (NOTE(review): the list helpers in this file assert that such an array has
/// exactly one row — confirm this holds for every nested variant).
#[derive(Clone)]
pub enum ScalarValue {
    /// Untyped null.
    Null,
    /// Boolean value.
    Boolean(Option<bool>),
    /// 16-bit float.
    Float16(Option<f16>),
    /// 32-bit float.
    Float32(Option<f32>),
    /// 64-bit float.
    Float64(Option<f64>),
    /// 32-bit decimal: value, precision, scale.
    Decimal32(Option<i32>, u8, i8),
    /// 64-bit decimal: value, precision, scale.
    Decimal64(Option<i64>, u8, i8),
    /// 128-bit decimal: value, precision, scale.
    Decimal128(Option<i128>, u8, i8),
    /// 256-bit decimal: value, precision, scale.
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit integer.
    Int8(Option<i8>),
    /// Signed 16-bit integer.
    Int16(Option<i16>),
    /// Signed 32-bit integer.
    Int32(Option<i32>),
    /// Signed 64-bit integer.
    Int64(Option<i64>),
    /// Unsigned 8-bit integer.
    UInt8(Option<u8>),
    /// Unsigned 16-bit integer.
    UInt16(Option<u16>),
    /// Unsigned 32-bit integer.
    UInt32(Option<u32>),
    /// Unsigned 64-bit integer.
    UInt64(Option<u64>),
    /// String value of type `Utf8`.
    Utf8(Option<String>),
    /// String value of type `Utf8View`.
    Utf8View(Option<String>),
    /// String value of type `LargeUtf8`.
    LargeUtf8(Option<String>),
    /// Byte string of type `Binary`.
    Binary(Option<Vec<u8>>),
    /// Byte string of type `BinaryView`.
    BinaryView(Option<Vec<u8>>),
    /// Byte string of type `FixedSizeBinary`: declared length, then the bytes.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Byte string of type `LargeBinary`.
    LargeBinary(Option<Vec<u8>>),
    /// Fixed-size list, stored as an array.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// List, stored as an array.
    List(Arc<ListArray>),
    /// Large list, stored as an array.
    LargeList(Arc<LargeListArray>),
    /// List view, stored as an array.
    ListView(Arc<ListViewArray>),
    /// Large list view, stored as an array.
    LargeListView(Arc<LargeListViewArray>),
    /// Struct, stored as an array.
    Struct(Arc<StructArray>),
    /// Map, stored as an array.
    Map(Arc<MapArray>),
    /// `Date32` value.
    Date32(Option<i32>),
    /// `Date64` value.
    Date64(Option<i64>),
    /// `Time32` value with second unit.
    Time32Second(Option<i32>),
    /// `Time32` value with millisecond unit.
    Time32Millisecond(Option<i32>),
    /// `Time64` value with microsecond unit.
    Time64Microsecond(Option<i64>),
    /// `Time64` value with nanosecond unit.
    Time64Nanosecond(Option<i64>),
    /// Timestamp with second unit: value, optional timezone.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp with millisecond unit: value, optional timezone.
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp with microsecond unit: value, optional timezone.
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp with nanosecond unit: value, optional timezone.
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Interval with year-month unit.
    IntervalYearMonth(Option<i32>),
    /// Interval with day-time unit.
    IntervalDayTime(Option<IntervalDayTime>),
    /// Interval with month-day-nanosecond unit.
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration with second unit.
    DurationSecond(Option<i64>),
    /// Duration with millisecond unit.
    DurationMillisecond(Option<i64>),
    /// Duration with microsecond unit.
    DurationMicrosecond(Option<i64>),
    /// Duration with nanosecond unit.
    DurationNanosecond(Option<i64>),
    /// Union value: optional `(i8, value)` pair (presumably the type id and
    /// the value of that child — confirm), the union fields, and the mode.
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary-encoded value: the dictionary key (index) type and the
    /// wrapped value.
    Dictionary(Box<DataType>, Box<ScalarValue>),
    /// Run-end-encoded value: the run-ends field, the values field, and the
    /// wrapped value.
    RunEndEncoded(FieldRef, FieldRef, Box<ScalarValue>),
}
445
446impl Hash for Fl<f16> {
447 fn hash<H: Hasher>(&self, state: &mut H) {
448 self.0.to_bits().hash(state);
449 }
450}
451
452impl PartialEq for ScalarValue {
454 fn eq(&self, other: &Self) -> bool {
455 use ScalarValue::*;
456 match (self, other) {
460 (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
461 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
462 }
463 (Decimal32(_, _, _), _) => false,
464 (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
465 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
466 }
467 (Decimal64(_, _, _), _) => false,
468 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
469 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
470 }
471 (Decimal128(_, _, _), _) => false,
472 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
473 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
474 }
475 (Decimal256(_, _, _), _) => false,
476 (Boolean(v1), Boolean(v2)) => v1.eq(v2),
477 (Boolean(_), _) => false,
478 (Float32(v1), Float32(v2)) => match (v1, v2) {
479 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
480 _ => v1.eq(v2),
481 },
482 (Float16(v1), Float16(v2)) => match (v1, v2) {
483 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
484 _ => v1.eq(v2),
485 },
486 (Float32(_), _) => false,
487 (Float16(_), _) => false,
488 (Float64(v1), Float64(v2)) => match (v1, v2) {
489 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
490 _ => v1.eq(v2),
491 },
492 (Float64(_), _) => false,
493 (Int8(v1), Int8(v2)) => v1.eq(v2),
494 (Int8(_), _) => false,
495 (Int16(v1), Int16(v2)) => v1.eq(v2),
496 (Int16(_), _) => false,
497 (Int32(v1), Int32(v2)) => v1.eq(v2),
498 (Int32(_), _) => false,
499 (Int64(v1), Int64(v2)) => v1.eq(v2),
500 (Int64(_), _) => false,
501 (UInt8(v1), UInt8(v2)) => v1.eq(v2),
502 (UInt8(_), _) => false,
503 (UInt16(v1), UInt16(v2)) => v1.eq(v2),
504 (UInt16(_), _) => false,
505 (UInt32(v1), UInt32(v2)) => v1.eq(v2),
506 (UInt32(_), _) => false,
507 (UInt64(v1), UInt64(v2)) => v1.eq(v2),
508 (UInt64(_), _) => false,
509 (Utf8(v1), Utf8(v2)) => v1.eq(v2),
510 (Utf8(_), _) => false,
511 (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
512 (Utf8View(_), _) => false,
513 (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
514 (LargeUtf8(_), _) => false,
515 (Binary(v1), Binary(v2)) => v1.eq(v2),
516 (Binary(_), _) => false,
517 (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
518 (BinaryView(_), _) => false,
519 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
520 (FixedSizeBinary(_, _), _) => false,
521 (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
522 (LargeBinary(_), _) => false,
523 (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
524 (FixedSizeList(_), _) => false,
525 (List(v1), List(v2)) => v1.eq(v2),
526 (List(_), _) => false,
527 (LargeList(v1), LargeList(v2)) => v1.eq(v2),
528 (LargeList(_), _) => false,
529 (ListView(v1), ListView(v2)) => v1.eq(v2),
530 (ListView(_), _) => false,
531 (LargeListView(v1), LargeListView(v2)) => v1.eq(v2),
532 (LargeListView(_), _) => false,
533 (Struct(v1), Struct(v2)) => v1.eq(v2),
534 (Struct(_), _) => false,
535 (Map(v1), Map(v2)) => v1.eq(v2),
536 (Map(_), _) => false,
537 (Date32(v1), Date32(v2)) => v1.eq(v2),
538 (Date32(_), _) => false,
539 (Date64(v1), Date64(v2)) => v1.eq(v2),
540 (Date64(_), _) => false,
541 (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
542 (Time32Second(_), _) => false,
543 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
544 (Time32Millisecond(_), _) => false,
545 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
546 (Time64Microsecond(_), _) => false,
547 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
548 (Time64Nanosecond(_), _) => false,
549 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
550 (TimestampSecond(_, _), _) => false,
551 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
552 (TimestampMillisecond(_, _), _) => false,
553 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
554 (TimestampMicrosecond(_, _), _) => false,
555 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
556 (TimestampNanosecond(_, _), _) => false,
557 (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
558 (DurationSecond(_), _) => false,
559 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
560 (DurationMillisecond(_), _) => false,
561 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
562 (DurationMicrosecond(_), _) => false,
563 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
564 (DurationNanosecond(_), _) => false,
565 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
566 (IntervalYearMonth(_), _) => false,
567 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
568 (IntervalDayTime(_), _) => false,
569 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
570 (IntervalMonthDayNano(_), _) => false,
571 (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
572 val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
573 }
574 (Union(_, _, _), _) => false,
575 (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
576 (Dictionary(_, _), _) => false,
577 (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
578 rf1.eq(rf2) && vf1.eq(vf2) && v1.eq(v2)
579 }
580 (RunEndEncoded(_, _, _), _) => false,
581 (Null, Null) => true,
582 (Null, _) => false,
583 }
584 }
585}
586
587impl PartialOrd for ScalarValue {
589 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
590 use ScalarValue::*;
591 match (self, other) {
595 (Decimal32(v1, _, s1), Decimal32(v2, _, s2)) => {
596 if s1.eq(s2) {
597 v1.partial_cmp(v2)
603 } else {
604 None
605 }
606 }
607 (Decimal32(_, _, _), _) => None,
608 (Decimal64(v1, _, s1), Decimal64(v2, _, s2)) => {
609 if s1.eq(s2) {
610 v1.partial_cmp(v2)
611 } else {
612 None
613 }
614 }
615 (Decimal64(_, _, _), _) => None,
616 (Decimal128(v1, _, s1), Decimal128(v2, _, s2)) => {
617 if s1.eq(s2) {
618 v1.partial_cmp(v2)
619 } else {
620 None
621 }
622 }
623 (Decimal128(_, _, _), _) => None,
624 (Decimal256(v1, _, s1), Decimal256(v2, _, s2)) => {
625 if s1.eq(s2) {
626 v1.partial_cmp(v2)
627 } else {
628 None
629 }
630 }
631 (Decimal256(_, _, _), _) => None,
632 (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
633 (Boolean(_), _) => None,
634 (Float32(v1), Float32(v2)) => match (v1, v2) {
635 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
636 _ => v1.partial_cmp(v2),
637 },
638 (Float16(v1), Float16(v2)) => match (v1, v2) {
639 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
640 _ => v1.partial_cmp(v2),
641 },
642 (Float32(_), _) => None,
643 (Float16(_), _) => None,
644 (Float64(v1), Float64(v2)) => match (v1, v2) {
645 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
646 _ => v1.partial_cmp(v2),
647 },
648 (Float64(_), _) => None,
649 (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
650 (Int8(_), _) => None,
651 (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
652 (Int16(_), _) => None,
653 (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
654 (Int32(_), _) => None,
655 (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
656 (Int64(_), _) => None,
657 (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
658 (UInt8(_), _) => None,
659 (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
660 (UInt16(_), _) => None,
661 (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
662 (UInt32(_), _) => None,
663 (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
664 (UInt64(_), _) => None,
665 (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
666 (Utf8(_), _) => None,
667 (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
668 (LargeUtf8(_), _) => None,
669 (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
670 (Utf8View(_), _) => None,
671 (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
672 (Binary(_), _) => None,
673 (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
674 (BinaryView(_), _) => None,
675 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
676 (FixedSizeBinary(_, _), _) => None,
677 (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
678 (LargeBinary(_), _) => None,
679 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
682 (FixedSizeList(arr1), FixedSizeList(arr2)) => {
683 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
684 }
685 (LargeList(arr1), LargeList(arr2)) => {
686 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
687 }
688 (ListView(arr1), ListView(arr2)) => {
689 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
690 }
691 (LargeListView(arr1), LargeListView(arr2)) => {
692 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
693 }
694 (List(_), _)
695 | (LargeList(_), _)
696 | (FixedSizeList(_), _)
697 | (ListView(_), _)
698 | (LargeListView(_), _) => None,
699 (Struct(struct_arr1), Struct(struct_arr2)) => {
700 partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
701 }
702 (Struct(_), _) => None,
703 (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
704 (Map(_), _) => None,
705 (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
706 (Date32(_), _) => None,
707 (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
708 (Date64(_), _) => None,
709 (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
710 (Time32Second(_), _) => None,
711 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
712 (Time32Millisecond(_), _) => None,
713 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
714 (Time64Microsecond(_), _) => None,
715 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
716 (Time64Nanosecond(_), _) => None,
717 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
718 (TimestampSecond(_, _), _) => None,
719 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
720 v1.partial_cmp(v2)
721 }
722 (TimestampMillisecond(_, _), _) => None,
723 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
724 v1.partial_cmp(v2)
725 }
726 (TimestampMicrosecond(_, _), _) => None,
727 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
728 v1.partial_cmp(v2)
729 }
730 (TimestampNanosecond(_, _), _) => None,
731 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
732 (IntervalYearMonth(_), _) => None,
733 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
734 (IntervalDayTime(_), _) => None,
735 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
736 (IntervalMonthDayNano(_), _) => None,
737 (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
738 (DurationSecond(_), _) => None,
739 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
740 (DurationMillisecond(_), _) => None,
741 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
742 (DurationMicrosecond(_), _) => None,
743 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
744 (DurationNanosecond(_), _) => None,
745 (Union(v1, t1, m1), Union(v2, t2, m2)) => {
746 if t1.eq(t2) && m1.eq(m2) {
747 v1.partial_cmp(v2)
748 } else {
749 None
750 }
751 }
752 (Union(_, _, _), _) => None,
753 (Dictionary(k1, v1), Dictionary(k2, v2)) => {
754 if k1 == k2 { v1.partial_cmp(v2) } else { None }
756 }
757 (Dictionary(_, _), _) => None,
758 (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
759 if rf1 == rf2 && vf1 == vf2 {
761 v1.partial_cmp(v2)
762 } else {
763 None
764 }
765 }
766 (RunEndEncoded(_, _, _), _) => None,
767 (Null, Null) => Some(Ordering::Equal),
768 (Null, _) => None,
769 }
770 }
771}
772
773fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
776 assert_eq!(arr.len(), 1);
777 if let Some(arr) = arr.as_list_opt::<i32>() {
778 arr.value(0)
779 } else if let Some(arr) = arr.as_list_opt::<i64>() {
780 arr.value(0)
781 } else if let Some(arr) = arr.as_fixed_size_list_opt() {
782 arr.value(0)
783 } else if let Some(arr) = arr.as_list_view_opt::<i32>() {
784 arr.value(0)
785 } else if let Some(arr) = arr.as_list_view_opt::<i64>() {
786 arr.value(0)
787 } else {
788 unreachable!(
789 "Since only List / LargeList / FixedSizeList / ListView / LargeListView are supported, this should never happen"
790 )
791 }
792}
793
794fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
796 if arr1.data_type() != arr2.data_type() {
797 return None;
798 }
799 let arr1 = first_array_for_list(arr1);
800 let arr2 = first_array_for_list(arr2);
801
802 let min_length = arr1.len().min(arr2.len());
803 let arr1_trimmed = arr1.slice(0, min_length);
804 let arr2_trimmed = arr2.slice(0, min_length);
805
806 let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
807 let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
808
809 for j in 0..lt_res.len() {
810 if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
818 return Some(Ordering::Greater);
819 }
820 if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
821 return Some(Ordering::Less);
822 }
823
824 if lt_res.is_valid(j) && lt_res.value(j) {
825 return Some(Ordering::Less);
826 }
827 if eq_res.is_valid(j) && !eq_res.value(j) {
828 return Some(Ordering::Greater);
829 }
830 }
831
832 Some(arr1.len().cmp(&arr2.len()))
833}
834
835fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
836 for i in 0..array.num_columns() {
837 let column = array.column(i);
838 if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
839 flatten(nested_struct, columns);
841 } else {
842 columns.push(column);
844 }
845 }
846}
847
848pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
849 if s1.len() != s2.len() {
850 return None;
851 }
852
853 if s1.data_type() != s2.data_type() {
854 return None;
855 }
856
857 let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
858 let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
859
860 flatten(s1, &mut expanded_columns1);
861 flatten(s2, &mut expanded_columns2);
862
863 for col_index in 0..expanded_columns1.len() {
864 let arr1 = expanded_columns1[col_index];
865 let arr2 = expanded_columns2[col_index];
866
867 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
868 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
869
870 for j in 0..lt_res.len() {
871 if lt_res.is_valid(j) && lt_res.value(j) {
872 return Some(Ordering::Less);
873 }
874 if eq_res.is_valid(j) && !eq_res.value(j) {
875 return Some(Ordering::Greater);
876 }
877 }
878 }
879 Some(Ordering::Equal)
880}
881
882fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
883 if m1.len() != m2.len() {
884 return None;
885 }
886
887 if m1.data_type() != m2.data_type() {
888 return None;
889 }
890
891 for col_index in 0..m1.len() {
892 let arr1 = m1.entries().column(col_index);
893 let arr2 = m2.entries().column(col_index);
894
895 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
896 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
897
898 for j in 0..lt_res.len() {
899 if lt_res.is_valid(j) && lt_res.value(j) {
900 return Some(Ordering::Less);
901 }
902 if eq_res.is_valid(j) && !eq_res.value(j) {
903 return Some(Ordering::Greater);
904 }
905 }
906 }
907 Some(Ordering::Equal)
908}
909
// Marker impl declaring that `ScalarValue` equality is a full equivalence
// relation. The `PartialEq` impl compares floats by bit pattern, so a value
// holding NaN still equals itself.
impl Eq for ScalarValue {}
911
/// Newtype wrapper that lets float values, which do not implement `Hash`
/// themselves, be hashed.
struct Fl<T>(T);

/// Implements `Hash` for `Fl<$t>` for each `(float type, integer type)`
/// pair, where the integer type has the same width as the float.
///
/// The float's bit pattern is written to the hasher as native-endian bytes.
macro_rules! hash_float_value {
    ($(($t:ty, $i:ty)),+) => {
        $(impl std::hash::Hash for Fl<$t> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let bits: $i = self.0.to_bits();
                state.write(&bits.to_ne_bytes())
            }
        })+
    };
}

hash_float_value!((f64, u64), (f32, u32));
927
928impl Hash for ScalarValue {
934 fn hash<H: Hasher>(&self, state: &mut H) {
935 use ScalarValue::*;
936 match self {
937 Decimal32(v, p, s) => {
938 v.hash(state);
939 p.hash(state);
940 s.hash(state)
941 }
942 Decimal64(v, p, s) => {
943 v.hash(state);
944 p.hash(state);
945 s.hash(state)
946 }
947 Decimal128(v, p, s) => {
948 v.hash(state);
949 p.hash(state);
950 s.hash(state)
951 }
952 Decimal256(v, p, s) => {
953 v.hash(state);
954 p.hash(state);
955 s.hash(state)
956 }
957 Boolean(v) => v.hash(state),
958 Float16(v) => v.map(Fl).hash(state),
959 Float32(v) => v.map(Fl).hash(state),
960 Float64(v) => v.map(Fl).hash(state),
961 Int8(v) => v.hash(state),
962 Int16(v) => v.hash(state),
963 Int32(v) => v.hash(state),
964 Int64(v) => v.hash(state),
965 UInt8(v) => v.hash(state),
966 UInt16(v) => v.hash(state),
967 UInt32(v) => v.hash(state),
968 UInt64(v) => v.hash(state),
969 Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
970 Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
971 v.hash(state)
972 }
973 List(arr) => {
974 hash_nested_array(arr.to_owned() as ArrayRef, state);
975 }
976 LargeList(arr) => {
977 hash_nested_array(arr.to_owned() as ArrayRef, state);
978 }
979 FixedSizeList(arr) => {
980 hash_nested_array(arr.to_owned() as ArrayRef, state);
981 }
982 ListView(arr) => {
983 hash_nested_array(arr.to_owned() as ArrayRef, state);
984 }
985 LargeListView(arr) => {
986 hash_nested_array(arr.to_owned() as ArrayRef, state);
987 }
988 Struct(arr) => {
989 hash_nested_array(arr.to_owned() as ArrayRef, state);
990 }
991 Map(arr) => {
992 hash_nested_array(arr.to_owned() as ArrayRef, state);
993 }
994 Date32(v) => v.hash(state),
995 Date64(v) => v.hash(state),
996 Time32Second(v) => v.hash(state),
997 Time32Millisecond(v) => v.hash(state),
998 Time64Microsecond(v) => v.hash(state),
999 Time64Nanosecond(v) => v.hash(state),
1000 TimestampSecond(v, _) => v.hash(state),
1001 TimestampMillisecond(v, _) => v.hash(state),
1002 TimestampMicrosecond(v, _) => v.hash(state),
1003 TimestampNanosecond(v, _) => v.hash(state),
1004 DurationSecond(v) => v.hash(state),
1005 DurationMillisecond(v) => v.hash(state),
1006 DurationMicrosecond(v) => v.hash(state),
1007 DurationNanosecond(v) => v.hash(state),
1008 IntervalYearMonth(v) => v.hash(state),
1009 IntervalDayTime(v) => v.hash(state),
1010 IntervalMonthDayNano(v) => v.hash(state),
1011 Union(v, t, m) => {
1012 v.hash(state);
1013 t.hash(state);
1014 m.hash(state);
1015 }
1016 Dictionary(k, v) => {
1017 k.hash(state);
1018 v.hash(state);
1019 }
1020 RunEndEncoded(rf, vf, v) => {
1021 rf.hash(state);
1022 vf.hash(state);
1023 v.hash(state);
1024 }
1025 Null => 1.hash(state),
1027 }
1028 }
1029}
1030
1031fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
1032 let len = arr.len();
1033 let hashes_buffer = &mut vec![0; len];
1034 let random_state = crate::hash_utils::RandomState::with_seed(0);
1035 let hashes = create_hashes(&[arr], &random_state, hashes_buffer)
1036 .expect("hash_nested_array: failed to create row hashes");
1037 hashes.hash(state);
1039}
1040
1041#[inline]
1048pub fn get_dict_value<K: ArrowDictionaryKeyType>(
1049 array: &dyn Array,
1050 index: usize,
1051) -> Result<(&ArrayRef, Option<usize>)> {
1052 let dict_array = as_dictionary_array::<K>(array)?;
1053 Ok((dict_array.values(), dict_array.key(index)))
1054}
1055
1056fn dict_from_scalar<K: ArrowDictionaryKeyType>(
1059 value: &ScalarValue,
1060 size: usize,
1061) -> Result<ArrayRef> {
1062 let values_array = value.to_array_of_size(1)?;
1064
1065 let key_array: PrimitiveArray<K> =
1068 get_or_create_cached_key_array::<K>(size, value.is_null());
1069
1070 Ok(Arc::new(
1076 DictionaryArray::<K>::try_new(key_array, values_array)?, ))
1078}
1079
1080pub fn dict_from_values<K: ArrowDictionaryKeyType>(
1095 values_array: ArrayRef,
1096) -> Result<ArrayRef> {
1097 let key_array: PrimitiveArray<K> = (0..values_array.len())
1100 .map(|index| {
1101 if values_array.is_valid(index) {
1102 let native_index = K::Native::from_usize(index).ok_or_else(|| {
1103 _internal_datafusion_err!(
1104 "Can not create index of type {} from value {index}",
1105 K::DATA_TYPE
1106 )
1107 })?;
1108 Ok(Some(native_index))
1109 } else {
1110 Ok(None)
1111 }
1112 })
1113 .collect::<Result<Vec<_>>>()?
1114 .into_iter()
1115 .collect();
1116
1117 let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
1123 Ok(Arc::new(dict_array))
1124}
1125
// Reads position `$index` of `$array` (downcast with `$array_cast`) into the
// timezone-carrying variant `ScalarValue::$SCALAR`.
//
// A null position becomes `None`; the timezone is a clone of `$TZ`. The
// expansion uses `?`, so it must sit in a function returning a compatible
// `Result`.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
            $TZ.clone(),
        ))
    }};
}
1138
// Reads position `$index` of `$array` (downcast with `$array_cast`) into the
// variant `ScalarValue::$SCALAR`.
//
// A null position becomes `None`. The expansion uses `?`, so it must sit in
// a function returning a compatible `Result`.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
        ))
    }};
}
1150
// Builds an array of `$SIZE` rows from an optional scalar `$EXPR`.
//
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)`; `None`
// yields a null array of the matching `DataType`.
macro_rules! build_array_from_option {
    // Form for data types without parameters: `DataType::$DATA_TYPE`.
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
        }
    }};
    // Form for data types with one parameter: `DataType::$DATA_TYPE($ENUM)`.
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
        }
    }};
}
1165
// Builds a timestamp array of `$SIZE` rows from an optional scalar `$EXPR`.
//
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` with the
// timezone `$TZ` attached; `None` yields a null array of type
// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            Some(value) => {
                Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
            }
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
        }
    };
}
1176
// Tests whether position `$index` of `$array` (downcast with `$array_cast`)
// equals the optional scalar `$VALUE`.
//
// `Some(val)` matches a valid position holding an equal value; `None`
// matches a null position. The expansion uses `?`, so it must sit in a
// function returning a compatible `Result`.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        let is_equal = if let Some(val) = $VALUE {
            is_valid && &array.value($index) == val
        } else {
            !is_valid
        };
        Ok::<bool, DataFusionError>(is_equal)
    }};
}
1187
1188impl ScalarValue {
1189 pub fn new_primitive<T: ArrowPrimitiveType>(
1195 a: Option<T::Native>,
1196 d: &DataType,
1197 ) -> Result<Self> {
1198 match a {
1199 None => d.try_into(),
1200 Some(v) => {
1201 let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1202 .with_data_type(d.clone());
1203 Self::try_from_array(&array, 0)
1204 }
1205 }
1206 }
1207
1208 pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1210 Self::validate_decimal_or_internal_err::<Decimal128Type>(precision, scale)?;
1211 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
1212 }
1213
1214 pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1226 Ok(match data_type {
1227 DataType::Boolean => ScalarValue::Boolean(None),
1228 DataType::Float16 => ScalarValue::Float16(None),
1229 DataType::Float64 => ScalarValue::Float64(None),
1230 DataType::Float32 => ScalarValue::Float32(None),
1231 DataType::Int8 => ScalarValue::Int8(None),
1232 DataType::Int16 => ScalarValue::Int16(None),
1233 DataType::Int32 => ScalarValue::Int32(None),
1234 DataType::Int64 => ScalarValue::Int64(None),
1235 DataType::UInt8 => ScalarValue::UInt8(None),
1236 DataType::UInt16 => ScalarValue::UInt16(None),
1237 DataType::UInt32 => ScalarValue::UInt32(None),
1238 DataType::UInt64 => ScalarValue::UInt64(None),
1239 DataType::Decimal32(precision, scale) => {
1240 ScalarValue::Decimal32(None, *precision, *scale)
1241 }
1242 DataType::Decimal64(precision, scale) => {
1243 ScalarValue::Decimal64(None, *precision, *scale)
1244 }
1245 DataType::Decimal128(precision, scale) => {
1246 ScalarValue::Decimal128(None, *precision, *scale)
1247 }
1248 DataType::Decimal256(precision, scale) => {
1249 ScalarValue::Decimal256(None, *precision, *scale)
1250 }
1251 DataType::Utf8 => ScalarValue::Utf8(None),
1252 DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1253 DataType::Utf8View => ScalarValue::Utf8View(None),
1254 DataType::Binary => ScalarValue::Binary(None),
1255 DataType::BinaryView => ScalarValue::BinaryView(None),
1256 DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1257 DataType::LargeBinary => ScalarValue::LargeBinary(None),
1258 DataType::Date32 => ScalarValue::Date32(None),
1259 DataType::Date64 => ScalarValue::Date64(None),
1260 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1261 DataType::Time32(TimeUnit::Millisecond) => {
1262 ScalarValue::Time32Millisecond(None)
1263 }
1264 DataType::Time64(TimeUnit::Microsecond) => {
1265 ScalarValue::Time64Microsecond(None)
1266 }
1267 DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1268 DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1269 ScalarValue::TimestampSecond(None, tz_opt.clone())
1270 }
1271 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1272 ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1273 }
1274 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1275 ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1276 }
1277 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1278 ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1279 }
1280 DataType::Interval(IntervalUnit::YearMonth) => {
1281 ScalarValue::IntervalYearMonth(None)
1282 }
1283 DataType::Interval(IntervalUnit::DayTime) => {
1284 ScalarValue::IntervalDayTime(None)
1285 }
1286 DataType::Interval(IntervalUnit::MonthDayNano) => {
1287 ScalarValue::IntervalMonthDayNano(None)
1288 }
1289 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1290 DataType::Duration(TimeUnit::Millisecond) => {
1291 ScalarValue::DurationMillisecond(None)
1292 }
1293 DataType::Duration(TimeUnit::Microsecond) => {
1294 ScalarValue::DurationMicrosecond(None)
1295 }
1296 DataType::Duration(TimeUnit::Nanosecond) => {
1297 ScalarValue::DurationNanosecond(None)
1298 }
1299 DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1300 index_type.clone(),
1301 Box::new(value_type.as_ref().try_into()?),
1302 ),
1303 DataType::RunEndEncoded(run_ends_field, value_field) => {
1304 ScalarValue::RunEndEncoded(
1305 Arc::clone(run_ends_field),
1306 Arc::clone(value_field),
1307 Box::new(value_field.data_type().try_into()?),
1308 )
1309 }
1310 DataType::List(field_ref) => ScalarValue::List(Arc::new(
1312 GenericListArray::new_null(Arc::clone(field_ref), 1),
1313 )),
1314 DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1316 GenericListArray::new_null(Arc::clone(field_ref), 1),
1317 )),
1318 DataType::FixedSizeList(field_ref, fixed_length) => {
1320 ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1321 Arc::clone(field_ref),
1322 *fixed_length,
1323 1,
1324 )))
1325 }
1326 DataType::ListView(field_ref) => ScalarValue::ListView(Arc::new(
1327 GenericListViewArray::new_null(Arc::clone(field_ref), 1),
1328 )),
1329 DataType::LargeListView(field_ref) => ScalarValue::LargeListView(Arc::new(
1330 GenericListViewArray::new_null(Arc::clone(field_ref), 1),
1331 )),
1332 DataType::Struct(fields) => ScalarValue::Struct(
1333 new_null_array(&DataType::Struct(fields.to_owned()), 1)
1334 .as_struct()
1335 .to_owned()
1336 .into(),
1337 ),
1338 DataType::Map(fields, sorted) => ScalarValue::Map(
1339 new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1340 .as_map()
1341 .to_owned()
1342 .into(),
1343 ),
1344 DataType::Union(fields, mode) => {
1345 ScalarValue::Union(None, fields.clone(), *mode)
1346 }
1347 DataType::Null => ScalarValue::Null,
1348 _ => {
1349 return _not_impl_err!(
1350 "Can't create a null scalar from data_type \"{data_type}\""
1351 );
1352 }
1353 })
1354 }
1355
1356 pub fn new_utf8(val: impl Into<String>) -> Self {
1358 ScalarValue::from(val.into())
1359 }
1360
1361 pub fn new_utf8view(val: impl Into<String>) -> Self {
1363 ScalarValue::Utf8View(Some(val.into()))
1364 }
1365
1366 pub fn new_interval_ym(years: i32, months: i32) -> Self {
1369 let val = IntervalYearMonthType::make_value(years, months);
1370 ScalarValue::IntervalYearMonth(Some(val))
1371 }
1372
1373 pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1376 let val = IntervalDayTimeType::make_value(days, millis);
1377 Self::IntervalDayTime(Some(val))
1378 }
1379
1380 pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1383 let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1384 ScalarValue::IntervalMonthDayNano(Some(val))
1385 }
1386
1387 pub fn new_timestamp<T: ArrowTimestampType>(
1390 value: Option<i64>,
1391 tz_opt: Option<Arc<str>>,
1392 ) -> Self {
1393 match T::UNIT {
1394 TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1395 TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1396 TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1397 TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1398 }
1399 }
1400
1401 pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1403 match datatype {
1404 DataType::Float16 => Ok(ScalarValue::from(f16::PI)),
1405 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1406 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1407 _ => _internal_err!("PI is not supported for data type: {}", datatype),
1408 }
1409 }
1410
1411 pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1413 match datatype {
1414 DataType::Float16 => Ok(ScalarValue::Float16(Some(consts::PI_UPPER_F16))),
1415 DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1416 DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1417 _ => {
1418 _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1419 }
1420 }
1421 }
1422
1423 pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1425 match datatype {
1426 DataType::Float16 => {
1427 Ok(ScalarValue::Float16(Some(consts::NEGATIVE_PI_LOWER_F16)))
1428 }
1429 DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1430 DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1431 _ => {
1432 _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1433 }
1434 }
1435 }
1436
1437 pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1439 match datatype {
1440 DataType::Float16 => {
1441 Ok(ScalarValue::Float16(Some(consts::FRAC_PI_2_UPPER_F16)))
1442 }
1443 DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1444 DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1445 _ => {
1446 _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1447 }
1448 }
1449 }
1450
1451 pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1453 match datatype {
1454 DataType::Float16 => Ok(ScalarValue::Float16(Some(
1455 consts::NEGATIVE_FRAC_PI_2_LOWER_F16,
1456 ))),
1457 DataType::Float32 => {
1458 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1459 }
1460 DataType::Float64 => {
1461 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1462 }
1463 _ => {
1464 _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1465 }
1466 }
1467 }
1468
1469 pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1471 match datatype {
1472 DataType::Float16 => Ok(ScalarValue::from(-f16::PI)),
1473 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1474 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1475 _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1476 }
1477 }
1478
1479 pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1481 match datatype {
1482 DataType::Float16 => Ok(ScalarValue::from(f16::FRAC_PI_2)),
1483 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1484 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1485 _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1486 }
1487 }
1488
1489 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1491 match datatype {
1492 DataType::Float16 => Ok(ScalarValue::from(-f16::FRAC_PI_2)),
1493 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1494 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1495 _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1496 }
1497 }
1498
1499 pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1501 match datatype {
1502 DataType::Float16 => Ok(ScalarValue::from(f16::INFINITY)),
1503 DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1504 DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1505 _ => {
1506 _internal_err!("Infinity is not supported for data type: {}", datatype)
1507 }
1508 }
1509 }
1510
1511 pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1513 match datatype {
1514 DataType::Float16 => Ok(ScalarValue::from(f16::NEG_INFINITY)),
1515 DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1516 DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1517 _ => {
1518 _internal_err!(
1519 "Negative Infinity is not supported for data type: {}",
1520 datatype
1521 )
1522 }
1523 }
1524 }
1525
1526 pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1528 Ok(match datatype {
1529 DataType::Boolean => ScalarValue::Boolean(Some(false)),
1530 DataType::Int8 => ScalarValue::Int8(Some(0)),
1531 DataType::Int16 => ScalarValue::Int16(Some(0)),
1532 DataType::Int32 => ScalarValue::Int32(Some(0)),
1533 DataType::Int64 => ScalarValue::Int64(Some(0)),
1534 DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1535 DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1536 DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1537 DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1538 DataType::Float16 => ScalarValue::Float16(Some(f16::ZERO)),
1539 DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1540 DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1541 DataType::Decimal32(precision, scale) => {
1542 ScalarValue::Decimal32(Some(0), *precision, *scale)
1543 }
1544 DataType::Decimal64(precision, scale) => {
1545 ScalarValue::Decimal64(Some(0), *precision, *scale)
1546 }
1547 DataType::Decimal128(precision, scale) => {
1548 ScalarValue::Decimal128(Some(0), *precision, *scale)
1549 }
1550 DataType::Decimal256(precision, scale) => {
1551 ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1552 }
1553 DataType::Timestamp(TimeUnit::Second, tz) => {
1554 ScalarValue::TimestampSecond(Some(0), tz.clone())
1555 }
1556 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1557 ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1558 }
1559 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1560 ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1561 }
1562 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1563 ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1564 }
1565 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1566 DataType::Time32(TimeUnit::Millisecond) => {
1567 ScalarValue::Time32Millisecond(Some(0))
1568 }
1569 DataType::Time64(TimeUnit::Microsecond) => {
1570 ScalarValue::Time64Microsecond(Some(0))
1571 }
1572 DataType::Time64(TimeUnit::Nanosecond) => {
1573 ScalarValue::Time64Nanosecond(Some(0))
1574 }
1575 DataType::Interval(IntervalUnit::YearMonth) => {
1576 ScalarValue::IntervalYearMonth(Some(0))
1577 }
1578 DataType::Interval(IntervalUnit::DayTime) => {
1579 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1580 }
1581 DataType::Interval(IntervalUnit::MonthDayNano) => {
1582 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1583 }
1584 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1585 DataType::Duration(TimeUnit::Millisecond) => {
1586 ScalarValue::DurationMillisecond(Some(0))
1587 }
1588 DataType::Duration(TimeUnit::Microsecond) => {
1589 ScalarValue::DurationMicrosecond(Some(0))
1590 }
1591 DataType::Duration(TimeUnit::Nanosecond) => {
1592 ScalarValue::DurationNanosecond(Some(0))
1593 }
1594 DataType::Date32 => ScalarValue::Date32(Some(0)),
1595 DataType::Date64 => ScalarValue::Date64(Some(0)),
1596 _ => {
1597 return _not_impl_err!(
1598 "Can't create a zero scalar from data_type \"{datatype}\""
1599 );
1600 }
1601 })
1602 }
1603
1604 pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1629 match datatype {
1630 DataType::Null => Ok(ScalarValue::Null),
1632
1633 DataType::Boolean
1635 | DataType::Int8
1636 | DataType::Int16
1637 | DataType::Int32
1638 | DataType::Int64
1639 | DataType::UInt8
1640 | DataType::UInt16
1641 | DataType::UInt32
1642 | DataType::UInt64
1643 | DataType::Float16
1644 | DataType::Float32
1645 | DataType::Float64
1646 | DataType::Decimal32(_, _)
1647 | DataType::Decimal64(_, _)
1648 | DataType::Decimal128(_, _)
1649 | DataType::Decimal256(_, _)
1650 | DataType::Timestamp(_, _)
1651 | DataType::Time32(_)
1652 | DataType::Time64(_)
1653 | DataType::Interval(_)
1654 | DataType::Duration(_)
1655 | DataType::Date32
1656 | DataType::Date64 => ScalarValue::new_zero(datatype),
1657
1658 DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1660 DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1661 DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1662
1663 DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1665 DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1666 DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1667
1668 DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1670 *size,
1671 Some(vec![0; *size as usize]),
1672 )),
1673
1674 DataType::List(field) => {
1676 let list =
1677 ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1678 Ok(ScalarValue::List(list))
1679 }
1680 DataType::FixedSizeList(field, _size) => {
1681 let empty_arr = new_empty_array(field.data_type());
1682 let values = Arc::new(
1683 SingleRowListArrayBuilder::new(empty_arr)
1684 .with_field(field)
1685 .build_fixed_size_list_array(0),
1686 );
1687 Ok(ScalarValue::FixedSizeList(values))
1688 }
1689 DataType::LargeList(field) => {
1690 let list = ScalarValue::new_large_list(&[], field.data_type());
1691 Ok(ScalarValue::LargeList(list))
1692 }
1693 DataType::ListView(field) => {
1694 let empty_arr = new_empty_array(field.data_type());
1695 let values = Arc::new(
1696 SingleRowListArrayBuilder::new(empty_arr)
1697 .with_field(field)
1698 .build_list_view_array(),
1699 );
1700 Ok(ScalarValue::ListView(values))
1701 }
1702 DataType::LargeListView(field) => {
1703 let empty_arr = new_empty_array(field.data_type());
1704 let values = Arc::new(
1705 SingleRowListArrayBuilder::new(empty_arr)
1706 .with_field(field)
1707 .build_large_list_view_array(),
1708 );
1709 Ok(ScalarValue::LargeListView(values))
1710 }
1711
1712 DataType::Struct(fields) => {
1714 let values = fields
1715 .iter()
1716 .map(|f| ScalarValue::new_default(f.data_type()))
1717 .collect::<Result<Vec<_>>>()?;
1718 Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1719 fields.clone(),
1720 values
1721 .into_iter()
1722 .map(|v| v.to_array())
1723 .collect::<Result<_>>()?,
1724 None,
1725 ))))
1726 }
1727
1728 DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1730 key_type.clone(),
1731 Box::new(ScalarValue::new_default(value_type)?),
1732 )),
1733
1734 DataType::RunEndEncoded(run_ends_field, value_field) => {
1735 Ok(ScalarValue::RunEndEncoded(
1736 Arc::clone(run_ends_field),
1737 Arc::clone(value_field),
1738 Box::new(ScalarValue::new_default(value_field.data_type())?),
1739 ))
1740 }
1741
1742 DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1744 ArrayData::new_empty(field.data_type()),
1745 )))),
1746
1747 DataType::Union(fields, mode) => {
1749 if let Some((type_id, field)) = fields.iter().next() {
1750 let default_value = ScalarValue::new_default(field.data_type())?;
1751 Ok(ScalarValue::Union(
1752 Some((type_id, Box::new(default_value))),
1753 fields.clone(),
1754 *mode,
1755 ))
1756 } else {
1757 _internal_err!("Union type must have at least one field")
1758 }
1759 }
1760 }
1761 }
1762
1763 pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1765 Ok(match datatype {
1766 DataType::Int8 => ScalarValue::Int8(Some(1)),
1767 DataType::Int16 => ScalarValue::Int16(Some(1)),
1768 DataType::Int32 => ScalarValue::Int32(Some(1)),
1769 DataType::Int64 => ScalarValue::Int64(Some(1)),
1770 DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1771 DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1772 DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1773 DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1774 DataType::Float16 => ScalarValue::Float16(Some(f16::ONE)),
1775 DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1776 DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1777 DataType::Decimal32(precision, scale) => {
1778 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1779 *precision, *scale,
1780 )?;
1781 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1782 match 10_i32.checked_pow(*scale as u32) {
1783 Some(value) => {
1784 ScalarValue::Decimal32(Some(value), *precision, *scale)
1785 }
1786 None => return _internal_err!("Unsupported scale {scale}"),
1787 }
1788 }
1789 DataType::Decimal64(precision, scale) => {
1790 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1791 *precision, *scale,
1792 )?;
1793 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1794 match i64::from(10).checked_pow(*scale as u32) {
1795 Some(value) => {
1796 ScalarValue::Decimal64(Some(value), *precision, *scale)
1797 }
1798 None => return _internal_err!("Unsupported scale {scale}"),
1799 }
1800 }
1801 DataType::Decimal128(precision, scale) => {
1802 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1803 *precision, *scale,
1804 )?;
1805 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1806 match i128::from(10).checked_pow(*scale as u32) {
1807 Some(value) => {
1808 ScalarValue::Decimal128(Some(value), *precision, *scale)
1809 }
1810 None => return _internal_err!("Unsupported scale {scale}"),
1811 }
1812 }
1813 DataType::Decimal256(precision, scale) => {
1814 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1815 *precision, *scale,
1816 )?;
1817 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1818 match i256::from(10).checked_pow(*scale as u32) {
1819 Some(value) => {
1820 ScalarValue::Decimal256(Some(value), *precision, *scale)
1821 }
1822 None => return _internal_err!("Unsupported scale {scale}"),
1823 }
1824 }
1825 _ => {
1826 return _not_impl_err!(
1827 "Can't create an one scalar from data_type \"{datatype}\""
1828 );
1829 }
1830 })
1831 }
1832
1833 pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1835 Ok(match datatype {
1836 DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1837 DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1838 DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1839 DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1840 DataType::Float16 => ScalarValue::Float16(Some(f16::NEG_ONE)),
1841 DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1842 DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1843 DataType::Decimal32(precision, scale) => {
1844 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1845 *precision, *scale,
1846 )?;
1847 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1848 match 10_i32.checked_pow(*scale as u32) {
1849 Some(value) => {
1850 ScalarValue::Decimal32(Some(-value), *precision, *scale)
1851 }
1852 None => return _internal_err!("Unsupported scale {scale}"),
1853 }
1854 }
1855 DataType::Decimal64(precision, scale) => {
1856 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1857 *precision, *scale,
1858 )?;
1859 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1860 match i64::from(10).checked_pow(*scale as u32) {
1861 Some(value) => {
1862 ScalarValue::Decimal64(Some(-value), *precision, *scale)
1863 }
1864 None => return _internal_err!("Unsupported scale {scale}"),
1865 }
1866 }
1867 DataType::Decimal128(precision, scale) => {
1868 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1869 *precision, *scale,
1870 )?;
1871 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1872 match i128::from(10).checked_pow(*scale as u32) {
1873 Some(value) => {
1874 ScalarValue::Decimal128(Some(-value), *precision, *scale)
1875 }
1876 None => return _internal_err!("Unsupported scale {scale}"),
1877 }
1878 }
1879 DataType::Decimal256(precision, scale) => {
1880 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1881 *precision, *scale,
1882 )?;
1883 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1884 match i256::from(10).checked_pow(*scale as u32) {
1885 Some(value) => {
1886 ScalarValue::Decimal256(Some(-value), *precision, *scale)
1887 }
1888 None => return _internal_err!("Unsupported scale {scale}"),
1889 }
1890 }
1891 _ => {
1892 return _not_impl_err!(
1893 "Can't create a negative one scalar from data_type \"{datatype}\""
1894 );
1895 }
1896 })
1897 }
1898
1899 pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1900 Ok(match datatype {
1901 DataType::Int8 => ScalarValue::Int8(Some(10)),
1902 DataType::Int16 => ScalarValue::Int16(Some(10)),
1903 DataType::Int32 => ScalarValue::Int32(Some(10)),
1904 DataType::Int64 => ScalarValue::Int64(Some(10)),
1905 DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1906 DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1907 DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1908 DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1909 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1910 DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1911 DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1912 DataType::Decimal32(precision, scale) => {
1913 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1914 *precision, *scale,
1915 )?;
1916 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1917 match 10_i32.checked_pow((*scale + 1) as u32) {
1918 Some(value) => {
1919 ScalarValue::Decimal32(Some(value), *precision, *scale)
1920 }
1921 None => return _internal_err!("Unsupported scale {scale}"),
1922 }
1923 }
1924 DataType::Decimal64(precision, scale) => {
1925 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1926 *precision, *scale,
1927 )?;
1928 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1929 match i64::from(10).checked_pow((*scale + 1) as u32) {
1930 Some(value) => {
1931 ScalarValue::Decimal64(Some(value), *precision, *scale)
1932 }
1933 None => return _internal_err!("Unsupported scale {scale}"),
1934 }
1935 }
1936 DataType::Decimal128(precision, scale) => {
1937 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1938 *precision, *scale,
1939 )?;
1940 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1941 match i128::from(10).checked_pow((*scale + 1) as u32) {
1942 Some(value) => {
1943 ScalarValue::Decimal128(Some(value), *precision, *scale)
1944 }
1945 None => return _internal_err!("Unsupported scale {scale}"),
1946 }
1947 }
1948 DataType::Decimal256(precision, scale) => {
1949 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1950 *precision, *scale,
1951 )?;
1952 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1953 match i256::from(10).checked_pow((*scale + 1) as u32) {
1954 Some(value) => {
1955 ScalarValue::Decimal256(Some(value), *precision, *scale)
1956 }
1957 None => return _internal_err!("Unsupported scale {scale}"),
1958 }
1959 }
1960 _ => {
1961 return _not_impl_err!(
1962 "Can't create a ten scalar from data_type \"{datatype}\""
1963 );
1964 }
1965 })
1966 }
1967
1968 pub fn data_type(&self) -> DataType {
1970 match self {
1971 ScalarValue::Boolean(_) => DataType::Boolean,
1972 ScalarValue::UInt8(_) => DataType::UInt8,
1973 ScalarValue::UInt16(_) => DataType::UInt16,
1974 ScalarValue::UInt32(_) => DataType::UInt32,
1975 ScalarValue::UInt64(_) => DataType::UInt64,
1976 ScalarValue::Int8(_) => DataType::Int8,
1977 ScalarValue::Int16(_) => DataType::Int16,
1978 ScalarValue::Int32(_) => DataType::Int32,
1979 ScalarValue::Int64(_) => DataType::Int64,
1980 ScalarValue::Decimal32(_, precision, scale) => {
1981 DataType::Decimal32(*precision, *scale)
1982 }
1983 ScalarValue::Decimal64(_, precision, scale) => {
1984 DataType::Decimal64(*precision, *scale)
1985 }
1986 ScalarValue::Decimal128(_, precision, scale) => {
1987 DataType::Decimal128(*precision, *scale)
1988 }
1989 ScalarValue::Decimal256(_, precision, scale) => {
1990 DataType::Decimal256(*precision, *scale)
1991 }
1992 ScalarValue::TimestampSecond(_, tz_opt) => {
1993 DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1994 }
1995 ScalarValue::TimestampMillisecond(_, tz_opt) => {
1996 DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1997 }
1998 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1999 DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
2000 }
2001 ScalarValue::TimestampNanosecond(_, tz_opt) => {
2002 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
2003 }
2004 ScalarValue::Float16(_) => DataType::Float16,
2005 ScalarValue::Float32(_) => DataType::Float32,
2006 ScalarValue::Float64(_) => DataType::Float64,
2007 ScalarValue::Utf8(_) => DataType::Utf8,
2008 ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
2009 ScalarValue::Utf8View(_) => DataType::Utf8View,
2010 ScalarValue::Binary(_) => DataType::Binary,
2011 ScalarValue::BinaryView(_) => DataType::BinaryView,
2012 ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
2013 ScalarValue::LargeBinary(_) => DataType::LargeBinary,
2014 ScalarValue::List(arr) => arr.data_type().to_owned(),
2015 ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
2016 ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
2017 ScalarValue::ListView(arr) => arr.data_type().to_owned(),
2018 ScalarValue::LargeListView(arr) => arr.data_type().to_owned(),
2019 ScalarValue::Struct(arr) => arr.data_type().to_owned(),
2020 ScalarValue::Map(arr) => arr.data_type().to_owned(),
2021 ScalarValue::Date32(_) => DataType::Date32,
2022 ScalarValue::Date64(_) => DataType::Date64,
2023 ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
2024 ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
2025 ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
2026 ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
2027 ScalarValue::IntervalYearMonth(_) => {
2028 DataType::Interval(IntervalUnit::YearMonth)
2029 }
2030 ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
2031 ScalarValue::IntervalMonthDayNano(_) => {
2032 DataType::Interval(IntervalUnit::MonthDayNano)
2033 }
2034 ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
2035 ScalarValue::DurationMillisecond(_) => {
2036 DataType::Duration(TimeUnit::Millisecond)
2037 }
2038 ScalarValue::DurationMicrosecond(_) => {
2039 DataType::Duration(TimeUnit::Microsecond)
2040 }
2041 ScalarValue::DurationNanosecond(_) => {
2042 DataType::Duration(TimeUnit::Nanosecond)
2043 }
2044 ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
2045 ScalarValue::Dictionary(k, v) => {
2046 DataType::Dictionary(k.clone(), Box::new(v.data_type()))
2047 }
2048 ScalarValue::RunEndEncoded(run_ends_field, value_field, _) => {
2049 DataType::RunEndEncoded(
2050 Arc::clone(run_ends_field),
2051 Arc::clone(value_field),
2052 )
2053 }
2054 ScalarValue::Null => DataType::Null,
2055 }
2056 }
2057
2058 #[inline]
2059 fn can_use_direct_add(lhs: &ScalarValue, rhs: &ScalarValue) -> bool {
2060 matches!(
2061 (lhs, rhs),
2062 (ScalarValue::Int8(_), ScalarValue::Int8(_))
2063 | (ScalarValue::Int16(_), ScalarValue::Int16(_))
2064 | (ScalarValue::Int32(_), ScalarValue::Int32(_))
2065 | (ScalarValue::Int64(_), ScalarValue::Int64(_))
2066 | (ScalarValue::UInt8(_), ScalarValue::UInt8(_))
2067 | (ScalarValue::UInt16(_), ScalarValue::UInt16(_))
2068 | (ScalarValue::UInt32(_), ScalarValue::UInt32(_))
2069 | (ScalarValue::UInt64(_), ScalarValue::UInt64(_))
2070 | (ScalarValue::Float16(_), ScalarValue::Float16(_))
2071 | (ScalarValue::Float32(_), ScalarValue::Float32(_))
2072 | (ScalarValue::Float64(_), ScalarValue::Float64(_))
2073 | (
2074 ScalarValue::Decimal32(_, _, _),
2075 ScalarValue::Decimal32(_, _, _)
2076 )
2077 | (
2078 ScalarValue::Decimal64(_, _, _),
2079 ScalarValue::Decimal64(_, _, _)
2080 )
2081 | (
2082 ScalarValue::Decimal128(_, _, _),
2083 ScalarValue::Decimal128(_, _, _),
2084 )
2085 | (
2086 ScalarValue::Decimal256(_, _, _),
2087 ScalarValue::Decimal256(_, _, _),
2088 )
2089 )
2090 }
2091
2092 #[inline]
2093 fn add_optional<T: ArrowNativeTypeOp>(
2094 lhs: &mut Option<T>,
2095 rhs: Option<T>,
2096 checked: bool,
2097 ) -> Result<()> {
2098 match rhs {
2099 Some(rhs) => {
2100 if let Some(lhs) = lhs.as_mut() {
2101 *lhs = if checked {
2102 lhs.add_checked(rhs).map_err(|e| arrow_datafusion_err!(e))?
2103 } else {
2104 lhs.add_wrapping(rhs)
2105 };
2106 }
2107 }
2108 None => *lhs = None,
2109 }
2110 Ok(())
2111 }
2112
2113 #[inline]
2114 fn add_decimal_values<T: DecimalType>(
2115 lhs_value: &mut Option<T::Native>,
2116 lhs_precision: &mut u8,
2117 lhs_scale: &mut i8,
2118 rhs_value: Option<T::Native>,
2119 rhs_precision: u8,
2120 rhs_scale: i8,
2121 ) -> Result<()>
2122 where
2123 T::Native: ArrowNativeTypeOp,
2124 {
2125 Self::validate_decimal_or_internal_err::<T>(*lhs_precision, *lhs_scale)?;
2126 Self::validate_decimal_or_internal_err::<T>(rhs_precision, rhs_scale)?;
2127
2128 let result_scale = (*lhs_scale).max(rhs_scale);
2129 let lhs_precision_delta = i16::from(*lhs_precision) - i16::from(*lhs_scale);
2132 let rhs_precision_delta = i16::from(rhs_precision) - i16::from(rhs_scale);
2133 let result_precision =
2134 (i16::from(result_scale) + lhs_precision_delta.max(rhs_precision_delta) + 1)
2135 .min(i16::from(T::MAX_PRECISION)) as u8;
2136
2137 Self::validate_decimal_or_internal_err::<T>(result_precision, result_scale)?;
2138
2139 let lhs_mul = T::Native::usize_as(10)
2140 .pow_checked((result_scale - *lhs_scale) as u32)
2141 .map_err(|e| arrow_datafusion_err!(e))?;
2142 let rhs_mul = T::Native::usize_as(10)
2143 .pow_checked((result_scale - rhs_scale) as u32)
2144 .map_err(|e| arrow_datafusion_err!(e))?;
2145
2146 let result_value = match (*lhs_value, rhs_value) {
2147 (Some(lhs_value), Some(rhs_value)) => Some(
2148 lhs_value
2149 .mul_checked(lhs_mul)
2150 .and_then(|lhs| {
2151 rhs_value
2152 .mul_checked(rhs_mul)
2153 .and_then(|rhs| lhs.add_checked(rhs))
2154 })
2155 .map_err(|e| arrow_datafusion_err!(e))?,
2156 ),
2157 _ => None,
2158 };
2159
2160 *lhs_value = result_value;
2161 *lhs_precision = result_precision;
2162 *lhs_scale = result_scale;
2163
2164 Ok(())
2165 }
2166
2167 #[inline]
2168 fn try_add_in_place_impl(
2169 &mut self,
2170 other: &ScalarValue,
2171 checked: bool,
2172 ) -> Result<bool> {
2173 match (self, other) {
2174 (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => {
2175 Self::add_optional(lhs, *rhs, checked)?;
2176 }
2177 (ScalarValue::Int16(lhs), ScalarValue::Int16(rhs)) => {
2178 Self::add_optional(lhs, *rhs, checked)?;
2179 }
2180 (ScalarValue::Int32(lhs), ScalarValue::Int32(rhs)) => {
2181 Self::add_optional(lhs, *rhs, checked)?;
2182 }
2183 (ScalarValue::Int64(lhs), ScalarValue::Int64(rhs)) => {
2184 Self::add_optional(lhs, *rhs, checked)?;
2185 }
2186 (ScalarValue::UInt8(lhs), ScalarValue::UInt8(rhs)) => {
2187 Self::add_optional(lhs, *rhs, checked)?;
2188 }
2189 (ScalarValue::UInt16(lhs), ScalarValue::UInt16(rhs)) => {
2190 Self::add_optional(lhs, *rhs, checked)?;
2191 }
2192 (ScalarValue::UInt32(lhs), ScalarValue::UInt32(rhs)) => {
2193 Self::add_optional(lhs, *rhs, checked)?;
2194 }
2195 (ScalarValue::UInt64(lhs), ScalarValue::UInt64(rhs)) => {
2196 Self::add_optional(lhs, *rhs, checked)?;
2197 }
2198 (ScalarValue::Float16(lhs), ScalarValue::Float16(rhs)) => {
2199 Self::add_optional(lhs, *rhs, checked)?;
2200 }
2201 (ScalarValue::Float32(lhs), ScalarValue::Float32(rhs)) => {
2202 Self::add_optional(lhs, *rhs, checked)?;
2203 }
2204 (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
2205 Self::add_optional(lhs, *rhs, checked)?;
2206 }
2207 (
2208 ScalarValue::Decimal32(lhs, p, s),
2209 ScalarValue::Decimal32(rhs, rhs_p, rhs_s),
2210 ) => {
2211 Self::add_decimal_values::<Decimal32Type>(
2212 lhs, p, s, *rhs, *rhs_p, *rhs_s,
2213 )?;
2214 }
2215 (
2216 ScalarValue::Decimal64(lhs, p, s),
2217 ScalarValue::Decimal64(rhs, rhs_p, rhs_s),
2218 ) => {
2219 Self::add_decimal_values::<Decimal64Type>(
2220 lhs, p, s, *rhs, *rhs_p, *rhs_s,
2221 )?;
2222 }
2223 (
2224 ScalarValue::Decimal128(lhs, p, s),
2225 ScalarValue::Decimal128(rhs, rhs_p, rhs_s),
2226 ) => {
2227 Self::add_decimal_values::<Decimal128Type>(
2228 lhs, p, s, *rhs, *rhs_p, *rhs_s,
2229 )?;
2230 }
2231 (
2232 ScalarValue::Decimal256(lhs, p, s),
2233 ScalarValue::Decimal256(rhs, rhs_p, rhs_s),
2234 ) => {
2235 Self::add_decimal_values::<Decimal256Type>(
2236 lhs, p, s, *rhs, *rhs_p, *rhs_s,
2237 )?;
2238 }
2239 _ => return Ok(false),
2240 }
2241
2242 Ok(true)
2243 }
2244
2245 #[inline]
2246 pub(crate) fn try_add_wrapping_in_place(
2247 &mut self,
2248 other: &ScalarValue,
2249 ) -> Result<bool> {
2250 self.try_add_in_place_impl(other, false)
2251 }
2252
2253 #[inline]
2254 pub(crate) fn try_add_checked_in_place(
2255 &mut self,
2256 other: &ScalarValue,
2257 ) -> Result<bool> {
2258 self.try_add_in_place_impl(other, true)
2259 }
2260
2261 pub fn arithmetic_negate(&self) -> Result<Self> {
2263 fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
2264 v: T,
2265 ctx: impl Fn() -> String,
2266 ) -> Result<T> {
2267 v.neg_checked()
2268 .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
2269 }
2270 match self {
2271 ScalarValue::Int8(None)
2272 | ScalarValue::Int16(None)
2273 | ScalarValue::Int32(None)
2274 | ScalarValue::Int64(None)
2275 | ScalarValue::Float16(None)
2276 | ScalarValue::Float32(None)
2277 | ScalarValue::Float64(None) => Ok(self.clone()),
2278 ScalarValue::Float16(Some(v)) => Ok(ScalarValue::Float16(Some(-v))),
2279 ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
2280 ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
2281 ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
2282 ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
2283 ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
2284 ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
2285 ScalarValue::IntervalYearMonth(Some(v)) => Ok(
2286 ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
2287 format!("In negation of IntervalYearMonth({v})")
2288 })?)),
2289 ),
2290 ScalarValue::IntervalDayTime(Some(v)) => {
2291 let (days, ms) = IntervalDayTimeType::to_parts(*v);
2292 let val = IntervalDayTimeType::make_value(
2293 neg_checked_with_ctx(days, || {
2294 format!("In negation of days {days} in IntervalDayTime")
2295 })?,
2296 neg_checked_with_ctx(ms, || {
2297 format!("In negation of milliseconds {ms} in IntervalDayTime")
2298 })?,
2299 );
2300 Ok(ScalarValue::IntervalDayTime(Some(val)))
2301 }
2302 ScalarValue::IntervalMonthDayNano(Some(v)) => {
2303 let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
2304 let val = IntervalMonthDayNanoType::make_value(
2305 neg_checked_with_ctx(months, || {
2306 format!("In negation of months {months} of IntervalMonthDayNano")
2307 })?,
2308 neg_checked_with_ctx(days, || {
2309 format!("In negation of days {days} of IntervalMonthDayNano")
2310 })?,
2311 neg_checked_with_ctx(nanos, || {
2312 format!("In negation of nanos {nanos} of IntervalMonthDayNano")
2313 })?,
2314 );
2315 Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
2316 }
2317 ScalarValue::Decimal32(Some(v), precision, scale) => {
2318 Ok(ScalarValue::Decimal32(
2319 Some(neg_checked_with_ctx(*v, || {
2320 format!("In negation of Decimal32({v}, {precision}, {scale})")
2321 })?),
2322 *precision,
2323 *scale,
2324 ))
2325 }
2326 ScalarValue::Decimal64(Some(v), precision, scale) => {
2327 Ok(ScalarValue::Decimal64(
2328 Some(neg_checked_with_ctx(*v, || {
2329 format!("In negation of Decimal64({v}, {precision}, {scale})")
2330 })?),
2331 *precision,
2332 *scale,
2333 ))
2334 }
2335 ScalarValue::Decimal128(Some(v), precision, scale) => {
2336 Ok(ScalarValue::Decimal128(
2337 Some(neg_checked_with_ctx(*v, || {
2338 format!("In negation of Decimal128({v}, {precision}, {scale})")
2339 })?),
2340 *precision,
2341 *scale,
2342 ))
2343 }
2344 ScalarValue::Decimal256(Some(v), precision, scale) => {
2345 Ok(ScalarValue::Decimal256(
2346 Some(neg_checked_with_ctx(*v, || {
2347 format!("In negation of Decimal256({v}, {precision}, {scale})")
2348 })?),
2349 *precision,
2350 *scale,
2351 ))
2352 }
2353 ScalarValue::TimestampSecond(Some(v), tz) => {
2354 Ok(ScalarValue::TimestampSecond(
2355 Some(neg_checked_with_ctx(*v, || {
2356 format!("In negation of TimestampSecond({v})")
2357 })?),
2358 tz.clone(),
2359 ))
2360 }
2361 ScalarValue::TimestampNanosecond(Some(v), tz) => {
2362 Ok(ScalarValue::TimestampNanosecond(
2363 Some(neg_checked_with_ctx(*v, || {
2364 format!("In negation of TimestampNanoSecond({v})")
2365 })?),
2366 tz.clone(),
2367 ))
2368 }
2369 ScalarValue::TimestampMicrosecond(Some(v), tz) => {
2370 Ok(ScalarValue::TimestampMicrosecond(
2371 Some(neg_checked_with_ctx(*v, || {
2372 format!("In negation of TimestampMicroSecond({v})")
2373 })?),
2374 tz.clone(),
2375 ))
2376 }
2377 ScalarValue::TimestampMillisecond(Some(v), tz) => {
2378 Ok(ScalarValue::TimestampMillisecond(
2379 Some(neg_checked_with_ctx(*v, || {
2380 format!("In negation of TimestampMilliSecond({v})")
2381 })?),
2382 tz.clone(),
2383 ))
2384 }
2385 value => _internal_err!(
2386 "Can not run arithmetic negative on scalar value {value:?}"
2387 ),
2388 }
2389 }
2390
2391 pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2396 let other = other.borrow();
2397 if Self::can_use_direct_add(self, other) {
2398 let mut result = self.clone();
2399 if result.try_add_wrapping_in_place(other)? {
2400 return Ok(result);
2401 }
2402 debug_assert!(false, "fast-path eligibility drifted from implementation");
2403 }
2404
2405 let r = add_wrapping(&self.to_scalar()?, &other.to_scalar()?)?;
2406 Self::try_from_array(r.as_ref(), 0)
2407 }
2408
2409 pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2414 let other = other.borrow();
2415 if Self::can_use_direct_add(self, other) {
2416 let mut result = self.clone();
2417 if result.try_add_checked_in_place(other)? {
2418 return Ok(result);
2419 }
2420 debug_assert!(false, "fast-path eligibility drifted from implementation");
2421 }
2422
2423 let r = add(&self.to_scalar()?, &other.to_scalar()?)?;
2424 Self::try_from_array(r.as_ref(), 0)
2425 }
2426
2427 pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2432 let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2433 Self::try_from_array(r.as_ref(), 0)
2434 }
2435
2436 pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2441 let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2442 Self::try_from_array(r.as_ref(), 0)
2443 }
2444
2445 pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2450 let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2451 Self::try_from_array(r.as_ref(), 0)
2452 }
2453
2454 pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2459 let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2460 Self::try_from_array(r.as_ref(), 0)
2461 }
2462
2463 pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2471 let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2472 Self::try_from_array(r.as_ref(), 0)
2473 }
2474
2475 pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2483 let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2484 Self::try_from_array(r.as_ref(), 0)
2485 }
2486
2487 pub fn is_unsigned(&self) -> bool {
2488 matches!(
2489 self,
2490 ScalarValue::UInt8(_)
2491 | ScalarValue::UInt16(_)
2492 | ScalarValue::UInt32(_)
2493 | ScalarValue::UInt64(_)
2494 )
2495 }
2496
2497 pub fn is_null(&self) -> bool {
2499 match self {
2500 ScalarValue::Boolean(v) => v.is_none(),
2501 ScalarValue::Null => true,
2502 ScalarValue::Float16(v) => v.is_none(),
2503 ScalarValue::Float32(v) => v.is_none(),
2504 ScalarValue::Float64(v) => v.is_none(),
2505 ScalarValue::Decimal32(v, _, _) => v.is_none(),
2506 ScalarValue::Decimal64(v, _, _) => v.is_none(),
2507 ScalarValue::Decimal128(v, _, _) => v.is_none(),
2508 ScalarValue::Decimal256(v, _, _) => v.is_none(),
2509 ScalarValue::Int8(v) => v.is_none(),
2510 ScalarValue::Int16(v) => v.is_none(),
2511 ScalarValue::Int32(v) => v.is_none(),
2512 ScalarValue::Int64(v) => v.is_none(),
2513 ScalarValue::UInt8(v) => v.is_none(),
2514 ScalarValue::UInt16(v) => v.is_none(),
2515 ScalarValue::UInt32(v) => v.is_none(),
2516 ScalarValue::UInt64(v) => v.is_none(),
2517 ScalarValue::Utf8(v)
2518 | ScalarValue::Utf8View(v)
2519 | ScalarValue::LargeUtf8(v) => v.is_none(),
2520 ScalarValue::Binary(v)
2521 | ScalarValue::BinaryView(v)
2522 | ScalarValue::FixedSizeBinary(_, v)
2523 | ScalarValue::LargeBinary(v) => v.is_none(),
2524 ScalarValue::List(arr) => arr.len() == arr.null_count(),
2527 ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2528 ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2529 ScalarValue::ListView(arr) => arr.len() == arr.null_count(),
2530 ScalarValue::LargeListView(arr) => arr.len() == arr.null_count(),
2531 ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2532 ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2533 ScalarValue::Date32(v) => v.is_none(),
2534 ScalarValue::Date64(v) => v.is_none(),
2535 ScalarValue::Time32Second(v) => v.is_none(),
2536 ScalarValue::Time32Millisecond(v) => v.is_none(),
2537 ScalarValue::Time64Microsecond(v) => v.is_none(),
2538 ScalarValue::Time64Nanosecond(v) => v.is_none(),
2539 ScalarValue::TimestampSecond(v, _) => v.is_none(),
2540 ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2541 ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2542 ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2543 ScalarValue::IntervalYearMonth(v) => v.is_none(),
2544 ScalarValue::IntervalDayTime(v) => v.is_none(),
2545 ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2546 ScalarValue::DurationSecond(v) => v.is_none(),
2547 ScalarValue::DurationMillisecond(v) => v.is_none(),
2548 ScalarValue::DurationMicrosecond(v) => v.is_none(),
2549 ScalarValue::DurationNanosecond(v) => v.is_none(),
2550 ScalarValue::Union(v, _, _) => match v {
2551 Some((_, s)) => s.is_null(),
2552 None => true,
2553 },
2554 ScalarValue::Dictionary(_, v) => v.is_null(),
2555 ScalarValue::RunEndEncoded(_, _, v) => v.is_null(),
2556 }
2557 }
2558
2559 pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2567 match (self, other) {
2568 (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2569 (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2570 (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2571 (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2572 (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2573 (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2574 (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2575 (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2576 (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2578 Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2579 }
2580 (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2581 Some((l - r).abs().round() as _)
2582 }
2583 (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2584 Some((l - r).abs().round() as _)
2585 }
2586 (Self::Date32(Some(l)), Self::Date32(Some(r))) => Some(l.abs_diff(*r) as _),
2587 (Self::Date64(Some(l)), Self::Date64(Some(r))) => Some(l.abs_diff(*r) as _),
2588 (Self::TimestampSecond(Some(l), _), Self::TimestampSecond(Some(r), _)) => {
2591 Some(l.abs_diff(*r) as _)
2592 }
2593 (
2594 Self::TimestampMillisecond(Some(l), _),
2595 Self::TimestampMillisecond(Some(r), _),
2596 ) => Some(l.abs_diff(*r) as _),
2597 (
2598 Self::TimestampMicrosecond(Some(l), _),
2599 Self::TimestampMicrosecond(Some(r), _),
2600 ) => Some(l.abs_diff(*r) as _),
2601 (
2602 Self::TimestampNanosecond(Some(l), _),
2603 Self::TimestampNanosecond(Some(r), _),
2604 ) => Some(l.abs_diff(*r) as _),
2605 (
2606 Self::Decimal128(Some(l), lprecision, lscale),
2607 Self::Decimal128(Some(r), rprecision, rscale),
2608 ) => {
2609 if lprecision == rprecision && lscale == rscale {
2610 l.checked_sub(*r)?.checked_abs()?.to_usize()
2611 } else {
2612 None
2613 }
2614 }
2615 (
2616 Self::Decimal256(Some(l), lprecision, lscale),
2617 Self::Decimal256(Some(r), rprecision, rscale),
2618 ) => {
2619 if lprecision == rprecision && lscale == rscale {
2620 l.checked_sub(*r)?.checked_abs()?.to_usize()
2621 } else {
2622 None
2623 }
2624 }
2625 _ => None,
2626 }
2627 }
2628
2629 pub fn to_array(&self) -> Result<ArrayRef> {
2635 self.to_array_of_size(1)
2636 }
2637
2638 pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2664 Ok(Scalar::new(self.to_array_of_size(1)?))
2665 }
2666
    /// Builds a single Arrow array from an iterator of [`ScalarValue`]s.
    ///
    /// The data type of the output array is taken from the first scalar in
    /// the iterator; every subsequent scalar must be of the matching variant.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator is empty, if a scalar does not match
    /// the variant expected for the first scalar's data type, if the
    /// underlying Arrow construction fails, or (not-implemented error) for
    /// the `Time32`/`Time64` unit combinations listed at the end of the match.
    ///
    /// # Panics
    ///
    /// The dictionary branch asserts that the built values array has the
    /// dictionary's value type, and the dictionary / run-end-encoded branches
    /// treat an unexpected key or run-end type as unreachable.
    pub fn iter_to_array(
        scalars: impl IntoIterator<Item = ScalarValue>,
    ) -> Result<ArrayRef> {
        // Peekable so the first element can be inspected without consuming it.
        let mut scalars = scalars.into_iter().peekable();

        // The first scalar decides the output data type.
        let data_type = match scalars.peek() {
            None => {
                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
            }
            Some(sv) => sv.data_type(),
        };

        // Collects the payload of every `ScalarValue::$SCALAR_TY(v)` into an
        // `$ARRAY_TY`, erroring on any scalar of a different variant.
        // Uses `scalars` and `data_type` from the enclosing scope.
        macro_rules! build_array_primitive {
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                {
                    let array = scalars
                        .map(|sv| {
                            if let ScalarValue::$SCALAR_TY(v) = sv {
                                Ok(v)
                            } else {
                                _exec_err!(
                                    "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                    data_type,
                                    sv
                                )
                            }
                        })
                        .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array)
                }
            }};
        }

        // Same as `build_array_primitive!` for two-field variants
        // `ScalarValue::$SCALAR_TY(v, _)`; the second field of each scalar is
        // ignored and `$TZ` is applied to the finished array instead.
        macro_rules! build_array_primitive_tz {
            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
                {
                    let array = scalars
                        .map(|sv| {
                            if let ScalarValue::$SCALAR_TY(v, _) = sv {
                                Ok(v)
                            } else {
                                _exec_err!(
                                    "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                    data_type,
                                    sv
                                )
                            }
                        })
                        .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array.with_timezone_opt($TZ.clone()))
                }
            }};
        }

        // Same shape as `build_array_primitive!`, used below for the string
        // and binary array types.
        macro_rules! build_array_string {
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                {
                    let array = scalars
                        .map(|sv| {
                            if let ScalarValue::$SCALAR_TY(v) = sv {
                                Ok(v)
                            } else {
                                _exec_err!(
                                    "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                    data_type,
                                    sv
                                )
                            }
                        })
                        .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array)
                }
            }};
        }

        let array: ArrayRef = match &data_type {
            // Decimals are built by dedicated helpers that apply the
            // precision and scale of the first scalar.
            DataType::Decimal32(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal64(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal128(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal256(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
            DataType::Utf8 => build_array_string!(StringArray, Utf8),
            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
            DataType::Binary => build_array_string!(BinaryArray, Binary),
            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
            DataType::Time32(TimeUnit::Second) => {
                build_array_primitive!(Time32SecondArray, Time32Second)
            }
            DataType::Time32(TimeUnit::Millisecond) => {
                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
            }
            DataType::Time64(TimeUnit::Microsecond) => {
                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
            }
            DataType::Time64(TimeUnit::Nanosecond) => {
                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
            }
            DataType::Timestamp(TimeUnit::Second, tz) => {
                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
            }
            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
                build_array_primitive_tz!(
                    TimestampMillisecondArray,
                    TimestampMillisecond,
                    tz
                )
            }
            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
                build_array_primitive_tz!(
                    TimestampMicrosecondArray,
                    TimestampMicrosecond,
                    tz
                )
            }
            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
                build_array_primitive_tz!(
                    TimestampNanosecondArray,
                    TimestampNanosecond,
                    tz
                )
            }
            DataType::Duration(TimeUnit::Second) => {
                build_array_primitive!(DurationSecondArray, DurationSecond)
            }
            DataType::Duration(TimeUnit::Millisecond) => {
                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
            }
            DataType::Duration(TimeUnit::Microsecond) => {
                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
            }
            DataType::Duration(TimeUnit::Nanosecond) => {
                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
            }
            DataType::Interval(IntervalUnit::DayTime) => {
                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
            }
            DataType::Interval(IntervalUnit::YearMonth) => {
                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
            }
            DataType::Interval(IntervalUnit::MonthDayNano) => {
                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
            }
            DataType::FixedSizeList(_, _) => {
                // Convert each scalar to its own array, then concatenate.
                let mut arrays =
                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
                // Take the field and list size from the first array whose
                // row 0 is not null.
                let first_non_null_data_type = arrays
                    .iter()
                    .find(|sv| !sv.is_null(0))
                    .map(|sv| sv.data_type().to_owned());
                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
                    // Replace every null entry with a one-row null array of
                    // that fixed-size-list type before concatenating.
                    for array in arrays.iter_mut() {
                        if array.is_null(0) {
                            *array = Arc::new(FixedSizeListArray::new_null(
                                Arc::clone(&f),
                                l,
                                1,
                            ));
                        }
                    }
                }
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
                arrow::compute::concat(arrays.as_slice())?
            }
            DataType::List(_)
            | DataType::LargeList(_)
            | DataType::ListView(_)
            | DataType::LargeListView(_)
            | DataType::Map(_, _)
            | DataType::Struct(_)
            | DataType::Union(_, _) => {
                // Convert each scalar to its own array, then concatenate.
                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
                arrow::compute::concat(arrays.as_slice())?
            }
            DataType::Dictionary(key_type, value_type) => {
                // Unwrap every dictionary scalar to its inner value, checking
                // that the key type matches that of the first scalar.
                let value_scalars = scalars
                    .map(|scalar| match scalar {
                        ScalarValue::Dictionary(inner_key_type, scalar) => {
                            if &inner_key_type == key_type {
                                Ok(*scalar)
                            } else {
                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
                            }
                        }
                        _ => {
                            _exec_err!(
                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
                            )
                        }
                    })
                    .collect::<Result<Vec<_>>>()?;

                // Build the values array recursively from the inner scalars.
                let values = Self::iter_to_array(value_scalars)?;
                assert_eq!(values.data_type(), value_type.as_ref());

                // Wrap the values in a dictionary array with the requested key type.
                match key_type.as_ref() {
                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
                }
            }
            DataType::RunEndEncoded(run_ends_field, value_field) => {
                // Builds a run array by collapsing consecutive equal inner
                // scalars into runs.
                fn make_run_array<R: RunEndIndexType>(
                    scalars: impl IntoIterator<Item = ScalarValue>,
                    run_ends_field: &FieldRef,
                    values_field: &FieldRef,
                ) -> Result<ArrayRef> {
                    let mut scalars = scalars.into_iter();

                    let mut run_ends = vec![];
                    let mut value_scalars = vec![];

                    // `len` counts the scalars consumed so far; it starts at
                    // one because the first scalar is taken just below.
                    let mut len = R::Native::ONE;
                    let mut current =
                        if let Some(ScalarValue::RunEndEncoded(_, _, scalar)) =
                            scalars.next()
                        {
                            *scalar
                        } else {
                            // The caller peeked a first scalar whose data type
                            // is RunEndEncoded before dispatching here.
                            unreachable!()
                        };
                    for scalar in scalars {
                        // Every further scalar must carry the same run-ends
                        // and value fields as the first one.
                        let scalar = match scalar {
                            ScalarValue::RunEndEncoded(
                                inner_run_ends_field,
                                inner_value_field,
                                scalar,
                            ) if &inner_run_ends_field == run_ends_field
                                && &inner_value_field == values_field =>
                            {
                                *scalar
                            }
                            _ => {
                                return _exec_err!(
                                    "Expected RunEndEncoded scalar with run-ends field {run_ends_field} but got: {scalar:?}"
                                );
                            }
                        };

                        if scalar != current {
                            // The value changed: close the current run at the
                            // number of scalars seen before this one.
                            run_ends.push(len);
                            value_scalars.push(current);
                            current = scalar;
                        }

                        len = len.add_checked(R::Native::ONE).map_err(|_| {
                            DataFusionError::Execution(format!(
                                "Cannot construct RunArray: Overflows run-ends type {}",
                                run_ends_field.data_type()
                            ))
                        })?;
                    }

                    // Close the final run.
                    run_ends.push(len);
                    value_scalars.push(current);

                    let run_ends = PrimitiveArray::<R>::from_iter_values(run_ends);
                    let values = ScalarValue::iter_to_array(value_scalars)?;

                    // Assemble the run array from its two children (run ends,
                    // then values) using the original fields.
                    let dt = DataType::RunEndEncoded(
                        Arc::clone(run_ends_field),
                        Arc::clone(values_field),
                    );
                    let builder = ArrayDataBuilder::new(dt)
                        .len(RunArray::logical_len(&run_ends))
                        .add_child_data(run_ends.to_data())
                        .add_child_data(values.to_data());
                    let run_array = RunArray::<R>::from(builder.build()?);

                    Ok(Arc::new(run_array))
                }

                // Dispatch on the physical run-ends type.
                match run_ends_field.data_type() {
                    DataType::Int16 => {
                        make_run_array::<Int16Type>(scalars, run_ends_field, value_field)?
                    }
                    DataType::Int32 => {
                        make_run_array::<Int32Type>(scalars, run_ends_field, value_field)?
                    }
                    DataType::Int64 => {
                        make_run_array::<Int64Type>(scalars, run_ends_field, value_field)?
                    }
                    dt => unreachable!("Invalid run-ends type: {dt}"),
                }
            }
            DataType::FixedSizeBinary(size) => {
                // Collect the optional byte payloads first, then build the
                // array with the width taken from the first scalar's data type.
                let array = scalars
                    .map(|sv| {
                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
                            Ok(v)
                        } else {
                            _exec_err!(
                                "Inconsistent types in ScalarValue::iter_to_array. \
                                Expected {data_type}, got {sv:?}"
                            )
                        }
                    })
                    .collect::<Result<Vec<_>>>()?;
                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
                    array.into_iter(),
                    *size,
                )?;
                Arc::new(array)
            }
            // These time/unit combinations have no branch above and are
            // reported as not implemented.
            DataType::Time32(TimeUnit::Microsecond)
            | DataType::Time32(TimeUnit::Nanosecond)
            | DataType::Time64(TimeUnit::Second)
            | DataType::Time64(TimeUnit::Millisecond) => {
                return _not_impl_err!(
                    "Unsupported creation of {:?} array from ScalarValue {:?}",
                    data_type,
                    scalars.peek()
                );
            }
        };
        Ok(array)
    }
3069
3070 fn iter_to_null_array(
3071 scalars: impl IntoIterator<Item = ScalarValue>,
3072 ) -> Result<ArrayRef> {
3073 let length = scalars.into_iter().try_fold(
3074 0usize,
3075 |r, element: ScalarValue| match element {
3076 ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
3077 s => {
3078 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3079 }
3080 },
3081 )?;
3082 Ok(new_null_array(&DataType::Null, length))
3083 }
3084
3085 fn iter_to_decimal32_array(
3086 scalars: impl IntoIterator<Item = ScalarValue>,
3087 precision: u8,
3088 scale: i8,
3089 ) -> Result<Decimal32Array> {
3090 let array = scalars
3091 .into_iter()
3092 .map(|element: ScalarValue| match element {
3093 ScalarValue::Decimal32(v1, _, _) => Ok(v1),
3094 s => {
3095 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3096 }
3097 })
3098 .collect::<Result<Decimal32Array>>()?
3099 .with_precision_and_scale(precision, scale)?;
3100 Ok(array)
3101 }
3102
3103 fn iter_to_decimal64_array(
3104 scalars: impl IntoIterator<Item = ScalarValue>,
3105 precision: u8,
3106 scale: i8,
3107 ) -> Result<Decimal64Array> {
3108 let array = scalars
3109 .into_iter()
3110 .map(|element: ScalarValue| match element {
3111 ScalarValue::Decimal64(v1, _, _) => Ok(v1),
3112 s => {
3113 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3114 }
3115 })
3116 .collect::<Result<Decimal64Array>>()?
3117 .with_precision_and_scale(precision, scale)?;
3118 Ok(array)
3119 }
3120
3121 fn iter_to_decimal128_array(
3122 scalars: impl IntoIterator<Item = ScalarValue>,
3123 precision: u8,
3124 scale: i8,
3125 ) -> Result<Decimal128Array> {
3126 let array = scalars
3127 .into_iter()
3128 .map(|element: ScalarValue| match element {
3129 ScalarValue::Decimal128(v1, _, _) => Ok(v1),
3130 s => {
3131 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
3132 }
3133 })
3134 .collect::<Result<Decimal128Array>>()?
3135 .with_precision_and_scale(precision, scale)?;
3136 Ok(array)
3137 }
3138
3139 fn iter_to_decimal256_array(
3140 scalars: impl IntoIterator<Item = ScalarValue>,
3141 precision: u8,
3142 scale: i8,
3143 ) -> Result<Decimal256Array> {
3144 let array = scalars
3145 .into_iter()
3146 .map(|element: ScalarValue| match element {
3147 ScalarValue::Decimal256(v1, _, _) => Ok(v1),
3148 s => {
3149 _internal_err!(
3150 "Expected ScalarValue::Decimal256 element. Received {s:?}"
3151 )
3152 }
3153 })
3154 .collect::<Result<Decimal256Array>>()?
3155 .with_precision_and_scale(precision, scale)?;
3156 Ok(array)
3157 }
3158
3159 pub fn new_list(
3186 values: &[ScalarValue],
3187 data_type: &DataType,
3188 nullable: bool,
3189 ) -> Arc<ListArray> {
3190 let values = if values.is_empty() {
3191 new_empty_array(data_type)
3192 } else {
3193 Self::iter_to_array(values.iter().cloned()).unwrap()
3194 };
3195 Arc::new(
3196 SingleRowListArrayBuilder::new(values)
3197 .with_nullable(nullable)
3198 .build_list_array(),
3199 )
3200 }
3201
3202 pub fn new_list_nullable(
3204 values: &[ScalarValue],
3205 data_type: &DataType,
3206 ) -> Arc<ListArray> {
3207 Self::new_list(values, data_type, true)
3208 }
3209
3210 pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
3214 let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
3215 Self::List(Arc::new(ListArray::from(ArrayData::new_null(
3216 &data_type, null_len,
3217 ))))
3218 }
3219
3220 pub fn new_list_from_iter(
3248 values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
3249 data_type: &DataType,
3250 nullable: bool,
3251 ) -> Arc<ListArray> {
3252 let values = if values.len() == 0 {
3253 new_empty_array(data_type)
3254 } else {
3255 Self::iter_to_array(values).unwrap()
3256 };
3257 Arc::new(
3258 SingleRowListArrayBuilder::new(values)
3259 .with_nullable(nullable)
3260 .build_list_array(),
3261 )
3262 }
3263
3264 pub fn new_large_list(
3292 values: &[ScalarValue],
3293 data_type: &DataType,
3294 ) -> Arc<LargeListArray> {
3295 let values = if values.is_empty() {
3296 new_empty_array(data_type)
3297 } else {
3298 Self::iter_to_array(values.iter().cloned()).unwrap()
3299 };
3300 Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
3301 }
3302
3303 pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
3313 Ok(match self {
3314 ScalarValue::Decimal32(Some(e), precision, scale) => Arc::new(
3315 Decimal32Array::from_value(*e, size)
3316 .with_precision_and_scale(*precision, *scale)?,
3317 ),
3318 ScalarValue::Decimal32(None, precision, scale) => {
3319 new_null_array(&DataType::Decimal32(*precision, *scale), size)
3320 }
3321 ScalarValue::Decimal64(Some(e), precision, scale) => Arc::new(
3322 Decimal64Array::from_value(*e, size)
3323 .with_precision_and_scale(*precision, *scale)?,
3324 ),
3325 ScalarValue::Decimal64(None, precision, scale) => {
3326 new_null_array(&DataType::Decimal64(*precision, *scale), size)
3327 }
3328 ScalarValue::Decimal128(Some(e), precision, scale) => Arc::new(
3329 Decimal128Array::from_value(*e, size)
3330 .with_precision_and_scale(*precision, *scale)?,
3331 ),
3332 ScalarValue::Decimal128(None, precision, scale) => {
3333 new_null_array(&DataType::Decimal128(*precision, *scale), size)
3334 }
3335 ScalarValue::Decimal256(Some(e), precision, scale) => Arc::new(
3336 Decimal256Array::from_value(*e, size)
3337 .with_precision_and_scale(*precision, *scale)?,
3338 ),
3339 ScalarValue::Decimal256(None, precision, scale) => {
3340 new_null_array(&DataType::Decimal256(*precision, *scale), size)
3341 }
3342
3343 ScalarValue::Boolean(e) => match e {
3344 None => new_null_array(&DataType::Boolean, size),
3345 Some(true) => {
3346 Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
3347 as ArrayRef
3348 }
3349 Some(false) => {
3350 Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
3351 as ArrayRef
3352 }
3353 },
3354 ScalarValue::Float64(e) => {
3355 build_array_from_option!(Float64, Float64Array, e, size)
3356 }
3357 ScalarValue::Float32(e) => {
3358 build_array_from_option!(Float32, Float32Array, e, size)
3359 }
3360 ScalarValue::Float16(e) => {
3361 build_array_from_option!(Float16, Float16Array, e, size)
3362 }
3363 ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
3364 ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
3365 ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
3366 ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
3367 ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
3368 ScalarValue::UInt16(e) => {
3369 build_array_from_option!(UInt16, UInt16Array, e, size)
3370 }
3371 ScalarValue::UInt32(e) => {
3372 build_array_from_option!(UInt32, UInt32Array, e, size)
3373 }
3374 ScalarValue::UInt64(e) => {
3375 build_array_from_option!(UInt64, UInt64Array, e, size)
3376 }
3377 ScalarValue::TimestampSecond(e, tz_opt) => {
3378 build_timestamp_array_from_option!(
3379 TimeUnit::Second,
3380 tz_opt.clone(),
3381 TimestampSecondArray,
3382 e,
3383 size
3384 )
3385 }
3386 ScalarValue::TimestampMillisecond(e, tz_opt) => {
3387 build_timestamp_array_from_option!(
3388 TimeUnit::Millisecond,
3389 tz_opt.clone(),
3390 TimestampMillisecondArray,
3391 e,
3392 size
3393 )
3394 }
3395
3396 ScalarValue::TimestampMicrosecond(e, tz_opt) => {
3397 build_timestamp_array_from_option!(
3398 TimeUnit::Microsecond,
3399 tz_opt.clone(),
3400 TimestampMicrosecondArray,
3401 e,
3402 size
3403 )
3404 }
3405 ScalarValue::TimestampNanosecond(e, tz_opt) => {
3406 build_timestamp_array_from_option!(
3407 TimeUnit::Nanosecond,
3408 tz_opt.clone(),
3409 TimestampNanosecondArray,
3410 e,
3411 size
3412 )
3413 }
3414 ScalarValue::Utf8(e) => match e {
3415 Some(value) => Arc::new(StringArray::new_repeated(value, size)),
3416 None => new_null_array(&DataType::Utf8, size),
3417 },
3418 ScalarValue::Utf8View(e) => match e {
3419 Some(value) => {
3420 let mut builder = StringViewBuilder::with_capacity(size);
3421 builder.try_append_value_n(value, size)?;
3422 let array = builder.finish();
3423 Arc::new(array)
3424 }
3425 None => new_null_array(&DataType::Utf8View, size),
3426 },
3427 ScalarValue::LargeUtf8(e) => match e {
3428 Some(value) => Arc::new(LargeStringArray::new_repeated(value, size)),
3429 None => new_null_array(&DataType::LargeUtf8, size),
3430 },
3431 ScalarValue::Binary(e) => match e {
3432 Some(value) => {
3433 Arc::new(BinaryArray::new_repeated(value.as_slice(), size))
3434 }
3435 None => new_null_array(&DataType::Binary, size),
3436 },
3437 ScalarValue::BinaryView(e) => match e {
3438 Some(value) => {
3439 let mut builder = BinaryViewBuilder::with_capacity(size);
3440 builder.try_append_value_n(value, size)?;
3441 let array = builder.finish();
3442 Arc::new(array)
3443 }
3444 None => new_null_array(&DataType::BinaryView, size),
3445 },
3446 ScalarValue::FixedSizeBinary(s, e) => match e {
3447 Some(value) => Arc::new(
3448 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
3449 repeat_n(Some(value.as_slice()), size),
3450 *s,
3451 )
3452 .unwrap(),
3453 ),
3454 None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
3455 },
3456 ScalarValue::LargeBinary(e) => match e {
3457 Some(value) => {
3458 Arc::new(LargeBinaryArray::new_repeated(value.as_slice(), size))
3459 }
3460 None => new_null_array(&DataType::LargeBinary, size),
3461 },
3462 ScalarValue::List(arr) => {
3463 if size == 1 {
3464 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3465 }
3466 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3467 }
3468 ScalarValue::LargeList(arr) => {
3469 if size == 1 {
3470 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3471 }
3472 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3473 }
3474 ScalarValue::FixedSizeList(arr) => {
3475 if size == 1 {
3476 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3477 }
3478 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3479 }
3480 ScalarValue::ListView(arr) => {
3481 if size == 1 {
3482 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3483 }
3484 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3485 }
3486 ScalarValue::LargeListView(arr) => {
3487 if size == 1 {
3488 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3489 }
3490 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3491 }
3492 ScalarValue::Struct(arr) => {
3493 if size == 1 {
3494 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3495 }
3496 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3497 }
3498 ScalarValue::Map(arr) => {
3499 if size == 1 {
3500 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3501 }
3502 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3503 }
3504 ScalarValue::Date32(e) => {
3505 build_array_from_option!(Date32, Date32Array, e, size)
3506 }
3507 ScalarValue::Date64(e) => {
3508 build_array_from_option!(Date64, Date64Array, e, size)
3509 }
3510 ScalarValue::Time32Second(e) => {
3511 build_array_from_option!(
3512 Time32,
3513 TimeUnit::Second,
3514 Time32SecondArray,
3515 e,
3516 size
3517 )
3518 }
3519 ScalarValue::Time32Millisecond(e) => {
3520 build_array_from_option!(
3521 Time32,
3522 TimeUnit::Millisecond,
3523 Time32MillisecondArray,
3524 e,
3525 size
3526 )
3527 }
3528 ScalarValue::Time64Microsecond(e) => {
3529 build_array_from_option!(
3530 Time64,
3531 TimeUnit::Microsecond,
3532 Time64MicrosecondArray,
3533 e,
3534 size
3535 )
3536 }
3537 ScalarValue::Time64Nanosecond(e) => {
3538 build_array_from_option!(
3539 Time64,
3540 TimeUnit::Nanosecond,
3541 Time64NanosecondArray,
3542 e,
3543 size
3544 )
3545 }
3546 ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3547 Interval,
3548 IntervalUnit::DayTime,
3549 IntervalDayTimeArray,
3550 e,
3551 size
3552 ),
3553 ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3554 Interval,
3555 IntervalUnit::YearMonth,
3556 IntervalYearMonthArray,
3557 e,
3558 size
3559 ),
3560 ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3561 Interval,
3562 IntervalUnit::MonthDayNano,
3563 IntervalMonthDayNanoArray,
3564 e,
3565 size
3566 ),
3567 ScalarValue::DurationSecond(e) => build_array_from_option!(
3568 Duration,
3569 TimeUnit::Second,
3570 DurationSecondArray,
3571 e,
3572 size
3573 ),
3574 ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3575 Duration,
3576 TimeUnit::Millisecond,
3577 DurationMillisecondArray,
3578 e,
3579 size
3580 ),
3581 ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3582 Duration,
3583 TimeUnit::Microsecond,
3584 DurationMicrosecondArray,
3585 e,
3586 size
3587 ),
3588 ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3589 Duration,
3590 TimeUnit::Nanosecond,
3591 DurationNanosecondArray,
3592 e,
3593 size
3594 ),
3595 ScalarValue::Union(value, fields, mode) => match value {
3596 Some((v_id, value)) => {
3597 let mut new_fields = Vec::with_capacity(fields.len());
3598 let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3599 for (f_id, field) in fields.iter() {
3600 let ar = if f_id == *v_id {
3601 value.to_array_of_size(size)?
3602 } else {
3603 let dt = field.data_type();
3604 match mode {
3605 UnionMode::Sparse => new_null_array(dt, size),
3606 UnionMode::Dense => new_null_array(dt, 0),
3609 }
3610 };
3611 let field = (**field).clone();
3612 child_arrays.push(ar);
3613 new_fields.push(field.clone());
3614 }
3615 let type_ids = repeat_n(*v_id, size);
3616 let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3617 let value_offsets = match mode {
3618 UnionMode::Sparse => None,
3619 UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3620 };
3621 let ar = UnionArray::try_new(
3622 fields.clone(),
3623 type_ids,
3624 value_offsets,
3625 child_arrays,
3626 )
3627 .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3628 Arc::new(ar)
3629 }
3630 None => new_null_array(&DataType::Union(fields.clone(), *mode), size),
3631 },
3632 ScalarValue::Dictionary(key_type, v) => {
3633 match key_type.as_ref() {
3635 DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3636 DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3637 DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3638 DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3639 DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3640 DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3641 DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3642 DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3643 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3644 }
3645 }
3646 ScalarValue::RunEndEncoded(run_ends_field, values_field, value) => {
3647 fn make_run_array<R: RunEndIndexType>(
3648 run_ends_field: &Arc<Field>,
3649 values_field: &Arc<Field>,
3650 value: &ScalarValue,
3651 size: usize,
3652 ) -> Result<ArrayRef> {
3653 let size_native = R::Native::from_usize(size)
3654 .ok_or_else(|| DataFusionError::Execution(format!("Cannot construct RunArray of size {size}: Overflows run-ends type {}", R::DATA_TYPE)))?;
3655 let values = value.to_array_of_size(1)?;
3656 let run_ends =
3657 PrimitiveArray::<R>::new(vec![size_native].into(), None);
3658
3659 let dt = DataType::RunEndEncoded(
3661 Arc::clone(run_ends_field),
3662 Arc::clone(values_field),
3663 );
3664 let builder = ArrayDataBuilder::new(dt)
3665 .len(size)
3666 .add_child_data(run_ends.to_data())
3667 .add_child_data(values.to_data());
3668 let run_array = RunArray::<R>::from(builder.build()?);
3669
3670 Ok(Arc::new(run_array))
3671 }
3672 match run_ends_field.data_type() {
3673 DataType::Int16 => make_run_array::<Int16Type>(
3674 run_ends_field,
3675 values_field,
3676 value,
3677 size,
3678 )?,
3679 DataType::Int32 => make_run_array::<Int32Type>(
3680 run_ends_field,
3681 values_field,
3682 value,
3683 size,
3684 )?,
3685 DataType::Int64 => make_run_array::<Int64Type>(
3686 run_ends_field,
3687 values_field,
3688 value,
3689 size,
3690 )?,
3691 dt => unreachable!("Invalid run-ends type: {dt}"),
3692 }
3693 }
3694 ScalarValue::Null => get_or_create_cached_null_array(size),
3695 })
3696 }
3697
3698 fn get_decimal_value_from_array(
3699 array: &dyn Array,
3700 index: usize,
3701 precision: u8,
3702 scale: i8,
3703 ) -> Result<ScalarValue> {
3704 match array.data_type() {
3705 DataType::Decimal32(_, _) => {
3706 let array = as_decimal32_array(array)?;
3707 if array.is_null(index) {
3708 Ok(ScalarValue::Decimal32(None, precision, scale))
3709 } else {
3710 let value = array.value(index);
3711 Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3712 }
3713 }
3714 DataType::Decimal64(_, _) => {
3715 let array = as_decimal64_array(array)?;
3716 if array.is_null(index) {
3717 Ok(ScalarValue::Decimal64(None, precision, scale))
3718 } else {
3719 let value = array.value(index);
3720 Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3721 }
3722 }
3723 DataType::Decimal128(_, _) => {
3724 let array = as_decimal128_array(array)?;
3725 if array.is_null(index) {
3726 Ok(ScalarValue::Decimal128(None, precision, scale))
3727 } else {
3728 let value = array.value(index);
3729 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3730 }
3731 }
3732 DataType::Decimal256(_, _) => {
3733 let array = as_decimal256_array(array)?;
3734 if array.is_null(index) {
3735 Ok(ScalarValue::Decimal256(None, precision, scale))
3736 } else {
3737 let value = array.value(index);
3738 Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3739 }
3740 }
3741 other => {
3742 unreachable!("Invalid type isn't decimal: {other:?}")
3743 }
3744 }
3745 }
3746
3747 fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3750 if size == 0 {
3751 return Ok(arr.slice(0, 0));
3752 }
3753
3754 let n = arr.len() as u32;
3761 let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
3762 Ok(arrow::compute::take(arr, &indices, None)?)
3763 }
3764
3765 pub fn convert_array_to_scalar_vec(
3867 array: &dyn Array,
3868 ) -> Result<Vec<Option<Vec<Self>>>> {
3869 fn map_element(
3870 nested_array: Option<ArrayRef>,
3871 ) -> Result<Option<Vec<ScalarValue>>> {
3872 nested_array
3873 .map(|array| {
3874 (0..array.len())
3875 .map(|i| ScalarValue::try_from_array(&array, i))
3876 .collect::<Result<Vec<_>>>()
3877 })
3878 .transpose()
3879 }
3880
3881 match array.data_type() {
3882 DataType::List(_) => array.as_list::<i32>().iter().map(map_element).collect(),
3883 DataType::LargeList(_) => {
3884 array.as_list::<i64>().iter().map(map_element).collect()
3885 }
3886 DataType::ListView(_) => array
3887 .as_list_view::<i32>()
3888 .iter()
3889 .map(map_element)
3890 .collect(),
3891 DataType::LargeListView(_) => array
3892 .as_list_view::<i64>()
3893 .iter()
3894 .map(map_element)
3895 .collect(),
3896 _ => _internal_err!(
3897 "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList/ListView/LargeListView type"
3898 ),
3899 }
3900 }
3901
3902 #[deprecated(
3903 since = "46.0.0",
3904 note = "This function is obsolete. Use `to_array` instead"
3905 )]
3906 pub fn raw_data(&self) -> Result<ArrayRef> {
3907 match self {
3908 ScalarValue::List(arr) => Ok(arr.to_owned()),
3909 _ => _internal_err!("ScalarValue is not a list"),
3910 }
3911 }
3912
3913 pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3915 if array.is_null(index) {
3917 return array.data_type().try_into();
3918 }
3919
3920 Ok(match array.data_type() {
3921 DataType::Null => ScalarValue::Null,
3922 DataType::Decimal32(precision, scale) => {
3923 ScalarValue::get_decimal_value_from_array(
3924 array, index, *precision, *scale,
3925 )?
3926 }
3927 DataType::Decimal64(precision, scale) => {
3928 ScalarValue::get_decimal_value_from_array(
3929 array, index, *precision, *scale,
3930 )?
3931 }
3932 DataType::Decimal128(precision, scale) => {
3933 ScalarValue::get_decimal_value_from_array(
3934 array, index, *precision, *scale,
3935 )?
3936 }
3937 DataType::Decimal256(precision, scale) => {
3938 ScalarValue::get_decimal_value_from_array(
3939 array, index, *precision, *scale,
3940 )?
3941 }
3942 DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3943 DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3944 DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3945 DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3946 DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3947 DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3948 DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3949 DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3950 DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3951 DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3952 DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3953 DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3954 DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3955 DataType::LargeBinary => {
3956 typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3957 }
3958 DataType::BinaryView => {
3959 typed_cast!(array, index, as_binary_view_array, BinaryView)?
3960 }
3961 DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3962 DataType::LargeUtf8 => {
3963 typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3964 }
3965 DataType::Utf8View => {
3966 typed_cast!(array, index, as_string_view_array, Utf8View)?
3967 }
3968 DataType::List(field) => {
3969 let list_array = array.as_list::<i32>();
3970 let nested_array = list_array.value(index);
3971 SingleRowListArrayBuilder::new(nested_array)
3973 .with_field(field)
3974 .build_list_scalar()
3975 }
3976 DataType::LargeList(field) => {
3977 let list_array = as_large_list_array(array)?;
3978 let nested_array = list_array.value(index);
3979 SingleRowListArrayBuilder::new(nested_array)
3981 .with_field(field)
3982 .build_large_list_scalar()
3983 }
3984 DataType::FixedSizeList(field, _) => {
3986 let list_array = as_fixed_size_list_array(array)?;
3987 let nested_array = list_array.value(index);
3988 let list_size = nested_array.len();
3990 SingleRowListArrayBuilder::new(nested_array)
3991 .with_field(field)
3992 .build_fixed_size_list_scalar(list_size)
3993 }
3994 DataType::ListView(field) => {
3995 let list_array = as_list_view_array(array)?;
3996 let nested_array = list_array.value(index);
3997 SingleRowListArrayBuilder::new(nested_array)
3999 .with_field(field)
4000 .build_list_view_scalar()
4001 }
4002 DataType::LargeListView(field) => {
4003 let list_array = as_large_list_view_array(array)?;
4004 let nested_array = list_array.value(index);
4005 SingleRowListArrayBuilder::new(nested_array)
4007 .with_field(field)
4008 .build_large_list_view_scalar()
4009 }
4010 DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
4011 DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
4012 DataType::Time32(TimeUnit::Second) => {
4013 typed_cast!(array, index, as_time32_second_array, Time32Second)?
4014 }
4015 DataType::Time32(TimeUnit::Millisecond) => {
4016 typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
4017 }
4018 DataType::Time64(TimeUnit::Microsecond) => {
4019 typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
4020 }
4021 DataType::Time64(TimeUnit::Nanosecond) => {
4022 typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
4023 }
4024 DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
4025 array,
4026 index,
4027 as_timestamp_second_array,
4028 TimestampSecond,
4029 tz_opt
4030 )?,
4031 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
4032 array,
4033 index,
4034 as_timestamp_millisecond_array,
4035 TimestampMillisecond,
4036 tz_opt
4037 )?,
4038 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
4039 array,
4040 index,
4041 as_timestamp_microsecond_array,
4042 TimestampMicrosecond,
4043 tz_opt
4044 )?,
4045 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
4046 array,
4047 index,
4048 as_timestamp_nanosecond_array,
4049 TimestampNanosecond,
4050 tz_opt
4051 )?,
4052 DataType::Dictionary(key_type, _) => {
4053 let (values_array, values_index) = match key_type.as_ref() {
4054 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4055 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4056 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4057 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4058 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4059 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4060 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4061 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4062 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4063 };
4064 let value = match values_index {
4066 Some(values_index) => {
4067 ScalarValue::try_from_array(values_array, values_index)
4068 }
4069 None => values_array.data_type().try_into(),
4071 }?;
4072
4073 Self::Dictionary(key_type.clone(), Box::new(value))
4074 }
4075 DataType::RunEndEncoded(run_ends_field, value_field) => {
4076 if index > array.len() {
4079 return _exec_err!(
4080 "Index {index} out of bounds for array of length {}",
4081 array.len()
4082 );
4083 }
4084 let scalar = downcast_run_array!(
4085 array => {
4086 let index = array.get_physical_index(index);
4087 ScalarValue::try_from_array(array.values(), index)?
4088 },
4089 dt => unreachable!("Invalid run-ends type: {dt}")
4090 );
4091 Self::RunEndEncoded(
4092 Arc::clone(run_ends_field),
4093 Arc::clone(value_field),
4094 Box::new(scalar),
4095 )
4096 }
4097 DataType::Struct(_) => {
4098 let a = array.slice(index, 1);
4099 Self::Struct(Arc::new(a.as_struct().to_owned()))
4100 }
4101 DataType::FixedSizeBinary(_) => {
4102 let array = as_fixed_size_binary_array(array)?;
4103 let size = match array.data_type() {
4104 DataType::FixedSizeBinary(size) => *size,
4105 _ => unreachable!(),
4106 };
4107 ScalarValue::FixedSizeBinary(
4108 size,
4109 match array.is_null(index) {
4110 true => None,
4111 false => Some(array.value(index).into()),
4112 },
4113 )
4114 }
4115 DataType::Interval(IntervalUnit::DayTime) => {
4116 typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
4117 }
4118 DataType::Interval(IntervalUnit::YearMonth) => {
4119 typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
4120 }
4121 DataType::Interval(IntervalUnit::MonthDayNano) => {
4122 typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
4123 }
4124
4125 DataType::Duration(TimeUnit::Second) => {
4126 typed_cast!(array, index, as_duration_second_array, DurationSecond)?
4127 }
4128 DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
4129 array,
4130 index,
4131 as_duration_millisecond_array,
4132 DurationMillisecond
4133 )?,
4134 DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
4135 array,
4136 index,
4137 as_duration_microsecond_array,
4138 DurationMicrosecond
4139 )?,
4140 DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
4141 array,
4142 index,
4143 as_duration_nanosecond_array,
4144 DurationNanosecond
4145 )?,
4146 DataType::Map(_, _) => {
4147 let a = array.slice(index, 1);
4148 Self::Map(Arc::new(a.as_map().to_owned()))
4149 }
4150 DataType::Union(fields, mode) => {
4151 let array = as_union_array(array)?;
4152 let ti = array.type_id(index);
4153 let index = array.value_offset(index);
4154 let value = ScalarValue::try_from_array(array.child(ti), index)?;
4155 ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
4156 }
4157 other => {
4158 return _not_impl_err!(
4159 "Can't create a scalar from array of type \"{other:?}\""
4160 );
4161 }
4162 })
4163 }
4164
4165 pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
4167 ScalarValue::from(value).cast_to(target_type)
4168 }
4169
4170 pub fn try_as_str(&self) -> Option<Option<&str>> {
4204 let v = match self {
4205 ScalarValue::Utf8(v) => v,
4206 ScalarValue::LargeUtf8(v) => v,
4207 ScalarValue::Utf8View(v) => v,
4208 ScalarValue::Dictionary(_, v) => return v.try_as_str(),
4209 ScalarValue::RunEndEncoded(_, _, v) => return v.try_as_str(),
4210 _ => return None,
4211 };
4212 Some(v.as_ref().map(|v| v.as_str()))
4213 }
4214
4215 pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
4217 self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
4218 }
4219
4220 pub fn cast_to_with_options(
4222 &self,
4223 target_type: &DataType,
4224 cast_options: &CastOptions<'static>,
4225 ) -> Result<Self> {
4226 let source_type = self.data_type();
4227 if let Some(multiplier) = date_to_timestamp_multiplier(&source_type, target_type)
4228 && let Some(value) = self.date_scalar_value_as_i64()
4229 {
4230 ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?;
4231 }
4232
4233 let scalar_array = self.to_array()?;
4234
4235 let cast_arr = if crate::nested_struct::requires_nested_struct_cast(
4239 scalar_array.data_type(),
4240 target_type,
4241 ) {
4242 crate::nested_struct::cast_column(&scalar_array, target_type, cast_options)?
4243 } else {
4244 cast_with_options(&scalar_array, target_type, cast_options)?
4245 };
4246
4247 ScalarValue::try_from_array(&cast_arr, 0)
4248 }
4249
4250 fn date_scalar_value_as_i64(&self) -> Option<i64> {
4251 match self {
4252 ScalarValue::Date32(Some(value)) => Some(i64::from(*value)),
4253 ScalarValue::Date64(Some(value)) => Some(*value),
4254 _ => None,
4255 }
4256 }
4257
4258 fn eq_array_decimal32(
4259 array: &ArrayRef,
4260 index: usize,
4261 value: Option<&i32>,
4262 precision: u8,
4263 scale: i8,
4264 ) -> Result<bool> {
4265 let array = as_decimal32_array(array)?;
4266 if array.precision() != precision || array.scale() != scale {
4267 return Ok(false);
4268 }
4269 let is_null = array.is_null(index);
4270 if let Some(v) = value {
4271 Ok(!array.is_null(index) && array.value(index) == *v)
4272 } else {
4273 Ok(is_null)
4274 }
4275 }
4276
4277 fn eq_array_decimal64(
4278 array: &ArrayRef,
4279 index: usize,
4280 value: Option<&i64>,
4281 precision: u8,
4282 scale: i8,
4283 ) -> Result<bool> {
4284 let array = as_decimal64_array(array)?;
4285 if array.precision() != precision || array.scale() != scale {
4286 return Ok(false);
4287 }
4288 let is_null = array.is_null(index);
4289 if let Some(v) = value {
4290 Ok(!array.is_null(index) && array.value(index) == *v)
4291 } else {
4292 Ok(is_null)
4293 }
4294 }
4295
4296 fn eq_array_decimal(
4297 array: &ArrayRef,
4298 index: usize,
4299 value: Option<&i128>,
4300 precision: u8,
4301 scale: i8,
4302 ) -> Result<bool> {
4303 let array = as_decimal128_array(array)?;
4304 if array.precision() != precision || array.scale() != scale {
4305 return Ok(false);
4306 }
4307 let is_null = array.is_null(index);
4308 if let Some(v) = value {
4309 Ok(!array.is_null(index) && array.value(index) == *v)
4310 } else {
4311 Ok(is_null)
4312 }
4313 }
4314
4315 fn eq_array_decimal256(
4316 array: &ArrayRef,
4317 index: usize,
4318 value: Option<&i256>,
4319 precision: u8,
4320 scale: i8,
4321 ) -> Result<bool> {
4322 let array = as_decimal256_array(array)?;
4323 if array.precision() != precision || array.scale() != scale {
4324 return Ok(false);
4325 }
4326 let is_null = array.is_null(index);
4327 if let Some(v) = value {
4328 Ok(!array.is_null(index) && array.value(index) == *v)
4329 } else {
4330 Ok(is_null)
4331 }
4332 }
4333
4334 #[inline]
4361 pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
4362 Ok(match self {
4363 ScalarValue::Decimal32(v, precision, scale) => {
4364 ScalarValue::eq_array_decimal32(
4365 array,
4366 index,
4367 v.as_ref(),
4368 *precision,
4369 *scale,
4370 )?
4371 }
4372 ScalarValue::Decimal64(v, precision, scale) => {
4373 ScalarValue::eq_array_decimal64(
4374 array,
4375 index,
4376 v.as_ref(),
4377 *precision,
4378 *scale,
4379 )?
4380 }
4381 ScalarValue::Decimal128(v, precision, scale) => {
4382 ScalarValue::eq_array_decimal(
4383 array,
4384 index,
4385 v.as_ref(),
4386 *precision,
4387 *scale,
4388 )?
4389 }
4390 ScalarValue::Decimal256(v, precision, scale) => {
4391 ScalarValue::eq_array_decimal256(
4392 array,
4393 index,
4394 v.as_ref(),
4395 *precision,
4396 *scale,
4397 )?
4398 }
4399 ScalarValue::Boolean(val) => {
4400 eq_array_primitive!(array, index, as_boolean_array, val)?
4401 }
4402 ScalarValue::Float16(val) => {
4403 eq_array_primitive!(array, index, as_float16_array, val)?
4404 }
4405 ScalarValue::Float32(val) => {
4406 eq_array_primitive!(array, index, as_float32_array, val)?
4407 }
4408 ScalarValue::Float64(val) => {
4409 eq_array_primitive!(array, index, as_float64_array, val)?
4410 }
4411 ScalarValue::Int8(val) => {
4412 eq_array_primitive!(array, index, as_int8_array, val)?
4413 }
4414 ScalarValue::Int16(val) => {
4415 eq_array_primitive!(array, index, as_int16_array, val)?
4416 }
4417 ScalarValue::Int32(val) => {
4418 eq_array_primitive!(array, index, as_int32_array, val)?
4419 }
4420 ScalarValue::Int64(val) => {
4421 eq_array_primitive!(array, index, as_int64_array, val)?
4422 }
4423 ScalarValue::UInt8(val) => {
4424 eq_array_primitive!(array, index, as_uint8_array, val)?
4425 }
4426 ScalarValue::UInt16(val) => {
4427 eq_array_primitive!(array, index, as_uint16_array, val)?
4428 }
4429 ScalarValue::UInt32(val) => {
4430 eq_array_primitive!(array, index, as_uint32_array, val)?
4431 }
4432 ScalarValue::UInt64(val) => {
4433 eq_array_primitive!(array, index, as_uint64_array, val)?
4434 }
4435 ScalarValue::Utf8(val) => {
4436 eq_array_primitive!(array, index, as_string_array, val)?
4437 }
4438 ScalarValue::Utf8View(val) => {
4439 eq_array_primitive!(array, index, as_string_view_array, val)?
4440 }
4441 ScalarValue::LargeUtf8(val) => {
4442 eq_array_primitive!(array, index, as_large_string_array, val)?
4443 }
4444 ScalarValue::Binary(val) => {
4445 eq_array_primitive!(array, index, as_binary_array, val)?
4446 }
4447 ScalarValue::BinaryView(val) => {
4448 eq_array_primitive!(array, index, as_binary_view_array, val)?
4449 }
4450 ScalarValue::FixedSizeBinary(_, val) => {
4451 eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
4452 }
4453 ScalarValue::LargeBinary(val) => {
4454 eq_array_primitive!(array, index, as_large_binary_array, val)?
4455 }
4456 ScalarValue::List(arr) => {
4457 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4458 }
4459 ScalarValue::LargeList(arr) => {
4460 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4461 }
4462 ScalarValue::FixedSizeList(arr) => {
4463 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4464 }
4465 ScalarValue::ListView(arr) => {
4466 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4467 }
4468 ScalarValue::LargeListView(arr) => {
4469 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4470 }
4471 ScalarValue::Struct(arr) => {
4472 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4473 }
4474 ScalarValue::Map(arr) => {
4475 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4476 }
4477 ScalarValue::Date32(val) => {
4478 eq_array_primitive!(array, index, as_date32_array, val)?
4479 }
4480 ScalarValue::Date64(val) => {
4481 eq_array_primitive!(array, index, as_date64_array, val)?
4482 }
4483 ScalarValue::Time32Second(val) => {
4484 eq_array_primitive!(array, index, as_time32_second_array, val)?
4485 }
4486 ScalarValue::Time32Millisecond(val) => {
4487 eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
4488 }
4489 ScalarValue::Time64Microsecond(val) => {
4490 eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
4491 }
4492 ScalarValue::Time64Nanosecond(val) => {
4493 eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
4494 }
4495 ScalarValue::TimestampSecond(val, _) => {
4496 eq_array_primitive!(array, index, as_timestamp_second_array, val)?
4497 }
4498 ScalarValue::TimestampMillisecond(val, _) => {
4499 eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
4500 }
4501 ScalarValue::TimestampMicrosecond(val, _) => {
4502 eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
4503 }
4504 ScalarValue::TimestampNanosecond(val, _) => {
4505 eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
4506 }
4507 ScalarValue::IntervalYearMonth(val) => {
4508 eq_array_primitive!(array, index, as_interval_ym_array, val)?
4509 }
4510 ScalarValue::IntervalDayTime(val) => {
4511 eq_array_primitive!(array, index, as_interval_dt_array, val)?
4512 }
4513 ScalarValue::IntervalMonthDayNano(val) => {
4514 eq_array_primitive!(array, index, as_interval_mdn_array, val)?
4515 }
4516 ScalarValue::DurationSecond(val) => {
4517 eq_array_primitive!(array, index, as_duration_second_array, val)?
4518 }
4519 ScalarValue::DurationMillisecond(val) => {
4520 eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
4521 }
4522 ScalarValue::DurationMicrosecond(val) => {
4523 eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
4524 }
4525 ScalarValue::DurationNanosecond(val) => {
4526 eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
4527 }
4528 ScalarValue::Union(value, _, _) => {
4529 let array = as_union_array(array)?;
4530 let ti = array.type_id(index);
4531 let index = array.value_offset(index);
4532 if let Some((ti_v, value)) = value {
4533 ti_v == &ti && value.eq_array(array.child(ti), index)?
4534 } else {
4535 array.child(ti).is_null(index)
4536 }
4537 }
4538 ScalarValue::Dictionary(key_type, v) => {
4539 let (values_array, values_index) = match key_type.as_ref() {
4540 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4541 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4542 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4543 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4544 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4545 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4546 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4547 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4548 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4549 };
4550 match values_index {
4552 Some(values_index) => v.eq_array(values_array, values_index)?,
4553 None => v.is_null(),
4554 }
4555 }
4556 ScalarValue::RunEndEncoded(run_ends_field, _, value) => {
4557 if index > array.len() {
4560 return _exec_err!(
4561 "Index {index} out of bounds for array of length {}",
4562 array.len()
4563 );
4564 }
4565 match run_ends_field.data_type() {
4566 DataType::Int16 => {
4567 let array = as_run_array::<Int16Type>(array)?;
4568 let index = array.get_physical_index(index);
4569 value.eq_array(array.values(), index)?
4570 }
4571 DataType::Int32 => {
4572 let array = as_run_array::<Int32Type>(array)?;
4573 let index = array.get_physical_index(index);
4574 value.eq_array(array.values(), index)?
4575 }
4576 DataType::Int64 => {
4577 let array = as_run_array::<Int64Type>(array)?;
4578 let index = array.get_physical_index(index);
4579 value.eq_array(array.values(), index)?
4580 }
4581 dt => unreachable!("Invalid run-ends type: {dt}"),
4582 }
4583 }
4584 ScalarValue::Null => array.is_null(index),
4585 })
4586 }
4587
4588 fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
4589 let right = arr2.slice(index, 1);
4590 arr1 == &right
4591 }
4592
4593 pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
4598 self.partial_cmp(other).ok_or_else(|| {
4599 _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
4600 })
4601 }
4602
4603 pub fn size(&self) -> usize {
4606 size_of_val(self)
4607 + match self {
4608 ScalarValue::Null
4609 | ScalarValue::Boolean(_)
4610 | ScalarValue::Float16(_)
4611 | ScalarValue::Float32(_)
4612 | ScalarValue::Float64(_)
4613 | ScalarValue::Decimal32(_, _, _)
4614 | ScalarValue::Decimal64(_, _, _)
4615 | ScalarValue::Decimal128(_, _, _)
4616 | ScalarValue::Decimal256(_, _, _)
4617 | ScalarValue::Int8(_)
4618 | ScalarValue::Int16(_)
4619 | ScalarValue::Int32(_)
4620 | ScalarValue::Int64(_)
4621 | ScalarValue::UInt8(_)
4622 | ScalarValue::UInt16(_)
4623 | ScalarValue::UInt32(_)
4624 | ScalarValue::UInt64(_)
4625 | ScalarValue::Date32(_)
4626 | ScalarValue::Date64(_)
4627 | ScalarValue::Time32Second(_)
4628 | ScalarValue::Time32Millisecond(_)
4629 | ScalarValue::Time64Microsecond(_)
4630 | ScalarValue::Time64Nanosecond(_)
4631 | ScalarValue::IntervalYearMonth(_)
4632 | ScalarValue::IntervalDayTime(_)
4633 | ScalarValue::IntervalMonthDayNano(_)
4634 | ScalarValue::DurationSecond(_)
4635 | ScalarValue::DurationMillisecond(_)
4636 | ScalarValue::DurationMicrosecond(_)
4637 | ScalarValue::DurationNanosecond(_) => 0,
4638 ScalarValue::Utf8(s)
4639 | ScalarValue::LargeUtf8(s)
4640 | ScalarValue::Utf8View(s) => {
4641 s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4642 }
4643 ScalarValue::TimestampSecond(_, s)
4644 | ScalarValue::TimestampMillisecond(_, s)
4645 | ScalarValue::TimestampMicrosecond(_, s)
4646 | ScalarValue::TimestampNanosecond(_, s) => {
4647 s.as_ref().map(|s| s.len()).unwrap_or_default()
4648 }
4649 ScalarValue::Binary(b)
4650 | ScalarValue::FixedSizeBinary(_, b)
4651 | ScalarValue::LargeBinary(b)
4652 | ScalarValue::BinaryView(b) => {
4653 b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4654 }
4655 ScalarValue::List(arr) => arr.get_array_memory_size(),
4656 ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4657 ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4658 ScalarValue::ListView(arr) => arr.get_array_memory_size(),
4659 ScalarValue::LargeListView(arr) => arr.get_array_memory_size(),
4660 ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4661 ScalarValue::Map(arr) => arr.get_array_memory_size(),
4662 ScalarValue::Union(vals, fields, _mode) => {
4663 vals.as_ref()
4664 .map(|(_id, sv)| sv.size() - size_of_val(sv))
4665 .unwrap_or_default()
4666 + size_of_val(fields)
4668 + (size_of::<Field>() * fields.len())
4669 + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4670 }
4671 ScalarValue::Dictionary(dt, sv) => {
4672 dt.size() + sv.size()
4674 }
4675 ScalarValue::RunEndEncoded(rf, vf, v) => rf.size() + vf.size() + v.size(),
4676 }
4677 }
4678
4679 pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4683 size_of_val(vec)
4684 + (size_of::<ScalarValue>() * vec.capacity())
4685 + vec
4686 .iter()
4687 .map(|sv| sv.size() - size_of_val(sv))
4688 .sum::<usize>()
4689 }
4690
4691 pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4695 size_of_val(vec_deque)
4696 + (size_of::<ScalarValue>() * vec_deque.capacity())
4697 + vec_deque
4698 .iter()
4699 .map(|sv| sv.size() - size_of_val(sv))
4700 .sum::<usize>()
4701 }
4702
4703 #[allow(clippy::allow_attributes, clippy::mutable_key_type)] pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4708 size_of_val(set)
4709 + (size_of::<ScalarValue>() * set.capacity())
4710 + set
4711 .iter()
4712 .map(|sv| sv.size() - size_of_val(sv))
4713 .sum::<usize>()
4714 }
4715
4716 pub fn compact(&mut self) {
4722 macro_rules! compact_array {
4724 ($arr:expr, $from_type:ty, $($as_method:tt)+) => {
4725 *Arc::make_mut($arr) = ScalarValue::compact_view_buffers(
4726 Arc::new(<$from_type>::from(copy_array_data(&$arr.to_data()))) as ArrayRef,
4727 ).$($as_method)+.clone()
4728 };
4729 }
4730 match self {
4731 ScalarValue::Null
4732 | ScalarValue::Boolean(_)
4733 | ScalarValue::Float16(_)
4734 | ScalarValue::Float32(_)
4735 | ScalarValue::Float64(_)
4736 | ScalarValue::Decimal32(_, _, _)
4737 | ScalarValue::Decimal64(_, _, _)
4738 | ScalarValue::Decimal128(_, _, _)
4739 | ScalarValue::Decimal256(_, _, _)
4740 | ScalarValue::Int8(_)
4741 | ScalarValue::Int16(_)
4742 | ScalarValue::Int32(_)
4743 | ScalarValue::Int64(_)
4744 | ScalarValue::UInt8(_)
4745 | ScalarValue::UInt16(_)
4746 | ScalarValue::UInt32(_)
4747 | ScalarValue::UInt64(_)
4748 | ScalarValue::Date32(_)
4749 | ScalarValue::Date64(_)
4750 | ScalarValue::Time32Second(_)
4751 | ScalarValue::Time32Millisecond(_)
4752 | ScalarValue::Time64Microsecond(_)
4753 | ScalarValue::Time64Nanosecond(_)
4754 | ScalarValue::IntervalYearMonth(_)
4755 | ScalarValue::IntervalDayTime(_)
4756 | ScalarValue::IntervalMonthDayNano(_)
4757 | ScalarValue::DurationSecond(_)
4758 | ScalarValue::DurationMillisecond(_)
4759 | ScalarValue::DurationMicrosecond(_)
4760 | ScalarValue::DurationNanosecond(_)
4761 | ScalarValue::Utf8(_)
4762 | ScalarValue::LargeUtf8(_)
4763 | ScalarValue::Utf8View(_)
4764 | ScalarValue::TimestampSecond(_, _)
4765 | ScalarValue::TimestampMillisecond(_, _)
4766 | ScalarValue::TimestampMicrosecond(_, _)
4767 | ScalarValue::TimestampNanosecond(_, _)
4768 | ScalarValue::Binary(_)
4769 | ScalarValue::FixedSizeBinary(_, _)
4770 | ScalarValue::LargeBinary(_)
4771 | ScalarValue::BinaryView(_) => (),
4772 ScalarValue::FixedSizeList(arr) => {
4773 compact_array!(arr, FixedSizeListArray, as_fixed_size_list())
4774 }
4775 ScalarValue::List(arr) => compact_array!(arr, ListArray, as_list::<i32>()),
4776 ScalarValue::LargeList(arr) => {
4777 compact_array!(arr, LargeListArray, as_list::<i64>())
4778 }
4779 ScalarValue::ListView(arr) => {
4780 compact_array!(arr, ListViewArray, as_list_view::<i32>())
4781 }
4782 ScalarValue::LargeListView(arr) => {
4783 compact_array!(arr, LargeListViewArray, as_list_view::<i64>())
4784 }
4785 ScalarValue::Struct(arr) => compact_array!(arr, StructArray, as_struct()),
4786 ScalarValue::Map(arr) => compact_array!(arr, MapArray, as_map()),
4787 ScalarValue::Union(val, _, _) => {
4788 if let Some((_, value)) = val.as_mut() {
4789 value.compact();
4790 }
4791 }
4792 ScalarValue::Dictionary(_, value) => {
4793 value.compact();
4794 }
4795 ScalarValue::RunEndEncoded(_, _, value) => {
4796 value.compact();
4797 }
4798 }
4799 }
4800
4801 pub fn compacted(mut self) -> Self {
4803 self.compact();
4804 self
4805 }
4806
4807 fn compact_view_buffers(array: ArrayRef) -> ArrayRef {
4821 macro_rules! gc_list {
4823 ($field:expr, $offset_type:ty, $array_type:ty) => {{
4824 let list = array.as_list::<$offset_type>();
4825 Arc::new(<$array_type>::new(
4826 Arc::clone($field),
4827 list.offsets().clone(),
4828 ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4829 list.nulls().cloned(),
4830 )) as ArrayRef
4831 }};
4832 }
4833 macro_rules! gc_list_view {
4835 ($field:expr, $offset_type:ty, $array_type:ty) => {{
4836 let list = array.as_list_view::<$offset_type>();
4837 Arc::new(<$array_type>::new(
4838 Arc::clone($field),
4839 list.offsets().clone(),
4840 list.sizes().clone(),
4841 ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4842 list.nulls().cloned(),
4843 )) as ArrayRef
4844 }};
4845 }
4846
4847 match array.data_type() {
4848 DataType::Utf8View => Arc::new(array.as_string_view().gc()),
4849 DataType::BinaryView => Arc::new(array.as_binary_view().gc()),
4850 DataType::Struct(_) => {
4851 let s = array.as_struct();
4852 let columns = s
4853 .columns()
4854 .iter()
4855 .map(|c| ScalarValue::compact_view_buffers(Arc::clone(c)))
4856 .collect();
4857 Arc::new(StructArray::new(
4858 s.fields().clone(),
4859 columns,
4860 s.nulls().cloned(),
4861 ))
4862 }
4863 DataType::List(field) => gc_list!(field, i32, ListArray),
4864 DataType::LargeList(field) => gc_list!(field, i64, LargeListArray),
4865 DataType::FixedSizeList(field, size) => {
4866 let list = array.as_fixed_size_list();
4867 Arc::new(FixedSizeListArray::new(
4868 Arc::clone(field),
4869 *size,
4870 ScalarValue::compact_view_buffers(Arc::clone(list.values())),
4871 list.nulls().cloned(),
4872 ))
4873 }
4874 DataType::ListView(field) => gc_list_view!(field, i32, ListViewArray),
4875 DataType::LargeListView(field) => {
4876 gc_list_view!(field, i64, LargeListViewArray)
4877 }
4878 DataType::Map(field, ordered) => {
4879 let map = array.as_map();
4880 let entries = ScalarValue::compact_view_buffers(Arc::new(
4881 map.entries().clone(),
4882 )
4883 as ArrayRef);
4884 Arc::new(MapArray::new(
4885 Arc::clone(field),
4886 map.offsets().clone(),
4887 entries.as_struct().clone(),
4888 map.nulls().cloned(),
4889 *ordered,
4890 ))
4891 }
4892 _ => array,
4893 }
4894 }
4895
4896 pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4911 match datatype {
4912 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4913 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4914 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4915 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4916 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4917 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4918 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4919 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4920 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4921 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4922 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4923 DataType::Decimal128(precision, scale) => {
4924 let max_digits = 10_i128.pow(*precision as u32) - 1;
4927 Some(ScalarValue::Decimal128(
4928 Some(-max_digits),
4929 *precision,
4930 *scale,
4931 ))
4932 }
4933 DataType::Decimal256(precision, scale) => {
4934 let max_digits = i256::from_i128(10_i128)
4937 .checked_pow(*precision as u32)
4938 .and_then(|v| v.checked_sub(i256::from_i128(1)))
4939 .unwrap_or(i256::MAX);
4940 Some(ScalarValue::Decimal256(
4941 Some(max_digits.neg_wrapping()),
4942 *precision,
4943 *scale,
4944 ))
4945 }
4946 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4947 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4948 DataType::Time32(TimeUnit::Second) => {
4949 Some(ScalarValue::Time32Second(Some(0)))
4950 }
4951 DataType::Time32(TimeUnit::Millisecond) => {
4952 Some(ScalarValue::Time32Millisecond(Some(0)))
4953 }
4954 DataType::Time64(TimeUnit::Microsecond) => {
4955 Some(ScalarValue::Time64Microsecond(Some(0)))
4956 }
4957 DataType::Time64(TimeUnit::Nanosecond) => {
4958 Some(ScalarValue::Time64Nanosecond(Some(0)))
4959 }
4960 DataType::Timestamp(unit, tz) => match unit {
4961 TimeUnit::Second => {
4962 Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4963 }
4964 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4965 Some(i64::MIN),
4966 tz.clone(),
4967 )),
4968 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4969 Some(i64::MIN),
4970 tz.clone(),
4971 )),
4972 TimeUnit::Nanosecond => {
4973 Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4974 }
4975 },
4976 DataType::Duration(unit) => match unit {
4977 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4978 TimeUnit::Millisecond => {
4979 Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4980 }
4981 TimeUnit::Microsecond => {
4982 Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4983 }
4984 TimeUnit::Nanosecond => {
4985 Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4986 }
4987 },
4988 _ => None,
4989 }
4990 }
4991
4992 pub fn max(datatype: &DataType) -> Option<ScalarValue> {
5007 match datatype {
5008 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
5009 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
5010 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
5011 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
5012 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
5013 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
5014 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
5015 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
5016 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
5017 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
5018 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
5019 DataType::Decimal128(precision, scale) => {
5020 let max_digits = 10_i128.pow(*precision as u32) - 1;
5023 Some(ScalarValue::Decimal128(
5024 Some(max_digits),
5025 *precision,
5026 *scale,
5027 ))
5028 }
5029 DataType::Decimal256(precision, scale) => {
5030 let max_digits = i256::from_i128(10_i128)
5032 .checked_pow(*precision as u32)
5033 .and_then(|v| v.checked_sub(i256::from_i128(1)))
5034 .unwrap_or(i256::MAX);
5035 Some(ScalarValue::Decimal256(
5036 Some(max_digits),
5037 *precision,
5038 *scale,
5039 ))
5040 }
5041 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
5042 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
5043 DataType::Time32(TimeUnit::Second) => {
5044 Some(ScalarValue::Time32Second(Some(86_399)))
5046 }
5047 DataType::Time32(TimeUnit::Millisecond) => {
5048 Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
5050 }
5051 DataType::Time64(TimeUnit::Microsecond) => {
5052 Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
5054 }
5055 DataType::Time64(TimeUnit::Nanosecond) => {
5056 Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
5058 }
5059 DataType::Timestamp(unit, tz) => match unit {
5060 TimeUnit::Second => {
5061 Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
5062 }
5063 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
5064 Some(i64::MAX),
5065 tz.clone(),
5066 )),
5067 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
5068 Some(i64::MAX),
5069 tz.clone(),
5070 )),
5071 TimeUnit::Nanosecond => {
5072 Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
5073 }
5074 },
5075 DataType::Duration(unit) => match unit {
5076 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
5077 TimeUnit::Millisecond => {
5078 Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
5079 }
5080 TimeUnit::Microsecond => {
5081 Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
5082 }
5083 TimeUnit::Nanosecond => {
5084 Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
5085 }
5086 },
5087 _ => None,
5088 }
5089 }
5090
5091 fn validate_decimal_or_internal_err<T: DecimalType>(
5094 precision: u8,
5095 scale: i8,
5096 ) -> Result<()> {
5097 validate_decimal_precision_and_scale::<T>(precision, scale).map_err(|err| {
5098 _internal_datafusion_err!(
5099 "Decimal precision/scale invariant violated \
5100 (precision={precision}, scale={scale}): {err}"
5101 )
5102 })
5103 }
5104}
5105
5106pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
5134 let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
5135 copy.extend(0, 0, src_data.len());
5136 copy.freeze()
5137}
5138
/// Generates `From<$native>` and `From<Option<$native>>` for `ScalarValue`,
/// both producing the `$variant` variant. A plain value is wrapped in `Some`.
macro_rules! impl_scalar {
    ($native:ty, $variant:tt) => {
        impl From<Option<$native>> for ScalarValue {
            fn from(value: Option<$native>) -> Self {
                ScalarValue::$variant(value)
            }
        }

        impl From<$native> for ScalarValue {
            fn from(value: $native) -> Self {
                ScalarValue::from(Some(value))
            }
        }
    };
}
5154
5155impl_scalar!(f64, Float64);
5156impl_scalar!(f32, Float32);
5157impl_scalar!(f16, Float16);
5158impl_scalar!(i8, Int8);
5159impl_scalar!(i16, Int16);
5160impl_scalar!(i32, Int32);
5161impl_scalar!(i64, Int64);
5162impl_scalar!(bool, Boolean);
5163impl_scalar!(u8, UInt8);
5164impl_scalar!(u16, UInt16);
5165impl_scalar!(u32, UInt32);
5166impl_scalar!(u64, UInt64);
5167
5168impl From<&str> for ScalarValue {
5169 fn from(value: &str) -> Self {
5170 Some(value).into()
5171 }
5172}
5173
5174impl From<Option<&str>> for ScalarValue {
5175 fn from(value: Option<&str>) -> Self {
5176 let value = value.map(|s| s.to_string());
5177 value.into()
5178 }
5179}
5180
5181impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
5183 fn from(value: Vec<(&str, ScalarValue)>) -> Self {
5184 value
5185 .into_iter()
5186 .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
5187 builder.with_name_and_scalar(name, value)
5188 })
5189 .build()
5190 .unwrap()
5191 }
5192}
5193
5194impl FromStr for ScalarValue {
5195 type Err = Infallible;
5196
5197 fn from_str(s: &str) -> Result<Self, Self::Err> {
5198 Ok(s.into())
5199 }
5200}
5201
5202impl From<String> for ScalarValue {
5203 fn from(value: String) -> Self {
5204 Some(value).into()
5205 }
5206}
5207
5208impl From<Option<String>> for ScalarValue {
5209 fn from(value: Option<String>) -> Self {
5210 ScalarValue::Utf8(value)
5211 }
5212}
5213
5214macro_rules! impl_try_from {
5215 ($SCALAR:ident, $NATIVE:ident) => {
5216 impl TryFrom<ScalarValue> for $NATIVE {
5217 type Error = DataFusionError;
5218
5219 fn try_from(value: ScalarValue) -> Result<Self> {
5220 match value {
5221 ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
5222 _ => _internal_err!(
5223 "Cannot convert {:?} to {}",
5224 value,
5225 std::any::type_name::<Self>()
5226 ),
5227 }
5228 }
5229 }
5230 };
5231}
5232
5233impl_try_from!(Int8, i8);
5234impl_try_from!(Int16, i16);
5235
5236impl TryFrom<ScalarValue> for i32 {
5238 type Error = DataFusionError;
5239
5240 fn try_from(value: ScalarValue) -> Result<Self> {
5241 match value {
5242 ScalarValue::Int32(Some(inner_value))
5243 | ScalarValue::Date32(Some(inner_value))
5244 | ScalarValue::Time32Second(Some(inner_value))
5245 | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
5246 _ => _internal_err!(
5247 "Cannot convert {:?} to {}",
5248 value,
5249 std::any::type_name::<Self>()
5250 ),
5251 }
5252 }
5253}
5254
5255impl TryFrom<ScalarValue> for i64 {
5257 type Error = DataFusionError;
5258
5259 fn try_from(value: ScalarValue) -> Result<Self> {
5260 match value {
5261 ScalarValue::Int64(Some(inner_value))
5262 | ScalarValue::Date64(Some(inner_value))
5263 | ScalarValue::Time64Microsecond(Some(inner_value))
5264 | ScalarValue::Time64Nanosecond(Some(inner_value))
5265 | ScalarValue::TimestampNanosecond(Some(inner_value), _)
5266 | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
5267 | ScalarValue::TimestampMillisecond(Some(inner_value), _)
5268 | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
5269 _ => _internal_err!(
5270 "Cannot convert {:?} to {}",
5271 value,
5272 std::any::type_name::<Self>()
5273 ),
5274 }
5275 }
5276}
5277
5278impl TryFrom<ScalarValue> for i128 {
5280 type Error = DataFusionError;
5281
5282 fn try_from(value: ScalarValue) -> Result<Self> {
5283 match value {
5284 ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
5285 _ => _internal_err!(
5286 "Cannot convert {:?} to {}",
5287 value,
5288 std::any::type_name::<Self>()
5289 ),
5290 }
5291 }
5292}
5293
5294impl TryFrom<ScalarValue> for i256 {
5296 type Error = DataFusionError;
5297
5298 fn try_from(value: ScalarValue) -> Result<Self> {
5299 match value {
5300 ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
5301 _ => _internal_err!(
5302 "Cannot convert {:?} to {}",
5303 value,
5304 std::any::type_name::<Self>()
5305 ),
5306 }
5307 }
5308}
5309
5310impl_try_from!(UInt8, u8);
5311impl_try_from!(UInt16, u16);
5312impl_try_from!(UInt32, u32);
5313impl_try_from!(UInt64, u64);
5314impl_try_from!(Float16, f16);
5315impl_try_from!(Float32, f32);
5316impl_try_from!(Float64, f64);
5317impl_try_from!(Boolean, bool);
5318
5319impl TryFrom<DataType> for ScalarValue {
5320 type Error = DataFusionError;
5321
5322 fn try_from(datatype: DataType) -> Result<Self> {
5324 (&datatype).try_into()
5325 }
5326}
5327
5328impl TryFrom<&DataType> for ScalarValue {
5329 type Error = DataFusionError;
5330
5331 fn try_from(data_type: &DataType) -> Result<Self> {
5333 Self::try_new_null(data_type)
5334 }
5335}
5336
/// Writes an `Option` to the formatter `$F`: the contained value through its
/// `Display` implementation, or the literal `NULL` when it is `None`.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
5345
5346impl fmt::Display for ScalarValue {
5352 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5353 match self {
5354 ScalarValue::Decimal32(v, p, s) => {
5355 write!(f, "{v:?},{p:?},{s:?}")?;
5356 }
5357 ScalarValue::Decimal64(v, p, s) => {
5358 write!(f, "{v:?},{p:?},{s:?}")?;
5359 }
5360 ScalarValue::Decimal128(v, p, s) => {
5361 write!(f, "{v:?},{p:?},{s:?}")?;
5362 }
5363 ScalarValue::Decimal256(v, p, s) => {
5364 write!(f, "{v:?},{p:?},{s:?}")?;
5365 }
5366 ScalarValue::Boolean(e) => format_option!(f, e)?,
5367 ScalarValue::Float16(e) => format_option!(f, e)?,
5368 ScalarValue::Float32(e) => format_option!(f, e)?,
5369 ScalarValue::Float64(e) => format_option!(f, e)?,
5370 ScalarValue::Int8(e) => format_option!(f, e)?,
5371 ScalarValue::Int16(e) => format_option!(f, e)?,
5372 ScalarValue::Int32(e) => format_option!(f, e)?,
5373 ScalarValue::Int64(e) => format_option!(f, e)?,
5374 ScalarValue::UInt8(e) => format_option!(f, e)?,
5375 ScalarValue::UInt16(e) => format_option!(f, e)?,
5376 ScalarValue::UInt32(e) => format_option!(f, e)?,
5377 ScalarValue::UInt64(e) => format_option!(f, e)?,
5378 ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
5379 ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
5380 ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
5381 ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
5382 ScalarValue::Utf8(e)
5383 | ScalarValue::LargeUtf8(e)
5384 | ScalarValue::Utf8View(e) => format_option!(f, e)?,
5385 ScalarValue::Binary(e)
5386 | ScalarValue::FixedSizeBinary(_, e)
5387 | ScalarValue::LargeBinary(e)
5388 | ScalarValue::BinaryView(e) => match e {
5389 Some(bytes) => {
5390 const HEX_CHARS_UPPER: &[u8; 16] = b"0123456789ABCDEF";
5392 for b in bytes.iter().take(10) {
5393 f.write_char(HEX_CHARS_UPPER[(b >> 4) as usize] as char)?;
5394 f.write_char(HEX_CHARS_UPPER[(b & 0x0f) as usize] as char)?;
5395 }
5396 if bytes.len() > 10 {
5397 write!(f, "...")?;
5398 }
5399 }
5400 None => write!(f, "NULL")?,
5401 },
5402 ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?,
5403 ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?,
5404 ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?,
5405 ScalarValue::ListView(arr) => fmt_list(arr.as_ref(), f)?,
5406 ScalarValue::LargeListView(arr) => fmt_list(arr.as_ref(), f)?,
5407 ScalarValue::Date32(e) => format_option!(
5408 f,
5409 e.map(|v| {
5410 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
5411 match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
5412 {
5413 Some(date) => date.to_string(),
5414 None => "".to_string(),
5415 }
5416 })
5417 )?,
5418 ScalarValue::Date64(e) => format_option!(
5419 f,
5420 e.map(|v| {
5421 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
5422 match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
5423 {
5424 Some(date) => date.to_string(),
5425 None => "".to_string(),
5426 }
5427 })
5428 )?,
5429 ScalarValue::Time32Second(e) => format_option!(f, e)?,
5430 ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
5431 ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
5432 ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
5433 ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
5434 ScalarValue::IntervalMonthDayNano(e) => {
5435 format_option!(f, e.map(|v| format!("{v:?}")))?
5436 }
5437 ScalarValue::IntervalDayTime(e) => {
5438 format_option!(f, e.map(|v| format!("{v:?}")))?;
5439 }
5440 ScalarValue::DurationSecond(e) => format_option!(f, e)?,
5441 ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
5442 ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
5443 ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
5444 ScalarValue::Struct(struct_arr) => {
5445 assert_eq!(struct_arr.len(), 1);
5447
5448 if struct_arr.null_count() == struct_arr.len() {
5449 write!(f, "NULL")?;
5450 return Ok(());
5451 }
5452
5453 let columns = struct_arr.columns();
5454 let fields = struct_arr.fields();
5455 let nulls = struct_arr.nulls();
5456
5457 write!(
5458 f,
5459 "{{{}}}",
5460 columns
5461 .iter()
5462 .zip(fields.iter())
5463 .map(|(column, field)| {
5464 if nulls.is_some_and(|b| b.is_null(0)) {
5465 format!("{}:NULL", field.name())
5466 } else if let DataType::Struct(_) = field.data_type() {
5467 let sv = ScalarValue::Struct(Arc::new(
5468 column.as_struct().to_owned(),
5469 ));
5470 format!("{}:{sv}", field.name())
5471 } else {
5472 let sv = array_value_to_string(column, 0).unwrap();
5473 format!("{}:{sv}", field.name())
5474 }
5475 })
5476 .collect::<Vec<_>>()
5477 .join(",")
5478 )?
5479 }
5480 ScalarValue::Map(map_arr) => {
5481 if map_arr.null_count() == map_arr.len() {
5482 write!(f, "NULL")?;
5483 return Ok(());
5484 }
5485
5486 write!(
5487 f,
5488 "[{}]",
5489 map_arr
5490 .iter()
5491 .map(|struct_array| {
5492 if let Some(arr) = struct_array {
5493 let mut buffer = VecDeque::new();
5494 for i in 0..arr.len() {
5495 let key =
5496 array_value_to_string(arr.column(0), i).unwrap();
5497 let value =
5498 array_value_to_string(arr.column(1), i).unwrap();
5499 buffer.push_back(format!("{key}:{value}"));
5500 }
5501 format!(
5502 "{{{}}}",
5503 buffer
5504 .into_iter()
5505 .collect::<Vec<_>>()
5506 .join(",")
5507 .as_str()
5508 )
5509 } else {
5510 "NULL".to_string()
5511 }
5512 })
5513 .collect::<Vec<_>>()
5514 .join(",")
5515 )?
5516 }
5517 ScalarValue::Union(val, _fields, _mode) => match val {
5518 Some((id, val)) => write!(f, "{id}:{val}")?,
5519 None => write!(f, "NULL")?,
5520 },
5521 ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
5522 ScalarValue::RunEndEncoded(_, _, v) => write!(f, "{v}")?,
5523 ScalarValue::Null => write!(f, "NULL")?,
5524 };
5525 Ok(())
5526 }
5527}
5528
5529fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result {
5530 assert_eq!(arr.len(), 1);
5532 let options = FormatOptions::default().with_display_error(true);
5533 let formatter = ArrayFormatter::try_new(arr, &options).unwrap();
5534 let value_formatter = formatter.value(0);
5535 write!(f, "{value_formatter}")
5536}
5537
/// Writes `data` as decimal byte values separated by commas, without any
/// surrounding brackets or spaces. An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (position, byte) in data.iter().enumerate() {
        // The separator goes before every byte except the first.
        if position > 0 {
            f.write_str(",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
5549
5550impl fmt::Debug for ScalarValue {
5551 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5552 match self {
5553 ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
5554 ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
5555 ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
5556 ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
5557 ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
5558 ScalarValue::Float16(_) => write!(f, "Float16({self})"),
5559 ScalarValue::Float32(_) => write!(f, "Float32({self})"),
5560 ScalarValue::Float64(_) => write!(f, "Float64({self})"),
5561 ScalarValue::Int8(_) => write!(f, "Int8({self})"),
5562 ScalarValue::Int16(_) => write!(f, "Int16({self})"),
5563 ScalarValue::Int32(_) => write!(f, "Int32({self})"),
5564 ScalarValue::Int64(_) => write!(f, "Int64({self})"),
5565 ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
5566 ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
5567 ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
5568 ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
5569 ScalarValue::TimestampSecond(_, tz_opt) => {
5570 write!(f, "TimestampSecond({self}, {tz_opt:?})")
5571 }
5572 ScalarValue::TimestampMillisecond(_, tz_opt) => {
5573 write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
5574 }
5575 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
5576 write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
5577 }
5578 ScalarValue::TimestampNanosecond(_, tz_opt) => {
5579 write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
5580 }
5581 ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
5582 ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
5583 ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
5584 ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
5585 ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
5586 ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
5587 ScalarValue::Binary(None) => write!(f, "Binary({self})"),
5588 ScalarValue::Binary(Some(b)) => {
5589 write!(f, "Binary(\"")?;
5590 fmt_binary(b.as_slice(), f)?;
5591 write!(f, "\")")
5592 }
5593 ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
5594 ScalarValue::BinaryView(Some(b)) => {
5595 write!(f, "BinaryView(\"")?;
5596 fmt_binary(b.as_slice(), f)?;
5597 write!(f, "\")")
5598 }
5599 ScalarValue::FixedSizeBinary(size, None) => {
5600 write!(f, "FixedSizeBinary({size}, {self})")
5601 }
5602 ScalarValue::FixedSizeBinary(size, Some(b)) => {
5603 write!(f, "FixedSizeBinary({size}, \"")?;
5604 fmt_binary(b.as_slice(), f)?;
5605 write!(f, "\")")
5606 }
5607 ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
5608 ScalarValue::LargeBinary(Some(b)) => {
5609 write!(f, "LargeBinary(\"")?;
5610 fmt_binary(b.as_slice(), f)?;
5611 write!(f, "\")")
5612 }
5613 ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
5614 ScalarValue::List(_) => write!(f, "List({self})"),
5615 ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
5616 ScalarValue::ListView(_) => write!(f, "ListView({self})"),
5617 ScalarValue::LargeListView(_) => write!(f, "LargeListView({self})"),
5618 ScalarValue::Struct(struct_arr) => {
5619 assert_eq!(struct_arr.len(), 1);
5621
5622 let columns = struct_arr.columns();
5623 let fields = struct_arr.fields();
5624
5625 write!(
5626 f,
5627 "Struct({{{}}})",
5628 columns
5629 .iter()
5630 .zip(fields.iter())
5631 .map(|(column, field)| {
5632 let sv = array_value_to_string(column, 0).unwrap();
5633 let name = field.name();
5634 format!("{name}:{sv}")
5635 })
5636 .collect::<Vec<_>>()
5637 .join(",")
5638 )
5639 }
5640 ScalarValue::Map(map_arr) => {
5641 write!(
5642 f,
5643 "Map([{}])",
5644 map_arr
5645 .iter()
5646 .map(|struct_array| {
5647 if let Some(arr) = struct_array {
5648 let buffer: Vec<String> = (0..arr.len())
5649 .map(|i| {
5650 let key = array_value_to_string(arr.column(0), i)
5651 .unwrap();
5652 let value =
5653 array_value_to_string(arr.column(1), i)
5654 .unwrap();
5655 format!("{key:?}:{value:?}")
5656 })
5657 .collect();
5658 format!("{{{}}}", buffer.join(","))
5659 } else {
5660 "NULL".to_string()
5661 }
5662 })
5663 .collect::<Vec<_>>()
5664 .join(",")
5665 )
5666 }
5667 ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
5668 ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
5669 ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
5670 ScalarValue::Time32Millisecond(_) => {
5671 write!(f, "Time32Millisecond(\"{self}\")")
5672 }
5673 ScalarValue::Time64Microsecond(_) => {
5674 write!(f, "Time64Microsecond(\"{self}\")")
5675 }
5676 ScalarValue::Time64Nanosecond(_) => {
5677 write!(f, "Time64Nanosecond(\"{self}\")")
5678 }
5679 ScalarValue::IntervalDayTime(_) => {
5680 write!(f, "IntervalDayTime(\"{self}\")")
5681 }
5682 ScalarValue::IntervalYearMonth(_) => {
5683 write!(f, "IntervalYearMonth(\"{self}\")")
5684 }
5685 ScalarValue::IntervalMonthDayNano(_) => {
5686 write!(f, "IntervalMonthDayNano(\"{self}\")")
5687 }
5688 ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
5689 ScalarValue::DurationMillisecond(_) => {
5690 write!(f, "DurationMillisecond(\"{self}\")")
5691 }
5692 ScalarValue::DurationMicrosecond(_) => {
5693 write!(f, "DurationMicrosecond(\"{self}\")")
5694 }
5695 ScalarValue::DurationNanosecond(_) => {
5696 write!(f, "DurationNanosecond(\"{self}\")")
5697 }
5698 ScalarValue::Union(val, _fields, _mode) => match val {
5699 Some((id, val)) => write!(f, "Union {id}:{val}"),
5700 None => write!(f, "Union(NULL)"),
5701 },
5702 ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
5703 ScalarValue::RunEndEncoded(rf, vf, v) => {
5704 write!(f, "RunEndEncoded({rf:?}, {vf:?}, {v:?})")
5705 }
5706 ScalarValue::Null => write!(f, "NULL"),
5707 }
5708 }
5709}
5710
5711pub trait ScalarType<T: ArrowNativeType> {
5713 fn scalar(r: Option<T>) -> ScalarValue;
5715}
5716
5717impl ScalarType<f32> for Float32Type {
5718 fn scalar(r: Option<f32>) -> ScalarValue {
5719 ScalarValue::Float32(r)
5720 }
5721}
5722
5723impl ScalarType<i64> for TimestampSecondType {
5724 fn scalar(r: Option<i64>) -> ScalarValue {
5725 ScalarValue::TimestampSecond(r, None)
5726 }
5727}
5728
5729impl ScalarType<i64> for TimestampMillisecondType {
5730 fn scalar(r: Option<i64>) -> ScalarValue {
5731 ScalarValue::TimestampMillisecond(r, None)
5732 }
5733}
5734
5735impl ScalarType<i64> for TimestampMicrosecondType {
5736 fn scalar(r: Option<i64>) -> ScalarValue {
5737 ScalarValue::TimestampMicrosecond(r, None)
5738 }
5739}
5740
5741impl ScalarType<i64> for TimestampNanosecondType {
5742 fn scalar(r: Option<i64>) -> ScalarValue {
5743 ScalarValue::TimestampNanosecond(r, None)
5744 }
5745}
5746
5747impl ScalarType<i32> for Date32Type {
5748 fn scalar(r: Option<i32>) -> ScalarValue {
5749 ScalarValue::Date32(r)
5750 }
5751}
5752
5753#[cfg(test)]
5754mod tests {
5755
5756 use super::*;
5757 use crate::cast::{
5758 as_large_list_view_array, as_list_array, as_map_array, as_struct_array,
5759 };
5760 use crate::test_util::batches_to_string;
5761 use arrow::array::{
5762 FixedSizeListBuilder, Int32Builder, LargeListBuilder, LargeListViewBuilder,
5763 ListBuilder, ListViewBuilder, MapBuilder, NullArray, NullBufferBuilder,
5764 OffsetSizeTrait, PrimitiveBuilder, RecordBatch, StringBuilder,
5765 StringDictionaryBuilder, StructBuilder, UnionBuilder,
5766 };
5767 use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5768 use arrow::compute::{is_null, kernels};
5769 use arrow::datatypes::{
5770 ArrowNumericType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, Fields,
5771 Float64Type, TimeUnit,
5772 };
5773 use arrow::error::ArrowError;
5774 use arrow::util::pretty::pretty_format_columns;
5775 use insta::assert_snapshot;
5776 use rand::Rng;
5777
/// A `ScalarValue::Map` wrapping a multi-row map array must hand back an
/// equal array from `to_array`.
#[test]
fn test_scalar_value_from_for_map() {
    let mut map_builder =
        MapBuilder::new(None, StringBuilder::new(), Int32Builder::with_capacity(4));

    // Each row is (entries, validity flag passed to `append`).
    let rows: [(&[(&str, i32)], bool); 4] = [
        (&[("joe", 1)], true),
        (&[("blogs", 2), ("foo", 4)], true),
        (&[], true),
        (&[], false),
    ];
    for (entries, is_valid) in rows {
        for &(key, value) in entries {
            map_builder.keys().append_value(key);
            map_builder.values().append_value(value);
        }
        map_builder.append(is_valid).unwrap();
    }
    let expected = map_builder.finish();

    let scalar = ScalarValue::Map(Arc::new(expected.clone()));
    let round_tripped = scalar.to_array().unwrap();
    assert_eq!(as_map_array(&round_tripped).unwrap(), &expected);
}
5802
/// Checks the short timestamp type names used in error messages and the
/// accept/reject behaviour of `ensure_timestamp_in_bounds`.
#[test]
fn test_format_timestamp_type_for_error_and_bounds() {
    let nanos_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
    let micros_type = DataType::Timestamp(TimeUnit::Microsecond, None);

    // Type names as rendered for error messages.
    assert_eq!(format_timestamp_type_for_error(&nanos_type), "Timestamp(ns)");
    assert_eq!(format_timestamp_type_for_error(&micros_type), "Timestamp(us)");

    // A small day count scaled by NANOS_PER_DAY is accepted.
    let in_range =
        ensure_timestamp_in_bounds(1000, NANOS_PER_DAY, &DataType::Date32, &nanos_type);
    assert!(in_range.is_ok());

    // A large day count is rejected with a descriptive message.
    let too_many_days = ensure_timestamp_in_bounds(
        2932896,
        NANOS_PER_DAY,
        &DataType::Date32,
        &nanos_type,
    );
    assert!(too_many_days.is_err());
    let message = too_many_days.unwrap_err().to_string();
    assert!(message.contains("Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range"));

    // One past the largest millisecond count that still fits once scaled.
    let overflow_millis: i64 = (i64::MAX / NANOS_PER_MILLISECOND) + 1;
    let too_many_millis = ensure_timestamp_in_bounds(
        overflow_millis,
        NANOS_PER_MILLISECOND,
        &DataType::Date64,
        &nanos_type,
    );
    assert!(too_many_millis.is_err());
}
5848
/// A struct scalar built with `ScalarStructBuilder` converts to the same
/// one-row `StructArray` that is assembled by hand.
#[test]
fn test_scalar_value_from_for_struct() {
    let bool_field = Field::new("b", DataType::Boolean, false);
    let int_field = Field::new("c", DataType::Int32, false);
    let bool_column = Arc::new(BooleanArray::from(vec![false]));
    let int_column = Arc::new(Int32Array::from(vec![42]));

    let expected = StructArray::from(vec![
        (
            Arc::new(bool_field.clone()),
            Arc::clone(&bool_column) as ArrayRef,
        ),
        (
            Arc::new(int_field.clone()),
            Arc::clone(&int_column) as ArrayRef,
        ),
    ]);

    let scalar = ScalarStructBuilder::new()
        .with_array(bool_field, bool_column)
        .with_array(int_field, int_column)
        .build()
        .unwrap();

    let as_array = scalar.to_array().unwrap();
    assert_eq!(as_struct_array(&as_array).unwrap(), &expected);
}
5875
/// Building a struct scalar from four-row columns is expected to panic on
/// `unwrap` with the length error named in `should_panic`.
#[test]
#[should_panic(
    expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
)]
fn test_scalar_value_from_for_struct_should_panic() {
    let four_bools: ArrayRef =
        Arc::new(BooleanArray::from(vec![false, true, false, false]));
    let four_ints: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    let _ = ScalarStructBuilder::new()
        .with_array(Field::new("bool", DataType::Boolean, false), four_bools)
        .with_array(Field::new("i32", DataType::Int32, false), four_ints)
        .build()
        .unwrap();
}
5893
/// `to_array_of_size(2)` on struct, list and list-view scalars yields the
/// backing array's contents repeated twice.
#[test]
fn test_to_array_of_size_for_nested() {
    // Builds a struct array with a Boolean column "b" and an Int32 column "c".
    let struct_of = |flags: Vec<bool>, ints: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(flags)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::new(Int32Array::from(ints)) as ArrayRef,
            ),
        ])
    };

    // Struct scalar.
    let struct_scalar = ScalarValue::Struct(Arc::new(struct_of(
        vec![false, false, true, true],
        vec![42, 28, 19, 31],
    )));
    let repeated_struct = struct_scalar.to_array_of_size(2).unwrap();
    let expected_struct = struct_of(
        vec![false, false, true, true, false, false, true, true],
        vec![42, 28, 19, 31, 42, 28, 19, 31],
    );
    assert_eq!(as_struct_array(&repeated_struct).unwrap(), &expected_struct);

    // The single list row shared by the list and list-view cases.
    let row = || -> Option<Vec<Option<i32>>> { Some(vec![Some(1), None, Some(2)]) };

    // List scalar.
    let list_scalar = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
        Int32Type,
        _,
        _,
    >(vec![row()])));
    let repeated_list = list_scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected_list =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![row(), row()]);
    assert_eq!(&expected_list, repeated_list.as_list::<i32>());

    // ListView scalar.
    let view_scalar = ScalarValue::ListView(Arc::new(
        ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![row()]),
    ));
    let repeated_view = view_scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected_view =
        ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![row(), row()]);
    assert_eq!(&expected_view, repeated_view.as_list_view::<i32>());
}
5973
/// A fixed-size-list scalar expands to two identical rows for size 2 and to
/// an empty array for size 0.
#[test]
fn test_to_array_of_size_for_fsl() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let single_row = FixedSizeListArray::new(
        Arc::clone(&item_field),
        3,
        Arc::new(Int32Array::from_iter([Some(1), None, Some(2)])),
        None,
    );
    let scalar = ScalarValue::FixedSizeList(Arc::new(single_row));

    let doubled = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected = FixedSizeListArray::new(
        item_field,
        3,
        Arc::new(Int32Array::from_iter([
            Some(1),
            None,
            Some(2),
            Some(1),
            None,
            Some(2),
        ])),
        None,
    );
    assert_eq!(
        &expected,
        as_fixed_size_list_array(doubled.as_ref()).unwrap()
    );

    let empty = scalar
        .to_array_of_size(0)
        .expect("Failed to convert to empty array");
    assert_eq!(empty.len(), 0);
}
6000
/// Requesting size 1 from a list scalar returns an array equal to the
/// scalar's own backing array.
#[test]
fn test_to_array_of_size_list_size_one() {
    let single_row = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(
        vec![Some(10), Some(20)],
    )]);
    let scalar = ScalarValue::List(Arc::new(single_row.clone()));

    let same_size = scalar.to_array_of_size(1).unwrap();
    assert_eq!(same_size.as_list::<i32>(), &single_row);
}
6012
/// A list scalar holding one empty list expands to three rows, each of
/// which is still empty.
#[test]
fn test_to_array_of_size_list_empty_inner() {
    let empty_row =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]);
    let scalar = ScalarValue::List(Arc::new(empty_row));

    let expanded = scalar.to_array_of_size(3).unwrap();
    let expanded_list = expanded.as_list::<i32>();

    assert_eq!(expanded_list.len(), 3);
    (0..3).for_each(|row| assert_eq!(expanded_list.value(row).len(), 0));
}
6025
/// A large-list scalar expands to three copies of its single row.
#[test]
fn test_to_array_of_size_large_list() {
    let row = || -> Option<Vec<Option<i32>>> { Some(vec![Some(100), Some(200)]) };

    let scalar = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![row()]),
    ));
    let tripled = scalar.to_array_of_size(3).unwrap();

    let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        row(),
        row(),
        row(),
    ]);
    assert_eq!(tripled.as_list::<i64>(), &expected);
}
6042
/// `list_to_array_of_size` on a three-element array with size 3 returns the
/// three elements repeated three times, nulls included.
#[test]
fn test_list_to_array_of_size_multi_row() {
    let pattern: [Option<i32>; 3] = [Some(10), None, Some(30)];
    let source = Int32Array::from(pattern.to_vec());

    let repeated = ScalarValue::list_to_array_of_size(&source, 3).unwrap();
    let repeated = repeated.as_primitive::<Int32Type>();

    assert_eq!(repeated.iter().collect::<Vec<_>>(), pattern.repeat(3));
}
6064
/// The scalar produced by `try_from(&DataType::List(..))` expands to an
/// array of three rows that are all null.
#[test]
fn test_to_array_of_size_null_list() {
    let list_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let scalar = ScalarValue::try_from(&list_type).unwrap();

    let expanded = scalar.to_array_of_size(3).unwrap();

    assert_eq!(expanded.len(), 3);
    assert_eq!(expanded.null_count(), 3);
}
6073
/// A `None` fixed-size-binary scalar of width 5 expands to two null rows
/// whose value buffer still spans 2 * 5 bytes.
#[test]
fn test_to_array_of_size_for_none_fsb() {
    let scalar = ScalarValue::FixedSizeBinary(5, None);

    let expanded = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");

    assert_eq!(expanded.len(), 2);
    assert_eq!(expanded.null_count(), 2);
    assert_eq!(expanded.as_fixed_size_binary().values().len(), 10);
}
6085
/// `new_list_nullable` over Utf8 scalars yields the same single-row list
/// array as `single_row_list_array` over the raw strings.
#[test]
fn test_list_to_array_string() {
    let words = ["rust", "arrow", "data-fusion"];
    let scalars: Vec<ScalarValue> =
        words.iter().map(|word| ScalarValue::from(*word)).collect();

    let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);

    let expected = single_row_list_array(words.to_vec());
    assert_eq!(*result, expected);
}
6099
6100 fn single_row_list_array(items: Vec<&str>) -> ListArray {
6101 SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
6102 .build_list_array()
6103 }
6104
6105 fn build_list<O: OffsetSizeTrait>(
6106 values: Vec<Option<Vec<Option<i64>>>>,
6107 ) -> Vec<ScalarValue> {
6108 values
6109 .into_iter()
6110 .map(|v| {
6111 let arr = Arc::new(GenericListArray::<O>::from_iter_primitive::<
6112 Int64Type,
6113 _,
6114 _,
6115 >(vec![v])) as ArrayRef;
6116
6117 if O::IS_LARGE {
6118 ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
6119 } else {
6120 ScalarValue::List(arr.as_list::<i32>().to_owned().into())
6121 }
6122 })
6123 .collect()
6124 }
6125
6126 fn build_list_view<O: OffsetSizeTrait>(
6127 values: Vec<Option<Vec<Option<i64>>>>,
6128 ) -> Vec<ScalarValue> {
6129 values
6130 .into_iter()
6131 .map(|v| {
6132 let arr = Arc::new(GenericListViewArray::<O>::from_iter_primitive::<
6133 Int64Type,
6134 _,
6135 _,
6136 >(vec![v])) as ArrayRef;
6137
6138 if O::IS_LARGE {
6139 ScalarValue::LargeListView(
6140 arr.as_list_view::<i64>().to_owned().into(),
6141 )
6142 } else {
6143 ScalarValue::ListView(arr.as_list_view::<i32>().to_owned().into())
6144 }
6145 })
6146 .collect()
6147 }
6148
/// `iter_to_array` combines fixed-size-list scalars into one array; the null
/// scalars (built with list size 1) become null rows next to size-3 rows.
#[test]
fn test_iter_to_array_fixed_size_list() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // One non-null row of three Int32 values.
    let triple = |values: Vec<i32>| {
        Arc::new(FixedSizeListArray::new(
            Arc::clone(&item_field),
            3,
            Arc::new(Int32Array::from(values)),
            None,
        ))
    };
    let null_row = Arc::new(FixedSizeListArray::new_null(
        Arc::clone(&item_field),
        1,
        1,
    ));

    let scalars = vec![
        ScalarValue::FixedSizeList(Arc::clone(&null_row)),
        ScalarValue::FixedSizeList(triple(vec![1, 2, 3])),
        ScalarValue::FixedSizeList(triple(vec![4, 5, 6])),
        ScalarValue::FixedSizeList(null_row),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
        vec![
            None,
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(4), Some(5), Some(6)]),
            None,
        ],
        3,
    );
    assert_eq!(combined.as_ref(), &expected);
}
6186
/// `iter_to_array` over two identical one-row struct scalars yields a
/// two-row struct array.
#[test]
fn test_iter_to_array_struct() {
    // Builds a struct array with a Boolean column "A" and an Int32 column "B".
    let struct_of = |flags: Vec<bool>, ints: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("A", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(flags)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int32, false)),
                Arc::new(Int32Array::from(ints)) as ArrayRef,
            ),
        ])
    };

    let scalars = vec![
        ScalarValue::Struct(Arc::new(struct_of(vec![false], vec![42]))),
        ScalarValue::Struct(Arc::new(struct_of(vec![false], vec![42]))),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = struct_of(vec![false, false], vec![42, 42]);
    assert_eq!(combined.as_ref(), &expected);
}
6230
/// `iter_to_array` keeps struct-level validity: the scalar built with bitmap
/// byte 1 stays valid and the one built with bitmap byte 0 stays null.
#[test]
fn test_iter_to_array_struct_with_nulls() {
    // One-row struct (A: Int32, B: Int64) with an explicit validity buffer.
    let struct_row = |a: i32, b: i64, validity: Buffer| {
        StructArray::from((
            vec![
                (
                    Arc::new(Field::new("A", DataType::Int32, false)),
                    Arc::new(Int32Array::from(vec![a])) as ArrayRef,
                ),
                (
                    Arc::new(Field::new("B", DataType::Int64, false)),
                    Arc::new(Int64Array::from(vec![b])) as ArrayRef,
                ),
            ],
            validity,
        ))
    };

    let valid_row = struct_row(1, 2, Buffer::from(&[1]));
    let null_row = struct_row(3, 4, Buffer::from(&[0]));

    let scalars = vec![
        ScalarValue::Struct(Arc::new(valid_row)),
        ScalarValue::Struct(Arc::new(null_row)),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let combined_struct = combined.as_struct();
    assert!(combined_struct.is_valid(0));
    assert!(combined_struct.is_null(1));
}
6274
/// `iter_to_array` over list scalars reproduces the rows, including the null
/// row, for List, LargeList, ListView and LargeListView.
#[test]
fn iter_to_array_primitive_test() {
    // The same three rows (the middle one null) feed every variant.
    let rows = || -> Vec<Option<Vec<Option<i64>>>> {
        vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), Some(5)]),
        ]
    };

    // List (i32 offsets).
    let combined = ScalarValue::iter_to_array(build_list::<i32>(rows())).unwrap();
    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_list_array(&combined).unwrap(), &expected);

    // LargeList (i64 offsets).
    let combined = ScalarValue::iter_to_array(build_list::<i64>(rows())).unwrap();
    let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_large_list_array(&combined).unwrap(), &expected);

    // ListView (i32 offsets).
    let combined =
        ScalarValue::iter_to_array(build_list_view::<i32>(rows())).unwrap();
    let expected = ListViewArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_list_view_array(&combined).unwrap(), &expected);

    // LargeListView (i64 offsets).
    let combined =
        ScalarValue::iter_to_array(build_list_view::<i64>(rows())).unwrap();
    let expected = LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_large_list_view_array(&combined).unwrap(), &expected);
}
6346
/// `iter_to_array` over two string-list scalars equals a two-row list built
/// directly with `ListBuilder<StringBuilder>`.
#[test]
fn iter_to_array_string_test() {
    let rows: [&[&str]; 2] = [&["foo", "bar", "baz"], &["rust", "world"]];

    let scalars: Vec<ScalarValue> = rows
        .iter()
        .map(|row| ScalarValue::List(Arc::new(single_row_list_array(row.to_vec()))))
        .collect();

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let combined_list = combined.as_list::<i32>();

    // Build the expected array one list row at a time.
    let mut expected_builder = ListBuilder::new(StringBuilder::with_capacity(5, 25));
    for row in rows {
        for &word in row {
            expected_builder.values().append_value(word);
        }
        expected_builder.append(true);
    }
    let expected = expected_builder.finish();

    assert_eq!(combined_list, &expected);
}
6376
/// For List, FixedSizeList and ListView arrays, a scalar made from row `i`
/// must report `eq_array(array, i)` as true, null rows included.
#[test]
fn test_list_scalar_eq_to_array() {
    let arrays: [ArrayRef; 3] = [
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ])),
        Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
            vec![
                Some(vec![Some(0), Some(1), Some(2)]),
                None,
                Some(vec![Some(3), None, Some(5)]),
            ],
            3,
        )),
        Arc::new(ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ])),
    ];

    for array in &arrays {
        for row in 0..array.len() {
            // A one-row slice becomes the scalar's backing array.
            let single_row = array.slice(row, 1);
            let scalar = match array.data_type() {
                DataType::List(_) => {
                    ScalarValue::List(Arc::new(single_row.as_list::<i32>().clone()))
                }
                DataType::FixedSizeList(_, _) => ScalarValue::FixedSizeList(Arc::new(
                    single_row.as_fixed_size_list().clone(),
                )),
                DataType::ListView(_) => ScalarValue::ListView(Arc::new(
                    single_row.as_list_view::<i32>().clone(),
                )),
                _ => unreachable!(),
            };
            assert!(scalar.eq_array(array, row).unwrap());
        }
    }
}
6421
/// Comparing a Utf8 scalar against an Int32 array fails with a message that
/// names the failed downcast.
#[test]
fn test_eq_array_err_message() {
    let text_scalar = ScalarValue::Utf8(Some("123".to_string()));
    let int_array = Arc::new(Int32Array::from(vec![123])) as ArrayRef;

    let error = text_scalar.eq_array(&int_array, 0).unwrap_err();

    assert_starts_with(
        error.message(),
        "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
    );
}
6432
/// `add` accepts the right-hand side both by reference and by value.
#[test]
fn scalar_add_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(246.));

    assert_eq!(lhs.add(&rhs)?, expected);
    assert_eq!(lhs.add(rhs)?, expected);
    Ok(())
}
6447
/// Adding `Int32(None)` to an Int32 value yields `Int32(None)`.
#[test]
fn scalar_add_trait_null_test() -> Result<()> {
    let forty_two = ScalarValue::Int32(Some(42));

    let sum = forty_two.add(ScalarValue::Int32(None))?;

    assert_eq!(sum, ScalarValue::Int32(None));
    Ok(())
}
6459
/// `add` wraps on overflow: `i32::MAX + 1` gives `i32::MIN`.
#[test]
fn scalar_add_trait_wrapping_overflow_test() -> Result<()> {
    let max = ScalarValue::Int32(Some(i32::MAX));

    let wrapped = max.add(ScalarValue::Int32(Some(1)))?;

    assert_eq!(wrapped, ScalarValue::Int32(Some(i32::MIN)));
    Ok(())
}
6469
/// Adding Decimal128 operands of (10, 2) and (9, 1) produces a (11, 2)
/// result with value 163.
#[test]
fn scalar_add_trait_decimal_scale_test() -> Result<()> {
    let lhs = ScalarValue::Decimal128(Some(123), 10, 2);
    let rhs = ScalarValue::Decimal128(Some(4), 9, 1);

    let sum = lhs.add(rhs)?;

    assert_eq!(sum, ScalarValue::Decimal128(Some(163), 11, 2));
    Ok(())
}
6482
/// Adding Decimal256 operands of (10, 2) and (9, 1) produces a (11, 2)
/// result with value 163.
#[test]
fn scalar_add_trait_decimal256_scale_test() -> Result<()> {
    let lhs = ScalarValue::Decimal256(Some(i256::from(123)), 10, 2);
    let rhs = ScalarValue::Decimal256(Some(i256::from(4)), 9, 1);

    let sum = lhs.add(rhs)?;

    assert_eq!(sum, ScalarValue::Decimal256(Some(i256::from(163)), 11, 2));
    Ok(())
}
6495
/// Addition works at the extreme scale `i8::MIN` with maximum precision:
/// 1 + 1 = 2 with precision and scale unchanged.
#[test]
fn scalar_add_trait_decimal_negative_scale_test() -> Result<()> {
    let unit =
        |value: i128| ScalarValue::Decimal128(Some(value), DECIMAL128_MAX_PRECISION, i8::MIN);

    let sum = unit(1).add(unit(1))?;

    assert_eq!(sum, unit(2));
    Ok(())
}
6509
/// `sub` accepts the right-hand side both by reference and by value.
#[test]
fn scalar_sub_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(0.));

    assert_eq!(lhs.sub(&rhs)?, expected);
    assert_eq!(lhs.sub(rhs)?, expected);
    Ok(())
}
6524
/// Int32 subtraction in both operand orders: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int32_test() -> Result<()> {
    let small = ScalarValue::Int32(Some(42));
    let large = ScalarValue::Int32(Some(100));

    let small_minus_large = small.sub(&large)?;
    assert_eq!(small_minus_large, ScalarValue::Int32(Some(-58)));

    let large_minus_small = large.sub(small)?;
    assert_eq!(large_minus_small, ScalarValue::Int32(Some(58)));
    Ok(())
}
6533
/// `sub_checked` reports an arithmetic overflow for `i32::MAX - i32::MIN`.
#[test]
fn scalar_sub_trait_int32_overflow_test() {
    let max = ScalarValue::Int32(Some(i32::MAX));
    let min = ScalarValue::Int32(Some(i32::MIN));

    let message = max.sub_checked(&min).unwrap_err().strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
    );
}
6547
/// Int64 subtraction in both operand orders: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int64_test() -> Result<()> {
    let small = ScalarValue::Int64(Some(42));
    let large = ScalarValue::Int64(Some(100));

    let small_minus_large = small.sub(&large)?;
    assert_eq!(small_minus_large, ScalarValue::Int64(Some(-58)));

    let large_minus_small = large.sub(small)?;
    assert_eq!(large_minus_small, ScalarValue::Int64(Some(58)));
    Ok(())
}
6556
/// `sub_checked` reports an arithmetic overflow for `i64::MAX - i64::MIN`.
#[test]
fn scalar_sub_trait_int64_overflow_test() {
    let max = ScalarValue::Int64(Some(i64::MAX));
    let min = ScalarValue::Int64(Some(i64::MIN));

    let message = max.sub_checked(&min).unwrap_err().strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808"
    );
}
6570
/// Runs `check_scalar_add_overflow` with MAX + MAX for every signed and
/// unsigned integer width.
#[test]
fn scalar_add_overflow_test() -> Result<()> {
    // Expands to one check for the given Arrow type, scalar variant and
    // native integer type.
    macro_rules! check_max_plus_max {
        ($arrow_type:ty, $variant:ident, $native:ty) => {
            check_scalar_add_overflow::<$arrow_type>(
                ScalarValue::$variant(Some(<$native>::MAX)),
                ScalarValue::$variant(Some(<$native>::MAX)),
            )
        };
    }

    check_max_plus_max!(Int8Type, Int8, i8);
    check_max_plus_max!(UInt8Type, UInt8, u8);
    check_max_plus_max!(Int16Type, Int16, i16);
    check_max_plus_max!(UInt16Type, UInt16, u16);
    check_max_plus_max!(Int32Type, Int32, i32);
    check_max_plus_max!(UInt32Type, UInt32, u32);
    check_max_plus_max!(Int64Type, Int64, i64);
    check_max_plus_max!(UInt64Type, UInt64, u64);

    Ok(())
}
6608
/// Runs `check_scalar_decimal_add_overflow` with MAX + 1 for Decimal128 and
/// Decimal256 at maximum precision and scale 0.
#[test]
fn scalar_decimal_add_overflow_test() {
    let max_128 = ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0);
    let one_128 = ScalarValue::Decimal128(Some(1), DECIMAL128_MAX_PRECISION, 0);
    check_scalar_decimal_add_overflow::<Decimal128Type>(max_128, one_128);

    let max_256 = ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0);
    let one_256 = ScalarValue::Decimal256(Some(i256::ONE), DECIMAL256_MAX_PRECISION, 0);
    check_scalar_decimal_add_overflow::<Decimal256Type>(max_256, one_256);
}
6620
/// When `try_add_checked_in_place` fails with an overflow, the left-hand
/// scalar must be left unchanged.
#[test]
fn scalar_decimal_in_place_add_error_preserves_lhs() {
    let mut lhs = ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0);
    let before = lhs.clone();
    let one = ScalarValue::Decimal128(Some(1), DECIMAL128_MAX_PRECISION, 0);

    let message = lhs
        .try_add_checked_in_place(&one)
        .unwrap_err()
        .strip_backtrace();

    let expected_message = format!(
        "Arrow error: Arithmetic overflow: Overflow happened on: {} + 1",
        i128::MAX
    );
    assert_eq!(message, expected_message);
    assert_eq!(lhs, before);
}
6645
6646 fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6648 where
6649 T: ArrowNumericType,
6650 {
6651 let scalar_result = left.add_checked(&right);
6652
6653 let left_array = left.to_array().expect("Failed to convert to array");
6654 let right_array = right.to_array().expect("Failed to convert to array");
6655 let arrow_left_array = left_array.as_primitive::<T>();
6656 let arrow_right_array = right_array.as_primitive::<T>();
6657 let arrow_result = add(arrow_left_array, arrow_right_array);
6658
6659 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6660 }
6661
6662 fn check_scalar_decimal_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6664 where
6665 T: ArrowPrimitiveType,
6666 {
6667 let scalar_result = left.add(&right);
6668
6669 let left_array = left.to_array().expect("Failed to convert to array");
6670 let right_array = right.to_array().expect("Failed to convert to array");
6671 let arrow_left_array = left_array.as_primitive::<T>();
6672 let arrow_right_array = right_array.as_primitive::<T>();
6673 let arrow_result = add_wrapping(arrow_left_array, arrow_right_array);
6674
6675 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6676 }
6677
/// `interval + timestamp` must equal `timestamp + interval` for each interval
/// flavour (month-day-nano, year-month, day-time).
#[test]
fn test_interval_add_timestamp() -> Result<()> {
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);

    let intervals = [
        ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
            months: 1,
            days: 2,
            nanoseconds: 3,
        })),
        ScalarValue::IntervalYearMonth(Some(123)),
        ScalarValue::IntervalDayTime(Some(IntervalDayTime {
            days: 1,
            milliseconds: 23,
        })),
    ];

    for interval in &intervals {
        // The right-hand side is a borrow of `timestamp`; the source had this
        // argument garbled into a non-ASCII token that is not valid Rust.
        let result = interval.add(&timestamp)?;
        let expect = timestamp.add(interval)?;
        assert_eq!(result, expect);
    }
    Ok(())
}
6706
/// `try_cmp` orders same-typed scalars, puts a null below a value, and
/// errors when the two scalars have different types.
#[test]
fn test_try_cmp() {
    let one = ScalarValue::Int32(Some(1));
    let two = ScalarValue::Int32(Some(2));

    assert_eq!(ScalarValue::try_cmp(&one, &two).unwrap(), Ordering::Less);

    let null = ScalarValue::Int32(None);
    assert_eq!(ScalarValue::try_cmp(&null, &two).unwrap(), Ordering::Less);

    let mismatch =
        ScalarValue::try_cmp(&one, &ScalarValue::Int64(Some(2))).unwrap_err();
    assert_starts_with(
        mismatch.message(),
        "Uncomparable values: Int32(1), Int64(2)",
    );
}
6732
#[test]
fn scalar_decimal_test() -> Result<()> {
    let expected_type = DataType::Decimal128(10, 1);
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);

    // Basic accessors and conversion back to the raw i128.
    assert_eq!(expected_type, decimal_value.data_type());
    let raw: i128 = decimal_value.clone().try_into().unwrap();
    assert_eq!(123_i128, raw);
    assert!(!decimal_value.is_null());

    // Negation keeps the variant and flips the sign of the raw value.
    let ScalarValue::Decimal128(negated_raw, _, _) = decimal_value.arithmetic_negate()?
    else {
        unreachable!();
    };
    assert_eq!(-123, negated_raw.unwrap());

    // Scalar -> single-element array.
    let single_ref = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single = as_decimal128_array(&single_ref)?;
    assert_eq!(1, single.len());
    assert_eq!(expected_type, single.data_type().clone());
    assert_eq!(123i128, single.value(0));

    // Scalar -> array of a requested size.
    let repeated_ref = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated = as_decimal128_array(&repeated_ref)?;
    assert_eq!(10, repeated_ref.len());
    assert_eq!(expected_type, repeated_ref.data_type().clone());
    assert_eq!(123i128, repeated.value(0));
    assert_eq!(123i128, repeated.value(9));

    // The scalar equals the array element at the probed rows.
    for row in [1, 5] {
        assert!(
            decimal_value
                .eq_array(&repeated_ref, row)
                .expect("Failed to compare arrays")
        );
    }

    // Array element -> scalar round trip.
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated_ref, 5).unwrap()
    );

    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // Comparisons between decimals with the same precision and scale.
    let left = ScalarValue::Decimal128(Some(123), 10, 2);
    let right = ScalarValue::Decimal128(Some(124), 10, 2);
    assert!(!left.eq(&right));
    assert!(left < right);
    assert!(left <= right);

    // A different scale makes the two values unordered.
    let right = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, left.partial_cmp(&right));

    // Several non-null scalars -> array.
    let non_null = [1, 2, 3].map(|raw| ScalarValue::Decimal128(Some(raw), 10, 2));
    let array = ScalarValue::iter_to_array(non_null.clone()).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    // Same scalars followed by a null -> array.
    let with_null = non_null
        .into_iter()
        .chain([ScalarValue::Decimal128(None, 10, 2)]);
    let array = ScalarValue::iter_to_array(with_null).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    for (row, raw) in [1, 2, 3].into_iter().enumerate() {
        assert!(
            ScalarValue::try_new_decimal128(raw, 10, 2)
                .unwrap()
                .eq_array(&array, row)
                .expect("Failed to compare arrays")
        );
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
6848
#[test]
fn test_new_one_decimal128() {
    // (precision, scale, expected unscaled value representing 1)
    let accepted = [(5, 0, 1), (5, 1, 10), (5, 2, 100), (7, 2, 100)];
    for (precision, scale, unscaled) in accepted {
        let one = ScalarValue::new_one(&DataType::Decimal128(precision, scale)).unwrap();
        assert_eq!(one, ScalarValue::Decimal128(Some(unscaled), precision, scale));
    }

    // Negative scale, zero precision, and scale larger than precision all fail.
    let rejected = [(5, -1), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_one(&DataType::Decimal128(precision, scale)).is_err());
    }
}
6874
#[test]
fn test_new_one_decimal256() {
    // (precision, scale, expected unscaled value representing 1)
    let accepted = [(5, 0, 1_i32), (5, 1, 10), (5, 2, 100), (7, 2, 100)];
    for (precision, scale, unscaled) in accepted {
        let one = ScalarValue::new_one(&DataType::Decimal256(precision, scale)).unwrap();
        assert_eq!(
            one,
            ScalarValue::Decimal256(Some(unscaled.into()), precision, scale)
        );
    }

    // Negative scale, zero precision, and scale larger than precision all fail.
    let rejected = [(5, -1), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_one(&DataType::Decimal256(precision, scale)).is_err());
    }
}
6900
#[test]
fn test_new_ten_decimal128() {
    // (precision, scale, expected unscaled value representing 10)
    let accepted = [(5, 1, 100), (5, 2, 1000), (7, 2, 1000)];
    for (precision, scale, unscaled) in accepted {
        let ten = ScalarValue::new_ten(&DataType::Decimal128(precision, scale)).unwrap();
        assert_eq!(ten, ScalarValue::Decimal128(Some(unscaled), precision, scale));
    }

    // Negative scale, zero precision, and scale larger than precision all fail.
    let rejected = [(5, -1), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_ten(&DataType::Decimal128(precision, scale)).is_err());
    }
}
6922
#[test]
fn test_new_ten_decimal256() {
    // (precision, scale, expected unscaled value representing 10)
    let accepted = [(5, 1, 100_i32), (5, 2, 1000), (7, 2, 1000)];
    for (precision, scale, unscaled) in accepted {
        let ten = ScalarValue::new_ten(&DataType::Decimal256(precision, scale)).unwrap();
        assert_eq!(
            ten,
            ScalarValue::Decimal256(Some(unscaled.into()), precision, scale)
        );
    }

    // Negative scale, zero precision, and scale larger than precision all fail.
    let rejected = [(5, -1), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_ten(&DataType::Decimal256(precision, scale)).is_err());
    }
}
6944
#[test]
fn test_new_negative_one_decimal128() {
    // (precision, scale, expected unscaled value representing -1)
    for (precision, scale, unscaled) in [(5, 0, -1), (5, 2, -100)] {
        let data_type = DataType::Decimal128(precision, scale);
        let negative_one = ScalarValue::new_negative_one(&data_type).unwrap();
        assert_eq!(
            negative_one,
            ScalarValue::Decimal128(Some(unscaled), precision, scale)
        );
    }
}
6956
#[test]
fn test_list_partial_cmp() {
    // Builds a single-row `ScalarValue::List` of Int64 from the given items.
    let list = |items: Vec<Option<i64>>| {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(items)]),
        ))
    };

    // (left items, right items, expected ordering of left relative to right)
    let list_cases: Vec<(Vec<Option<i64>>, Vec<Option<i64>>, Ordering)> = vec![
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Equal,
        ),
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(30)],
            Ordering::Greater,
        ),
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(10), Some(2), Some(30)],
            Ordering::Less,
        ),
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(2), Some(3)],
            Ordering::Less,
        ),
        (
            vec![Some(2), Some(3), Some(4)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        (
            vec![None, Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Greater,
        ),
    ];
    for (lhs, rhs, expected) in list_cases {
        assert_eq!(list(lhs).partial_cmp(&list(rhs)), Some(expected));
    }

    // The remaining list flavors are checked with a leading null on the left
    // against a fully populated list on the right.
    let with_null = || -> Vec<Option<i64>> { vec![None, Some(2), Some(3)] };
    let without_null = || -> Vec<Option<i64>> { vec![Some(1), Some(2), Some(3)] };

    let a = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(with_null())]),
    ));
    let b = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
            without_null(),
        )]),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    let a = ScalarValue::FixedSizeList(Arc::new(
        FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
            vec![Some(with_null())],
            3,
        ),
    ));
    let b = ScalarValue::FixedSizeList(Arc::new(
        FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
            vec![Some(without_null())],
            3,
        ),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    let a = ScalarValue::ListView(Arc::new(
        ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(with_null())]),
    ));
    let b = ScalarValue::ListView(Arc::new(
        ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
            without_null(),
        )]),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    let a = ScalarValue::LargeListView(Arc::new(
        LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
            with_null(),
        )]),
    ));
    let b = ScalarValue::LargeListView(Arc::new(
        LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(
            without_null(),
        )]),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
}
7150
#[test]
fn scalar_value_to_array_u64() -> Result<()> {
    // A non-null scalar becomes a one-element array holding the value.
    let some_ref = ScalarValue::UInt64(Some(13u64))
        .to_array()
        .expect("Failed to convert to array");
    let some = as_uint64_array(&some_ref)?;
    assert_eq!(some.len(), 1);
    assert!(!some.is_null(0));
    assert_eq!(some.value(0), 13);

    // A null scalar becomes a one-element array whose only slot is null.
    let none_ref = ScalarValue::UInt64(None)
        .to_array()
        .expect("Failed to convert to array");
    let none = as_uint64_array(&none_ref)?;
    assert_eq!(none.len(), 1);
    assert!(none.is_null(0));
    Ok(())
}
7167
#[test]
fn scalar_value_to_array_u32() -> Result<()> {
    // A non-null scalar becomes a one-element array holding the value.
    let some_ref = ScalarValue::UInt32(Some(13u32))
        .to_array()
        .expect("Failed to convert to array");
    let some = as_uint32_array(&some_ref)?;
    assert_eq!(some.len(), 1);
    assert!(!some.is_null(0));
    assert_eq!(some.value(0), 13);

    // A null scalar becomes a one-element array whose only slot is null.
    let none_ref = ScalarValue::UInt32(None)
        .to_array()
        .expect("Failed to convert to array");
    let none = as_uint32_array(&none_ref)?;
    assert_eq!(none.len(), 1);
    assert!(none.is_null(0));
    Ok(())
}
7184
#[test]
fn scalar_list_null_to_array() {
    // Building a list from no values yields one row with no child values.
    let no_values: &[ScalarValue] = &[];
    let list = ScalarValue::new_list_nullable(no_values, &DataType::UInt64);

    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 0);
}
7192
#[test]
fn scalar_large_list_null_to_array() {
    // Building a large list from no values yields one row with no child values.
    let no_values: &[ScalarValue] = &[];
    let list = ScalarValue::new_large_list(no_values, &DataType::UInt64);

    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 0);
}
7200
#[test]
fn scalar_list_to_array() -> Result<()> {
    let values = [Some(100), None, Some(101)].map(ScalarValue::UInt64);

    // All three scalars end up in a single list row.
    let list = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // The row's child array preserves values and the null in the middle.
    let child_ref = list.value(0);
    let child = as_uint64_array(&child_ref)?;
    assert_eq!(child.len(), 3);
    assert_eq!(child.value(0), 100);
    assert!(child.is_null(1));
    assert_eq!(child.value(2), 101);
    Ok(())
}
7220
#[test]
fn scalar_large_list_to_array() -> Result<()> {
    let values = [Some(100), None, Some(101)].map(ScalarValue::UInt64);

    // All three scalars end up in a single large-list row.
    let list = ScalarValue::new_large_list(&values, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // The row's child array preserves values and the null in the middle.
    let child_ref = list.value(0);
    let child = as_uint64_array(&child_ref)?;
    assert_eq!(child.len(), 3);
    assert_eq!(child.value(0), 100);
    assert!(child.is_null(1));
    assert_eq!(child.value(2), 101);
    Ok(())
}
7240
// Wraps each element of `$input` in `ScalarValue::$variant`, converts the
// scalars with `ScalarValue::iter_to_array`, and asserts the result equals
// `$array_ty::from($input)`. `$input` is expanded twice.
macro_rules! check_scalar_iter {
    ($variant:ident, $array_ty:ident, $input:expr) => {{
        let expected: ArrayRef = Arc::new($array_ty::from($input));

        let scalars: Vec<_> = $input
            .iter()
            .map(|item| ScalarValue::$variant(*item))
            .collect();
        let actual = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        assert_eq!(&actual, &expected);
    }};
}
7254
// Same check as `check_scalar_iter`, for scalar variants that carry a second
// field: each element is wrapped as `ScalarValue::$variant(value, None)`.
macro_rules! check_scalar_iter_tz {
    ($variant:ident, $array_ty:ident, $input:expr) => {{
        let expected: ArrayRef = Arc::new($array_ty::from($input));

        let scalars: Vec<_> = $input
            .iter()
            .map(|item| ScalarValue::$variant(*item, None))
            .collect();
        let actual = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        assert_eq!(&actual, &expected);
    }};
}
7271
// Same check as `check_scalar_iter`, for string variants: each optional
// string slice is converted to an owned `String` before being wrapped.
macro_rules! check_scalar_iter_string {
    ($variant:ident, $array_ty:ident, $input:expr) => {{
        let expected: ArrayRef = Arc::new($array_ty::from($input));

        let scalars: Vec<_> = $input
            .iter()
            .map(|item| ScalarValue::$variant(item.map(|text| text.to_string())))
            .collect();
        let actual = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        assert_eq!(&actual, &expected);
    }};
}
7288
// Same check as `check_scalar_iter`, for binary variants: each optional byte
// sequence is copied into a `Vec<u8>`, and the expected array is collected
// from the same owned byte vectors.
macro_rules! check_scalar_iter_binary {
    ($variant:ident, $array_ty:ident, $input:expr) => {{
        let expected: $array_ty = $input
            .iter()
            .map(|item| item.map(|bytes| bytes.to_vec()))
            .collect();
        let expected: ArrayRef = Arc::new(expected);

        let scalars: Vec<_> = $input
            .iter()
            .map(|item| ScalarValue::$variant(item.map(|bytes| bytes.to_vec())))
            .collect();
        let actual = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        assert_eq!(&actual, &expected);
    }};
}
7308
#[test]
fn scalar_iter_to_array_boolean() {
    // Boolean.
    check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);

    // Signed integers.
    check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);

    // Unsigned integers.
    check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);

    // Floating point.
    check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
    check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);

    // Timestamps at every unit, built with `None` as the second field.
    check_scalar_iter_tz!(
        TimestampSecond,
        TimestampSecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMillisecond,
        TimestampMillisecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMicrosecond,
        TimestampMicrosecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampNanosecond,
        TimestampNanosecondArray,
        vec![Some(1), None, Some(3)]
    );

    // Strings.
    check_scalar_iter_string!(
        Utf8,
        StringArray,
        vec![Some("foo"), None, Some("bar")]
    );
    check_scalar_iter_string!(
        LargeUtf8,
        LargeStringArray,
        vec![Some("foo"), None, Some("bar")]
    );

    // Binary.
    check_scalar_iter_binary!(
        Binary,
        BinaryArray,
        [Some(b"foo"), None, Some(b"bar")]
    );
    check_scalar_iter_binary!(
        LargeBinary,
        LargeBinaryArray,
        [Some(b"foo"), None, Some(b"bar")]
    );
}
7367
#[test]
fn scalar_iter_to_array_empty() {
    // An empty input must be rejected with a descriptive error.
    let no_scalars: Vec<ScalarValue> = Vec::new();

    let err = ScalarValue::iter_to_array(no_scalars).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Empty iterator passed to ScalarValue::iter_to_array"),
        "{}",
        err
    );
}
7381
#[test]
fn scalar_iter_to_dictionary() {
    // Wraps an optional string as a dictionary scalar with Int32 keys.
    fn make_val(v: Option<String>) -> ScalarValue {
        ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Utf8(v)),
        )
    }

    let scalars = [Some("Foo"), None, Some("Bar")]
        .map(|text| make_val(text.map(String::from)));

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let dictionary = as_dictionary_array::<Int32Type>(&array).unwrap();
    let dictionary_values = as_string_array(dictionary.values()).unwrap();

    // Resolve every key back to its string, keeping null keys as `None`.
    let mut resolved = Vec::new();
    for key in dictionary.keys_iter() {
        resolved.push(key.map(|k| {
            assert!(dictionary_values.is_valid(k));
            dictionary_values.value(k)
        }));
    }

    let expected = vec![Some("Foo"), None, Some("Bar")];
    assert_eq!(resolved, expected);
}
7413
#[test]
fn scalar_iter_to_array_mismatched_types() {
    // The first scalar is Boolean, so the following Int32 must be rejected.
    let scalars = [
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Int32(Some(5)),
    ];

    let err = ScalarValue::iter_to_array(scalars).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
        "{}",
        err
    );
}
7424
#[test]
fn scalar_try_from_array_null() {
    let contents = [Some(33), None];
    let array: ArrayRef = Arc::new(Int64Array::from(contents.to_vec()));

    // Each row converts to the matching Int64 scalar, including the null row.
    for (row, value) in contents.into_iter().enumerate() {
        assert_eq!(
            ScalarValue::Int64(value),
            ScalarValue::try_from_array(&array, row).unwrap()
        );
    }
}
7439
#[test]
fn scalar_try_from_array_list_array_null() {
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        None,
    ]);
    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));

    // Row 0 is a populated list and row 1 is a null list; both scalars must
    // report the same list data type.
    for row in [0, 1] {
        let scalar = ScalarValue::try_from_array(&list, row).unwrap();
        assert_eq!(scalar.data_type(), expected_type);
    }
}
7456
#[test]
fn scalar_try_from_list_datatypes() {
    let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // Converting a data type into a scalar must give `expected`, and
    // `expected` must itself be null.
    let assert_null_scalar = |data_type: &DataType, expected: ScalarValue| {
        let scalar = ScalarValue::try_from(data_type).unwrap();
        assert_eq!(expected, scalar);
        assert!(expected.is_null());
    };

    let list_type = DataType::List(Arc::clone(&inner_field));
    assert_null_scalar(
        &list_type,
        ScalarValue::List(Arc::new(
            new_null_array(&list_type, 1).as_list::<i32>().clone(),
        )),
    );

    let large_list_type = DataType::LargeList(Arc::clone(&inner_field));
    assert_null_scalar(
        &large_list_type,
        ScalarValue::LargeList(Arc::new(
            new_null_array(&large_list_type, 1).as_list::<i64>().clone(),
        )),
    );

    let fixed_size_list_type = DataType::FixedSizeList(Arc::clone(&inner_field), 5);
    assert_null_scalar(
        &fixed_size_list_type,
        ScalarValue::FixedSizeList(Arc::new(
            new_null_array(&fixed_size_list_type, 1)
                .as_fixed_size_list()
                .clone(),
        )),
    );

    let list_view_type = DataType::ListView(Arc::clone(&inner_field));
    assert_null_scalar(
        &list_view_type,
        ScalarValue::ListView(Arc::new(
            new_null_array(&list_view_type, 1)
                .as_list_view::<i32>()
                .clone(),
        )),
    );

    let large_list_view_type = DataType::LargeListView(Arc::clone(&inner_field));
    assert_null_scalar(
        &large_list_view_type,
        ScalarValue::LargeListView(Arc::new(
            new_null_array(&large_list_view_type, 1)
                .as_list_view::<i64>()
                .clone(),
        )),
    );
}
7521
#[test]
fn scalar_try_from_list_of_list() {
    // List<List<Int32>> with nullable fields at both levels.
    let inner_list = DataType::List(Arc::new(Field::new_list_field(
        DataType::Int32,
        true,
    )));
    let nested_type = DataType::List(Arc::new(Field::new_list_field(inner_list, true)));

    let scalar = ScalarValue::try_from(&nested_type).unwrap();

    // The expected scalar wraps a one-row null array of the same nested type.
    let expected = ScalarValue::List(Arc::new(
        new_null_array(&nested_type, 1).as_list::<i32>().clone(),
    ));

    assert_eq!(expected, scalar);
}
7549
#[test]
fn scalar_try_from_not_equal_list_nested_list() {
    // List<Int32>
    let flat_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    // List<List<Int32>>
    let nested_type =
        DataType::List(Arc::new(Field::new_list_field(flat_type.clone(), true)));

    let flat_scalar = ScalarValue::try_from(&flat_type).unwrap();
    let nested_scalar = ScalarValue::try_from(&nested_type).unwrap();

    // Null scalars of different list depths must not compare equal.
    assert_ne!(flat_scalar, nested_scalar);
}
7566
#[test]
fn scalar_try_from_dict_datatype() {
    let dict_type =
        DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));

    // A dictionary type converts to a dictionary scalar holding a null value
    // of the dictionary's value type.
    let actual = ScalarValue::try_from(&dict_type).unwrap();
    let expected = ScalarValue::Dictionary(
        Box::new(DataType::Int8),
        Box::new(ScalarValue::Utf8(None)),
    );
    assert_eq!(expected, actual);
}
7578
#[test]
fn size_of_scalar() {
    // Pins the in-memory size of the enum so an accidental change to it
    // shows up as a test failure.
    let actual_bytes = size_of::<ScalarValue>();
    assert_eq!(actual_bytes, 64);
}
7593
#[test]
fn memory_size() {
    let scalar_bytes = size_of::<ScalarValue>();

    // A binary scalar accounts for its own size plus the buffer's capacity.
    let binary = ScalarValue::Binary(Some(Vec::with_capacity(10)));
    let binary_size = binary.size();
    assert_eq!(binary_size, scalar_bytes + 10,);

    // A vector with capacity 10 holding one scalar: nine empty slots plus
    // the full size of the stored scalar.
    let mut scalars = Vec::with_capacity(10);
    scalars.push(binary);
    assert_eq!(scalars.capacity(), 10);
    let expected_vec_size =
        size_of::<Vec<ScalarValue>>() + (9 * scalar_bytes) + binary_size;
    assert_eq!(ScalarValue::size_of_vec(&scalars), expected_vec_size,);

    #[allow(clippy::allow_attributes, clippy::mutable_key_type)]
    let mut set = HashSet::with_capacity(0);
    set.insert(scalars.pop().unwrap());
    // The capacity is read back after the insert and used as-is.
    let set_capacity = set.capacity();
    let expected_set_size = size_of::<HashSet<ScalarValue>>()
        + ((set_capacity - 1) * scalar_bytes)
        + binary_size;
    assert_eq!(ScalarValue::size_of_hashset(&set), expected_set_size,);
}
7623
#[test]
fn scalar_eq_array() {
    // Casts every `Some` value of `$input` to `$ty`, keeping `None`s.
    macro_rules! make_typed_vec {
        ($input:expr, $ty:ident) => {{
            $input
                .iter()
                .map(|item| item.map(|value| value as $ty))
                .collect::<Vec<_>>()
        }};
    }

    let bool_vals = [Some(true), None, Some(false)];
    let f32_vals = [Some(-1.0), None, Some(1.0)];
    let f64_vals = make_typed_vec!(f32_vals, f64);

    let i8_vals = [Some(-1), None, Some(1)];
    let i16_vals = make_typed_vec!(i8_vals, i16);
    let i32_vals = make_typed_vec!(i8_vals, i32);
    let i64_vals = make_typed_vec!(i8_vals, i64);

    let u8_vals = [Some(0), None, Some(1)];
    let u16_vals = make_typed_vec!(u8_vals, u16);
    let u32_vals = make_typed_vec!(u8_vals, u32);
    let u64_vals = make_typed_vec!(u8_vals, u64);

    let str_vals = [Some("foo"), None, Some("bar")];

    let interval_dt_vals = [
        Some(IntervalDayTime::MINUS_ONE),
        None,
        Some(IntervalDayTime::ONE),
    ];
    let interval_mdn_vals = [
        Some(IntervalMonthDayNano::MINUS_ONE),
        None,
        Some(IntervalMonthDayNano::ONE),
    ];

    // Pairs an array with the scalars expected at each of its indices. The
    // checks below require the values at different indices to be distinct.
    #[derive(Debug)]
    struct TestCase {
        array: ArrayRef,
        scalars: Vec<ScalarValue>,
    }

    // Builds a `TestCase` for scalar variants that wrap the value directly,
    // or (four-argument form) variants that also carry a second field.
    macro_rules! make_test_case {
        ($input:expr, $array_ty:ident, $scalar_ty:ident) => {{
            TestCase {
                array: Arc::new($input.iter().collect::<$array_ty>()),
                scalars: $input
                    .iter()
                    .map(|item| ScalarValue::$scalar_ty(*item))
                    .collect(),
            }
        }};

        ($input:expr, $array_ty:ident, $scalar_ty:ident, $tz:expr) => {{
            let tz = $tz;
            TestCase {
                array: Arc::new($input.iter().collect::<$array_ty>()),
                scalars: $input
                    .iter()
                    .map(|item| ScalarValue::$scalar_ty(*item, tz.clone()))
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for string variants from optional string slices.
    macro_rules! make_str_test_case {
        ($input:expr, $array_ty:ident, $scalar_ty:ident) => {{
            TestCase {
                array: Arc::new($input.iter().cloned().collect::<$array_ty>()),
                scalars: $input
                    .iter()
                    .map(|item| ScalarValue::$scalar_ty(item.map(|text| text.to_string())))
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for binary variants from the bytes of string slices.
    macro_rules! make_binary_test_case {
        ($input:expr, $array_ty:ident, $scalar_ty:ident) => {{
            TestCase {
                array: Arc::new($input.iter().cloned().collect::<$array_ty>()),
                scalars: $input
                    .iter()
                    .map(|item| {
                        ScalarValue::$scalar_ty(
                            item.map(|text| text.as_bytes().to_vec()),
                        )
                    })
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for string dictionaries keyed by `$index_ty`.
    macro_rules! make_str_dict_test_case {
        ($input:expr, $index_ty:ident) => {{
            TestCase {
                array: Arc::new(
                    $input
                        .iter()
                        .cloned()
                        .collect::<DictionaryArray<$index_ty>>(),
                ),
                scalars: $input
                    .iter()
                    .map(|item| {
                        ScalarValue::Dictionary(
                            Box::new($index_ty::DATA_TYPE),
                            Box::new(ScalarValue::Utf8(
                                item.map(|text| text.to_string()),
                            )),
                        )
                    })
                    .collect(),
            }
        }};
    }

    let test_cases = vec![
        make_test_case!(bool_vals, BooleanArray, Boolean),
        make_test_case!(f32_vals, Float32Array, Float32),
        make_test_case!(f64_vals, Float64Array, Float64),
        make_test_case!(i8_vals, Int8Array, Int8),
        make_test_case!(i16_vals, Int16Array, Int16),
        make_test_case!(i32_vals, Int32Array, Int32),
        make_test_case!(i64_vals, Int64Array, Int64),
        make_test_case!(u8_vals, UInt8Array, UInt8),
        make_test_case!(u16_vals, UInt16Array, UInt16),
        make_test_case!(u32_vals, UInt32Array, UInt32),
        make_test_case!(u64_vals, UInt64Array, UInt64),
        make_str_test_case!(str_vals, StringArray, Utf8),
        make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
        make_binary_test_case!(str_vals, BinaryArray, Binary),
        make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
        make_test_case!(i32_vals, Date32Array, Date32),
        make_test_case!(i64_vals, Date64Array, Date64),
        make_test_case!(i32_vals, Time32SecondArray, Time32Second),
        make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
        make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
        make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
        make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
        make_test_case!(
            i64_vals,
            TimestampSecondArray,
            TimestampSecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            Some("UTC".into())
        ),
        make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
        make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
        make_test_case!(
            interval_mdn_vals,
            IntervalMonthDayNanoArray,
            IntervalMonthDayNano
        ),
        make_str_dict_test_case!(str_vals, Int8Type),
        make_str_dict_test_case!(str_vals, Int16Type),
        make_str_dict_test_case!(str_vals, Int32Type),
        make_str_dict_test_case!(str_vals, Int64Type),
        make_str_dict_test_case!(str_vals, UInt8Type),
        make_str_dict_test_case!(str_vals, UInt16Type),
        make_str_dict_test_case!(str_vals, UInt32Type),
        make_str_dict_test_case!(str_vals, UInt64Type),
    ];

    for TestCase { array, scalars } in test_cases {
        println!("**** Test Case *****");
        println!("Input array type: {}", array.data_type());
        println!("Input scalars: {scalars:#?}");
        assert_eq!(array.len(), scalars.len());

        for (index, scalar) in scalars.into_iter().enumerate() {
            // The scalar must match the array element at its own index...
            let matches_own_row = scalar
                .eq_array(&array, index)
                .expect("Failed to compare arrays");
            assert!(
                matches_own_row,
                "Expected {scalar:?} to be equal to {array:?} at index {index}"
            );

            // ...and must not match the element at any other index.
            for other_index in (0..array.len()).filter(|&row| row != index) {
                let matches_other_row = scalar
                    .eq_array(&array, other_index)
                    .expect("Failed to compare arrays");
                assert!(
                    !matches_other_row,
                    "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
                );
            }
        }
    }
}
7853
#[test]
fn scalar_partial_ordering() {
    let i64_zero = ScalarValue::Int64(Some(0));
    let i64_33 = ScalarValue::Int64(Some(33));
    let i32_33 = ScalarValue::Int32(Some(33));

    // Same-typed values order by their numeric value.
    assert_eq!(i64_33.partial_cmp(&i64_zero), Some(Ordering::Greater));
    assert_eq!(i64_zero.partial_cmp(&i64_33), Some(Ordering::Less));
    assert_eq!(i64_33.partial_cmp(&i64_33), Some(Ordering::Equal));

    // Values of different types are unordered, in both directions.
    assert_eq!(i64_33.partial_cmp(&i32_33), None);
    assert_eq!(i32_33.partial_cmp(&i64_33), None);

    // Structs with identical field names compare as `Less` here: the first
    // field is smaller on the left even though the second is larger.
    let upper_low = ScalarValue::from(vec![
        ("A", ScalarValue::from(1.0)),
        ("B", ScalarValue::from("Z")),
    ]);
    let upper_high = ScalarValue::from(vec![
        ("A", ScalarValue::from(2.0)),
        ("B", ScalarValue::from("A")),
    ]);
    assert_eq!(upper_low.partial_cmp(&upper_high), Some(Ordering::Less));

    // Structs whose field names differ (here only by case) are unordered.
    let lower_high = ScalarValue::from(vec![
        ("a", ScalarValue::from(2.0)),
        ("b", ScalarValue::from("A")),
    ]);
    assert_eq!(upper_low.partial_cmp(&lower_high), None);
}
7899
/// `&str`, `String` and `FromStr` conversions must all produce the same
/// `ScalarValue::Utf8` value.
#[test]
fn test_scalar_value_from_string() {
    let expected = ScalarValue::Utf8(Some("foo".to_string()));

    assert_eq!(ScalarValue::from("foo"), expected);
    assert_eq!(ScalarValue::from("foo".to_string()), expected);
    assert_eq!(ScalarValue::from_str("foo").unwrap(), expected);
}
7909
/// Round-trips a nested struct scalar (`A`, `B`, `C` and an inner struct `D`
/// with fields `e`, `f`) through arrays, `try_from_array`, the
/// `Vec<(&str, ScalarValue)>` conversion and `iter_to_array`.
#[test]
fn test_scalar_struct() {
    let field_a = Arc::new(Field::new("A", DataType::Int32, false));
    let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
    let field_c = Arc::new(Field::new("C", DataType::Utf8, false));

    let field_e = Arc::new(Field::new("e", DataType::Int16, false));
    let field_f = Arc::new(Field::new("f", DataType::Int64, false));
    let nested_fields = vec![Arc::clone(&field_e), Arc::clone(&field_f)];
    let field_d = Arc::new(Field::new(
        "D",
        DataType::Struct(nested_fields.into()),
        false,
    ));

    // Builds the struct array column by column; every vector must have the
    // same length.
    let struct_of = |a: Vec<i32>,
                     b: Vec<bool>,
                     c: Vec<&str>,
                     e: Vec<i16>,
                     f: Vec<i64>| {
        let inner = StructArray::from(vec![
            (
                Arc::clone(&field_e),
                Arc::new(Int16Array::from(e)) as ArrayRef,
            ),
            (
                Arc::clone(&field_f),
                Arc::new(Int64Array::from(f)) as ArrayRef,
            ),
        ]);
        StructArray::from(vec![
            (
                Arc::clone(&field_a),
                Arc::new(Int32Array::from(a)) as ArrayRef,
            ),
            (
                Arc::clone(&field_b),
                Arc::new(BooleanArray::from(b)) as ArrayRef,
            ),
            (
                Arc::clone(&field_c),
                Arc::new(StringArray::from(c)) as ArrayRef,
            ),
            (Arc::clone(&field_d), Arc::new(inner) as ArrayRef),
        ])
    };

    // Builds one struct scalar through the `Vec<(&str, ScalarValue)>` conversion.
    let row_scalar = |a: i32, b: bool, c: &str, e: i16, f: i64| {
        ScalarValue::from(vec![
            ("A", ScalarValue::from(a)),
            ("B", ScalarValue::from(b)),
            ("C", ScalarValue::from(c)),
            (
                "D",
                ScalarValue::from(vec![
                    ("e", ScalarValue::from(e)),
                    ("f", ScalarValue::from(f)),
                ]),
            ),
        ])
    };

    let scalar = ScalarValue::Struct(Arc::new(struct_of(
        vec![23],
        vec![false],
        vec!["Hello"],
        vec![2],
        vec![3],
    )));

    // Expanding the scalar to two rows repeats every column value.
    let array = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected: ArrayRef = Arc::new(struct_of(
        vec![23, 23],
        vec![false, false],
        vec!["Hello", "Hello"],
        vec![2, 2],
        vec![3, 3],
    ));
    assert_eq!(&array, &expected);

    // Reading a row back out of the array yields the original scalar.
    let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
    assert_eq!(constructed, scalar);

    // A scalar created from the data type alone is null; its Debug output
    // lists the field names with empty values.
    let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
    assert!(none_scalar.is_null());
    assert_eq!(
        format!("{none_scalar:?}"),
        String::from("Struct({A:,B:,C:,D:})")
    );

    // Building the scalar from (name, value) pairs gives the same value.
    let constructed = row_scalar(23, false, "Hello", 2, 3);
    assert_eq!(constructed, scalar);

    // Several struct scalars combine into one struct array, row by row.
    let scalars = vec![
        row_scalar(23, false, "Hello", 2, 3),
        row_scalar(7, true, "World", 4, 5),
        row_scalar(-1000, true, "!!!!!", 6, 7),
    ];
    let array = ScalarValue::iter_to_array(scalars).unwrap();

    let expected: ArrayRef = Arc::new(struct_of(
        vec![23, 7, -1000],
        vec![false, true, true],
        vec!["Hello", "World", "!!!!!"],
        vec![2, 4, 6],
        vec![3, 5, 7],
    ));
    assert_eq!(&array, &expected);
}
8084
/// Feeds one array of (nearly) every supported type through
/// `round_trip_through_scalar`.
#[test]
fn round_trip() {
    // Arrays that need a builder are assembled first; the plain `from(vec)`
    // arrays are written inline in `cases` below.

    // Dictionary-encoded strings: "foo", null, "bar".
    let dictionary: ArrayRef = {
        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
        builder.append("foo").unwrap();
        builder.append_null();
        builder.append("bar").unwrap();
        Arc::new(builder.finish())
    };

    // Dense union with an Int32 entry "a" and a Float64 entry "b".
    let dense_union: ArrayRef = {
        let mut builder = UnionBuilder::new_dense();
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append::<Float64Type>("b", 3.4).unwrap();
        Arc::new(builder.build().unwrap())
    };

    // Sparse union with the same two entries.
    let sparse_union: ArrayRef = {
        let mut builder = UnionBuilder::new_sparse();
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append::<Float64Type>("b", 3.4).unwrap();
        Arc::new(builder.build().unwrap())
    };

    // List rows: [A, B], an empty list, and an invalid (null) row that still
    // has a child value ("?") appended underneath it.
    let list: ArrayRef = {
        let mut builder = ListBuilder::new(StringBuilder::new());
        builder.values().append_value("A");
        builder.values().append_value("B");
        builder.append(true);
        builder.append(true);
        builder.values().append_value("?");
        builder.append(false);
        Arc::new(builder.finish())
    };

    // LargeList rows: [A, B], an empty list, an invalid row.
    let large_list: ArrayRef = {
        let mut builder = LargeListBuilder::new(StringBuilder::new());
        builder.values().append_value("A");
        builder.values().append_value("B");
        builder.append(true);
        builder.append(true);
        builder.append(false);
        Arc::new(builder.finish())
    };

    // FixedSizeList(3) rows: [0, 1, 2], an invalid row, [3, null, 5].
    let fixed_size_list: ArrayRef = {
        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);

        builder.values().append_value(3);
        builder.values().append_null();
        builder.values().append_value(5);
        builder.append(true);

        Arc::new(builder.finish())
    };

    // ListView rows: [A, B], an empty list, an invalid row.
    let list_view: ArrayRef = {
        let mut builder = ListViewBuilder::new(StringBuilder::new());
        builder.values().append_value("A");
        builder.values().append_value("B");
        builder.append(true);
        builder.append(true);
        builder.append(false);
        Arc::new(builder.finish())
    };

    // LargeListView rows: [A, B], an empty list, an invalid row.
    let large_list_view: ArrayRef = {
        let mut builder = LargeListViewBuilder::new(StringBuilder::new());
        builder.values().append_value("A");
        builder.values().append_value("B");
        builder.append(true);
        builder.append(true);
        builder.append(false);
        Arc::new(builder.finish())
    };

    // Map rows: {joe: 1}, an empty map, an invalid row.
    let map: ArrayRef = {
        let keys = StringBuilder::new();
        let values = Int32Builder::with_capacity(4);

        let mut builder = MapBuilder::new(None, keys, values);
        builder.keys().append_value("joe");
        builder.values().append_value(1);
        builder.append(true).unwrap();
        builder.append(true).unwrap();
        builder.append(false).unwrap();

        Arc::new(builder.finish())
    };

    let cases: Vec<ArrayRef> = vec![
        // Integers
        Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
        // Booleans and floats
        Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
        Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
        Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
        // Strings
        Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
        Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
        Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
        dictionary,
        // Binary
        Arc::new(BinaryArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        Arc::new(LargeBinaryArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        Arc::new(BinaryViewArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        // Timestamps without a timezone
        Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(TimestampMillisecondArray::from(vec![
            Some(1),
            None,
            Some(3),
        ])),
        Arc::new(TimestampMicrosecondArray::from(vec![
            Some(1),
            None,
            Some(3),
        ])),
        Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
        // Timestamps with a "UTC" timezone
        Arc::new(
            TimestampSecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        // Dates and times
        Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
        // Null
        Arc::new(NullArray::new(3)),
        // Unions
        dense_union,
        sparse_union,
        // Nested containers
        list,
        large_list,
        fixed_size_list,
        list_view,
        large_list_view,
        map,
    ];

    cases.into_iter().for_each(round_trip_through_scalar);
}
8285
8286 fn round_trip_through_scalar(arr: ArrayRef) {
8291 for i in 0..arr.len() {
8292 let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
8294 let array = scalar.to_array_of_size(1).unwrap();
8295 assert_eq!(array.len(), 1);
8296 assert_eq!(array.data_type(), arr.data_type());
8297 assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
8298 }
8299 }
8300
/// Round-trips each logical row of a run-end encoded array through a
/// `ScalarValue`, comparing decoded values rather than the arrays themselves.
#[test]
fn roundtrip_run_array() {
    // Run ends [2, 3] over values [1, null].
    let run_ends = Int16Array::from(vec![2, 3]);
    let values = Int64Array::from(vec![Some(1), None]);
    let run_array = RunArray::try_new(&run_ends, &values).unwrap();
    let run_array = run_array.downcast::<Int64Array>().unwrap();

    let expected_values: Vec<_> = run_array.into_iter().collect();

    for i in 0..run_array.len() {
        let rebuilt = ScalarValue::try_from_array(&run_array, i)
            .unwrap()
            .to_array_of_size(1)
            .unwrap();
        assert_eq!(rebuilt.data_type(), run_array.data_type());

        // Decode the single-row run array and compare it with the matching
        // one-element window of the expected values.
        let actual_values: Vec<_> = rebuilt
            .as_run::<Int16Type>()
            .downcast::<Int64Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(actual_values, expected_values[i..=i]);
    }
}
8325
/// Reads every row of a sparse union array back as a `ScalarValue::Union` and
/// checks equality, `eq_array` and null-ness.
#[test]
fn test_scalar_union_sparse() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child has six slots (one per union row); only one slot per child
    // holds a value.
    let children: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![
            Some(42),
            None,
            None,
            None,
            None,
            None,
        ])),
        Arc::new(BooleanArray::from(vec![
            None,
            Some(true),
            None,
            None,
            None,
            None,
        ])),
        Arc::new(StringArray::from(vec![
            None,
            None,
            Some("foo"),
            None,
            None,
            None,
        ])),
    ];

    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let union_array = UnionArray::try_new(fields.clone(), type_ids, None, children)
        .expect("UnionArray");
    let array: ArrayRef = Arc::new(union_array);

    // (type id, inner value) expected for each row of the union.
    let expected_rows = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (ti, value)) in expected_rows.into_iter().enumerate() {
        let is_null = value.is_null();
        let expected = ScalarValue::Union(
            Some((ti, Box::new(value))),
            fields.clone(),
            UnionMode::Sparse,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // A null inner value must make the union scalar itself null.
        assert!(
            !is_null || actual.is_null(),
            "[{i}] {actual} was not null"
        );
    }
}
8381
/// Reads every row of a dense union array back as a `ScalarValue::Union` and
/// checks equality, `eq_array` and null-ness.
#[test]
fn test_scalar_union_dense() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child holds one value followed by one null; `offsets` below selects
    // slot 0 for the first three rows and slot 1 for the last three.
    let children: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![Some(42), None])),
        Arc::new(BooleanArray::from(vec![Some(true), None])),
        Arc::new(StringArray::from(vec![Some("foo"), None])),
    ];

    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
    let union_array =
        UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
            .expect("UnionArray");
    let array: ArrayRef = Arc::new(union_array);

    // (type id, inner value) expected for each row of the union.
    let expected_rows = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (ti, value)) in expected_rows.into_iter().enumerate() {
        let is_null = value.is_null();
        let expected = ScalarValue::Union(
            Some((ti, Box::new(value))),
            fields.clone(),
            UnionMode::Dense,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // A null inner value must make the union scalar itself null.
        assert!(
            !is_null || actual.is_null(),
            "[{i}] {actual} was not null"
        );
    }
}
8431
/// Checks `iter_to_array` for struct scalars that contain a list field, and
/// for list scalars whose elements are such structs.
#[test]
fn test_lists_in_struct() {
    /// Appends one `{A: name, primitive_list: ints}` struct to the child
    /// builder of `builder`, without closing the enclosing list row.
    fn append_struct_row(
        builder: &mut ListBuilder<StructBuilder>,
        name: &str,
        ints: &[i32],
    ) {
        let row = builder.values();
        row.field_builder::<StringBuilder>(0)
            .unwrap()
            .append_value(name);

        let list = row
            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
            .unwrap();
        for &value in ints {
            list.values().append_value(value);
        }
        list.append(true);

        row.append(true);
    }

    let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
    let field_primitive_list = Arc::new(Field::new(
        "primitive_list",
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
        false,
    ));

    // Single-row Int32 list scalar.
    let int_list_scalar = |values: Vec<Option<i32>>| {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(values)]),
        ))
    };
    // Struct scalar {A: name, primitive_list: list}.
    let struct_scalar = |name: &str, list: ScalarValue| {
        ScalarValue::from(vec![
            ("A", ScalarValue::from(name)),
            ("primitive_list", list),
        ])
    };

    let s0 = struct_scalar(
        "First",
        int_list_scalar(vec![Some(1), Some(2), Some(3)]),
    );
    let s1 = struct_scalar("Second", int_list_scalar(vec![Some(4), Some(5)]));
    let s2 = struct_scalar("Third", int_list_scalar(vec![Some(6)]));

    // Part 1: three struct scalars become one three-row struct array.
    let struct_rows =
        ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
    let struct_rows = as_struct_array(&struct_rows).unwrap();

    let expected_structs = StructArray::from(vec![
        (
            Arc::clone(&field_a),
            Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
        ),
        (
            Arc::clone(&field_primitive_list),
            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
                Some(vec![Some(1), Some(2), Some(3)]),
                Some(vec![Some(4), Some(5)]),
                Some(vec![Some(6)]),
            ])) as ArrayRef,
        ),
    ]);
    assert_eq!(struct_rows, &expected_structs);

    // Part 2: list scalars whose elements are the structs above:
    // [s0, s1], [s2], [s1].
    let list_of = |rows: Vec<ScalarValue>| {
        let rows = ScalarValue::iter_to_array(rows).unwrap();
        SingleRowListArrayBuilder::new(rows).build_list_scalar()
    };
    let nl0 = list_of(vec![s0, s1.clone()]);
    let nl1 = list_of(vec![s2]);
    let nl2 = list_of(vec![s1]);

    let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
    let array = array.as_list::<i32>();

    // Build the expected list-of-struct array by hand.
    let element_builder = StructBuilder::new(
        vec![field_a, field_primitive_list],
        vec![
            Box::new(StringBuilder::with_capacity(4, 1024)),
            Box::new(ListBuilder::new(Int32Array::builder(8))),
        ],
    );
    let mut list_builder = ListBuilder::new(element_builder);

    // Row 0: [First, Second]
    append_struct_row(&mut list_builder, "First", &[1, 2, 3]);
    append_struct_row(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    // Row 1: [Third]
    append_struct_row(&mut list_builder, "Third", &[6]);
    list_builder.append(true);

    // Row 2: [Second]
    append_struct_row(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    let expected = list_builder.finish();

    assert_eq!(array, &expected);
}
8634
8635 fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
8636 let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
8637 ListArray::new(
8638 Arc::new(Field::new_list_field(
8639 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
8640 true,
8641 )),
8642 OffsetBuffer::<i32>::from_lengths([1]),
8643 Arc::new(a1),
8644 None,
8645 )
8646 }
8647
/// `iter_to_array` over `List<List<Int32>>` scalars must stack them into one
/// nested list array with one outer row per scalar.
#[test]
fn test_nested_lists() {
    // Contents of the inner list of each scalar / expected outer row.
    let rows: [&[i32]; 3] = [&[1, 2, 3], &[4, 5], &[6]];

    let scalars: Vec<ScalarValue> = rows
        .iter()
        .map(|row| {
            let data = row.iter().copied().map(Some).collect();
            ScalarValue::List(Arc::new(build_2d_list(data)))
        })
        .collect();
    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let array = array.as_list::<i32>();

    // Build the expected array by hand: each outer row holds one inner list.
    let inner_builder = Int32Array::builder(6);
    let middle_builder = ListBuilder::new(inner_builder);
    let mut outer_builder = ListBuilder::new(middle_builder);

    for row in rows {
        for &value in row {
            outer_builder.values().values().append_value(value);
        }
        outer_builder.values().append(true);
        outer_builder.append(true);
    }

    let expected = outer_builder.finish();

    assert_eq!(array, &expected);
}
8687
/// The "UTC" timezone of a nanosecond timestamp scalar must survive the
/// scalar -> array -> scalar round trip.
#[test]
fn scalar_timestamp_ns_utc_timezone() {
    let utc_nanos = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));

    let scalar = ScalarValue::TimestampNanosecond(
        Some(1599566400000000000),
        Some("UTC".into()),
    );
    assert_eq!(scalar.data_type(), utc_nanos);

    let array = scalar.to_array().expect("Failed to convert to array");
    assert_eq!(array.len(), 1);
    assert_eq!(array.data_type(), &utc_nanos);

    let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(new_scalar.data_type(), utc_nanos);
}
8713
/// Runs `check_scalar_cast` over primitive, dictionary, string-view and
/// list-like scalars.
#[test]
fn cast_round_trip() {
    // List element fields: the source scalars use Int32 elements and the cast
    // targets use Int64 elements under the same field name.
    let int32_element = || Arc::new(Field::new("element", DataType::Int32, true));
    let int64_element = || Arc::new(Field::new("element", DataType::Int64, true));
    let utf8_dictionary = || {
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
    };

    let list_scalar = {
        let mut builder =
            ListBuilder::new(Int32Builder::new()).with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::List(Arc::new(builder.finish()))
    };

    let fixed_size_list_scalar = {
        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
            .with_field(int32_element());
        builder.values().append_value(1);
        builder.append(true);
        ScalarValue::FixedSizeList(Arc::new(builder.finish()))
    };

    let large_list_scalar = {
        let mut builder =
            LargeListBuilder::new(Int32Builder::new()).with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::LargeList(Arc::new(builder.finish()))
    };

    let list_view_scalar = {
        let mut builder =
            ListViewBuilder::new(Int32Builder::new()).with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::ListView(Arc::new(builder.finish()))
    };

    let large_list_view_scalar = {
        let mut builder = LargeListViewBuilder::new(Int32Builder::new())
            .with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::LargeListView(Arc::new(builder.finish()))
    };

    // (scalar to cast, desired type)
    let cases = vec![
        (ScalarValue::Int8(Some(5)), DataType::Int16),
        (ScalarValue::Int8(None), DataType::Int16),
        (ScalarValue::Float64(Some(5.5)), DataType::Int16),
        (ScalarValue::Float64(None), DataType::Int16),
        (ScalarValue::from("foo"), utf8_dictionary()),
        (ScalarValue::Utf8(None), utf8_dictionary()),
        (ScalarValue::Utf8(None), DataType::Utf8View),
        (ScalarValue::from("foo"), DataType::Utf8View),
        (
            ScalarValue::from("larger than 12 bytes string"),
            DataType::Utf8View,
        ),
        (list_scalar, DataType::List(int64_element())),
        (
            fixed_size_list_scalar,
            DataType::FixedSizeList(int64_element(), 1),
        ),
        (large_list_scalar, DataType::LargeList(int64_element())),
        (list_view_scalar, DataType::ListView(int64_element())),
        (
            large_list_view_scalar,
            DataType::LargeListView(int64_element()),
        ),
    ];

    for (scalar, desired_type) in cases {
        check_scalar_cast(scalar, desired_type);
    }
}
8817
8818 fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
8820 let scalar_array = scalar.to_array().expect("Failed to convert to array");
8822 let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
8824
8825 let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
8827 assert_eq!(cast_scalar.data_type(), desired_type);
8828
8829 let array = cast_scalar
8831 .to_array_of_size(10)
8832 .expect("Failed to convert to array of size");
8833
8834 assert_eq!(array.data_type(), &desired_type)
8836 }
8837
/// `arithmetic_negate` flips signed integers, leaves a null Int32 null, and
/// is an error for UInt8 and Boolean scalars.
#[test]
fn test_scalar_negative() -> Result<()> {
    let negated = ScalarValue::Int32(Some(12)).arithmetic_negate()?;
    assert_eq!(ScalarValue::Int32(Some(-12)), negated);

    let negated_null = ScalarValue::Int32(None).arithmetic_negate()?;
    assert_eq!(ScalarValue::Int32(None), negated_null);

    // Types for which negation is expected to fail.
    for unsupported in [ScalarValue::UInt8(Some(12)), ScalarValue::Boolean(None)] {
        assert!(unsupported.arithmetic_negate().is_err());
    }
    Ok(())
}
8853
/// Negating the minimum value of a signed integer, decimal, interval or
/// timestamp scalar must fail with `ArrowError::ArithmeticOverflow`, while
/// negating the `f32`/`f64` extremes simply swaps `MIN` and `MAX`.
#[test]
fn test_scalar_negative_overflows() -> Result<()> {
    // Asserts that negating each given value yields an error whose root cause
    // is an arrow arithmetic overflow. Any other error is returned from the
    // test as-is.
    macro_rules! test_overflow_on_value {
        ($($val:expr),* $(,)?) => {$(
            {
                let value: ScalarValue = $val;
                // `expect_err` only accepts a plain `&str`, so a "{value:?}"
                // placeholder inside its message is never filled in. Panic
                // explicitly so a failure names the offending value.
                let err = match value.arithmetic_negate() {
                    Ok(negated) => panic!(
                        "Should receive overflow error on negating {:?}, got {:?}",
                        value, negated
                    ),
                    Err(err) => err,
                };
                let root_err = err.find_root();
                match root_err {
                    DataFusionError::ArrowError(err, _)
                        if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
                    _ => return Err(err),
                };
            }
        )*};
    }
    test_overflow_on_value!(
        // Signed integers
        i8::MIN.into(),
        i16::MIN.into(),
        i32::MIN.into(),
        i64::MIN.into(),
        // Decimals
        ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
        ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
        // Intervals, with one component at its minimum at a time
        ScalarValue::IntervalYearMonth(Some(i32::MIN)),
        ScalarValue::new_interval_dt(i32::MIN, 999),
        ScalarValue::new_interval_dt(1, i32::MIN),
        ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
        ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
        ScalarValue::new_interval_mdn(12, 15, i64::MIN),
        // Timestamps
        ScalarValue::TimestampSecond(Some(i64::MIN), None),
        ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
        ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
        ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
    );

    let float_cases = [
        (
            ScalarValue::Float16(Some(f16::MIN)),
            ScalarValue::Float16(Some(f16::MAX)),
        ),
        (
            ScalarValue::Float16(Some(f16::MAX)),
            ScalarValue::Float16(Some(f16::MIN)),
        ),
        (f32::MIN.into(), f32::MAX.into()),
        (f32::MAX.into(), f32::MIN.into()),
        (f64::MIN.into(), f64::MAX.into()),
        (f64::MAX.into(), f64::MIN.into()),
    ];
    // The first two (Float16) entries are skipped here; the same pairs are
    // exercised by `f16_test_overflow`.
    for (test, expected) in float_cases.into_iter().skip(2) {
        assert_eq!(test.arithmetic_negate()?, expected);
    }
    Ok(())
}
8912
/// Negating the Float16 extremes swaps `f16::MIN` and `f16::MAX` instead of
/// overflowing.
#[test]
fn f16_test_overflow() {
    let min = ScalarValue::Float16(Some(f16::MIN));
    let max = ScalarValue::Float16(Some(f16::MAX));

    assert_eq!(min.arithmetic_negate().unwrap(), max);
    assert_eq!(max.arithmetic_negate().unwrap(), min);
}
8931
// Generates a test named `$TEST_NAME` that applies the binary `ScalarValue`
// method `$FUNCTION` to `UInt64(12)` and `Int32(-3)` and requires the call to
// fail with an error whose message contains `$EXPECTED_ERROR`.
macro_rules! expect_operation_error {
    ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
        #[test]
        fn $TEST_NAME() {
            let lhs = ScalarValue::UInt64(Some(12));
            let rhs = ScalarValue::Int32(Some(-3));

            // Success is the failure case for this test.
            let Err(e) = lhs.$FUNCTION(&rhs) else {
                panic!(
                    "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                    lhs, rhs
                );
            };

            let error_message = e.to_string();
            assert!(
                error_message.contains($EXPECTED_ERROR),
                "Expected error '{}' not found in actual error '{}'",
                $EXPECTED_ERROR,
                error_message
            );
        }
    };
}

// Mixed UInt64 / Int32 addition is rejected.
expect_operation_error!(
    expect_add_error,
    add,
    "Invalid arithmetic operation: UInt64 + Int32"
);
// Mixed UInt64 / Int32 subtraction is rejected.
expect_operation_error!(
    expect_sub_error,
    sub,
    "Invalid arithmetic operation: UInt64 - Int32"
);
8969
// Applies `$OPERATION` to pairs of `Decimal128` scalars and compares against
// the expected `Decimal128` result. Each bracketed case lists nine values:
// left (value, precision, scale), right (value, precision, scale) and
// expected output (value, precision, scale).
macro_rules! decimal_op_test_cases {
    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
        $(
            {
                let lhs = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
                let rhs = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
                let actual = lhs.$OPERATION(&rhs).unwrap();
                let expected = ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE);
                assert_eq!(expected, actual);
            }
        )+
    };
}
8982
8983 #[test]
8984 fn decimal_operations() {
8985 decimal_op_test_cases!(
8986 add,
8987 [
8988 [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
8989 [
8991 Some(123),
8992 10,
8993 3,
8994 Some(124),
8995 10,
8996 2,
8997 Some(123 + 124 * 10_i128.pow(1)),
8998 12,
8999 3
9000 ],
9001 [
9003 Some(123),
9004 10,
9005 2,
9006 Some(124),
9007 11,
9008 3,
9009 Some(123 * 10_i128.pow(3 - 2) + 124),
9010 12,
9011 3
9012 ]
9013 ]
9014 );
9015 }
9016
9017 #[test]
9018 fn decimal_operations_with_nulls() {
9019 decimal_op_test_cases!(
9020 add,
9021 [
9022 [None, 10, 2, Some(123), 10, 2, None, 11, 2],
9024 [Some(123), 10, 2, None, 10, 2, None, 11, 2],
9026 [Some(123), 8, 2, None, 10, 3, None, 11, 3],
9028 [None, 8, 2, Some(123), 10, 3, None, 11, 3],
9030 [Some(123), 8, 4, None, 10, 3, None, 12, 4],
9032 [None, 10, 3, Some(123), 8, 4, None, 12, 4]
9034 ]
9035 );
9036 }
9037
9038 #[test]
9039 fn test_scalar_distance() {
9040 let cases = [
9041 (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
9044 (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
9045 (
9046 ScalarValue::Int16(Some(-5)),
9047 ScalarValue::Int16(Some(5)),
9048 10,
9049 ),
9050 (
9051 ScalarValue::Int16(Some(5)),
9052 ScalarValue::Int16(Some(-5)),
9053 10,
9054 ),
9055 (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
9056 (
9057 ScalarValue::Int32(Some(-5)),
9058 ScalarValue::Int32(Some(-10)),
9059 5,
9060 ),
9061 (
9062 ScalarValue::Int64(Some(-10)),
9063 ScalarValue::Int64(Some(-5)),
9064 5,
9065 ),
9066 (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
9067 (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
9068 (
9069 ScalarValue::UInt16(Some(5)),
9070 ScalarValue::UInt16(Some(10)),
9071 5,
9072 ),
9073 (
9074 ScalarValue::UInt32(Some(10)),
9075 ScalarValue::UInt32(Some(5)),
9076 5,
9077 ),
9078 (
9079 ScalarValue::UInt64(Some(5)),
9080 ScalarValue::UInt64(Some(10)),
9081 5,
9082 ),
9083 (
9084 ScalarValue::Float16(Some(f16::from_f32(1.1))),
9085 ScalarValue::Float16(Some(f16::from_f32(1.9))),
9086 1,
9087 ),
9088 (
9089 ScalarValue::Float16(Some(f16::from_f32(-5.3))),
9090 ScalarValue::Float16(Some(f16::from_f32(-9.2))),
9091 4,
9092 ),
9093 (
9094 ScalarValue::Float16(Some(f16::from_f32(-5.3))),
9095 ScalarValue::Float16(Some(f16::from_f32(-9.7))),
9096 4,
9097 ),
9098 (
9099 ScalarValue::Float32(Some(1.0)),
9100 ScalarValue::Float32(Some(2.0)),
9101 1,
9102 ),
9103 (
9104 ScalarValue::Float32(Some(2.0)),
9105 ScalarValue::Float32(Some(1.0)),
9106 1,
9107 ),
9108 (
9109 ScalarValue::Float64(Some(0.0)),
9110 ScalarValue::Float64(Some(0.0)),
9111 0,
9112 ),
9113 (
9114 ScalarValue::Float64(Some(-5.0)),
9115 ScalarValue::Float64(Some(-10.0)),
9116 5,
9117 ),
9118 (
9119 ScalarValue::Float64(Some(-10.0)),
9120 ScalarValue::Float64(Some(-5.0)),
9121 5,
9122 ),
9123 (
9127 ScalarValue::Float32(Some(1.2)),
9128 ScalarValue::Float32(Some(1.3)),
9129 0,
9130 ),
9131 (
9132 ScalarValue::Float32(Some(1.1)),
9133 ScalarValue::Float32(Some(1.9)),
9134 1,
9135 ),
9136 (
9137 ScalarValue::Float64(Some(-5.3)),
9138 ScalarValue::Float64(Some(-9.2)),
9139 4,
9140 ),
9141 (
9142 ScalarValue::Float64(Some(-5.3)),
9143 ScalarValue::Float64(Some(-9.7)),
9144 4,
9145 ),
9146 (
9147 ScalarValue::Float64(Some(-5.3)),
9148 ScalarValue::Float64(Some(-9.9)),
9149 5,
9150 ),
9151 (
9152 ScalarValue::Decimal128(Some(10), 1, 0),
9153 ScalarValue::Decimal128(Some(5), 1, 0),
9154 5,
9155 ),
9156 (
9157 ScalarValue::Decimal128(Some(5), 1, 0),
9158 ScalarValue::Decimal128(Some(10), 1, 0),
9159 5,
9160 ),
9161 (
9162 ScalarValue::Decimal256(Some(10.into()), 1, 0),
9163 ScalarValue::Decimal256(Some(5.into()), 1, 0),
9164 5,
9165 ),
9166 (
9167 ScalarValue::Decimal256(Some(5.into()), 1, 0),
9168 ScalarValue::Decimal256(Some(10.into()), 1, 0),
9169 5,
9170 ),
9171 (
9173 ScalarValue::Date32(Some(0)),
9174 ScalarValue::Date32(Some(10)),
9175 10,
9176 ),
9177 (
9178 ScalarValue::Date32(Some(10)),
9179 ScalarValue::Date32(Some(0)),
9180 10,
9181 ),
9182 (
9183 ScalarValue::Date64(Some(1000)),
9184 ScalarValue::Date64(Some(5000)),
9185 4000,
9186 ),
9187 (
9188 ScalarValue::TimestampSecond(Some(100), None),
9189 ScalarValue::TimestampSecond(Some(200), None),
9190 100,
9191 ),
9192 (
9193 ScalarValue::TimestampMillisecond(Some(1000), None),
9194 ScalarValue::TimestampMillisecond(Some(5000), None),
9195 4000,
9196 ),
9197 (
9198 ScalarValue::TimestampMicrosecond(Some(0), None),
9199 ScalarValue::TimestampMicrosecond(Some(1_000_000), None),
9200 1_000_000,
9201 ),
9202 (
9203 ScalarValue::TimestampNanosecond(Some(1_000_000_000), None),
9204 ScalarValue::TimestampNanosecond(Some(2_000_000_000), None),
9205 1_000_000_000,
9206 ),
9207 ];
9208 for (lhs, rhs, expected) in cases.iter() {
9209 let distance = lhs.distance(rhs).unwrap();
9210 assert_eq!(distance, *expected);
9211 }
9212 }
9213
9214 #[test]
9215 fn test_distance_none() {
9216 let cases = [
9217 (
9218 ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
9219 ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0),
9220 ),
9221 (
9222 ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
9223 ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0),
9224 ),
9225 ];
9226 for (lhs, rhs) in cases.iter() {
9227 let distance = lhs.distance(rhs);
9228 assert!(distance.is_none(), "{lhs} vs {rhs}");
9229 }
9230 }
9231
9232 #[test]
9233 fn test_scalar_distance_invalid() {
9234 let cases = [
9235 (ScalarValue::Int8(None), ScalarValue::Int8(None)),
9239 (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
9240 (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
9241 (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
9243 (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
9244 (
9245 ScalarValue::Float16(Some(f16::from_f32(1.0))),
9246 ScalarValue::Float32(Some(1.0)),
9247 ),
9248 (
9249 ScalarValue::Float16(Some(f16::from_f32(1.0))),
9250 ScalarValue::Int32(Some(1)),
9251 ),
9252 (
9253 ScalarValue::Float64(Some(1.1)),
9254 ScalarValue::Float32(Some(2.2)),
9255 ),
9256 (
9257 ScalarValue::UInt64(Some(777)),
9258 ScalarValue::Int32(Some(111)),
9259 ),
9260 (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
9262 (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
9263 (ScalarValue::from("foo"), ScalarValue::from("bar")),
9265 (
9266 ScalarValue::Boolean(Some(true)),
9267 ScalarValue::Boolean(Some(false)),
9268 ),
9269 (
9270 ScalarValue::Decimal128(Some(123), 5, 5),
9271 ScalarValue::Decimal128(Some(120), 5, 3),
9272 ),
9273 (
9274 ScalarValue::Decimal128(Some(123), 5, 5),
9275 ScalarValue::Decimal128(Some(120), 3, 5),
9276 ),
9277 (
9278 ScalarValue::Decimal256(Some(123.into()), 5, 5),
9279 ScalarValue::Decimal256(Some(120.into()), 3, 5),
9280 ),
9281 (
9283 ScalarValue::Decimal256(
9284 Some(i256::from_parts(0, 2_i64.pow(50).into())),
9285 1,
9286 0,
9287 ),
9288 ScalarValue::Decimal256(
9289 Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
9290 1,
9291 0,
9292 ),
9293 ),
9294 (
9296 ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
9297 ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
9298 ),
9299 ];
9300 for (lhs, rhs) in cases {
9301 let distance = lhs.distance(&rhs);
9302 assert!(distance.is_none());
9303 }
9304 }
9305
9306 #[test]
9307 fn test_scalar_interval_negate() {
9308 let cases = [
9309 (
9310 ScalarValue::new_interval_ym(1, 12),
9311 ScalarValue::new_interval_ym(-1, -12),
9312 ),
9313 (
9314 ScalarValue::new_interval_dt(1, 999),
9315 ScalarValue::new_interval_dt(-1, -999),
9316 ),
9317 (
9318 ScalarValue::new_interval_mdn(12, 15, 123_456),
9319 ScalarValue::new_interval_mdn(-12, -15, -123_456),
9320 ),
9321 ];
9322 for (expr, expected) in cases.iter() {
9323 let result = expr.arithmetic_negate().unwrap();
9324 assert_eq!(*expected, result, "-expr:{expr:?}");
9325 }
9326 }
9327
9328 #[test]
9329 fn test_scalar_interval_add() {
9330 let cases = [
9331 (
9332 ScalarValue::new_interval_ym(1, 12),
9333 ScalarValue::new_interval_ym(1, 12),
9334 ScalarValue::new_interval_ym(2, 24),
9335 ),
9336 (
9337 ScalarValue::new_interval_dt(1, 999),
9338 ScalarValue::new_interval_dt(1, 999),
9339 ScalarValue::new_interval_dt(2, 1998),
9340 ),
9341 (
9342 ScalarValue::new_interval_mdn(12, 15, 123_456),
9343 ScalarValue::new_interval_mdn(12, 15, 123_456),
9344 ScalarValue::new_interval_mdn(24, 30, 246_912),
9345 ),
9346 ];
9347 for (lhs, rhs, expected) in cases.iter() {
9348 let result = lhs.add(rhs).unwrap();
9349 let result_commute = rhs.add(lhs).unwrap();
9350 assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
9351 assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
9352 }
9353 }
9354
9355 #[test]
9356 fn test_scalar_interval_sub() {
9357 let cases = [
9358 (
9359 ScalarValue::new_interval_ym(1, 12),
9360 ScalarValue::new_interval_ym(1, 12),
9361 ScalarValue::new_interval_ym(0, 0),
9362 ),
9363 (
9364 ScalarValue::new_interval_dt(1, 999),
9365 ScalarValue::new_interval_dt(1, 999),
9366 ScalarValue::new_interval_dt(0, 0),
9367 ),
9368 (
9369 ScalarValue::new_interval_mdn(12, 15, 123_456),
9370 ScalarValue::new_interval_mdn(12, 15, 123_456),
9371 ScalarValue::new_interval_mdn(0, 0, 0),
9372 ),
9373 ];
9374 for (lhs, rhs, expected) in cases.iter() {
9375 let result = lhs.sub(rhs).unwrap();
9376 assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
9377 }
9378 }
9379
9380 #[test]
9381 fn timestamp_op_random_tests() {
9382 let sample_size = 1000;
9385 let timestamps1 = get_random_timestamps(sample_size);
9386 let intervals = get_random_intervals(sample_size);
9387 for (idx, ts1) in timestamps1.iter().enumerate() {
9391 if idx % 2 == 0 {
9392 let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
9393 let back = timestamp2.sub(intervals[idx].clone()).unwrap();
9394 assert_eq!(ts1, &back);
9395 } else {
9396 let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
9397 let back = timestamp2.add(intervals[idx].clone()).unwrap();
9398 assert_eq!(ts1, &back);
9399 };
9400 }
9401 }
9402
9403 #[test]
9404 fn test_struct_nulls() {
9405 let fields_b = Fields::from(vec![
9406 Field::new("ba", DataType::UInt64, true),
9407 Field::new("bb", DataType::UInt64, true),
9408 ]);
9409 let fields = Fields::from(vec![
9410 Field::new("a", DataType::UInt64, true),
9411 Field::new("b", DataType::Struct(fields_b.clone()), true),
9412 ]);
9413
9414 let struct_value = vec![
9415 (
9416 Arc::clone(&fields[0]),
9417 Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
9418 ),
9419 (
9420 Arc::clone(&fields[1]),
9421 Arc::new(StructArray::from(vec![
9422 (
9423 Arc::clone(&fields_b[0]),
9424 Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
9425 ),
9426 (
9427 Arc::clone(&fields_b[1]),
9428 Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
9429 ),
9430 ])) as ArrayRef,
9431 ),
9432 ];
9433
9434 let struct_value_with_nulls = vec![
9435 (
9436 Arc::clone(&fields[0]),
9437 Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
9438 ),
9439 (
9440 Arc::clone(&fields[1]),
9441 Arc::new(StructArray::from((
9442 vec![
9443 (
9444 Arc::clone(&fields_b[0]),
9445 Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
9446 ),
9447 (
9448 Arc::clone(&fields_b[1]),
9449 Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
9450 ),
9451 ],
9452 Buffer::from(&[0]),
9453 ))) as ArrayRef,
9454 ),
9455 ];
9456
9457 let scalars = vec![
9458 ScalarValue::Struct(Arc::new(StructArray::from((
9460 struct_value.clone(),
9461 Buffer::from(&[0]),
9462 )))),
9463 ScalarValue::Struct(Arc::new(StructArray::from((
9465 struct_value_with_nulls.clone(),
9466 Buffer::from(&[1]),
9467 )))),
9468 ScalarValue::Struct(Arc::new(StructArray::from((
9470 struct_value.clone(),
9471 Buffer::from(&[1]),
9472 )))),
9473 ];
9474
9475 let check_array = |array: Arc<dyn Array>| {
9476 let is_null = is_null(&array).unwrap();
9477 assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
9478
9479 let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
9480 let formatted = formatted.split('\n').collect::<Vec<_>>();
9481 let expected = vec![
9482 "+---------------------------+",
9483 "| col |",
9484 "+---------------------------+",
9485 "| |",
9486 "| {a: 1, b: } |",
9487 "| {a: 1, b: {ba: 2, bb: 3}} |",
9488 "+---------------------------+",
9489 ];
9490 assert_eq!(
9491 formatted, expected,
9492 "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
9493 );
9494 };
9495
9496 let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
9498 check_array(array);
9499
9500 let arrays = scalars
9502 .iter()
9503 .map(ScalarValue::to_array)
9504 .collect::<Result<Vec<_>>>()
9505 .expect("Failed to convert to array");
9506 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
9507 let array = arrow::compute::concat(&arrays).unwrap();
9508 check_array(array);
9509 }
9510
9511 #[test]
9512 fn test_struct_display() {
9513 let field_a = Field::new("a", DataType::Int32, true);
9514 let field_b = Field::new("b", DataType::Utf8, true);
9515
9516 let s = ScalarStructBuilder::new()
9517 .with_scalar(field_a, ScalarValue::from(1i32))
9518 .with_scalar(field_b, ScalarValue::Utf8(None))
9519 .build()
9520 .unwrap();
9521
9522 assert_eq!(s.to_string(), "{a:1,b:}");
9523 assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
9524
9525 let ScalarValue::Struct(arr) = s else {
9526 panic!("Expected struct");
9527 };
9528
9529 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9531 assert_snapshot!(batches_to_string(&[batch]), @r"
9532 +-------------+
9533 | s |
9534 +-------------+
9535 | {a: 1, b: } |
9536 +-------------+
9537 ");
9538 }
9539
9540 #[test]
9541 fn test_list_view_display() {
9542 let s = ScalarValue::ListView(
9543 ListViewArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
9544 Some(1),
9545 None,
9546 Some(3),
9547 ])])
9548 .into(),
9549 );
9550
9551 assert_eq!(s.to_string(), "[1, , 3]");
9552 assert_eq!(format!("{s:?}"), "ListView([1, , 3])");
9553 }
9554
9555 #[test]
9556 fn test_null_bug() {
9557 let field_a = Field::new("a", DataType::Int32, true);
9558 let field_b = Field::new("b", DataType::Int32, true);
9559 let fields = Fields::from(vec![field_a, field_b]);
9560
9561 let array_a = Arc::new(Int32Array::from_iter_values([1]));
9562 let array_b = Arc::new(Int32Array::from_iter_values([2]));
9563 let arrays: Vec<ArrayRef> = vec![array_a, array_b];
9564
9565 let mut not_nulls = NullBufferBuilder::new(1);
9566
9567 not_nulls.append_non_null();
9568
9569 let ar = StructArray::new(fields, arrays, not_nulls.finish());
9570 let s = ScalarValue::Struct(Arc::new(ar));
9571
9572 assert_eq!(s.to_string(), "{a:1,b:2}");
9573 assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
9574
9575 let ScalarValue::Struct(arr) = s else {
9576 panic!("Expected struct");
9577 };
9578
9579 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9581 assert_snapshot!(batches_to_string(&[batch]), @r"
9582 +--------------+
9583 | s |
9584 +--------------+
9585 | {a: 1, b: 2} |
9586 +--------------+
9587 ");
9588 }
9589
9590 #[test]
9591 fn test_display_date64_large_values() {
9592 assert_eq!(
9593 format!("{}", ScalarValue::Date64(Some(790179464505))),
9594 "1995-01-15"
9595 );
9596 assert_eq!(
9598 format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
9599 ""
9600 );
9601 }
9602
9603 #[test]
9604 fn test_struct_display_null() {
9605 let fields = vec![Field::new("a", DataType::Int32, false)];
9606 let s = ScalarStructBuilder::new_null(fields);
9607 assert_eq!(s.to_string(), "NULL");
9608
9609 let ScalarValue::Struct(arr) = s else {
9610 panic!("Expected struct");
9611 };
9612
9613 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
9615
9616 assert_snapshot!(batches_to_string(&[batch]), @r"
9617 +---+
9618 | s |
9619 +---+
9620 | |
9621 +---+
9622 ");
9623 }
9624
9625 #[test]
9626 fn test_map_display_and_debug() {
9627 let string_builder = StringBuilder::new();
9628 let int_builder = Int32Builder::with_capacity(4);
9629 let mut builder = MapBuilder::new(None, string_builder, int_builder);
9630 builder.keys().append_value("joe");
9631 builder.values().append_value(1);
9632 builder.append(true).unwrap();
9633
9634 builder.keys().append_value("blogs");
9635 builder.values().append_value(2);
9636 builder.keys().append_value("foo");
9637 builder.values().append_value(4);
9638 builder.append(true).unwrap();
9639 builder.append(true).unwrap();
9640 builder.append(false).unwrap();
9641
9642 let map_value = ScalarValue::Map(Arc::new(builder.finish()));
9643
9644 assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
9645 assert_eq!(
9646 format!("{map_value:?}"),
9647 r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
9648 );
9649
9650 let ScalarValue::Map(arr) = map_value else {
9651 panic!("Expected map");
9652 };
9653
9654 let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
9656 assert_snapshot!(batches_to_string(&[batch]), @r"
9657 +--------------------+
9658 | m |
9659 +--------------------+
9660 | {joe: 1} |
9661 | {blogs: 2, foo: 4} |
9662 | {} |
9663 | |
9664 +--------------------+
9665 ");
9666 }
9667
9668 #[test]
9669 fn test_binary_display() {
9670 let no_binary_value = ScalarValue::Binary(None);
9671 assert_eq!(format!("{no_binary_value}"), "NULL");
9672 let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
9673 assert_eq!(format!("{single_binary_value}"), "2A");
9674 let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
9675 assert_eq!(format!("{small_binary_value}"), "010203");
9676 let large_binary_value =
9677 ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9678 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9679
9680 let no_binary_value = ScalarValue::BinaryView(None);
9681 assert_eq!(format!("{no_binary_value}"), "NULL");
9682 let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
9683 assert_eq!(format!("{small_binary_value}"), "010203");
9684 let large_binary_value =
9685 ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9686 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9687
9688 let no_binary_value = ScalarValue::LargeBinary(None);
9689 assert_eq!(format!("{no_binary_value}"), "NULL");
9690 let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
9691 assert_eq!(format!("{small_binary_value}"), "010203");
9692 let large_binary_value =
9693 ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9694 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9695
9696 let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
9697 assert_eq!(format!("{no_binary_value}"), "NULL");
9698 let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
9699 assert_eq!(format!("{small_binary_value}"), "010203");
9700 let large_binary_value = ScalarValue::FixedSizeBinary(
9701 11,
9702 Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
9703 );
9704 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
9705 }
9706
9707 #[test]
9708 fn test_binary_debug() {
9709 let no_binary_value = ScalarValue::Binary(None);
9710 assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
9711 let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
9712 assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
9713 let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
9714 assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
9715 let large_binary_value =
9716 ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9717 assert_eq!(
9718 format!("{large_binary_value:?}"),
9719 "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
9720 );
9721
9722 let no_binary_value = ScalarValue::BinaryView(None);
9723 assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
9724 let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
9725 assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
9726 let large_binary_value =
9727 ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9728 assert_eq!(
9729 format!("{large_binary_value:?}"),
9730 "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
9731 );
9732
9733 let no_binary_value = ScalarValue::LargeBinary(None);
9734 assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
9735 let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
9736 assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
9737 let large_binary_value =
9738 ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
9739 assert_eq!(
9740 format!("{large_binary_value:?}"),
9741 "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
9742 );
9743
9744 let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
9745 assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
9746 let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
9747 assert_eq!(
9748 format!("{small_binary_value:?}"),
9749 "FixedSizeBinary(3, \"1,2,3\")"
9750 );
9751 let large_binary_value = ScalarValue::FixedSizeBinary(
9752 11,
9753 Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
9754 );
9755 assert_eq!(
9756 format!("{large_binary_value:?}"),
9757 "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
9758 );
9759 }
9760
9761 #[test]
9762 fn test_build_timestamp_millisecond_list() {
9763 let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
9764 let arr = ScalarValue::new_list_nullable(
9765 &values,
9766 &DataType::Timestamp(TimeUnit::Millisecond, None),
9767 );
9768 assert_eq!(1, arr.len());
9769 }
9770
9771 #[test]
9772 fn test_newlist_timestamp_zone() {
9773 let s: &'static str = "UTC";
9774 let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
9775 let arr = ScalarValue::new_list_nullable(
9776 &values,
9777 &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
9778 );
9779 assert_eq!(1, arr.len());
9780 assert_eq!(
9781 arr.data_type(),
9782 &DataType::List(Arc::new(Field::new_list_field(
9783 DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
9784 true,
9785 )))
9786 );
9787 }
9788
9789 fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
9790 let vector_size = sample_size;
9791 let mut timestamp = vec![];
9792 let mut rng = rand::rng();
9793 for i in 0..vector_size {
9794 let year = rng.random_range(1995..=2050);
9795 let month = rng.random_range(1..=12);
9796 let day = rng.random_range(1..=28); let hour = rng.random_range(0..=23);
9798 let minute = rng.random_range(0..=59);
9799 let second = rng.random_range(0..=59);
9800 if i % 4 == 0 {
9801 timestamp.push(ScalarValue::TimestampSecond(
9802 Some(
9803 NaiveDate::from_ymd_opt(year, month, day)
9804 .unwrap()
9805 .and_hms_opt(hour, minute, second)
9806 .unwrap()
9807 .and_utc()
9808 .timestamp(),
9809 ),
9810 None,
9811 ))
9812 } else if i % 4 == 1 {
9813 let millisec = rng.random_range(0..=999);
9814 timestamp.push(ScalarValue::TimestampMillisecond(
9815 Some(
9816 NaiveDate::from_ymd_opt(year, month, day)
9817 .unwrap()
9818 .and_hms_milli_opt(hour, minute, second, millisec)
9819 .unwrap()
9820 .and_utc()
9821 .timestamp_millis(),
9822 ),
9823 None,
9824 ))
9825 } else if i % 4 == 2 {
9826 let microsec = rng.random_range(0..=999_999);
9827 timestamp.push(ScalarValue::TimestampMicrosecond(
9828 Some(
9829 NaiveDate::from_ymd_opt(year, month, day)
9830 .unwrap()
9831 .and_hms_micro_opt(hour, minute, second, microsec)
9832 .unwrap()
9833 .and_utc()
9834 .timestamp_micros(),
9835 ),
9836 None,
9837 ))
9838 } else if i % 4 == 3 {
9839 let nanosec = rng.random_range(0..=999_999_999);
9840 timestamp.push(ScalarValue::TimestampNanosecond(
9841 Some(
9842 NaiveDate::from_ymd_opt(year, month, day)
9843 .unwrap()
9844 .and_hms_nano_opt(hour, minute, second, nanosec)
9845 .unwrap()
9846 .and_utc()
9847 .timestamp_nanos_opt()
9848 .unwrap(),
9849 ),
9850 None,
9851 ))
9852 }
9853 }
9854 timestamp
9855 }
9856
9857 fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
9858 const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
9859 const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
9860
9861 let vector_size = sample_size;
9862 let mut intervals = vec![];
9863 let mut rng = rand::rng();
9864 const SECS_IN_ONE_DAY: i32 = 86_400;
9865 const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
9866 for i in 0..vector_size {
9867 if i % 4 == 0 {
9868 let days = rng.random_range(0..5000);
9869 let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
9871 intervals.push(ScalarValue::new_interval_dt(days, millis));
9872 } else if i % 4 == 1 {
9873 let days = rng.random_range(0..5000);
9874 let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
9875 intervals.push(ScalarValue::new_interval_dt(days, millisec));
9876 } else if i % 4 == 2 {
9877 let days = rng.random_range(0..5000);
9878 let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
9880 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9881 } else {
9882 let days = rng.random_range(0..5000);
9883 let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
9884 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9885 }
9886 }
9887 intervals
9888 }
9889
9890 fn union_fields() -> UnionFields {
9891 [
9892 (0, Arc::new(Field::new("A", DataType::Int32, true))),
9893 (1, Arc::new(Field::new("B", DataType::Float64, true))),
9894 ]
9895 .into_iter()
9896 .collect()
9897 }
9898
9899 #[test]
9900 fn sparse_scalar_union_is_null() {
9901 let sparse_scalar = ScalarValue::Union(
9902 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9903 union_fields(),
9904 UnionMode::Sparse,
9905 );
9906 assert!(sparse_scalar.is_null());
9907 }
9908
9909 #[test]
9910 fn dense_scalar_union_is_null() {
9911 let dense_scalar = ScalarValue::Union(
9912 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9913 union_fields(),
9914 UnionMode::Dense,
9915 );
9916 assert!(dense_scalar.is_null());
9917 }
9918
9919 #[test]
9920 fn cast_date_to_timestamp_overflow_returns_error() {
9921 let scalar = ScalarValue::Date32(Some(i32::MAX));
9922 let err = scalar
9923 .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9924 .expect_err("expected cast to fail");
9925 assert!(
9926 err.to_string()
9927 .contains("converted value exceeds the representable i64 range"),
9928 "unexpected error: {err}"
9929 );
9930 }
9931
9932 #[test]
9933 fn null_dictionary_scalar_produces_null_dictionary_array() {
9934 let dictionary_scalar = ScalarValue::Dictionary(
9935 Box::new(DataType::Int32),
9936 Box::new(ScalarValue::Null),
9937 );
9938 assert!(dictionary_scalar.is_null());
9939 let dictionary_array = dictionary_scalar.to_array().unwrap();
9940 assert!(dictionary_array.is_null(0));
9941 }
9942
9943 #[test]
9944 fn test_scalar_value_try_new_null() {
9945 let scalars = vec![
9946 ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
9947 ScalarValue::try_new_null(&DataType::Int8).unwrap(),
9948 ScalarValue::try_new_null(&DataType::Int16).unwrap(),
9949 ScalarValue::try_new_null(&DataType::Int32).unwrap(),
9950 ScalarValue::try_new_null(&DataType::Int64).unwrap(),
9951 ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
9952 ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
9953 ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
9954 ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
9955 ScalarValue::try_new_null(&DataType::Float16).unwrap(),
9956 ScalarValue::try_new_null(&DataType::Float32).unwrap(),
9957 ScalarValue::try_new_null(&DataType::Float64).unwrap(),
9958 ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
9959 ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
9960 ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
9961 ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
9962 ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
9963 ScalarValue::try_new_null(&DataType::Binary).unwrap(),
9964 ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
9965 ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
9966 ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
9967 ScalarValue::try_new_null(&DataType::Date32).unwrap(),
9968 ScalarValue::try_new_null(&DataType::Date64).unwrap(),
9969 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
9970 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
9971 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
9972 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
9973 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
9974 .unwrap(),
9975 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
9976 .unwrap(),
9977 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
9978 .unwrap(),
9979 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9980 .unwrap(),
9981 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
9982 .unwrap(),
9983 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
9984 .unwrap(),
9985 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
9986 .unwrap(),
9987 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
9988 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
9989 .unwrap(),
9990 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
9991 ScalarValue::try_new_null(&DataType::Null).unwrap(),
9992 ];
9993 assert!(scalars.iter().all(|s| s.is_null()));
9994
9995 let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
9996 let map_field_ref = Arc::new(Field::new(
9997 "foo",
9998 DataType::Struct(Fields::from(vec![
9999 Field::new("bar", DataType::Utf8, true),
10000 Field::new("baz", DataType::Int32, true),
10001 ])),
10002 true,
10003 ));
10004 let scalars = [
10005 ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
10006 ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
10007 .unwrap(),
10008 ScalarValue::try_new_null(&DataType::FixedSizeList(
10009 Arc::clone(&field_ref),
10010 42,
10011 ))
10012 .unwrap(),
10013 ScalarValue::try_new_null(&DataType::ListView(Arc::clone(&field_ref)))
10014 .unwrap(),
10015 ScalarValue::try_new_null(&DataType::LargeListView(Arc::clone(&field_ref)))
10016 .unwrap(),
10017 ScalarValue::try_new_null(&DataType::Struct(
10018 vec![Arc::clone(&field_ref)].into(),
10019 ))
10020 .unwrap(),
10021 ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
10022 ScalarValue::try_new_null(&DataType::Union(
10023 UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
10024 UnionMode::Dense,
10025 ))
10026 .unwrap(),
10027 ];
10028 assert!(scalars.iter().all(|s| s.is_null()));
10029 }
10030
10031 fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
10034 let actual = actual.as_ref();
10035 let expected_prefix = expected_prefix.as_ref();
10036 assert!(
10037 actual.starts_with(expected_prefix),
10038 "Expected '{actual}' to start with '{expected_prefix}'"
10039 );
10040 }
10041
/// Checks `ScalarValue::new_default` for primitive, string, binary, temporal,
/// decimal, list-like, struct and union data types.
#[test]
fn test_new_default() {
    // Numeric and boolean types default to zero / false.
    assert_eq!(
        ScalarValue::new_default(&DataType::Int32).unwrap(),
        ScalarValue::Int32(Some(0))
    );
    assert_eq!(
        ScalarValue::new_default(&DataType::Float64).unwrap(),
        ScalarValue::Float64(Some(0.0))
    );
    assert_eq!(
        ScalarValue::new_default(&DataType::Boolean).unwrap(),
        ScalarValue::Boolean(Some(false))
    );

    // String types default to the empty string.
    assert_eq!(
        ScalarValue::new_default(&DataType::Utf8).unwrap(),
        ScalarValue::Utf8(Some("".to_string()))
    );
    assert_eq!(
        ScalarValue::new_default(&DataType::LargeUtf8).unwrap(),
        ScalarValue::LargeUtf8(Some("".to_string()))
    );

    // Binary defaults to an empty byte vector.
    assert_eq!(
        ScalarValue::new_default(&DataType::Binary).unwrap(),
        ScalarValue::Binary(Some(vec![]))
    );

    // Fixed-size binary defaults to the requested number of zero bytes.
    assert_eq!(
        ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(),
        ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0]))
    );

    // Temporal types default to zero.
    assert_eq!(
        ScalarValue::new_default(&DataType::Date32).unwrap(),
        ScalarValue::Date32(Some(0))
    );
    assert_eq!(
        ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(),
        ScalarValue::Time32Second(Some(0))
    );

    // Decimals default to zero while keeping precision and scale.
    assert_eq!(
        ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(),
        ScalarValue::Decimal128(Some(0), 10, 2)
    );

    // A single shared element field is enough for every list-like type; each
    // default is expected to be one row holding an empty list.
    let list_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    let ScalarValue::List(list) =
        ScalarValue::new_default(&DataType::List(Arc::clone(&list_field))).unwrap()
    else {
        panic!("Expected List")
    };
    assert_eq!(list.len(), 1);
    assert_eq!(list.value_length(0), 0);

    let ScalarValue::LargeList(large_list) =
        ScalarValue::new_default(&DataType::LargeList(Arc::clone(&list_field)))
            .unwrap()
    else {
        panic!("Expected LargeList")
    };
    assert_eq!(large_list.len(), 1);
    assert_eq!(large_list.value_length(0), 0);

    let ScalarValue::ListView(list_view) =
        ScalarValue::new_default(&DataType::ListView(Arc::clone(&list_field)))
            .unwrap()
    else {
        panic!("Expected ListView")
    };
    assert_eq!(list_view.len(), 1);
    assert_eq!(list_view.value_size(0), 0);

    let ScalarValue::LargeListView(large_list_view) =
        ScalarValue::new_default(&DataType::LargeListView(list_field)).unwrap()
    else {
        panic!("Expected LargeListView")
    };
    assert_eq!(large_list_view.len(), 1);
    assert_eq!(large_list_view.value_size(0), 0);

    // Struct: one row whose (non-nullable) children hold their own defaults.
    let struct_fields = Fields::from(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ]);
    let ScalarValue::Struct(struct_arr) =
        ScalarValue::new_default(&DataType::Struct(struct_fields)).unwrap()
    else {
        panic!("Expected Struct")
    };
    assert_eq!(struct_arr.len(), 1);
    assert_eq!(
        struct_arr.column(0).as_primitive::<Int32Type>().value(0),
        0
    );
    assert_eq!(struct_arr.column(1).as_string::<i32>().value(0), "");

    // Union: the default is the default of the first declared variant.
    let union_fields = UnionFields::try_new(
        vec![0, 1],
        vec![
            Field::new("i32", DataType::Int32, false),
            Field::new("f64", DataType::Float64, false),
        ],
    )
    .unwrap();
    let ScalarValue::Union(Some((type_id, value)), _, _) =
        ScalarValue::new_default(&DataType::Union(union_fields, UnionMode::Sparse))
            .unwrap()
    else {
        panic!("Expected Union")
    };
    assert_eq!(type_id, 0);
    assert_eq!(*value, ScalarValue::Int32(Some(0)));
}
10182
/// Checks `ScalarValue::min` for types that have a minimum and for types
/// where it returns `None`.
#[test]
fn test_scalar_min() {
    // Data types paired with the scalar that `min` is expected to return.
    let with_minimum = [
        (DataType::Int8, ScalarValue::Int8(Some(i8::MIN))),
        (DataType::Int32, ScalarValue::Int32(Some(i32::MIN))),
        (DataType::UInt8, ScalarValue::UInt8(Some(0))),
        (DataType::UInt64, ScalarValue::UInt64(Some(0))),
        (
            DataType::Float32,
            ScalarValue::Float32(Some(f32::NEG_INFINITY)),
        ),
        (
            DataType::Float64,
            ScalarValue::Float64(Some(f64::NEG_INFINITY)),
        ),
        (DataType::Date32, ScalarValue::Date32(Some(i32::MIN))),
        (
            DataType::Time32(TimeUnit::Second),
            ScalarValue::Time32Second(Some(0)),
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
        ),
        (
            DataType::Duration(TimeUnit::Second),
            ScalarValue::DurationSecond(Some(i64::MIN)),
        ),
    ];
    for (data_type, expected) in with_minimum {
        assert_eq!(ScalarValue::min(&data_type), Some(expected));
    }

    // Decimal128(5, 2): the minimum is the most negative 5-digit value.
    let ScalarValue::Decimal128(Some(val), 5, 2) =
        ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap()
    else {
        panic!("Expected Decimal128")
    };
    assert_eq!(val, -99999);

    // Types for which `min` reports no value.
    let item_field = || Arc::new(Field::new("item", DataType::Int32, true));
    let without_minimum = [
        DataType::Utf8,
        DataType::Binary,
        DataType::List(item_field()),
        DataType::LargeList(item_field()),
        DataType::ListView(item_field()),
        DataType::LargeListView(item_field()),
    ];
    for data_type in without_minimum {
        assert_eq!(ScalarValue::min(&data_type), None);
    }
}
10278
/// Checks `ScalarValue::max` for types that have a maximum and for types
/// where it returns `None`.
#[test]
fn test_scalar_max() {
    // Data types paired with the scalar that `max` is expected to return.
    let with_maximum = [
        (DataType::Int8, ScalarValue::Int8(Some(i8::MAX))),
        (DataType::Int32, ScalarValue::Int32(Some(i32::MAX))),
        (DataType::UInt8, ScalarValue::UInt8(Some(u8::MAX))),
        (DataType::UInt64, ScalarValue::UInt64(Some(u64::MAX))),
        (
            DataType::Float32,
            ScalarValue::Float32(Some(f32::INFINITY)),
        ),
        (
            DataType::Float64,
            ScalarValue::Float64(Some(f64::INFINITY)),
        ),
        (DataType::Date32, ScalarValue::Date32(Some(i32::MAX))),
        (
            DataType::Time32(TimeUnit::Second),
            ScalarValue::Time32Second(Some(86_399)),
        ),
        (
            DataType::Time64(TimeUnit::Microsecond),
            ScalarValue::Time64Microsecond(Some(86_399_999_999)),
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            ScalarValue::TimestampNanosecond(Some(i64::MAX), None),
        ),
        (
            DataType::Duration(TimeUnit::Millisecond),
            ScalarValue::DurationMillisecond(Some(i64::MAX)),
        ),
    ];
    for (data_type, expected) in with_maximum {
        assert_eq!(ScalarValue::max(&data_type), Some(expected));
    }

    // Decimal128(5, 2): the maximum is the largest 5-digit value.
    let ScalarValue::Decimal128(Some(val), 5, 2) =
        ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap()
    else {
        panic!("Expected Decimal128")
    };
    assert_eq!(val, 99999);

    // Types for which `max` reports no value.
    let item_field = || Arc::new(Field::new("item", DataType::Int32, true));
    let without_maximum = [
        DataType::Utf8,
        DataType::Binary,
        DataType::Struct(Fields::from(vec![Field::new(
            "field",
            DataType::Int32,
            true,
        )])),
        DataType::ListView(item_field()),
        DataType::LargeListView(item_field()),
    ];
    for data_type in without_maximum {
        assert_eq!(ScalarValue::max(&data_type), None);
    }
}
10370
/// Checks that `min` / `max` for `Float16` are negative / positive infinity.
#[test]
fn test_min_max_float16() {
    let ScalarValue::Float16(Some(lowest)) =
        ScalarValue::min(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16")
    };
    assert_eq!(lowest, f16::NEG_INFINITY);

    let ScalarValue::Float16(Some(highest)) =
        ScalarValue::max(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16")
    };
    assert_eq!(highest, f16::INFINITY);
}
10390
/// Checks that `new_default` yields a zero interval for every interval unit.
#[test]
fn test_new_default_interval() {
    // Each interval unit paired with its expected zero-valued scalar.
    let cases = [
        (
            IntervalUnit::YearMonth,
            ScalarValue::IntervalYearMonth(Some(0)),
        ),
        (
            IntervalUnit::DayTime,
            ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO)),
        ),
        (
            IntervalUnit::MonthDayNano,
            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO)),
        ),
    ];
    for (unit, expected) in cases {
        let actual = ScalarValue::new_default(&DataType::Interval(unit)).unwrap();
        assert_eq!(actual, expected);
    }
}
10409
/// Checks that `min` / `max` for timestamps carry the timezone through to the
/// resulting scalar.
#[test]
fn test_min_max_with_timezone() {
    let utc: Option<Arc<str>> = Some(Arc::from("UTC"));

    // Minimum of a second-resolution timestamp with a timezone.
    let second_type = DataType::Timestamp(TimeUnit::Second, utc.clone());
    let ScalarValue::TimestampSecond(Some(val), Some(tz_str)) =
        ScalarValue::min(&second_type).unwrap()
    else {
        panic!("Expected TimestampSecond with timezone")
    };
    assert_eq!(val, i64::MIN);
    assert_eq!(tz_str.as_ref(), "UTC");

    // Maximum of a millisecond-resolution timestamp with a timezone.
    let millisecond_type = DataType::Timestamp(TimeUnit::Millisecond, utc.clone());
    let ScalarValue::TimestampMillisecond(Some(val), Some(tz_str)) =
        ScalarValue::max(&millisecond_type).unwrap()
    else {
        panic!("Expected TimestampMillisecond with timezone")
    };
    assert_eq!(val, i64::MAX);
    assert_eq!(tz_str.as_ref(), "UTC");
}
10436
/// Checks that expanding a view scalar to 10 rows yields a single data buffer
/// whose length equals one copy of the value.
#[test]
fn test_views_minimize_memory() {
    let text = String::from("this string is longer than 12 bytes");

    // Utf8View
    let utf8_array = ScalarValue::Utf8View(Some(text.clone()))
        .to_array_of_size(10)
        .unwrap();
    let utf8_buffers = utf8_array.as_string_view().data_buffers();
    assert_eq!(1, utf8_buffers.len());
    assert_eq!(text.len(), utf8_buffers[0].len());

    // BinaryView, built from the same bytes
    let binary_array = ScalarValue::BinaryView(Some(text.as_bytes().to_vec()))
        .to_array_of_size(10)
        .unwrap();
    let binary_buffers = binary_array.as_binary_view().data_buffers();
    assert_eq!(1, binary_buffers.len());
    assert_eq!(text.len(), binary_buffers[0].len());
}
10457
/// Checks `to_array_of_size` for run-end-encoded scalars: the expanded array
/// decodes to `size` copies while storing one physical value, and a size that
/// overflows the run-ends type is rejected.
#[test]
fn test_to_array_of_size_run_end_encoded() {
    // Expands a Float32 REE scalar to 5 rows for run-ends type `R`.
    fn run_test<R: RunEndIndexType>() {
        let size = 5;
        let run_ends_field = Field::new("run_ends", R::DATA_TYPE, false);
        let values_field = Field::new("values", DataType::Float32, true);
        let scalar = ScalarValue::RunEndEncoded(
            run_ends_field.into(),
            values_field.into(),
            Box::new(ScalarValue::Float32(Some(1.0))),
        );

        let expanded = scalar.to_array_of_size(size).unwrap();
        let typed = expanded
            .as_run::<R>()
            .downcast::<Float32Array>()
            .unwrap();
        let decoded = typed.into_iter().collect::<Vec<_>>();
        assert_eq!(vec![Some(1.0); size], decoded);
        // Only a single physical value is stored.
        assert_eq!(1, typed.values().len());
    }

    run_test::<Int16Type>();
    run_test::<Int32Type>();
    run_test::<Int64Type>();

    // A size past `i16::MAX` cannot be represented by Int16 run ends.
    let oversized = i16::MAX as usize + 10;
    let scalar = ScalarValue::RunEndEncoded(
        Field::new("run_ends", DataType::Int16, false).into(),
        Field::new("values", DataType::Float32, true).into(),
        Box::new(ScalarValue::Float32(Some(1.0))),
    );
    let message = scalar.to_array_of_size(oversized).unwrap_err().to_string();
    assert_eq!(
        "Execution error: Cannot construct RunArray of size 32777: Overflows run-ends type Int16",
        message
    );
}
10490
/// Checks `eq_array` between run-end-encoded scalars and a `RunArray`,
/// including the errors returned when the value type or the run-ends type of
/// the scalar does not match the array.
#[test]
fn test_eq_array_run_end_encoded() {
    // Logical contents: [NULL, 1.0, 1.0]
    let run_ends = Int16Array::from(vec![1, 3]);
    let values = Float32Array::from(vec![None, Some(1.0)]);
    let run_array =
        Arc::new(RunArray::try_new(&run_ends, &values).unwrap()) as ArrayRef;

    // Builds a REE scalar with the given run-ends type, value type and value.
    let ree_scalar =
        |run_ends_type: DataType, values_type: DataType, value: ScalarValue| {
            ScalarValue::RunEndEncoded(
                Field::new("run_ends", run_ends_type, false).into(),
                Field::new("values", values_type, true).into(),
                Box::new(value),
            )
        };

    // Null scalar matches the null run.
    let scalar = ree_scalar(
        DataType::Int16,
        DataType::Float32,
        ScalarValue::Float32(None),
    );
    assert!(scalar.eq_array(&run_array, 0).unwrap());

    // 1.0 matches both logical positions of the second run.
    let scalar = ree_scalar(
        DataType::Int16,
        DataType::Float32,
        ScalarValue::Float32(Some(1.0)),
    );
    assert!(scalar.eq_array(&run_array, 1).unwrap());
    assert!(scalar.eq_array(&run_array, 2).unwrap());

    // Value type differs from the array's value type.
    let scalar = ree_scalar(
        DataType::Int16,
        DataType::Float64,
        ScalarValue::Float64(Some(1.0)),
    );
    let message = scalar.eq_array(&run_array, 1).unwrap_err().to_string();
    let expected = "Internal error: could not cast array of type Float32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::Float64Type>";
    // Include both strings so a failure shows what was actually returned.
    assert!(
        message.starts_with(expected),
        "Expected '{message}' to start with '{expected}'"
    );

    // Run-ends type differs from the array's run-ends type.
    let scalar = ree_scalar(
        DataType::Int32,
        DataType::Float32,
        ScalarValue::Float32(None),
    );
    let message = scalar.eq_array(&run_array, 0).unwrap_err().to_string();
    let expected = "Internal error: could not cast array of type RunEndEncoded(\"run_ends\": non-null Int16, \"values\": Float32) to arrow_array::array::run_array::RunArray<arrow_array::types::Int32Type>";
    assert!(
        message.starts_with(expected),
        "Expected '{message}' to start with '{expected}'"
    );
}
10533
/// Checks `iter_to_array` on run-end-encoded scalars: equal neighbours are
/// merged into runs, and scalars whose fields (or variant) differ from the
/// first one are rejected.
#[test]
fn test_iter_to_array_run_end_encoded() {
    let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int16, false));
    let values_field = Arc::new(Field::new("values", DataType::Int64, true));

    // Builds an Int64 REE scalar using the two fields above.
    let int64_run = |value: Option<i64>| {
        ScalarValue::RunEndEncoded(
            Arc::clone(&run_ends_field),
            Arc::clone(&values_field),
            Box::new(ScalarValue::Int64(value)),
        )
    };

    // 1, 1, NULL, 2, 2, 2 -> runs ending at 2, 3 and 6
    let scalars = vec![
        int64_run(Some(1)),
        int64_run(Some(1)),
        int64_run(None),
        int64_run(Some(2)),
        int64_run(Some(2)),
        int64_run(Some(2)),
    ];
    let run_array = ScalarValue::iter_to_array(scalars).unwrap();
    let expected = RunArray::try_new(
        &Int16Array::from(vec![2, 3, 6]),
        &Int64Array::from(vec![Some(1), None, Some(2)]),
    )
    .unwrap();
    assert_eq!(&expected as &dyn Array, run_array.as_ref());

    // Second scalar uses a different run-ends type.
    let scalars = vec![
        int64_run(Some(1)),
        ScalarValue::RunEndEncoded(
            Field::new("run_ends", DataType::Int32, false).into(),
            Arc::clone(&values_field),
            Box::new(ScalarValue::Int64(Some(1))),
        ),
    ];
    let message = ScalarValue::iter_to_array(scalars)
        .unwrap_err()
        .to_string();
    let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int32 }, Field { name: \"values\", data_type: Int64, nullable: true }, Int64(1))";
    // Include both strings so a failure shows what was actually returned.
    assert!(
        message.starts_with(expected),
        "Expected '{message}' to start with '{expected}'"
    );

    // Second scalar uses a different value type.
    let scalars = vec![
        int64_run(Some(1)),
        ScalarValue::RunEndEncoded(
            Arc::clone(&run_ends_field),
            Field::new("values", DataType::Int32, true).into(),
            Box::new(ScalarValue::Int32(Some(1))),
        ),
    ];
    let message = ScalarValue::iter_to_array(scalars)
        .unwrap_err()
        .to_string();
    let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int16 }, Field { name: \"values\", data_type: Int32, nullable: true }, Int32(1))";
    assert!(
        message.starts_with(expected),
        "Expected '{message}' to start with '{expected}'"
    );

    // Second scalar is not run-end encoded at all.
    let scalars = vec![int64_run(Some(1)), ScalarValue::Int64(Some(1))];
    let message = ScalarValue::iter_to_array(scalars)
        .unwrap_err()
        .to_string();
    let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: Int64(1)";
    assert!(
        message.starts_with(expected),
        "Expected '{message}' to start with '{expected}'"
    );
}
10626
/// Checks `convert_array_to_scalar_vec` on List, LargeList, ListView and
/// LargeListView arrays, plus hand-built `ListArray`s where a null row spans
/// a non-empty or an empty range of child values.
#[test]
fn test_convert_array_to_scalar_vec() {
    let int = |v: i64| ScalarValue::Int64(Some(v));

    // Input rows shared by all four list flavours: [1, 2], NULL, [3, NULL, 4]
    let rows = || -> Vec<Option<Vec<Option<i64>>>> {
        vec![
            Some(vec![Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(4)]),
        ]
    };
    let expected_rows = vec![
        Some(vec![int(1), int(2)]),
        None,
        Some(vec![int(3), ScalarValue::Int64(None), int(4)]),
    ];

    // Builds a ListArray over child values 1..=6 with the given offsets and
    // row validity.
    let int_list = |offsets: Vec<i32>, validity: Vec<bool>| {
        ListArray::new(
            Field::new_list_field(DataType::Int64, true).into(),
            OffsetBuffer::new(offsets.into()),
            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
            Some(NullBuffer::from(validity)),
        )
    };

    // List
    let list = ListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&list).unwrap(),
        expected_rows
    );

    // LargeList
    let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap(),
        expected_rows
    );

    // Null row whose offsets still cover child values (3 and 4).
    let null_over_values = int_list(vec![0, 2, 4, 5], vec![true, false, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&null_over_values).unwrap(),
        vec![Some(vec![int(1), int(2)]), None, Some(vec![int(5)])]
    );

    // Null row with an empty offset range.
    let null_over_empty = int_list(vec![0, 2, 2, 5], vec![true, false, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&null_over_empty).unwrap(),
        vec![
            Some(vec![int(1), int(2)]),
            None,
            Some(vec![int(3), int(4), int(5)]),
        ]
    );

    // Same offsets, but the empty row is valid: it becomes an empty list.
    let valid_empty = int_list(vec![0, 2, 2, 5], vec![true, true, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&valid_empty).unwrap(),
        vec![
            Some(vec![int(1), int(2)]),
            Some(vec![]),
            Some(vec![int(3), int(4), int(5)]),
        ]
    );

    // ListView
    let list_view = ListViewArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&list_view).unwrap(),
        expected_rows
    );

    // LargeListView
    let large_list_view =
        LargeListViewArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&large_list_view).unwrap(),
        expected_rows
    );
}
10795
10796 fn make_long_strings(n: usize) -> StringViewArray {
10801 let mut b = StringViewBuilder::new();
10802 for i in 0..n {
10803 b.append_value(format!("long_string_value_pad_{i:04}"));
10804 }
10805 b.finish()
10806 }
10807
10808 fn utf8view_buffer_bytes(a: &StringViewArray) -> usize {
10810 a.data_buffers().iter().map(|b| b.len()).sum()
10811 }
10812
/// Checks that `compact` on a `List` scalar wrapping a 1-row slice of a
/// 50-value Utf8View array leaves data buffers sized for that single value.
#[test]
fn test_compact_list_utf8view() {
    const N: usize = 50;
    let source = make_long_strings(N);
    let first = source.value(0);
    let first_len = first.len();
    // Precondition: the source buffers hold at least N values' worth of bytes.
    assert!(utf8view_buffer_bytes(&source) >= N * first_len);

    let one_row = Arc::new(source.slice(0, 1)) as ArrayRef;
    let list = SingleRowListArrayBuilder::new(one_row).build_list_array();
    let mut scalar = ScalarValue::List(Arc::new(list));
    scalar.compact();

    let compacted = match &scalar {
        ScalarValue::List(arr) => arr.values().as_string_view(),
        _ => panic!("expected List"),
    };
    assert_eq!(utf8view_buffer_bytes(compacted), first_len);
    assert_eq!(compacted.value(0), first);
}
10835
/// Checks that `compact` on a `LargeList` scalar wrapping a 1-row slice of a
/// 50-value Utf8View array leaves data buffers sized for that single value.
#[test]
fn test_compact_large_list_utf8view() {
    const N: usize = 50;
    let source = make_long_strings(N);
    let first = source.value(0);
    let first_len = first.len();
    // Precondition: the source buffers hold at least N values' worth of bytes.
    assert!(utf8view_buffer_bytes(&source) >= N * first_len);

    let one_row = Arc::new(source.slice(0, 1)) as ArrayRef;
    let list = SingleRowListArrayBuilder::new(one_row).build_large_list_array();
    let mut scalar = ScalarValue::LargeList(Arc::new(list));
    scalar.compact();

    let compacted = match &scalar {
        ScalarValue::LargeList(arr) => arr.values().as_string_view(),
        _ => panic!("expected LargeList"),
    };
    assert_eq!(utf8view_buffer_bytes(compacted), first_len);
    assert_eq!(compacted.value(0), first);
}
10858
/// Checks that `compact` on a `FixedSizeList` scalar (list size 1) wrapping a
/// 1-row slice of a 50-value Utf8View array leaves data buffers sized for
/// that single value.
#[test]
fn test_compact_fixed_size_list_utf8view() {
    const N: usize = 50;
    let source = make_long_strings(N);
    let first = source.value(0);
    let first_len = first.len();
    // Precondition: the source buffers hold at least N values' worth of bytes.
    assert!(utf8view_buffer_bytes(&source) >= N * first_len);

    let one_row = Arc::new(source.slice(0, 1)) as ArrayRef;
    let list = SingleRowListArrayBuilder::new(one_row).build_fixed_size_list_array(1);
    let mut scalar = ScalarValue::FixedSizeList(Arc::new(list));
    scalar.compact();

    let compacted = match &scalar {
        ScalarValue::FixedSizeList(arr) => arr.values().as_string_view(),
        _ => panic!("expected FixedSizeList"),
    };
    assert_eq!(utf8view_buffer_bytes(compacted), first_len);
    assert_eq!(compacted.value(0), first);
}
10881
/// Checks that `compact` on a `ListView` scalar wrapping a 1-row slice of a
/// 50-value Utf8View array leaves data buffers sized for that single value.
#[test]
fn test_compact_list_view_utf8view() {
    const N: usize = 50;
    let source = make_long_strings(N);
    let first = source.value(0);
    let first_len = first.len();
    // Precondition: the source buffers hold at least N values' worth of bytes.
    assert!(utf8view_buffer_bytes(&source) >= N * first_len);

    let one_row = Arc::new(source.slice(0, 1)) as ArrayRef;
    let list = SingleRowListArrayBuilder::new(one_row).build_list_view_array();
    let mut scalar = ScalarValue::ListView(Arc::new(list));
    scalar.compact();

    let compacted = match &scalar {
        ScalarValue::ListView(arr) => arr.values().as_string_view(),
        _ => panic!("expected ListView"),
    };
    assert_eq!(utf8view_buffer_bytes(compacted), first_len);
    assert_eq!(compacted.value(0), first);
}
10904
/// Checks that `compact` on a `LargeListView` scalar wrapping a 1-row slice of
/// a 50-value Utf8View array leaves data buffers sized for that single value.
#[test]
fn test_compact_large_list_view_utf8view() {
    const N: usize = 50;
    let source = make_long_strings(N);
    let first = source.value(0);
    let first_len = first.len();
    // Precondition: the source buffers hold at least N values' worth of bytes.
    assert!(utf8view_buffer_bytes(&source) >= N * first_len);

    let one_row = Arc::new(source.slice(0, 1)) as ArrayRef;
    let list = SingleRowListArrayBuilder::new(one_row).build_large_list_view_array();
    let mut scalar = ScalarValue::LargeListView(Arc::new(list));
    scalar.compact();

    let compacted = match &scalar {
        ScalarValue::LargeListView(arr) => arr.values().as_string_view(),
        _ => panic!("expected LargeListView"),
    };
    assert_eq!(utf8view_buffer_bytes(compacted), first_len);
    assert_eq!(compacted.value(0), first);
}
10927
/// Checks that `compact` on a `Struct` scalar whose only column is a 1-row
/// slice of a 50-value Utf8View array leaves that column with data buffers
/// sized for the single value.
#[test]
fn test_compact_struct_utf8view() {
    const N: usize = 50;
    let strings = make_long_strings(N);
    let one_len = strings.value(0).len();
    // Precondition, matching the list compaction tests: the source buffers
    // hold at least N values' worth of bytes. Presumably the 1-row slice below
    // still references them, so the size check after `compact` is meaningful.
    assert!(utf8view_buffer_bytes(&strings) >= N * one_len);

    let field = Arc::new(Field::new("name", DataType::Utf8View, true));
    let struct_arr = StructArray::new(
        Fields::from(vec![Arc::clone(&field)]),
        vec![Arc::new(strings.slice(0, 1)) as ArrayRef],
        None,
    );

    let mut scalar = ScalarValue::Struct(Arc::new(struct_arr));
    scalar.compact();

    let ScalarValue::Struct(arr) = &scalar else {
        panic!("expected Struct")
    };
    let col = arr.column(0).as_string_view();
    // After compaction only the single value's bytes remain, and the value
    // itself is unchanged.
    assert_eq!(utf8view_buffer_bytes(col), one_len);
    assert_eq!(col.value(0), strings.value(0));
}
10951
/// Checks that `compact` on a `Map` scalar whose key column is a 1-row slice
/// of a 50-value Utf8View array leaves the keys with data buffers sized for
/// the single value.
#[test]
fn test_compact_map_utf8view() {
    const N: usize = 50;
    let strings = make_long_strings(N);
    let one_len = strings.value(0).len();
    // Precondition, matching the list compaction tests: the source buffers
    // hold at least N values' worth of bytes. Presumably the 1-row slice below
    // still references them, so the size check after `compact` is meaningful.
    assert!(utf8view_buffer_bytes(&strings) >= N * one_len);

    // Single-entry map: Utf8View key -> Int32 value.
    let key_field = Arc::new(Field::new("key", DataType::Utf8View, false));
    let val_field = Arc::new(Field::new("value", DataType::Int32, true));
    let entries = StructArray::new(
        Fields::from(vec![Arc::clone(&key_field), Arc::clone(&val_field)]),
        vec![
            Arc::new(strings.slice(0, 1)) as ArrayRef,
            Arc::new(Int32Array::from(vec![1i32])) as ArrayRef,
        ],
        None,
    );
    let entries_field = Arc::new(Field::new(
        "entries",
        DataType::Struct(Fields::from(vec![key_field, val_field])),
        false,
    ));
    let map = MapArray::new(
        entries_field,
        OffsetBuffer::new(vec![0i32, 1].into()),
        entries,
        None,
        false,
    );

    let mut scalar = ScalarValue::Map(Arc::new(map));
    scalar.compact();

    let ScalarValue::Map(arr) = &scalar else {
        panic!("expected Map")
    };
    let keys = arr.entries().column(0).as_string_view();
    // After compaction only the single key's bytes remain, and the key itself
    // is unchanged.
    assert_eq!(utf8view_buffer_bytes(keys), one_len);
    assert_eq!(keys.value(0), strings.value(0));
}
10991}