1mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::hash::Hash;
30use std::hash::Hasher;
31use std::iter::repeat_n;
32use std::mem::{size_of, size_of_val};
33use std::str::FromStr;
34use std::sync::Arc;
35
36use crate::assert_or_internal_err;
37use crate::cast::{
38 as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
39 as_date64_array, as_decimal32_array, as_decimal64_array, as_decimal128_array,
40 as_decimal256_array, as_dictionary_array, as_duration_microsecond_array,
41 as_duration_millisecond_array, as_duration_nanosecond_array,
42 as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
43 as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array,
44 as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array,
45 as_interval_ym_array, as_large_binary_array, as_large_list_array,
46 as_large_string_array, as_run_array, as_string_array, as_string_view_array,
47 as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
48 as_time64_nanosecond_array, as_timestamp_microsecond_array,
49 as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
50 as_timestamp_second_array, as_uint8_array, as_uint16_array, as_uint32_array,
51 as_uint64_array, as_union_array,
52};
53use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result};
54use crate::format::DEFAULT_CAST_OPTIONS;
55use crate::hash_utils::create_hashes;
56use crate::utils::SingleRowListArrayBuilder;
57use crate::{_internal_datafusion_err, arrow_datafusion_err};
58use arrow::array::{
59 Array, ArrayData, ArrayDataBuilder, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType,
60 AsArray, BinaryArray, BinaryViewArray, BinaryViewBuilder, BooleanArray, Date32Array,
61 Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
62 DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
63 DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
64 FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
65 Int8Array, Int16Array, Int32Array, Int64Array, IntervalDayTimeArray,
66 IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
67 LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait,
68 PrimitiveArray, RunArray, Scalar, StringArray, StringViewArray, StringViewBuilder,
69 StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
70 Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
71 TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array,
72 UInt64Array, UnionArray, downcast_run_array, new_empty_array, new_null_array,
73};
74use arrow::buffer::{BooleanBuffer, ScalarBuffer};
75use arrow::compute::kernels::cast::{CastOptions, cast_with_options};
76use arrow::compute::kernels::numeric::{
77 add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
78};
79use arrow::datatypes::{
80 ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, Date32Type,
81 Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, Field,
82 FieldRef, Float32Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTime,
83 IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit,
84 IntervalYearMonthType, RunEndIndexType, TimeUnit, TimestampMicrosecondType,
85 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type,
86 UInt16Type, UInt32Type, UInt64Type, UnionFields, UnionMode, i256,
87 validate_decimal_precision_and_scale,
88};
89use arrow::util::display::{ArrayFormatter, FormatOptions, array_value_to_string};
90use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
91use chrono::{Duration, NaiveDate};
92use half::f16;
93pub use struct_builder::ScalarStructBuilder;
94
/// Number of seconds in one day.
const SECONDS_PER_DAY: i64 = 86_400;
/// Number of milliseconds in one day.
const MILLIS_PER_DAY: i64 = SECONDS_PER_DAY * 1_000;
/// Number of microseconds in one day.
const MICROS_PER_DAY: i64 = MILLIS_PER_DAY * 1_000;
/// Number of nanoseconds in one day.
const NANOS_PER_DAY: i64 = MICROS_PER_DAY * 1_000;
/// Number of microseconds in one millisecond.
const MICROS_PER_MILLISECOND: i64 = 1_000;
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLISECOND: i64 = 1_000_000;
101
102pub fn date_to_timestamp_multiplier(
106 source_type: &DataType,
107 target_type: &DataType,
108) -> Option<i64> {
109 let DataType::Timestamp(target_unit, _) = target_type else {
110 return None;
111 };
112
113 match source_type {
120 DataType::Date32 => Some(match target_unit {
124 TimeUnit::Second => SECONDS_PER_DAY,
125 TimeUnit::Millisecond => MILLIS_PER_DAY,
126 TimeUnit::Microsecond => MICROS_PER_DAY,
127 TimeUnit::Nanosecond => NANOS_PER_DAY,
128 }),
129
130 DataType::Date64 => match target_unit {
135 TimeUnit::Second => None,
136 TimeUnit::Millisecond => None,
141 TimeUnit::Microsecond => Some(MICROS_PER_MILLISECOND),
142 TimeUnit::Nanosecond => Some(NANOS_PER_MILLISECOND),
143 },
144
145 _ => None,
146 }
147}
148
149pub fn ensure_timestamp_in_bounds(
153 value: i64,
154 multiplier: i64,
155 source_type: &DataType,
156 target_type: &DataType,
157) -> Result<()> {
158 if multiplier <= 1 {
159 return Ok(());
160 }
161
162 if value.checked_mul(multiplier).is_none() {
163 let target = format_timestamp_type_for_error(target_type);
164 _exec_err!(
165 "Cannot cast {} value {} to {}: converted value exceeds the representable i64 range",
166 source_type,
167 value,
168 target
169 )
170 } else {
171 Ok(())
172 }
173}
174
175pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String {
178 match target_type {
179 DataType::Timestamp(unit, _) => {
180 let s = match unit {
181 TimeUnit::Second => "s",
182 TimeUnit::Millisecond => "ms",
183 TimeUnit::Microsecond => "us",
184 TimeUnit::Nanosecond => "ns",
185 };
186 format!("Timestamp({s})")
187 }
188 other => format!("{other}"),
189 }
190}
191
/// A single, dynamically typed value.
///
/// Most variants wrap an `Option`, where `None` stands for a null of that
/// type (this is what `try_new_null` constructs for the matching Arrow
/// `DataType`). Nested variants instead hold an `Arc` to an Arrow array.
///
/// Equality, ordering and hashing are implemented by hand for this type
/// rather than derived; see the respective trait implementations for the
/// exact rules.
#[derive(Clone)]
pub enum ScalarValue {
    /// A null value that carries no payload.
    Null,
    /// Boolean value.
    Boolean(Option<bool>),
    /// 16-bit float.
    Float16(Option<f16>),
    /// 32-bit float.
    Float32(Option<f32>),
    /// 64-bit float.
    Float64(Option<f64>),
    /// Decimal stored in an `i32`, followed by its precision and scale.
    Decimal32(Option<i32>, u8, i8),
    /// Decimal stored in an `i64`, followed by its precision and scale.
    Decimal64(Option<i64>, u8, i8),
    /// Decimal stored in an `i128`, followed by its precision and scale.
    Decimal128(Option<i128>, u8, i8),
    /// Decimal stored in an `i256`, followed by its precision and scale.
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit integer.
    Int8(Option<i8>),
    /// Signed 16-bit integer.
    Int16(Option<i16>),
    /// Signed 32-bit integer.
    Int32(Option<i32>),
    /// Signed 64-bit integer.
    Int64(Option<i64>),
    /// Unsigned 8-bit integer.
    UInt8(Option<u8>),
    /// Unsigned 16-bit integer.
    UInt16(Option<u16>),
    /// Unsigned 32-bit integer.
    UInt32(Option<u32>),
    /// Unsigned 64-bit integer.
    UInt64(Option<u64>),
    /// String value corresponding to `DataType::Utf8`.
    Utf8(Option<String>),
    /// String value corresponding to `DataType::Utf8View`.
    Utf8View(Option<String>),
    /// String value corresponding to `DataType::LargeUtf8`.
    LargeUtf8(Option<String>),
    /// Byte string corresponding to `DataType::Binary`.
    Binary(Option<Vec<u8>>),
    /// Byte string corresponding to `DataType::BinaryView`.
    BinaryView(Option<Vec<u8>>),
    /// Byte string corresponding to `DataType::FixedSizeBinary`; the `i32` is
    /// the length carried by that data type.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Byte string corresponding to `DataType::LargeBinary`.
    LargeBinary(Option<Vec<u8>>),
    /// Fixed-size list, held as an array. `try_new_null` builds it with a
    /// single row, and the list comparison helper asserts a length of 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// List, held as an array. `try_new_null` builds it with a single row,
    /// and the list comparison helper asserts a length of 1.
    List(Arc<ListArray>),
    /// Large list, held as an array. `try_new_null` builds it with a single
    /// row, and the list comparison helper asserts a length of 1.
    LargeList(Arc<LargeListArray>),
    /// Struct, held as an array (`try_new_null` builds it with a single row).
    Struct(Arc<StructArray>),
    /// Map, held as an array (`try_new_null` builds it with a single row).
    Map(Arc<MapArray>),
    /// Value corresponding to `DataType::Date32`.
    Date32(Option<i32>),
    /// Value corresponding to `DataType::Date64`.
    Date64(Option<i64>),
    /// Value corresponding to `DataType::Time32(TimeUnit::Second)`.
    Time32Second(Option<i32>),
    /// Value corresponding to `DataType::Time32(TimeUnit::Millisecond)`.
    Time32Millisecond(Option<i32>),
    /// Value corresponding to `DataType::Time64(TimeUnit::Microsecond)`.
    Time64Microsecond(Option<i64>),
    /// Value corresponding to `DataType::Time64(TimeUnit::Nanosecond)`.
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds, with the optional timezone of its data type.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds, with the optional timezone of its data type.
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds, with the optional timezone of its data type.
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds, with the optional timezone of its data type.
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Value corresponding to `DataType::Interval(IntervalUnit::YearMonth)`.
    IntervalYearMonth(Option<i32>),
    /// Value corresponding to `DataType::Interval(IntervalUnit::DayTime)`.
    IntervalDayTime(Option<IntervalDayTime>),
    /// Value corresponding to `DataType::Interval(IntervalUnit::MonthDayNano)`.
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Value corresponding to `DataType::Duration(TimeUnit::Second)`.
    DurationSecond(Option<i64>),
    /// Value corresponding to `DataType::Duration(TimeUnit::Millisecond)`.
    DurationMillisecond(Option<i64>),
    /// Value corresponding to `DataType::Duration(TimeUnit::Microsecond)`.
    DurationMicrosecond(Option<i64>),
    /// Value corresponding to `DataType::Duration(TimeUnit::Nanosecond)`.
    DurationNanosecond(Option<i64>),
    /// Union value together with the union's fields and mode.
    /// NOTE(review): the `i8` is presumably the type id of the selected
    /// field - confirm against the constructors.
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary-encoded value: the dictionary index type and the value.
    Dictionary(Box<DataType>, Box<ScalarValue>),
    /// Run-end-encoded value: the run-ends field, the values field and the
    /// value itself.
    RunEndEncoded(FieldRef, FieldRef, Box<ScalarValue>),
}
435
436impl Hash for Fl<f16> {
437 fn hash<H: Hasher>(&self, state: &mut H) {
438 self.0.to_bits().hash(state);
439 }
440}
441
/// Hand-written equality for [`ScalarValue`].
///
/// Two scalars are equal only when they are the same variant. Within a
/// variant:
/// * decimals also compare precision and scale;
/// * floats compare by bit pattern when both are `Some`;
/// * timestamps compare only the value, the timezone is ignored;
/// * `FixedSizeBinary` compares only the bytes, the size is ignored;
/// * `Union`, `Dictionary` and `RunEndEncoded` also compare their type
///   descriptors.
impl PartialEq for ScalarValue {
    fn eq(&self, other: &Self) -> bool {
        use ScalarValue::*;
        // Every variant has an explicit `(Variant, _) => false` arm instead of
        // one trailing wildcard, which keeps the match exhaustive per variant.
        match (self, other) {
            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
            }
            (Decimal32(_, _, _), _) => false,
            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
            }
            (Decimal64(_, _, _), _) => false,
            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
            }
            (Decimal128(_, _, _), _) => false,
            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
            }
            (Decimal256(_, _, _), _) => false,
            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
            (Boolean(_), _) => false,
            // Floats are compared through `to_bits`, i.e. by exact bit pattern
            // rather than by IEEE `==`.
            (Float32(v1), Float32(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
                _ => v1.eq(v2),
            },
            (Float16(v1), Float16(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
                _ => v1.eq(v2),
            },
            (Float32(_), _) => false,
            (Float16(_), _) => false,
            (Float64(v1), Float64(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
                _ => v1.eq(v2),
            },
            (Float64(_), _) => false,
            (Int8(v1), Int8(v2)) => v1.eq(v2),
            (Int8(_), _) => false,
            (Int16(v1), Int16(v2)) => v1.eq(v2),
            (Int16(_), _) => false,
            (Int32(v1), Int32(v2)) => v1.eq(v2),
            (Int32(_), _) => false,
            (Int64(v1), Int64(v2)) => v1.eq(v2),
            (Int64(_), _) => false,
            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
            (UInt8(_), _) => false,
            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
            (UInt16(_), _) => false,
            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
            (UInt32(_), _) => false,
            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
            (UInt64(_), _) => false,
            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
            (Utf8(_), _) => false,
            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
            (Utf8View(_), _) => false,
            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
            (LargeUtf8(_), _) => false,
            (Binary(v1), Binary(v2)) => v1.eq(v2),
            (Binary(_), _) => false,
            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
            (BinaryView(_), _) => false,
            // Only the bytes are compared; the size component is ignored.
            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
            (FixedSizeBinary(_, _), _) => false,
            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
            (LargeBinary(_), _) => false,
            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
            (FixedSizeList(_), _) => false,
            (List(v1), List(v2)) => v1.eq(v2),
            (List(_), _) => false,
            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
            (LargeList(_), _) => false,
            (Struct(v1), Struct(v2)) => v1.eq(v2),
            (Struct(_), _) => false,
            (Map(v1), Map(v2)) => v1.eq(v2),
            (Map(_), _) => false,
            (Date32(v1), Date32(v2)) => v1.eq(v2),
            (Date32(_), _) => false,
            (Date64(v1), Date64(v2)) => v1.eq(v2),
            (Date64(_), _) => false,
            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
            (Time32Second(_), _) => false,
            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
            (Time32Millisecond(_), _) => false,
            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
            (Time64Microsecond(_), _) => false,
            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
            (Time64Nanosecond(_), _) => false,
            // Timestamps compare the value only; the timezone is ignored.
            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
            (TimestampSecond(_, _), _) => false,
            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
            (TimestampMillisecond(_, _), _) => false,
            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
            (TimestampMicrosecond(_, _), _) => false,
            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
            (TimestampNanosecond(_, _), _) => false,
            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
            (DurationSecond(_), _) => false,
            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
            (DurationMillisecond(_), _) => false,
            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
            (DurationMicrosecond(_), _) => false,
            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
            (DurationNanosecond(_), _) => false,
            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
            (IntervalYearMonth(_), _) => false,
            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
            (IntervalDayTime(_), _) => false,
            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
            (IntervalMonthDayNano(_), _) => false,
            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
            }
            (Union(_, _, _), _) => false,
            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
            (Dictionary(_, _), _) => false,
            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
                rf1.eq(rf2) && vf1.eq(vf2) && v1.eq(v2)
            }
            (RunEndEncoded(_, _, _), _) => false,
            (Null, Null) => true,
            (Null, _) => false,
        }
    }
}
572
/// Hand-written partial ordering for [`ScalarValue`].
///
/// Scalars of different variants are never comparable (`None`). Within a
/// variant:
/// * decimals are comparable only when precision and scale both match;
/// * floats use `total_cmp` when both are `Some`;
/// * timestamps compare only the value, the timezone is ignored;
/// * `FixedSizeBinary` compares only the bytes, the size is ignored;
/// * lists, structs and maps delegate to `partial_cmp_list`,
///   `partial_cmp_struct` and `partial_cmp_map`;
/// * `Union`, `Dictionary` and `RunEndEncoded` are comparable only when
///   their type descriptors match.
impl PartialOrd for ScalarValue {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        use ScalarValue::*;
        // Every variant has an explicit `(Variant, _) => None` arm instead of
        // one trailing wildcard, which keeps the match exhaustive per variant.
        match (self, other) {
            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
                if p1.eq(p2) && s1.eq(s2) {
                    v1.partial_cmp(v2)
                } else {
                    // Differing precision or scale: not comparable.
                    None
                }
            }
            (Decimal32(_, _, _), _) => None,
            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
                if p1.eq(p2) && s1.eq(s2) {
                    v1.partial_cmp(v2)
                } else {
                    // Differing precision or scale: not comparable.
                    None
                }
            }
            (Decimal64(_, _, _), _) => None,
            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
                if p1.eq(p2) && s1.eq(s2) {
                    v1.partial_cmp(v2)
                } else {
                    // Differing precision or scale: not comparable.
                    None
                }
            }
            (Decimal128(_, _, _), _) => None,
            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
                if p1.eq(p2) && s1.eq(s2) {
                    v1.partial_cmp(v2)
                } else {
                    // Differing precision or scale: not comparable.
                    None
                }
            }
            (Decimal256(_, _, _), _) => None,
            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
            (Boolean(_), _) => None,
            // Floats use `total_cmp`, so two `Some` floats always yield an
            // ordering.
            (Float32(v1), Float32(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
                _ => v1.partial_cmp(v2),
            },
            (Float16(v1), Float16(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
                _ => v1.partial_cmp(v2),
            },
            (Float32(_), _) => None,
            (Float16(_), _) => None,
            (Float64(v1), Float64(v2)) => match (v1, v2) {
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
                _ => v1.partial_cmp(v2),
            },
            (Float64(_), _) => None,
            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
            (Int8(_), _) => None,
            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
            (Int16(_), _) => None,
            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
            (Int32(_), _) => None,
            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
            (Int64(_), _) => None,
            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
            (UInt8(_), _) => None,
            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
            (UInt16(_), _) => None,
            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
            (UInt32(_), _) => None,
            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
            (UInt64(_), _) => None,
            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
            (Utf8(_), _) => None,
            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
            (LargeUtf8(_), _) => None,
            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
            (Utf8View(_), _) => None,
            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
            (Binary(_), _) => None,
            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
            (BinaryView(_), _) => None,
            // Only the bytes are compared; the size component is ignored.
            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
            (FixedSizeBinary(_, _), _) => None,
            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
            (LargeBinary(_), _) => None,
            // The three list flavours share one comparison routine but are
            // only compared against the same flavour.
            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
            }
            (LargeList(arr1), LargeList(arr2)) => {
                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
            }
            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
            (Struct(struct_arr1), Struct(struct_arr2)) => {
                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
            }
            (Struct(_), _) => None,
            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
            (Map(_), _) => None,
            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
            (Date32(_), _) => None,
            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
            (Date64(_), _) => None,
            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
            (Time32Second(_), _) => None,
            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
            (Time32Millisecond(_), _) => None,
            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
            (Time64Microsecond(_), _) => None,
            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
            (Time64Nanosecond(_), _) => None,
            // Timestamps compare the value only; the timezone is ignored.
            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
            (TimestampSecond(_, _), _) => None,
            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
                v1.partial_cmp(v2)
            }
            (TimestampMillisecond(_, _), _) => None,
            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
                v1.partial_cmp(v2)
            }
            (TimestampMicrosecond(_, _), _) => None,
            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
                v1.partial_cmp(v2)
            }
            (TimestampNanosecond(_, _), _) => None,
            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
            (IntervalYearMonth(_), _) => None,
            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
            (IntervalDayTime(_), _) => None,
            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
            (IntervalMonthDayNano(_), _) => None,
            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
            (DurationSecond(_), _) => None,
            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
            (DurationMillisecond(_), _) => None,
            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
            (DurationMicrosecond(_), _) => None,
            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
            (DurationNanosecond(_), _) => None,
            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
                // Comparable only when fields and mode are the same.
                if t1.eq(t2) && m1.eq(m2) {
                    v1.partial_cmp(v2)
                } else {
                    None
                }
            }
            (Union(_, _, _), _) => None,
            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
                // Comparable only when the index types are the same.
                if k1 == k2 { v1.partial_cmp(v2) } else { None }
            }
            (Dictionary(_, _), _) => None,
            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
                // Comparable only when both the run-ends and values fields match.
                if rf1 == rf2 && vf1 == vf2 {
                    v1.partial_cmp(v2)
                } else {
                    None
                }
            }
            (RunEndEncoded(_, _, _), _) => None,
            (Null, Null) => Some(Ordering::Equal),
            (Null, _) => None,
        }
    }
}
746
747fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
750 assert_eq!(arr.len(), 1);
751 if let Some(arr) = arr.as_list_opt::<i32>() {
752 arr.value(0)
753 } else if let Some(arr) = arr.as_list_opt::<i64>() {
754 arr.value(0)
755 } else if let Some(arr) = arr.as_fixed_size_list_opt() {
756 arr.value(0)
757 } else {
758 unreachable!(
759 "Since only List / LargeList / FixedSizeList are supported, this should never happen"
760 )
761 }
762}
763
764fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
766 if arr1.data_type() != arr2.data_type() {
767 return None;
768 }
769 let arr1 = first_array_for_list(arr1);
770 let arr2 = first_array_for_list(arr2);
771
772 let min_length = arr1.len().min(arr2.len());
773 let arr1_trimmed = arr1.slice(0, min_length);
774 let arr2_trimmed = arr2.slice(0, min_length);
775
776 let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
777 let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
778
779 for j in 0..lt_res.len() {
780 if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
788 return Some(Ordering::Greater);
789 }
790 if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
791 return Some(Ordering::Less);
792 }
793
794 if lt_res.is_valid(j) && lt_res.value(j) {
795 return Some(Ordering::Less);
796 }
797 if eq_res.is_valid(j) && !eq_res.value(j) {
798 return Some(Ordering::Greater);
799 }
800 }
801
802 Some(arr1.len().cmp(&arr2.len()))
803}
804
805fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
806 for i in 0..array.num_columns() {
807 let column = array.column(i);
808 if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
809 flatten(nested_struct, columns);
811 } else {
812 columns.push(column);
814 }
815 }
816}
817
818pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
819 if s1.len() != s2.len() {
820 return None;
821 }
822
823 if s1.data_type() != s2.data_type() {
824 return None;
825 }
826
827 let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
828 let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
829
830 flatten(s1, &mut expanded_columns1);
831 flatten(s2, &mut expanded_columns2);
832
833 for col_index in 0..expanded_columns1.len() {
834 let arr1 = expanded_columns1[col_index];
835 let arr2 = expanded_columns2[col_index];
836
837 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
838 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
839
840 for j in 0..lt_res.len() {
841 if lt_res.is_valid(j) && lt_res.value(j) {
842 return Some(Ordering::Less);
843 }
844 if eq_res.is_valid(j) && !eq_res.value(j) {
845 return Some(Ordering::Greater);
846 }
847 }
848 }
849 Some(Ordering::Equal)
850}
851
852fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
853 if m1.len() != m2.len() {
854 return None;
855 }
856
857 if m1.data_type() != m2.data_type() {
858 return None;
859 }
860
861 for col_index in 0..m1.len() {
862 let arr1 = m1.entries().column(col_index);
863 let arr2 = m2.entries().column(col_index);
864
865 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
866 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
867
868 for j in 0..lt_res.len() {
869 if lt_res.is_valid(j) && lt_res.value(j) {
870 return Some(Ordering::Less);
871 }
872 if eq_res.is_valid(j) && !eq_res.value(j) {
873 return Some(Ordering::Greater);
874 }
875 }
876 }
877 Some(Ordering::Equal)
878}
879
// Marker impl: `PartialEq` for `ScalarValue` compares floats by bit pattern
// rather than with IEEE `==`.
impl Eq for ScalarValue {}
881
/// Newtype wrapper around a float so that `Hash` can be implemented for it.
struct Fl<T>(T);

/// Implements `Hash` for `Fl<$t>` for every `(float, unsigned int)` pair
/// given, by feeding the float's native-endian bytes (round-tripped through
/// the same-width integer `$i`) to the hasher.
macro_rules! hash_float_value {
    ($(($t:ty, $i:ty)),+) => {
        $(
            impl std::hash::Hash for Fl<$t> {
                #[inline]
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                    let bits: $i = <$i>::from_ne_bytes(self.0.to_ne_bytes());
                    state.write(&bits.to_ne_bytes())
                }
            }
        )+
    };
}

hash_float_value!((f64, u64), (f32, u32));
897
/// Hand-written hashing for [`ScalarValue`].
///
/// The arms mirror the hand-written `PartialEq`: timestamps hash only the
/// value (not the timezone), `FixedSizeBinary` hashes only the bytes (not
/// the size), floats hash through the `Fl` wrapper, and nested values are
/// hashed via `hash_nested_array`. The variant itself is not fed to the
/// hasher, so equal payloads in different variants hash alike.
impl Hash for ScalarValue {
    fn hash<H: Hasher>(&self, state: &mut H) {
        use ScalarValue::*;
        match self {
            Decimal32(v, p, s) => {
                v.hash(state);
                p.hash(state);
                s.hash(state)
            }
            Decimal64(v, p, s) => {
                v.hash(state);
                p.hash(state);
                s.hash(state)
            }
            Decimal128(v, p, s) => {
                v.hash(state);
                p.hash(state);
                s.hash(state)
            }
            Decimal256(v, p, s) => {
                v.hash(state);
                p.hash(state);
                s.hash(state)
            }
            Boolean(v) => v.hash(state),
            // Floats are wrapped in `Fl`, which supplies the `Hash` impl.
            Float16(v) => v.map(Fl).hash(state),
            Float32(v) => v.map(Fl).hash(state),
            Float64(v) => v.map(Fl).hash(state),
            Int8(v) => v.hash(state),
            Int16(v) => v.hash(state),
            Int32(v) => v.hash(state),
            Int64(v) => v.hash(state),
            UInt8(v) => v.hash(state),
            UInt16(v) => v.hash(state),
            UInt32(v) => v.hash(state),
            UInt64(v) => v.hash(state),
            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
            // The size of `FixedSizeBinary` is not hashed, matching `eq`.
            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
                v.hash(state)
            }
            // `to_owned` on the `&Arc<_>` clones the `Arc` so it can be
            // passed by value as an `ArrayRef`.
            List(arr) => {
                hash_nested_array(arr.to_owned() as ArrayRef, state);
            }
            LargeList(arr) => {
                hash_nested_array(arr.to_owned() as ArrayRef, state);
            }
            FixedSizeList(arr) => {
                hash_nested_array(arr.to_owned() as ArrayRef, state);
            }
            Struct(arr) => {
                hash_nested_array(arr.to_owned() as ArrayRef, state);
            }
            Map(arr) => {
                hash_nested_array(arr.to_owned() as ArrayRef, state);
            }
            Date32(v) => v.hash(state),
            Date64(v) => v.hash(state),
            Time32Second(v) => v.hash(state),
            Time32Millisecond(v) => v.hash(state),
            Time64Microsecond(v) => v.hash(state),
            Time64Nanosecond(v) => v.hash(state),
            // The timezone is not hashed, matching `eq`.
            TimestampSecond(v, _) => v.hash(state),
            TimestampMillisecond(v, _) => v.hash(state),
            TimestampMicrosecond(v, _) => v.hash(state),
            TimestampNanosecond(v, _) => v.hash(state),
            DurationSecond(v) => v.hash(state),
            DurationMillisecond(v) => v.hash(state),
            DurationMicrosecond(v) => v.hash(state),
            DurationNanosecond(v) => v.hash(state),
            IntervalYearMonth(v) => v.hash(state),
            IntervalDayTime(v) => v.hash(state),
            IntervalMonthDayNano(v) => v.hash(state),
            Union(v, t, m) => {
                v.hash(state);
                t.hash(state);
                m.hash(state);
            }
            Dictionary(k, v) => {
                k.hash(state);
                v.hash(state);
            }
            RunEndEncoded(rf, vf, v) => {
                rf.hash(state);
                vf.hash(state);
                v.hash(state);
            }
            // A fixed constant is hashed for `Null`.
            Null => 1.hash(state),
        }
    }
}
994
995fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
996 let len = arr.len();
997 let hashes_buffer = &mut vec![0; len];
998 let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
999 let hashes = create_hashes(&[arr], &random_state, hashes_buffer)
1000 .expect("hash_nested_array: failed to create row hashes");
1001 hashes.hash(state);
1003}
1004
1005#[inline]
1012pub fn get_dict_value<K: ArrowDictionaryKeyType>(
1013 array: &dyn Array,
1014 index: usize,
1015) -> Result<(&ArrayRef, Option<usize>)> {
1016 let dict_array = as_dictionary_array::<K>(array)?;
1017 Ok((dict_array.values(), dict_array.key(index)))
1018}
1019
1020fn dict_from_scalar<K: ArrowDictionaryKeyType>(
1023 value: &ScalarValue,
1024 size: usize,
1025) -> Result<ArrayRef> {
1026 let values_array = value.to_array_of_size(1)?;
1028
1029 let key_array: PrimitiveArray<K> =
1032 get_or_create_cached_key_array::<K>(size, value.is_null());
1033
1034 Ok(Arc::new(
1040 DictionaryArray::<K>::try_new(key_array, values_array)?, ))
1042}
1043
1044pub fn dict_from_values<K: ArrowDictionaryKeyType>(
1059 values_array: ArrayRef,
1060) -> Result<ArrayRef> {
1061 let key_array: PrimitiveArray<K> = (0..values_array.len())
1064 .map(|index| {
1065 if values_array.is_valid(index) {
1066 let native_index = K::Native::from_usize(index).ok_or_else(|| {
1067 _internal_datafusion_err!(
1068 "Can not create index of type {} from value {index}",
1069 K::DATA_TYPE
1070 )
1071 })?;
1072 Ok(Some(native_index))
1073 } else {
1074 Ok(None)
1075 }
1076 })
1077 .collect::<Result<Vec<_>>>()?
1078 .into_iter()
1079 .collect();
1080
1081 let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
1087 Ok(Arc::new(dict_array))
1088}
1089
/// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
/// `ScalarValue::$SCALAR` variant, attaching a clone of `$TZ` as the second
/// field. A null element becomes `None`.
///
/// Uses `?`, so it must be expanded inside a function returning a `Result`.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
1102
/// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
/// `ScalarValue::$SCALAR` variant. A null element becomes `None`.
///
/// Uses `?`, so it must be expanded inside a function returning a `Result`.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
1114
// Builds an array of `$SIZE` rows from the optional value `$EXPR`.
//
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` wrapped in an
// `Arc`; `None` yields an all-null array of the given data type.
//
// Two forms are accepted:
// * `($DATA_TYPE, $ARRAY_TYPE, $EXPR, $SIZE)` for a unit `DataType` variant;
// * `($DATA_TYPE, $ENUM, $ARRAY_TYPE, $EXPR, $SIZE)` for a `DataType` variant
//   that takes one argument (`$ENUM`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
        }
    }};
}
1129
// Builds a timestamp array of `$SIZE` rows from the optional value `$EXPR`.
//
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` with the
// timezone `$TZ` applied, wrapped in an `Arc`; `None` yields an all-null
// array of type `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            Some(value) => {
                Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
            }
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
        }
    };
}
1140
/// Tests whether element `$index` of `$array` (downcast with `$array_cast`)
/// equals the optional value `$VALUE`.
///
/// `Some(val)` matches a valid element equal to `val`; `None` matches a null
/// element.
///
/// Uses `?`, so it must be expanded inside a function returning a `Result`.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        let matches = if let Some(val) = $VALUE {
            is_valid && &array.value($index) == val
        } else {
            !is_valid
        };
        Ok::<bool, DataFusionError>(matches)
    }};
}
1151
1152impl ScalarValue {
1153 pub fn new_primitive<T: ArrowPrimitiveType>(
1159 a: Option<T::Native>,
1160 d: &DataType,
1161 ) -> Result<Self> {
1162 match a {
1163 None => d.try_into(),
1164 Some(v) => {
1165 let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1166 .with_data_type(d.clone());
1167 Self::try_from_array(&array, 0)
1168 }
1169 }
1170 }
1171
1172 pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1174 Self::validate_decimal_or_internal_err::<Decimal128Type>(precision, scale)?;
1175 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
1176 }
1177
1178 pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1190 Ok(match data_type {
1191 DataType::Boolean => ScalarValue::Boolean(None),
1192 DataType::Float16 => ScalarValue::Float16(None),
1193 DataType::Float64 => ScalarValue::Float64(None),
1194 DataType::Float32 => ScalarValue::Float32(None),
1195 DataType::Int8 => ScalarValue::Int8(None),
1196 DataType::Int16 => ScalarValue::Int16(None),
1197 DataType::Int32 => ScalarValue::Int32(None),
1198 DataType::Int64 => ScalarValue::Int64(None),
1199 DataType::UInt8 => ScalarValue::UInt8(None),
1200 DataType::UInt16 => ScalarValue::UInt16(None),
1201 DataType::UInt32 => ScalarValue::UInt32(None),
1202 DataType::UInt64 => ScalarValue::UInt64(None),
1203 DataType::Decimal32(precision, scale) => {
1204 ScalarValue::Decimal32(None, *precision, *scale)
1205 }
1206 DataType::Decimal64(precision, scale) => {
1207 ScalarValue::Decimal64(None, *precision, *scale)
1208 }
1209 DataType::Decimal128(precision, scale) => {
1210 ScalarValue::Decimal128(None, *precision, *scale)
1211 }
1212 DataType::Decimal256(precision, scale) => {
1213 ScalarValue::Decimal256(None, *precision, *scale)
1214 }
1215 DataType::Utf8 => ScalarValue::Utf8(None),
1216 DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1217 DataType::Utf8View => ScalarValue::Utf8View(None),
1218 DataType::Binary => ScalarValue::Binary(None),
1219 DataType::BinaryView => ScalarValue::BinaryView(None),
1220 DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1221 DataType::LargeBinary => ScalarValue::LargeBinary(None),
1222 DataType::Date32 => ScalarValue::Date32(None),
1223 DataType::Date64 => ScalarValue::Date64(None),
1224 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1225 DataType::Time32(TimeUnit::Millisecond) => {
1226 ScalarValue::Time32Millisecond(None)
1227 }
1228 DataType::Time64(TimeUnit::Microsecond) => {
1229 ScalarValue::Time64Microsecond(None)
1230 }
1231 DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1232 DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1233 ScalarValue::TimestampSecond(None, tz_opt.clone())
1234 }
1235 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1236 ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1237 }
1238 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1239 ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1240 }
1241 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1242 ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1243 }
1244 DataType::Interval(IntervalUnit::YearMonth) => {
1245 ScalarValue::IntervalYearMonth(None)
1246 }
1247 DataType::Interval(IntervalUnit::DayTime) => {
1248 ScalarValue::IntervalDayTime(None)
1249 }
1250 DataType::Interval(IntervalUnit::MonthDayNano) => {
1251 ScalarValue::IntervalMonthDayNano(None)
1252 }
1253 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1254 DataType::Duration(TimeUnit::Millisecond) => {
1255 ScalarValue::DurationMillisecond(None)
1256 }
1257 DataType::Duration(TimeUnit::Microsecond) => {
1258 ScalarValue::DurationMicrosecond(None)
1259 }
1260 DataType::Duration(TimeUnit::Nanosecond) => {
1261 ScalarValue::DurationNanosecond(None)
1262 }
1263 DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1264 index_type.clone(),
1265 Box::new(value_type.as_ref().try_into()?),
1266 ),
1267 DataType::RunEndEncoded(run_ends_field, value_field) => {
1268 ScalarValue::RunEndEncoded(
1269 Arc::clone(run_ends_field),
1270 Arc::clone(value_field),
1271 Box::new(value_field.data_type().try_into()?),
1272 )
1273 }
1274 DataType::List(field_ref) => ScalarValue::List(Arc::new(
1276 GenericListArray::new_null(Arc::clone(field_ref), 1),
1277 )),
1278 DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1280 GenericListArray::new_null(Arc::clone(field_ref), 1),
1281 )),
1282 DataType::FixedSizeList(field_ref, fixed_length) => {
1284 ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1285 Arc::clone(field_ref),
1286 *fixed_length,
1287 1,
1288 )))
1289 }
1290 DataType::Struct(fields) => ScalarValue::Struct(
1291 new_null_array(&DataType::Struct(fields.to_owned()), 1)
1292 .as_struct()
1293 .to_owned()
1294 .into(),
1295 ),
1296 DataType::Map(fields, sorted) => ScalarValue::Map(
1297 new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1298 .as_map()
1299 .to_owned()
1300 .into(),
1301 ),
1302 DataType::Union(fields, mode) => {
1303 ScalarValue::Union(None, fields.clone(), *mode)
1304 }
1305 DataType::Null => ScalarValue::Null,
1306 _ => {
1307 return _not_impl_err!(
1308 "Can't create a null scalar from data_type \"{data_type}\""
1309 );
1310 }
1311 })
1312 }
1313
1314 pub fn new_utf8(val: impl Into<String>) -> Self {
1316 ScalarValue::from(val.into())
1317 }
1318
1319 pub fn new_utf8view(val: impl Into<String>) -> Self {
1321 ScalarValue::Utf8View(Some(val.into()))
1322 }
1323
1324 pub fn new_interval_ym(years: i32, months: i32) -> Self {
1327 let val = IntervalYearMonthType::make_value(years, months);
1328 ScalarValue::IntervalYearMonth(Some(val))
1329 }
1330
1331 pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1334 let val = IntervalDayTimeType::make_value(days, millis);
1335 Self::IntervalDayTime(Some(val))
1336 }
1337
1338 pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1341 let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1342 ScalarValue::IntervalMonthDayNano(Some(val))
1343 }
1344
1345 pub fn new_timestamp<T: ArrowTimestampType>(
1348 value: Option<i64>,
1349 tz_opt: Option<Arc<str>>,
1350 ) -> Self {
1351 match T::UNIT {
1352 TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1353 TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1354 TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1355 TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1356 }
1357 }
1358
1359 pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1361 match datatype {
1362 DataType::Float16 => Ok(ScalarValue::from(f16::PI)),
1363 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1364 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1365 _ => _internal_err!("PI is not supported for data type: {}", datatype),
1366 }
1367 }
1368
1369 pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1371 match datatype {
1372 DataType::Float16 => Ok(ScalarValue::Float16(Some(consts::PI_UPPER_F16))),
1373 DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1374 DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1375 _ => {
1376 _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1377 }
1378 }
1379 }
1380
1381 pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1383 match datatype {
1384 DataType::Float16 => {
1385 Ok(ScalarValue::Float16(Some(consts::NEGATIVE_PI_LOWER_F16)))
1386 }
1387 DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1388 DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1389 _ => {
1390 _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1391 }
1392 }
1393 }
1394
1395 pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1397 match datatype {
1398 DataType::Float16 => {
1399 Ok(ScalarValue::Float16(Some(consts::FRAC_PI_2_UPPER_F16)))
1400 }
1401 DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1402 DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1403 _ => {
1404 _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1405 }
1406 }
1407 }
1408
1409 pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1411 match datatype {
1412 DataType::Float16 => Ok(ScalarValue::Float16(Some(
1413 consts::NEGATIVE_FRAC_PI_2_LOWER_F16,
1414 ))),
1415 DataType::Float32 => {
1416 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1417 }
1418 DataType::Float64 => {
1419 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1420 }
1421 _ => {
1422 _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1423 }
1424 }
1425 }
1426
1427 pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1429 match datatype {
1430 DataType::Float16 => Ok(ScalarValue::from(-f16::PI)),
1431 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1432 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1433 _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1434 }
1435 }
1436
1437 pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1439 match datatype {
1440 DataType::Float16 => Ok(ScalarValue::from(f16::FRAC_PI_2)),
1441 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1442 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1443 _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1444 }
1445 }
1446
1447 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1449 match datatype {
1450 DataType::Float16 => Ok(ScalarValue::from(-f16::FRAC_PI_2)),
1451 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1452 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1453 _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1454 }
1455 }
1456
1457 pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1459 match datatype {
1460 DataType::Float16 => Ok(ScalarValue::from(f16::INFINITY)),
1461 DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1462 DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1463 _ => {
1464 _internal_err!("Infinity is not supported for data type: {}", datatype)
1465 }
1466 }
1467 }
1468
1469 pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1471 match datatype {
1472 DataType::Float16 => Ok(ScalarValue::from(f16::NEG_INFINITY)),
1473 DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1474 DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1475 _ => {
1476 _internal_err!(
1477 "Negative Infinity is not supported for data type: {}",
1478 datatype
1479 )
1480 }
1481 }
1482 }
1483
1484 pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1486 Ok(match datatype {
1487 DataType::Boolean => ScalarValue::Boolean(Some(false)),
1488 DataType::Int8 => ScalarValue::Int8(Some(0)),
1489 DataType::Int16 => ScalarValue::Int16(Some(0)),
1490 DataType::Int32 => ScalarValue::Int32(Some(0)),
1491 DataType::Int64 => ScalarValue::Int64(Some(0)),
1492 DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1493 DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1494 DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1495 DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1496 DataType::Float16 => ScalarValue::Float16(Some(f16::ZERO)),
1497 DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1498 DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1499 DataType::Decimal32(precision, scale) => {
1500 ScalarValue::Decimal32(Some(0), *precision, *scale)
1501 }
1502 DataType::Decimal64(precision, scale) => {
1503 ScalarValue::Decimal64(Some(0), *precision, *scale)
1504 }
1505 DataType::Decimal128(precision, scale) => {
1506 ScalarValue::Decimal128(Some(0), *precision, *scale)
1507 }
1508 DataType::Decimal256(precision, scale) => {
1509 ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1510 }
1511 DataType::Timestamp(TimeUnit::Second, tz) => {
1512 ScalarValue::TimestampSecond(Some(0), tz.clone())
1513 }
1514 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1515 ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1516 }
1517 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1518 ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1519 }
1520 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1521 ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1522 }
1523 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1524 DataType::Time32(TimeUnit::Millisecond) => {
1525 ScalarValue::Time32Millisecond(Some(0))
1526 }
1527 DataType::Time64(TimeUnit::Microsecond) => {
1528 ScalarValue::Time64Microsecond(Some(0))
1529 }
1530 DataType::Time64(TimeUnit::Nanosecond) => {
1531 ScalarValue::Time64Nanosecond(Some(0))
1532 }
1533 DataType::Interval(IntervalUnit::YearMonth) => {
1534 ScalarValue::IntervalYearMonth(Some(0))
1535 }
1536 DataType::Interval(IntervalUnit::DayTime) => {
1537 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1538 }
1539 DataType::Interval(IntervalUnit::MonthDayNano) => {
1540 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1541 }
1542 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1543 DataType::Duration(TimeUnit::Millisecond) => {
1544 ScalarValue::DurationMillisecond(Some(0))
1545 }
1546 DataType::Duration(TimeUnit::Microsecond) => {
1547 ScalarValue::DurationMicrosecond(Some(0))
1548 }
1549 DataType::Duration(TimeUnit::Nanosecond) => {
1550 ScalarValue::DurationNanosecond(Some(0))
1551 }
1552 DataType::Date32 => ScalarValue::Date32(Some(0)),
1553 DataType::Date64 => ScalarValue::Date64(Some(0)),
1554 _ => {
1555 return _not_impl_err!(
1556 "Can't create a zero scalar from data_type \"{datatype}\""
1557 );
1558 }
1559 })
1560 }
1561
1562 pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1587 match datatype {
1588 DataType::Null => Ok(ScalarValue::Null),
1590
1591 DataType::Boolean
1593 | DataType::Int8
1594 | DataType::Int16
1595 | DataType::Int32
1596 | DataType::Int64
1597 | DataType::UInt8
1598 | DataType::UInt16
1599 | DataType::UInt32
1600 | DataType::UInt64
1601 | DataType::Float16
1602 | DataType::Float32
1603 | DataType::Float64
1604 | DataType::Decimal32(_, _)
1605 | DataType::Decimal64(_, _)
1606 | DataType::Decimal128(_, _)
1607 | DataType::Decimal256(_, _)
1608 | DataType::Timestamp(_, _)
1609 | DataType::Time32(_)
1610 | DataType::Time64(_)
1611 | DataType::Interval(_)
1612 | DataType::Duration(_)
1613 | DataType::Date32
1614 | DataType::Date64 => ScalarValue::new_zero(datatype),
1615
1616 DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1618 DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1619 DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1620
1621 DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1623 DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1624 DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1625
1626 DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1628 *size,
1629 Some(vec![0; *size as usize]),
1630 )),
1631
1632 DataType::List(field) => {
1634 let list =
1635 ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1636 Ok(ScalarValue::List(list))
1637 }
1638 DataType::FixedSizeList(field, _size) => {
1639 let empty_arr = new_empty_array(field.data_type());
1640 let values = Arc::new(
1641 SingleRowListArrayBuilder::new(empty_arr)
1642 .with_nullable(field.is_nullable())
1643 .build_fixed_size_list_array(0),
1644 );
1645 Ok(ScalarValue::FixedSizeList(values))
1646 }
1647 DataType::LargeList(field) => {
1648 let list = ScalarValue::new_large_list(&[], field.data_type());
1649 Ok(ScalarValue::LargeList(list))
1650 }
1651
1652 DataType::Struct(fields) => {
1654 let values = fields
1655 .iter()
1656 .map(|f| ScalarValue::new_default(f.data_type()))
1657 .collect::<Result<Vec<_>>>()?;
1658 Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1659 fields.clone(),
1660 values
1661 .into_iter()
1662 .map(|v| v.to_array())
1663 .collect::<Result<_>>()?,
1664 None,
1665 ))))
1666 }
1667
1668 DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1670 key_type.clone(),
1671 Box::new(ScalarValue::new_default(value_type)?),
1672 )),
1673
1674 DataType::RunEndEncoded(run_ends_field, value_field) => {
1675 Ok(ScalarValue::RunEndEncoded(
1676 Arc::clone(run_ends_field),
1677 Arc::clone(value_field),
1678 Box::new(ScalarValue::new_default(value_field.data_type())?),
1679 ))
1680 }
1681
1682 DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1684 ArrayData::new_empty(field.data_type()),
1685 )))),
1686
1687 DataType::Union(fields, mode) => {
1689 if let Some((type_id, field)) = fields.iter().next() {
1690 let default_value = ScalarValue::new_default(field.data_type())?;
1691 Ok(ScalarValue::Union(
1692 Some((type_id, Box::new(default_value))),
1693 fields.clone(),
1694 *mode,
1695 ))
1696 } else {
1697 _internal_err!("Union type must have at least one field")
1698 }
1699 }
1700
1701 DataType::ListView(_) | DataType::LargeListView(_) => {
1702 _not_impl_err!(
1703 "Default value for data_type \"{datatype}\" is not implemented yet"
1704 )
1705 }
1706 }
1707 }
1708
1709 pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1711 Ok(match datatype {
1712 DataType::Int8 => ScalarValue::Int8(Some(1)),
1713 DataType::Int16 => ScalarValue::Int16(Some(1)),
1714 DataType::Int32 => ScalarValue::Int32(Some(1)),
1715 DataType::Int64 => ScalarValue::Int64(Some(1)),
1716 DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1717 DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1718 DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1719 DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1720 DataType::Float16 => ScalarValue::Float16(Some(f16::ONE)),
1721 DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1722 DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1723 DataType::Decimal32(precision, scale) => {
1724 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1725 *precision, *scale,
1726 )?;
1727 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1728 match 10_i32.checked_pow(*scale as u32) {
1729 Some(value) => {
1730 ScalarValue::Decimal32(Some(value), *precision, *scale)
1731 }
1732 None => return _internal_err!("Unsupported scale {scale}"),
1733 }
1734 }
1735 DataType::Decimal64(precision, scale) => {
1736 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1737 *precision, *scale,
1738 )?;
1739 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1740 match i64::from(10).checked_pow(*scale as u32) {
1741 Some(value) => {
1742 ScalarValue::Decimal64(Some(value), *precision, *scale)
1743 }
1744 None => return _internal_err!("Unsupported scale {scale}"),
1745 }
1746 }
1747 DataType::Decimal128(precision, scale) => {
1748 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1749 *precision, *scale,
1750 )?;
1751 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1752 match i128::from(10).checked_pow(*scale as u32) {
1753 Some(value) => {
1754 ScalarValue::Decimal128(Some(value), *precision, *scale)
1755 }
1756 None => return _internal_err!("Unsupported scale {scale}"),
1757 }
1758 }
1759 DataType::Decimal256(precision, scale) => {
1760 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1761 *precision, *scale,
1762 )?;
1763 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1764 match i256::from(10).checked_pow(*scale as u32) {
1765 Some(value) => {
1766 ScalarValue::Decimal256(Some(value), *precision, *scale)
1767 }
1768 None => return _internal_err!("Unsupported scale {scale}"),
1769 }
1770 }
1771 _ => {
1772 return _not_impl_err!(
1773 "Can't create an one scalar from data_type \"{datatype}\""
1774 );
1775 }
1776 })
1777 }
1778
1779 pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1781 Ok(match datatype {
1782 DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1783 DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1784 DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1785 DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1786 DataType::Float16 => ScalarValue::Float16(Some(f16::NEG_ONE)),
1787 DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1788 DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1789 DataType::Decimal32(precision, scale) => {
1790 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1791 *precision, *scale,
1792 )?;
1793 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1794 match 10_i32.checked_pow(*scale as u32) {
1795 Some(value) => {
1796 ScalarValue::Decimal32(Some(-value), *precision, *scale)
1797 }
1798 None => return _internal_err!("Unsupported scale {scale}"),
1799 }
1800 }
1801 DataType::Decimal64(precision, scale) => {
1802 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1803 *precision, *scale,
1804 )?;
1805 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1806 match i64::from(10).checked_pow(*scale as u32) {
1807 Some(value) => {
1808 ScalarValue::Decimal64(Some(-value), *precision, *scale)
1809 }
1810 None => return _internal_err!("Unsupported scale {scale}"),
1811 }
1812 }
1813 DataType::Decimal128(precision, scale) => {
1814 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1815 *precision, *scale,
1816 )?;
1817 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1818 match i128::from(10).checked_pow(*scale as u32) {
1819 Some(value) => {
1820 ScalarValue::Decimal128(Some(-value), *precision, *scale)
1821 }
1822 None => return _internal_err!("Unsupported scale {scale}"),
1823 }
1824 }
1825 DataType::Decimal256(precision, scale) => {
1826 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1827 *precision, *scale,
1828 )?;
1829 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1830 match i256::from(10).checked_pow(*scale as u32) {
1831 Some(value) => {
1832 ScalarValue::Decimal256(Some(-value), *precision, *scale)
1833 }
1834 None => return _internal_err!("Unsupported scale {scale}"),
1835 }
1836 }
1837 _ => {
1838 return _not_impl_err!(
1839 "Can't create a negative one scalar from data_type \"{datatype}\""
1840 );
1841 }
1842 })
1843 }
1844
1845 pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1846 Ok(match datatype {
1847 DataType::Int8 => ScalarValue::Int8(Some(10)),
1848 DataType::Int16 => ScalarValue::Int16(Some(10)),
1849 DataType::Int32 => ScalarValue::Int32(Some(10)),
1850 DataType::Int64 => ScalarValue::Int64(Some(10)),
1851 DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1852 DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1853 DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1854 DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1855 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1856 DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1857 DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1858 DataType::Decimal32(precision, scale) => {
1859 Self::validate_decimal_or_internal_err::<Decimal32Type>(
1860 *precision, *scale,
1861 )?;
1862 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1863 match 10_i32.checked_pow((*scale + 1) as u32) {
1864 Some(value) => {
1865 ScalarValue::Decimal32(Some(value), *precision, *scale)
1866 }
1867 None => return _internal_err!("Unsupported scale {scale}"),
1868 }
1869 }
1870 DataType::Decimal64(precision, scale) => {
1871 Self::validate_decimal_or_internal_err::<Decimal64Type>(
1872 *precision, *scale,
1873 )?;
1874 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1875 match i64::from(10).checked_pow((*scale + 1) as u32) {
1876 Some(value) => {
1877 ScalarValue::Decimal64(Some(value), *precision, *scale)
1878 }
1879 None => return _internal_err!("Unsupported scale {scale}"),
1880 }
1881 }
1882 DataType::Decimal128(precision, scale) => {
1883 Self::validate_decimal_or_internal_err::<Decimal128Type>(
1884 *precision, *scale,
1885 )?;
1886 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1887 match i128::from(10).checked_pow((*scale + 1) as u32) {
1888 Some(value) => {
1889 ScalarValue::Decimal128(Some(value), *precision, *scale)
1890 }
1891 None => return _internal_err!("Unsupported scale {scale}"),
1892 }
1893 }
1894 DataType::Decimal256(precision, scale) => {
1895 Self::validate_decimal_or_internal_err::<Decimal256Type>(
1896 *precision, *scale,
1897 )?;
1898 assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1899 match i256::from(10).checked_pow((*scale + 1) as u32) {
1900 Some(value) => {
1901 ScalarValue::Decimal256(Some(value), *precision, *scale)
1902 }
1903 None => return _internal_err!("Unsupported scale {scale}"),
1904 }
1905 }
1906 _ => {
1907 return _not_impl_err!(
1908 "Can't create a ten scalar from data_type \"{datatype}\""
1909 );
1910 }
1911 })
1912 }
1913
1914 pub fn data_type(&self) -> DataType {
1916 match self {
1917 ScalarValue::Boolean(_) => DataType::Boolean,
1918 ScalarValue::UInt8(_) => DataType::UInt8,
1919 ScalarValue::UInt16(_) => DataType::UInt16,
1920 ScalarValue::UInt32(_) => DataType::UInt32,
1921 ScalarValue::UInt64(_) => DataType::UInt64,
1922 ScalarValue::Int8(_) => DataType::Int8,
1923 ScalarValue::Int16(_) => DataType::Int16,
1924 ScalarValue::Int32(_) => DataType::Int32,
1925 ScalarValue::Int64(_) => DataType::Int64,
1926 ScalarValue::Decimal32(_, precision, scale) => {
1927 DataType::Decimal32(*precision, *scale)
1928 }
1929 ScalarValue::Decimal64(_, precision, scale) => {
1930 DataType::Decimal64(*precision, *scale)
1931 }
1932 ScalarValue::Decimal128(_, precision, scale) => {
1933 DataType::Decimal128(*precision, *scale)
1934 }
1935 ScalarValue::Decimal256(_, precision, scale) => {
1936 DataType::Decimal256(*precision, *scale)
1937 }
1938 ScalarValue::TimestampSecond(_, tz_opt) => {
1939 DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1940 }
1941 ScalarValue::TimestampMillisecond(_, tz_opt) => {
1942 DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1943 }
1944 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1945 DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1946 }
1947 ScalarValue::TimestampNanosecond(_, tz_opt) => {
1948 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1949 }
1950 ScalarValue::Float16(_) => DataType::Float16,
1951 ScalarValue::Float32(_) => DataType::Float32,
1952 ScalarValue::Float64(_) => DataType::Float64,
1953 ScalarValue::Utf8(_) => DataType::Utf8,
1954 ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1955 ScalarValue::Utf8View(_) => DataType::Utf8View,
1956 ScalarValue::Binary(_) => DataType::Binary,
1957 ScalarValue::BinaryView(_) => DataType::BinaryView,
1958 ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1959 ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1960 ScalarValue::List(arr) => arr.data_type().to_owned(),
1961 ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1962 ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1963 ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1964 ScalarValue::Map(arr) => arr.data_type().to_owned(),
1965 ScalarValue::Date32(_) => DataType::Date32,
1966 ScalarValue::Date64(_) => DataType::Date64,
1967 ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1968 ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1969 ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1970 ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1971 ScalarValue::IntervalYearMonth(_) => {
1972 DataType::Interval(IntervalUnit::YearMonth)
1973 }
1974 ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1975 ScalarValue::IntervalMonthDayNano(_) => {
1976 DataType::Interval(IntervalUnit::MonthDayNano)
1977 }
1978 ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1979 ScalarValue::DurationMillisecond(_) => {
1980 DataType::Duration(TimeUnit::Millisecond)
1981 }
1982 ScalarValue::DurationMicrosecond(_) => {
1983 DataType::Duration(TimeUnit::Microsecond)
1984 }
1985 ScalarValue::DurationNanosecond(_) => {
1986 DataType::Duration(TimeUnit::Nanosecond)
1987 }
1988 ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1989 ScalarValue::Dictionary(k, v) => {
1990 DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1991 }
1992 ScalarValue::RunEndEncoded(run_ends_field, value_field, _) => {
1993 DataType::RunEndEncoded(
1994 Arc::clone(run_ends_field),
1995 Arc::clone(value_field),
1996 )
1997 }
1998 ScalarValue::Null => DataType::Null,
1999 }
2000 }
2001
2002 pub fn arithmetic_negate(&self) -> Result<Self> {
2004 fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
2005 v: T,
2006 ctx: impl Fn() -> String,
2007 ) -> Result<T> {
2008 v.neg_checked()
2009 .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
2010 }
2011 match self {
2012 ScalarValue::Int8(None)
2013 | ScalarValue::Int16(None)
2014 | ScalarValue::Int32(None)
2015 | ScalarValue::Int64(None)
2016 | ScalarValue::Float16(None)
2017 | ScalarValue::Float32(None)
2018 | ScalarValue::Float64(None) => Ok(self.clone()),
2019 ScalarValue::Float16(Some(v)) => Ok(ScalarValue::Float16(Some(-v))),
2020 ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
2021 ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
2022 ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
2023 ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
2024 ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
2025 ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
2026 ScalarValue::IntervalYearMonth(Some(v)) => Ok(
2027 ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
2028 format!("In negation of IntervalYearMonth({v})")
2029 })?)),
2030 ),
2031 ScalarValue::IntervalDayTime(Some(v)) => {
2032 let (days, ms) = IntervalDayTimeType::to_parts(*v);
2033 let val = IntervalDayTimeType::make_value(
2034 neg_checked_with_ctx(days, || {
2035 format!("In negation of days {days} in IntervalDayTime")
2036 })?,
2037 neg_checked_with_ctx(ms, || {
2038 format!("In negation of milliseconds {ms} in IntervalDayTime")
2039 })?,
2040 );
2041 Ok(ScalarValue::IntervalDayTime(Some(val)))
2042 }
2043 ScalarValue::IntervalMonthDayNano(Some(v)) => {
2044 let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
2045 let val = IntervalMonthDayNanoType::make_value(
2046 neg_checked_with_ctx(months, || {
2047 format!("In negation of months {months} of IntervalMonthDayNano")
2048 })?,
2049 neg_checked_with_ctx(days, || {
2050 format!("In negation of days {days} of IntervalMonthDayNano")
2051 })?,
2052 neg_checked_with_ctx(nanos, || {
2053 format!("In negation of nanos {nanos} of IntervalMonthDayNano")
2054 })?,
2055 );
2056 Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
2057 }
2058 ScalarValue::Decimal32(Some(v), precision, scale) => {
2059 Ok(ScalarValue::Decimal32(
2060 Some(neg_checked_with_ctx(*v, || {
2061 format!("In negation of Decimal32({v}, {precision}, {scale})")
2062 })?),
2063 *precision,
2064 *scale,
2065 ))
2066 }
2067 ScalarValue::Decimal64(Some(v), precision, scale) => {
2068 Ok(ScalarValue::Decimal64(
2069 Some(neg_checked_with_ctx(*v, || {
2070 format!("In negation of Decimal64({v}, {precision}, {scale})")
2071 })?),
2072 *precision,
2073 *scale,
2074 ))
2075 }
2076 ScalarValue::Decimal128(Some(v), precision, scale) => {
2077 Ok(ScalarValue::Decimal128(
2078 Some(neg_checked_with_ctx(*v, || {
2079 format!("In negation of Decimal128({v}, {precision}, {scale})")
2080 })?),
2081 *precision,
2082 *scale,
2083 ))
2084 }
2085 ScalarValue::Decimal256(Some(v), precision, scale) => {
2086 Ok(ScalarValue::Decimal256(
2087 Some(neg_checked_with_ctx(*v, || {
2088 format!("In negation of Decimal256({v}, {precision}, {scale})")
2089 })?),
2090 *precision,
2091 *scale,
2092 ))
2093 }
2094 ScalarValue::TimestampSecond(Some(v), tz) => {
2095 Ok(ScalarValue::TimestampSecond(
2096 Some(neg_checked_with_ctx(*v, || {
2097 format!("In negation of TimestampSecond({v})")
2098 })?),
2099 tz.clone(),
2100 ))
2101 }
2102 ScalarValue::TimestampNanosecond(Some(v), tz) => {
2103 Ok(ScalarValue::TimestampNanosecond(
2104 Some(neg_checked_with_ctx(*v, || {
2105 format!("In negation of TimestampNanoSecond({v})")
2106 })?),
2107 tz.clone(),
2108 ))
2109 }
2110 ScalarValue::TimestampMicrosecond(Some(v), tz) => {
2111 Ok(ScalarValue::TimestampMicrosecond(
2112 Some(neg_checked_with_ctx(*v, || {
2113 format!("In negation of TimestampMicroSecond({v})")
2114 })?),
2115 tz.clone(),
2116 ))
2117 }
2118 ScalarValue::TimestampMillisecond(Some(v), tz) => {
2119 Ok(ScalarValue::TimestampMillisecond(
2120 Some(neg_checked_with_ctx(*v, || {
2121 format!("In negation of TimestampMilliSecond({v})")
2122 })?),
2123 tz.clone(),
2124 ))
2125 }
2126 value => _internal_err!(
2127 "Can not run arithmetic negative on scalar value {value:?}"
2128 ),
2129 }
2130 }
2131
2132 pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2137 let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2138 Self::try_from_array(r.as_ref(), 0)
2139 }
2140
2141 pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2146 let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2147 Self::try_from_array(r.as_ref(), 0)
2148 }
2149
2150 pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2155 let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2156 Self::try_from_array(r.as_ref(), 0)
2157 }
2158
2159 pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2164 let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2165 Self::try_from_array(r.as_ref(), 0)
2166 }
2167
2168 pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2173 let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2174 Self::try_from_array(r.as_ref(), 0)
2175 }
2176
2177 pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2182 let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2183 Self::try_from_array(r.as_ref(), 0)
2184 }
2185
2186 pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2194 let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2195 Self::try_from_array(r.as_ref(), 0)
2196 }
2197
2198 pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2206 let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2207 Self::try_from_array(r.as_ref(), 0)
2208 }
2209
2210 pub fn is_unsigned(&self) -> bool {
2211 matches!(
2212 self,
2213 ScalarValue::UInt8(_)
2214 | ScalarValue::UInt16(_)
2215 | ScalarValue::UInt32(_)
2216 | ScalarValue::UInt64(_)
2217 )
2218 }
2219
2220 pub fn is_null(&self) -> bool {
2222 match self {
2223 ScalarValue::Boolean(v) => v.is_none(),
2224 ScalarValue::Null => true,
2225 ScalarValue::Float16(v) => v.is_none(),
2226 ScalarValue::Float32(v) => v.is_none(),
2227 ScalarValue::Float64(v) => v.is_none(),
2228 ScalarValue::Decimal32(v, _, _) => v.is_none(),
2229 ScalarValue::Decimal64(v, _, _) => v.is_none(),
2230 ScalarValue::Decimal128(v, _, _) => v.is_none(),
2231 ScalarValue::Decimal256(v, _, _) => v.is_none(),
2232 ScalarValue::Int8(v) => v.is_none(),
2233 ScalarValue::Int16(v) => v.is_none(),
2234 ScalarValue::Int32(v) => v.is_none(),
2235 ScalarValue::Int64(v) => v.is_none(),
2236 ScalarValue::UInt8(v) => v.is_none(),
2237 ScalarValue::UInt16(v) => v.is_none(),
2238 ScalarValue::UInt32(v) => v.is_none(),
2239 ScalarValue::UInt64(v) => v.is_none(),
2240 ScalarValue::Utf8(v)
2241 | ScalarValue::Utf8View(v)
2242 | ScalarValue::LargeUtf8(v) => v.is_none(),
2243 ScalarValue::Binary(v)
2244 | ScalarValue::BinaryView(v)
2245 | ScalarValue::FixedSizeBinary(_, v)
2246 | ScalarValue::LargeBinary(v) => v.is_none(),
2247 ScalarValue::List(arr) => arr.len() == arr.null_count(),
2250 ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2251 ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2252 ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2253 ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2254 ScalarValue::Date32(v) => v.is_none(),
2255 ScalarValue::Date64(v) => v.is_none(),
2256 ScalarValue::Time32Second(v) => v.is_none(),
2257 ScalarValue::Time32Millisecond(v) => v.is_none(),
2258 ScalarValue::Time64Microsecond(v) => v.is_none(),
2259 ScalarValue::Time64Nanosecond(v) => v.is_none(),
2260 ScalarValue::TimestampSecond(v, _) => v.is_none(),
2261 ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2262 ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2263 ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2264 ScalarValue::IntervalYearMonth(v) => v.is_none(),
2265 ScalarValue::IntervalDayTime(v) => v.is_none(),
2266 ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2267 ScalarValue::DurationSecond(v) => v.is_none(),
2268 ScalarValue::DurationMillisecond(v) => v.is_none(),
2269 ScalarValue::DurationMicrosecond(v) => v.is_none(),
2270 ScalarValue::DurationNanosecond(v) => v.is_none(),
2271 ScalarValue::Union(v, _, _) => match v {
2272 Some((_, s)) => s.is_null(),
2273 None => true,
2274 },
2275 ScalarValue::Dictionary(_, v) => v.is_null(),
2276 ScalarValue::RunEndEncoded(_, _, v) => v.is_null(),
2277 }
2278 }
2279
2280 pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2288 match (self, other) {
2289 (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2290 (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2291 (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2292 (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2293 (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2294 (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2295 (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2296 (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2297 (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2299 Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2300 }
2301 (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2302 Some((l - r).abs().round() as _)
2303 }
2304 (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2305 Some((l - r).abs().round() as _)
2306 }
2307 (
2308 Self::Decimal128(Some(l), lprecision, lscale),
2309 Self::Decimal128(Some(r), rprecision, rscale),
2310 ) => {
2311 if lprecision == rprecision && lscale == rscale {
2312 l.checked_sub(*r)?.checked_abs()?.to_usize()
2313 } else {
2314 None
2315 }
2316 }
2317 (
2318 Self::Decimal256(Some(l), lprecision, lscale),
2319 Self::Decimal256(Some(r), rprecision, rscale),
2320 ) => {
2321 if lprecision == rprecision && lscale == rscale {
2322 l.checked_sub(*r)?.checked_abs()?.to_usize()
2323 } else {
2324 None
2325 }
2326 }
2327 _ => None,
2328 }
2329 }
2330
2331 pub fn to_array(&self) -> Result<ArrayRef> {
2337 self.to_array_of_size(1)
2338 }
2339
2340 pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2366 Ok(Scalar::new(self.to_array_of_size(1)?))
2367 }
2368
2369 pub fn iter_to_array(
2396 scalars: impl IntoIterator<Item = ScalarValue>,
2397 ) -> Result<ArrayRef> {
2398 let mut scalars = scalars.into_iter().peekable();
2399
2400 let data_type = match scalars.peek() {
2402 None => {
2403 return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
2404 }
2405 Some(sv) => sv.data_type(),
2406 };
2407
2408 macro_rules! build_array_primitive {
2411 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2412 {
2413 let array = scalars
2414 .map(|sv| {
2415 if let ScalarValue::$SCALAR_TY(v) = sv {
2416 Ok(v)
2417 } else {
2418 _exec_err!(
2419 "Inconsistent types in ScalarValue::iter_to_array. \
2420 Expected {:?}, got {:?}",
2421 data_type,
2422 sv
2423 )
2424 }
2425 })
2426 .collect::<Result<$ARRAY_TY>>()?;
2427 Arc::new(array)
2428 }
2429 }};
2430 }
2431
2432 macro_rules! build_array_primitive_tz {
2433 ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
2434 {
2435 let array = scalars
2436 .map(|sv| {
2437 if let ScalarValue::$SCALAR_TY(v, _) = sv {
2438 Ok(v)
2439 } else {
2440 _exec_err!(
2441 "Inconsistent types in ScalarValue::iter_to_array. \
2442 Expected {:?}, got {:?}",
2443 data_type,
2444 sv
2445 )
2446 }
2447 })
2448 .collect::<Result<$ARRAY_TY>>()?;
2449 Arc::new(array.with_timezone_opt($TZ.clone()))
2450 }
2451 }};
2452 }
2453
2454 macro_rules! build_array_string {
2457 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2458 {
2459 let array = scalars
2460 .map(|sv| {
2461 if let ScalarValue::$SCALAR_TY(v) = sv {
2462 Ok(v)
2463 } else {
2464 _exec_err!(
2465 "Inconsistent types in ScalarValue::iter_to_array. \
2466 Expected {:?}, got {:?}",
2467 data_type,
2468 sv
2469 )
2470 }
2471 })
2472 .collect::<Result<$ARRAY_TY>>()?;
2473 Arc::new(array)
2474 }
2475 }};
2476 }
2477
2478 let array: ArrayRef = match &data_type {
2479 DataType::Decimal32(precision, scale) => {
2480 let decimal_array =
2481 ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
2482 Arc::new(decimal_array)
2483 }
2484 DataType::Decimal64(precision, scale) => {
2485 let decimal_array =
2486 ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
2487 Arc::new(decimal_array)
2488 }
2489 DataType::Decimal128(precision, scale) => {
2490 let decimal_array =
2491 ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
2492 Arc::new(decimal_array)
2493 }
2494 DataType::Decimal256(precision, scale) => {
2495 let decimal_array =
2496 ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
2497 Arc::new(decimal_array)
2498 }
2499 DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
2500 DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
2501 DataType::Float16 => build_array_primitive!(Float16Array, Float16),
2502 DataType::Float32 => build_array_primitive!(Float32Array, Float32),
2503 DataType::Float64 => build_array_primitive!(Float64Array, Float64),
2504 DataType::Int8 => build_array_primitive!(Int8Array, Int8),
2505 DataType::Int16 => build_array_primitive!(Int16Array, Int16),
2506 DataType::Int32 => build_array_primitive!(Int32Array, Int32),
2507 DataType::Int64 => build_array_primitive!(Int64Array, Int64),
2508 DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
2509 DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
2510 DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
2511 DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
2512 DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
2513 DataType::Utf8 => build_array_string!(StringArray, Utf8),
2514 DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
2515 DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
2516 DataType::Binary => build_array_string!(BinaryArray, Binary),
2517 DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
2518 DataType::Date32 => build_array_primitive!(Date32Array, Date32),
2519 DataType::Date64 => build_array_primitive!(Date64Array, Date64),
2520 DataType::Time32(TimeUnit::Second) => {
2521 build_array_primitive!(Time32SecondArray, Time32Second)
2522 }
2523 DataType::Time32(TimeUnit::Millisecond) => {
2524 build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
2525 }
2526 DataType::Time64(TimeUnit::Microsecond) => {
2527 build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
2528 }
2529 DataType::Time64(TimeUnit::Nanosecond) => {
2530 build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
2531 }
2532 DataType::Timestamp(TimeUnit::Second, tz) => {
2533 build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
2534 }
2535 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
2536 build_array_primitive_tz!(
2537 TimestampMillisecondArray,
2538 TimestampMillisecond,
2539 tz
2540 )
2541 }
2542 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2543 build_array_primitive_tz!(
2544 TimestampMicrosecondArray,
2545 TimestampMicrosecond,
2546 tz
2547 )
2548 }
2549 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2550 build_array_primitive_tz!(
2551 TimestampNanosecondArray,
2552 TimestampNanosecond,
2553 tz
2554 )
2555 }
2556 DataType::Duration(TimeUnit::Second) => {
2557 build_array_primitive!(DurationSecondArray, DurationSecond)
2558 }
2559 DataType::Duration(TimeUnit::Millisecond) => {
2560 build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2561 }
2562 DataType::Duration(TimeUnit::Microsecond) => {
2563 build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2564 }
2565 DataType::Duration(TimeUnit::Nanosecond) => {
2566 build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2567 }
2568 DataType::Interval(IntervalUnit::DayTime) => {
2569 build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2570 }
2571 DataType::Interval(IntervalUnit::YearMonth) => {
2572 build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2573 }
2574 DataType::Interval(IntervalUnit::MonthDayNano) => {
2575 build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2576 }
2577 DataType::FixedSizeList(_, _) => {
2578 let mut arrays =
2582 scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2583 let first_non_null_data_type = arrays
2584 .iter()
2585 .find(|sv| !sv.is_null(0))
2586 .map(|sv| sv.data_type().to_owned());
2587 if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2588 for array in arrays.iter_mut() {
2589 if array.is_null(0) {
2590 *array = Arc::new(FixedSizeListArray::new_null(
2591 Arc::clone(&f),
2592 l,
2593 1,
2594 ));
2595 }
2596 }
2597 }
2598 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2599 arrow::compute::concat(arrays.as_slice())?
2600 }
2601 DataType::List(_)
2602 | DataType::LargeList(_)
2603 | DataType::Map(_, _)
2604 | DataType::Struct(_)
2605 | DataType::Union(_, _) => {
2606 let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2607 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2608 arrow::compute::concat(arrays.as_slice())?
2609 }
2610 DataType::Dictionary(key_type, value_type) => {
2611 let value_scalars = scalars
2613 .map(|scalar| match scalar {
2614 ScalarValue::Dictionary(inner_key_type, scalar) => {
2615 if &inner_key_type == key_type {
2616 Ok(*scalar)
2617 } else {
2618 _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2619 }
2620 }
2621 _ => {
2622 _exec_err!(
2623 "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2624 )
2625 }
2626 })
2627 .collect::<Result<Vec<_>>>()?;
2628
2629 let values = Self::iter_to_array(value_scalars)?;
2630 assert_eq!(values.data_type(), value_type.as_ref());
2631
2632 match key_type.as_ref() {
2633 DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2634 DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2635 DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2636 DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2637 DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2638 DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2639 DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2640 DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2641 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
2642 }
2643 }
2644 DataType::RunEndEncoded(run_ends_field, value_field) => {
2645 fn make_run_array<R: RunEndIndexType>(
2646 scalars: impl IntoIterator<Item = ScalarValue>,
2647 run_ends_field: &FieldRef,
2648 values_field: &FieldRef,
2649 ) -> Result<ArrayRef> {
2650 let mut scalars = scalars.into_iter();
2651
2652 let mut run_ends = vec![];
2653 let mut value_scalars = vec![];
2654
2655 let mut len = R::Native::ONE;
2656 let mut current =
2657 if let Some(ScalarValue::RunEndEncoded(_, _, scalar)) =
2658 scalars.next()
2659 {
2660 *scalar
2661 } else {
2662 unreachable!()
2665 };
2666 for scalar in scalars {
2667 let scalar = match scalar {
2668 ScalarValue::RunEndEncoded(
2669 inner_run_ends_field,
2670 inner_value_field,
2671 scalar,
2672 ) if &inner_run_ends_field == run_ends_field
2673 && &inner_value_field == values_field =>
2674 {
2675 *scalar
2676 }
2677 _ => {
2678 return _exec_err!(
2679 "Expected RunEndEncoded scalar with run-ends field {run_ends_field} but got: {scalar:?}"
2680 );
2681 }
2682 };
2683
2684 if scalar != current {
2686 run_ends.push(len);
2687 value_scalars.push(current);
2688 current = scalar;
2689 }
2690
2691 len = len.add_checked(R::Native::ONE).map_err(|_| {
2692 DataFusionError::Execution(format!(
2693 "Cannot construct RunArray: Overflows run-ends type {}",
2694 run_ends_field.data_type()
2695 ))
2696 })?;
2697 }
2698
2699 run_ends.push(len);
2700 value_scalars.push(current);
2701
2702 let run_ends = PrimitiveArray::<R>::from_iter_values(run_ends);
2703 let values = ScalarValue::iter_to_array(value_scalars)?;
2704
2705 let dt = DataType::RunEndEncoded(
2707 Arc::clone(run_ends_field),
2708 Arc::clone(values_field),
2709 );
2710 let builder = ArrayDataBuilder::new(dt)
2711 .len(RunArray::logical_len(&run_ends))
2712 .add_child_data(run_ends.to_data())
2713 .add_child_data(values.to_data());
2714 let run_array = RunArray::<R>::from(builder.build()?);
2715
2716 Ok(Arc::new(run_array))
2717 }
2718
2719 match run_ends_field.data_type() {
2720 DataType::Int16 => {
2721 make_run_array::<Int16Type>(scalars, run_ends_field, value_field)?
2722 }
2723 DataType::Int32 => {
2724 make_run_array::<Int32Type>(scalars, run_ends_field, value_field)?
2725 }
2726 DataType::Int64 => {
2727 make_run_array::<Int64Type>(scalars, run_ends_field, value_field)?
2728 }
2729 dt => unreachable!("Invalid run-ends type: {dt}"),
2730 }
2731 }
2732 DataType::FixedSizeBinary(size) => {
2733 let array = scalars
2734 .map(|sv| {
2735 if let ScalarValue::FixedSizeBinary(_, v) = sv {
2736 Ok(v)
2737 } else {
2738 _exec_err!(
2739 "Inconsistent types in ScalarValue::iter_to_array. \
2740 Expected {data_type}, got {sv:?}"
2741 )
2742 }
2743 })
2744 .collect::<Result<Vec<_>>>()?;
2745 let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2746 array.into_iter(),
2747 *size,
2748 )?;
2749 Arc::new(array)
2750 }
2751 DataType::Time32(TimeUnit::Microsecond)
2757 | DataType::Time32(TimeUnit::Nanosecond)
2758 | DataType::Time64(TimeUnit::Second)
2759 | DataType::Time64(TimeUnit::Millisecond)
2760 | DataType::ListView(_)
2761 | DataType::LargeListView(_) => {
2762 return _not_impl_err!(
2763 "Unsupported creation of {:?} array from ScalarValue {:?}",
2764 data_type,
2765 scalars.peek()
2766 );
2767 }
2768 };
2769 Ok(array)
2770 }
2771
2772 fn iter_to_null_array(
2773 scalars: impl IntoIterator<Item = ScalarValue>,
2774 ) -> Result<ArrayRef> {
2775 let length = scalars.into_iter().try_fold(
2776 0usize,
2777 |r, element: ScalarValue| match element {
2778 ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2779 s => {
2780 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2781 }
2782 },
2783 )?;
2784 Ok(new_null_array(&DataType::Null, length))
2785 }
2786
2787 fn iter_to_decimal32_array(
2788 scalars: impl IntoIterator<Item = ScalarValue>,
2789 precision: u8,
2790 scale: i8,
2791 ) -> Result<Decimal32Array> {
2792 let array = scalars
2793 .into_iter()
2794 .map(|element: ScalarValue| match element {
2795 ScalarValue::Decimal32(v1, _, _) => Ok(v1),
2796 s => {
2797 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2798 }
2799 })
2800 .collect::<Result<Decimal32Array>>()?
2801 .with_precision_and_scale(precision, scale)?;
2802 Ok(array)
2803 }
2804
2805 fn iter_to_decimal64_array(
2806 scalars: impl IntoIterator<Item = ScalarValue>,
2807 precision: u8,
2808 scale: i8,
2809 ) -> Result<Decimal64Array> {
2810 let array = scalars
2811 .into_iter()
2812 .map(|element: ScalarValue| match element {
2813 ScalarValue::Decimal64(v1, _, _) => Ok(v1),
2814 s => {
2815 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2816 }
2817 })
2818 .collect::<Result<Decimal64Array>>()?
2819 .with_precision_and_scale(precision, scale)?;
2820 Ok(array)
2821 }
2822
2823 fn iter_to_decimal128_array(
2824 scalars: impl IntoIterator<Item = ScalarValue>,
2825 precision: u8,
2826 scale: i8,
2827 ) -> Result<Decimal128Array> {
2828 let array = scalars
2829 .into_iter()
2830 .map(|element: ScalarValue| match element {
2831 ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2832 s => {
2833 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2834 }
2835 })
2836 .collect::<Result<Decimal128Array>>()?
2837 .with_precision_and_scale(precision, scale)?;
2838 Ok(array)
2839 }
2840
2841 fn iter_to_decimal256_array(
2842 scalars: impl IntoIterator<Item = ScalarValue>,
2843 precision: u8,
2844 scale: i8,
2845 ) -> Result<Decimal256Array> {
2846 let array = scalars
2847 .into_iter()
2848 .map(|element: ScalarValue| match element {
2849 ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2850 s => {
2851 _internal_err!(
2852 "Expected ScalarValue::Decimal256 element. Received {s:?}"
2853 )
2854 }
2855 })
2856 .collect::<Result<Decimal256Array>>()?
2857 .with_precision_and_scale(precision, scale)?;
2858 Ok(array)
2859 }
2860
2861 pub fn new_list(
2888 values: &[ScalarValue],
2889 data_type: &DataType,
2890 nullable: bool,
2891 ) -> Arc<ListArray> {
2892 let values = if values.is_empty() {
2893 new_empty_array(data_type)
2894 } else {
2895 Self::iter_to_array(values.iter().cloned()).unwrap()
2896 };
2897 Arc::new(
2898 SingleRowListArrayBuilder::new(values)
2899 .with_nullable(nullable)
2900 .build_list_array(),
2901 )
2902 }
2903
2904 pub fn new_list_nullable(
2906 values: &[ScalarValue],
2907 data_type: &DataType,
2908 ) -> Arc<ListArray> {
2909 Self::new_list(values, data_type, true)
2910 }
2911
2912 pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2916 let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2917 Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2918 &data_type, null_len,
2919 ))))
2920 }
2921
2922 pub fn new_list_from_iter(
2950 values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2951 data_type: &DataType,
2952 nullable: bool,
2953 ) -> Arc<ListArray> {
2954 let values = if values.len() == 0 {
2955 new_empty_array(data_type)
2956 } else {
2957 Self::iter_to_array(values).unwrap()
2958 };
2959 Arc::new(
2960 SingleRowListArrayBuilder::new(values)
2961 .with_nullable(nullable)
2962 .build_list_array(),
2963 )
2964 }
2965
2966 pub fn new_large_list(
2994 values: &[ScalarValue],
2995 data_type: &DataType,
2996 ) -> Arc<LargeListArray> {
2997 let values = if values.is_empty() {
2998 new_empty_array(data_type)
2999 } else {
3000 Self::iter_to_array(values.iter().cloned()).unwrap()
3001 };
3002 Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
3003 }
3004
3005 pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
3015 Ok(match self {
3016 ScalarValue::Decimal32(Some(e), precision, scale) => Arc::new(
3017 Decimal32Array::from_value(*e, size)
3018 .with_precision_and_scale(*precision, *scale)?,
3019 ),
3020 ScalarValue::Decimal32(None, precision, scale) => {
3021 new_null_array(&DataType::Decimal32(*precision, *scale), size)
3022 }
3023 ScalarValue::Decimal64(Some(e), precision, scale) => Arc::new(
3024 Decimal64Array::from_value(*e, size)
3025 .with_precision_and_scale(*precision, *scale)?,
3026 ),
3027 ScalarValue::Decimal64(None, precision, scale) => {
3028 new_null_array(&DataType::Decimal64(*precision, *scale), size)
3029 }
3030 ScalarValue::Decimal128(Some(e), precision, scale) => Arc::new(
3031 Decimal128Array::from_value(*e, size)
3032 .with_precision_and_scale(*precision, *scale)?,
3033 ),
3034 ScalarValue::Decimal128(None, precision, scale) => {
3035 new_null_array(&DataType::Decimal128(*precision, *scale), size)
3036 }
3037 ScalarValue::Decimal256(Some(e), precision, scale) => Arc::new(
3038 Decimal256Array::from_value(*e, size)
3039 .with_precision_and_scale(*precision, *scale)?,
3040 ),
3041 ScalarValue::Decimal256(None, precision, scale) => {
3042 new_null_array(&DataType::Decimal256(*precision, *scale), size)
3043 }
3044
3045 ScalarValue::Boolean(e) => match e {
3046 None => new_null_array(&DataType::Boolean, size),
3047 Some(true) => {
3048 Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
3049 as ArrayRef
3050 }
3051 Some(false) => {
3052 Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
3053 as ArrayRef
3054 }
3055 },
3056 ScalarValue::Float64(e) => {
3057 build_array_from_option!(Float64, Float64Array, e, size)
3058 }
3059 ScalarValue::Float32(e) => {
3060 build_array_from_option!(Float32, Float32Array, e, size)
3061 }
3062 ScalarValue::Float16(e) => {
3063 build_array_from_option!(Float16, Float16Array, e, size)
3064 }
3065 ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
3066 ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
3067 ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
3068 ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
3069 ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
3070 ScalarValue::UInt16(e) => {
3071 build_array_from_option!(UInt16, UInt16Array, e, size)
3072 }
3073 ScalarValue::UInt32(e) => {
3074 build_array_from_option!(UInt32, UInt32Array, e, size)
3075 }
3076 ScalarValue::UInt64(e) => {
3077 build_array_from_option!(UInt64, UInt64Array, e, size)
3078 }
3079 ScalarValue::TimestampSecond(e, tz_opt) => {
3080 build_timestamp_array_from_option!(
3081 TimeUnit::Second,
3082 tz_opt.clone(),
3083 TimestampSecondArray,
3084 e,
3085 size
3086 )
3087 }
3088 ScalarValue::TimestampMillisecond(e, tz_opt) => {
3089 build_timestamp_array_from_option!(
3090 TimeUnit::Millisecond,
3091 tz_opt.clone(),
3092 TimestampMillisecondArray,
3093 e,
3094 size
3095 )
3096 }
3097
3098 ScalarValue::TimestampMicrosecond(e, tz_opt) => {
3099 build_timestamp_array_from_option!(
3100 TimeUnit::Microsecond,
3101 tz_opt.clone(),
3102 TimestampMicrosecondArray,
3103 e,
3104 size
3105 )
3106 }
3107 ScalarValue::TimestampNanosecond(e, tz_opt) => {
3108 build_timestamp_array_from_option!(
3109 TimeUnit::Nanosecond,
3110 tz_opt.clone(),
3111 TimestampNanosecondArray,
3112 e,
3113 size
3114 )
3115 }
3116 ScalarValue::Utf8(e) => match e {
3117 Some(value) => Arc::new(StringArray::new_repeated(value, size)),
3118 None => new_null_array(&DataType::Utf8, size),
3119 },
3120 ScalarValue::Utf8View(e) => match e {
3121 Some(value) => {
3122 let mut builder = StringViewBuilder::with_capacity(size);
3123 builder.try_append_value_n(value, size)?;
3124 let array = builder.finish();
3125 Arc::new(array)
3126 }
3127 None => new_null_array(&DataType::Utf8View, size),
3128 },
3129 ScalarValue::LargeUtf8(e) => match e {
3130 Some(value) => Arc::new(LargeStringArray::new_repeated(value, size)),
3131 None => new_null_array(&DataType::LargeUtf8, size),
3132 },
3133 ScalarValue::Binary(e) => match e {
3134 Some(value) => {
3135 Arc::new(BinaryArray::new_repeated(value.as_slice(), size))
3136 }
3137 None => new_null_array(&DataType::Binary, size),
3138 },
3139 ScalarValue::BinaryView(e) => match e {
3140 Some(value) => {
3141 let mut builder = BinaryViewBuilder::with_capacity(size);
3142 builder.try_append_value_n(value, size)?;
3143 let array = builder.finish();
3144 Arc::new(array)
3145 }
3146 None => new_null_array(&DataType::BinaryView, size),
3147 },
3148 ScalarValue::FixedSizeBinary(s, e) => match e {
3149 Some(value) => Arc::new(
3150 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
3151 repeat_n(Some(value.as_slice()), size),
3152 *s,
3153 )
3154 .unwrap(),
3155 ),
3156 None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
3157 },
3158 ScalarValue::LargeBinary(e) => match e {
3159 Some(value) => {
3160 Arc::new(LargeBinaryArray::new_repeated(value.as_slice(), size))
3161 }
3162 None => new_null_array(&DataType::LargeBinary, size),
3163 },
3164 ScalarValue::List(arr) => {
3165 if size == 1 {
3166 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3167 }
3168 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3169 }
3170 ScalarValue::LargeList(arr) => {
3171 if size == 1 {
3172 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3173 }
3174 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3175 }
3176 ScalarValue::FixedSizeList(arr) => {
3177 if size == 1 {
3178 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3179 }
3180 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3181 }
3182 ScalarValue::Struct(arr) => {
3183 if size == 1 {
3184 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3185 }
3186 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3187 }
3188 ScalarValue::Map(arr) => {
3189 if size == 1 {
3190 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3191 }
3192 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3193 }
3194 ScalarValue::Date32(e) => {
3195 build_array_from_option!(Date32, Date32Array, e, size)
3196 }
3197 ScalarValue::Date64(e) => {
3198 build_array_from_option!(Date64, Date64Array, e, size)
3199 }
3200 ScalarValue::Time32Second(e) => {
3201 build_array_from_option!(
3202 Time32,
3203 TimeUnit::Second,
3204 Time32SecondArray,
3205 e,
3206 size
3207 )
3208 }
3209 ScalarValue::Time32Millisecond(e) => {
3210 build_array_from_option!(
3211 Time32,
3212 TimeUnit::Millisecond,
3213 Time32MillisecondArray,
3214 e,
3215 size
3216 )
3217 }
3218 ScalarValue::Time64Microsecond(e) => {
3219 build_array_from_option!(
3220 Time64,
3221 TimeUnit::Microsecond,
3222 Time64MicrosecondArray,
3223 e,
3224 size
3225 )
3226 }
3227 ScalarValue::Time64Nanosecond(e) => {
3228 build_array_from_option!(
3229 Time64,
3230 TimeUnit::Nanosecond,
3231 Time64NanosecondArray,
3232 e,
3233 size
3234 )
3235 }
3236 ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3237 Interval,
3238 IntervalUnit::DayTime,
3239 IntervalDayTimeArray,
3240 e,
3241 size
3242 ),
3243 ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3244 Interval,
3245 IntervalUnit::YearMonth,
3246 IntervalYearMonthArray,
3247 e,
3248 size
3249 ),
3250 ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3251 Interval,
3252 IntervalUnit::MonthDayNano,
3253 IntervalMonthDayNanoArray,
3254 e,
3255 size
3256 ),
3257 ScalarValue::DurationSecond(e) => build_array_from_option!(
3258 Duration,
3259 TimeUnit::Second,
3260 DurationSecondArray,
3261 e,
3262 size
3263 ),
3264 ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3265 Duration,
3266 TimeUnit::Millisecond,
3267 DurationMillisecondArray,
3268 e,
3269 size
3270 ),
3271 ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3272 Duration,
3273 TimeUnit::Microsecond,
3274 DurationMicrosecondArray,
3275 e,
3276 size
3277 ),
3278 ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3279 Duration,
3280 TimeUnit::Nanosecond,
3281 DurationNanosecondArray,
3282 e,
3283 size
3284 ),
3285 ScalarValue::Union(value, fields, mode) => match value {
3286 Some((v_id, value)) => {
3287 let mut new_fields = Vec::with_capacity(fields.len());
3288 let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3289 for (f_id, field) in fields.iter() {
3290 let ar = if f_id == *v_id {
3291 value.to_array_of_size(size)?
3292 } else {
3293 let dt = field.data_type();
3294 match mode {
3295 UnionMode::Sparse => new_null_array(dt, size),
3296 UnionMode::Dense => new_null_array(dt, 0),
3299 }
3300 };
3301 let field = (**field).clone();
3302 child_arrays.push(ar);
3303 new_fields.push(field.clone());
3304 }
3305 let type_ids = repeat_n(*v_id, size);
3306 let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3307 let value_offsets = match mode {
3308 UnionMode::Sparse => None,
3309 UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3310 };
3311 let ar = UnionArray::try_new(
3312 fields.clone(),
3313 type_ids,
3314 value_offsets,
3315 child_arrays,
3316 )
3317 .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3318 Arc::new(ar)
3319 }
3320 None => new_null_array(&DataType::Union(fields.clone(), *mode), size),
3321 },
3322 ScalarValue::Dictionary(key_type, v) => {
3323 match key_type.as_ref() {
3325 DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3326 DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3327 DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3328 DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3329 DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3330 DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3331 DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3332 DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3333 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3334 }
3335 }
3336 ScalarValue::RunEndEncoded(run_ends_field, values_field, value) => {
3337 fn make_run_array<R: RunEndIndexType>(
3338 run_ends_field: &Arc<Field>,
3339 values_field: &Arc<Field>,
3340 value: &ScalarValue,
3341 size: usize,
3342 ) -> Result<ArrayRef> {
3343 let size_native = R::Native::from_usize(size)
3344 .ok_or_else(|| DataFusionError::Execution(format!("Cannot construct RunArray of size {size}: Overflows run-ends type {}", R::DATA_TYPE)))?;
3345 let values = value.to_array_of_size(1)?;
3346 let run_ends =
3347 PrimitiveArray::<R>::new(vec![size_native].into(), None);
3348
3349 let dt = DataType::RunEndEncoded(
3351 Arc::clone(run_ends_field),
3352 Arc::clone(values_field),
3353 );
3354 let builder = ArrayDataBuilder::new(dt)
3355 .len(size)
3356 .add_child_data(run_ends.to_data())
3357 .add_child_data(values.to_data());
3358 let run_array = RunArray::<R>::from(builder.build()?);
3359
3360 Ok(Arc::new(run_array))
3361 }
3362 match run_ends_field.data_type() {
3363 DataType::Int16 => make_run_array::<Int16Type>(
3364 run_ends_field,
3365 values_field,
3366 value,
3367 size,
3368 )?,
3369 DataType::Int32 => make_run_array::<Int32Type>(
3370 run_ends_field,
3371 values_field,
3372 value,
3373 size,
3374 )?,
3375 DataType::Int64 => make_run_array::<Int64Type>(
3376 run_ends_field,
3377 values_field,
3378 value,
3379 size,
3380 )?,
3381 dt => unreachable!("Invalid run-ends type: {dt}"),
3382 }
3383 }
3384 ScalarValue::Null => get_or_create_cached_null_array(size),
3385 })
3386 }
3387
3388 fn get_decimal_value_from_array(
3389 array: &dyn Array,
3390 index: usize,
3391 precision: u8,
3392 scale: i8,
3393 ) -> Result<ScalarValue> {
3394 match array.data_type() {
3395 DataType::Decimal32(_, _) => {
3396 let array = as_decimal32_array(array)?;
3397 if array.is_null(index) {
3398 Ok(ScalarValue::Decimal32(None, precision, scale))
3399 } else {
3400 let value = array.value(index);
3401 Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3402 }
3403 }
3404 DataType::Decimal64(_, _) => {
3405 let array = as_decimal64_array(array)?;
3406 if array.is_null(index) {
3407 Ok(ScalarValue::Decimal64(None, precision, scale))
3408 } else {
3409 let value = array.value(index);
3410 Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3411 }
3412 }
3413 DataType::Decimal128(_, _) => {
3414 let array = as_decimal128_array(array)?;
3415 if array.is_null(index) {
3416 Ok(ScalarValue::Decimal128(None, precision, scale))
3417 } else {
3418 let value = array.value(index);
3419 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3420 }
3421 }
3422 DataType::Decimal256(_, _) => {
3423 let array = as_decimal256_array(array)?;
3424 if array.is_null(index) {
3425 Ok(ScalarValue::Decimal256(None, precision, scale))
3426 } else {
3427 let value = array.value(index);
3428 Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3429 }
3430 }
3431 other => {
3432 unreachable!("Invalid type isn't decimal: {other:?}")
3433 }
3434 }
3435 }
3436
3437 fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3440 if size == 0 {
3441 return Ok(arr.slice(0, 0));
3442 }
3443
3444 let n = arr.len() as u32;
3451 let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
3452 Ok(arrow::compute::take(arr, &indices, None)?)
3453 }
3454
3455 pub fn convert_array_to_scalar_vec(
3557 array: &dyn Array,
3558 ) -> Result<Vec<Option<Vec<Self>>>> {
3559 fn generic_collect<OffsetSize: OffsetSizeTrait>(
3560 array: &dyn Array,
3561 ) -> Result<Vec<Option<Vec<ScalarValue>>>> {
3562 array
3563 .as_list::<OffsetSize>()
3564 .iter()
3565 .map(|nested_array| {
3566 nested_array
3567 .map(|array| {
3568 (0..array.len())
3569 .map(|i| ScalarValue::try_from_array(&array, i))
3570 .collect::<Result<Vec<_>>>()
3571 })
3572 .transpose()
3573 })
3574 .collect()
3575 }
3576
3577 match array.data_type() {
3578 DataType::List(_) => generic_collect::<i32>(array),
3579 DataType::LargeList(_) => generic_collect::<i64>(array),
3580 _ => _internal_err!(
3581 "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList type"
3582 ),
3583 }
3584 }
3585
3586 #[deprecated(
3587 since = "46.0.0",
3588 note = "This function is obsolete. Use `to_array` instead"
3589 )]
3590 pub fn raw_data(&self) -> Result<ArrayRef> {
3591 match self {
3592 ScalarValue::List(arr) => Ok(arr.to_owned()),
3593 _ => _internal_err!("ScalarValue is not a list"),
3594 }
3595 }
3596
3597 pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3599 if array.is_null(index) {
3601 return array.data_type().try_into();
3602 }
3603
3604 Ok(match array.data_type() {
3605 DataType::Null => ScalarValue::Null,
3606 DataType::Decimal32(precision, scale) => {
3607 ScalarValue::get_decimal_value_from_array(
3608 array, index, *precision, *scale,
3609 )?
3610 }
3611 DataType::Decimal64(precision, scale) => {
3612 ScalarValue::get_decimal_value_from_array(
3613 array, index, *precision, *scale,
3614 )?
3615 }
3616 DataType::Decimal128(precision, scale) => {
3617 ScalarValue::get_decimal_value_from_array(
3618 array, index, *precision, *scale,
3619 )?
3620 }
3621 DataType::Decimal256(precision, scale) => {
3622 ScalarValue::get_decimal_value_from_array(
3623 array, index, *precision, *scale,
3624 )?
3625 }
3626 DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3627 DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3628 DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3629 DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3630 DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3631 DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3632 DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3633 DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3634 DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3635 DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3636 DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3637 DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3638 DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3639 DataType::LargeBinary => {
3640 typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3641 }
3642 DataType::BinaryView => {
3643 typed_cast!(array, index, as_binary_view_array, BinaryView)?
3644 }
3645 DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3646 DataType::LargeUtf8 => {
3647 typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3648 }
3649 DataType::Utf8View => {
3650 typed_cast!(array, index, as_string_view_array, Utf8View)?
3651 }
3652 DataType::List(field) => {
3653 let list_array = array.as_list::<i32>();
3654 let nested_array = list_array.value(index);
3655 SingleRowListArrayBuilder::new(nested_array)
3657 .with_field(field)
3658 .build_list_scalar()
3659 }
3660 DataType::LargeList(field) => {
3661 let list_array = as_large_list_array(array)?;
3662 let nested_array = list_array.value(index);
3663 SingleRowListArrayBuilder::new(nested_array)
3665 .with_field(field)
3666 .build_large_list_scalar()
3667 }
3668 DataType::FixedSizeList(field, _) => {
3670 let list_array = as_fixed_size_list_array(array)?;
3671 let nested_array = list_array.value(index);
3672 let list_size = nested_array.len();
3674 SingleRowListArrayBuilder::new(nested_array)
3675 .with_field(field)
3676 .build_fixed_size_list_scalar(list_size)
3677 }
3678 DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
3679 DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
3680 DataType::Time32(TimeUnit::Second) => {
3681 typed_cast!(array, index, as_time32_second_array, Time32Second)?
3682 }
3683 DataType::Time32(TimeUnit::Millisecond) => {
3684 typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
3685 }
3686 DataType::Time64(TimeUnit::Microsecond) => {
3687 typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
3688 }
3689 DataType::Time64(TimeUnit::Nanosecond) => {
3690 typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
3691 }
3692 DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
3693 array,
3694 index,
3695 as_timestamp_second_array,
3696 TimestampSecond,
3697 tz_opt
3698 )?,
3699 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
3700 array,
3701 index,
3702 as_timestamp_millisecond_array,
3703 TimestampMillisecond,
3704 tz_opt
3705 )?,
3706 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
3707 array,
3708 index,
3709 as_timestamp_microsecond_array,
3710 TimestampMicrosecond,
3711 tz_opt
3712 )?,
3713 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
3714 array,
3715 index,
3716 as_timestamp_nanosecond_array,
3717 TimestampNanosecond,
3718 tz_opt
3719 )?,
3720 DataType::Dictionary(key_type, _) => {
3721 let (values_array, values_index) = match key_type.as_ref() {
3722 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3723 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3724 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3725 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3726 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3727 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3728 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3729 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3730 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3731 };
3732 let value = match values_index {
3734 Some(values_index) => {
3735 ScalarValue::try_from_array(values_array, values_index)
3736 }
3737 None => values_array.data_type().try_into(),
3739 }?;
3740
3741 Self::Dictionary(key_type.clone(), Box::new(value))
3742 }
3743 DataType::RunEndEncoded(run_ends_field, value_field) => {
3744 if index > array.len() {
3747 return _exec_err!(
3748 "Index {index} out of bounds for array of length {}",
3749 array.len()
3750 );
3751 }
3752 let scalar = downcast_run_array!(
3753 array => {
3754 let index = array.get_physical_index(index);
3755 ScalarValue::try_from_array(array.values(), index)?
3756 },
3757 dt => unreachable!("Invalid run-ends type: {dt}")
3758 );
3759 Self::RunEndEncoded(
3760 Arc::clone(run_ends_field),
3761 Arc::clone(value_field),
3762 Box::new(scalar),
3763 )
3764 }
3765 DataType::Struct(_) => {
3766 let a = array.slice(index, 1);
3767 Self::Struct(Arc::new(a.as_struct().to_owned()))
3768 }
3769 DataType::FixedSizeBinary(_) => {
3770 let array = as_fixed_size_binary_array(array)?;
3771 let size = match array.data_type() {
3772 DataType::FixedSizeBinary(size) => *size,
3773 _ => unreachable!(),
3774 };
3775 ScalarValue::FixedSizeBinary(
3776 size,
3777 match array.is_null(index) {
3778 true => None,
3779 false => Some(array.value(index).into()),
3780 },
3781 )
3782 }
3783 DataType::Interval(IntervalUnit::DayTime) => {
3784 typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
3785 }
3786 DataType::Interval(IntervalUnit::YearMonth) => {
3787 typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
3788 }
3789 DataType::Interval(IntervalUnit::MonthDayNano) => {
3790 typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
3791 }
3792
3793 DataType::Duration(TimeUnit::Second) => {
3794 typed_cast!(array, index, as_duration_second_array, DurationSecond)?
3795 }
3796 DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
3797 array,
3798 index,
3799 as_duration_millisecond_array,
3800 DurationMillisecond
3801 )?,
3802 DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
3803 array,
3804 index,
3805 as_duration_microsecond_array,
3806 DurationMicrosecond
3807 )?,
3808 DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
3809 array,
3810 index,
3811 as_duration_nanosecond_array,
3812 DurationNanosecond
3813 )?,
3814 DataType::Map(_, _) => {
3815 let a = array.slice(index, 1);
3816 Self::Map(Arc::new(a.as_map().to_owned()))
3817 }
3818 DataType::Union(fields, mode) => {
3819 let array = as_union_array(array)?;
3820 let ti = array.type_id(index);
3821 let index = array.value_offset(index);
3822 let value = ScalarValue::try_from_array(array.child(ti), index)?;
3823 ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
3824 }
3825 other => {
3826 return _not_impl_err!(
3827 "Can't create a scalar from array of type \"{other:?}\""
3828 );
3829 }
3830 })
3831 }
3832
3833 pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3835 ScalarValue::from(value).cast_to(target_type)
3836 }
3837
3838 pub fn try_as_str(&self) -> Option<Option<&str>> {
3872 let v = match self {
3873 ScalarValue::Utf8(v) => v,
3874 ScalarValue::LargeUtf8(v) => v,
3875 ScalarValue::Utf8View(v) => v,
3876 ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3877 ScalarValue::RunEndEncoded(_, _, v) => return v.try_as_str(),
3878 _ => return None,
3879 };
3880 Some(v.as_ref().map(|v| v.as_str()))
3881 }
3882
3883 pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3885 self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3886 }
3887
3888 pub fn cast_to_with_options(
3890 &self,
3891 target_type: &DataType,
3892 cast_options: &CastOptions<'static>,
3893 ) -> Result<Self> {
3894 let source_type = self.data_type();
3895 if let Some(multiplier) = date_to_timestamp_multiplier(&source_type, target_type)
3896 && let Some(value) = self.date_scalar_value_as_i64()
3897 {
3898 ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?;
3899 }
3900
3901 let scalar_array = self.to_array()?;
3902
3903 let cast_arr = match target_type {
3907 DataType::Struct(_) => {
3908 let target_field = Field::new("_", target_type.clone(), true);
3910 crate::nested_struct::cast_column(
3911 &scalar_array,
3912 &target_field,
3913 cast_options,
3914 )?
3915 }
3916 _ => cast_with_options(&scalar_array, target_type, cast_options)?,
3917 };
3918
3919 ScalarValue::try_from_array(&cast_arr, 0)
3920 }
3921
3922 fn date_scalar_value_as_i64(&self) -> Option<i64> {
3923 match self {
3924 ScalarValue::Date32(Some(value)) => Some(i64::from(*value)),
3925 ScalarValue::Date64(Some(value)) => Some(*value),
3926 _ => None,
3927 }
3928 }
3929
3930 fn eq_array_decimal32(
3931 array: &ArrayRef,
3932 index: usize,
3933 value: Option<&i32>,
3934 precision: u8,
3935 scale: i8,
3936 ) -> Result<bool> {
3937 let array = as_decimal32_array(array)?;
3938 if array.precision() != precision || array.scale() != scale {
3939 return Ok(false);
3940 }
3941 let is_null = array.is_null(index);
3942 if let Some(v) = value {
3943 Ok(!array.is_null(index) && array.value(index) == *v)
3944 } else {
3945 Ok(is_null)
3946 }
3947 }
3948
3949 fn eq_array_decimal64(
3950 array: &ArrayRef,
3951 index: usize,
3952 value: Option<&i64>,
3953 precision: u8,
3954 scale: i8,
3955 ) -> Result<bool> {
3956 let array = as_decimal64_array(array)?;
3957 if array.precision() != precision || array.scale() != scale {
3958 return Ok(false);
3959 }
3960 let is_null = array.is_null(index);
3961 if let Some(v) = value {
3962 Ok(!array.is_null(index) && array.value(index) == *v)
3963 } else {
3964 Ok(is_null)
3965 }
3966 }
3967
3968 fn eq_array_decimal(
3969 array: &ArrayRef,
3970 index: usize,
3971 value: Option<&i128>,
3972 precision: u8,
3973 scale: i8,
3974 ) -> Result<bool> {
3975 let array = as_decimal128_array(array)?;
3976 if array.precision() != precision || array.scale() != scale {
3977 return Ok(false);
3978 }
3979 let is_null = array.is_null(index);
3980 if let Some(v) = value {
3981 Ok(!array.is_null(index) && array.value(index) == *v)
3982 } else {
3983 Ok(is_null)
3984 }
3985 }
3986
3987 fn eq_array_decimal256(
3988 array: &ArrayRef,
3989 index: usize,
3990 value: Option<&i256>,
3991 precision: u8,
3992 scale: i8,
3993 ) -> Result<bool> {
3994 let array = as_decimal256_array(array)?;
3995 if array.precision() != precision || array.scale() != scale {
3996 return Ok(false);
3997 }
3998 let is_null = array.is_null(index);
3999 if let Some(v) = value {
4000 Ok(!array.is_null(index) && array.value(index) == *v)
4001 } else {
4002 Ok(is_null)
4003 }
4004 }
4005
4006 #[inline]
4033 pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
4034 Ok(match self {
4035 ScalarValue::Decimal32(v, precision, scale) => {
4036 ScalarValue::eq_array_decimal32(
4037 array,
4038 index,
4039 v.as_ref(),
4040 *precision,
4041 *scale,
4042 )?
4043 }
4044 ScalarValue::Decimal64(v, precision, scale) => {
4045 ScalarValue::eq_array_decimal64(
4046 array,
4047 index,
4048 v.as_ref(),
4049 *precision,
4050 *scale,
4051 )?
4052 }
4053 ScalarValue::Decimal128(v, precision, scale) => {
4054 ScalarValue::eq_array_decimal(
4055 array,
4056 index,
4057 v.as_ref(),
4058 *precision,
4059 *scale,
4060 )?
4061 }
4062 ScalarValue::Decimal256(v, precision, scale) => {
4063 ScalarValue::eq_array_decimal256(
4064 array,
4065 index,
4066 v.as_ref(),
4067 *precision,
4068 *scale,
4069 )?
4070 }
4071 ScalarValue::Boolean(val) => {
4072 eq_array_primitive!(array, index, as_boolean_array, val)?
4073 }
4074 ScalarValue::Float16(val) => {
4075 eq_array_primitive!(array, index, as_float16_array, val)?
4076 }
4077 ScalarValue::Float32(val) => {
4078 eq_array_primitive!(array, index, as_float32_array, val)?
4079 }
4080 ScalarValue::Float64(val) => {
4081 eq_array_primitive!(array, index, as_float64_array, val)?
4082 }
4083 ScalarValue::Int8(val) => {
4084 eq_array_primitive!(array, index, as_int8_array, val)?
4085 }
4086 ScalarValue::Int16(val) => {
4087 eq_array_primitive!(array, index, as_int16_array, val)?
4088 }
4089 ScalarValue::Int32(val) => {
4090 eq_array_primitive!(array, index, as_int32_array, val)?
4091 }
4092 ScalarValue::Int64(val) => {
4093 eq_array_primitive!(array, index, as_int64_array, val)?
4094 }
4095 ScalarValue::UInt8(val) => {
4096 eq_array_primitive!(array, index, as_uint8_array, val)?
4097 }
4098 ScalarValue::UInt16(val) => {
4099 eq_array_primitive!(array, index, as_uint16_array, val)?
4100 }
4101 ScalarValue::UInt32(val) => {
4102 eq_array_primitive!(array, index, as_uint32_array, val)?
4103 }
4104 ScalarValue::UInt64(val) => {
4105 eq_array_primitive!(array, index, as_uint64_array, val)?
4106 }
4107 ScalarValue::Utf8(val) => {
4108 eq_array_primitive!(array, index, as_string_array, val)?
4109 }
4110 ScalarValue::Utf8View(val) => {
4111 eq_array_primitive!(array, index, as_string_view_array, val)?
4112 }
4113 ScalarValue::LargeUtf8(val) => {
4114 eq_array_primitive!(array, index, as_large_string_array, val)?
4115 }
4116 ScalarValue::Binary(val) => {
4117 eq_array_primitive!(array, index, as_binary_array, val)?
4118 }
4119 ScalarValue::BinaryView(val) => {
4120 eq_array_primitive!(array, index, as_binary_view_array, val)?
4121 }
4122 ScalarValue::FixedSizeBinary(_, val) => {
4123 eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
4124 }
4125 ScalarValue::LargeBinary(val) => {
4126 eq_array_primitive!(array, index, as_large_binary_array, val)?
4127 }
4128 ScalarValue::List(arr) => {
4129 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4130 }
4131 ScalarValue::LargeList(arr) => {
4132 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4133 }
4134 ScalarValue::FixedSizeList(arr) => {
4135 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4136 }
4137 ScalarValue::Struct(arr) => {
4138 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4139 }
4140 ScalarValue::Map(arr) => {
4141 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4142 }
4143 ScalarValue::Date32(val) => {
4144 eq_array_primitive!(array, index, as_date32_array, val)?
4145 }
4146 ScalarValue::Date64(val) => {
4147 eq_array_primitive!(array, index, as_date64_array, val)?
4148 }
4149 ScalarValue::Time32Second(val) => {
4150 eq_array_primitive!(array, index, as_time32_second_array, val)?
4151 }
4152 ScalarValue::Time32Millisecond(val) => {
4153 eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
4154 }
4155 ScalarValue::Time64Microsecond(val) => {
4156 eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
4157 }
4158 ScalarValue::Time64Nanosecond(val) => {
4159 eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
4160 }
4161 ScalarValue::TimestampSecond(val, _) => {
4162 eq_array_primitive!(array, index, as_timestamp_second_array, val)?
4163 }
4164 ScalarValue::TimestampMillisecond(val, _) => {
4165 eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
4166 }
4167 ScalarValue::TimestampMicrosecond(val, _) => {
4168 eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
4169 }
4170 ScalarValue::TimestampNanosecond(val, _) => {
4171 eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
4172 }
4173 ScalarValue::IntervalYearMonth(val) => {
4174 eq_array_primitive!(array, index, as_interval_ym_array, val)?
4175 }
4176 ScalarValue::IntervalDayTime(val) => {
4177 eq_array_primitive!(array, index, as_interval_dt_array, val)?
4178 }
4179 ScalarValue::IntervalMonthDayNano(val) => {
4180 eq_array_primitive!(array, index, as_interval_mdn_array, val)?
4181 }
4182 ScalarValue::DurationSecond(val) => {
4183 eq_array_primitive!(array, index, as_duration_second_array, val)?
4184 }
4185 ScalarValue::DurationMillisecond(val) => {
4186 eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
4187 }
4188 ScalarValue::DurationMicrosecond(val) => {
4189 eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
4190 }
4191 ScalarValue::DurationNanosecond(val) => {
4192 eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
4193 }
4194 ScalarValue::Union(value, _, _) => {
4195 let array = as_union_array(array)?;
4196 let ti = array.type_id(index);
4197 let index = array.value_offset(index);
4198 if let Some((ti_v, value)) = value {
4199 ti_v == &ti && value.eq_array(array.child(ti), index)?
4200 } else {
4201 array.child(ti).is_null(index)
4202 }
4203 }
4204 ScalarValue::Dictionary(key_type, v) => {
4205 let (values_array, values_index) = match key_type.as_ref() {
4206 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4207 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4208 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4209 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4210 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4211 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4212 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4213 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4214 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4215 };
4216 match values_index {
4218 Some(values_index) => v.eq_array(values_array, values_index)?,
4219 None => v.is_null(),
4220 }
4221 }
4222 ScalarValue::RunEndEncoded(run_ends_field, _, value) => {
4223 if index > array.len() {
4226 return _exec_err!(
4227 "Index {index} out of bounds for array of length {}",
4228 array.len()
4229 );
4230 }
4231 match run_ends_field.data_type() {
4232 DataType::Int16 => {
4233 let array = as_run_array::<Int16Type>(array)?;
4234 let index = array.get_physical_index(index);
4235 value.eq_array(array.values(), index)?
4236 }
4237 DataType::Int32 => {
4238 let array = as_run_array::<Int32Type>(array)?;
4239 let index = array.get_physical_index(index);
4240 value.eq_array(array.values(), index)?
4241 }
4242 DataType::Int64 => {
4243 let array = as_run_array::<Int64Type>(array)?;
4244 let index = array.get_physical_index(index);
4245 value.eq_array(array.values(), index)?
4246 }
4247 dt => unreachable!("Invalid run-ends type: {dt}"),
4248 }
4249 }
4250 ScalarValue::Null => array.is_null(index),
4251 })
4252 }
4253
4254 fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
4255 let right = arr2.slice(index, 1);
4256 arr1 == &right
4257 }
4258
4259 pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
4264 self.partial_cmp(other).ok_or_else(|| {
4265 _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
4266 })
4267 }
4268
4269 pub fn size(&self) -> usize {
4272 size_of_val(self)
4273 + match self {
4274 ScalarValue::Null
4275 | ScalarValue::Boolean(_)
4276 | ScalarValue::Float16(_)
4277 | ScalarValue::Float32(_)
4278 | ScalarValue::Float64(_)
4279 | ScalarValue::Decimal32(_, _, _)
4280 | ScalarValue::Decimal64(_, _, _)
4281 | ScalarValue::Decimal128(_, _, _)
4282 | ScalarValue::Decimal256(_, _, _)
4283 | ScalarValue::Int8(_)
4284 | ScalarValue::Int16(_)
4285 | ScalarValue::Int32(_)
4286 | ScalarValue::Int64(_)
4287 | ScalarValue::UInt8(_)
4288 | ScalarValue::UInt16(_)
4289 | ScalarValue::UInt32(_)
4290 | ScalarValue::UInt64(_)
4291 | ScalarValue::Date32(_)
4292 | ScalarValue::Date64(_)
4293 | ScalarValue::Time32Second(_)
4294 | ScalarValue::Time32Millisecond(_)
4295 | ScalarValue::Time64Microsecond(_)
4296 | ScalarValue::Time64Nanosecond(_)
4297 | ScalarValue::IntervalYearMonth(_)
4298 | ScalarValue::IntervalDayTime(_)
4299 | ScalarValue::IntervalMonthDayNano(_)
4300 | ScalarValue::DurationSecond(_)
4301 | ScalarValue::DurationMillisecond(_)
4302 | ScalarValue::DurationMicrosecond(_)
4303 | ScalarValue::DurationNanosecond(_) => 0,
4304 ScalarValue::Utf8(s)
4305 | ScalarValue::LargeUtf8(s)
4306 | ScalarValue::Utf8View(s) => {
4307 s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4308 }
4309 ScalarValue::TimestampSecond(_, s)
4310 | ScalarValue::TimestampMillisecond(_, s)
4311 | ScalarValue::TimestampMicrosecond(_, s)
4312 | ScalarValue::TimestampNanosecond(_, s) => {
4313 s.as_ref().map(|s| s.len()).unwrap_or_default()
4314 }
4315 ScalarValue::Binary(b)
4316 | ScalarValue::FixedSizeBinary(_, b)
4317 | ScalarValue::LargeBinary(b)
4318 | ScalarValue::BinaryView(b) => {
4319 b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4320 }
4321 ScalarValue::List(arr) => arr.get_array_memory_size(),
4322 ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4323 ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4324 ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4325 ScalarValue::Map(arr) => arr.get_array_memory_size(),
4326 ScalarValue::Union(vals, fields, _mode) => {
4327 vals.as_ref()
4328 .map(|(_id, sv)| sv.size() - size_of_val(sv))
4329 .unwrap_or_default()
4330 + size_of_val(fields)
4332 + (size_of::<Field>() * fields.len())
4333 + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4334 }
4335 ScalarValue::Dictionary(dt, sv) => {
4336 dt.size() + sv.size()
4338 }
4339 ScalarValue::RunEndEncoded(rf, vf, v) => rf.size() + vf.size() + v.size(),
4340 }
4341 }
4342
4343 pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4347 size_of_val(vec)
4348 + (size_of::<ScalarValue>() * vec.capacity())
4349 + vec
4350 .iter()
4351 .map(|sv| sv.size() - size_of_val(sv))
4352 .sum::<usize>()
4353 }
4354
4355 pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4359 size_of_val(vec_deque)
4360 + (size_of::<ScalarValue>() * vec_deque.capacity())
4361 + vec_deque
4362 .iter()
4363 .map(|sv| sv.size() - size_of_val(sv))
4364 .sum::<usize>()
4365 }
4366
4367 pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4371 size_of_val(set)
4372 + (size_of::<ScalarValue>() * set.capacity())
4373 + set
4374 .iter()
4375 .map(|sv| sv.size() - size_of_val(sv))
4376 .sum::<usize>()
4377 }
4378
4379 pub fn compact(&mut self) {
4385 match self {
4386 ScalarValue::Null
4387 | ScalarValue::Boolean(_)
4388 | ScalarValue::Float16(_)
4389 | ScalarValue::Float32(_)
4390 | ScalarValue::Float64(_)
4391 | ScalarValue::Decimal32(_, _, _)
4392 | ScalarValue::Decimal64(_, _, _)
4393 | ScalarValue::Decimal128(_, _, _)
4394 | ScalarValue::Decimal256(_, _, _)
4395 | ScalarValue::Int8(_)
4396 | ScalarValue::Int16(_)
4397 | ScalarValue::Int32(_)
4398 | ScalarValue::Int64(_)
4399 | ScalarValue::UInt8(_)
4400 | ScalarValue::UInt16(_)
4401 | ScalarValue::UInt32(_)
4402 | ScalarValue::UInt64(_)
4403 | ScalarValue::Date32(_)
4404 | ScalarValue::Date64(_)
4405 | ScalarValue::Time32Second(_)
4406 | ScalarValue::Time32Millisecond(_)
4407 | ScalarValue::Time64Microsecond(_)
4408 | ScalarValue::Time64Nanosecond(_)
4409 | ScalarValue::IntervalYearMonth(_)
4410 | ScalarValue::IntervalDayTime(_)
4411 | ScalarValue::IntervalMonthDayNano(_)
4412 | ScalarValue::DurationSecond(_)
4413 | ScalarValue::DurationMillisecond(_)
4414 | ScalarValue::DurationMicrosecond(_)
4415 | ScalarValue::DurationNanosecond(_)
4416 | ScalarValue::Utf8(_)
4417 | ScalarValue::LargeUtf8(_)
4418 | ScalarValue::Utf8View(_)
4419 | ScalarValue::TimestampSecond(_, _)
4420 | ScalarValue::TimestampMillisecond(_, _)
4421 | ScalarValue::TimestampMicrosecond(_, _)
4422 | ScalarValue::TimestampNanosecond(_, _)
4423 | ScalarValue::Binary(_)
4424 | ScalarValue::FixedSizeBinary(_, _)
4425 | ScalarValue::LargeBinary(_)
4426 | ScalarValue::BinaryView(_) => (),
4427 ScalarValue::FixedSizeList(arr) => {
4428 let array = copy_array_data(&arr.to_data());
4429 *Arc::make_mut(arr) = FixedSizeListArray::from(array);
4430 }
4431 ScalarValue::List(arr) => {
4432 let array = copy_array_data(&arr.to_data());
4433 *Arc::make_mut(arr) = ListArray::from(array);
4434 }
4435 ScalarValue::LargeList(arr) => {
4436 let array = copy_array_data(&arr.to_data());
4437 *Arc::make_mut(arr) = LargeListArray::from(array)
4438 }
4439 ScalarValue::Struct(arr) => {
4440 let array = copy_array_data(&arr.to_data());
4441 *Arc::make_mut(arr) = StructArray::from(array);
4442 }
4443 ScalarValue::Map(arr) => {
4444 let array = copy_array_data(&arr.to_data());
4445 *Arc::make_mut(arr) = MapArray::from(array);
4446 }
4447 ScalarValue::Union(val, _, _) => {
4448 if let Some((_, value)) = val.as_mut() {
4449 value.compact();
4450 }
4451 }
4452 ScalarValue::Dictionary(_, value) => {
4453 value.compact();
4454 }
4455 ScalarValue::RunEndEncoded(_, _, value) => {
4456 value.compact();
4457 }
4458 }
4459 }
4460
4461 pub fn compacted(mut self) -> Self {
4463 self.compact();
4464 self
4465 }
4466
4467 pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4482 match datatype {
4483 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4484 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4485 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4486 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4487 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4488 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4489 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4490 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4491 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4492 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4493 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4494 DataType::Decimal128(precision, scale) => {
4495 let max_digits = 10_i128.pow(*precision as u32) - 1;
4498 Some(ScalarValue::Decimal128(
4499 Some(-max_digits),
4500 *precision,
4501 *scale,
4502 ))
4503 }
4504 DataType::Decimal256(precision, scale) => {
4505 let max_digits = i256::from_i128(10_i128)
4508 .checked_pow(*precision as u32)
4509 .and_then(|v| v.checked_sub(i256::from_i128(1)))
4510 .unwrap_or(i256::MAX);
4511 Some(ScalarValue::Decimal256(
4512 Some(max_digits.neg_wrapping()),
4513 *precision,
4514 *scale,
4515 ))
4516 }
4517 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4518 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4519 DataType::Time32(TimeUnit::Second) => {
4520 Some(ScalarValue::Time32Second(Some(0)))
4521 }
4522 DataType::Time32(TimeUnit::Millisecond) => {
4523 Some(ScalarValue::Time32Millisecond(Some(0)))
4524 }
4525 DataType::Time64(TimeUnit::Microsecond) => {
4526 Some(ScalarValue::Time64Microsecond(Some(0)))
4527 }
4528 DataType::Time64(TimeUnit::Nanosecond) => {
4529 Some(ScalarValue::Time64Nanosecond(Some(0)))
4530 }
4531 DataType::Timestamp(unit, tz) => match unit {
4532 TimeUnit::Second => {
4533 Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4534 }
4535 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4536 Some(i64::MIN),
4537 tz.clone(),
4538 )),
4539 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4540 Some(i64::MIN),
4541 tz.clone(),
4542 )),
4543 TimeUnit::Nanosecond => {
4544 Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4545 }
4546 },
4547 DataType::Duration(unit) => match unit {
4548 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4549 TimeUnit::Millisecond => {
4550 Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4551 }
4552 TimeUnit::Microsecond => {
4553 Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4554 }
4555 TimeUnit::Nanosecond => {
4556 Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4557 }
4558 },
4559 _ => None,
4560 }
4561 }
4562
4563 pub fn max(datatype: &DataType) -> Option<ScalarValue> {
4578 match datatype {
4579 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
4580 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
4581 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
4582 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
4583 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
4584 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
4585 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
4586 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
4587 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
4588 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
4589 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
4590 DataType::Decimal128(precision, scale) => {
4591 let max_digits = 10_i128.pow(*precision as u32) - 1;
4594 Some(ScalarValue::Decimal128(
4595 Some(max_digits),
4596 *precision,
4597 *scale,
4598 ))
4599 }
4600 DataType::Decimal256(precision, scale) => {
4601 let max_digits = i256::from_i128(10_i128)
4603 .checked_pow(*precision as u32)
4604 .and_then(|v| v.checked_sub(i256::from_i128(1)))
4605 .unwrap_or(i256::MAX);
4606 Some(ScalarValue::Decimal256(
4607 Some(max_digits),
4608 *precision,
4609 *scale,
4610 ))
4611 }
4612 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
4613 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
4614 DataType::Time32(TimeUnit::Second) => {
4615 Some(ScalarValue::Time32Second(Some(86_399)))
4617 }
4618 DataType::Time32(TimeUnit::Millisecond) => {
4619 Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
4621 }
4622 DataType::Time64(TimeUnit::Microsecond) => {
4623 Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
4625 }
4626 DataType::Time64(TimeUnit::Nanosecond) => {
4627 Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
4629 }
4630 DataType::Timestamp(unit, tz) => match unit {
4631 TimeUnit::Second => {
4632 Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
4633 }
4634 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4635 Some(i64::MAX),
4636 tz.clone(),
4637 )),
4638 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4639 Some(i64::MAX),
4640 tz.clone(),
4641 )),
4642 TimeUnit::Nanosecond => {
4643 Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
4644 }
4645 },
4646 DataType::Duration(unit) => match unit {
4647 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
4648 TimeUnit::Millisecond => {
4649 Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
4650 }
4651 TimeUnit::Microsecond => {
4652 Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
4653 }
4654 TimeUnit::Nanosecond => {
4655 Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
4656 }
4657 },
4658 _ => None,
4659 }
4660 }
4661
4662 fn validate_decimal_or_internal_err<T: DecimalType>(
4665 precision: u8,
4666 scale: i8,
4667 ) -> Result<()> {
4668 validate_decimal_precision_and_scale::<T>(precision, scale).map_err(|err| {
4669 _internal_datafusion_err!(
4670 "Decimal precision/scale invariant violated \
4671 (precision={precision}, scale={scale}): {err}"
4672 )
4673 })
4674 }
4675}
4676
4677pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
4705 let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
4706 copy.extend(0, 0, src_data.len());
4707 copy.freeze()
4708}
4709
// Generates `From<$ty>` and `From<Option<$ty>>` for `ScalarValue`, mapping the
// native type `$ty` onto the variant named `$scalar`.
macro_rules! impl_scalar {
    ($ty:ty, $scalar:tt) => {
        // A plain native value becomes a non-null scalar.
        impl From<$ty> for ScalarValue {
            fn from(value: $ty) -> Self {
                ScalarValue::$scalar(Some(value))
            }
        }

        // An optional native value maps `None` to a null scalar of that variant.
        impl From<Option<$ty>> for ScalarValue {
            fn from(value: Option<$ty>) -> Self {
                ScalarValue::$scalar(value)
            }
        }
    };
}

// Native types that convert directly into their same-named scalar variant.
impl_scalar!(f64, Float64);
impl_scalar!(f32, Float32);
impl_scalar!(f16, Float16);
impl_scalar!(i8, Int8);
impl_scalar!(i16, Int16);
impl_scalar!(i32, Int32);
impl_scalar!(i64, Int64);
impl_scalar!(bool, Boolean);
impl_scalar!(u8, UInt8);
impl_scalar!(u16, UInt16);
impl_scalar!(u32, UInt32);
impl_scalar!(u64, UInt64);
4738
4739impl From<&str> for ScalarValue {
4740 fn from(value: &str) -> Self {
4741 Some(value).into()
4742 }
4743}
4744
4745impl From<Option<&str>> for ScalarValue {
4746 fn from(value: Option<&str>) -> Self {
4747 let value = value.map(|s| s.to_string());
4748 value.into()
4749 }
4750}
4751
4752impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
4754 fn from(value: Vec<(&str, ScalarValue)>) -> Self {
4755 value
4756 .into_iter()
4757 .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
4758 builder.with_name_and_scalar(name, value)
4759 })
4760 .build()
4761 .unwrap()
4762 }
4763}
4764
4765impl FromStr for ScalarValue {
4766 type Err = Infallible;
4767
4768 fn from_str(s: &str) -> Result<Self, Self::Err> {
4769 Ok(s.into())
4770 }
4771}
4772
4773impl From<String> for ScalarValue {
4774 fn from(value: String) -> Self {
4775 Some(value).into()
4776 }
4777}
4778
4779impl From<Option<String>> for ScalarValue {
4780 fn from(value: Option<String>) -> Self {
4781 ScalarValue::Utf8(value)
4782 }
4783}
4784
// Generates `TryFrom<ScalarValue> for $NATIVE`, which succeeds only for a
// non-null `ScalarValue::$SCALAR` and otherwise returns an internal error
// naming the offending value and the target type.
macro_rules! impl_try_from {
    ($SCALAR:ident, $NATIVE:ident) => {
        impl TryFrom<ScalarValue> for $NATIVE {
            type Error = DataFusionError;

            fn try_from(value: ScalarValue) -> Result<Self> {
                match value {
                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
                    // Null values and every other variant are rejected.
                    _ => _internal_err!(
                        "Cannot convert {:?} to {}",
                        value,
                        std::any::type_name::<Self>()
                    ),
                }
            }
        }
    };
}

impl_try_from!(Int8, i8);
impl_try_from!(Int16, i16);
4806
4807impl TryFrom<ScalarValue> for i32 {
4809 type Error = DataFusionError;
4810
4811 fn try_from(value: ScalarValue) -> Result<Self> {
4812 match value {
4813 ScalarValue::Int32(Some(inner_value))
4814 | ScalarValue::Date32(Some(inner_value))
4815 | ScalarValue::Time32Second(Some(inner_value))
4816 | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
4817 _ => _internal_err!(
4818 "Cannot convert {:?} to {}",
4819 value,
4820 std::any::type_name::<Self>()
4821 ),
4822 }
4823 }
4824}
4825
4826impl TryFrom<ScalarValue> for i64 {
4828 type Error = DataFusionError;
4829
4830 fn try_from(value: ScalarValue) -> Result<Self> {
4831 match value {
4832 ScalarValue::Int64(Some(inner_value))
4833 | ScalarValue::Date64(Some(inner_value))
4834 | ScalarValue::Time64Microsecond(Some(inner_value))
4835 | ScalarValue::Time64Nanosecond(Some(inner_value))
4836 | ScalarValue::TimestampNanosecond(Some(inner_value), _)
4837 | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
4838 | ScalarValue::TimestampMillisecond(Some(inner_value), _)
4839 | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
4840 _ => _internal_err!(
4841 "Cannot convert {:?} to {}",
4842 value,
4843 std::any::type_name::<Self>()
4844 ),
4845 }
4846 }
4847}
4848
4849impl TryFrom<ScalarValue> for i128 {
4851 type Error = DataFusionError;
4852
4853 fn try_from(value: ScalarValue) -> Result<Self> {
4854 match value {
4855 ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
4856 _ => _internal_err!(
4857 "Cannot convert {:?} to {}",
4858 value,
4859 std::any::type_name::<Self>()
4860 ),
4861 }
4862 }
4863}
4864
4865impl TryFrom<ScalarValue> for i256 {
4867 type Error = DataFusionError;
4868
4869 fn try_from(value: ScalarValue) -> Result<Self> {
4870 match value {
4871 ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
4872 _ => _internal_err!(
4873 "Cannot convert {:?} to {}",
4874 value,
4875 std::any::type_name::<Self>()
4876 ),
4877 }
4878 }
4879}
4880
// Remaining native types whose `TryFrom<ScalarValue>` accepts exactly one
// same-named variant and is therefore generated by `impl_try_from!`.
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float16, f16);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
4889
4890impl TryFrom<DataType> for ScalarValue {
4891 type Error = DataFusionError;
4892
4893 fn try_from(datatype: DataType) -> Result<Self> {
4895 (&datatype).try_into()
4896 }
4897}
4898
4899impl TryFrom<&DataType> for ScalarValue {
4900 type Error = DataFusionError;
4901
4902 fn try_from(data_type: &DataType) -> Result<Self> {
4904 Self::try_new_null(data_type)
4905 }
4906}
4907
// Writes an `Option`-like expression to the formatter `$F`: the `Display`
// form of the inner value for `Some`, or the literal text `NULL` for `None`.
// Expands to the `fmt::Result` of the `write!` call.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        match $EXPR {
            Some(e) => write!($F, "{e}"),
            None => write!($F, "NULL"),
        }
    }};
}
4916
4917impl fmt::Display for ScalarValue {
4923 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
4924 match self {
4925 ScalarValue::Decimal32(v, p, s) => {
4926 write!(f, "{v:?},{p:?},{s:?}")?;
4927 }
4928 ScalarValue::Decimal64(v, p, s) => {
4929 write!(f, "{v:?},{p:?},{s:?}")?;
4930 }
4931 ScalarValue::Decimal128(v, p, s) => {
4932 write!(f, "{v:?},{p:?},{s:?}")?;
4933 }
4934 ScalarValue::Decimal256(v, p, s) => {
4935 write!(f, "{v:?},{p:?},{s:?}")?;
4936 }
4937 ScalarValue::Boolean(e) => format_option!(f, e)?,
4938 ScalarValue::Float16(e) => format_option!(f, e)?,
4939 ScalarValue::Float32(e) => format_option!(f, e)?,
4940 ScalarValue::Float64(e) => format_option!(f, e)?,
4941 ScalarValue::Int8(e) => format_option!(f, e)?,
4942 ScalarValue::Int16(e) => format_option!(f, e)?,
4943 ScalarValue::Int32(e) => format_option!(f, e)?,
4944 ScalarValue::Int64(e) => format_option!(f, e)?,
4945 ScalarValue::UInt8(e) => format_option!(f, e)?,
4946 ScalarValue::UInt16(e) => format_option!(f, e)?,
4947 ScalarValue::UInt32(e) => format_option!(f, e)?,
4948 ScalarValue::UInt64(e) => format_option!(f, e)?,
4949 ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
4950 ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
4951 ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
4952 ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
4953 ScalarValue::Utf8(e)
4954 | ScalarValue::LargeUtf8(e)
4955 | ScalarValue::Utf8View(e) => format_option!(f, e)?,
4956 ScalarValue::Binary(e)
4957 | ScalarValue::FixedSizeBinary(_, e)
4958 | ScalarValue::LargeBinary(e)
4959 | ScalarValue::BinaryView(e) => match e {
4960 Some(bytes) => {
4961 for b in bytes.iter().take(10) {
4963 write!(f, "{b:02X}")?;
4964 }
4965 if bytes.len() > 10 {
4966 write!(f, "...")?;
4967 }
4968 }
4969 None => write!(f, "NULL")?,
4970 },
4971 ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?,
4972 ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?,
4973 ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?,
4974 ScalarValue::Date32(e) => format_option!(
4975 f,
4976 e.map(|v| {
4977 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4978 match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
4979 {
4980 Some(date) => date.to_string(),
4981 None => "".to_string(),
4982 }
4983 })
4984 )?,
4985 ScalarValue::Date64(e) => format_option!(
4986 f,
4987 e.map(|v| {
4988 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4989 match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
4990 {
4991 Some(date) => date.to_string(),
4992 None => "".to_string(),
4993 }
4994 })
4995 )?,
4996 ScalarValue::Time32Second(e) => format_option!(f, e)?,
4997 ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
4998 ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
4999 ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
5000 ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
5001 ScalarValue::IntervalMonthDayNano(e) => {
5002 format_option!(f, e.map(|v| format!("{v:?}")))?
5003 }
5004 ScalarValue::IntervalDayTime(e) => {
5005 format_option!(f, e.map(|v| format!("{v:?}")))?;
5006 }
5007 ScalarValue::DurationSecond(e) => format_option!(f, e)?,
5008 ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
5009 ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
5010 ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
5011 ScalarValue::Struct(struct_arr) => {
5012 assert_eq!(struct_arr.len(), 1);
5014
5015 if struct_arr.null_count() == struct_arr.len() {
5016 write!(f, "NULL")?;
5017 return Ok(());
5018 }
5019
5020 let columns = struct_arr.columns();
5021 let fields = struct_arr.fields();
5022 let nulls = struct_arr.nulls();
5023
5024 write!(
5025 f,
5026 "{{{}}}",
5027 columns
5028 .iter()
5029 .zip(fields.iter())
5030 .map(|(column, field)| {
5031 if nulls.is_some_and(|b| b.is_null(0)) {
5032 format!("{}:NULL", field.name())
5033 } else if let DataType::Struct(_) = field.data_type() {
5034 let sv = ScalarValue::Struct(Arc::new(
5035 column.as_struct().to_owned(),
5036 ));
5037 format!("{}:{sv}", field.name())
5038 } else {
5039 let sv = array_value_to_string(column, 0).unwrap();
5040 format!("{}:{sv}", field.name())
5041 }
5042 })
5043 .collect::<Vec<_>>()
5044 .join(",")
5045 )?
5046 }
5047 ScalarValue::Map(map_arr) => {
5048 if map_arr.null_count() == map_arr.len() {
5049 write!(f, "NULL")?;
5050 return Ok(());
5051 }
5052
5053 write!(
5054 f,
5055 "[{}]",
5056 map_arr
5057 .iter()
5058 .map(|struct_array| {
5059 if let Some(arr) = struct_array {
5060 let mut buffer = VecDeque::new();
5061 for i in 0..arr.len() {
5062 let key =
5063 array_value_to_string(arr.column(0), i).unwrap();
5064 let value =
5065 array_value_to_string(arr.column(1), i).unwrap();
5066 buffer.push_back(format!("{key}:{value}"));
5067 }
5068 format!(
5069 "{{{}}}",
5070 buffer
5071 .into_iter()
5072 .collect::<Vec<_>>()
5073 .join(",")
5074 .as_str()
5075 )
5076 } else {
5077 "NULL".to_string()
5078 }
5079 })
5080 .collect::<Vec<_>>()
5081 .join(",")
5082 )?
5083 }
5084 ScalarValue::Union(val, _fields, _mode) => match val {
5085 Some((id, val)) => write!(f, "{id}:{val}")?,
5086 None => write!(f, "NULL")?,
5087 },
5088 ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
5089 ScalarValue::RunEndEncoded(_, _, v) => write!(f, "{v}")?,
5090 ScalarValue::Null => write!(f, "NULL")?,
5091 };
5092 Ok(())
5093 }
5094}
5095
5096fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result {
5097 assert_eq!(arr.len(), 1);
5099 let options = FormatOptions::default().with_display_error(true);
5100 let formatter = ArrayFormatter::try_new(arr, &options).unwrap();
5101 let value_formatter = formatter.value(0);
5102 write!(f, "{value_formatter}")
5103}
5104
/// Writes `data` as decimal byte values separated by commas (no trailing
/// comma); an empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (position, byte) in data.iter().enumerate() {
        // Every byte except the first is preceded by a separator.
        if position > 0 {
            write!(f, ",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
5116
/// Debug rendering of a scalar: the variant name wrapped around a value.
///
/// Most variants print as `Variant(<Display output>)`. Non-null string
/// variants and all date/time/interval/duration variants additionally quote
/// the value, and non-null binary variants print their bytes via
/// `fmt_binary` instead of the truncated hex used by `Display`.
impl fmt::Debug for ScalarValue {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
            ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
            // Timestamps also show the timezone, which `Display` omits.
            ScalarValue::TimestampSecond(_, tz_opt) => {
                write!(f, "TimestampSecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
            }
            // String variants: null prints unquoted, values are quoted.
            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
            // Binary variants: null goes through `Display`, values print every
            // byte as comma-separated decimals inside quotes.
            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
            ScalarValue::Binary(Some(b)) => {
                write!(f, "Binary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
            ScalarValue::BinaryView(Some(b)) => {
                write!(f, "BinaryView(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeBinary(size, None) => {
                write!(f, "FixedSizeBinary({size}, {self})")
            }
            ScalarValue::FixedSizeBinary(size, Some(b)) => {
                write!(f, "FixedSizeBinary({size}, \"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
            ScalarValue::LargeBinary(Some(b)) => {
                write!(f, "LargeBinary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
            ScalarValue::List(_) => write!(f, "List({self})"),
            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
            ScalarValue::Struct(struct_arr) => {
                // A struct scalar is expected to hold exactly one row.
                assert_eq!(struct_arr.len(), 1);

                let columns = struct_arr.columns();
                let fields = struct_arr.fields();

                // Each field is rendered as `name:value` from row 0 of its column.
                write!(
                    f,
                    "Struct({{{}}})",
                    columns
                        .iter()
                        .zip(fields.iter())
                        .map(|(column, field)| {
                            let sv = array_value_to_string(column, 0).unwrap();
                            let name = field.name();
                            format!("{name}:{sv}")
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            ScalarValue::Map(map_arr) => {
                // Each map row becomes `{key:value,...}` with keys and values in
                // their `Debug` (quoted string) form; null rows print as `NULL`.
                write!(
                    f,
                    "Map([{}])",
                    map_arr
                        .iter()
                        .map(|struct_array| {
                            if let Some(arr) = struct_array {
                                let buffer: Vec<String> = (0..arr.len())
                                    .map(|i| {
                                        let key = array_value_to_string(arr.column(0), i)
                                            .unwrap();
                                        let value =
                                            array_value_to_string(arr.column(1), i)
                                                .unwrap();
                                        format!("{key:?}:{value:?}")
                                    })
                                    .collect();
                                format!("{{{}}}", buffer.join(","))
                            } else {
                                "NULL".to_string()
                            }
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            // Date, time, interval and duration variants always quote the
            // `Display` output, including when it is `NULL`.
            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
            ScalarValue::Time32Millisecond(_) => {
                write!(f, "Time32Millisecond(\"{self}\")")
            }
            ScalarValue::Time64Microsecond(_) => {
                write!(f, "Time64Microsecond(\"{self}\")")
            }
            ScalarValue::Time64Nanosecond(_) => {
                write!(f, "Time64Nanosecond(\"{self}\")")
            }
            ScalarValue::IntervalDayTime(_) => {
                write!(f, "IntervalDayTime(\"{self}\")")
            }
            ScalarValue::IntervalYearMonth(_) => {
                write!(f, "IntervalYearMonth(\"{self}\")")
            }
            ScalarValue::IntervalMonthDayNano(_) => {
                write!(f, "IntervalMonthDayNano(\"{self}\")")
            }
            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
            ScalarValue::DurationMillisecond(_) => {
                write!(f, "DurationMillisecond(\"{self}\")")
            }
            ScalarValue::DurationMicrosecond(_) => {
                write!(f, "DurationMicrosecond(\"{self}\")")
            }
            ScalarValue::DurationNanosecond(_) => {
                write!(f, "DurationNanosecond(\"{self}\")")
            }
            // The inner union value is printed with `Display`, not `Debug`.
            ScalarValue::Union(val, _fields, _mode) => match val {
                Some((id, val)) => write!(f, "Union {id}:{val}"),
                None => write!(f, "Union(NULL)"),
            },
            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
            ScalarValue::RunEndEncoded(rf, vf, v) => {
                write!(f, "RunEndEncoded({rf:?}, {vf:?}, {v:?})")
            }
            ScalarValue::Null => write!(f, "NULL"),
        }
    }
}
5275
/// Implemented by arrow type markers to associate a native type `T` with the
/// [`ScalarValue`] variant that holds it.
pub trait ScalarType<T: ArrowNativeType> {
    /// Wraps an optional native value in the `ScalarValue` variant for this type.
    fn scalar(r: Option<T>) -> ScalarValue;
}
5281
/// `Float32Type` values are stored in `ScalarValue::Float32`.
impl ScalarType<f32> for Float32Type {
    fn scalar(r: Option<f32>) -> ScalarValue {
        ScalarValue::Float32(r)
    }
}
5287
/// `TimestampSecondType` values are stored in `ScalarValue::TimestampSecond`
/// with no timezone.
impl ScalarType<i64> for TimestampSecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampSecond(r, None)
    }
}
5293
/// `TimestampMillisecondType` values are stored in
/// `ScalarValue::TimestampMillisecond` with no timezone.
impl ScalarType<i64> for TimestampMillisecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMillisecond(r, None)
    }
}
5299
/// `TimestampMicrosecondType` values are stored in
/// `ScalarValue::TimestampMicrosecond` with no timezone.
impl ScalarType<i64> for TimestampMicrosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMicrosecond(r, None)
    }
}
5305
/// `TimestampNanosecondType` values are stored in
/// `ScalarValue::TimestampNanosecond` with no timezone.
impl ScalarType<i64> for TimestampNanosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampNanosecond(r, None)
    }
}
5311
/// `Date32Type` values are stored in `ScalarValue::Date32`.
impl ScalarType<i32> for Date32Type {
    fn scalar(r: Option<i32>) -> ScalarValue {
        ScalarValue::Date32(r)
    }
}
5317
5318#[cfg(test)]
5319mod tests {
5320 use std::sync::Arc;
5321
5322 use super::*;
5323 use crate::cast::{as_list_array, as_map_array, as_struct_array};
5324 use crate::test_util::batches_to_string;
5325 use arrow::array::{
5326 FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
5327 NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
5328 StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
5329 };
5330 use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5331 use arrow::compute::{is_null, kernels};
5332 use arrow::datatypes::{
5333 ArrowNumericType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, Fields,
5334 Float64Type, TimeUnit,
5335 };
5336 use arrow::error::ArrowError;
5337 use arrow::util::pretty::pretty_format_columns;
5338 use chrono::NaiveDate;
5339 use insta::assert_snapshot;
5340 use rand::Rng;
5341
    /// A map array wrapped in `ScalarValue::Map` must come back unchanged from
    /// `to_array`.
    #[test]
    fn test_scalar_value_from_for_map() {
        let string_builder = StringBuilder::new();
        let int_builder = Int32Builder::with_capacity(4);
        let mut builder = MapBuilder::new(None, string_builder, int_builder);
        // First map row: one entry.
        builder.keys().append_value("joe");
        builder.values().append_value(1);
        builder.append(true).unwrap();

        // Second map row: two entries.
        builder.keys().append_value("blogs");
        builder.values().append_value(2);
        builder.keys().append_value("foo");
        builder.values().append_value(4);
        builder.append(true).unwrap();
        // Third row is an empty map, fourth row is null.
        builder.append(true).unwrap();
        builder.append(false).unwrap();

        let expected = builder.finish();

        let sv = ScalarValue::Map(Arc::new(expected.clone()));
        let map_arr = sv.to_array().unwrap();
        let actual = as_map_array(&map_arr).unwrap();
        assert_eq!(actual, &expected);
    }
5366
    /// Checks the timestamp type names used in error messages and the
    /// overflow detection of `ensure_timestamp_in_bounds`.
    #[test]
    fn test_format_timestamp_type_for_error_and_bounds() {
        // Type names are abbreviated by unit.
        let ts_ns = format_timestamp_type_for_error(&DataType::Timestamp(
            TimeUnit::Nanosecond,
            None,
        ));
        assert_eq!(ts_ns, "Timestamp(ns)");

        let ts_us = format_timestamp_type_for_error(&DataType::Timestamp(
            TimeUnit::Microsecond,
            None,
        ));
        assert_eq!(ts_us, "Timestamp(us)");

        // A small value multiplied by the per-day factor stays in range.
        let ok = ensure_timestamp_in_bounds(
            1000,
            NANOS_PER_DAY,
            &DataType::Date32,
            &DataType::Timestamp(TimeUnit::Nanosecond, None),
        );
        assert!(ok.is_ok());

        // This Date32 value is expected to be rejected with a descriptive message.
        let err = ensure_timestamp_in_bounds(
            2932896,
            NANOS_PER_DAY,
            &DataType::Date32,
            &DataType::Timestamp(TimeUnit::Nanosecond, None),
        );
        assert!(err.is_err());
        let msg = err.unwrap_err().to_string();
        assert!(msg.contains("Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range"));

        // One past the largest millisecond count that fits when scaled to nanoseconds.
        let overflow_millis: i64 = (i64::MAX / NANOS_PER_MILLISECOND) + 1;
        let err2 = ensure_timestamp_in_bounds(
            overflow_millis,
            NANOS_PER_MILLISECOND,
            &DataType::Date64,
            &DataType::Timestamp(TimeUnit::Nanosecond, None),
        );
        assert!(err2.is_err());
    }
5412
    /// A struct scalar built with `ScalarStructBuilder` must convert to the
    /// same array as a `StructArray` built directly from the same columns.
    #[test]
    fn test_scalar_value_from_for_struct() {
        let boolean = Arc::new(BooleanArray::from(vec![false]));
        let int = Arc::new(Int32Array::from(vec![42]));

        let expected = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::clone(&boolean) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::clone(&int) as ArrayRef,
            ),
        ]);

        let sv = ScalarStructBuilder::new()
            .with_array(Field::new("b", DataType::Boolean, false), boolean)
            .with_array(Field::new("c", DataType::Int32, false), int)
            .build()
            .unwrap();

        let struct_arr = sv.to_array().unwrap();
        let actual = as_struct_array(&struct_arr).unwrap();
        assert_eq!(actual, &expected);
    }
5439
    /// Building a struct scalar from four-row columns must fail; the `unwrap`
    /// turns that error into the expected panic message.
    #[test]
    #[should_panic(
        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
    )]
    fn test_scalar_value_from_for_struct_should_panic() {
        let _ = ScalarStructBuilder::new()
            .with_array(
                Field::new("bool", DataType::Boolean, false),
                Arc::new(BooleanArray::from(vec![false, true, false, false])),
            )
            .with_array(
                Field::new("i32", DataType::Int32, false),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
            )
            .build()
            .unwrap();
    }
5457
    /// `to_array_of_size` on nested scalars (struct and list) must repeat the
    /// wrapped array contents the requested number of times.
    #[test]
    fn test_to_array_of_size_for_nested() {
        // Struct case: a four-row struct array repeated twice gives eight rows.
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::clone(&boolean) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::clone(&int) as ArrayRef,
            ),
        ]);
        let sv = ScalarValue::Struct(Arc::new(struct_array));
        let actual_arr = sv.to_array_of_size(2).unwrap();

        let boolean = Arc::new(BooleanArray::from(vec![
            false, false, true, true, false, false, true, true,
        ]));
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::clone(&boolean) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::clone(&int) as ArrayRef,
            ),
        ]);

        let actual = as_struct_array(&actual_arr).unwrap();
        assert_eq!(actual, &struct_array);

        // List case: a single-row list repeated twice gives two identical rows.
        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
            Some(1),
            None,
            Some(2),
        ])]);

        let sv = ScalarValue::List(Arc::new(arr));
        let actual_arr = sv
            .to_array_of_size(2)
            .expect("Failed to convert to array of size");
        let actual_list_arr = actual_arr.as_list::<i32>();

        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(1), None, Some(2)]),
            Some(vec![Some(1), None, Some(2)]),
        ]);

        assert_eq!(&arr, actual_list_arr);
    }
5516
5517 #[test]
5518 fn test_to_array_of_size_for_fsl() {
5519 let values = Int32Array::from_iter([Some(1), None, Some(2)]);
5520 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5521 let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
5522 let sv = ScalarValue::FixedSizeList(Arc::new(arr));
5523 let actual_arr = sv
5524 .to_array_of_size(2)
5525 .expect("Failed to convert to array of size");
5526
5527 let expected_values =
5528 Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
5529 let expected_arr =
5530 FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
5531
5532 assert_eq!(
5533 &expected_arr,
5534 as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
5535 );
5536
5537 let empty_array = sv
5538 .to_array_of_size(0)
5539 .expect("Failed to convert to empty array");
5540
5541 assert_eq!(empty_array.len(), 0);
5542 }
5543
5544 #[test]
5545 fn test_to_array_of_size_list_size_one() {
5546 let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5548 Some(10),
5549 Some(20),
5550 ])]);
5551 let sv = ScalarValue::List(Arc::new(arr.clone()));
5552 let result = sv.to_array_of_size(1).unwrap();
5553 assert_eq!(result.as_list::<i32>(), &arr);
5554 }
5555
5556 #[test]
5557 fn test_to_array_of_size_list_empty_inner() {
5558 let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]);
5560 let sv = ScalarValue::List(Arc::new(arr));
5561 let result = sv.to_array_of_size(3).unwrap();
5562 let result_list = result.as_list::<i32>();
5563 assert_eq!(result_list.len(), 3);
5564 for i in 0..3 {
5565 assert_eq!(result_list.value(i).len(), 0);
5566 }
5567 }
5568
5569 #[test]
5570 fn test_to_array_of_size_large_list() {
5571 let arr =
5572 LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5573 Some(100),
5574 Some(200),
5575 ])]);
5576 let sv = ScalarValue::LargeList(Arc::new(arr));
5577 let result = sv.to_array_of_size(3).unwrap();
5578 let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5579 Some(vec![Some(100), Some(200)]),
5580 Some(vec![Some(100), Some(200)]),
5581 Some(vec![Some(100), Some(200)]),
5582 ]);
5583 assert_eq!(result.as_list::<i64>(), &expected);
5584 }
5585
5586 #[test]
5587 fn test_list_to_array_of_size_multi_row() {
5588 let arr = Int32Array::from(vec![Some(10), None, Some(30)]);
5590 let result = ScalarValue::list_to_array_of_size(&arr, 3).unwrap();
5591 let result = result.as_primitive::<Int32Type>();
5592 assert_eq!(
5593 result.iter().collect::<Vec<_>>(),
5594 vec![
5595 Some(10),
5596 None,
5597 Some(30),
5598 Some(10),
5599 None,
5600 Some(30),
5601 Some(10),
5602 None,
5603 Some(30),
5604 ]
5605 );
5606 }
5607
5608 #[test]
5609 fn test_to_array_of_size_null_list() {
5610 let dt = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5611 let sv = ScalarValue::try_from(&dt).unwrap();
5612 let result = sv.to_array_of_size(3).unwrap();
5613 assert_eq!(result.len(), 3);
5614 assert_eq!(result.null_count(), 3);
5615 }
5616
5617 #[test]
5619 fn test_to_array_of_size_for_none_fsb() {
5620 let sv = ScalarValue::FixedSizeBinary(5, None);
5621 let result = sv
5622 .to_array_of_size(2)
5623 .expect("Failed to convert to array of size");
5624 assert_eq!(result.len(), 2);
5625 assert_eq!(result.null_count(), 2);
5626 assert_eq!(result.as_fixed_size_binary().values().len(), 10);
5627 }
5628
5629 #[test]
5630 fn test_list_to_array_string() {
5631 let scalars = vec![
5632 ScalarValue::from("rust"),
5633 ScalarValue::from("arrow"),
5634 ScalarValue::from("data-fusion"),
5635 ];
5636
5637 let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
5638
5639 let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
5640 assert_eq!(*result, expected);
5641 }
5642
5643 fn single_row_list_array(items: Vec<&str>) -> ListArray {
5644 SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
5645 .build_list_array()
5646 }
5647
    /// Test helper: turns each optional list of optional `i64`s into a list
    /// scalar — `ScalarValue::LargeList` when `O::IS_LARGE`, otherwise
    /// `ScalarValue::List`. A `None` entry becomes a one-row null list array.
    fn build_list<O: OffsetSizeTrait>(
        values: Vec<Option<Vec<Option<i64>>>>,
    ) -> Vec<ScalarValue> {
        values
            .into_iter()
            .map(|v| {
                // Non-null input: build a one-row list array from the values.
                let arr = if v.is_some() {
                    Arc::new(
                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
                            vec![v],
                        ),
                    )
                } else if O::IS_LARGE {
                    // Null input: a one-row null array of the matching list type.
                    new_null_array(
                        &DataType::LargeList(Arc::new(Field::new_list_field(
                            DataType::Int64,
                            true,
                        ))),
                        1,
                    )
                } else {
                    new_null_array(
                        &DataType::List(Arc::new(Field::new_list_field(
                            DataType::Int64,
                            true,
                        ))),
                        1,
                    )
                };

                // Wrap in the scalar variant matching the offset width.
                if O::IS_LARGE {
                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
                } else {
                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
                }
            })
            .collect()
    }
5686
/// `iter_to_array` over fixed-size-list scalars: two populated rows of width
/// 3 surrounded by null rows are expected to come back as one fixed-size-list
/// array of width 3 with nulls in the first and last slots.
#[test]
fn test_iter_to_array_fixed_size_list() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    // Builds a single non-null row of width 3 holding `items`.
    let populated_row = |items: Vec<i32>| {
        Arc::new(FixedSizeListArray::new(
            Arc::clone(&item_field),
            3,
            Arc::new(Int32Array::from(items)),
            None,
        ))
    };
    let first = populated_row(vec![1, 2, 3]);
    let second = populated_row(vec![4, 5, 6]);
    // Note: the null row is declared with width 1, unlike the populated rows.
    let null_row = Arc::new(FixedSizeListArray::new_null(item_field, 1, 1));

    let scalars = vec![
        ScalarValue::FixedSizeList(Arc::clone(&null_row)),
        ScalarValue::FixedSizeList(first),
        ScalarValue::FixedSizeList(second),
        ScalarValue::FixedSizeList(null_row),
    ];

    let actual = ScalarValue::iter_to_array(scalars).unwrap();

    let expected_rows = vec![
        None,
        Some(vec![Some(1), Some(2), Some(3)]),
        Some(vec![Some(4), Some(5), Some(6)]),
        None,
    ];
    let expected =
        FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(expected_rows, 3);
    assert_eq!(actual.as_ref(), &expected);
}
5724
/// Two identical one-row struct scalars passed through `iter_to_array` must
/// equal a two-row struct array with the same fields.
#[test]
fn test_iter_to_array_struct() {
    // Builds a struct array with non-nullable fields "A" (Boolean) and "B" (Int32).
    let make_struct = |flags: Vec<bool>, numbers: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("A", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(flags)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int32, false)),
                Arc::new(Int32Array::from(numbers)) as ArrayRef,
            ),
        ])
    };

    let first = make_struct(vec![false], vec![42]);
    let second = make_struct(vec![false], vec![42]);
    let scalars = vec![
        ScalarValue::Struct(Arc::new(first)),
        ScalarValue::Struct(Arc::new(second)),
    ];

    let actual = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = make_struct(vec![false, false], vec![42, 42]);
    assert_eq!(actual.as_ref(), &expected);
}
5768
/// Struct-level validity must survive `iter_to_array`: the first input row is
/// built with validity byte 1 and the second with validity byte 0, and the
/// result is checked to be valid at row 0 and null at row 1.
#[test]
fn test_iter_to_array_struct_with_nulls() {
    // Builds a one-row struct ("A": Int32, "B": Int64) with the given validity byte.
    let one_row = |a: i32, b: i64, validity: u8| {
        StructArray::from((
            vec![
                (
                    Arc::new(Field::new("A", DataType::Int32, false)),
                    Arc::new(Int32Array::from(vec![a])) as ArrayRef,
                ),
                (
                    Arc::new(Field::new("B", DataType::Int64, false)),
                    Arc::new(Int64Array::from(vec![b])) as ArrayRef,
                ),
            ],
            Buffer::from(&[validity]),
        ))
    };

    let valid_row = one_row(1, 2, 1);
    let null_row = one_row(3, 4, 0);
    let scalars = vec![
        ScalarValue::Struct(Arc::new(valid_row)),
        ScalarValue::Struct(Arc::new(null_row)),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let combined = combined.as_struct();
    assert!(combined.is_valid(0));
    assert!(combined.is_null(1));
}
5812
/// `iter_to_array` over list scalars produced by `build_list` must rebuild
/// the equivalent `ListArray` / `LargeListArray`, including the null row.
#[test]
fn iter_to_array_primitive_test() {
    // Same three rows (values, null, values) are used for both offset sizes.
    fn sample_rows() -> Vec<Option<Vec<Option<i64>>>> {
        vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), Some(5)]),
        ]
    }

    // 32-bit offsets
    let small = ScalarValue::iter_to_array(build_list::<i32>(sample_rows())).unwrap();
    let small_list = as_list_array(&small).unwrap();
    let small_expected =
        ListArray::from_iter_primitive::<Int64Type, _, _>(sample_rows());
    assert_eq!(small_list, &small_expected);

    // 64-bit offsets
    let large = ScalarValue::iter_to_array(build_list::<i64>(sample_rows())).unwrap();
    let large_list = as_large_list_array(&large).unwrap();
    let large_expected =
        LargeListArray::from_iter_primitive::<Int64Type, _, _>(sample_rows());
    assert_eq!(large_list, &large_expected);
}
5847
/// `iter_to_array` over two single-row string list scalars must equal a
/// two-row list-of-strings array assembled with `ListBuilder`.
#[test]
fn iter_to_array_string_test() {
    let rows = [vec!["foo", "bar", "baz"], vec!["rust", "world"]];

    let scalars: Vec<ScalarValue> = rows
        .iter()
        .map(|row| ScalarValue::List(Arc::new(single_row_list_array(row.clone()))))
        .collect();

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let actual = combined.as_list::<i32>();

    // Build the expected array row by row with the builder API.
    let mut expected_builder = ListBuilder::new(StringBuilder::with_capacity(5, 25));
    for row in &rows {
        for item in row {
            expected_builder.values().append_value(*item);
        }
        expected_builder.append(true);
    }
    let expected = expected_builder.finish();

    assert_eq!(actual, &expected);
}
5877
/// For every row of each list array, a `ScalarValue::List` built from that
/// one-row slice must compare equal to the array at the same index.
#[test]
fn test_list_scalar_eq_to_array() {
    let lists: [ArrayRef; 2] = [
        // rows of differing length, with a null item in the last row
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ])),
        // rows that all have three items, with a null item in the last row
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(5)]),
        ])),
    ];

    for list in lists {
        for row in 0..list.len() {
            let one_row = list.slice(row, 1);
            let scalar = ScalarValue::List(one_row.as_list::<i32>().to_owned().into());
            assert!(scalar.eq_array(&list, row).unwrap());
        }
    }
}
5902
/// Comparing a Utf8 scalar against an Int32 array must fail with an error
/// whose message starts with the expected downcast description.
#[test]
fn test_eq_array_err_message() {
    let scalar = ScalarValue::Utf8(Some("123".to_string()));
    let int_array = Arc::new(Int32Array::from(vec![123])) as ArrayRef;

    let err = scalar.eq_array(&int_array, 0).unwrap_err();

    assert_starts_with(
        err.message(),
        "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
    );
}
5913
/// `ScalarValue::add` accepts its right-hand operand both by reference and by
/// value; 123.0 + 123.0 must be 246.0 either way.
#[test]
fn scalar_add_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(246.));

    let by_reference = lhs.add(&rhs)?;
    assert_eq!(by_reference, expected);

    let by_value = lhs.add(rhs)?;
    assert_eq!(by_value, expected);

    Ok(())
}
5928
/// `ScalarValue::sub` accepts its right-hand operand both by reference and by
/// value; 123.0 - 123.0 must be 0.0 either way.
#[test]
fn scalar_sub_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(0.));

    let by_reference = lhs.sub(&rhs)?;
    assert_eq!(by_reference, expected);

    let by_value = lhs.sub(rhs)?;
    assert_eq!(by_value, expected);

    Ok(())
}
5943
/// Int32 subtraction in both directions: 42 - 100 = -58 and 100 - 42 = 58.
#[test]
fn scalar_sub_trait_int32_test() -> Result<()> {
    let small = ScalarValue::Int32(Some(42));
    let big = ScalarValue::Int32(Some(100));

    let small_minus_big = small.sub(&big)?;
    assert_eq!(small_minus_big, ScalarValue::Int32(Some(-58)));

    let big_minus_small = big.sub(small)?;
    assert_eq!(big_minus_small, ScalarValue::Int32(Some(58)));

    Ok(())
}
5952
/// `sub_checked` on `i32::MAX - i32::MIN` must report an arithmetic overflow
/// with the exact message asserted below.
#[test]
fn scalar_sub_trait_int32_overflow_test() {
    let max = ScalarValue::Int32(Some(i32::MAX));
    let min = ScalarValue::Int32(Some(i32::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
    );
}
5966
/// Int64 subtraction in both directions: 42 - 100 = -58 and 100 - 42 = 58.
#[test]
fn scalar_sub_trait_int64_test() -> Result<()> {
    let small = ScalarValue::Int64(Some(42));
    let big = ScalarValue::Int64(Some(100));

    let small_minus_big = small.sub(&big)?;
    assert_eq!(small_minus_big, ScalarValue::Int64(Some(-58)));

    let big_minus_small = big.sub(small)?;
    assert_eq!(big_minus_small, ScalarValue::Int64(Some(58)));

    Ok(())
}
5975
/// `sub_checked` on `i64::MAX - i64::MIN` must report an arithmetic overflow
/// with the exact message asserted below.
#[test]
fn scalar_sub_trait_int64_overflow_test() {
    let max = ScalarValue::Int64(Some(i64::MAX));
    let min = ScalarValue::Int64(Some(i64::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808"
    );
}
5989
/// Runs `check_scalar_add_overflow` with `MAX + MAX` for every signed and
/// unsigned integer width.
#[test]
fn scalar_add_overflow_test() -> Result<()> {
    // Expands to one `check_scalar_add_overflow::<ArrowType>(MAX, MAX)` call
    // per `ArrowType => Variant(native)` entry, in the order listed.
    macro_rules! check_max_plus_max {
        ($($arrow_type:ident => $variant:ident($native:ty)),+ $(,)?) => {$(
            check_scalar_add_overflow::<$arrow_type>(
                ScalarValue::$variant(Some(<$native>::MAX)),
                ScalarValue::$variant(Some(<$native>::MAX)),
            );
        )+};
    }

    check_max_plus_max!(
        Int8Type => Int8(i8),
        UInt8Type => UInt8(u8),
        Int16Type => Int16(i16),
        UInt16Type => UInt16(u16),
        Int32Type => Int32(i32),
        UInt32Type => UInt32(u32),
        Int64Type => Int64(i64),
        UInt64Type => UInt64(u64),
    );

    Ok(())
}
6027
6028 fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6030 where
6031 T: ArrowNumericType,
6032 {
6033 let scalar_result = left.add_checked(&right);
6034
6035 let left_array = left.to_array().expect("Failed to convert to array");
6036 let right_array = right.to_array().expect("Failed to convert to array");
6037 let arrow_left_array = left_array.as_primitive::<T>();
6038 let arrow_right_array = right_array.as_primitive::<T>();
6039 let arrow_result = add(arrow_left_array, arrow_right_array);
6040
6041 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6042 }
6043
/// Adding an interval to a timestamp must commute: `interval + timestamp`
/// has to equal `timestamp + interval` for each of the three interval kinds
/// checked below.
#[test]
fn test_interval_add_timestamp() -> Result<()> {
    // month / day / nanosecond interval
    let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
        months: 1,
        days: 2,
        nanoseconds: 3,
    }));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);

    // year-month interval
    let interval = ScalarValue::IntervalYearMonth(Some(123));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);

    // day-time interval
    let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
        days: 1,
        milliseconds: 23,
    }));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);
    Ok(())
}
6072
/// `ScalarValue::try_cmp`: same-type values order normally, a null Int32 is
/// expected to sort before `Int32(2)`, and mixing Int32 with Int64 is an error.
#[test]
fn test_try_cmp() {
    let one = ScalarValue::Int32(Some(1));
    let two = ScalarValue::Int32(Some(2));

    let ordered = ScalarValue::try_cmp(&one, &two).unwrap();
    assert_eq!(ordered, Ordering::Less);

    let null_first = ScalarValue::try_cmp(&ScalarValue::Int32(None), &two).unwrap();
    assert_eq!(null_first, Ordering::Less);

    let mismatch =
        ScalarValue::try_cmp(&one, &ScalarValue::Int64(Some(2))).unwrap_err();
    assert_starts_with(
        mismatch.message(),
        "Uncomparable values: Int32(1), Int64(2)",
    );
}
6098
/// End-to-end checks for `ScalarValue::Decimal128`: data type, conversion to
/// `i128`, negation, conversion to arrays, equality against arrays,
/// comparisons, and `iter_to_array`.
#[test]
fn scalar_decimal_test() -> Result<()> {
    let value_type = DataType::Decimal128(10, 1);
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
    assert_eq!(value_type, decimal_value.data_type());
    let raw: i128 = decimal_value.clone().try_into().unwrap();
    assert_eq!(123_i128, raw);
    assert!(!decimal_value.is_null());

    // Negation must stay a Decimal128 and flip the sign.
    let ScalarValue::Decimal128(negated, _, _) = decimal_value.arithmetic_negate()?
    else {
        unreachable!();
    };
    assert_eq!(-123, negated.unwrap());

    // scalar -> one-row array
    let single = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single_decimal = as_decimal128_array(&single)?;
    assert_eq!(1, single_decimal.len());
    assert_eq!(value_type, single_decimal.data_type().clone());
    assert_eq!(123i128, single_decimal.value(0));

    // scalar -> ten-row array
    let repeated = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated_decimal = as_decimal128_array(&repeated)?;
    assert_eq!(10, repeated.len());
    assert_eq!(value_type, repeated.data_type().clone());
    assert_eq!(123i128, repeated_decimal.value(0));
    assert_eq!(123i128, repeated_decimal.value(9));

    // equality against array rows
    for row in [1, 5] {
        assert!(
            decimal_value
                .eq_array(&repeated, row)
                .expect("Failed to compare arrays")
        );
    }

    // round trip from the array back to a scalar
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated, 5).unwrap()
    );

    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // comparisons with identical precision and scale
    let left = ScalarValue::Decimal128(Some(123), 10, 2);
    let right = ScalarValue::Decimal128(Some(124), 10, 2);
    assert!(!left.eq(&right));
    assert!(left < right);
    assert!(left <= right);

    // a different scale makes the values incomparable
    let other_scale = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, left.partial_cmp(&other_scale));

    // vector of non-null decimals -> array
    let list_type = DataType::Decimal128(10, 2);
    let non_null: Vec<ScalarValue> = (1_i128..=3)
        .map(|raw| ScalarValue::Decimal128(Some(raw), 10, 2))
        .collect();
    let array = ScalarValue::iter_to_array(non_null).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(list_type, array.data_type().clone());

    // same values followed by a null
    let with_null: Vec<ScalarValue> = (1_i128..=3)
        .map(|raw| ScalarValue::Decimal128(Some(raw), 10, 2))
        .chain(std::iter::once(ScalarValue::Decimal128(None, 10, 2)))
        .collect();
    let array = ScalarValue::iter_to_array(with_null).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(list_type, array.data_type().clone());

    for (row, raw) in [1_i128, 2, 3].into_iter().enumerate() {
        assert!(
            ScalarValue::try_new_decimal128(raw, 10, 2)
                .unwrap()
                .eq_array(&array, row)
                .expect("Failed to compare arrays")
        );
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
6214
/// `ScalarValue::new_one` for `Decimal128`: checks the raw value produced for
/// several precision/scale pairs and that invalid pairs are rejected.
#[test]
fn test_new_one_decimal128() {
    // (precision, scale, expected raw value)
    let accepted = [
        (5_u8, 0_i8, 1_i128),
        (5, 1, 10),
        (5, 2, 100),
        // larger precision, same scale
        (7, 2, 100),
    ];
    for (precision, scale, raw) in accepted {
        assert_eq!(
            ScalarValue::new_one(&DataType::Decimal128(precision, scale)).unwrap(),
            ScalarValue::Decimal128(Some(raw), precision, scale)
        );
    }

    // negative scale, zero precision, scale larger than precision
    let rejected = [(5_u8, -1_i8), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_one(&DataType::Decimal128(precision, scale)).is_err());
    }
}
6240
/// `ScalarValue::new_one` for `Decimal256`: checks the raw value produced for
/// several precision/scale pairs and that invalid pairs are rejected.
#[test]
fn test_new_one_decimal256() {
    // (precision, scale, expected raw value)
    let accepted = [
        (5_u8, 0_i8, 1_i32),
        (5, 1, 10),
        (5, 2, 100),
        // larger precision, same scale
        (7, 2, 100),
    ];
    for (precision, scale, raw) in accepted {
        assert_eq!(
            ScalarValue::new_one(&DataType::Decimal256(precision, scale)).unwrap(),
            ScalarValue::Decimal256(Some(raw.into()), precision, scale)
        );
    }

    // negative scale, zero precision, scale larger than precision
    let rejected = [(5_u8, -1_i8), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_one(&DataType::Decimal256(precision, scale)).is_err());
    }
}
6266
/// `ScalarValue::new_ten` for `Decimal128`: checks the raw value produced for
/// several precision/scale pairs and that invalid pairs are rejected.
#[test]
fn test_new_ten_decimal128() {
    // (precision, scale, expected raw value)
    let accepted = [
        (5_u8, 1_i8, 100_i128),
        (5, 2, 1000),
        // larger precision, same scale
        (7, 2, 1000),
    ];
    for (precision, scale, raw) in accepted {
        assert_eq!(
            ScalarValue::new_ten(&DataType::Decimal128(precision, scale)).unwrap(),
            ScalarValue::Decimal128(Some(raw), precision, scale)
        );
    }

    // negative scale, zero precision, scale larger than precision
    let rejected = [(5_u8, -1_i8), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_ten(&DataType::Decimal128(precision, scale)).is_err());
    }
}
6288
/// `ScalarValue::new_ten` for `Decimal256`: checks the raw value produced for
/// several precision/scale pairs and that invalid pairs are rejected.
#[test]
fn test_new_ten_decimal256() {
    // (precision, scale, expected raw value)
    let accepted = [
        (5_u8, 1_i8, 100_i32),
        (5, 2, 1000),
        // larger precision, same scale
        (7, 2, 1000),
    ];
    for (precision, scale, raw) in accepted {
        assert_eq!(
            ScalarValue::new_ten(&DataType::Decimal256(precision, scale)).unwrap(),
            ScalarValue::Decimal256(Some(raw.into()), precision, scale)
        );
    }

    // negative scale, zero precision, scale larger than precision
    let rejected = [(5_u8, -1_i8), (0, 2), (5, 7)];
    for (precision, scale) in rejected {
        assert!(ScalarValue::new_ten(&DataType::Decimal256(precision, scale)).is_err());
    }
}
6310
/// `ScalarValue::new_negative_one` for `Decimal128` with precision 5: scale 0
/// must give raw -1 and scale 2 must give raw -100.
#[test]
fn test_new_negative_one_decimal128() {
    // (scale, expected raw value)
    for (scale, raw) in [(0_i8, -1_i128), (2, -100)] {
        let actual =
            ScalarValue::new_negative_one(&DataType::Decimal128(5, scale)).unwrap();
        assert_eq!(actual, ScalarValue::Decimal128(Some(raw), 5, scale));
    }
}
6322
/// `partial_cmp` between single-row list scalars of Int64 items, covering
/// `List`, `LargeList` and `FixedSizeList`.
#[test]
fn test_list_partial_cmp() {
    // Each helper wraps `items` as the only row of the respective list kind.
    fn list(items: Vec<Option<i64>>) -> ScalarValue {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(items)]),
        ))
    }
    fn large_list(items: Vec<Option<i64>>) -> ScalarValue {
        ScalarValue::LargeList(Arc::new(
            LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(items)]),
        ))
    }
    fn fixed_list(items: Vec<Option<i64>>) -> ScalarValue {
        ScalarValue::FixedSizeList(Arc::new(
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
                vec![Some(items)],
                3,
            ),
        ))
    }

    // (left items, right items, expected ordering of left relative to right)
    let list_cases = [
        // identical contents
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Equal,
        ),
        // first item differs
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(30)],
            Ordering::Greater,
        ),
        // last item differs
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(10), Some(2), Some(30)],
            Ordering::Less,
        ),
        // different lengths, first item differs
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(2), Some(3)],
            Ordering::Less,
        ),
        (
            vec![Some(2), Some(3), Some(4)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        // right side is a prefix of the left side
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        // null first item on the left
        (
            vec![None, Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Greater,
        ),
    ];
    for (left_items, right_items, expected) in list_cases {
        let a = list(left_items);
        let b = list(right_items);
        assert_eq!(a.partial_cmp(&b), Some(expected));
    }

    let a = large_list(vec![None, Some(2), Some(3)]);
    let b = large_list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    let a = fixed_list(vec![None, Some(2), Some(3)]);
    let b = fixed_list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
}
6482
/// `to_array` on UInt64 scalars: a present value yields a one-row array
/// holding it, and a null scalar yields a one-row null array.
#[test]
fn scalar_value_to_array_u64() -> Result<()> {
    let present = ScalarValue::UInt64(Some(13u64))
        .to_array()
        .expect("Failed to convert to array");
    let present_values = as_uint64_array(&present)?;
    assert_eq!(present_values.len(), 1);
    assert!(!present_values.is_null(0));
    assert_eq!(present_values.value(0), 13);

    let absent = ScalarValue::UInt64(None)
        .to_array()
        .expect("Failed to convert to array");
    let absent_values = as_uint64_array(&absent)?;
    assert_eq!(absent_values.len(), 1);
    assert!(absent_values.is_null(0));

    Ok(())
}
6499
/// `to_array` on UInt32 scalars: a present value yields a one-row array
/// holding it, and a null scalar yields a one-row null array.
#[test]
fn scalar_value_to_array_u32() -> Result<()> {
    let present = ScalarValue::UInt32(Some(13u32))
        .to_array()
        .expect("Failed to convert to array");
    let present_values = as_uint32_array(&present)?;
    assert_eq!(present_values.len(), 1);
    assert!(!present_values.is_null(0));
    assert_eq!(present_values.value(0), 13);

    let absent = ScalarValue::UInt32(None)
        .to_array()
        .expect("Failed to convert to array");
    let absent_values = as_uint32_array(&absent)?;
    assert_eq!(absent_values.len(), 1);
    assert!(absent_values.is_null(0));

    Ok(())
}
6516
/// `new_list_nullable` with no input scalars must still produce one row,
/// backed by an empty values array.
#[test]
fn scalar_list_null_to_array() {
    let empty_list = ScalarValue::new_list_nullable(&[], &DataType::UInt64);

    let row_count = empty_list.len();
    let value_count = empty_list.values().len();

    assert_eq!(row_count, 1);
    assert_eq!(value_count, 0);
}
6524
/// `new_large_list` with no input scalars must still produce one row, backed
/// by an empty values array.
#[test]
fn scalar_large_list_null_to_array() {
    let empty_list = ScalarValue::new_large_list(&[], &DataType::UInt64);

    let row_count = empty_list.len();
    let value_count = empty_list.values().len();

    assert_eq!(row_count, 1);
    assert_eq!(value_count, 0);
}
6532
/// `new_list_nullable` over three UInt64 scalars (one of them null) must give
/// a one-row list whose row holds the three items with the null in the middle.
#[test]
fn scalar_list_to_array() -> Result<()> {
    let items: Vec<ScalarValue> = [Some(100), None, Some(101)]
        .into_iter()
        .map(ScalarValue::UInt64)
        .collect();

    let list = ScalarValue::new_list_nullable(&items, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    let first_row = list.value(0);
    let row_values = as_uint64_array(&first_row)?;
    assert_eq!(row_values.len(), 3);
    assert_eq!(row_values.value(0), 100);
    assert!(row_values.is_null(1));
    assert_eq!(row_values.value(2), 101);

    Ok(())
}
6552
/// `new_large_list` over three UInt64 scalars (one of them null) must give a
/// one-row list whose row holds the three items with the null in the middle.
#[test]
fn scalar_large_list_to_array() -> Result<()> {
    let items: Vec<ScalarValue> = [Some(100), None, Some(101)]
        .into_iter()
        .map(ScalarValue::UInt64)
        .collect();

    let list = ScalarValue::new_large_list(&items, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    let first_row = list.value(0);
    let row_values = as_uint64_array(&first_row)?;
    assert_eq!(row_values.len(), 3);
    assert_eq!(row_values.value(0), 100);
    assert!(row_values.is_null(1));
    assert_eq!(row_values.value(2), 101);

    Ok(())
}
6572
/// Builds an array from `$INPUT` in two ways and asserts the results match:
/// once through `ScalarValue::iter_to_array` over `ScalarValue::$SCALAR_T`
/// values, and once directly with `$ARRAYTYPE::from`.
///
/// `$INPUT` is expanded twice, so it should be a side-effect-free expression
/// (the callers in this module pass `vec![...]` literals).
macro_rules! check_scalar_iter {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let scalars: Vec<_> =
            $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();

        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&array, &expected);
    }};
}
6586
/// Same check as `check_scalar_iter`, for scalar variants that take a second
/// argument (passed as `None` here; the callers in this module use it for the
/// timestamp variants).
///
/// `$INPUT` is expanded twice, so it should be a side-effect-free expression.
macro_rules! check_scalar_iter_tz {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let scalars: Vec<_> = $INPUT
            .iter()
            .map(|v| ScalarValue::$SCALAR_T(*v, None))
            .collect();

        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&array, &expected);
    }};
}
6603
/// Same check as `check_scalar_iter`, for string-like variants: each optional
/// input element is converted with `to_string()` before being wrapped in
/// `ScalarValue::$SCALAR_T`, while the expected array is built from `$INPUT`
/// directly.
///
/// `$INPUT` is expanded twice, so it should be a side-effect-free expression.
macro_rules! check_scalar_iter_string {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let scalars: Vec<_> = $INPUT
            .iter()
            .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
            .collect();

        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&array, &expected);
    }};
}
6620
/// Same check as `check_scalar_iter`, for binary-like variants: each optional
/// input element is converted with `to_vec()` before being wrapped in
/// `ScalarValue::$SCALAR_T`, and the expected array is collected from the
/// same `to_vec()`-converted elements rather than built with `from`.
///
/// `$INPUT` is expanded twice, so it should be a side-effect-free expression.
macro_rules! check_scalar_iter_binary {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let scalars: Vec<_> = $INPUT
            .iter()
            .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
            .collect();

        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: $ARRAYTYPE =
            $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();

        let expected: ArrayRef = Arc::new(expected);

        assert_eq!(&array, &expected);
    }};
}
6640
/// Runs the `check_scalar_iter*` macros over a range of scalar variants.
/// Despite the name, this covers more than booleans: floats, signed and
/// unsigned integers, timestamps, strings and binary values. Every input
/// contains a null in the middle position.
#[test]
fn scalar_iter_to_array_boolean() {
    // boolean and floating point
    check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
    check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
    check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);

    // signed integers
    check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);

    // unsigned integers
    check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);

    // timestamps (the macro passes `None` as the variant's second argument)
    check_scalar_iter_tz!(
        TimestampSecond,
        TimestampSecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMillisecond,
        TimestampMillisecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMicrosecond,
        TimestampMicrosecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampNanosecond,
        TimestampNanosecondArray,
        vec![Some(1), None, Some(3)]
    );

    // strings
    check_scalar_iter_string!(
        Utf8,
        StringArray,
        vec![Some("foo"), None, Some("bar")]
    );
    check_scalar_iter_string!(
        LargeUtf8,
        LargeStringArray,
        vec![Some("foo"), None, Some("bar")]
    );
    // binary
    check_scalar_iter_binary!(
        Binary,
        BinaryArray,
        [Some(b"foo"), None, Some(b"bar")]
    );
    check_scalar_iter_binary!(
        LargeBinary,
        LargeBinaryArray,
        [Some(b"foo"), None, Some(b"bar")]
    );
}
6699
/// `iter_to_array` on an empty input must fail with the "Empty iterator"
/// error message.
#[test]
fn scalar_iter_to_array_empty() {
    let no_scalars: Vec<ScalarValue> = Vec::new();

    let err = ScalarValue::iter_to_array(no_scalars).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("Empty iterator passed to ScalarValue::iter_to_array"),
        "{}",
        err
    );
}
6713
/// `iter_to_array` over Int32-keyed dictionary scalars of Utf8 values must
/// give a dictionary array that decodes back to the original optional
/// strings, with every non-null key pointing at a valid value.
#[test]
fn scalar_iter_to_dictionary() {
    // Wraps an optional string in an Int32-keyed dictionary scalar.
    let dict_scalar = |text: Option<&str>| {
        ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Utf8(text.map(String::from))),
        )
    };

    let scalars = [
        dict_scalar(Some("Foo")),
        dict_scalar(None),
        dict_scalar(Some("Bar")),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let dict = as_dictionary_array::<Int32Type>(&array).unwrap();
    let dict_values = as_string_array(dict.values()).unwrap();

    // Resolve each key to the string it points at.
    let mut decoded = Vec::new();
    for key in dict.keys_iter() {
        let entry = match key {
            Some(index) => {
                assert!(dict_values.is_valid(index));
                Some(dict_values.value(index))
            }
            None => None,
        };
        decoded.push(entry);
    }

    assert_eq!(decoded, vec![Some("Foo"), None, Some("Bar")]);
}
6745
/// `iter_to_array` must reject scalars whose types differ from the first one
/// and say so in the error message.
#[test]
fn scalar_iter_to_array_mismatched_types() {
    // A Boolean followed by an Int32: the second element has the wrong type.
    let mixed = [
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Int32(Some(5)),
    ];

    let err = ScalarValue::iter_to_array(mixed).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
        "{}",
        err
    );
}
6756
/// `try_from_array` must return the stored value for a valid slot and a
/// typed null scalar for a null slot.
#[test]
fn scalar_try_from_array_null() {
    let array: ArrayRef = Arc::new(Int64Array::from(vec![Some(33), None]));

    for (row, expected) in [Some(33), None].into_iter().enumerate() {
        assert_eq!(
            ScalarValue::Int64(expected),
            ScalarValue::try_from_array(&array, row).unwrap()
        );
    }
}
6771
/// Scalars extracted from a list array must report the `List<Int32>` data
/// type whether the source row is populated or null.
#[test]
fn scalar_try_from_array_list_array_null() {
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        None,
    ]);

    let populated = ScalarValue::try_from_array(&list, 0).unwrap();
    let null_row = ScalarValue::try_from_array(&list, 1).unwrap();

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let expected_type = DataType::List(item_field);

    for scalar in [&populated, &null_row] {
        assert_eq!(scalar.data_type(), expected_type);
    }
}
6788
6789 #[test]
6790 fn scalar_try_from_list_datatypes() {
6791 let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
6792
6793 let data_type = &DataType::List(Arc::clone(&inner_field));
6795 let scalar: ScalarValue = data_type.try_into().unwrap();
6796 let expected = ScalarValue::List(
6797 new_null_array(data_type, 1)
6798 .as_list::<i32>()
6799 .to_owned()
6800 .into(),
6801 );
6802 assert_eq!(expected, scalar);
6803 assert!(expected.is_null());
6804
6805 let data_type = &DataType::LargeList(Arc::clone(&inner_field));
6807 let scalar: ScalarValue = data_type.try_into().unwrap();
6808 let expected = ScalarValue::LargeList(
6809 new_null_array(data_type, 1)
6810 .as_list::<i64>()
6811 .to_owned()
6812 .into(),
6813 );
6814 assert_eq!(expected, scalar);
6815 assert!(expected.is_null());
6816
6817 let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
6819 let scalar: ScalarValue = data_type.try_into().unwrap();
6820 let expected = ScalarValue::FixedSizeList(
6821 new_null_array(data_type, 1)
6822 .as_fixed_size_list()
6823 .to_owned()
6824 .into(),
6825 );
6826 assert_eq!(expected, scalar);
6827 assert!(expected.is_null());
6828 }
6829
6830 #[test]
6831 fn scalar_try_from_list_of_list() {
6832 let data_type = DataType::List(Arc::new(Field::new_list_field(
6833 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6834 true,
6835 )));
6836 let data_type = &data_type;
6837 let scalar: ScalarValue = data_type.try_into().unwrap();
6838
6839 let expected = ScalarValue::List(
6840 new_null_array(
6841 &DataType::List(Arc::new(Field::new_list_field(
6842 DataType::List(Arc::new(Field::new_list_field(
6843 DataType::Int32,
6844 true,
6845 ))),
6846 true,
6847 ))),
6848 1,
6849 )
6850 .as_list::<i32>()
6851 .to_owned()
6852 .into(),
6853 );
6854
6855 assert_eq!(expected, scalar)
6856 }
6857
6858 #[test]
6859 fn scalar_try_from_not_equal_list_nested_list() {
6860 let list_data_type =
6861 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6862 let data_type = &list_data_type;
6863 let list_scalar: ScalarValue = data_type.try_into().unwrap();
6864
6865 let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
6866 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6867 true,
6868 )));
6869 let data_type = &nested_list_data_type;
6870 let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
6871
6872 assert_ne!(list_scalar, nested_list_scalar);
6873 }
6874
6875 #[test]
6876 fn scalar_try_from_dict_datatype() {
6877 let data_type =
6878 DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
6879 let data_type = &data_type;
6880 let expected = ScalarValue::Dictionary(
6881 Box::new(DataType::Int8),
6882 Box::new(ScalarValue::Utf8(None)),
6883 );
6884 assert_eq!(expected, data_type.try_into().unwrap())
6885 }
6886
6887 #[test]
6888 fn size_of_scalar() {
6889 assert_eq!(size_of::<ScalarValue>(), 64);
6900 }
6901
6902 #[test]
6903 fn memory_size() {
6904 let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
6905 assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
6906 let sv_size = sv.size();
6907
6908 let mut v = Vec::with_capacity(10);
6909 v.push(sv);
6911 assert_eq!(v.capacity(), 10);
6912 assert_eq!(
6913 ScalarValue::size_of_vec(&v),
6914 size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
6915 );
6916
6917 let mut s = HashSet::with_capacity(0);
6918 s.insert(v.pop().unwrap());
6920 let s_capacity = s.capacity();
6922 assert_eq!(
6923 ScalarValue::size_of_hashset(&s),
6924 size_of::<HashSet<ScalarValue>>()
6925 + ((s_capacity - 1) * size_of::<ScalarValue>())
6926 + sv_size,
6927 );
6928 }
6929
6930 #[test]
6931 fn scalar_eq_array() {
6932 macro_rules! make_typed_vec {
6934 ($INPUT:expr, $TYPE:ident) => {{
6935 $INPUT
6936 .iter()
6937 .map(|v| v.map(|v| v as $TYPE))
6938 .collect::<Vec<_>>()
6939 }};
6940 }
6941
6942 let bool_vals = [Some(true), None, Some(false)];
6943 let f32_vals = [Some(-1.0), None, Some(1.0)];
6944 let f64_vals = make_typed_vec!(f32_vals, f64);
6945
6946 let i8_vals = [Some(-1), None, Some(1)];
6947 let i16_vals = make_typed_vec!(i8_vals, i16);
6948 let i32_vals = make_typed_vec!(i8_vals, i32);
6949 let i64_vals = make_typed_vec!(i8_vals, i64);
6950
6951 let u8_vals = [Some(0), None, Some(1)];
6952 let u16_vals = make_typed_vec!(u8_vals, u16);
6953 let u32_vals = make_typed_vec!(u8_vals, u32);
6954 let u64_vals = make_typed_vec!(u8_vals, u64);
6955
6956 let str_vals = [Some("foo"), None, Some("bar")];
6957
6958 let interval_dt_vals = [
6959 Some(IntervalDayTime::MINUS_ONE),
6960 None,
6961 Some(IntervalDayTime::ONE),
6962 ];
6963 let interval_mdn_vals = [
6964 Some(IntervalMonthDayNano::MINUS_ONE),
6965 None,
6966 Some(IntervalMonthDayNano::ONE),
6967 ];
6968
6969 #[derive(Debug)]
6973 struct TestCase {
6974 array: ArrayRef,
6975 scalars: Vec<ScalarValue>,
6976 }
6977
6978 macro_rules! make_test_case {
6980 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
6981 TestCase {
6982 array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6983 scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
6984 }
6985 }};
6986
6987 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
6988 let tz = $TZ;
6989 TestCase {
6990 array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6991 scalars: $INPUT
6992 .iter()
6993 .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
6994 .collect(),
6995 }
6996 }};
6997 }
6998
6999 macro_rules! make_str_test_case {
7000 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7001 TestCase {
7002 array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7003 scalars: $INPUT
7004 .iter()
7005 .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
7006 .collect(),
7007 }
7008 }};
7009 }
7010
7011 macro_rules! make_binary_test_case {
7012 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7013 TestCase {
7014 array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7015 scalars: $INPUT
7016 .iter()
7017 .map(|v| {
7018 ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
7019 })
7020 .collect(),
7021 }
7022 }};
7023 }
7024
7025 macro_rules! make_str_dict_test_case {
7027 ($INPUT:expr, $INDEX_TY:ident) => {{
7028 TestCase {
7029 array: Arc::new(
7030 $INPUT
7031 .iter()
7032 .cloned()
7033 .collect::<DictionaryArray<$INDEX_TY>>(),
7034 ),
7035 scalars: $INPUT
7036 .iter()
7037 .map(|v| {
7038 ScalarValue::Dictionary(
7039 Box::new($INDEX_TY::DATA_TYPE),
7040 Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
7041 )
7042 })
7043 .collect(),
7044 }
7045 }};
7046 }
7047
7048 let cases = vec![
7049 make_test_case!(bool_vals, BooleanArray, Boolean),
7050 make_test_case!(f32_vals, Float32Array, Float32),
7051 make_test_case!(f64_vals, Float64Array, Float64),
7052 make_test_case!(i8_vals, Int8Array, Int8),
7053 make_test_case!(i16_vals, Int16Array, Int16),
7054 make_test_case!(i32_vals, Int32Array, Int32),
7055 make_test_case!(i64_vals, Int64Array, Int64),
7056 make_test_case!(u8_vals, UInt8Array, UInt8),
7057 make_test_case!(u16_vals, UInt16Array, UInt16),
7058 make_test_case!(u32_vals, UInt32Array, UInt32),
7059 make_test_case!(u64_vals, UInt64Array, UInt64),
7060 make_str_test_case!(str_vals, StringArray, Utf8),
7061 make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
7062 make_binary_test_case!(str_vals, BinaryArray, Binary),
7063 make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
7064 make_test_case!(i32_vals, Date32Array, Date32),
7065 make_test_case!(i64_vals, Date64Array, Date64),
7066 make_test_case!(i32_vals, Time32SecondArray, Time32Second),
7067 make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
7068 make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
7069 make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
7070 make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
7071 make_test_case!(
7072 i64_vals,
7073 TimestampSecondArray,
7074 TimestampSecond,
7075 Some("UTC".into())
7076 ),
7077 make_test_case!(
7078 i64_vals,
7079 TimestampMillisecondArray,
7080 TimestampMillisecond,
7081 None
7082 ),
7083 make_test_case!(
7084 i64_vals,
7085 TimestampMillisecondArray,
7086 TimestampMillisecond,
7087 Some("UTC".into())
7088 ),
7089 make_test_case!(
7090 i64_vals,
7091 TimestampMicrosecondArray,
7092 TimestampMicrosecond,
7093 None
7094 ),
7095 make_test_case!(
7096 i64_vals,
7097 TimestampMicrosecondArray,
7098 TimestampMicrosecond,
7099 Some("UTC".into())
7100 ),
7101 make_test_case!(
7102 i64_vals,
7103 TimestampNanosecondArray,
7104 TimestampNanosecond,
7105 None
7106 ),
7107 make_test_case!(
7108 i64_vals,
7109 TimestampNanosecondArray,
7110 TimestampNanosecond,
7111 Some("UTC".into())
7112 ),
7113 make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
7114 make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
7115 make_test_case!(
7116 interval_mdn_vals,
7117 IntervalMonthDayNanoArray,
7118 IntervalMonthDayNano
7119 ),
7120 make_str_dict_test_case!(str_vals, Int8Type),
7121 make_str_dict_test_case!(str_vals, Int16Type),
7122 make_str_dict_test_case!(str_vals, Int32Type),
7123 make_str_dict_test_case!(str_vals, Int64Type),
7124 make_str_dict_test_case!(str_vals, UInt8Type),
7125 make_str_dict_test_case!(str_vals, UInt16Type),
7126 make_str_dict_test_case!(str_vals, UInt32Type),
7127 make_str_dict_test_case!(str_vals, UInt64Type),
7128 ];
7129
7130 for case in cases {
7131 println!("**** Test Case *****");
7132 let TestCase { array, scalars } = case;
7133 println!("Input array type: {}", array.data_type());
7134 println!("Input scalars: {scalars:#?}");
7135 assert_eq!(array.len(), scalars.len());
7136
7137 for (index, scalar) in scalars.into_iter().enumerate() {
7138 assert!(
7139 scalar
7140 .eq_array(&array, index)
7141 .expect("Failed to compare arrays"),
7142 "Expected {scalar:?} to be equal to {array:?} at index {index}"
7143 );
7144
7145 for other_index in 0..array.len() {
7147 if index != other_index {
7148 assert!(
7149 !scalar
7150 .eq_array(&array, other_index)
7151 .expect("Failed to compare arrays"),
7152 "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
7153 );
7154 }
7155 }
7156 }
7157 }
7158 }
7159
7160 #[test]
7161 fn scalar_partial_ordering() {
7162 use ScalarValue::*;
7163
7164 assert_eq!(
7165 Int64(Some(33)).partial_cmp(&Int64(Some(0))),
7166 Some(Ordering::Greater)
7167 );
7168 assert_eq!(
7169 Int64(Some(0)).partial_cmp(&Int64(Some(33))),
7170 Some(Ordering::Less)
7171 );
7172 assert_eq!(
7173 Int64(Some(33)).partial_cmp(&Int64(Some(33))),
7174 Some(Ordering::Equal)
7175 );
7176 assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
7178 assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
7179
7180 assert_eq!(
7181 ScalarValue::from(vec![
7182 ("A", ScalarValue::from(1.0)),
7183 ("B", ScalarValue::from("Z")),
7184 ])
7185 .partial_cmp(&ScalarValue::from(vec![
7186 ("A", ScalarValue::from(2.0)),
7187 ("B", ScalarValue::from("A")),
7188 ])),
7189 Some(Ordering::Less)
7190 );
7191
7192 assert_eq!(
7194 ScalarValue::from(vec![
7195 ("A", ScalarValue::from(1.0)),
7196 ("B", ScalarValue::from("Z")),
7197 ])
7198 .partial_cmp(&ScalarValue::from(vec![
7199 ("a", ScalarValue::from(2.0)),
7200 ("b", ScalarValue::from("A")),
7201 ])),
7202 None
7203 );
7204 }
7205
7206 #[test]
7207 fn test_scalar_value_from_string() {
7208 let scalar = ScalarValue::from("foo");
7209 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7210 let scalar = ScalarValue::from("foo".to_string());
7211 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7212 let scalar = ScalarValue::from_str("foo").unwrap();
7213 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7214 }
7215
7216 #[test]
7217 fn test_scalar_struct() {
7218 let field_a = Arc::new(Field::new("A", DataType::Int32, false));
7219 let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
7220 let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
7221
7222 let field_e = Arc::new(Field::new("e", DataType::Int16, false));
7223 let field_f = Arc::new(Field::new("f", DataType::Int64, false));
7224 let field_d = Arc::new(Field::new(
7225 "D",
7226 DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
7227 false,
7228 ));
7229
7230 let struct_array = StructArray::from(vec![
7231 (
7232 Arc::clone(&field_e),
7233 Arc::new(Int16Array::from(vec![2])) as ArrayRef,
7234 ),
7235 (
7236 Arc::clone(&field_f),
7237 Arc::new(Int64Array::from(vec![3])) as ArrayRef,
7238 ),
7239 ]);
7240
7241 let struct_array = StructArray::from(vec![
7242 (
7243 Arc::clone(&field_a),
7244 Arc::new(Int32Array::from(vec![23])) as ArrayRef,
7245 ),
7246 (
7247 Arc::clone(&field_b),
7248 Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
7249 ),
7250 (
7251 Arc::clone(&field_c),
7252 Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
7253 ),
7254 (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
7255 ]);
7256 let scalar = ScalarValue::Struct(Arc::new(struct_array));
7257
7258 let array = scalar
7259 .to_array_of_size(2)
7260 .expect("Failed to convert to array of size");
7261
7262 let expected = Arc::new(StructArray::from(vec![
7263 (
7264 Arc::clone(&field_a),
7265 Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
7266 ),
7267 (
7268 Arc::clone(&field_b),
7269 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
7270 ),
7271 (
7272 Arc::clone(&field_c),
7273 Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
7274 ),
7275 (
7276 Arc::clone(&field_d),
7277 Arc::new(StructArray::from(vec![
7278 (
7279 Arc::clone(&field_e),
7280 Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
7281 ),
7282 (
7283 Arc::clone(&field_f),
7284 Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
7285 ),
7286 ])) as ArrayRef,
7287 ),
7288 ])) as ArrayRef;
7289
7290 assert_eq!(&array, &expected);
7291
7292 let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
7294 assert_eq!(constructed, scalar);
7295
7296 let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
7298 assert!(none_scalar.is_null());
7299 assert_eq!(
7300 format!("{none_scalar:?}"),
7301 String::from("Struct({A:,B:,C:,D:})")
7302 );
7303
7304 let constructed = ScalarValue::from(vec![
7306 ("A", ScalarValue::from(23)),
7307 ("B", ScalarValue::from(false)),
7308 ("C", ScalarValue::from("Hello")),
7309 (
7310 "D",
7311 ScalarValue::from(vec![
7312 ("e", ScalarValue::from(2i16)),
7313 ("f", ScalarValue::from(3i64)),
7314 ]),
7315 ),
7316 ]);
7317 assert_eq!(constructed, scalar);
7318
7319 let scalars = vec![
7321 ScalarValue::from(vec![
7322 ("A", ScalarValue::from(23)),
7323 ("B", ScalarValue::from(false)),
7324 ("C", ScalarValue::from("Hello")),
7325 (
7326 "D",
7327 ScalarValue::from(vec![
7328 ("e", ScalarValue::from(2i16)),
7329 ("f", ScalarValue::from(3i64)),
7330 ]),
7331 ),
7332 ]),
7333 ScalarValue::from(vec![
7334 ("A", ScalarValue::from(7)),
7335 ("B", ScalarValue::from(true)),
7336 ("C", ScalarValue::from("World")),
7337 (
7338 "D",
7339 ScalarValue::from(vec![
7340 ("e", ScalarValue::from(4i16)),
7341 ("f", ScalarValue::from(5i64)),
7342 ]),
7343 ),
7344 ]),
7345 ScalarValue::from(vec![
7346 ("A", ScalarValue::from(-1000)),
7347 ("B", ScalarValue::from(true)),
7348 ("C", ScalarValue::from("!!!!!")),
7349 (
7350 "D",
7351 ScalarValue::from(vec![
7352 ("e", ScalarValue::from(6i16)),
7353 ("f", ScalarValue::from(7i64)),
7354 ]),
7355 ),
7356 ]),
7357 ];
7358 let array = ScalarValue::iter_to_array(scalars).unwrap();
7359
7360 let expected = Arc::new(StructArray::from(vec![
7361 (
7362 Arc::clone(&field_a),
7363 Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
7364 ),
7365 (
7366 Arc::clone(&field_b),
7367 Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
7368 ),
7369 (
7370 Arc::clone(&field_c),
7371 Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
7372 ),
7373 (
7374 Arc::clone(&field_d),
7375 Arc::new(StructArray::from(vec![
7376 (
7377 Arc::clone(&field_e),
7378 Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
7379 ),
7380 (
7381 Arc::clone(&field_f),
7382 Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
7383 ),
7384 ])) as ArrayRef,
7385 ),
7386 ])) as ArrayRef;
7387
7388 assert_eq!(&array, &expected);
7389 }
7390
7391 #[test]
7392 fn round_trip() {
7393 let cases: Vec<ArrayRef> = vec![
7395 Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
7397 Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
7398 Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
7399 Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
7400 Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
7401 Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
7402 Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
7403 Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
7404 Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
7406 Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
7408 Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
7409 Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
7411 Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
7412 Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
7413 {
7415 let mut builder = StringDictionaryBuilder::<Int32Type>::new();
7416 builder.append("foo").unwrap();
7417 builder.append_null();
7418 builder.append("bar").unwrap();
7419 Arc::new(builder.finish())
7420 },
7421 Arc::new(BinaryArray::from_iter(vec![
7423 Some(b"foo"),
7424 None,
7425 Some(b"bar"),
7426 ])),
7427 Arc::new(LargeBinaryArray::from_iter(vec![
7428 Some(b"foo"),
7429 None,
7430 Some(b"bar"),
7431 ])),
7432 Arc::new(BinaryViewArray::from_iter(vec![
7433 Some(b"foo"),
7434 None,
7435 Some(b"bar"),
7436 ])),
7437 Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
7439 Arc::new(TimestampMillisecondArray::from(vec![
7440 Some(1),
7441 None,
7442 Some(3),
7443 ])),
7444 Arc::new(TimestampMicrosecondArray::from(vec![
7445 Some(1),
7446 None,
7447 Some(3),
7448 ])),
7449 Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
7450 Arc::new(
7452 TimestampSecondArray::from(vec![Some(1), None, Some(3)])
7453 .with_timezone_opt(Some("UTC")),
7454 ),
7455 Arc::new(
7456 TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
7457 .with_timezone_opt(Some("UTC")),
7458 ),
7459 Arc::new(
7460 TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
7461 .with_timezone_opt(Some("UTC")),
7462 ),
7463 Arc::new(
7464 TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
7465 .with_timezone_opt(Some("UTC")),
7466 ),
7467 Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
7469 Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
7470 Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
7472 Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
7473 Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
7474 Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
7475 Arc::new(NullArray::new(3)),
7477 {
7479 let mut builder = UnionBuilder::new_dense();
7480 builder.append::<Int32Type>("a", 1).unwrap();
7481 builder.append::<Float64Type>("b", 3.4).unwrap();
7482 Arc::new(builder.build().unwrap())
7483 },
7484 {
7486 let mut builder = UnionBuilder::new_sparse();
7487 builder.append::<Int32Type>("a", 1).unwrap();
7488 builder.append::<Float64Type>("b", 3.4).unwrap();
7489 Arc::new(builder.build().unwrap())
7490 },
7491 {
7493 let values_builder = StringBuilder::new();
7494 let mut builder = ListBuilder::new(values_builder);
7495 builder.values().append_value("A");
7497 builder.values().append_value("B");
7498 builder.append(true);
7499 builder.append(true);
7501 builder.values().append_value("?"); builder.append(false);
7504 Arc::new(builder.finish())
7505 },
7506 {
7508 let values_builder = StringBuilder::new();
7509 let mut builder = LargeListBuilder::new(values_builder);
7510 builder.values().append_value("A");
7512 builder.values().append_value("B");
7513 builder.append(true);
7514 builder.append(true);
7516 builder.append(false);
7518 Arc::new(builder.finish())
7519 },
7520 {
7522 let values_builder = Int32Builder::new();
7523 let mut builder = FixedSizeListBuilder::new(values_builder, 3);
7524
7525 builder.values().append_value(0);
7527 builder.values().append_value(1);
7528 builder.values().append_value(2);
7529 builder.append(true);
7530 builder.values().append_null();
7531 builder.values().append_null();
7532 builder.values().append_null();
7533 builder.append(false);
7534 builder.values().append_value(3);
7535 builder.values().append_null();
7536 builder.values().append_value(5);
7537 builder.append(true);
7538 Arc::new(builder.finish())
7539 },
7540 {
7542 let string_builder = StringBuilder::new();
7543 let int_builder = Int32Builder::with_capacity(4);
7544
7545 let mut builder = MapBuilder::new(None, string_builder, int_builder);
7546 builder.keys().append_value("joe");
7548 builder.values().append_value(1);
7549 builder.append(true).unwrap();
7550 builder.append(true).unwrap();
7552 builder.append(false).unwrap();
7554
7555 Arc::new(builder.finish())
7556 },
7557 ];
7558
7559 for arr in cases {
7560 round_trip_through_scalar(arr);
7561 }
7562 }
7563
7564 fn round_trip_through_scalar(arr: ArrayRef) {
7569 for i in 0..arr.len() {
7570 let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
7572 let array = scalar.to_array_of_size(1).unwrap();
7573 assert_eq!(array.len(), 1);
7574 assert_eq!(array.data_type(), arr.data_type());
7575 assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
7576 }
7577 }
7578
7579 #[test]
7580 fn roundtrip_run_array() {
7581 let run_ends = Int16Array::from(vec![2, 3]);
7585 let values = Int64Array::from(vec![Some(1), None]);
7586 let run_array = RunArray::try_new(&run_ends, &values).unwrap();
7587 let run_array = run_array.downcast::<Int64Array>().unwrap();
7588
7589 let expected_values = run_array.into_iter().collect::<Vec<_>>();
7590
7591 for i in 0..run_array.len() {
7592 let scalar = ScalarValue::try_from_array(&run_array, i).unwrap();
7593 let array = scalar.to_array_of_size(1).unwrap();
7594 assert_eq!(array.data_type(), run_array.data_type());
7595 let array = array.as_run::<Int16Type>();
7596 let array = array.downcast::<Int64Array>().unwrap();
7597 assert_eq!(
7598 array.into_iter().collect::<Vec<_>>(),
7599 expected_values[i..i + 1]
7600 );
7601 }
7602 }
7603
7604 #[test]
7605 fn test_scalar_union_sparse() {
7606 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7607 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7608 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7609 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7610
7611 let mut values_a = vec![None; 6];
7612 values_a[0] = Some(42);
7613 let mut values_b = vec![None; 6];
7614 values_b[1] = Some(true);
7615 let mut values_c = vec![None; 6];
7616 values_c[2] = Some("foo");
7617 let children: Vec<ArrayRef> = vec![
7618 Arc::new(Int32Array::from(values_a)),
7619 Arc::new(BooleanArray::from(values_b)),
7620 Arc::new(StringArray::from(values_c)),
7621 ];
7622
7623 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7624 let array: ArrayRef = Arc::new(
7625 UnionArray::try_new(fields.clone(), type_ids, None, children)
7626 .expect("UnionArray"),
7627 );
7628
7629 let expected = [
7630 (0, ScalarValue::from(42)),
7631 (1, ScalarValue::from(true)),
7632 (2, ScalarValue::from("foo")),
7633 (0, ScalarValue::Int32(None)),
7634 (1, ScalarValue::Boolean(None)),
7635 (2, ScalarValue::Utf8(None)),
7636 ];
7637
7638 for (i, (ti, value)) in expected.into_iter().enumerate() {
7639 let is_null = value.is_null();
7640 let value = Some((ti, Box::new(value)));
7641 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
7642 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7643
7644 assert_eq!(
7645 actual, expected,
7646 "[{i}] {actual} was not equal to {expected}"
7647 );
7648
7649 assert!(
7650 expected.eq_array(&array, i).expect("eq_array"),
7651 "[{i}] {expected}.eq_array was false"
7652 );
7653
7654 if is_null {
7655 assert!(actual.is_null(), "[{i}] {actual} was not null")
7656 }
7657 }
7658 }
7659
7660 #[test]
7661 fn test_scalar_union_dense() {
7662 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7663 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7664 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7665 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7666 let children: Vec<ArrayRef> = vec![
7667 Arc::new(Int32Array::from(vec![Some(42), None])),
7668 Arc::new(BooleanArray::from(vec![Some(true), None])),
7669 Arc::new(StringArray::from(vec![Some("foo"), None])),
7670 ];
7671
7672 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7673 let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
7674 let array: ArrayRef = Arc::new(
7675 UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
7676 .expect("UnionArray"),
7677 );
7678
7679 let expected = [
7680 (0, ScalarValue::from(42)),
7681 (1, ScalarValue::from(true)),
7682 (2, ScalarValue::from("foo")),
7683 (0, ScalarValue::Int32(None)),
7684 (1, ScalarValue::Boolean(None)),
7685 (2, ScalarValue::Utf8(None)),
7686 ];
7687
7688 for (i, (ti, value)) in expected.into_iter().enumerate() {
7689 let is_null = value.is_null();
7690 let value = Some((ti, Box::new(value)));
7691 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
7692 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7693
7694 assert_eq!(
7695 actual, expected,
7696 "[{i}] {actual} was not equal to {expected}"
7697 );
7698
7699 assert!(
7700 expected.eq_array(&array, i).expect("eq_array"),
7701 "[{i}] {expected}.eq_array was false"
7702 );
7703
7704 if is_null {
7705 assert!(actual.is_null(), "[{i}] {actual} was not null")
7706 }
7707 }
7708 }
7709
7710 #[test]
7711 fn test_lists_in_struct() {
7712 let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
7713 let field_primitive_list = Arc::new(Field::new(
7714 "primitive_list",
7715 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7716 false,
7717 ));
7718
7719 let l0 =
7721 ScalarValue::List(Arc::new(
7722 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7723 Some(1),
7724 Some(2),
7725 Some(3),
7726 ])]),
7727 ));
7728 let l1 =
7729 ScalarValue::List(Arc::new(
7730 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7731 Some(4),
7732 Some(5),
7733 ])]),
7734 ));
7735 let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
7736 Int32Type,
7737 _,
7738 _,
7739 >(vec![Some(vec![Some(6)])])));
7740
7741 let s0 = ScalarValue::from(vec![
7743 ("A", ScalarValue::from("First")),
7744 ("primitive_list", l0),
7745 ]);
7746
7747 let s1 = ScalarValue::from(vec![
7748 ("A", ScalarValue::from("Second")),
7749 ("primitive_list", l1),
7750 ]);
7751
7752 let s2 = ScalarValue::from(vec![
7753 ("A", ScalarValue::from("Third")),
7754 ("primitive_list", l2),
7755 ]);
7756
7757 let array =
7759 ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
7760
7761 let array = as_struct_array(&array).unwrap();
7762 let expected = StructArray::from(vec![
7763 (
7764 Arc::clone(&field_a),
7765 Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
7766 ),
7767 (
7768 Arc::clone(&field_primitive_list),
7769 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
7770 Some(vec![Some(1), Some(2), Some(3)]),
7771 Some(vec![Some(4), Some(5)]),
7772 Some(vec![Some(6)]),
7773 ])),
7774 ),
7775 ]);
7776
7777 assert_eq!(array, &expected);
7778
7779 let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
7782 let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
7783
7784 let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
7785 let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
7786
7787 let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
7788 let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
7789
7790 let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
7792 let array = array.as_list::<i32>();
7793
7794 let field_a_builder = StringBuilder::with_capacity(4, 1024);
7796 let primitive_value_builder = Int32Array::builder(8);
7797 let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
7798
7799 let element_builder = StructBuilder::new(
7800 vec![field_a, field_primitive_list],
7801 vec![
7802 Box::new(field_a_builder),
7803 Box::new(field_primitive_list_builder),
7804 ],
7805 );
7806
7807 let mut list_builder = ListBuilder::new(element_builder);
7808
7809 list_builder
7810 .values()
7811 .field_builder::<StringBuilder>(0)
7812 .unwrap()
7813 .append_value("First");
7814 list_builder
7815 .values()
7816 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7817 .unwrap()
7818 .values()
7819 .append_value(1);
7820 list_builder
7821 .values()
7822 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7823 .unwrap()
7824 .values()
7825 .append_value(2);
7826 list_builder
7827 .values()
7828 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7829 .unwrap()
7830 .values()
7831 .append_value(3);
7832 list_builder
7833 .values()
7834 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7835 .unwrap()
7836 .append(true);
7837 list_builder.values().append(true);
7838
7839 list_builder
7840 .values()
7841 .field_builder::<StringBuilder>(0)
7842 .unwrap()
7843 .append_value("Second");
7844 list_builder
7845 .values()
7846 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7847 .unwrap()
7848 .values()
7849 .append_value(4);
7850 list_builder
7851 .values()
7852 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7853 .unwrap()
7854 .values()
7855 .append_value(5);
7856 list_builder
7857 .values()
7858 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7859 .unwrap()
7860 .append(true);
7861 list_builder.values().append(true);
7862 list_builder.append(true);
7863
7864 list_builder
7865 .values()
7866 .field_builder::<StringBuilder>(0)
7867 .unwrap()
7868 .append_value("Third");
7869 list_builder
7870 .values()
7871 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7872 .unwrap()
7873 .values()
7874 .append_value(6);
7875 list_builder
7876 .values()
7877 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7878 .unwrap()
7879 .append(true);
7880 list_builder.values().append(true);
7881 list_builder.append(true);
7882
7883 list_builder
7884 .values()
7885 .field_builder::<StringBuilder>(0)
7886 .unwrap()
7887 .append_value("Second");
7888 list_builder
7889 .values()
7890 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7891 .unwrap()
7892 .values()
7893 .append_value(4);
7894 list_builder
7895 .values()
7896 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7897 .unwrap()
7898 .values()
7899 .append_value(5);
7900 list_builder
7901 .values()
7902 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7903 .unwrap()
7904 .append(true);
7905 list_builder.values().append(true);
7906 list_builder.append(true);
7907
7908 let expected = list_builder.finish();
7909
7910 assert_eq!(array, &expected);
7911 }
7912
7913 fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
7914 let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
7915 ListArray::new(
7916 Arc::new(Field::new_list_field(
7917 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7918 true,
7919 )),
7920 OffsetBuffer::<i32>::from_lengths([1]),
7921 Arc::new(a1),
7922 None,
7923 )
7924 }
7925
/// `ScalarValue::iter_to_array` over nested list scalars must produce the same
/// array as building the three outer rows by hand with nested `ListBuilder`s.
#[test]
fn test_nested_lists() {
    // One `List<List<Int32>>` scalar per input row.
    let scalars: Vec<ScalarValue> = vec![
        vec![Some(1), Some(2), Some(3)],
        vec![Some(4), Some(5)],
        vec![Some(6)],
    ]
    .into_iter()
    .map(|row| ScalarValue::List(Arc::new(build_2d_list(row))))
    .collect();

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let actual = combined.as_list::<i32>();

    // Reference array: each outer row contains exactly one inner list.
    let mut outer_builder = ListBuilder::new(ListBuilder::new(Int32Array::builder(6)));
    for row in [&[1, 2, 3][..], &[4, 5][..], &[6][..]] {
        for &item in row {
            outer_builder.values().values().append_value(item);
        }
        // Close the inner list, then the outer row that wraps it.
        outer_builder.values().append(true);
        outer_builder.append(true);
    }
    let expected = outer_builder.finish();

    assert_eq!(actual, &expected);
}
7965
/// A nanosecond timestamp scalar tagged with "UTC" must report the same data
/// type as a scalar, as a one-element array, and after being read back from
/// that array.
#[test]
fn scalar_timestamp_ns_utc_timezone() {
    // The type every representation is expected to report.
    let utc_nanos = || DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));

    let scalar =
        ScalarValue::TimestampNanosecond(Some(1599566400000000000), Some("UTC".into()));
    assert_eq!(scalar.data_type(), utc_nanos());

    let array = scalar.to_array().expect("Failed to convert to array");
    assert_eq!(array.len(), 1);
    assert_eq!(array.data_type(), &utc_nanos());

    let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(new_scalar.data_type(), utc_nanos());
}
7991
/// Runs `check_scalar_cast` over primitive, dictionary, string-view and
/// list-like target types.
#[test]
fn cast_round_trip() {
    // Primitive sources, with and without a value, all cast to Int16.
    for scalar in [
        ScalarValue::Int8(Some(5)),
        ScalarValue::Int8(None),
        ScalarValue::Float64(Some(5.5)),
        ScalarValue::Float64(None),
    ] {
        check_scalar_cast(scalar, DataType::Int16);
    }

    // Utf8 -> Dictionary<Int32, Utf8>.
    let utf8_dictionary =
        || DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    check_scalar_cast(ScalarValue::from("foo"), utf8_dictionary());
    check_scalar_cast(ScalarValue::Utf8(None), utf8_dictionary());

    // Utf8 -> Utf8View.
    check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
    check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
    check_scalar_cast(
        ScalarValue::from("larger than 12 bytes string"),
        DataType::Utf8View,
    );

    // List-like containers: the element field keeps its name ("element") while
    // its type changes from Int32 to Int64.
    let element = |data_type: DataType| Arc::new(Field::new("element", data_type, true));

    let mut list = ListBuilder::new(Int32Builder::new()).with_field(element(DataType::Int32));
    list.append_value([Some(1)]);
    list.append(true);
    check_scalar_cast(
        ScalarValue::List(Arc::new(list.finish())),
        DataType::List(element(DataType::Int64)),
    );

    let mut fixed_size_list = FixedSizeListBuilder::new(Int32Builder::new(), 1)
        .with_field(element(DataType::Int32));
    fixed_size_list.values().append_value(1);
    fixed_size_list.append(true);
    check_scalar_cast(
        ScalarValue::FixedSizeList(Arc::new(fixed_size_list.finish())),
        DataType::FixedSizeList(element(DataType::Int64), 1),
    );

    let mut large_list =
        LargeListBuilder::new(Int32Builder::new()).with_field(element(DataType::Int32));
    large_list.append_value([Some(1)]);
    large_list.append(true);
    check_scalar_cast(
        ScalarValue::LargeList(Arc::new(large_list.finish())),
        DataType::LargeList(element(DataType::Int64)),
    );
}
8063
8064 fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
8066 let scalar_array = scalar.to_array().expect("Failed to convert to array");
8068 let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
8070
8071 let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
8073 assert_eq!(cast_scalar.data_type(), desired_type);
8074
8075 let array = cast_scalar
8077 .to_array_of_size(10)
8078 .expect("Failed to convert to array of size");
8079
8080 assert_eq!(array.data_type(), &desired_type)
8082 }
8083
/// `arithmetic_negate` flips a signed value, keeps a signed null as null, and
/// returns an error for the unsigned and boolean scalars tried here.
#[test]
fn test_scalar_negative() -> Result<()> {
    // Supported: Int32 with and without a value.
    assert_eq!(
        ScalarValue::Int32(Some(-12)),
        ScalarValue::Int32(Some(12)).arithmetic_negate()?
    );
    assert_eq!(
        ScalarValue::Int32(None),
        ScalarValue::Int32(None).arithmetic_negate()?
    );

    // Rejected: negation must fail rather than produce a value.
    for unsupported in [ScalarValue::UInt8(Some(12)), ScalarValue::Boolean(None)] {
        assert!(unsupported.arithmetic_negate().is_err());
    }
    Ok(())
}
8099
/// Negating the minimum value of a signed integer-backed scalar must surface an
/// arrow `ArithmeticOverflow` error, while negating the extreme `f32`/`f64`
/// values simply swaps `MIN` and `MAX`.
#[test]
fn test_scalar_negative_overflows() -> Result<()> {
    // For every value: negate, require an error, and require that the root
    // cause is an arithmetic overflow. Any other error is returned to the
    // test harness so that it is reported as-is.
    macro_rules! test_overflow_on_value {
        ($($val:expr),* $(,)?) => {$(
            {
                let value: ScalarValue = $val;
                // `expect_err` takes a plain string, so a `{value:?}` placeholder in
                // its message would be printed literally; format the panic here so
                // the offending value really shows up in the failure output.
                let err = match value.arithmetic_negate() {
                    Ok(negated) => panic!(
                        "Should receive overflow error on negating {value:?}, got {negated:?}"
                    ),
                    Err(err) => err,
                };
                let root_err = err.find_root();
                match root_err {
                    DataFusionError::ArrowError(err, _)
                        if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
                    _ => return Err(err),
                };
            }
        )*};
    }
    test_overflow_on_value!(
        // Plain signed integers.
        i8::MIN.into(),
        i16::MIN.into(),
        i32::MIN.into(),
        i64::MIN.into(),
        // Decimals.
        ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
        ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
        // Intervals: each component is pushed to its minimum in turn.
        ScalarValue::IntervalYearMonth(Some(i32::MIN)),
        ScalarValue::new_interval_dt(i32::MIN, 999),
        ScalarValue::new_interval_dt(1, i32::MIN),
        ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
        ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
        ScalarValue::new_interval_mdn(12, 15, i64::MIN),
        // Timestamps of every unit.
        ScalarValue::TimestampSecond(Some(i64::MIN), None),
        ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
        ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
        ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
    );

    let float_cases = [
        (
            ScalarValue::Float16(Some(f16::MIN)),
            ScalarValue::Float16(Some(f16::MAX)),
        ),
        (
            ScalarValue::Float16(Some(f16::MAX)),
            ScalarValue::Float16(Some(f16::MIN)),
        ),
        (f32::MIN.into(), f32::MAX.into()),
        (f32::MAX.into(), f32::MIN.into()),
        (f64::MIN.into(), f64::MAX.into()),
        (f64::MAX.into(), f64::MIN.into()),
    ];
    // The two leading Float16 pairs are skipped in this loop; the same pairs
    // are exercised by `f16_test_overflow`.
    for (test, expected) in float_cases.into_iter().skip(2) {
        assert_eq!(test.arithmetic_negate()?, expected);
    }
    Ok(())
}
8158
/// Negating the extreme `f16` values swaps `MIN` and `MAX`.
#[test]
fn f16_test_overflow() {
    let (lowest, highest) = (f16::MIN, f16::MAX);
    for (input, negated) in [(lowest, highest), (highest, lowest)] {
        let test = ScalarValue::Float16(Some(input));
        let expected = ScalarValue::Float16(Some(negated));
        assert_eq!(test.arithmetic_negate().unwrap(), expected);
    }
}
8177
/// Generates a test named `$TEST_NAME` that applies `$FUNCTION` to a
/// `UInt64(12)` left-hand side and an `Int32(-3)` right-hand side, and requires
/// the call to fail with an error whose text contains `$EXPECTED_ERROR`.
macro_rules! expect_operation_error {
    ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
        #[test]
        fn $TEST_NAME() {
            let lhs = ScalarValue::UInt64(Some(12));
            let rhs = ScalarValue::Int32(Some(-3));

            // Success is the failure case here: the operand types do not match.
            let Err(error) = lhs.$FUNCTION(&rhs) else {
                panic!(
                    "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                    lhs, rhs
                );
            };

            let error_message = error.to_string();
            assert!(
                error_message.contains($EXPECTED_ERROR),
                "Expected error '{}' not found in actual error '{}'",
                $EXPECTED_ERROR,
                error_message
            );
        }
    };
}
8204
8205 expect_operation_error!(
8206 expect_add_error,
8207 add,
8208 "Invalid arithmetic operation: UInt64 + Int32"
8209 );
8210 expect_operation_error!(
8211 expect_sub_error,
8212 sub,
8213 "Invalid arithmetic operation: UInt64 - Int32"
8214 );
8215
/// For every row `[l_value, l_precision, l_scale, r_value, r_precision,
/// r_scale, o_value, o_precision, o_scale]`, applies `$OPERATION` to the two
/// `Decimal128` operands and asserts that the result equals the `Decimal128`
/// described by the `o_*` entries.
macro_rules! decimal_op_test_cases {
    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
        $(
            let expected = ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE);
            let actual = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE)
                .$OPERATION(&ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE))
                .unwrap();
            assert_eq!(expected, actual);
        )+
    };
}
8228
/// Adds `Decimal128` scalars with equal and with differing precision/scale.
#[test]
fn decimal_operations() {
    // Row layout: left (value, precision, scale), right (value, precision,
    // scale), expected (value, precision, scale).
    decimal_op_test_cases!(
        add,
        [
            // Same precision and scale on both sides.
            [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
            // Left side has the larger scale: the right value is scaled up by 10.
            [Some(123), 10, 3, Some(124), 10, 2, Some(123 + 124 * 10_i128.pow(1)), 12, 3],
            // Right side has the larger precision and scale: the left value is scaled up.
            [Some(123), 10, 2, Some(124), 11, 3, Some(123 * 10_i128.pow(3 - 2) + 124), 12, 3]
        ]
    );
}
8262
/// Adding a null `Decimal128` on either side yields a null result whose
/// precision and scale are still derived from both operands.
#[test]
fn decimal_operations_with_nulls() {
    // Row layout: left (value, precision, scale), right (value, precision,
    // scale), expected (value, precision, scale).
    decimal_op_test_cases!(
        add,
        [
            // Equal precision/scale, null on the left and then on the right.
            [None, 10, 2, Some(123), 10, 2, None, 11, 2],
            [Some(123), 10, 2, None, 10, 2, None, 11, 2],
            // Right operand has the larger scale.
            [Some(123), 8, 2, None, 10, 3, None, 11, 3],
            [None, 8, 2, Some(123), 10, 3, None, 11, 3],
            // One operand has scale 4 and precision 8, the other scale 3 and precision 10.
            [Some(123), 8, 4, None, 10, 3, None, 12, 4],
            [None, 10, 3, Some(123), 8, 4, None, 12, 4]
        ]
    );
}
8283
/// `ScalarValue::distance` returns the expected distance for pairs of equally
/// typed, non-null numeric scalars, regardless of operand order.
#[test]
fn test_scalar_distance() {
    // Shorthand for the Float16 rows.
    let half = |v: f32| ScalarValue::Float16(Some(f16::from_f32(v)));

    // (lhs, rhs, expected distance)
    let cases = [
        // Signed integers.
        (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
        (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
        (ScalarValue::Int16(Some(-5)), ScalarValue::Int16(Some(5)), 10),
        (ScalarValue::Int16(Some(5)), ScalarValue::Int16(Some(-5)), 10),
        (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
        (ScalarValue::Int32(Some(-5)), ScalarValue::Int32(Some(-10)), 5),
        (ScalarValue::Int64(Some(-10)), ScalarValue::Int64(Some(-5)), 5),
        // Unsigned integers.
        (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
        (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
        (ScalarValue::UInt16(Some(5)), ScalarValue::UInt16(Some(10)), 5),
        (ScalarValue::UInt32(Some(10)), ScalarValue::UInt32(Some(5)), 5),
        (ScalarValue::UInt64(Some(5)), ScalarValue::UInt64(Some(10)), 5),
        // Float16 with fractional differences.
        (half(1.1), half(1.9), 1),
        (half(-5.3), half(-9.2), 4),
        (half(-5.3), half(-9.7), 4),
        // Floats whose difference is a whole number.
        (ScalarValue::Float32(Some(1.0)), ScalarValue::Float32(Some(2.0)), 1),
        (ScalarValue::Float32(Some(2.0)), ScalarValue::Float32(Some(1.0)), 1),
        (ScalarValue::Float64(Some(0.0)), ScalarValue::Float64(Some(0.0)), 0),
        (ScalarValue::Float64(Some(-5.0)), ScalarValue::Float64(Some(-10.0)), 5),
        (ScalarValue::Float64(Some(-10.0)), ScalarValue::Float64(Some(-5.0)), 5),
        // Floats with fractional differences (0.1, 0.8, 3.9, 4.4 and 4.6).
        (ScalarValue::Float32(Some(1.2)), ScalarValue::Float32(Some(1.3)), 0),
        (ScalarValue::Float32(Some(1.1)), ScalarValue::Float32(Some(1.9)), 1),
        (ScalarValue::Float64(Some(-5.3)), ScalarValue::Float64(Some(-9.2)), 4),
        (ScalarValue::Float64(Some(-5.3)), ScalarValue::Float64(Some(-9.7)), 4),
        (ScalarValue::Float64(Some(-5.3)), ScalarValue::Float64(Some(-9.9)), 5),
        // Decimals with identical precision and scale.
        (
            ScalarValue::Decimal128(Some(10), 1, 0),
            ScalarValue::Decimal128(Some(5), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal128(Some(5), 1, 0),
            ScalarValue::Decimal128(Some(10), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal256(Some(10.into()), 1, 0),
            ScalarValue::Decimal256(Some(5.into()), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal256(Some(5.into()), 1, 0),
            ScalarValue::Decimal256(Some(10.into()), 1, 0),
            5,
        ),
    ];
    for (left, right, want) in &cases {
        assert_eq!(left.distance(right).unwrap(), *want);
    }
}
8423
/// `distance` yields `None` for maximum-precision decimals sitting at `MAX`
/// and `-MAX`.
#[test]
fn test_distance_none() {
    let max_128 = ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0);
    let min_128 = ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0);
    let max_256 = ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0);
    let min_256 = ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0);

    for (lhs, rhs) in &[(max_128, min_128), (max_256, min_256)] {
        assert!(lhs.distance(rhs).is_none(), "{lhs} vs {rhs}");
    }
}
8441
/// `distance` yields `None` for every pair in the table below.
#[test]
fn test_scalar_distance_invalid() {
    let half_one = || ScalarValue::Float16(Some(f16::from_f32(1.0)));

    // (lhs, rhs)
    let cases = [
        // Same type, at least one side null.
        (ScalarValue::Int8(None), ScalarValue::Int8(None)),
        (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
        // Mismatched types, both sides present.
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
        (half_one(), ScalarValue::Float32(Some(1.0))),
        (half_one(), ScalarValue::Int32(Some(1))),
        (ScalarValue::Float64(Some(1.1)), ScalarValue::Float32(Some(2.2))),
        (ScalarValue::UInt64(Some(777)), ScalarValue::Int32(Some(111))),
        // Mismatched types, one side null.
        (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
        // String, boolean and date scalars.
        (ScalarValue::from("foo"), ScalarValue::from("bar")),
        (ScalarValue::Boolean(Some(true)), ScalarValue::Boolean(Some(false))),
        (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
        (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
        // Decimals whose scale or precision differ between the two sides.
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 5, 3),
        ),
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 3, 5),
        ),
        (
            ScalarValue::Decimal256(Some(123.into()), 5, 5),
            ScalarValue::Decimal256(Some(120.into()), 3, 5),
        ),
        // Decimal256 values built from parts with a zero first part and a
        // non-zero second part of opposite signs.
        (
            ScalarValue::Decimal256(Some(i256::from_parts(0, 2_i64.pow(50).into())), 1, 0),
            ScalarValue::Decimal256(
                Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
                1,
                0,
            ),
        ),
        (
            ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
            ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
        ),
    ];
    for (left, right) in cases {
        assert!(left.distance(&right).is_none());
    }
}
8517
/// Negating an interval negates each of its components, for all three
/// interval representations.
#[test]
fn test_scalar_interval_negate() {
    // (interval, expected negation)
    let pairs = [
        (ScalarValue::new_interval_ym(1, 12), ScalarValue::new_interval_ym(-1, -12)),
        (ScalarValue::new_interval_dt(1, 999), ScalarValue::new_interval_dt(-1, -999)),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(-12, -15, -123_456),
        ),
    ];
    for (expr, expected) in &pairs {
        let negated = expr.arithmetic_negate().unwrap();
        assert_eq!(*expected, negated, "-expr:{expr:?}");
    }
}
8539
/// Adding an interval to itself doubles every component, with the operands in
/// either order.
#[test]
fn test_scalar_interval_add() {
    // (lhs, rhs, expected sum)
    let triples = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(2, 24),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(2, 1998),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(24, 30, 246_912),
        ),
    ];
    for (lhs, rhs, expected) in &triples {
        // Compute both orders up front, then compare each against the expectation.
        let forward = lhs.add(rhs).unwrap();
        let swapped = rhs.add(lhs).unwrap();
        assert_eq!(*expected, forward, "lhs:{lhs:?} + rhs:{rhs:?}");
        assert_eq!(*expected, swapped, "lhs:{rhs:?} + rhs:{lhs:?}");
    }
}
8566
/// Subtracting an interval from itself yields the all-zero interval of the
/// same representation.
#[test]
fn test_scalar_interval_sub() {
    // (lhs, rhs, expected difference)
    let triples = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(0, 0),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(0, 0),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(0, 0, 0),
        ),
    ];
    for (lhs, rhs, expected) in &triples {
        let difference = lhs.sub(rhs).unwrap();
        assert_eq!(*expected, difference, "lhs:{lhs:?} - rhs:{rhs:?}");
    }
}
8591
/// For random timestamps and intervals, applying the interval and then its
/// inverse operation must give back the original timestamp.
#[test]
fn timestamp_op_random_tests() {
    let sample_size = 1000;
    let timestamps = get_random_timestamps(sample_size);
    let intervals = get_random_intervals(sample_size);

    for (idx, ts) in timestamps.iter().enumerate() {
        let interval = &intervals[idx];
        // Even samples go add -> sub, odd samples go sub -> add.
        let back = if idx % 2 == 0 {
            let shifted = ts.add(interval.clone()).unwrap();
            shifted.sub(interval.clone()).unwrap()
        } else {
            let shifted = ts.sub(interval.clone()).unwrap();
            shifted.add(interval.clone()).unwrap()
        };
        assert_eq!(ts, &back);
    }
}
8614
/// Struct scalars with nulls at the top level and in a nested struct must keep
/// those nulls when combined into one array, both through
/// `ScalarValue::iter_to_array` and through per-scalar `to_array` + `concat`.
#[test]
fn test_struct_nulls() {
    let fields_b = Fields::from(vec![
        Field::new("ba", DataType::UInt64, true),
        Field::new("bb", DataType::UInt64, true),
    ]);
    let fields = Fields::from(vec![
        Field::new("a", DataType::UInt64, true),
        Field::new("b", DataType::Struct(fields_b.clone()), true),
    ]);

    // One-element UInt64 column holding `v`.
    let u64_column = |v: u64| Arc::new(UInt64Array::from(vec![Some(v)])) as ArrayRef;

    // Children of the nested struct `b`: {ba: 2, bb: 3}.
    let nested_children = || {
        vec![
            (Arc::clone(&fields_b[0]), u64_column(2)),
            (Arc::clone(&fields_b[1]), u64_column(3)),
        ]
    };

    // {a: 1, b: {ba: 2, bb: 3}} with no validity buffer on `b`.
    let struct_value = vec![
        (Arc::clone(&fields[0]), u64_column(1)),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from(nested_children())) as ArrayRef,
        ),
    ];

    // Same children, but `b` carries a validity buffer whose bit is cleared.
    let struct_value_with_nulls = vec![
        (Arc::clone(&fields[0]), u64_column(1)),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from((nested_children(), Buffer::from(&[0])))) as ArrayRef,
        ),
    ];

    let scalars = vec![
        // Top-level validity bit cleared: the whole struct is null.
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[0]),
        )))),
        // Struct is valid, nested field `b` is null.
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value_with_nulls.clone(),
            Buffer::from(&[1]),
        )))),
        // Everything valid.
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[1]),
        )))),
    ];

    let check_array = |array: ArrayRef| {
        let null_mask = is_null(&array).unwrap();
        assert_eq!(null_mask, BooleanArray::from(vec![true, false, false]));

        let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
        let formatted = formatted.split('\n').collect::<Vec<_>>();
        // Every row is padded to the same width as the border rows; without
        // the padding the comparison against the rendered table cannot match.
        let expected = vec![
            "+---------------------------+",
            "| col                       |",
            "+---------------------------+",
            "|                           |",
            "| {a: 1, b: }               |",
            "| {a: 1, b: {ba: 2, bb: 3}} |",
            "+---------------------------+",
        ];
        assert_eq!(
            formatted, expected,
            "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
        );
    };

    // Path 1: combine the scalars directly.
    let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
    check_array(array);

    // Path 2: convert each scalar to an array and concatenate the results.
    let arrays = scalars
        .iter()
        .map(ScalarValue::to_array)
        .collect::<Result<Vec<_>>>()
        .expect("Failed to convert to array");
    let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
    let array = arrow::compute::concat(&arrays).unwrap();
    check_array(array);
}
8722
/// Checks the `Display` and `Debug` renderings of a struct scalar with one
/// null field, and the table rendering of its backing array.
#[test]
fn test_struct_display() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Utf8, true);

    // {a: 1, b: NULL}
    let s = ScalarStructBuilder::new()
        .with_scalar(field_a, ScalarValue::from(1i32))
        .with_scalar(field_b, ScalarValue::Utf8(None))
        .build()
        .unwrap();

    assert_eq!(s.to_string(), "{a:1,b:}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    // The header row is padded to the width of the border rows; an unpadded
    // `| s |` could never match the rendered table.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +-------------+
    | s           |
    +-------------+
    | {a: 1, b: } |
    +-------------+
    ");
}
8751
/// A struct scalar built with an explicit null buffer that marks its single
/// row as non-null must render both field values, not a null.
#[test]
fn test_null_bug() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Int32, true);
    let fields = Fields::from(vec![field_a, field_b]);

    let array_a = Arc::new(Int32Array::from_iter_values([1]));
    let array_b = Arc::new(Int32Array::from_iter_values([2]));
    let arrays: Vec<ArrayRef> = vec![array_a, array_b];

    // One row, explicitly appended as non-null.
    let mut not_nulls = NullBufferBuilder::new(1);
    not_nulls.append_non_null();

    let ar = StructArray::new(fields, arrays, not_nulls.finish());
    let s = ScalarValue::Struct(Arc::new(ar));

    assert_eq!(s.to_string(), "{a:1,b:2}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    // The header row is padded to the width of the border rows; an unpadded
    // `| s |` could never match the rendered table.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------+
    | s            |
    +--------------+
    | {a: 1, b: 2} |
    +--------------+
    ");
}
8786
/// `Display` for `Date64` prints the calendar date for an ordinary value and
/// an empty string for the very large negative value used here.
#[test]
fn test_display_date64_large_values() {
    let ordinary = ScalarValue::Date64(Some(790179464505));
    assert_eq!(ordinary.to_string(), "1995-01-15");

    // Far outside the range of the first case; expected to render as "".
    let extreme = ScalarValue::Date64(Some(-790179464505600000));
    assert_eq!(extreme.to_string(), "");
}
8799
/// A null struct scalar displays as `NULL`, and its backing array renders as
/// an empty cell.
#[test]
fn test_struct_display_null() {
    let fields = vec![Field::new("a", DataType::Int32, false)];
    let s = ScalarStructBuilder::new_null(fields);
    assert_eq!(s.to_string(), "NULL");

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();

    // The empty cell is padded to the width of the border rows; an unpadded
    // `| |` could never match the rendered table.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +---+
    | s |
    +---+
    |   |
    +---+
    ");
}
8821
/// Checks the `Display` and `Debug` renderings of a map scalar with four rows
/// (one entry, two entries, empty, null) and the table rendering of its
/// backing array.
#[test]
fn test_map_display_and_debug() {
    let string_builder = StringBuilder::new();
    let int_builder = Int32Builder::with_capacity(4);
    let mut builder = MapBuilder::new(None, string_builder, int_builder);

    // Row 0: {joe: 1}
    builder.keys().append_value("joe");
    builder.values().append_value(1);
    builder.append(true).unwrap();

    // Row 1: {blogs: 2, foo: 4}
    builder.keys().append_value("blogs");
    builder.values().append_value(2);
    builder.keys().append_value("foo");
    builder.values().append_value(4);
    builder.append(true).unwrap();

    // Row 2: valid but empty map. Row 3: null map.
    builder.append(true).unwrap();
    builder.append(false).unwrap();

    let map_value = ScalarValue::Map(Arc::new(builder.finish()));

    assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
    assert_eq!(
        format!("{map_value:?}"),
        r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
    );

    let ScalarValue::Map(arr) = map_value else {
        panic!("Expected map");
    };

    let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
    // Every row is padded to the width of the border rows; unpadded rows
    // could never match the rendered table.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------------+
    | m                  |
    +--------------------+
    | {joe: 1}           |
    | {blogs: 2, foo: 4} |
    | {}                 |
    |                    |
    +--------------------+
    ");
}
8864
/// `Display` for the binary scalar variants prints `NULL` for a missing value
/// and upper-case hex otherwise; the eleven-byte inputs render as the first
/// ten bytes followed by `...`.
#[test]
fn test_binary_display() {
    let small = vec![1u8, 2, 3];
    let large = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (value, expected rendering)
    let cases = [
        (ScalarValue::Binary(None), "NULL"),
        (ScalarValue::Binary(Some(vec![42u8])), "2A"),
        (ScalarValue::Binary(Some(small.clone())), "010203"),
        (ScalarValue::Binary(Some(large.clone())), "0102030405060708090A..."),
        (ScalarValue::BinaryView(None), "NULL"),
        (ScalarValue::BinaryView(Some(small.clone())), "010203"),
        (ScalarValue::BinaryView(Some(large.clone())), "0102030405060708090A..."),
        (ScalarValue::LargeBinary(None), "NULL"),
        (ScalarValue::LargeBinary(Some(small.clone())), "010203"),
        (ScalarValue::LargeBinary(Some(large.clone())), "0102030405060708090A..."),
        (ScalarValue::FixedSizeBinary(3, None), "NULL"),
        (ScalarValue::FixedSizeBinary(3, Some(small)), "010203"),
        (ScalarValue::FixedSizeBinary(11, Some(large)), "0102030405060708090A..."),
    ];
    for (value, rendered) in cases {
        assert_eq!(format!("{value}"), rendered);
    }
}
8903
/// `Debug` for the binary scalar variants prints the variant name (plus the
/// size for `FixedSizeBinary`) around either `NULL` or a quoted,
/// comma-separated list of decimal byte values, without truncation.
#[test]
fn test_binary_debug() {
    let small = vec![1u8, 2, 3];
    let large = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (value, expected rendering)
    let cases = [
        (ScalarValue::Binary(None), "Binary(NULL)"),
        (ScalarValue::Binary(Some(vec![42u8])), "Binary(\"42\")"),
        (ScalarValue::Binary(Some(small.clone())), "Binary(\"1,2,3\")"),
        (
            ScalarValue::Binary(Some(large.clone())),
            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::BinaryView(None), "BinaryView(NULL)"),
        (ScalarValue::BinaryView(Some(small.clone())), "BinaryView(\"1,2,3\")"),
        (
            ScalarValue::BinaryView(Some(large.clone())),
            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::LargeBinary(None), "LargeBinary(NULL)"),
        (ScalarValue::LargeBinary(Some(small.clone())), "LargeBinary(\"1,2,3\")"),
        (
            ScalarValue::LargeBinary(Some(large.clone())),
            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::FixedSizeBinary(3, None), "FixedSizeBinary(3, NULL)"),
        (
            ScalarValue::FixedSizeBinary(3, Some(small)),
            "FixedSizeBinary(3, \"1,2,3\")",
        ),
        (
            ScalarValue::FixedSizeBinary(11, Some(large)),
            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
    ];
    for (value, rendered) in cases {
        assert_eq!(format!("{value:?}"), rendered);
    }
}
8957
/// `new_list_nullable` over a single timezone-less millisecond timestamp
/// produces an array of length one.
#[test]
fn test_build_timestamp_millisecond_list() {
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, None);
    let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];

    let list = ScalarValue::new_list_nullable(&values, &element_type);

    assert_eq!(1, list.len());
}
8967
/// `new_list_nullable` keeps the timezone of its element type: the resulting
/// list's item field is a nullable millisecond timestamp tagged with "UTC".
#[test]
fn test_newlist_timestamp_zone() {
    let s: &'static str = "UTC";
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, Some(s.into()));
    let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];

    let list = ScalarValue::new_list_nullable(&values, &element_type);
    assert_eq!(1, list.len());

    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(element_type, true)));
    assert_eq!(list.data_type(), &expected_type);
}
8985
8986 fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
8987 let vector_size = sample_size;
8988 let mut timestamp = vec![];
8989 let mut rng = rand::rng();
8990 for i in 0..vector_size {
8991 let year = rng.random_range(1995..=2050);
8992 let month = rng.random_range(1..=12);
8993 let day = rng.random_range(1..=28); let hour = rng.random_range(0..=23);
8995 let minute = rng.random_range(0..=59);
8996 let second = rng.random_range(0..=59);
8997 if i % 4 == 0 {
8998 timestamp.push(ScalarValue::TimestampSecond(
8999 Some(
9000 NaiveDate::from_ymd_opt(year, month, day)
9001 .unwrap()
9002 .and_hms_opt(hour, minute, second)
9003 .unwrap()
9004 .and_utc()
9005 .timestamp(),
9006 ),
9007 None,
9008 ))
9009 } else if i % 4 == 1 {
9010 let millisec = rng.random_range(0..=999);
9011 timestamp.push(ScalarValue::TimestampMillisecond(
9012 Some(
9013 NaiveDate::from_ymd_opt(year, month, day)
9014 .unwrap()
9015 .and_hms_milli_opt(hour, minute, second, millisec)
9016 .unwrap()
9017 .and_utc()
9018 .timestamp_millis(),
9019 ),
9020 None,
9021 ))
9022 } else if i % 4 == 2 {
9023 let microsec = rng.random_range(0..=999_999);
9024 timestamp.push(ScalarValue::TimestampMicrosecond(
9025 Some(
9026 NaiveDate::from_ymd_opt(year, month, day)
9027 .unwrap()
9028 .and_hms_micro_opt(hour, minute, second, microsec)
9029 .unwrap()
9030 .and_utc()
9031 .timestamp_micros(),
9032 ),
9033 None,
9034 ))
9035 } else if i % 4 == 3 {
9036 let nanosec = rng.random_range(0..=999_999_999);
9037 timestamp.push(ScalarValue::TimestampNanosecond(
9038 Some(
9039 NaiveDate::from_ymd_opt(year, month, day)
9040 .unwrap()
9041 .and_hms_nano_opt(hour, minute, second, nanosec)
9042 .unwrap()
9043 .and_utc()
9044 .timestamp_nanos_opt()
9045 .unwrap(),
9046 ),
9047 None,
9048 ))
9049 }
9050 }
9051 timestamp
9052 }
9053
9054 fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
9055 const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
9056 const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
9057
9058 let vector_size = sample_size;
9059 let mut intervals = vec![];
9060 let mut rng = rand::rng();
9061 const SECS_IN_ONE_DAY: i32 = 86_400;
9062 const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
9063 for i in 0..vector_size {
9064 if i % 4 == 0 {
9065 let days = rng.random_range(0..5000);
9066 let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
9068 intervals.push(ScalarValue::new_interval_dt(days, millis));
9069 } else if i % 4 == 1 {
9070 let days = rng.random_range(0..5000);
9071 let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
9072 intervals.push(ScalarValue::new_interval_dt(days, millisec));
9073 } else if i % 4 == 2 {
9074 let days = rng.random_range(0..5000);
9075 let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
9077 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9078 } else {
9079 let days = rng.random_range(0..5000);
9080 let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
9081 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9082 }
9083 }
9084 intervals
9085 }
9086
9087 fn union_fields() -> UnionFields {
9088 [
9089 (0, Arc::new(Field::new("A", DataType::Int32, true))),
9090 (1, Arc::new(Field::new("B", DataType::Float64, true))),
9091 ]
9092 .into_iter()
9093 .collect()
9094 }
9095
9096 #[test]
9097 fn sparse_scalar_union_is_null() {
9098 let sparse_scalar = ScalarValue::Union(
9099 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9100 union_fields(),
9101 UnionMode::Sparse,
9102 );
9103 assert!(sparse_scalar.is_null());
9104 }
9105
9106 #[test]
9107 fn dense_scalar_union_is_null() {
9108 let dense_scalar = ScalarValue::Union(
9109 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9110 union_fields(),
9111 UnionMode::Dense,
9112 );
9113 assert!(dense_scalar.is_null());
9114 }
9115
9116 #[test]
9117 fn cast_date_to_timestamp_overflow_returns_error() {
9118 let scalar = ScalarValue::Date32(Some(i32::MAX));
9119 let err = scalar
9120 .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9121 .expect_err("expected cast to fail");
9122 assert!(
9123 err.to_string()
9124 .contains("converted value exceeds the representable i64 range"),
9125 "unexpected error: {err}"
9126 );
9127 }
9128
9129 #[test]
9130 fn null_dictionary_scalar_produces_null_dictionary_array() {
9131 let dictionary_scalar = ScalarValue::Dictionary(
9132 Box::new(DataType::Int32),
9133 Box::new(ScalarValue::Null),
9134 );
9135 assert!(dictionary_scalar.is_null());
9136 let dictionary_array = dictionary_scalar.to_array().unwrap();
9137 assert!(dictionary_array.is_null(0));
9138 }
9139
9140 #[test]
9141 fn test_scalar_value_try_new_null() {
9142 let scalars = vec![
9143 ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
9144 ScalarValue::try_new_null(&DataType::Int8).unwrap(),
9145 ScalarValue::try_new_null(&DataType::Int16).unwrap(),
9146 ScalarValue::try_new_null(&DataType::Int32).unwrap(),
9147 ScalarValue::try_new_null(&DataType::Int64).unwrap(),
9148 ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
9149 ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
9150 ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
9151 ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
9152 ScalarValue::try_new_null(&DataType::Float16).unwrap(),
9153 ScalarValue::try_new_null(&DataType::Float32).unwrap(),
9154 ScalarValue::try_new_null(&DataType::Float64).unwrap(),
9155 ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
9156 ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
9157 ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
9158 ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
9159 ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
9160 ScalarValue::try_new_null(&DataType::Binary).unwrap(),
9161 ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
9162 ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
9163 ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
9164 ScalarValue::try_new_null(&DataType::Date32).unwrap(),
9165 ScalarValue::try_new_null(&DataType::Date64).unwrap(),
9166 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
9167 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
9168 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
9169 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
9170 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
9171 .unwrap(),
9172 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
9173 .unwrap(),
9174 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
9175 .unwrap(),
9176 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9177 .unwrap(),
9178 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
9179 .unwrap(),
9180 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
9181 .unwrap(),
9182 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
9183 .unwrap(),
9184 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
9185 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
9186 .unwrap(),
9187 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
9188 ScalarValue::try_new_null(&DataType::Null).unwrap(),
9189 ];
9190 assert!(scalars.iter().all(|s| s.is_null()));
9191
9192 let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
9193 let map_field_ref = Arc::new(Field::new(
9194 "foo",
9195 DataType::Struct(Fields::from(vec![
9196 Field::new("bar", DataType::Utf8, true),
9197 Field::new("baz", DataType::Int32, true),
9198 ])),
9199 true,
9200 ));
9201 let scalars = [
9202 ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
9203 ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
9204 .unwrap(),
9205 ScalarValue::try_new_null(&DataType::FixedSizeList(
9206 Arc::clone(&field_ref),
9207 42,
9208 ))
9209 .unwrap(),
9210 ScalarValue::try_new_null(&DataType::Struct(
9211 vec![Arc::clone(&field_ref)].into(),
9212 ))
9213 .unwrap(),
9214 ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
9215 ScalarValue::try_new_null(&DataType::Union(
9216 UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
9217 UnionMode::Dense,
9218 ))
9219 .unwrap(),
9220 ];
9221 assert!(scalars.iter().all(|s| s.is_null()));
9222 }
9223
9224 fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
9227 let actual = actual.as_ref();
9228 let expected_prefix = expected_prefix.as_ref();
9229 assert!(
9230 actual.starts_with(expected_prefix),
9231 "Expected '{actual}' to start with '{expected_prefix}'"
9232 );
9233 }
9234
9235 #[test]
9236 fn test_new_default() {
9237 assert_eq!(
9239 ScalarValue::new_default(&DataType::Int32).unwrap(),
9240 ScalarValue::Int32(Some(0))
9241 );
9242 assert_eq!(
9243 ScalarValue::new_default(&DataType::Float64).unwrap(),
9244 ScalarValue::Float64(Some(0.0))
9245 );
9246 assert_eq!(
9247 ScalarValue::new_default(&DataType::Boolean).unwrap(),
9248 ScalarValue::Boolean(Some(false))
9249 );
9250
9251 assert_eq!(
9253 ScalarValue::new_default(&DataType::Utf8).unwrap(),
9254 ScalarValue::Utf8(Some("".to_string()))
9255 );
9256 assert_eq!(
9257 ScalarValue::new_default(&DataType::LargeUtf8).unwrap(),
9258 ScalarValue::LargeUtf8(Some("".to_string()))
9259 );
9260
9261 assert_eq!(
9263 ScalarValue::new_default(&DataType::Binary).unwrap(),
9264 ScalarValue::Binary(Some(vec![]))
9265 );
9266
9267 assert_eq!(
9269 ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(),
9270 ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0]))
9271 );
9272
9273 assert_eq!(
9275 ScalarValue::new_default(&DataType::Date32).unwrap(),
9276 ScalarValue::Date32(Some(0))
9277 );
9278 assert_eq!(
9279 ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(),
9280 ScalarValue::Time32Second(Some(0))
9281 );
9282
9283 assert_eq!(
9285 ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(),
9286 ScalarValue::Decimal128(Some(0), 10, 2)
9287 );
9288
9289 let list_field = Field::new_list_field(DataType::Int32, true);
9291 let list_result =
9292 ScalarValue::new_default(&DataType::List(Arc::new(list_field.clone())))
9293 .unwrap();
9294 match list_result {
9295 ScalarValue::List(arr) => {
9296 assert_eq!(arr.len(), 1);
9297 assert_eq!(arr.value_length(0), 0); }
9299 _ => panic!("Expected List"),
9300 }
9301
9302 let struct_fields = Fields::from(vec![
9304 Field::new("a", DataType::Int32, false),
9305 Field::new("b", DataType::Utf8, false),
9306 ]);
9307 let struct_result =
9308 ScalarValue::new_default(&DataType::Struct(struct_fields.clone())).unwrap();
9309 match struct_result {
9310 ScalarValue::Struct(arr) => {
9311 assert_eq!(arr.len(), 1);
9312 assert_eq!(arr.column(0).as_primitive::<Int32Type>().value(0), 0);
9313 assert_eq!(arr.column(1).as_string::<i32>().value(0), "");
9314 }
9315 _ => panic!("Expected Struct"),
9316 }
9317
9318 let union_fields = UnionFields::try_new(
9320 vec![0, 1],
9321 vec![
9322 Field::new("i32", DataType::Int32, false),
9323 Field::new("f64", DataType::Float64, false),
9324 ],
9325 )
9326 .unwrap();
9327 let union_result = ScalarValue::new_default(&DataType::Union(
9328 union_fields.clone(),
9329 UnionMode::Sparse,
9330 ))
9331 .unwrap();
9332 match union_result {
9333 ScalarValue::Union(Some((type_id, value)), _, _) => {
9334 assert_eq!(type_id, 0);
9335 assert_eq!(*value, ScalarValue::Int32(Some(0)));
9336 }
9337 _ => panic!("Expected Union"),
9338 }
9339 }
9340
9341 #[test]
9342 fn test_scalar_min() {
9343 assert_eq!(
9345 ScalarValue::min(&DataType::Int8),
9346 Some(ScalarValue::Int8(Some(i8::MIN)))
9347 );
9348 assert_eq!(
9349 ScalarValue::min(&DataType::Int32),
9350 Some(ScalarValue::Int32(Some(i32::MIN)))
9351 );
9352 assert_eq!(
9353 ScalarValue::min(&DataType::UInt8),
9354 Some(ScalarValue::UInt8(Some(0)))
9355 );
9356 assert_eq!(
9357 ScalarValue::min(&DataType::UInt64),
9358 Some(ScalarValue::UInt64(Some(0)))
9359 );
9360
9361 assert_eq!(
9363 ScalarValue::min(&DataType::Float32),
9364 Some(ScalarValue::Float32(Some(f32::NEG_INFINITY)))
9365 );
9366 assert_eq!(
9367 ScalarValue::min(&DataType::Float64),
9368 Some(ScalarValue::Float64(Some(f64::NEG_INFINITY)))
9369 );
9370
9371 let decimal_min = ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap();
9373 match decimal_min {
9374 ScalarValue::Decimal128(Some(val), 5, 2) => {
9375 assert_eq!(val, -99999); }
9377 _ => panic!("Expected Decimal128"),
9378 }
9379
9380 assert_eq!(
9382 ScalarValue::min(&DataType::Date32),
9383 Some(ScalarValue::Date32(Some(i32::MIN)))
9384 );
9385 assert_eq!(
9386 ScalarValue::min(&DataType::Time32(TimeUnit::Second)),
9387 Some(ScalarValue::Time32Second(Some(0)))
9388 );
9389 assert_eq!(
9390 ScalarValue::min(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
9391 Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), None))
9392 );
9393
9394 assert_eq!(
9396 ScalarValue::min(&DataType::Duration(TimeUnit::Second)),
9397 Some(ScalarValue::DurationSecond(Some(i64::MIN)))
9398 );
9399
9400 assert_eq!(ScalarValue::min(&DataType::Utf8), None);
9402 assert_eq!(ScalarValue::min(&DataType::Binary), None);
9403 assert_eq!(
9404 ScalarValue::min(&DataType::List(Arc::new(Field::new(
9405 "item",
9406 DataType::Int32,
9407 true
9408 )))),
9409 None
9410 );
9411 }
9412
9413 #[test]
9414 fn test_scalar_max() {
9415 assert_eq!(
9417 ScalarValue::max(&DataType::Int8),
9418 Some(ScalarValue::Int8(Some(i8::MAX)))
9419 );
9420 assert_eq!(
9421 ScalarValue::max(&DataType::Int32),
9422 Some(ScalarValue::Int32(Some(i32::MAX)))
9423 );
9424 assert_eq!(
9425 ScalarValue::max(&DataType::UInt8),
9426 Some(ScalarValue::UInt8(Some(u8::MAX)))
9427 );
9428 assert_eq!(
9429 ScalarValue::max(&DataType::UInt64),
9430 Some(ScalarValue::UInt64(Some(u64::MAX)))
9431 );
9432
9433 assert_eq!(
9435 ScalarValue::max(&DataType::Float32),
9436 Some(ScalarValue::Float32(Some(f32::INFINITY)))
9437 );
9438 assert_eq!(
9439 ScalarValue::max(&DataType::Float64),
9440 Some(ScalarValue::Float64(Some(f64::INFINITY)))
9441 );
9442
9443 let decimal_max = ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap();
9445 match decimal_max {
9446 ScalarValue::Decimal128(Some(val), 5, 2) => {
9447 assert_eq!(val, 99999); }
9449 _ => panic!("Expected Decimal128"),
9450 }
9451
9452 assert_eq!(
9454 ScalarValue::max(&DataType::Date32),
9455 Some(ScalarValue::Date32(Some(i32::MAX)))
9456 );
9457 assert_eq!(
9458 ScalarValue::max(&DataType::Time32(TimeUnit::Second)),
9459 Some(ScalarValue::Time32Second(Some(86_399))) );
9461 assert_eq!(
9462 ScalarValue::max(&DataType::Time64(TimeUnit::Microsecond)),
9463 Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))) );
9465 assert_eq!(
9466 ScalarValue::max(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
9467 Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), None))
9468 );
9469
9470 assert_eq!(
9472 ScalarValue::max(&DataType::Duration(TimeUnit::Millisecond)),
9473 Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
9474 );
9475
9476 assert_eq!(ScalarValue::max(&DataType::Utf8), None);
9478 assert_eq!(ScalarValue::max(&DataType::Binary), None);
9479 assert_eq!(
9480 ScalarValue::max(&DataType::Struct(Fields::from(vec![Field::new(
9481 "field",
9482 DataType::Int32,
9483 true
9484 )]))),
9485 None
9486 );
9487 }
9488
9489 #[test]
9490 fn test_min_max_float16() {
9491 let min_f16 = ScalarValue::min(&DataType::Float16).unwrap();
9493 match min_f16 {
9494 ScalarValue::Float16(Some(val)) => {
9495 assert_eq!(val, f16::NEG_INFINITY);
9496 }
9497 _ => panic!("Expected Float16"),
9498 }
9499
9500 let max_f16 = ScalarValue::max(&DataType::Float16).unwrap();
9501 match max_f16 {
9502 ScalarValue::Float16(Some(val)) => {
9503 assert_eq!(val, f16::INFINITY);
9504 }
9505 _ => panic!("Expected Float16"),
9506 }
9507 }
9508
9509 #[test]
9510 fn test_new_default_interval() {
9511 assert_eq!(
9513 ScalarValue::new_default(&DataType::Interval(IntervalUnit::YearMonth))
9514 .unwrap(),
9515 ScalarValue::IntervalYearMonth(Some(0))
9516 );
9517 assert_eq!(
9518 ScalarValue::new_default(&DataType::Interval(IntervalUnit::DayTime)).unwrap(),
9519 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
9520 );
9521 assert_eq!(
9522 ScalarValue::new_default(&DataType::Interval(IntervalUnit::MonthDayNano))
9523 .unwrap(),
9524 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
9525 );
9526 }
9527
9528 #[test]
9529 fn test_min_max_with_timezone() {
9530 let tz = Some(Arc::from("UTC"));
9531
9532 let min_ts =
9534 ScalarValue::min(&DataType::Timestamp(TimeUnit::Second, tz.clone())).unwrap();
9535 match min_ts {
9536 ScalarValue::TimestampSecond(Some(val), Some(tz_str)) => {
9537 assert_eq!(val, i64::MIN);
9538 assert_eq!(tz_str.as_ref(), "UTC");
9539 }
9540 _ => panic!("Expected TimestampSecond with timezone"),
9541 }
9542
9543 let max_ts =
9544 ScalarValue::max(&DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))
9545 .unwrap();
9546 match max_ts {
9547 ScalarValue::TimestampMillisecond(Some(val), Some(tz_str)) => {
9548 assert_eq!(val, i64::MAX);
9549 assert_eq!(tz_str.as_ref(), "UTC");
9550 }
9551 _ => panic!("Expected TimestampMillisecond with timezone"),
9552 }
9553 }
9554
9555 #[test]
9556 fn test_views_minimize_memory() {
9557 let value = "this string is longer than 12 bytes".to_string();
9558
9559 let scalar = ScalarValue::Utf8View(Some(value.clone()));
9560 let array = scalar.to_array_of_size(10).unwrap();
9561 let array = array.as_string_view();
9562 let buffers = array.data_buffers();
9563 assert_eq!(1, buffers.len());
9564 assert_eq!(value.len(), buffers[0].len());
9566
9567 let scalar = ScalarValue::BinaryView(Some(value.bytes().collect()));
9569 let array = scalar.to_array_of_size(10).unwrap();
9570 let array = array.as_binary_view();
9571 let buffers = array.data_buffers();
9572 assert_eq!(1, buffers.len());
9573 assert_eq!(value.len(), buffers[0].len());
9574 }
9575
9576 #[test]
9577 fn test_to_array_of_size_run_end_encoded() {
9578 fn run_test<R: RunEndIndexType>() {
9579 let value = Box::new(ScalarValue::Float32(Some(1.0)));
9580 let size = 5;
9581 let scalar = ScalarValue::RunEndEncoded(
9582 Field::new("run_ends", R::DATA_TYPE, false).into(),
9583 Field::new("values", DataType::Float32, true).into(),
9584 value.clone(),
9585 );
9586 let array = scalar.to_array_of_size(size).unwrap();
9587 let array = array.as_run::<R>();
9588 let array = array.downcast::<Float32Array>().unwrap();
9589 assert_eq!(vec![Some(1.0); size], array.into_iter().collect::<Vec<_>>());
9590 assert_eq!(1, array.values().len());
9591 }
9592
9593 run_test::<Int16Type>();
9594 run_test::<Int32Type>();
9595 run_test::<Int64Type>();
9596
9597 let scalar = ScalarValue::RunEndEncoded(
9598 Field::new("run_ends", DataType::Int16, false).into(),
9599 Field::new("values", DataType::Float32, true).into(),
9600 Box::new(ScalarValue::Float32(Some(1.0))),
9601 );
9602 let err = scalar.to_array_of_size(i16::MAX as usize + 10).unwrap_err();
9603 assert_eq!(
9604 "Execution error: Cannot construct RunArray of size 32777: Overflows run-ends type Int16",
9605 err.to_string()
9606 )
9607 }
9608
9609 #[test]
9610 fn test_eq_array_run_end_encoded() {
9611 let run_ends = Int16Array::from(vec![1, 3]);
9612 let values = Float32Array::from(vec![None, Some(1.0)]);
9613 let run_array =
9614 Arc::new(RunArray::try_new(&run_ends, &values).unwrap()) as ArrayRef;
9615
9616 let scalar = ScalarValue::RunEndEncoded(
9617 Field::new("run_ends", DataType::Int16, false).into(),
9618 Field::new("values", DataType::Float32, true).into(),
9619 Box::new(ScalarValue::Float32(None)),
9620 );
9621 assert!(scalar.eq_array(&run_array, 0).unwrap());
9622
9623 let scalar = ScalarValue::RunEndEncoded(
9624 Field::new("run_ends", DataType::Int16, false).into(),
9625 Field::new("values", DataType::Float32, true).into(),
9626 Box::new(ScalarValue::Float32(Some(1.0))),
9627 );
9628 assert!(scalar.eq_array(&run_array, 1).unwrap());
9629 assert!(scalar.eq_array(&run_array, 2).unwrap());
9630
9631 let scalar = ScalarValue::RunEndEncoded(
9633 Field::new("run_ends", DataType::Int16, false).into(),
9634 Field::new("values", DataType::Float64, true).into(),
9635 Box::new(ScalarValue::Float64(Some(1.0))),
9636 );
9637 let err = scalar.eq_array(&run_array, 1).unwrap_err();
9638 let expected = "Internal error: could not cast array of type Float32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::Float64Type>";
9639 assert!(err.to_string().starts_with(expected));
9640
9641 let scalar = ScalarValue::RunEndEncoded(
9643 Field::new("run_ends", DataType::Int32, false).into(),
9644 Field::new("values", DataType::Float32, true).into(),
9645 Box::new(ScalarValue::Float32(None)),
9646 );
9647 let err = scalar.eq_array(&run_array, 0).unwrap_err();
9648 let expected = "Internal error: could not cast array of type RunEndEncoded(\"run_ends\": non-null Int16, \"values\": Float32) to arrow_array::array::run_array::RunArray<arrow_array::types::Int32Type>";
9649 assert!(err.to_string().starts_with(expected));
9650 }
9651
9652 #[test]
9653 fn test_iter_to_array_run_end_encoded() {
9654 let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int16, false));
9655 let values_field = Arc::new(Field::new("values", DataType::Int64, true));
9656 let scalars = vec![
9657 ScalarValue::RunEndEncoded(
9658 Arc::clone(&run_ends_field),
9659 Arc::clone(&values_field),
9660 Box::new(ScalarValue::Int64(Some(1))),
9661 ),
9662 ScalarValue::RunEndEncoded(
9663 Arc::clone(&run_ends_field),
9664 Arc::clone(&values_field),
9665 Box::new(ScalarValue::Int64(Some(1))),
9666 ),
9667 ScalarValue::RunEndEncoded(
9668 Arc::clone(&run_ends_field),
9669 Arc::clone(&values_field),
9670 Box::new(ScalarValue::Int64(None)),
9671 ),
9672 ScalarValue::RunEndEncoded(
9673 Arc::clone(&run_ends_field),
9674 Arc::clone(&values_field),
9675 Box::new(ScalarValue::Int64(Some(2))),
9676 ),
9677 ScalarValue::RunEndEncoded(
9678 Arc::clone(&run_ends_field),
9679 Arc::clone(&values_field),
9680 Box::new(ScalarValue::Int64(Some(2))),
9681 ),
9682 ScalarValue::RunEndEncoded(
9683 Arc::clone(&run_ends_field),
9684 Arc::clone(&values_field),
9685 Box::new(ScalarValue::Int64(Some(2))),
9686 ),
9687 ];
9688
9689 let run_array = ScalarValue::iter_to_array(scalars).unwrap();
9690 let expected = RunArray::try_new(
9691 &Int16Array::from(vec![2, 3, 6]),
9692 &Int64Array::from(vec![Some(1), None, Some(2)]),
9693 )
9694 .unwrap();
9695 assert_eq!(&expected as &dyn Array, run_array.as_ref());
9696
9697 let scalars = vec![
9699 ScalarValue::RunEndEncoded(
9700 Arc::clone(&run_ends_field),
9701 Arc::clone(&values_field),
9702 Box::new(ScalarValue::Int64(Some(1))),
9703 ),
9704 ScalarValue::RunEndEncoded(
9705 Field::new("run_ends", DataType::Int32, false).into(),
9706 Arc::clone(&values_field),
9707 Box::new(ScalarValue::Int64(Some(1))),
9708 ),
9709 ];
9710 let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9711 let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int32 }, Field { name: \"values\", data_type: Int64, nullable: true }, Int64(1))";
9712 assert!(err.to_string().starts_with(expected));
9713
9714 let scalars = vec![
9716 ScalarValue::RunEndEncoded(
9717 Arc::clone(&run_ends_field),
9718 Arc::clone(&values_field),
9719 Box::new(ScalarValue::Int64(Some(1))),
9720 ),
9721 ScalarValue::RunEndEncoded(
9722 Arc::clone(&run_ends_field),
9723 Field::new("values", DataType::Int32, true).into(),
9724 Box::new(ScalarValue::Int32(Some(1))),
9725 ),
9726 ];
9727 let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9728 let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int16 }, Field { name: \"values\", data_type: Int32, nullable: true }, Int32(1))";
9729 assert!(err.to_string().starts_with(expected));
9730
9731 let scalars = vec![
9733 ScalarValue::RunEndEncoded(
9734 Arc::clone(&run_ends_field),
9735 Arc::clone(&values_field),
9736 Box::new(ScalarValue::Int64(Some(1))),
9737 ),
9738 ScalarValue::Int64(Some(1)),
9739 ];
9740 let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9741 let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: Int64(1)";
9742 assert!(err.to_string().starts_with(expected));
9743 }
9744
9745 #[test]
9746 fn test_convert_array_to_scalar_vec() {
9747 let list = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
9749 Some(vec![Some(1), Some(2)]),
9750 None,
9751 Some(vec![Some(3), None, Some(4)]),
9752 ]);
9753 let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap();
9754 assert_eq!(
9755 converted,
9756 vec![
9757 Some(vec![
9758 ScalarValue::Int64(Some(1)),
9759 ScalarValue::Int64(Some(2))
9760 ]),
9761 None,
9762 Some(vec![
9763 ScalarValue::Int64(Some(3)),
9764 ScalarValue::Int64(None),
9765 ScalarValue::Int64(Some(4))
9766 ]),
9767 ]
9768 );
9769
9770 let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
9772 Some(vec![Some(1), Some(2)]),
9773 None,
9774 Some(vec![Some(3), None, Some(4)]),
9775 ]);
9776 let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap();
9777 assert_eq!(
9778 converted,
9779 vec![
9780 Some(vec![
9781 ScalarValue::Int64(Some(1)),
9782 ScalarValue::Int64(Some(2))
9783 ]),
9784 None,
9785 Some(vec![
9786 ScalarValue::Int64(Some(3)),
9787 ScalarValue::Int64(None),
9788 ScalarValue::Int64(Some(4))
9789 ]),
9790 ]
9791 );
9792
9793 let funky = ListArray::new(
9797 Field::new_list_field(DataType::Int64, true).into(),
9798 OffsetBuffer::new(vec![0, 2, 4, 5].into()),
9799 Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9800 Some(NullBuffer::from(vec![true, false, true])),
9801 );
9802 let converted = ScalarValue::convert_array_to_scalar_vec(&funky).unwrap();
9803 assert_eq!(
9804 converted,
9805 vec![
9806 Some(vec![
9807 ScalarValue::Int64(Some(1)),
9808 ScalarValue::Int64(Some(2))
9809 ]),
9810 None,
9811 Some(vec![ScalarValue::Int64(Some(5))]),
9812 ]
9813 );
9814
9815 let array4 = ListArray::new(
9819 Field::new_list_field(DataType::Int64, true).into(),
9820 OffsetBuffer::new(vec![0, 2, 2, 5].into()),
9821 Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9822 Some(NullBuffer::from(vec![true, false, true])),
9823 );
9824 let converted = ScalarValue::convert_array_to_scalar_vec(&array4).unwrap();
9825 assert_eq!(
9826 converted,
9827 vec![
9828 Some(vec![
9829 ScalarValue::Int64(Some(1)),
9830 ScalarValue::Int64(Some(2))
9831 ]),
9832 None,
9833 Some(vec![
9834 ScalarValue::Int64(Some(3)),
9835 ScalarValue::Int64(Some(4)),
9836 ScalarValue::Int64(Some(5)),
9837 ]),
9838 ]
9839 );
9840
9841 let array5 = ListArray::new(
9844 Field::new_list_field(DataType::Int64, true).into(),
9845 OffsetBuffer::new(vec![0, 2, 2, 5].into()),
9846 Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9847 Some(NullBuffer::from(vec![true, true, true])),
9848 );
9849 let converted = ScalarValue::convert_array_to_scalar_vec(&array5).unwrap();
9850 assert_eq!(
9851 converted,
9852 vec![
9853 Some(vec![
9854 ScalarValue::Int64(Some(1)),
9855 ScalarValue::Int64(Some(2))
9856 ]),
9857 Some(vec![]),
9858 Some(vec![
9859 ScalarValue::Int64(Some(3)),
9860 ScalarValue::Int64(Some(4)),
9861 ScalarValue::Int64(Some(5)),
9862 ]),
9863 ]
9864 );
9865 }
9866}