1mod consts;
21mod struct_builder;
22
23use std::borrow::Borrow;
24use std::cmp::Ordering;
25use std::collections::{HashSet, VecDeque};
26use std::convert::Infallible;
27use std::fmt;
28use std::hash::Hash;
29use std::hash::Hasher;
30use std::iter::repeat_n;
31use std::mem::{size_of, size_of_val};
32use std::str::FromStr;
33use std::sync::Arc;
34
35use crate::cast::{
36 as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
37 as_date64_array, as_decimal128_array, as_decimal256_array, as_dictionary_array,
38 as_duration_microsecond_array, as_duration_millisecond_array,
39 as_duration_nanosecond_array, as_duration_second_array, as_fixed_size_binary_array,
40 as_fixed_size_list_array, as_float16_array, as_float32_array, as_float64_array,
41 as_int16_array, as_int32_array, as_int64_array, as_int8_array, as_interval_dt_array,
42 as_interval_mdn_array, as_interval_ym_array, as_large_binary_array,
43 as_large_list_array, as_large_string_array, as_string_array, as_string_view_array,
44 as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
45 as_time64_nanosecond_array, as_timestamp_microsecond_array,
46 as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
47 as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array,
48 as_uint8_array, as_union_array,
49};
50use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
51use crate::format::DEFAULT_CAST_OPTIONS;
52use crate::hash_utils::create_hashes;
53use crate::utils::SingleRowListArrayBuilder;
54use crate::{_internal_datafusion_err, arrow_datafusion_err};
55use arrow::array::{
56 new_empty_array, new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp,
57 ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
58 Date64Array, Decimal128Array, Decimal256Array, DictionaryArray,
59 DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
60 DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array,
61 Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array,
62 Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
63 LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray,
64 MutableArrayData, PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray,
65 Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
66 Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
67 TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
68 UInt64Array, UInt8Array, UnionArray,
69};
70use arrow::buffer::ScalarBuffer;
71use arrow::compute::kernels::cast::{cast_with_options, CastOptions};
72use arrow::compute::kernels::numeric::{
73 add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
74};
75use arrow::datatypes::{
76 i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
77 Date32Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, Int8Type,
78 IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType,
79 IntervalUnit, IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
80 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type,
81 UInt32Type, UInt64Type, UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
82};
83use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
84use chrono::{Duration, NaiveDate};
85use half::f16;
86pub use struct_builder::ScalarStructBuilder;
87
/// A single, possibly null, value tagged with the Arrow data type it belongs to.
///
/// Most variants wrap their payload in an `Option`, where `None` represents a
/// null of that type (see `ScalarValue::try_new_null`, which builds exactly
/// those `None` payloads from a `DataType`).
///
/// Nested variants (`List`, `LargeList`, `FixedSizeList`, `Struct`, `Map`) hold
/// an Arrow array instead; the code in this file builds and reads those arrays
/// with exactly one row.
#[derive(Clone)]
pub enum ScalarValue {
    /// The untyped null value (`DataType::Null`).
    Null,
    /// Boolean value.
    Boolean(Option<bool>),
    /// 16-bit float.
    Float16(Option<f16>),
    /// 32-bit float.
    Float32(Option<f32>),
    /// 64-bit float.
    Float64(Option<f64>),
    /// 128-bit decimal: value, precision, scale.
    Decimal128(Option<i128>, u8, i8),
    /// 256-bit decimal: value, precision, scale.
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit integer.
    Int8(Option<i8>),
    /// Signed 16-bit integer.
    Int16(Option<i16>),
    /// Signed 32-bit integer.
    Int32(Option<i32>),
    /// Signed 64-bit integer.
    Int64(Option<i64>),
    /// Unsigned 8-bit integer.
    UInt8(Option<u8>),
    /// Unsigned 16-bit integer.
    UInt16(Option<u16>),
    /// Unsigned 32-bit integer.
    UInt32(Option<u32>),
    /// Unsigned 64-bit integer.
    UInt64(Option<u64>),
    /// String value of type `DataType::Utf8`.
    Utf8(Option<String>),
    /// String value of type `DataType::Utf8View`.
    Utf8View(Option<String>),
    /// String value of type `DataType::LargeUtf8`.
    LargeUtf8(Option<String>),
    /// Byte string of type `DataType::Binary`.
    Binary(Option<Vec<u8>>),
    /// Byte string of type `DataType::BinaryView`.
    BinaryView(Option<Vec<u8>>),
    /// Byte string of type `DataType::FixedSizeBinary`; the `i32` is the
    /// declared byte width taken from the data type.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Byte string of type `DataType::LargeBinary`.
    LargeBinary(Option<Vec<u8>>),
    /// Fixed-size list, stored as an array (one row expected).
    FixedSizeList(Arc<FixedSizeListArray>),
    /// List with 32-bit offsets, stored as an array (one row expected).
    List(Arc<ListArray>),
    /// List with 64-bit offsets, stored as an array (one row expected).
    LargeList(Arc<LargeListArray>),
    /// Struct value, stored as an array (one row expected).
    Struct(Arc<StructArray>),
    /// Map value, stored as an array (one row expected).
    Map(Arc<MapArray>),
    /// `DataType::Date32` value.
    // NOTE(review): presumably days since the UNIX epoch — confirm against Arrow.
    Date32(Option<i32>),
    /// `DataType::Date64` value.
    // NOTE(review): presumably milliseconds since the UNIX epoch — confirm against Arrow.
    Date64(Option<i64>),
    /// `DataType::Time32(TimeUnit::Second)` value.
    Time32Second(Option<i32>),
    /// `DataType::Time32(TimeUnit::Millisecond)` value.
    Time32Millisecond(Option<i32>),
    /// `DataType::Time64(TimeUnit::Microsecond)` value.
    Time64Microsecond(Option<i64>),
    /// `DataType::Time64(TimeUnit::Nanosecond)` value.
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds, with an optional time zone.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds, with an optional time zone.
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds, with an optional time zone.
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds, with an optional time zone.
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// `DataType::Interval(IntervalUnit::YearMonth)` value.
    IntervalYearMonth(Option<i32>),
    /// `DataType::Interval(IntervalUnit::DayTime)` value.
    IntervalDayTime(Option<IntervalDayTime>),
    /// `DataType::Interval(IntervalUnit::MonthDayNano)` value.
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds.
    DurationSecond(Option<i64>),
    /// Duration in milliseconds.
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds.
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds.
    DurationNanosecond(Option<i64>),
    /// Union value: an optional `(i8, value)` pair plus the union's fields and mode.
    // NOTE(review): the `i8` is presumably the Arrow union type id — confirm.
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary-encoded value: the dictionary index (key) type and the wrapped value.
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
325
326impl Hash for Fl<f16> {
327 fn hash<H: Hasher>(&self, state: &mut H) {
328 self.0.to_bits().hash(state);
329 }
330}
331
332impl PartialEq for ScalarValue {
334 fn eq(&self, other: &Self) -> bool {
335 use ScalarValue::*;
336 match (self, other) {
340 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
341 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
342 }
343 (Decimal128(_, _, _), _) => false,
344 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
345 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
346 }
347 (Decimal256(_, _, _), _) => false,
348 (Boolean(v1), Boolean(v2)) => v1.eq(v2),
349 (Boolean(_), _) => false,
350 (Float32(v1), Float32(v2)) => match (v1, v2) {
351 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
352 _ => v1.eq(v2),
353 },
354 (Float16(v1), Float16(v2)) => match (v1, v2) {
355 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
356 _ => v1.eq(v2),
357 },
358 (Float32(_), _) => false,
359 (Float16(_), _) => false,
360 (Float64(v1), Float64(v2)) => match (v1, v2) {
361 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
362 _ => v1.eq(v2),
363 },
364 (Float64(_), _) => false,
365 (Int8(v1), Int8(v2)) => v1.eq(v2),
366 (Int8(_), _) => false,
367 (Int16(v1), Int16(v2)) => v1.eq(v2),
368 (Int16(_), _) => false,
369 (Int32(v1), Int32(v2)) => v1.eq(v2),
370 (Int32(_), _) => false,
371 (Int64(v1), Int64(v2)) => v1.eq(v2),
372 (Int64(_), _) => false,
373 (UInt8(v1), UInt8(v2)) => v1.eq(v2),
374 (UInt8(_), _) => false,
375 (UInt16(v1), UInt16(v2)) => v1.eq(v2),
376 (UInt16(_), _) => false,
377 (UInt32(v1), UInt32(v2)) => v1.eq(v2),
378 (UInt32(_), _) => false,
379 (UInt64(v1), UInt64(v2)) => v1.eq(v2),
380 (UInt64(_), _) => false,
381 (Utf8(v1), Utf8(v2)) => v1.eq(v2),
382 (Utf8(_), _) => false,
383 (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
384 (Utf8View(_), _) => false,
385 (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
386 (LargeUtf8(_), _) => false,
387 (Binary(v1), Binary(v2)) => v1.eq(v2),
388 (Binary(_), _) => false,
389 (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
390 (BinaryView(_), _) => false,
391 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
392 (FixedSizeBinary(_, _), _) => false,
393 (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
394 (LargeBinary(_), _) => false,
395 (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
396 (FixedSizeList(_), _) => false,
397 (List(v1), List(v2)) => v1.eq(v2),
398 (List(_), _) => false,
399 (LargeList(v1), LargeList(v2)) => v1.eq(v2),
400 (LargeList(_), _) => false,
401 (Struct(v1), Struct(v2)) => v1.eq(v2),
402 (Struct(_), _) => false,
403 (Map(v1), Map(v2)) => v1.eq(v2),
404 (Map(_), _) => false,
405 (Date32(v1), Date32(v2)) => v1.eq(v2),
406 (Date32(_), _) => false,
407 (Date64(v1), Date64(v2)) => v1.eq(v2),
408 (Date64(_), _) => false,
409 (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
410 (Time32Second(_), _) => false,
411 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
412 (Time32Millisecond(_), _) => false,
413 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
414 (Time64Microsecond(_), _) => false,
415 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
416 (Time64Nanosecond(_), _) => false,
417 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
418 (TimestampSecond(_, _), _) => false,
419 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
420 (TimestampMillisecond(_, _), _) => false,
421 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
422 (TimestampMicrosecond(_, _), _) => false,
423 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
424 (TimestampNanosecond(_, _), _) => false,
425 (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
426 (DurationSecond(_), _) => false,
427 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
428 (DurationMillisecond(_), _) => false,
429 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
430 (DurationMicrosecond(_), _) => false,
431 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
432 (DurationNanosecond(_), _) => false,
433 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
434 (IntervalYearMonth(_), _) => false,
435 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
436 (IntervalDayTime(_), _) => false,
437 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
438 (IntervalMonthDayNano(_), _) => false,
439 (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
440 val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
441 }
442 (Union(_, _, _), _) => false,
443 (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
444 (Dictionary(_, _), _) => false,
445 (Null, Null) => true,
446 (Null, _) => false,
447 }
448 }
449}
450
451impl PartialOrd for ScalarValue {
453 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
454 use ScalarValue::*;
455 match (self, other) {
459 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
460 if p1.eq(p2) && s1.eq(s2) {
461 v1.partial_cmp(v2)
462 } else {
463 None
465 }
466 }
467 (Decimal128(_, _, _), _) => None,
468 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
469 if p1.eq(p2) && s1.eq(s2) {
470 v1.partial_cmp(v2)
471 } else {
472 None
474 }
475 }
476 (Decimal256(_, _, _), _) => None,
477 (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
478 (Boolean(_), _) => None,
479 (Float32(v1), Float32(v2)) => match (v1, v2) {
480 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
481 _ => v1.partial_cmp(v2),
482 },
483 (Float16(v1), Float16(v2)) => match (v1, v2) {
484 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
485 _ => v1.partial_cmp(v2),
486 },
487 (Float32(_), _) => None,
488 (Float16(_), _) => None,
489 (Float64(v1), Float64(v2)) => match (v1, v2) {
490 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
491 _ => v1.partial_cmp(v2),
492 },
493 (Float64(_), _) => None,
494 (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
495 (Int8(_), _) => None,
496 (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
497 (Int16(_), _) => None,
498 (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
499 (Int32(_), _) => None,
500 (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
501 (Int64(_), _) => None,
502 (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
503 (UInt8(_), _) => None,
504 (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
505 (UInt16(_), _) => None,
506 (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
507 (UInt32(_), _) => None,
508 (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
509 (UInt64(_), _) => None,
510 (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
511 (Utf8(_), _) => None,
512 (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
513 (LargeUtf8(_), _) => None,
514 (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
515 (Utf8View(_), _) => None,
516 (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
517 (Binary(_), _) => None,
518 (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
519 (BinaryView(_), _) => None,
520 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
521 (FixedSizeBinary(_, _), _) => None,
522 (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
523 (LargeBinary(_), _) => None,
524 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
526 (FixedSizeList(arr1), FixedSizeList(arr2)) => {
527 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
528 }
529 (LargeList(arr1), LargeList(arr2)) => {
530 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
531 }
532 (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
533 (Struct(struct_arr1), Struct(struct_arr2)) => {
534 partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
535 }
536 (Struct(_), _) => None,
537 (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
538 (Map(_), _) => None,
539 (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
540 (Date32(_), _) => None,
541 (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
542 (Date64(_), _) => None,
543 (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
544 (Time32Second(_), _) => None,
545 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
546 (Time32Millisecond(_), _) => None,
547 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
548 (Time64Microsecond(_), _) => None,
549 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
550 (Time64Nanosecond(_), _) => None,
551 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
552 (TimestampSecond(_, _), _) => None,
553 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
554 v1.partial_cmp(v2)
555 }
556 (TimestampMillisecond(_, _), _) => None,
557 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
558 v1.partial_cmp(v2)
559 }
560 (TimestampMicrosecond(_, _), _) => None,
561 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
562 v1.partial_cmp(v2)
563 }
564 (TimestampNanosecond(_, _), _) => None,
565 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
566 (IntervalYearMonth(_), _) => None,
567 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
568 (IntervalDayTime(_), _) => None,
569 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
570 (IntervalMonthDayNano(_), _) => None,
571 (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
572 (DurationSecond(_), _) => None,
573 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
574 (DurationMillisecond(_), _) => None,
575 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
576 (DurationMicrosecond(_), _) => None,
577 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
578 (DurationNanosecond(_), _) => None,
579 (Union(v1, t1, m1), Union(v2, t2, m2)) => {
580 if t1.eq(t2) && m1.eq(m2) {
581 v1.partial_cmp(v2)
582 } else {
583 None
584 }
585 }
586 (Union(_, _, _), _) => None,
587 (Dictionary(k1, v1), Dictionary(k2, v2)) => {
588 if k1 == k2 {
590 v1.partial_cmp(v2)
591 } else {
592 None
593 }
594 }
595 (Dictionary(_, _), _) => None,
596 (Null, Null) => Some(Ordering::Equal),
597 (Null, _) => None,
598 }
599 }
600}
601
602fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
605 assert_eq!(arr.len(), 1);
606 if let Some(arr) = arr.as_list_opt::<i32>() {
607 arr.value(0)
608 } else if let Some(arr) = arr.as_list_opt::<i64>() {
609 arr.value(0)
610 } else if let Some(arr) = arr.as_fixed_size_list_opt() {
611 arr.value(0)
612 } else {
613 unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
614 }
615}
616
617fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
619 if arr1.data_type() != arr2.data_type() {
620 return None;
621 }
622 let arr1 = first_array_for_list(arr1);
623 let arr2 = first_array_for_list(arr2);
624
625 let min_length = arr1.len().min(arr2.len());
626 let arr1_trimmed = arr1.slice(0, min_length);
627 let arr2_trimmed = arr2.slice(0, min_length);
628
629 let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
630 let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
631
632 for j in 0..lt_res.len() {
633 if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
641 return Some(Ordering::Greater);
642 }
643 if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
644 return Some(Ordering::Less);
645 }
646
647 if lt_res.is_valid(j) && lt_res.value(j) {
648 return Some(Ordering::Less);
649 }
650 if eq_res.is_valid(j) && !eq_res.value(j) {
651 return Some(Ordering::Greater);
652 }
653 }
654
655 Some(arr1.len().cmp(&arr2.len()))
656}
657
658fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
659 for i in 0..array.num_columns() {
660 let column = array.column(i);
661 if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
662 flatten(nested_struct, columns);
664 } else {
665 columns.push(column);
667 }
668 }
669}
670
671pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
672 if s1.len() != s2.len() {
673 return None;
674 }
675
676 if s1.data_type() != s2.data_type() {
677 return None;
678 }
679
680 let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
681 let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
682
683 flatten(s1, &mut expanded_columns1);
684 flatten(s2, &mut expanded_columns2);
685
686 for col_index in 0..expanded_columns1.len() {
687 let arr1 = expanded_columns1[col_index];
688 let arr2 = expanded_columns2[col_index];
689
690 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
691 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
692
693 for j in 0..lt_res.len() {
694 if lt_res.is_valid(j) && lt_res.value(j) {
695 return Some(Ordering::Less);
696 }
697 if eq_res.is_valid(j) && !eq_res.value(j) {
698 return Some(Ordering::Greater);
699 }
700 }
701 }
702 Some(Ordering::Equal)
703}
704
705fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
706 if m1.len() != m2.len() {
707 return None;
708 }
709
710 if m1.data_type() != m2.data_type() {
711 return None;
712 }
713
714 for col_index in 0..m1.len() {
715 let arr1 = m1.entries().column(col_index);
716 let arr2 = m2.entries().column(col_index);
717
718 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
719 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
720
721 for j in 0..lt_res.len() {
722 if lt_res.is_valid(j) && lt_res.value(j) {
723 return Some(Ordering::Less);
724 }
725 if eq_res.is_valid(j) && !eq_res.value(j) {
726 return Some(Ordering::Greater);
727 }
728 }
729 }
730 Some(Ordering::Equal)
731}
732
// Marker impl: the hand-written `PartialEq` above compares floats by bit
// pattern, so equality is reflexive even for NaN payloads.
impl Eq for ScalarValue {}
734
// Private newtype wrapper that lets float values be hashed; `Hash` is
// implemented for `Fl<f16>`, `Fl<f32>` and `Fl<f64>` in this file.
struct Fl<T>(T);
737
738macro_rules! hash_float_value {
739 ($(($t:ty, $i:ty)),+) => {
740 $(impl std::hash::Hash for Fl<$t> {
741 #[inline]
742 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
743 state.write(&<$i>::from_ne_bytes(self.0.to_ne_bytes()).to_ne_bytes())
744 }
745 })+
746 };
747}
748
749hash_float_value!((f64, u64), (f32, u32));
750
751impl Hash for ScalarValue {
757 fn hash<H: Hasher>(&self, state: &mut H) {
758 use ScalarValue::*;
759 match self {
760 Decimal128(v, p, s) => {
761 v.hash(state);
762 p.hash(state);
763 s.hash(state)
764 }
765 Decimal256(v, p, s) => {
766 v.hash(state);
767 p.hash(state);
768 s.hash(state)
769 }
770 Boolean(v) => v.hash(state),
771 Float16(v) => v.map(Fl).hash(state),
772 Float32(v) => v.map(Fl).hash(state),
773 Float64(v) => v.map(Fl).hash(state),
774 Int8(v) => v.hash(state),
775 Int16(v) => v.hash(state),
776 Int32(v) => v.hash(state),
777 Int64(v) => v.hash(state),
778 UInt8(v) => v.hash(state),
779 UInt16(v) => v.hash(state),
780 UInt32(v) => v.hash(state),
781 UInt64(v) => v.hash(state),
782 Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
783 Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
784 v.hash(state)
785 }
786 List(arr) => {
787 hash_nested_array(arr.to_owned() as ArrayRef, state);
788 }
789 LargeList(arr) => {
790 hash_nested_array(arr.to_owned() as ArrayRef, state);
791 }
792 FixedSizeList(arr) => {
793 hash_nested_array(arr.to_owned() as ArrayRef, state);
794 }
795 Struct(arr) => {
796 hash_nested_array(arr.to_owned() as ArrayRef, state);
797 }
798 Map(arr) => {
799 hash_nested_array(arr.to_owned() as ArrayRef, state);
800 }
801 Date32(v) => v.hash(state),
802 Date64(v) => v.hash(state),
803 Time32Second(v) => v.hash(state),
804 Time32Millisecond(v) => v.hash(state),
805 Time64Microsecond(v) => v.hash(state),
806 Time64Nanosecond(v) => v.hash(state),
807 TimestampSecond(v, _) => v.hash(state),
808 TimestampMillisecond(v, _) => v.hash(state),
809 TimestampMicrosecond(v, _) => v.hash(state),
810 TimestampNanosecond(v, _) => v.hash(state),
811 DurationSecond(v) => v.hash(state),
812 DurationMillisecond(v) => v.hash(state),
813 DurationMicrosecond(v) => v.hash(state),
814 DurationNanosecond(v) => v.hash(state),
815 IntervalYearMonth(v) => v.hash(state),
816 IntervalDayTime(v) => v.hash(state),
817 IntervalMonthDayNano(v) => v.hash(state),
818 Union(v, t, m) => {
819 v.hash(state);
820 t.hash(state);
821 m.hash(state);
822 }
823 Dictionary(k, v) => {
824 k.hash(state);
825 v.hash(state);
826 }
827 Null => 1.hash(state),
829 }
830 }
831}
832
833fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
834 let arrays = vec![arr.to_owned()];
835 let hashes_buffer = &mut vec![0; arr.len()];
836 let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
837 let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
838 hashes.hash(state);
840}
841
842#[inline]
849pub fn get_dict_value<K: ArrowDictionaryKeyType>(
850 array: &dyn Array,
851 index: usize,
852) -> Result<(&ArrayRef, Option<usize>)> {
853 let dict_array = as_dictionary_array::<K>(array)?;
854 Ok((dict_array.values(), dict_array.key(index)))
855}
856
857fn dict_from_scalar<K: ArrowDictionaryKeyType>(
860 value: &ScalarValue,
861 size: usize,
862) -> Result<ArrayRef> {
863 let values_array = value.to_array_of_size(1)?;
865
866 let key_array: PrimitiveArray<K> = repeat_n(
868 if value.is_null() {
869 None
870 } else {
871 Some(K::default_value())
872 },
873 size,
874 )
875 .collect();
876
877 Ok(Arc::new(
883 DictionaryArray::<K>::try_new(key_array, values_array)?, ))
885}
886
887fn dict_from_values<K: ArrowDictionaryKeyType>(
889 values_array: ArrayRef,
890) -> Result<ArrayRef> {
891 let key_array: PrimitiveArray<K> = (0..values_array.len())
894 .map(|index| {
895 if values_array.is_valid(index) {
896 let native_index = K::Native::from_usize(index).ok_or_else(|| {
897 DataFusionError::Internal(format!(
898 "Can not create index of type {} from value {}",
899 K::DATA_TYPE,
900 index
901 ))
902 })?;
903 Ok(Some(native_index))
904 } else {
905 Ok(None)
906 }
907 })
908 .collect::<Result<Vec<_>>>()?
909 .into_iter()
910 .collect();
911
912 let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
918 Ok(Arc::new(dict_array))
919}
920
// Casts `$array` with `$array_cast`, reads position `$index` (null becomes
// `None`) and wraps it, together with a clone of `$TZ`, in
// `ScalarValue::$SCALAR`. Evaluates to `Result<ScalarValue, DataFusionError>`
// and uses `?`, so it must be expanded inside a function returning a
// compatible `Result`.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
933
// Casts `$array` with `$array_cast`, reads position `$index` (null becomes
// `None`) and wraps it in `ScalarValue::$SCALAR`. Evaluates to
// `Result<ScalarValue, DataFusionError>` and uses `?`, so it must be expanded
// inside a function returning a compatible `Result`.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
945
// Builds an array of `$SIZE` rows from an optional scalar payload `$EXPR`:
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)`, `None` yields
// an all-null array of the given data type. The second form takes an extra
// `$ENUM` argument for data types that carry a parameter.
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE, $SIZE)
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE)
        }
    }};
}
960
// Builds a timestamp array of `$SIZE` rows from an optional payload `$EXPR`:
// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` tagged with
// time zone `$TZ`, `None` yields an all-null array of
// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
        } else {
            new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE)
        }
    };
}
971
// Checks whether position `$index` of `$array` (cast with `$array_cast`)
// matches the optional value `$VALUE`: a `Some(val)` matches a valid slot
// holding an equal value, a `None` matches a null slot. Evaluates to
// `Result<bool, DataFusionError>` and uses `?` for the cast.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        let matches = if let Some(val) = $VALUE {
            is_valid && &array.value($index) == val
        } else {
            !is_valid
        };
        Ok::<bool, DataFusionError>(matches)
    }};
}
982
983impl ScalarValue {
984 pub fn new_primitive<T: ArrowPrimitiveType>(
990 a: Option<T::Native>,
991 d: &DataType,
992 ) -> Result<Self> {
993 match a {
994 None => d.try_into(),
995 Some(v) => {
996 let array = PrimitiveArray::<T>::new(vec![v].into(), None)
997 .with_data_type(d.clone());
998 Self::try_from_array(&array, 0)
999 }
1000 }
1001 }
1002
1003 pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1005 if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1007 return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1008 }
1009 _internal_err!(
1010 "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1011 )
1012 }
1013
1014 pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1026 Ok(match data_type {
1027 DataType::Boolean => ScalarValue::Boolean(None),
1028 DataType::Float16 => ScalarValue::Float16(None),
1029 DataType::Float64 => ScalarValue::Float64(None),
1030 DataType::Float32 => ScalarValue::Float32(None),
1031 DataType::Int8 => ScalarValue::Int8(None),
1032 DataType::Int16 => ScalarValue::Int16(None),
1033 DataType::Int32 => ScalarValue::Int32(None),
1034 DataType::Int64 => ScalarValue::Int64(None),
1035 DataType::UInt8 => ScalarValue::UInt8(None),
1036 DataType::UInt16 => ScalarValue::UInt16(None),
1037 DataType::UInt32 => ScalarValue::UInt32(None),
1038 DataType::UInt64 => ScalarValue::UInt64(None),
1039 DataType::Decimal128(precision, scale) => {
1040 ScalarValue::Decimal128(None, *precision, *scale)
1041 }
1042 DataType::Decimal256(precision, scale) => {
1043 ScalarValue::Decimal256(None, *precision, *scale)
1044 }
1045 DataType::Utf8 => ScalarValue::Utf8(None),
1046 DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1047 DataType::Utf8View => ScalarValue::Utf8View(None),
1048 DataType::Binary => ScalarValue::Binary(None),
1049 DataType::BinaryView => ScalarValue::BinaryView(None),
1050 DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1051 DataType::LargeBinary => ScalarValue::LargeBinary(None),
1052 DataType::Date32 => ScalarValue::Date32(None),
1053 DataType::Date64 => ScalarValue::Date64(None),
1054 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1055 DataType::Time32(TimeUnit::Millisecond) => {
1056 ScalarValue::Time32Millisecond(None)
1057 }
1058 DataType::Time64(TimeUnit::Microsecond) => {
1059 ScalarValue::Time64Microsecond(None)
1060 }
1061 DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1062 DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1063 ScalarValue::TimestampSecond(None, tz_opt.clone())
1064 }
1065 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1066 ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1067 }
1068 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1069 ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1070 }
1071 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1072 ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1073 }
1074 DataType::Interval(IntervalUnit::YearMonth) => {
1075 ScalarValue::IntervalYearMonth(None)
1076 }
1077 DataType::Interval(IntervalUnit::DayTime) => {
1078 ScalarValue::IntervalDayTime(None)
1079 }
1080 DataType::Interval(IntervalUnit::MonthDayNano) => {
1081 ScalarValue::IntervalMonthDayNano(None)
1082 }
1083 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1084 DataType::Duration(TimeUnit::Millisecond) => {
1085 ScalarValue::DurationMillisecond(None)
1086 }
1087 DataType::Duration(TimeUnit::Microsecond) => {
1088 ScalarValue::DurationMicrosecond(None)
1089 }
1090 DataType::Duration(TimeUnit::Nanosecond) => {
1091 ScalarValue::DurationNanosecond(None)
1092 }
1093 DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1094 index_type.clone(),
1095 Box::new(value_type.as_ref().try_into()?),
1096 ),
1097 DataType::List(field_ref) => ScalarValue::List(Arc::new(
1099 GenericListArray::new_null(Arc::clone(field_ref), 1),
1100 )),
1101 DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1103 GenericListArray::new_null(Arc::clone(field_ref), 1),
1104 )),
1105 DataType::FixedSizeList(field_ref, fixed_length) => {
1107 ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1108 Arc::clone(field_ref),
1109 *fixed_length,
1110 1,
1111 )))
1112 }
1113 DataType::Struct(fields) => ScalarValue::Struct(
1114 new_null_array(&DataType::Struct(fields.to_owned()), 1)
1115 .as_struct()
1116 .to_owned()
1117 .into(),
1118 ),
1119 DataType::Map(fields, sorted) => ScalarValue::Map(
1120 new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1121 .as_map()
1122 .to_owned()
1123 .into(),
1124 ),
1125 DataType::Union(fields, mode) => {
1126 ScalarValue::Union(None, fields.clone(), *mode)
1127 }
1128 DataType::Null => ScalarValue::Null,
1129 _ => {
1130 return _not_impl_err!(
1131 "Can't create a null scalar from data_type \"{data_type:?}\""
1132 );
1133 }
1134 })
1135 }
1136
1137 pub fn new_utf8(val: impl Into<String>) -> Self {
1139 ScalarValue::from(val.into())
1140 }
1141
1142 pub fn new_utf8view(val: impl Into<String>) -> Self {
1144 ScalarValue::Utf8View(Some(val.into()))
1145 }
1146
1147 pub fn new_interval_ym(years: i32, months: i32) -> Self {
1150 let val = IntervalYearMonthType::make_value(years, months);
1151 ScalarValue::IntervalYearMonth(Some(val))
1152 }
1153
1154 pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1157 let val = IntervalDayTimeType::make_value(days, millis);
1158 Self::IntervalDayTime(Some(val))
1159 }
1160
1161 pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1164 let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1165 ScalarValue::IntervalMonthDayNano(Some(val))
1166 }
1167
1168 pub fn new_timestamp<T: ArrowTimestampType>(
1171 value: Option<i64>,
1172 tz_opt: Option<Arc<str>>,
1173 ) -> Self {
1174 match T::UNIT {
1175 TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1176 TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1177 TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1178 TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1179 }
1180 }
1181
1182 pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1184 match datatype {
1185 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1186 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1187 _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1188 }
1189 }
1190
1191 pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1193 match datatype {
1196 DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1197 DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1198 _ => {
1199 _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1200 }
1201 }
1202 }
1203
1204 pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1206 match datatype {
1207 DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1208 DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1209 _ => {
1210 _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1211 }
1212 }
1213 }
1214
1215 pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1217 match datatype {
1218 DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1219 DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1220 _ => {
1221 _internal_err!(
1222 "PI_UPPER/2 is not supported for data type: {:?}",
1223 datatype
1224 )
1225 }
1226 }
1227 }
1228
1229 pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1231 match datatype {
1232 DataType::Float32 => {
1233 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1234 }
1235 DataType::Float64 => {
1236 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1237 }
1238 _ => {
1239 _internal_err!(
1240 "-PI/2_LOWER is not supported for data type: {:?}",
1241 datatype
1242 )
1243 }
1244 }
1245 }
1246
1247 pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1249 match datatype {
1250 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1251 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1252 _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1253 }
1254 }
1255
1256 pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1258 match datatype {
1259 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1260 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1261 _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1262 }
1263 }
1264
1265 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1267 match datatype {
1268 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1269 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1270 _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1271 }
1272 }
1273
1274 pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1276 match datatype {
1277 DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1278 DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1279 _ => {
1280 _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1281 }
1282 }
1283 }
1284
1285 pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1287 match datatype {
1288 DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1289 DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1290 _ => {
1291 _internal_err!(
1292 "Negative Infinity is not supported for data type: {:?}",
1293 datatype
1294 )
1295 }
1296 }
1297 }
1298
1299 pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1301 Ok(match datatype {
1302 DataType::Boolean => ScalarValue::Boolean(Some(false)),
1303 DataType::Int8 => ScalarValue::Int8(Some(0)),
1304 DataType::Int16 => ScalarValue::Int16(Some(0)),
1305 DataType::Int32 => ScalarValue::Int32(Some(0)),
1306 DataType::Int64 => ScalarValue::Int64(Some(0)),
1307 DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1308 DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1309 DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1310 DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1311 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1312 DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1313 DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1314 DataType::Decimal128(precision, scale) => {
1315 ScalarValue::Decimal128(Some(0), *precision, *scale)
1316 }
1317 DataType::Decimal256(precision, scale) => {
1318 ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1319 }
1320 DataType::Timestamp(TimeUnit::Second, tz) => {
1321 ScalarValue::TimestampSecond(Some(0), tz.clone())
1322 }
1323 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1324 ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1325 }
1326 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1327 ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1328 }
1329 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1330 ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1331 }
1332 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1333 DataType::Time32(TimeUnit::Millisecond) => {
1334 ScalarValue::Time32Millisecond(Some(0))
1335 }
1336 DataType::Time64(TimeUnit::Microsecond) => {
1337 ScalarValue::Time64Microsecond(Some(0))
1338 }
1339 DataType::Time64(TimeUnit::Nanosecond) => {
1340 ScalarValue::Time64Nanosecond(Some(0))
1341 }
1342 DataType::Interval(IntervalUnit::YearMonth) => {
1343 ScalarValue::IntervalYearMonth(Some(0))
1344 }
1345 DataType::Interval(IntervalUnit::DayTime) => {
1346 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1347 }
1348 DataType::Interval(IntervalUnit::MonthDayNano) => {
1349 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1350 }
1351 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1352 DataType::Duration(TimeUnit::Millisecond) => {
1353 ScalarValue::DurationMillisecond(Some(0))
1354 }
1355 DataType::Duration(TimeUnit::Microsecond) => {
1356 ScalarValue::DurationMicrosecond(Some(0))
1357 }
1358 DataType::Duration(TimeUnit::Nanosecond) => {
1359 ScalarValue::DurationNanosecond(Some(0))
1360 }
1361 DataType::Date32 => ScalarValue::Date32(Some(0)),
1362 DataType::Date64 => ScalarValue::Date64(Some(0)),
1363 _ => {
1364 return _not_impl_err!(
1365 "Can't create a zero scalar from data_type \"{datatype:?}\""
1366 );
1367 }
1368 })
1369 }
1370
1371 pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1373 Ok(match datatype {
1374 DataType::Int8 => ScalarValue::Int8(Some(1)),
1375 DataType::Int16 => ScalarValue::Int16(Some(1)),
1376 DataType::Int32 => ScalarValue::Int32(Some(1)),
1377 DataType::Int64 => ScalarValue::Int64(Some(1)),
1378 DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1379 DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1380 DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1381 DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1382 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1383 DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1384 DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1385 _ => {
1386 return _not_impl_err!(
1387 "Can't create an one scalar from data_type \"{datatype:?}\""
1388 );
1389 }
1390 })
1391 }
1392
1393 pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1395 Ok(match datatype {
1396 DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1397 DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1398 DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1399 DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1400 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1401 DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1402 DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1403 _ => {
1404 return _not_impl_err!(
1405 "Can't create a negative one scalar from data_type \"{datatype:?}\""
1406 );
1407 }
1408 })
1409 }
1410
1411 pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1412 Ok(match datatype {
1413 DataType::Int8 => ScalarValue::Int8(Some(10)),
1414 DataType::Int16 => ScalarValue::Int16(Some(10)),
1415 DataType::Int32 => ScalarValue::Int32(Some(10)),
1416 DataType::Int64 => ScalarValue::Int64(Some(10)),
1417 DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1418 DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1419 DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1420 DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1421 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1422 DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1423 DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1424 _ => {
1425 return _not_impl_err!(
1426 "Can't create a ten scalar from data_type \"{datatype:?}\""
1427 );
1428 }
1429 })
1430 }
1431
1432 pub fn data_type(&self) -> DataType {
1434 match self {
1435 ScalarValue::Boolean(_) => DataType::Boolean,
1436 ScalarValue::UInt8(_) => DataType::UInt8,
1437 ScalarValue::UInt16(_) => DataType::UInt16,
1438 ScalarValue::UInt32(_) => DataType::UInt32,
1439 ScalarValue::UInt64(_) => DataType::UInt64,
1440 ScalarValue::Int8(_) => DataType::Int8,
1441 ScalarValue::Int16(_) => DataType::Int16,
1442 ScalarValue::Int32(_) => DataType::Int32,
1443 ScalarValue::Int64(_) => DataType::Int64,
1444 ScalarValue::Decimal128(_, precision, scale) => {
1445 DataType::Decimal128(*precision, *scale)
1446 }
1447 ScalarValue::Decimal256(_, precision, scale) => {
1448 DataType::Decimal256(*precision, *scale)
1449 }
1450 ScalarValue::TimestampSecond(_, tz_opt) => {
1451 DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1452 }
1453 ScalarValue::TimestampMillisecond(_, tz_opt) => {
1454 DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1455 }
1456 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1457 DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1458 }
1459 ScalarValue::TimestampNanosecond(_, tz_opt) => {
1460 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1461 }
1462 ScalarValue::Float16(_) => DataType::Float16,
1463 ScalarValue::Float32(_) => DataType::Float32,
1464 ScalarValue::Float64(_) => DataType::Float64,
1465 ScalarValue::Utf8(_) => DataType::Utf8,
1466 ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1467 ScalarValue::Utf8View(_) => DataType::Utf8View,
1468 ScalarValue::Binary(_) => DataType::Binary,
1469 ScalarValue::BinaryView(_) => DataType::BinaryView,
1470 ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1471 ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1472 ScalarValue::List(arr) => arr.data_type().to_owned(),
1473 ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1474 ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1475 ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1476 ScalarValue::Map(arr) => arr.data_type().to_owned(),
1477 ScalarValue::Date32(_) => DataType::Date32,
1478 ScalarValue::Date64(_) => DataType::Date64,
1479 ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1480 ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1481 ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1482 ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1483 ScalarValue::IntervalYearMonth(_) => {
1484 DataType::Interval(IntervalUnit::YearMonth)
1485 }
1486 ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1487 ScalarValue::IntervalMonthDayNano(_) => {
1488 DataType::Interval(IntervalUnit::MonthDayNano)
1489 }
1490 ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1491 ScalarValue::DurationMillisecond(_) => {
1492 DataType::Duration(TimeUnit::Millisecond)
1493 }
1494 ScalarValue::DurationMicrosecond(_) => {
1495 DataType::Duration(TimeUnit::Microsecond)
1496 }
1497 ScalarValue::DurationNanosecond(_) => {
1498 DataType::Duration(TimeUnit::Nanosecond)
1499 }
1500 ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1501 ScalarValue::Dictionary(k, v) => {
1502 DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1503 }
1504 ScalarValue::Null => DataType::Null,
1505 }
1506 }
1507
1508 pub fn arithmetic_negate(&self) -> Result<Self> {
1510 fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1511 v: T,
1512 ctx: impl Fn() -> String,
1513 ) -> Result<T> {
1514 v.neg_checked()
1515 .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1516 }
1517 match self {
1518 ScalarValue::Int8(None)
1519 | ScalarValue::Int16(None)
1520 | ScalarValue::Int32(None)
1521 | ScalarValue::Int64(None)
1522 | ScalarValue::Float16(None)
1523 | ScalarValue::Float32(None)
1524 | ScalarValue::Float64(None) => Ok(self.clone()),
1525 ScalarValue::Float16(Some(v)) => {
1526 Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1527 }
1528 ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1529 ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1530 ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1531 ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1532 ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1533 ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1534 ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1535 ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1536 format!("In negation of IntervalYearMonth({v})")
1537 })?)),
1538 ),
1539 ScalarValue::IntervalDayTime(Some(v)) => {
1540 let (days, ms) = IntervalDayTimeType::to_parts(*v);
1541 let val = IntervalDayTimeType::make_value(
1542 neg_checked_with_ctx(days, || {
1543 format!("In negation of days {days} in IntervalDayTime")
1544 })?,
1545 neg_checked_with_ctx(ms, || {
1546 format!("In negation of milliseconds {ms} in IntervalDayTime")
1547 })?,
1548 );
1549 Ok(ScalarValue::IntervalDayTime(Some(val)))
1550 }
1551 ScalarValue::IntervalMonthDayNano(Some(v)) => {
1552 let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1553 let val = IntervalMonthDayNanoType::make_value(
1554 neg_checked_with_ctx(months, || {
1555 format!("In negation of months {months} of IntervalMonthDayNano")
1556 })?,
1557 neg_checked_with_ctx(days, || {
1558 format!("In negation of days {days} of IntervalMonthDayNano")
1559 })?,
1560 neg_checked_with_ctx(nanos, || {
1561 format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1562 })?,
1563 );
1564 Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1565 }
1566 ScalarValue::Decimal128(Some(v), precision, scale) => {
1567 Ok(ScalarValue::Decimal128(
1568 Some(neg_checked_with_ctx(*v, || {
1569 format!("In negation of Decimal128({v}, {precision}, {scale})")
1570 })?),
1571 *precision,
1572 *scale,
1573 ))
1574 }
1575 ScalarValue::Decimal256(Some(v), precision, scale) => {
1576 Ok(ScalarValue::Decimal256(
1577 Some(neg_checked_with_ctx(*v, || {
1578 format!("In negation of Decimal256({v}, {precision}, {scale})")
1579 })?),
1580 *precision,
1581 *scale,
1582 ))
1583 }
1584 ScalarValue::TimestampSecond(Some(v), tz) => {
1585 Ok(ScalarValue::TimestampSecond(
1586 Some(neg_checked_with_ctx(*v, || {
1587 format!("In negation of TimestampSecond({v})")
1588 })?),
1589 tz.clone(),
1590 ))
1591 }
1592 ScalarValue::TimestampNanosecond(Some(v), tz) => {
1593 Ok(ScalarValue::TimestampNanosecond(
1594 Some(neg_checked_with_ctx(*v, || {
1595 format!("In negation of TimestampNanoSecond({v})")
1596 })?),
1597 tz.clone(),
1598 ))
1599 }
1600 ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1601 Ok(ScalarValue::TimestampMicrosecond(
1602 Some(neg_checked_with_ctx(*v, || {
1603 format!("In negation of TimestampMicroSecond({v})")
1604 })?),
1605 tz.clone(),
1606 ))
1607 }
1608 ScalarValue::TimestampMillisecond(Some(v), tz) => {
1609 Ok(ScalarValue::TimestampMillisecond(
1610 Some(neg_checked_with_ctx(*v, || {
1611 format!("In negation of TimestampMilliSecond({v})")
1612 })?),
1613 tz.clone(),
1614 ))
1615 }
1616 value => _internal_err!(
1617 "Can not run arithmetic negative on scalar value {value:?}"
1618 ),
1619 }
1620 }
1621
1622 pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1627 let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1628 Self::try_from_array(r.as_ref(), 0)
1629 }
1630 pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1635 let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1636 Self::try_from_array(r.as_ref(), 0)
1637 }
1638
1639 pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1644 let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1645 Self::try_from_array(r.as_ref(), 0)
1646 }
1647
1648 pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1653 let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1654 Self::try_from_array(r.as_ref(), 0)
1655 }
1656
1657 pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1662 let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1663 Self::try_from_array(r.as_ref(), 0)
1664 }
1665
1666 pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1671 let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1672 Self::try_from_array(r.as_ref(), 0)
1673 }
1674
1675 pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1683 let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1684 Self::try_from_array(r.as_ref(), 0)
1685 }
1686
1687 pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1695 let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1696 Self::try_from_array(r.as_ref(), 0)
1697 }
1698
1699 pub fn is_unsigned(&self) -> bool {
1700 matches!(
1701 self,
1702 ScalarValue::UInt8(_)
1703 | ScalarValue::UInt16(_)
1704 | ScalarValue::UInt32(_)
1705 | ScalarValue::UInt64(_)
1706 )
1707 }
1708
1709 pub fn is_null(&self) -> bool {
1711 match self {
1712 ScalarValue::Boolean(v) => v.is_none(),
1713 ScalarValue::Null => true,
1714 ScalarValue::Float16(v) => v.is_none(),
1715 ScalarValue::Float32(v) => v.is_none(),
1716 ScalarValue::Float64(v) => v.is_none(),
1717 ScalarValue::Decimal128(v, _, _) => v.is_none(),
1718 ScalarValue::Decimal256(v, _, _) => v.is_none(),
1719 ScalarValue::Int8(v) => v.is_none(),
1720 ScalarValue::Int16(v) => v.is_none(),
1721 ScalarValue::Int32(v) => v.is_none(),
1722 ScalarValue::Int64(v) => v.is_none(),
1723 ScalarValue::UInt8(v) => v.is_none(),
1724 ScalarValue::UInt16(v) => v.is_none(),
1725 ScalarValue::UInt32(v) => v.is_none(),
1726 ScalarValue::UInt64(v) => v.is_none(),
1727 ScalarValue::Utf8(v)
1728 | ScalarValue::Utf8View(v)
1729 | ScalarValue::LargeUtf8(v) => v.is_none(),
1730 ScalarValue::Binary(v)
1731 | ScalarValue::BinaryView(v)
1732 | ScalarValue::FixedSizeBinary(_, v)
1733 | ScalarValue::LargeBinary(v) => v.is_none(),
1734 ScalarValue::List(arr) => arr.len() == arr.null_count(),
1737 ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1738 ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1739 ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1740 ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1741 ScalarValue::Date32(v) => v.is_none(),
1742 ScalarValue::Date64(v) => v.is_none(),
1743 ScalarValue::Time32Second(v) => v.is_none(),
1744 ScalarValue::Time32Millisecond(v) => v.is_none(),
1745 ScalarValue::Time64Microsecond(v) => v.is_none(),
1746 ScalarValue::Time64Nanosecond(v) => v.is_none(),
1747 ScalarValue::TimestampSecond(v, _) => v.is_none(),
1748 ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1749 ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1750 ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1751 ScalarValue::IntervalYearMonth(v) => v.is_none(),
1752 ScalarValue::IntervalDayTime(v) => v.is_none(),
1753 ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1754 ScalarValue::DurationSecond(v) => v.is_none(),
1755 ScalarValue::DurationMillisecond(v) => v.is_none(),
1756 ScalarValue::DurationMicrosecond(v) => v.is_none(),
1757 ScalarValue::DurationNanosecond(v) => v.is_none(),
1758 ScalarValue::Union(v, _, _) => match v {
1759 Some((_, s)) => s.is_null(),
1760 None => true,
1761 },
1762 ScalarValue::Dictionary(_, v) => v.is_null(),
1763 }
1764 }
1765
1766 pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1774 match (self, other) {
1775 (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1776 (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1777 (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1778 (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1779 (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1780 (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1781 (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1782 (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1783 (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1785 Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1786 }
1787 (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1788 Some((l - r).abs().round() as _)
1789 }
1790 (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1791 Some((l - r).abs().round() as _)
1792 }
1793 _ => None,
1794 }
1795 }
1796
1797 pub fn to_array(&self) -> Result<ArrayRef> {
1803 self.to_array_of_size(1)
1804 }
1805
1806 pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
1839 Ok(Scalar::new(self.to_array_of_size(1)?))
1840 }
1841
1842 pub fn iter_to_array(
1875 scalars: impl IntoIterator<Item = ScalarValue>,
1876 ) -> Result<ArrayRef> {
1877 let mut scalars = scalars.into_iter().peekable();
1878
1879 let data_type = match scalars.peek() {
1881 None => {
1882 return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1883 }
1884 Some(sv) => sv.data_type(),
1885 };
1886
1887 macro_rules! build_array_primitive {
1890 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1891 {
1892 let array = scalars.map(|sv| {
1893 if let ScalarValue::$SCALAR_TY(v) = sv {
1894 Ok(v)
1895 } else {
1896 _exec_err!(
1897 "Inconsistent types in ScalarValue::iter_to_array. \
1898 Expected {:?}, got {:?}",
1899 data_type, sv
1900 )
1901 }
1902 })
1903 .collect::<Result<$ARRAY_TY>>()?;
1904 Arc::new(array)
1905 }
1906 }};
1907 }
1908
1909 macro_rules! build_array_primitive_tz {
1910 ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1911 {
1912 let array = scalars.map(|sv| {
1913 if let ScalarValue::$SCALAR_TY(v, _) = sv {
1914 Ok(v)
1915 } else {
1916 _exec_err!(
1917 "Inconsistent types in ScalarValue::iter_to_array. \
1918 Expected {:?}, got {:?}",
1919 data_type, sv
1920 )
1921 }
1922 })
1923 .collect::<Result<$ARRAY_TY>>()?;
1924 Arc::new(array.with_timezone_opt($TZ.clone()))
1925 }
1926 }};
1927 }
1928
1929 macro_rules! build_array_string {
1932 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1933 {
1934 let array = scalars.map(|sv| {
1935 if let ScalarValue::$SCALAR_TY(v) = sv {
1936 Ok(v)
1937 } else {
1938 _exec_err!(
1939 "Inconsistent types in ScalarValue::iter_to_array. \
1940 Expected {:?}, got {:?}",
1941 data_type, sv
1942 )
1943 }
1944 })
1945 .collect::<Result<$ARRAY_TY>>()?;
1946 Arc::new(array)
1947 }
1948 }};
1949 }
1950
1951 let array: ArrayRef = match &data_type {
1952 DataType::Decimal128(precision, scale) => {
1953 let decimal_array =
1954 ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1955 Arc::new(decimal_array)
1956 }
1957 DataType::Decimal256(precision, scale) => {
1958 let decimal_array =
1959 ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1960 Arc::new(decimal_array)
1961 }
1962 DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1963 DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1964 DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1965 DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1966 DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1967 DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1968 DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1969 DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1970 DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1971 DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1972 DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1973 DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1974 DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1975 DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1976 DataType::Utf8 => build_array_string!(StringArray, Utf8),
1977 DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1978 DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1979 DataType::Binary => build_array_string!(BinaryArray, Binary),
1980 DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1981 DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1982 DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1983 DataType::Time32(TimeUnit::Second) => {
1984 build_array_primitive!(Time32SecondArray, Time32Second)
1985 }
1986 DataType::Time32(TimeUnit::Millisecond) => {
1987 build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1988 }
1989 DataType::Time64(TimeUnit::Microsecond) => {
1990 build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1991 }
1992 DataType::Time64(TimeUnit::Nanosecond) => {
1993 build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
1994 }
1995 DataType::Timestamp(TimeUnit::Second, tz) => {
1996 build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
1997 }
1998 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1999 build_array_primitive_tz!(
2000 TimestampMillisecondArray,
2001 TimestampMillisecond,
2002 tz
2003 )
2004 }
2005 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2006 build_array_primitive_tz!(
2007 TimestampMicrosecondArray,
2008 TimestampMicrosecond,
2009 tz
2010 )
2011 }
2012 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2013 build_array_primitive_tz!(
2014 TimestampNanosecondArray,
2015 TimestampNanosecond,
2016 tz
2017 )
2018 }
2019 DataType::Duration(TimeUnit::Second) => {
2020 build_array_primitive!(DurationSecondArray, DurationSecond)
2021 }
2022 DataType::Duration(TimeUnit::Millisecond) => {
2023 build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2024 }
2025 DataType::Duration(TimeUnit::Microsecond) => {
2026 build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2027 }
2028 DataType::Duration(TimeUnit::Nanosecond) => {
2029 build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2030 }
2031 DataType::Interval(IntervalUnit::DayTime) => {
2032 build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2033 }
2034 DataType::Interval(IntervalUnit::YearMonth) => {
2035 build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2036 }
2037 DataType::Interval(IntervalUnit::MonthDayNano) => {
2038 build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2039 }
2040 DataType::FixedSizeList(_, _) => {
2041 let mut arrays =
2045 scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2046 let first_non_null_data_type = arrays
2047 .iter()
2048 .find(|sv| !sv.is_null(0))
2049 .map(|sv| sv.data_type().to_owned());
2050 if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2051 for array in arrays.iter_mut() {
2052 if array.is_null(0) {
2053 *array = Arc::new(FixedSizeListArray::new_null(
2054 Arc::clone(&f),
2055 l,
2056 1,
2057 ));
2058 }
2059 }
2060 }
2061 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2062 arrow::compute::concat(arrays.as_slice())?
2063 }
2064 DataType::List(_)
2065 | DataType::LargeList(_)
2066 | DataType::Map(_, _)
2067 | DataType::Struct(_)
2068 | DataType::Union(_, _) => {
2069 let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2070 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2071 arrow::compute::concat(arrays.as_slice())?
2072 }
2073 DataType::Dictionary(key_type, value_type) => {
2074 let value_scalars = scalars
2076 .map(|scalar| match scalar {
2077 ScalarValue::Dictionary(inner_key_type, scalar) => {
2078 if &inner_key_type == key_type {
2079 Ok(*scalar)
2080 } else {
2081 _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2082 }
2083 }
2084 _ => {
2085 _exec_err!(
2086 "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2087 )
2088 }
2089 })
2090 .collect::<Result<Vec<_>>>()?;
2091
2092 let values = Self::iter_to_array(value_scalars)?;
2093 assert_eq!(values.data_type(), value_type.as_ref());
2094
2095 match key_type.as_ref() {
2096 DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2097 DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2098 DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2099 DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2100 DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2101 DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2102 DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2103 DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2104 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2105 }
2106 }
2107 DataType::FixedSizeBinary(size) => {
2108 let array = scalars
2109 .map(|sv| {
2110 if let ScalarValue::FixedSizeBinary(_, v) = sv {
2111 Ok(v)
2112 } else {
2113 _exec_err!(
2114 "Inconsistent types in ScalarValue::iter_to_array. \
2115 Expected {data_type:?}, got {sv:?}"
2116 )
2117 }
2118 })
2119 .collect::<Result<Vec<_>>>()?;
2120 let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2121 array.into_iter(),
2122 *size,
2123 )?;
2124 Arc::new(array)
2125 }
2126 DataType::Time32(TimeUnit::Microsecond)
2132 | DataType::Time32(TimeUnit::Nanosecond)
2133 | DataType::Time64(TimeUnit::Second)
2134 | DataType::Time64(TimeUnit::Millisecond)
2135 | DataType::RunEndEncoded(_, _)
2136 | DataType::ListView(_)
2137 | DataType::LargeListView(_) => {
2138 return _not_impl_err!(
2139 "Unsupported creation of {:?} array from ScalarValue {:?}",
2140 data_type,
2141 scalars.peek()
2142 );
2143 }
2144 };
2145 Ok(array)
2146 }
2147
2148 fn iter_to_null_array(
2149 scalars: impl IntoIterator<Item = ScalarValue>,
2150 ) -> Result<ArrayRef> {
2151 let length = scalars.into_iter().try_fold(
2152 0usize,
2153 |r, element: ScalarValue| match element {
2154 ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2155 s => {
2156 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2157 }
2158 },
2159 )?;
2160 Ok(new_null_array(&DataType::Null, length))
2161 }
2162
2163 fn iter_to_decimal_array(
2164 scalars: impl IntoIterator<Item = ScalarValue>,
2165 precision: u8,
2166 scale: i8,
2167 ) -> Result<Decimal128Array> {
2168 let array = scalars
2169 .into_iter()
2170 .map(|element: ScalarValue| match element {
2171 ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2172 s => {
2173 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2174 }
2175 })
2176 .collect::<Result<Decimal128Array>>()?
2177 .with_precision_and_scale(precision, scale)?;
2178 Ok(array)
2179 }
2180
2181 fn iter_to_decimal256_array(
2182 scalars: impl IntoIterator<Item = ScalarValue>,
2183 precision: u8,
2184 scale: i8,
2185 ) -> Result<Decimal256Array> {
2186 let array = scalars
2187 .into_iter()
2188 .map(|element: ScalarValue| match element {
2189 ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2190 s => {
2191 _internal_err!(
2192 "Expected ScalarValue::Decimal256 element. Received {s:?}"
2193 )
2194 }
2195 })
2196 .collect::<Result<Decimal256Array>>()?
2197 .with_precision_and_scale(precision, scale)?;
2198 Ok(array)
2199 }
2200
2201 fn build_decimal_array(
2202 value: Option<i128>,
2203 precision: u8,
2204 scale: i8,
2205 size: usize,
2206 ) -> Result<Decimal128Array> {
2207 Ok(match value {
2208 Some(val) => Decimal128Array::from(vec![val; size])
2209 .with_precision_and_scale(precision, scale)?,
2210 None => {
2211 let mut builder = Decimal128Array::builder(size)
2212 .with_precision_and_scale(precision, scale)?;
2213 builder.append_nulls(size);
2214 builder.finish()
2215 }
2216 })
2217 }
2218
2219 fn build_decimal256_array(
2220 value: Option<i256>,
2221 precision: u8,
2222 scale: i8,
2223 size: usize,
2224 ) -> Result<Decimal256Array> {
2225 Ok(repeat_n(value, size)
2226 .collect::<Decimal256Array>()
2227 .with_precision_and_scale(precision, scale)?)
2228 }
2229
2230 pub fn new_list(
2256 values: &[ScalarValue],
2257 data_type: &DataType,
2258 nullable: bool,
2259 ) -> Arc<ListArray> {
2260 let values = if values.is_empty() {
2261 new_empty_array(data_type)
2262 } else {
2263 Self::iter_to_array(values.iter().cloned()).unwrap()
2264 };
2265 Arc::new(
2266 SingleRowListArrayBuilder::new(values)
2267 .with_nullable(nullable)
2268 .build_list_array(),
2269 )
2270 }
2271
2272 pub fn new_list_nullable(
2274 values: &[ScalarValue],
2275 data_type: &DataType,
2276 ) -> Arc<ListArray> {
2277 Self::new_list(values, data_type, true)
2278 }
2279
2280 pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2284 let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2285 Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2286 &data_type, null_len,
2287 ))))
2288 }
2289
2290 pub fn new_list_from_iter(
2316 values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2317 data_type: &DataType,
2318 nullable: bool,
2319 ) -> Arc<ListArray> {
2320 let values = if values.len() == 0 {
2321 new_empty_array(data_type)
2322 } else {
2323 Self::iter_to_array(values).unwrap()
2324 };
2325 Arc::new(
2326 SingleRowListArrayBuilder::new(values)
2327 .with_nullable(nullable)
2328 .build_list_array(),
2329 )
2330 }
2331
2332 pub fn new_large_list(
2358 values: &[ScalarValue],
2359 data_type: &DataType,
2360 ) -> Arc<LargeListArray> {
2361 let values = if values.is_empty() {
2362 new_empty_array(data_type)
2363 } else {
2364 Self::iter_to_array(values.iter().cloned()).unwrap()
2365 };
2366 Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2367 }
2368
2369 pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2379 Ok(match self {
2380 ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2381 ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2382 ),
2383 ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2384 ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2385 ),
2386 ScalarValue::Boolean(e) => {
2387 Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2388 }
2389 ScalarValue::Float64(e) => {
2390 build_array_from_option!(Float64, Float64Array, e, size)
2391 }
2392 ScalarValue::Float32(e) => {
2393 build_array_from_option!(Float32, Float32Array, e, size)
2394 }
2395 ScalarValue::Float16(e) => {
2396 build_array_from_option!(Float16, Float16Array, e, size)
2397 }
2398 ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2399 ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2400 ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2401 ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2402 ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2403 ScalarValue::UInt16(e) => {
2404 build_array_from_option!(UInt16, UInt16Array, e, size)
2405 }
2406 ScalarValue::UInt32(e) => {
2407 build_array_from_option!(UInt32, UInt32Array, e, size)
2408 }
2409 ScalarValue::UInt64(e) => {
2410 build_array_from_option!(UInt64, UInt64Array, e, size)
2411 }
2412 ScalarValue::TimestampSecond(e, tz_opt) => {
2413 build_timestamp_array_from_option!(
2414 TimeUnit::Second,
2415 tz_opt.clone(),
2416 TimestampSecondArray,
2417 e,
2418 size
2419 )
2420 }
2421 ScalarValue::TimestampMillisecond(e, tz_opt) => {
2422 build_timestamp_array_from_option!(
2423 TimeUnit::Millisecond,
2424 tz_opt.clone(),
2425 TimestampMillisecondArray,
2426 e,
2427 size
2428 )
2429 }
2430
2431 ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2432 build_timestamp_array_from_option!(
2433 TimeUnit::Microsecond,
2434 tz_opt.clone(),
2435 TimestampMicrosecondArray,
2436 e,
2437 size
2438 )
2439 }
2440 ScalarValue::TimestampNanosecond(e, tz_opt) => {
2441 build_timestamp_array_from_option!(
2442 TimeUnit::Nanosecond,
2443 tz_opt.clone(),
2444 TimestampNanosecondArray,
2445 e,
2446 size
2447 )
2448 }
2449 ScalarValue::Utf8(e) => match e {
2450 Some(value) => {
2451 Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2452 }
2453 None => new_null_array(&DataType::Utf8, size),
2454 },
2455 ScalarValue::Utf8View(e) => match e {
2456 Some(value) => {
2457 Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2458 }
2459 None => new_null_array(&DataType::Utf8View, size),
2460 },
2461 ScalarValue::LargeUtf8(e) => match e {
2462 Some(value) => {
2463 Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2464 }
2465 None => new_null_array(&DataType::LargeUtf8, size),
2466 },
2467 ScalarValue::Binary(e) => match e {
2468 Some(value) => Arc::new(
2469 repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2470 ),
2471 None => Arc::new(repeat_n(None::<&str>, size).collect::<BinaryArray>()),
2472 },
2473 ScalarValue::BinaryView(e) => match e {
2474 Some(value) => Arc::new(
2475 repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2476 ),
2477 None => {
2478 Arc::new(repeat_n(None::<&str>, size).collect::<BinaryViewArray>())
2479 }
2480 },
2481 ScalarValue::FixedSizeBinary(s, e) => match e {
2482 Some(value) => Arc::new(
2483 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2484 repeat_n(Some(value.as_slice()), size),
2485 *s,
2486 )
2487 .unwrap(),
2488 ),
2489 None => Arc::new(
2490 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2491 repeat_n(None::<&[u8]>, size),
2492 *s,
2493 )
2494 .unwrap(),
2495 ),
2496 },
2497 ScalarValue::LargeBinary(e) => match e {
2498 Some(value) => Arc::new(
2499 repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2500 ),
2501 None => {
2502 Arc::new(repeat_n(None::<&str>, size).collect::<LargeBinaryArray>())
2503 }
2504 },
2505 ScalarValue::List(arr) => {
2506 if size == 1 {
2507 return Ok(Arc::clone(arr) as Arc<dyn Array>);
2508 }
2509 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2510 }
2511 ScalarValue::LargeList(arr) => {
2512 if size == 1 {
2513 return Ok(Arc::clone(arr) as Arc<dyn Array>);
2514 }
2515 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2516 }
2517 ScalarValue::FixedSizeList(arr) => {
2518 if size == 1 {
2519 return Ok(Arc::clone(arr) as Arc<dyn Array>);
2520 }
2521 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2522 }
2523 ScalarValue::Struct(arr) => {
2524 if size == 1 {
2525 return Ok(Arc::clone(arr) as Arc<dyn Array>);
2526 }
2527 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2528 }
2529 ScalarValue::Map(arr) => {
2530 if size == 1 {
2531 return Ok(Arc::clone(arr) as Arc<dyn Array>);
2532 }
2533 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2534 }
2535 ScalarValue::Date32(e) => {
2536 build_array_from_option!(Date32, Date32Array, e, size)
2537 }
2538 ScalarValue::Date64(e) => {
2539 build_array_from_option!(Date64, Date64Array, e, size)
2540 }
2541 ScalarValue::Time32Second(e) => {
2542 build_array_from_option!(
2543 Time32,
2544 TimeUnit::Second,
2545 Time32SecondArray,
2546 e,
2547 size
2548 )
2549 }
2550 ScalarValue::Time32Millisecond(e) => {
2551 build_array_from_option!(
2552 Time32,
2553 TimeUnit::Millisecond,
2554 Time32MillisecondArray,
2555 e,
2556 size
2557 )
2558 }
2559 ScalarValue::Time64Microsecond(e) => {
2560 build_array_from_option!(
2561 Time64,
2562 TimeUnit::Microsecond,
2563 Time64MicrosecondArray,
2564 e,
2565 size
2566 )
2567 }
2568 ScalarValue::Time64Nanosecond(e) => {
2569 build_array_from_option!(
2570 Time64,
2571 TimeUnit::Nanosecond,
2572 Time64NanosecondArray,
2573 e,
2574 size
2575 )
2576 }
2577 ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2578 Interval,
2579 IntervalUnit::DayTime,
2580 IntervalDayTimeArray,
2581 e,
2582 size
2583 ),
2584 ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2585 Interval,
2586 IntervalUnit::YearMonth,
2587 IntervalYearMonthArray,
2588 e,
2589 size
2590 ),
2591 ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2592 Interval,
2593 IntervalUnit::MonthDayNano,
2594 IntervalMonthDayNanoArray,
2595 e,
2596 size
2597 ),
2598 ScalarValue::DurationSecond(e) => build_array_from_option!(
2599 Duration,
2600 TimeUnit::Second,
2601 DurationSecondArray,
2602 e,
2603 size
2604 ),
2605 ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2606 Duration,
2607 TimeUnit::Millisecond,
2608 DurationMillisecondArray,
2609 e,
2610 size
2611 ),
2612 ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2613 Duration,
2614 TimeUnit::Microsecond,
2615 DurationMicrosecondArray,
2616 e,
2617 size
2618 ),
2619 ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2620 Duration,
2621 TimeUnit::Nanosecond,
2622 DurationNanosecondArray,
2623 e,
2624 size
2625 ),
2626 ScalarValue::Union(value, fields, mode) => match value {
2627 Some((v_id, value)) => {
2628 let mut new_fields = Vec::with_capacity(fields.len());
2629 let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2630 for (f_id, field) in fields.iter() {
2631 let ar = if f_id == *v_id {
2632 value.to_array_of_size(size)?
2633 } else {
2634 let dt = field.data_type();
2635 match mode {
2636 UnionMode::Sparse => new_null_array(dt, size),
2637 UnionMode::Dense => new_null_array(dt, 0),
2640 }
2641 };
2642 let field = (**field).clone();
2643 child_arrays.push(ar);
2644 new_fields.push(field.clone());
2645 }
2646 let type_ids = repeat_n(*v_id, size);
2647 let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2648 let value_offsets = match mode {
2649 UnionMode::Sparse => None,
2650 UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
2651 };
2652 let ar = UnionArray::try_new(
2653 fields.clone(),
2654 type_ids,
2655 value_offsets,
2656 child_arrays,
2657 )
2658 .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
2659 Arc::new(ar)
2660 }
2661 None => {
2662 let dt = self.data_type();
2663 new_null_array(&dt, size)
2664 }
2665 },
2666 ScalarValue::Dictionary(key_type, v) => {
2667 match key_type.as_ref() {
2669 DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2670 DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2671 DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2672 DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2673 DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2674 DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2675 DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2676 DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2677 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2678 }
2679 }
2680 ScalarValue::Null => new_null_array(&DataType::Null, size),
2681 })
2682 }
2683
2684 fn get_decimal_value_from_array(
2685 array: &dyn Array,
2686 index: usize,
2687 precision: u8,
2688 scale: i8,
2689 ) -> Result<ScalarValue> {
2690 match array.data_type() {
2691 DataType::Decimal128(_, _) => {
2692 let array = as_decimal128_array(array)?;
2693 if array.is_null(index) {
2694 Ok(ScalarValue::Decimal128(None, precision, scale))
2695 } else {
2696 let value = array.value(index);
2697 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2698 }
2699 }
2700 DataType::Decimal256(_, _) => {
2701 let array = as_decimal256_array(array)?;
2702 if array.is_null(index) {
2703 Ok(ScalarValue::Decimal256(None, precision, scale))
2704 } else {
2705 let value = array.value(index);
2706 Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2707 }
2708 }
2709 _ => _internal_err!("Unsupported decimal type"),
2710 }
2711 }
2712
2713 fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2714 let arrays = repeat_n(arr, size).collect::<Vec<_>>();
2715 let ret = match !arrays.is_empty() {
2716 true => arrow::compute::concat(arrays.as_slice())?,
2717 false => arr.slice(0, 0),
2718 };
2719 Ok(ret)
2720 }
2721
2722 pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2790 let mut scalars = Vec::with_capacity(array.len());
2791
2792 for index in 0..array.len() {
2793 let nested_array = array.as_list::<i32>().value(index);
2794 let scalar_values = (0..nested_array.len())
2795 .map(|i| ScalarValue::try_from_array(&nested_array, i))
2796 .collect::<Result<Vec<_>>>()?;
2797 scalars.push(scalar_values);
2798 }
2799
2800 Ok(scalars)
2801 }
2802
2803 #[deprecated(
2804 since = "46.0.0",
2805 note = "This function is obsolete. Use `to_array` instead"
2806 )]
2807 pub fn raw_data(&self) -> Result<ArrayRef> {
2808 match self {
2809 ScalarValue::List(arr) => Ok(arr.to_owned()),
2810 _ => _internal_err!("ScalarValue is not a list"),
2811 }
2812 }
2813
/// Converts the element at `index` of `array` into a `ScalarValue`.
///
/// # Errors
/// Returns a "not implemented" error for array data types that have no arm
/// below, and propagates any error from the downcast helpers.
///
/// # Panics
/// Panics (via `unreachable!`) if a dictionary array reports a key type
/// that is not one of the integer types handled below.
pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
    // A slot that is not valid is handled up front: the scalar is derived
    // from the data type alone.
    if !array.is_valid(index) {
        return array.data_type().try_into();
    }

    Ok(match array.data_type() {
        DataType::Null => ScalarValue::Null,
        DataType::Decimal128(precision, scale) => {
            ScalarValue::get_decimal_value_from_array(
                array, index, *precision, *scale,
            )?
        }
        DataType::Decimal256(precision, scale) => {
            ScalarValue::get_decimal_value_from_array(
                array, index, *precision, *scale,
            )?
        }
        DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
        DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
        DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
        DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
        DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
        DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
        DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
        DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
        DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
        DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
        DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
        DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
        DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
        DataType::LargeBinary => {
            typed_cast!(array, index, as_large_binary_array, LargeBinary)?
        }
        DataType::BinaryView => {
            typed_cast!(array, index, as_binary_view_array, BinaryView)?
        }
        DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
        DataType::LargeUtf8 => {
            typed_cast!(array, index, as_large_string_array, LargeUtf8)?
        }
        DataType::Utf8View => {
            typed_cast!(array, index, as_string_view_array, Utf8View)?
        }
        DataType::List(field) => {
            let list_array = array.as_list::<i32>();
            let nested_array = list_array.value(index);
            // Re-wrap the row's child values as a one-row list, reusing the
            // source field.
            SingleRowListArrayBuilder::new(nested_array)
                .with_field(field)
                .build_list_scalar()
        }
        DataType::LargeList(field) => {
            let list_array = as_large_list_array(array)?;
            let nested_array = list_array.value(index);
            // Same as the `List` arm, producing a large-list scalar.
            SingleRowListArrayBuilder::new(nested_array)
                .with_field(field)
                .build_large_list_scalar()
        }
        DataType::FixedSizeList(field, _) => {
            let list_array = as_fixed_size_list_array(array)?;
            let nested_array = list_array.value(index);
            // The list size is taken from the extracted row rather than from
            // the data type.
            let list_size = nested_array.len();
            SingleRowListArrayBuilder::new(nested_array)
                .with_field(field)
                .build_fixed_size_list_scalar(list_size)
        }
        DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
        DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
        DataType::Time32(TimeUnit::Second) => {
            typed_cast!(array, index, as_time32_second_array, Time32Second)?
        }
        DataType::Time32(TimeUnit::Millisecond) => {
            typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
        }
        DataType::Time64(TimeUnit::Microsecond) => {
            typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
        }
        DataType::Time64(TimeUnit::Nanosecond) => {
            typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
        }
        DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
            array,
            index,
            as_timestamp_second_array,
            TimestampSecond,
            tz_opt
        )?,
        DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
            array,
            index,
            as_timestamp_millisecond_array,
            TimestampMillisecond,
            tz_opt
        )?,
        DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
            array,
            index,
            as_timestamp_microsecond_array,
            TimestampMicrosecond,
            tz_opt
        )?,
        DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
            array,
            index,
            as_timestamp_nanosecond_array,
            TimestampNanosecond,
            tz_opt
        )?,
        DataType::Dictionary(key_type, _) => {
            // Resolve the key at `index` to the values array and, when
            // available, the position inside it.
            let (values_array, values_index) = match key_type.as_ref() {
                DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
                DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
                DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
                DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
                DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
                DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
                DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
                DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
                _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
            };
            let value = match values_index {
                // A position was found: convert the referenced value.
                Some(values_index) => {
                    ScalarValue::try_from_array(values_array, values_index)
                }
                // No position: derive the scalar from the values' data type.
                None => values_array.data_type().try_into(),
            }?;

            Self::Dictionary(key_type.clone(), Box::new(value))
        }
        DataType::Struct(_) => {
            // Keep the row as a one-element struct array.
            let a = array.slice(index, 1);
            Self::Struct(Arc::new(a.as_struct().to_owned()))
        }
        DataType::FixedSizeBinary(_) => {
            let array = as_fixed_size_binary_array(array)?;
            let size = match array.data_type() {
                DataType::FixedSizeBinary(size) => *size,
                _ => unreachable!(),
            };
            ScalarValue::FixedSizeBinary(
                size,
                match array.is_null(index) {
                    true => None,
                    false => Some(array.value(index).into()),
                },
            )
        }
        DataType::Interval(IntervalUnit::DayTime) => {
            typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
        }
        DataType::Interval(IntervalUnit::YearMonth) => {
            typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
        }
        DataType::Interval(IntervalUnit::MonthDayNano) => {
            typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
        }

        DataType::Duration(TimeUnit::Second) => {
            typed_cast!(array, index, as_duration_second_array, DurationSecond)?
        }
        DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
            array,
            index,
            as_duration_millisecond_array,
            DurationMillisecond
        )?,
        DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
            array,
            index,
            as_duration_microsecond_array,
            DurationMicrosecond
        )?,
        DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
            array,
            index,
            as_duration_nanosecond_array,
            DurationNanosecond
        )?,
        DataType::Map(_, _) => {
            // Keep the row as a one-element map array.
            let a = array.slice(index, 1);
            Self::Map(Arc::new(a.as_map().to_owned()))
        }
        DataType::Union(fields, mode) => {
            let array = as_union_array(array)?;
            // Look up which child holds this row and where, then convert
            // that child's element.
            let ti = array.type_id(index);
            let index = array.value_offset(index);
            let value = ScalarValue::try_from_array(array.child(ti), index)?;
            ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
        }
        other => {
            return _not_impl_err!(
                "Can't create a scalar from array of type \"{other:?}\""
            );
        }
    })
}
3017
3018 pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3020 ScalarValue::from(value).cast_to(target_type)
3021 }
3022
3023 pub fn try_as_str(&self) -> Option<Option<&str>> {
3057 let v = match self {
3058 ScalarValue::Utf8(v) => v,
3059 ScalarValue::LargeUtf8(v) => v,
3060 ScalarValue::Utf8View(v) => v,
3061 ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3062 _ => return None,
3063 };
3064 Some(v.as_ref().map(|v| v.as_str()))
3065 }
3066
3067 pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3069 self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3070 }
3071
3072 pub fn cast_to_with_options(
3074 &self,
3075 target_type: &DataType,
3076 cast_options: &CastOptions<'static>,
3077 ) -> Result<Self> {
3078 let scalar_array = match (self, target_type) {
3079 (
3080 ScalarValue::Decimal128(Some(decimal_value), _, scale),
3081 DataType::Timestamp(time_unit, None),
3082 ) => {
3083 let scale_factor = 10_i128.pow(*scale as u32);
3084 let seconds = decimal_value / scale_factor;
3085 let fraction = decimal_value % scale_factor;
3086
3087 let timestamp_value = match time_unit {
3088 TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)),
3089 TimeUnit::Millisecond => {
3090 let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor;
3091 ScalarValue::Int64(Some(millis as i64))
3092 }
3093 TimeUnit::Microsecond => {
3094 let micros =
3095 seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor;
3096 ScalarValue::Int64(Some(micros as i64))
3097 }
3098 TimeUnit::Nanosecond => {
3099 let nanos = seconds * 1_000_000_000
3100 + (fraction * 1_000_000_000) / scale_factor;
3101 ScalarValue::Int64(Some(nanos as i64))
3102 }
3103 };
3104
3105 timestamp_value.to_array()?
3106 }
3107 _ => self.to_array()?,
3108 };
3109
3110 let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3111 ScalarValue::try_from_array(&cast_arr, 0)
3112 }
3113
3114 fn eq_array_decimal(
3115 array: &ArrayRef,
3116 index: usize,
3117 value: Option<&i128>,
3118 precision: u8,
3119 scale: i8,
3120 ) -> Result<bool> {
3121 let array = as_decimal128_array(array)?;
3122 if array.precision() != precision || array.scale() != scale {
3123 return Ok(false);
3124 }
3125 let is_null = array.is_null(index);
3126 if let Some(v) = value {
3127 Ok(!array.is_null(index) && array.value(index) == *v)
3128 } else {
3129 Ok(is_null)
3130 }
3131 }
3132
3133 fn eq_array_decimal256(
3134 array: &ArrayRef,
3135 index: usize,
3136 value: Option<&i256>,
3137 precision: u8,
3138 scale: i8,
3139 ) -> Result<bool> {
3140 let array = as_decimal256_array(array)?;
3141 if array.precision() != precision || array.scale() != scale {
3142 return Ok(false);
3143 }
3144 let is_null = array.is_null(index);
3145 if let Some(v) = value {
3146 Ok(!array.is_null(index) && array.value(index) == *v)
3147 } else {
3148 Ok(is_null)
3149 }
3150 }
3151
/// Compares this scalar with the element at `index` of `array`.
///
/// The array is downcast according to the scalar's own variant, so an
/// array of a different type surfaces as an error from the downcast
/// helper rather than as `false`.
///
/// # Errors
/// Propagates errors from the downcast helpers and the comparison macros.
///
/// # Panics
/// Panics (via `unreachable!`) if a `Dictionary` scalar carries a key type
/// that is not one of the integer types handled below.
#[inline]
pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
    Ok(match self {
        // Decimals also require the array's precision and scale to match.
        ScalarValue::Decimal128(v, precision, scale) => {
            ScalarValue::eq_array_decimal(
                array,
                index,
                v.as_ref(),
                *precision,
                *scale,
            )?
        }
        ScalarValue::Decimal256(v, precision, scale) => {
            ScalarValue::eq_array_decimal256(
                array,
                index,
                v.as_ref(),
                *precision,
                *scale,
            )?
        }
        ScalarValue::Boolean(val) => {
            eq_array_primitive!(array, index, as_boolean_array, val)?
        }
        ScalarValue::Float16(val) => {
            eq_array_primitive!(array, index, as_float16_array, val)?
        }
        ScalarValue::Float32(val) => {
            eq_array_primitive!(array, index, as_float32_array, val)?
        }
        ScalarValue::Float64(val) => {
            eq_array_primitive!(array, index, as_float64_array, val)?
        }
        ScalarValue::Int8(val) => {
            eq_array_primitive!(array, index, as_int8_array, val)?
        }
        ScalarValue::Int16(val) => {
            eq_array_primitive!(array, index, as_int16_array, val)?
        }
        ScalarValue::Int32(val) => {
            eq_array_primitive!(array, index, as_int32_array, val)?
        }
        ScalarValue::Int64(val) => {
            eq_array_primitive!(array, index, as_int64_array, val)?
        }
        ScalarValue::UInt8(val) => {
            eq_array_primitive!(array, index, as_uint8_array, val)?
        }
        ScalarValue::UInt16(val) => {
            eq_array_primitive!(array, index, as_uint16_array, val)?
        }
        ScalarValue::UInt32(val) => {
            eq_array_primitive!(array, index, as_uint32_array, val)?
        }
        ScalarValue::UInt64(val) => {
            eq_array_primitive!(array, index, as_uint64_array, val)?
        }
        ScalarValue::Utf8(val) => {
            eq_array_primitive!(array, index, as_string_array, val)?
        }
        ScalarValue::Utf8View(val) => {
            eq_array_primitive!(array, index, as_string_view_array, val)?
        }
        ScalarValue::LargeUtf8(val) => {
            eq_array_primitive!(array, index, as_large_string_array, val)?
        }
        ScalarValue::Binary(val) => {
            eq_array_primitive!(array, index, as_binary_array, val)?
        }
        ScalarValue::BinaryView(val) => {
            eq_array_primitive!(array, index, as_binary_view_array, val)?
        }
        // The scalar's declared byte width is not part of the comparison.
        ScalarValue::FixedSizeBinary(_, val) => {
            eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
        }
        ScalarValue::LargeBinary(val) => {
            eq_array_primitive!(array, index, as_large_binary_array, val)?
        }
        // Nested scalars are compared against a one-row slice of `array`.
        ScalarValue::List(arr) => {
            Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
        }
        ScalarValue::LargeList(arr) => {
            Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
        }
        ScalarValue::FixedSizeList(arr) => {
            Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
        }
        ScalarValue::Struct(arr) => {
            Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
        }
        ScalarValue::Map(arr) => {
            Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
        }
        ScalarValue::Date32(val) => {
            eq_array_primitive!(array, index, as_date32_array, val)?
        }
        ScalarValue::Date64(val) => {
            eq_array_primitive!(array, index, as_date64_array, val)?
        }
        ScalarValue::Time32Second(val) => {
            eq_array_primitive!(array, index, as_time32_second_array, val)?
        }
        ScalarValue::Time32Millisecond(val) => {
            eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
        }
        ScalarValue::Time64Microsecond(val) => {
            eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
        }
        ScalarValue::Time64Nanosecond(val) => {
            eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
        }
        // The scalar's time zone is not part of the timestamp comparison.
        ScalarValue::TimestampSecond(val, _) => {
            eq_array_primitive!(array, index, as_timestamp_second_array, val)?
        }
        ScalarValue::TimestampMillisecond(val, _) => {
            eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
        }
        ScalarValue::TimestampMicrosecond(val, _) => {
            eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
        }
        ScalarValue::TimestampNanosecond(val, _) => {
            eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
        }
        ScalarValue::IntervalYearMonth(val) => {
            eq_array_primitive!(array, index, as_interval_ym_array, val)?
        }
        ScalarValue::IntervalDayTime(val) => {
            eq_array_primitive!(array, index, as_interval_dt_array, val)?
        }
        ScalarValue::IntervalMonthDayNano(val) => {
            eq_array_primitive!(array, index, as_interval_mdn_array, val)?
        }
        ScalarValue::DurationSecond(val) => {
            eq_array_primitive!(array, index, as_duration_second_array, val)?
        }
        ScalarValue::DurationMillisecond(val) => {
            eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
        }
        ScalarValue::DurationMicrosecond(val) => {
            eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
        }
        ScalarValue::DurationNanosecond(val) => {
            eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
        }
        ScalarValue::Union(value, _, _) => {
            let array = as_union_array(array)?;
            // Locate the child array and slot that hold this row.
            let ti = array.type_id(index);
            let index = array.value_offset(index);
            if let Some((ti_v, value)) = value {
                // Same variant and equal value inside that variant's child.
                ti_v == &ti && value.eq_array(array.child(ti), index)?
            } else {
                // A scalar without a value matches a null child slot.
                array.child(ti).is_null(index)
            }
        }
        ScalarValue::Dictionary(key_type, v) => {
            let (values_array, values_index) = match key_type.as_ref() {
                DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
                DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
                DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
                DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
                DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
                DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
                DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
                DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
                _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
            };
            match values_index {
                // Compare the wrapped scalar with the referenced value.
                Some(values_index) => v.eq_array(values_array, values_index)?,
                // No referenced value: equal only if the scalar is null too.
                None => v.is_null(),
            }
        }
        ScalarValue::Null => array.is_null(index),
    })
}
3353
3354 fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3355 let right = arr2.slice(index, 1);
3356 arr1 == &right
3357 }
3358
3359 pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
3364 self.partial_cmp(other).ok_or_else(|| {
3365 _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
3366 })
3367 }
3368
3369 pub fn size(&self) -> usize {
3372 size_of_val(self)
3373 + match self {
3374 ScalarValue::Null
3375 | ScalarValue::Boolean(_)
3376 | ScalarValue::Float16(_)
3377 | ScalarValue::Float32(_)
3378 | ScalarValue::Float64(_)
3379 | ScalarValue::Decimal128(_, _, _)
3380 | ScalarValue::Decimal256(_, _, _)
3381 | ScalarValue::Int8(_)
3382 | ScalarValue::Int16(_)
3383 | ScalarValue::Int32(_)
3384 | ScalarValue::Int64(_)
3385 | ScalarValue::UInt8(_)
3386 | ScalarValue::UInt16(_)
3387 | ScalarValue::UInt32(_)
3388 | ScalarValue::UInt64(_)
3389 | ScalarValue::Date32(_)
3390 | ScalarValue::Date64(_)
3391 | ScalarValue::Time32Second(_)
3392 | ScalarValue::Time32Millisecond(_)
3393 | ScalarValue::Time64Microsecond(_)
3394 | ScalarValue::Time64Nanosecond(_)
3395 | ScalarValue::IntervalYearMonth(_)
3396 | ScalarValue::IntervalDayTime(_)
3397 | ScalarValue::IntervalMonthDayNano(_)
3398 | ScalarValue::DurationSecond(_)
3399 | ScalarValue::DurationMillisecond(_)
3400 | ScalarValue::DurationMicrosecond(_)
3401 | ScalarValue::DurationNanosecond(_) => 0,
3402 ScalarValue::Utf8(s)
3403 | ScalarValue::LargeUtf8(s)
3404 | ScalarValue::Utf8View(s) => {
3405 s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3406 }
3407 ScalarValue::TimestampSecond(_, s)
3408 | ScalarValue::TimestampMillisecond(_, s)
3409 | ScalarValue::TimestampMicrosecond(_, s)
3410 | ScalarValue::TimestampNanosecond(_, s) => {
3411 s.as_ref().map(|s| s.len()).unwrap_or_default()
3412 }
3413 ScalarValue::Binary(b)
3414 | ScalarValue::FixedSizeBinary(_, b)
3415 | ScalarValue::LargeBinary(b)
3416 | ScalarValue::BinaryView(b) => {
3417 b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3418 }
3419 ScalarValue::List(arr) => arr.get_array_memory_size(),
3420 ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3421 ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3422 ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3423 ScalarValue::Map(arr) => arr.get_array_memory_size(),
3424 ScalarValue::Union(vals, fields, _mode) => {
3425 vals.as_ref()
3426 .map(|(_id, sv)| sv.size() - size_of_val(sv))
3427 .unwrap_or_default()
3428 + size_of_val(fields)
3430 + (size_of::<Field>() * fields.len())
3431 + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
3432 }
3433 ScalarValue::Dictionary(dt, sv) => {
3434 dt.size() + sv.size()
3436 }
3437 }
3438 }
3439
3440 pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3444 size_of_val(vec)
3445 + (size_of::<ScalarValue>() * vec.capacity())
3446 + vec
3447 .iter()
3448 .map(|sv| sv.size() - size_of_val(sv))
3449 .sum::<usize>()
3450 }
3451
3452 pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3456 size_of_val(vec_deque)
3457 + (size_of::<ScalarValue>() * vec_deque.capacity())
3458 + vec_deque
3459 .iter()
3460 .map(|sv| sv.size() - size_of_val(sv))
3461 .sum::<usize>()
3462 }
3463
3464 pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3468 size_of_val(set)
3469 + (size_of::<ScalarValue>() * set.capacity())
3470 + set
3471 .iter()
3472 .map(|sv| sv.size() - size_of_val(sv))
3473 .sum::<usize>()
3474 }
3475
3476 pub fn compact(&mut self) {
3482 match self {
3483 ScalarValue::Null
3484 | ScalarValue::Boolean(_)
3485 | ScalarValue::Float16(_)
3486 | ScalarValue::Float32(_)
3487 | ScalarValue::Float64(_)
3488 | ScalarValue::Decimal128(_, _, _)
3489 | ScalarValue::Decimal256(_, _, _)
3490 | ScalarValue::Int8(_)
3491 | ScalarValue::Int16(_)
3492 | ScalarValue::Int32(_)
3493 | ScalarValue::Int64(_)
3494 | ScalarValue::UInt8(_)
3495 | ScalarValue::UInt16(_)
3496 | ScalarValue::UInt32(_)
3497 | ScalarValue::UInt64(_)
3498 | ScalarValue::Date32(_)
3499 | ScalarValue::Date64(_)
3500 | ScalarValue::Time32Second(_)
3501 | ScalarValue::Time32Millisecond(_)
3502 | ScalarValue::Time64Microsecond(_)
3503 | ScalarValue::Time64Nanosecond(_)
3504 | ScalarValue::IntervalYearMonth(_)
3505 | ScalarValue::IntervalDayTime(_)
3506 | ScalarValue::IntervalMonthDayNano(_)
3507 | ScalarValue::DurationSecond(_)
3508 | ScalarValue::DurationMillisecond(_)
3509 | ScalarValue::DurationMicrosecond(_)
3510 | ScalarValue::DurationNanosecond(_)
3511 | ScalarValue::Utf8(_)
3512 | ScalarValue::LargeUtf8(_)
3513 | ScalarValue::Utf8View(_)
3514 | ScalarValue::TimestampSecond(_, _)
3515 | ScalarValue::TimestampMillisecond(_, _)
3516 | ScalarValue::TimestampMicrosecond(_, _)
3517 | ScalarValue::TimestampNanosecond(_, _)
3518 | ScalarValue::Binary(_)
3519 | ScalarValue::FixedSizeBinary(_, _)
3520 | ScalarValue::LargeBinary(_)
3521 | ScalarValue::BinaryView(_) => (),
3522 ScalarValue::FixedSizeList(arr) => {
3523 let array = copy_array_data(&arr.to_data());
3524 *Arc::make_mut(arr) = FixedSizeListArray::from(array);
3525 }
3526 ScalarValue::List(arr) => {
3527 let array = copy_array_data(&arr.to_data());
3528 *Arc::make_mut(arr) = ListArray::from(array);
3529 }
3530 ScalarValue::LargeList(arr) => {
3531 let array = copy_array_data(&arr.to_data());
3532 *Arc::make_mut(arr) = LargeListArray::from(array)
3533 }
3534 ScalarValue::Struct(arr) => {
3535 let array = copy_array_data(&arr.to_data());
3536 *Arc::make_mut(arr) = StructArray::from(array);
3537 }
3538 ScalarValue::Map(arr) => {
3539 let array = copy_array_data(&arr.to_data());
3540 *Arc::make_mut(arr) = MapArray::from(array);
3541 }
3542 ScalarValue::Union(val, _, _) => {
3543 if let Some((_, value)) = val.as_mut() {
3544 value.compact();
3545 }
3546 }
3547 ScalarValue::Dictionary(_, value) => {
3548 value.compact();
3549 }
3550 }
3551 }
3552
3553 pub fn compacted(mut self) -> Self {
3555 self.compact();
3556 self
3557 }
3558}
3559
3560pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
3588 let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
3589 copy.extend(0, 0, src_data.len());
3590 copy.freeze()
3591}
3592
3593macro_rules! impl_scalar {
3594 ($ty:ty, $scalar:tt) => {
3595 impl From<$ty> for ScalarValue {
3596 fn from(value: $ty) -> Self {
3597 ScalarValue::$scalar(Some(value))
3598 }
3599 }
3600
3601 impl From<Option<$ty>> for ScalarValue {
3602 fn from(value: Option<$ty>) -> Self {
3603 ScalarValue::$scalar(value)
3604 }
3605 }
3606 };
3607}
3608
3609impl_scalar!(f64, Float64);
3610impl_scalar!(f32, Float32);
3611impl_scalar!(i8, Int8);
3612impl_scalar!(i16, Int16);
3613impl_scalar!(i32, Int32);
3614impl_scalar!(i64, Int64);
3615impl_scalar!(bool, Boolean);
3616impl_scalar!(u8, UInt8);
3617impl_scalar!(u16, UInt16);
3618impl_scalar!(u32, UInt32);
3619impl_scalar!(u64, UInt64);
3620
3621impl From<&str> for ScalarValue {
3622 fn from(value: &str) -> Self {
3623 Some(value).into()
3624 }
3625}
3626
3627impl From<Option<&str>> for ScalarValue {
3628 fn from(value: Option<&str>) -> Self {
3629 let value = value.map(|s| s.to_string());
3630 ScalarValue::Utf8(value)
3631 }
3632}
3633
3634impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3636 fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3637 value
3638 .into_iter()
3639 .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3640 builder.with_name_and_scalar(name, value)
3641 })
3642 .build()
3643 .unwrap()
3644 }
3645}
3646
3647impl FromStr for ScalarValue {
3648 type Err = Infallible;
3649
3650 fn from_str(s: &str) -> Result<Self, Self::Err> {
3651 Ok(s.into())
3652 }
3653}
3654
3655impl From<String> for ScalarValue {
3656 fn from(value: String) -> Self {
3657 ScalarValue::Utf8(Some(value))
3658 }
3659}
3660
3661macro_rules! impl_try_from {
3662 ($SCALAR:ident, $NATIVE:ident) => {
3663 impl TryFrom<ScalarValue> for $NATIVE {
3664 type Error = DataFusionError;
3665
3666 fn try_from(value: ScalarValue) -> Result<Self> {
3667 match value {
3668 ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
3669 _ => _internal_err!(
3670 "Cannot convert {:?} to {}",
3671 value,
3672 std::any::type_name::<Self>()
3673 ),
3674 }
3675 }
3676 }
3677 };
3678}
3679
3680impl_try_from!(Int8, i8);
3681impl_try_from!(Int16, i16);
3682
3683impl TryFrom<ScalarValue> for i32 {
3685 type Error = DataFusionError;
3686
3687 fn try_from(value: ScalarValue) -> Result<Self> {
3688 match value {
3689 ScalarValue::Int32(Some(inner_value))
3690 | ScalarValue::Date32(Some(inner_value))
3691 | ScalarValue::Time32Second(Some(inner_value))
3692 | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3693 _ => _internal_err!(
3694 "Cannot convert {:?} to {}",
3695 value,
3696 std::any::type_name::<Self>()
3697 ),
3698 }
3699 }
3700}
3701
3702impl TryFrom<ScalarValue> for i64 {
3704 type Error = DataFusionError;
3705
3706 fn try_from(value: ScalarValue) -> Result<Self> {
3707 match value {
3708 ScalarValue::Int64(Some(inner_value))
3709 | ScalarValue::Date64(Some(inner_value))
3710 | ScalarValue::Time64Microsecond(Some(inner_value))
3711 | ScalarValue::Time64Nanosecond(Some(inner_value))
3712 | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3713 | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3714 | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3715 | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3716 _ => _internal_err!(
3717 "Cannot convert {:?} to {}",
3718 value,
3719 std::any::type_name::<Self>()
3720 ),
3721 }
3722 }
3723}
3724
3725impl TryFrom<ScalarValue> for i128 {
3727 type Error = DataFusionError;
3728
3729 fn try_from(value: ScalarValue) -> Result<Self> {
3730 match value {
3731 ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3732 _ => _internal_err!(
3733 "Cannot convert {:?} to {}",
3734 value,
3735 std::any::type_name::<Self>()
3736 ),
3737 }
3738 }
3739}
3740
3741impl TryFrom<ScalarValue> for i256 {
3743 type Error = DataFusionError;
3744
3745 fn try_from(value: ScalarValue) -> Result<Self> {
3746 match value {
3747 ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3748 _ => _internal_err!(
3749 "Cannot convert {:?} to {}",
3750 value,
3751 std::any::type_name::<Self>()
3752 ),
3753 }
3754 }
3755}
3756
// Single-variant `TryFrom<ScalarValue>` conversions generated by
// `impl_try_from!`; arguments are (ScalarValue variant, native target type).
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
3764
3765impl TryFrom<DataType> for ScalarValue {
3766 type Error = DataFusionError;
3767
3768 fn try_from(datatype: DataType) -> Result<Self> {
3770 (&datatype).try_into()
3771 }
3772}
3773
3774impl TryFrom<&DataType> for ScalarValue {
3775 type Error = DataFusionError;
3776
3777 fn try_from(data_type: &DataType) -> Result<Self> {
3779 Self::try_new_null(data_type)
3780 }
3781}
3782
/// Writes an optional value to the formatter `$F`: the value's `Display`
/// output when it is `Some`, the literal text `NULL` when it is `None`.
///
/// Expands to an expression yielding the result of the `write!` call.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(present) = $EXPR {
            write!($F, "{present}")
        } else {
            write!($F, "NULL")
        }
    }};
}
3791
3792impl fmt::Display for ScalarValue {
3798 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3799 match self {
3800 ScalarValue::Decimal128(v, p, s) => {
3801 write!(f, "{v:?},{p:?},{s:?}")?;
3802 }
3803 ScalarValue::Decimal256(v, p, s) => {
3804 write!(f, "{v:?},{p:?},{s:?}")?;
3805 }
3806 ScalarValue::Boolean(e) => format_option!(f, e)?,
3807 ScalarValue::Float16(e) => format_option!(f, e)?,
3808 ScalarValue::Float32(e) => format_option!(f, e)?,
3809 ScalarValue::Float64(e) => format_option!(f, e)?,
3810 ScalarValue::Int8(e) => format_option!(f, e)?,
3811 ScalarValue::Int16(e) => format_option!(f, e)?,
3812 ScalarValue::Int32(e) => format_option!(f, e)?,
3813 ScalarValue::Int64(e) => format_option!(f, e)?,
3814 ScalarValue::UInt8(e) => format_option!(f, e)?,
3815 ScalarValue::UInt16(e) => format_option!(f, e)?,
3816 ScalarValue::UInt32(e) => format_option!(f, e)?,
3817 ScalarValue::UInt64(e) => format_option!(f, e)?,
3818 ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3819 ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3820 ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3821 ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3822 ScalarValue::Utf8(e)
3823 | ScalarValue::LargeUtf8(e)
3824 | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3825 ScalarValue::Binary(e)
3826 | ScalarValue::FixedSizeBinary(_, e)
3827 | ScalarValue::LargeBinary(e)
3828 | ScalarValue::BinaryView(e) => match e {
3829 Some(bytes) => {
3830 for b in bytes.iter().take(10) {
3832 write!(f, "{b:02X}")?;
3833 }
3834 if bytes.len() > 10 {
3835 write!(f, "...")?;
3836 }
3837 }
3838 None => write!(f, "NULL")?,
3839 },
3840 ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3841 ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3842 ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3843 ScalarValue::Date32(e) => format_option!(
3844 f,
3845 e.map(|v| {
3846 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
3847 match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
3848 {
3849 Some(date) => date.to_string(),
3850 None => "".to_string(),
3851 }
3852 })
3853 )?,
3854 ScalarValue::Date64(e) => format_option!(
3855 f,
3856 e.map(|v| {
3857 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
3858 match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
3859 {
3860 Some(date) => date.to_string(),
3861 None => "".to_string(),
3862 }
3863 })
3864 )?,
3865 ScalarValue::Time32Second(e) => format_option!(f, e)?,
3866 ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3867 ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3868 ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3869 ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3870 ScalarValue::IntervalMonthDayNano(e) => {
3871 format_option!(f, e.map(|v| format!("{v:?}")))?
3872 }
3873 ScalarValue::IntervalDayTime(e) => {
3874 format_option!(f, e.map(|v| format!("{v:?}")))?;
3875 }
3876 ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3877 ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3878 ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3879 ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3880 ScalarValue::Struct(struct_arr) => {
3881 assert_eq!(struct_arr.len(), 1);
3883
3884 if struct_arr.null_count() == struct_arr.len() {
3885 write!(f, "NULL")?;
3886 return Ok(());
3887 }
3888
3889 let columns = struct_arr.columns();
3890 let fields = struct_arr.fields();
3891 let nulls = struct_arr.nulls();
3892
3893 write!(
3894 f,
3895 "{{{}}}",
3896 columns
3897 .iter()
3898 .zip(fields.iter())
3899 .map(|(column, field)| {
3900 if nulls.is_some_and(|b| b.is_null(0)) {
3901 format!("{}:NULL", field.name())
3902 } else if let DataType::Struct(_) = field.data_type() {
3903 let sv = ScalarValue::Struct(Arc::new(
3904 column.as_struct().to_owned(),
3905 ));
3906 format!("{}:{sv}", field.name())
3907 } else {
3908 let sv = array_value_to_string(column, 0).unwrap();
3909 format!("{}:{sv}", field.name())
3910 }
3911 })
3912 .collect::<Vec<_>>()
3913 .join(",")
3914 )?
3915 }
3916 ScalarValue::Map(map_arr) => {
3917 if map_arr.null_count() == map_arr.len() {
3918 write!(f, "NULL")?;
3919 return Ok(());
3920 }
3921
3922 write!(
3923 f,
3924 "[{}]",
3925 map_arr
3926 .iter()
3927 .map(|struct_array| {
3928 if let Some(arr) = struct_array {
3929 let mut buffer = VecDeque::new();
3930 for i in 0..arr.len() {
3931 let key =
3932 array_value_to_string(arr.column(0), i).unwrap();
3933 let value =
3934 array_value_to_string(arr.column(1), i).unwrap();
3935 buffer.push_back(format!("{key}:{value}"));
3936 }
3937 format!(
3938 "{{{}}}",
3939 buffer
3940 .into_iter()
3941 .collect::<Vec<_>>()
3942 .join(",")
3943 .as_str()
3944 )
3945 } else {
3946 "NULL".to_string()
3947 }
3948 })
3949 .collect::<Vec<_>>()
3950 .join(",")
3951 )?
3952 }
3953 ScalarValue::Union(val, _fields, _mode) => match val {
3954 Some((id, val)) => write!(f, "{id}:{val}")?,
3955 None => write!(f, "NULL")?,
3956 },
3957 ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3958 ScalarValue::Null => write!(f, "NULL")?,
3959 };
3960 Ok(())
3961 }
3962}
3963
3964fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3965 assert_eq!(arr.len(), 1);
3967 let options = FormatOptions::default().with_display_error(true);
3968 let formatter =
3969 ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3970 let value_formatter = formatter.value(0);
3971 write!(f, "{value_formatter}")
3972}
3973
/// Writes `data` as decimal byte values separated by commas, with no
/// leading or trailing separator. Empty input writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    // The separator is empty for the first byte and "," for the rest.
    let mut separator = "";
    for byte in data {
        write!(f, "{separator}{byte}")?;
        separator = ",";
    }
    Ok(())
}
3985
/// Verbose rendering of a [`ScalarValue`] that prefixes the value with its
/// variant name, e.g. `Int32(..)` or `Utf8("..")`.
///
/// Most arms wrap the `Display` output of `self`. Binary variants instead
/// list every byte as a comma-separated decimal via `fmt_binary`, and
/// timestamp variants additionally print the timezone with `{:?}`.
impl fmt::Debug for ScalarValue {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
            // Timestamps: Display output followed by the timezone option.
            ScalarValue::TimestampSecond(_, tz_opt) => {
                write!(f, "TimestampSecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
            }
            // Strings: non-null values are quoted, nulls are not.
            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
            // Binary: non-null values list all bytes via `fmt_binary`.
            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
            ScalarValue::Binary(Some(b)) => {
                write!(f, "Binary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
            ScalarValue::BinaryView(Some(b)) => {
                write!(f, "BinaryView(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeBinary(size, None) => {
                write!(f, "FixedSizeBinary({size}, {self})")
            }
            ScalarValue::FixedSizeBinary(size, Some(b)) => {
                write!(f, "FixedSizeBinary({size}, \"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
            ScalarValue::LargeBinary(Some(b)) => {
                write!(f, "LargeBinary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
            ScalarValue::List(_) => write!(f, "List({self})"),
            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
            ScalarValue::Struct(struct_arr) => {
                // A struct scalar is expected to wrap exactly one row.
                assert_eq!(struct_arr.len(), 1);

                let columns = struct_arr.columns();
                let fields = struct_arr.fields();

                // Each field is rendered as `name:value` from row 0 of its column.
                write!(
                    f,
                    "Struct({{{}}})",
                    columns
                        .iter()
                        .zip(fields.iter())
                        .map(|(column, field)| {
                            let sv = array_value_to_string(column, 0).unwrap();
                            let name = field.name();
                            format!("{name}:{sv}")
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            ScalarValue::Map(map_arr) => {
                write!(
                    f,
                    "Map([{}])",
                    map_arr
                        .iter()
                        .map(|struct_array| {
                            if let Some(arr) = struct_array {
                                // Column 0 holds the keys, column 1 the values;
                                // both are printed with `{:?}` (i.e. quoted).
                                let buffer: Vec<String> = (0..arr.len())
                                    .map(|i| {
                                        let key = array_value_to_string(arr.column(0), i)
                                            .unwrap();
                                        let value =
                                            array_value_to_string(arr.column(1), i)
                                                .unwrap();
                                        format!("{key:?}:{value:?}")
                                    })
                                    .collect();
                                format!("{{{}}}", buffer.join(","))
                            } else {
                                "NULL".to_string()
                            }
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            // Temporal variants: Display output wrapped in quotes.
            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
            ScalarValue::Time32Millisecond(_) => {
                write!(f, "Time32Millisecond(\"{self}\")")
            }
            ScalarValue::Time64Microsecond(_) => {
                write!(f, "Time64Microsecond(\"{self}\")")
            }
            ScalarValue::Time64Nanosecond(_) => {
                write!(f, "Time64Nanosecond(\"{self}\")")
            }
            ScalarValue::IntervalDayTime(_) => {
                write!(f, "IntervalDayTime(\"{self}\")")
            }
            ScalarValue::IntervalYearMonth(_) => {
                write!(f, "IntervalYearMonth(\"{self}\")")
            }
            ScalarValue::IntervalMonthDayNano(_) => {
                write!(f, "IntervalMonthDayNano(\"{self}\")")
            }
            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
            ScalarValue::DurationMillisecond(_) => {
                write!(f, "DurationMillisecond(\"{self}\")")
            }
            ScalarValue::DurationMicrosecond(_) => {
                write!(f, "DurationMicrosecond(\"{self}\")")
            }
            ScalarValue::DurationNanosecond(_) => {
                write!(f, "DurationNanosecond(\"{self}\")")
            }
            // Unions print the type id and the Display form of the value.
            ScalarValue::Union(val, _fields, _mode) => match val {
                Some((id, val)) => write!(f, "Union {id}:{val}"),
                None => write!(f, "Union(NULL)"),
            },
            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
            ScalarValue::Null => write!(f, "NULL"),
        }
    }
}
4139
/// Implemented by Arrow type markers to build the matching
/// [`ScalarValue`] variant from an optional native value of type `T`.
pub trait ScalarType<T: ArrowNativeType> {
    /// Wraps `r` in the [`ScalarValue`] variant associated with the
    /// implementing type.
    fn scalar(r: Option<T>) -> ScalarValue;
}
4145
/// `Float32Type` values become `ScalarValue::Float32`.
impl ScalarType<f32> for Float32Type {
    fn scalar(r: Option<f32>) -> ScalarValue {
        ScalarValue::Float32(r)
    }
}
4151
/// `TimestampSecondType` values become `ScalarValue::TimestampSecond`
/// with no timezone attached.
impl ScalarType<i64> for TimestampSecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampSecond(r, None)
    }
}
4157
/// `TimestampMillisecondType` values become
/// `ScalarValue::TimestampMillisecond` with no timezone attached.
impl ScalarType<i64> for TimestampMillisecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMillisecond(r, None)
    }
}
4163
/// `TimestampMicrosecondType` values become
/// `ScalarValue::TimestampMicrosecond` with no timezone attached.
impl ScalarType<i64> for TimestampMicrosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMicrosecond(r, None)
    }
}
4169
/// `TimestampNanosecondType` values become
/// `ScalarValue::TimestampNanosecond` with no timezone attached.
impl ScalarType<i64> for TimestampNanosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampNanosecond(r, None)
    }
}
4175
/// `Date32Type` values become `ScalarValue::Date32`.
impl ScalarType<i32> for Date32Type {
    fn scalar(r: Option<i32>) -> ScalarValue {
        ScalarValue::Date32(r)
    }
}
4181
4182#[cfg(test)]
4183mod tests {
4184 use super::*;
4185 use crate::cast::{as_list_array, as_map_array, as_struct_array};
4186 use crate::test_util::batches_to_string;
4187 use arrow::array::{
4188 FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
4189 NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
4190 StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
4191 };
4192 use arrow::buffer::{Buffer, OffsetBuffer};
4193 use arrow::compute::{is_null, kernels};
4194 use arrow::datatypes::{ArrowNumericType, Fields, Float64Type};
4195 use arrow::error::ArrowError;
4196 use arrow::util::pretty::pretty_format_columns;
4197 use chrono::NaiveDate;
4198 use insta::assert_snapshot;
4199 use rand::Rng;
4200
/// A `ScalarValue::Map` converted back with `to_array` must equal the map
/// array it was created from.
#[test]
fn test_scalar_value_from_for_map() {
    let mut map_builder =
        MapBuilder::new(None, StringBuilder::new(), Int32Builder::with_capacity(4));

    // First entry: a single key/value pair.
    map_builder.keys().append_value("joe");
    map_builder.values().append_value(1);
    map_builder.append(true).unwrap();

    // Second entry: two key/value pairs.
    map_builder.keys().append_value("blogs");
    map_builder.values().append_value(2);
    map_builder.keys().append_value("foo");
    map_builder.values().append_value(4);
    map_builder.append(true).unwrap();

    // Third entry is valid but has no pairs; the fourth is null.
    map_builder.append(true).unwrap();
    map_builder.append(false).unwrap();

    let expected = map_builder.finish();

    let scalar = ScalarValue::Map(Arc::new(expected.clone()));
    let round_tripped = scalar.to_array().unwrap();
    assert_eq!(as_map_array(&round_tripped).unwrap(), &expected);
}
4225
/// A struct scalar assembled with `ScalarStructBuilder::with_array` must
/// convert to the same array as a directly constructed `StructArray`.
#[test]
fn test_scalar_value_from_for_struct() {
    let bool_field = Field::new("b", DataType::Boolean, false);
    let int_field = Field::new("c", DataType::Int32, false);
    let bool_column = Arc::new(BooleanArray::from(vec![false]));
    let int_column = Arc::new(Int32Array::from(vec![42]));

    let expected = StructArray::from(vec![
        (
            Arc::new(bool_field.clone()),
            Arc::clone(&bool_column) as ArrayRef,
        ),
        (
            Arc::new(int_field.clone()),
            Arc::clone(&int_column) as ArrayRef,
        ),
    ]);

    let scalar = ScalarStructBuilder::new()
        .with_array(bool_field, bool_column)
        .with_array(int_field, int_column)
        .build()
        .unwrap();

    let as_array = scalar.to_array().unwrap();
    assert_eq!(as_struct_array(&as_array).unwrap(), &expected);
}
4252
/// Building a struct scalar from columns holding four rows must fail;
/// unwrapping the error produces the expected panic message.
#[test]
#[should_panic(
    expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
)]
fn test_scalar_value_from_for_struct_should_panic() {
    let four_bools = Arc::new(BooleanArray::from(vec![false, true, false, false]));
    let four_ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    let _ = ScalarStructBuilder::new()
        .with_array(Field::new("bool", DataType::Boolean, false), four_bools)
        .with_array(Field::new("i32", DataType::Int32, false), four_ints)
        .build()
        .unwrap();
}
4270
/// `to_array_of_size(2)` on struct and list scalars must yield the
/// wrapped array's contents repeated twice.
#[test]
fn test_to_array_of_size_for_nested() {
    // Builds a two-column struct array (`b`: Boolean, `c`: Int32).
    let struct_of = |bools: Vec<bool>, ints: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(bools)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::new(Int32Array::from(ints)) as ArrayRef,
            ),
        ])
    };

    // Struct case: the wrapped array has four rows here.
    let scalar = ScalarValue::Struct(Arc::new(struct_of(
        vec![false, false, true, true],
        vec![42, 28, 19, 31],
    )));
    let repeated = scalar.to_array_of_size(2).unwrap();

    let expected_struct = struct_of(
        vec![false, false, true, true, false, false, true, true],
        vec![42, 28, 19, 31, 42, 28, 19, 31],
    );
    assert_eq!(as_struct_array(&repeated).unwrap(), &expected_struct);

    // List case: a single row `[1, NULL, 2]`.
    let single_row =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
            Some(1),
            None,
            Some(2),
        ])]);
    let scalar = ScalarValue::List(Arc::new(single_row));
    let repeated = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");

    let expected_list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), None, Some(2)]),
        Some(vec![Some(1), None, Some(2)]),
    ]);
    assert_eq!(&expected_list, repeated.as_list::<i32>());
}
4329
/// `to_array_of_size` on a fixed-size-list scalar repeats the row for a
/// size of 2 and produces an empty array for a size of 0.
#[test]
fn test_to_array_of_size_for_fsl() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let one_row = FixedSizeListArray::new(
        Arc::clone(&item_field),
        3,
        Arc::new(Int32Array::from_iter([Some(1), None, Some(2)])),
        None,
    );
    let scalar = ScalarValue::FixedSizeList(Arc::new(one_row));

    // Size 2: the three values appear twice.
    let repeated = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected = FixedSizeListArray::new(
        item_field,
        3,
        Arc::new(Int32Array::from_iter([
            Some(1),
            None,
            Some(2),
            Some(1),
            None,
            Some(2),
        ])),
        None,
    );
    assert_eq!(
        &expected,
        as_fixed_size_list_array(repeated.as_ref()).unwrap()
    );

    // Size 0: no rows at all.
    let nothing = scalar
        .to_array_of_size(0)
        .expect("Failed to convert to empty array");
    assert_eq!(nothing.len(), 0);
}
4356
/// `new_list_nullable` over Utf8 scalars must produce the same single-row
/// list array as `single_row_list_array` over the raw strings.
#[test]
fn test_list_to_array_string() {
    let words = vec!["rust", "arrow", "data-fusion"];
    let scalars: Vec<ScalarValue> =
        words.iter().map(|word| ScalarValue::from(*word)).collect();

    let actual = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);

    assert_eq!(*actual, single_row_list_array(words));
}
4370
4371 fn single_row_list_array(items: Vec<&str>) -> ListArray {
4372 SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
4373 .build_list_array()
4374 }
4375
/// Test helper: turns each optional row of `i64` values into a list
/// scalar, producing `ScalarValue::LargeList` when `O::IS_LARGE` and
/// `ScalarValue::List` otherwise.
///
/// `Some(row)` becomes a single-row list array; `None` becomes a
/// single-row null array of the matching list type.
fn build_list<O: OffsetSizeTrait>(
    values: Vec<Option<Vec<Option<i64>>>>,
) -> Vec<ScalarValue> {
    values
        .into_iter()
        .map(|v| {
            // Pick the array for this row: real data, or a typed null row.
            let arr = if v.is_some() {
                Arc::new(
                    GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
                        vec![v],
                    ),
                )
            } else if O::IS_LARGE {
                new_null_array(
                    &DataType::LargeList(Arc::new(Field::new_list_field(
                        DataType::Int64,
                        true,
                    ))),
                    1,
                )
            } else {
                new_null_array(
                    &DataType::List(Arc::new(Field::new_list_field(
                        DataType::Int64,
                        true,
                    ))),
                    1,
                )
            };

            // Wrap it in the scalar variant that matches the offset size.
            if O::IS_LARGE {
                ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
            } else {
                ScalarValue::List(arr.as_list::<i32>().to_owned().into())
            }
        })
        .collect()
}
4414
/// `iter_to_array` over fixed-size-list scalars (null, two data rows,
/// null) must produce the corresponding four-row fixed-size-list array.
#[test]
fn test_iter_to_array_fixed_size_list() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // Builds a one-row fixed-size list of width 3 from the given values.
    let data_row = |values: Vec<i32>| {
        Arc::new(FixedSizeListArray::new(
            Arc::clone(&item_field),
            3,
            Arc::new(Int32Array::from(values)),
            None,
        ))
    };
    let first = data_row(vec![1, 2, 3]);
    let second = data_row(vec![4, 5, 6]);
    let null_row = Arc::new(FixedSizeListArray::new_null(
        Arc::clone(&item_field),
        1,
        1,
    ));

    let scalars = vec![
        ScalarValue::FixedSizeList(Arc::clone(&null_row)),
        ScalarValue::FixedSizeList(first),
        ScalarValue::FixedSizeList(second),
        ScalarValue::FixedSizeList(null_row),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
        vec![
            None,
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(4), Some(5), Some(6)]),
            None,
        ],
        3,
    );
    assert_eq!(combined.as_ref(), &expected);
}
4452
/// `iter_to_array` over two identical one-row struct scalars must yield
/// a two-row struct array.
#[test]
fn test_iter_to_array_struct() {
    // Builds a struct array with columns `A` (Boolean) and `B` (Int32).
    let struct_of = |flags: Vec<bool>, numbers: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("A", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(flags)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int32, false)),
                Arc::new(Int32Array::from(numbers)) as ArrayRef,
            ),
        ])
    };

    let first = struct_of(vec![false], vec![42]);
    let second = struct_of(vec![false], vec![42]);

    let scalars = vec![
        ScalarValue::Struct(Arc::new(first)),
        ScalarValue::Struct(Arc::new(second)),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = struct_of(vec![false, false], vec![42, 42]);
    assert_eq!(combined.as_ref(), &expected);
}
4496
/// `iter_to_array` must carry over struct-level validity: the first
/// scalar (validity byte 1) stays valid, the second (validity byte 0) is
/// null.
#[test]
fn test_iter_to_array_struct_with_nulls() {
    // Builds a one-row struct (`A`: Int32, `B`: Int64) whose validity
    // buffer consists of the single given byte.
    let struct_row = |a: i32, b: i64, validity: u8| {
        StructArray::from((
            vec![
                (
                    Arc::new(Field::new("A", DataType::Int32, false)),
                    Arc::new(Int32Array::from(vec![a])) as ArrayRef,
                ),
                (
                    Arc::new(Field::new("B", DataType::Int64, false)),
                    Arc::new(Int64Array::from(vec![b])) as ArrayRef,
                ),
            ],
            Buffer::from(&[validity]),
        ))
    };

    let valid_row = struct_row(1, 2, 1);
    let null_row = struct_row(3, 4, 0);

    let scalars = vec![
        ScalarValue::Struct(Arc::new(valid_row)),
        ScalarValue::Struct(Arc::new(null_row)),
    ];

    let combined = ScalarValue::iter_to_array(scalars).unwrap();
    let combined = combined.as_struct();
    assert!(combined.is_valid(0));
    assert!(combined.is_null(1));
}
4540
/// `iter_to_array` over list scalars built by `build_list` must match the
/// equivalent array built directly, for both `List` and `LargeList`.
#[test]
fn iter_to_array_primitive_test() {
    // The same three rows are used everywhere: [1, 2, 3], NULL, [4, 5].
    let rows = || -> Vec<Option<Vec<Option<i64>>>> {
        vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), Some(5)]),
        ]
    };

    // 32-bit offsets.
    let combined = ScalarValue::iter_to_array(build_list::<i32>(rows())).unwrap();
    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_list_array(&combined).unwrap(), &expected);

    // 64-bit offsets.
    let combined = ScalarValue::iter_to_array(build_list::<i64>(rows())).unwrap();
    let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(as_large_list_array(&combined).unwrap(), &expected);
}
4575
/// Combines two single-row string list scalars with
/// `ScalarValue::iter_to_array` and compares the result with a list array
/// assembled through `ListBuilder`.
#[test]
fn iter_to_array_string_test() {
    let scalars = vec![
        ScalarValue::List(Arc::new(single_row_list_array(vec![
            "foo", "bar", "baz",
        ]))),
        ScalarValue::List(Arc::new(single_row_list_array(vec!["rust", "world"]))),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let actual = array.as_list::<i32>();

    // Build the expected array row by row: append the row's strings to the
    // child builder, then close the row as a valid (non-null) list entry.
    let mut expected_builder = ListBuilder::new(StringBuilder::with_capacity(5, 25));
    let rows: [&[&str]; 2] = [&["foo", "bar", "baz"], &["rust", "world"]];
    for row in rows {
        for &item in row {
            expected_builder.values().append_value(item);
        }
        expected_builder.append(true);
    }
    let expected = expected_builder.finish();

    assert_eq!(actual, &expected);
}
4605
/// For every row of two list arrays (each containing a null row and a row
/// with a null element), slices the row out into a `ScalarValue::List` and
/// checks that `eq_array` reports it equal to the same row of the source.
#[test]
fn test_list_scalar_eq_to_array() {
    let first: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ]));

    let second: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(5)]),
        ]));

    for source in [first, second] {
        for row in 0..source.len() {
            let single_row = source.slice(row, 1);
            let scalar = ScalarValue::List(single_row.as_list::<i32>().to_owned().into());
            assert!(scalar.eq_array(&source, row).unwrap());
        }
    }
}
4630
/// Comparing a `Utf8` scalar against an `Int32` array must fail, and the
/// error message must start with the expected cast-failure text.
#[test]
fn test_eq_array_err_message() {
    let scalar = ScalarValue::Utf8(Some("123".to_string()));
    let int_array = Arc::new(Int32Array::from(vec![123])) as ArrayRef;

    let err = scalar.eq_array(&int_array, 0).unwrap_err();

    assert_starts_with(
        err.message(),
        "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
    );
}
4641
/// `ScalarValue::add` accepts its right-hand side both by reference and by
/// value; both forms must produce the same Float64 sum.
#[test]
fn scalar_add_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(246.));

    let sum_by_ref = lhs.add(&rhs)?;
    assert_eq!(sum_by_ref, expected);

    let sum_by_value = lhs.add(rhs)?;
    assert_eq!(sum_by_value, expected);

    Ok(())
}
4656
/// `ScalarValue::sub` accepts its right-hand side both by reference and by
/// value; both forms must produce the same Float64 difference.
#[test]
fn scalar_sub_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(0.));

    let diff_by_ref = lhs.sub(&rhs)?;
    assert_eq!(diff_by_ref, expected);

    let diff_by_value = lhs.sub(rhs)?;
    assert_eq!(diff_by_value, expected);

    Ok(())
}
4671
/// Int32 subtraction in both operand orders: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int32_test() -> Result<()> {
    let small = ScalarValue::Int32(Some(42));
    let large = ScalarValue::Int32(Some(100));

    let negative = small.sub(&large)?;
    assert_eq!(negative, ScalarValue::Int32(Some(-58)));

    let positive = large.sub(small)?;
    assert_eq!(positive, ScalarValue::Int32(Some(58)));

    Ok(())
}
4680
/// `sub_checked` on `i32::MAX - i32::MIN` must report an arithmetic overflow
/// with the exact arrow error text.
#[test]
fn scalar_sub_trait_int32_overflow_test() {
    let max = ScalarValue::Int32(Some(i32::MAX));
    let min = ScalarValue::Int32(Some(i32::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
    )
}
4694
/// Int64 subtraction in both operand orders: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int64_test() -> Result<()> {
    let small = ScalarValue::Int64(Some(42));
    let large = ScalarValue::Int64(Some(100));

    let negative = small.sub(&large)?;
    assert_eq!(negative, ScalarValue::Int64(Some(-58)));

    let positive = large.sub(small)?;
    assert_eq!(positive, ScalarValue::Int64(Some(58)));

    Ok(())
}
4703
/// `sub_checked` on `i64::MAX - i64::MIN` must report an arithmetic overflow
/// with the exact arrow error text.
#[test]
fn scalar_sub_trait_int64_overflow_test() {
    let max = ScalarValue::Int64(Some(i64::MAX));
    let min = ScalarValue::Int64(Some(i64::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(message, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
}
4714
/// Runs `check_scalar_add_overflow` with `MAX + MAX` for every signed and
/// unsigned integer width.
#[test]
fn scalar_add_overflow_test() -> Result<()> {
    // Expands to one `check_scalar_add_overflow` call whose operands are both
    // the maximum value of `$native`, wrapped in the given scalar variant.
    macro_rules! check_max_plus_max {
        ($variant:ident, $arrow_ty:ty, $native:ty) => {
            check_scalar_add_overflow::<$arrow_ty>(
                ScalarValue::$variant(Some(<$native>::MAX)),
                ScalarValue::$variant(Some(<$native>::MAX)),
            )
        };
    }

    check_max_plus_max!(Int8, Int8Type, i8);
    check_max_plus_max!(UInt8, UInt8Type, u8);
    check_max_plus_max!(Int16, Int16Type, i16);
    check_max_plus_max!(UInt16, UInt16Type, u16);
    check_max_plus_max!(Int32, Int32Type, i32);
    check_max_plus_max!(UInt32, UInt32Type, u32);
    check_max_plus_max!(Int64, Int64Type, i64);
    check_max_plus_max!(UInt64, UInt64Type, u64);

    Ok(())
}
4752
4753 fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4755 where
4756 T: ArrowNumericType,
4757 {
4758 let scalar_result = left.add_checked(&right);
4759
4760 let left_array = left.to_array().expect("Failed to convert to array");
4761 let right_array = right.to_array().expect("Failed to convert to array");
4762 let arrow_left_array = left_array.as_primitive::<T>();
4763 let arrow_right_array = right_array.as_primitive::<T>();
4764 let arrow_result = add(arrow_left_array, arrow_right_array);
4765
4766 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4767 }
4768
/// Addition between an interval and a timestamp must be commutative:
/// `interval + timestamp` has to equal `timestamp + interval` for each of
/// the three interval flavours (month-day-nano, year-month, day-time).
#[test]
fn test_interval_add_timestamp() -> Result<()> {
    // IntervalMonthDayNano
    let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
        months: 1,
        days: 2,
        nanoseconds: 3,
    }));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    // The right-hand side is passed by reference so `timestamp` stays usable
    // for the reversed addition on the next line.
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);

    // IntervalYearMonth
    let interval = ScalarValue::IntervalYearMonth(Some(123));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);

    // IntervalDayTime
    let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
        days: 1,
        milliseconds: 23,
    }));
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
    let result = interval.add(&timestamp)?;
    let expect = timestamp.add(&interval)?;
    assert_eq!(result, expect);
    Ok(())
}
4797
/// `try_cmp` orders values of the same type (a null Int32 is expected to
/// compare as `Less` than `2`) and returns an error for mismatched types.
#[test]
fn test_try_cmp() {
    let one = ScalarValue::Int32(Some(1));
    let two = ScalarValue::Int32(Some(2));
    let null = ScalarValue::Int32(None);

    assert_eq!(one.try_cmp(&two).unwrap(), Ordering::Less);
    assert_eq!(null.try_cmp(&two).unwrap(), Ordering::Less);

    // Int32 against Int64 is not comparable.
    let mismatch = one.try_cmp(&ScalarValue::Int64(Some(2))).unwrap_err();
    assert_starts_with(
        mismatch.message(),
        "Uncomparable values: Int32(1), Int64(2)",
    );
}
4823
/// End-to-end checks for `ScalarValue::Decimal128`: type and value access,
/// negation, conversion to arrays, equality against array rows, ordering,
/// and building arrays from iterators of decimal scalars.
#[test]
fn scalar_decimal_test() -> Result<()> {
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
    let decimal_type = DataType::Decimal128(10, 1);

    // Basic accessors.
    assert_eq!(decimal_type, decimal_value.data_type());
    let unscaled: i128 = decimal_value.clone().try_into().unwrap();
    assert_eq!(123_i128, unscaled);
    assert!(!decimal_value.is_null());

    // Negation must stay a Decimal128 and flip the sign.
    let ScalarValue::Decimal128(negated, _, _) = decimal_value.arithmetic_negate()?
    else {
        unreachable!();
    };
    assert_eq!(-123, negated.unwrap());

    // Conversion to an array of length one.
    let single = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single = as_decimal128_array(&single)?;
    assert_eq!(1, single.len());
    assert_eq!(&decimal_type, single.data_type());
    assert_eq!(123i128, single.value(0));

    // Conversion to an array of length ten.
    let repeated = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated_decimal = as_decimal128_array(&repeated)?;
    assert_eq!(10, repeated.len());
    assert_eq!(&decimal_type, repeated.data_type());
    assert_eq!(123i128, repeated_decimal.value(0));
    assert_eq!(123i128, repeated_decimal.value(9));

    // Equality against array rows, and the array -> scalar round trip.
    for row in [1, 5] {
        assert!(decimal_value
            .eq_array(&repeated, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated, 5).unwrap()
    );
    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // Ordering: same precision/scale compares by value, while a different
    // scale yields no ordering at all.
    let left = ScalarValue::Decimal128(Some(123), 10, 2);
    let right = ScalarValue::Decimal128(Some(124), 10, 2);
    assert_ne!(left, right);
    assert!(left < right);
    assert!(left <= right);
    let other_scale = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, left.partial_cmp(&other_scale));

    // Array built from three non-null decimals.
    let non_null: Vec<_> = (1..=3)
        .map(|v| ScalarValue::Decimal128(Some(v), 10, 2))
        .collect();
    let array = ScalarValue::iter_to_array(non_null).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    // Array built from three decimals followed by a null.
    let mut with_null: Vec<_> = (1..=3)
        .map(|v| ScalarValue::Decimal128(Some(v), 10, 2))
        .collect();
    with_null.push(ScalarValue::Decimal128(None, 10, 2));
    let array = ScalarValue::iter_to_array(with_null).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    for (row, unscaled) in (1..=3_i128).enumerate() {
        assert!(ScalarValue::try_new_decimal128(unscaled, 10, 2)
            .unwrap()
            .eq_array(&array, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
4929
/// Exercises `partial_cmp` between single-row list scalars (`List`,
/// `LargeList` and `FixedSizeList`) holding Int64 elements.
#[test]
fn test_list_partial_cmp() {
    // Wraps `values` as the only row of a `ScalarValue::List`.
    let list = |values: Vec<Option<i64>>| {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(values)]),
        ))
    };
    // Wraps `values` as the only row of a `ScalarValue::LargeList`.
    let large_list = |values: Vec<Option<i64>>| {
        ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
            Int64Type,
            _,
            _,
        >(vec![Some(values)])))
    };
    // Wraps `values` as the only row of a three-wide `ScalarValue::FixedSizeList`.
    let fixed_list = |values: Vec<Option<i64>>| {
        ScalarValue::FixedSizeList(Arc::new(
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
                vec![Some(values)],
                3,
            ),
        ))
    };

    // Identical contents.
    let a = list(vec![Some(1), Some(2), Some(3)]);
    let b = list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));

    // Same length, first element differs.
    let a = list(vec![Some(10), Some(2), Some(3)]);
    let b = list(vec![Some(1), Some(2), Some(30)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // Same length, only the last element differs.
    let a = list(vec![Some(10), Some(2), Some(3)]);
    let b = list(vec![Some(10), Some(2), Some(30)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));

    // Longer list whose first element is smaller.
    let a = list(vec![Some(1), Some(2), Some(3)]);
    let b = list(vec![Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));

    // Longer list whose first element is larger.
    let a = list(vec![Some(2), Some(3), Some(4)]);
    let b = list(vec![Some(1), Some(2)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // One list is a strict prefix of the other.
    let a = list(vec![Some(1), Some(2), Some(3)]);
    let b = list(vec![Some(1), Some(2)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // A leading null element against a leading non-null element.
    let a = list(vec![None, Some(2), Some(3)]);
    let b = list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // Same null-versus-value comparison for LargeList.
    let a = large_list(vec![None, Some(2), Some(3)]);
    let b = large_list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // Same null-versus-value comparison for FixedSizeList.
    let a = fixed_list(vec![None, Some(2), Some(3)]);
    let b = fixed_list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
}
5089
/// A `UInt64` scalar converts to a one-element `UInt64Array`, keeping both
/// the value and the null-ness.
#[test]
fn scalar_value_to_array_u64() -> Result<()> {
    // Non-null value.
    let present = ScalarValue::UInt64(Some(13u64))
        .to_array()
        .expect("Failed to convert to array");
    let present = as_uint64_array(&present)?;
    assert_eq!(present.len(), 1);
    assert!(!present.is_null(0));
    assert_eq!(present.value(0), 13);

    // Null value.
    let absent = ScalarValue::UInt64(None)
        .to_array()
        .expect("Failed to convert to array");
    let absent = as_uint64_array(&absent)?;
    assert_eq!(absent.len(), 1);
    assert!(absent.is_null(0));

    Ok(())
}
5106
/// A `UInt32` scalar converts to a one-element `UInt32Array`, keeping both
/// the value and the null-ness.
#[test]
fn scalar_value_to_array_u32() -> Result<()> {
    // Non-null value.
    let present = ScalarValue::UInt32(Some(13u32))
        .to_array()
        .expect("Failed to convert to array");
    let present = as_uint32_array(&present)?;
    assert_eq!(present.len(), 1);
    assert!(!present.is_null(0));
    assert_eq!(present.value(0), 13);

    // Null value.
    let absent = ScalarValue::UInt32(None)
        .to_array()
        .expect("Failed to convert to array");
    let absent = as_uint32_array(&absent)?;
    assert_eq!(absent.len(), 1);
    assert!(absent.is_null(0));

    Ok(())
}
5123
/// `new_list_nullable` with no input values must yield a list array with one
/// row and an empty child values array.
#[test]
fn scalar_list_null_to_array() {
    let empty_list = ScalarValue::new_list_nullable(&[], &DataType::UInt64);

    let row_count = empty_list.len();
    let child_count = empty_list.values().len();
    assert_eq!(row_count, 1);
    assert_eq!(child_count, 0);
}
5131
/// `new_large_list` with no input values must yield a list array with one
/// row and an empty child values array.
#[test]
fn scalar_large_list_null_to_array() {
    let empty_list = ScalarValue::new_large_list(&[], &DataType::UInt64);

    let row_count = empty_list.len();
    let child_count = empty_list.values().len();
    assert_eq!(row_count, 1);
    assert_eq!(child_count, 0);
}
5139
/// `new_list_nullable` packs the given scalars (including a null) into a
/// single list row whose elements can be read back as a `UInt64Array`.
#[test]
fn scalar_list_to_array() -> Result<()> {
    let elements = [
        ScalarValue::UInt64(Some(100)),
        ScalarValue::UInt64(None),
        ScalarValue::UInt64(Some(101)),
    ];

    let list = ScalarValue::new_list_nullable(&elements, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // Inspect the contents of the single row.
    let row = list.value(0);
    let row = as_uint64_array(&row)?;
    assert_eq!(row.len(), 3);
    assert_eq!(row.value(0), 100);
    assert!(row.is_null(1));
    assert_eq!(row.value(2), 101);

    Ok(())
}
5159
/// `new_large_list` packs the given scalars (including a null) into a single
/// list row whose elements can be read back as a `UInt64Array`.
#[test]
fn scalar_large_list_to_array() -> Result<()> {
    let elements = [
        ScalarValue::UInt64(Some(100)),
        ScalarValue::UInt64(None),
        ScalarValue::UInt64(Some(101)),
    ];

    let list = ScalarValue::new_large_list(&elements, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // Inspect the contents of the single row.
    let row = list.value(0);
    let row = as_uint64_array(&row)?;
    assert_eq!(row.len(), 3);
    assert_eq!(row.value(0), 100);
    assert!(row.is_null(1));
    assert_eq!(row.value(2), 101);

    Ok(())
}
5179
// Wraps every element of `$input` in `ScalarValue::$scalar_variant`, feeds
// the scalars to `ScalarValue::iter_to_array`, and asserts the result equals
// `$array_type::from($input)`.
//
// `$input` is expanded (and therefore evaluated) twice.
macro_rules! check_scalar_iter {
    ($scalar_variant:ident, $array_type:ident, $input:expr) => {{
        let actual = ScalarValue::iter_to_array(
            $input
                .iter()
                .map(|value| ScalarValue::$scalar_variant(*value)),
        )
        .unwrap();

        let expected: ArrayRef = Arc::new($array_type::from($input));

        assert_eq!(&actual, &expected);
    }};
}
5193
// Same check as `check_scalar_iter`, for scalar variants that take a second
// argument: each element of `$input` becomes
// `ScalarValue::$scalar_variant(value, None)`.
//
// `$input` is expanded (and therefore evaluated) twice.
macro_rules! check_scalar_iter_tz {
    ($scalar_variant:ident, $array_type:ident, $input:expr) => {{
        let actual = ScalarValue::iter_to_array(
            $input
                .iter()
                .map(|value| ScalarValue::$scalar_variant(*value, None)),
        )
        .unwrap();

        let expected: ArrayRef = Arc::new($array_type::from($input));

        assert_eq!(&actual, &expected);
    }};
}
5210
// Same check as `check_scalar_iter`, for string-like scalars: each optional
// element of `$input` is converted with `to_string()` before being wrapped
// in `ScalarValue::$scalar_variant`.
//
// `$input` is expanded (and therefore evaluated) twice.
macro_rules! check_scalar_iter_string {
    ($scalar_variant:ident, $array_type:ident, $input:expr) => {{
        let actual = ScalarValue::iter_to_array($input.iter().map(|text| {
            ScalarValue::$scalar_variant(text.map(|s| s.to_string()))
        }))
        .unwrap();

        let expected: ArrayRef = Arc::new($array_type::from($input));

        assert_eq!(&actual, &expected);
    }};
}
5227
// Same check as `check_scalar_iter`, for binary scalars: each optional
// element of `$input` is copied with `to_vec()` both when building the
// scalars and when collecting the expected `$array_type`.
//
// `$input` is expanded (and therefore evaluated) twice.
macro_rules! check_scalar_iter_binary {
    ($scalar_variant:ident, $array_type:ident, $input:expr) => {{
        let actual = ScalarValue::iter_to_array($input.iter().map(|bytes| {
            ScalarValue::$scalar_variant(bytes.map(|b| b.to_vec()))
        }))
        .unwrap();

        let expected_values: $array_type = $input
            .iter()
            .map(|bytes| bytes.map(|b| b.to_vec()))
            .collect();
        let expected: ArrayRef = Arc::new(expected_values);

        assert_eq!(&actual, &expected);
    }};
}
5247
/// Runs the `check_scalar_iter*` macros over boolean, float, integer,
/// timestamp, string and binary scalars, each with a null in the middle.
#[test]
// The macros call `.iter()` on the `vec![...]` literals passed in, which
// clippy would otherwise flag.
#[allow(clippy::useless_vec)]
fn scalar_iter_to_array_boolean() {
    // Boolean and floating point.
    check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
    check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
    check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);

    // Signed integers.
    check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);

    // Unsigned integers.
    check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
    check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);

    // Timestamps at every resolution.
    check_scalar_iter_tz!(
        TimestampSecond,
        TimestampSecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMillisecond,
        TimestampMillisecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampMicrosecond,
        TimestampMicrosecondArray,
        vec![Some(1), None, Some(3)]
    );
    check_scalar_iter_tz!(
        TimestampNanosecond,
        TimestampNanosecondArray,
        vec![Some(1), None, Some(3)]
    );

    // Strings.
    check_scalar_iter_string!(
        Utf8,
        StringArray,
        vec![Some("foo"), None, Some("bar")]
    );
    check_scalar_iter_string!(
        LargeUtf8,
        LargeStringArray,
        vec![Some("foo"), None, Some("bar")]
    );

    // Binary.
    check_scalar_iter_binary!(
        Binary,
        BinaryArray,
        vec![Some(b"foo"), None, Some(b"bar")]
    );
    check_scalar_iter_binary!(
        LargeBinary,
        LargeBinaryArray,
        vec![Some(b"foo"), None, Some(b"bar")]
    );
}
5308
/// `iter_to_array` cannot determine a type from an empty input and must
/// return an error that says so.
#[test]
fn scalar_iter_to_array_empty() {
    let no_scalars: Vec<ScalarValue> = Vec::new();

    let err = ScalarValue::iter_to_array(no_scalars).unwrap_err();
    let rendered = err.to_string();

    assert!(
        rendered.contains("Empty iterator passed to ScalarValue::iter_to_array"),
        "{err}"
    );
}
5322
/// Dictionary scalars (Int32 keys, Utf8 values) convert into a dictionary
/// array whose keys resolve back to the original optional strings.
#[test]
fn scalar_iter_to_dictionary() {
    // Builds a dictionary scalar with an Int32 key type around `text`.
    let dict_scalar = |text: Option<&str>| {
        ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Utf8(text.map(String::from))),
        )
    };

    let scalars = [
        dict_scalar(Some("Foo")),
        dict_scalar(None),
        dict_scalar(Some("Bar")),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let dictionary = as_dictionary_array::<Int32Type>(&array).unwrap();
    let dictionary_values = as_string_array(dictionary.values()).unwrap();

    // Resolve each key to the string it points at; a null key stays `None`.
    let mut decoded = Vec::new();
    for key in dictionary.keys_iter() {
        decoded.push(key.map(|k| {
            assert!(dictionary_values.is_valid(k));
            dictionary_values.value(k)
        }));
    }

    assert_eq!(decoded, vec![Some("Foo"), None, Some("Bar")]);
}
5354
/// Mixing scalar types in one `iter_to_array` call must fail with a message
/// naming the expected type and the offending value.
#[test]
fn scalar_iter_to_array_mismatched_types() {
    // A Boolean scalar followed by an Int32 scalar.
    let mixed = [
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Int32(Some(5)),
    ];

    let err = ScalarValue::iter_to_array(mixed).unwrap_err();
    let rendered = err.to_string();

    assert!(
        rendered.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
        "{err}"
    );
}
5365
/// `try_from_array` returns a typed null scalar for a null slot and the
/// value for a non-null slot.
#[test]
fn scalar_try_from_array_null() {
    let array: ArrayRef = Arc::new(Int64Array::from(vec![Some(33), None]));

    let first = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(ScalarValue::Int64(Some(33)), first);

    let second = ScalarValue::try_from_array(&array, 1).unwrap();
    assert_eq!(ScalarValue::Int64(None), second);
}
5380
/// Scalars extracted from a list array report the list data type whether the
/// extracted row is null or not.
#[test]
fn scalar_try_from_array_list_array_null() {
    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));

    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        None,
    ]);

    // Row 0 holds values, row 1 is null.
    let from_valid_row = ScalarValue::try_from_array(&list, 0).unwrap();
    let from_null_row = ScalarValue::try_from_array(&list, 1).unwrap();

    assert_eq!(from_valid_row.data_type(), expected_type);
    assert_eq!(from_null_row.data_type(), expected_type);
}
5397
/// Converting a `List`, `LargeList` or `FixedSizeList` data type into a
/// `ScalarValue` yields a null scalar equal to one wrapping a one-row null
/// array of that type.
#[test]
fn scalar_try_from_list_datatypes() {
    let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // List
    let list_type = DataType::List(Arc::clone(&inner_field));
    let from_type = ScalarValue::try_from(&list_type).unwrap();
    let null_row = new_null_array(&list_type, 1);
    let expected = ScalarValue::List(null_row.as_list::<i32>().to_owned().into());
    assert_eq!(expected, from_type);
    assert!(expected.is_null());

    // LargeList
    let large_list_type = DataType::LargeList(Arc::clone(&inner_field));
    let from_type = ScalarValue::try_from(&large_list_type).unwrap();
    let null_row = new_null_array(&large_list_type, 1);
    let expected =
        ScalarValue::LargeList(null_row.as_list::<i64>().to_owned().into());
    assert_eq!(expected, from_type);
    assert!(expected.is_null());

    // FixedSizeList with five elements per row
    let fixed_list_type = DataType::FixedSizeList(Arc::clone(&inner_field), 5);
    let from_type = ScalarValue::try_from(&fixed_list_type).unwrap();
    let null_row = new_null_array(&fixed_list_type, 1);
    let expected =
        ScalarValue::FixedSizeList(null_row.as_fixed_size_list().to_owned().into());
    assert_eq!(expected, from_type);
    assert!(expected.is_null());
}
5438
/// Converting a list-of-list data type into a `ScalarValue` yields a scalar
/// equal to one wrapping a one-row null array of that nested type.
#[test]
fn scalar_try_from_list_of_list() {
    let inner_list =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let nested_type = DataType::List(Arc::new(Field::new_list_field(inner_list, true)));

    let from_type = ScalarValue::try_from(&nested_type).unwrap();

    let null_row = new_null_array(&nested_type, 1);
    let expected = ScalarValue::List(null_row.as_list::<i32>().to_owned().into());

    assert_eq!(expected, from_type)
}
5466
/// A scalar built from `List<Int32>` must not compare equal to one built
/// from `List<List<Int32>>`.
#[test]
fn scalar_try_from_not_equal_list_nested_list() {
    let flat_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let flat_scalar = ScalarValue::try_from(&flat_type).unwrap();

    let nested_type = DataType::List(Arc::new(Field::new_list_field(
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
        true,
    )));
    let nested_scalar = ScalarValue::try_from(&nested_type).unwrap();

    assert_ne!(flat_scalar, nested_scalar);
}
5483
/// Converting a `Dictionary(Int8, Utf8)` data type into a `ScalarValue`
/// yields a dictionary scalar wrapping a null `Utf8`.
#[test]
fn scalar_try_from_dict_datatype() {
    let dict_type =
        DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));

    let actual = ScalarValue::try_from(&dict_type).unwrap();

    let expected = ScalarValue::Dictionary(
        Box::new(DataType::Int8),
        Box::new(ScalarValue::Utf8(None)),
    );
    assert_eq!(expected, actual)
}
5495
/// Pins the in-memory size of `ScalarValue` at 64 bytes so that a change to
/// the enum's size is noticed.
#[test]
fn size_of_scalar() {
    let scalar_bytes = size_of::<ScalarValue>();
    assert_eq!(scalar_bytes, 64);
}
5510
/// Checks the sizes reported by `ScalarValue::size`, `size_of_vec` and
/// `size_of_hashset` against values computed from capacities.
#[test]
fn memory_size() {
    // A binary scalar with ten bytes of reserved capacity.
    let scalar = ScalarValue::Binary(Some(Vec::with_capacity(10)));
    let scalar_size = scalar.size();
    assert_eq!(scalar_size, size_of::<ScalarValue>() + 10,);

    // A vector with ten slots, one of them occupied by the scalar above.
    let mut scalars = Vec::with_capacity(10);
    scalars.push(scalar);
    assert_eq!(scalars.capacity(), 10);
    let expected_vec_size =
        size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + scalar_size;
    assert_eq!(ScalarValue::size_of_vec(&scalars), expected_vec_size,);

    // Move the scalar into a hash set; the set's capacity after the insert
    // is read back rather than assumed.
    let mut set = HashSet::with_capacity(0);
    set.insert(scalars.pop().unwrap());
    let set_capacity = set.capacity();
    let expected_set_size = size_of::<HashSet<ScalarValue>>()
        + ((set_capacity - 1) * size_of::<ScalarValue>())
        + scalar_size;
    assert_eq!(ScalarValue::size_of_hashset(&set), expected_set_size,);
}
5538
5539 #[test]
5540 fn scalar_eq_array() {
5541 macro_rules! make_typed_vec {
5543 ($INPUT:expr, $TYPE:ident) => {{
5544 $INPUT
5545 .iter()
5546 .map(|v| v.map(|v| v as $TYPE))
5547 .collect::<Vec<_>>()
5548 }};
5549 }
5550
5551 let bool_vals = [Some(true), None, Some(false)];
5552 let f32_vals = [Some(-1.0), None, Some(1.0)];
5553 let f64_vals = make_typed_vec!(f32_vals, f64);
5554
5555 let i8_vals = [Some(-1), None, Some(1)];
5556 let i16_vals = make_typed_vec!(i8_vals, i16);
5557 let i32_vals = make_typed_vec!(i8_vals, i32);
5558 let i64_vals = make_typed_vec!(i8_vals, i64);
5559
5560 let u8_vals = [Some(0), None, Some(1)];
5561 let u16_vals = make_typed_vec!(u8_vals, u16);
5562 let u32_vals = make_typed_vec!(u8_vals, u32);
5563 let u64_vals = make_typed_vec!(u8_vals, u64);
5564
5565 let str_vals = [Some("foo"), None, Some("bar")];
5566
5567 let interval_dt_vals = [
5568 Some(IntervalDayTime::MINUS_ONE),
5569 None,
5570 Some(IntervalDayTime::ONE),
5571 ];
5572 let interval_mdn_vals = [
5573 Some(IntervalMonthDayNano::MINUS_ONE),
5574 None,
5575 Some(IntervalMonthDayNano::ONE),
5576 ];
5577
5578 #[derive(Debug)]
5582 struct TestCase {
5583 array: ArrayRef,
5584 scalars: Vec<ScalarValue>,
5585 }
5586
5587 macro_rules! make_test_case {
5589 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5590 TestCase {
5591 array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5592 scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
5593 }
5594 }};
5595
5596 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
5597 let tz = $TZ;
5598 TestCase {
5599 array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5600 scalars: $INPUT
5601 .iter()
5602 .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
5603 .collect(),
5604 }
5605 }};
5606 }
5607
5608 macro_rules! make_str_test_case {
5609 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5610 TestCase {
5611 array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5612 scalars: $INPUT
5613 .iter()
5614 .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
5615 .collect(),
5616 }
5617 }};
5618 }
5619
5620 macro_rules! make_binary_test_case {
5621 ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5622 TestCase {
5623 array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5624 scalars: $INPUT
5625 .iter()
5626 .map(|v| {
5627 ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
5628 })
5629 .collect(),
5630 }
5631 }};
5632 }
5633
5634 macro_rules! make_str_dict_test_case {
5636 ($INPUT:expr, $INDEX_TY:ident) => {{
5637 TestCase {
5638 array: Arc::new(
5639 $INPUT
5640 .iter()
5641 .cloned()
5642 .collect::<DictionaryArray<$INDEX_TY>>(),
5643 ),
5644 scalars: $INPUT
5645 .iter()
5646 .map(|v| {
5647 ScalarValue::Dictionary(
5648 Box::new($INDEX_TY::DATA_TYPE),
5649 Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
5650 )
5651 })
5652 .collect(),
5653 }
5654 }};
5655 }
5656
5657 let cases = vec![
5658 make_test_case!(bool_vals, BooleanArray, Boolean),
5659 make_test_case!(f32_vals, Float32Array, Float32),
5660 make_test_case!(f64_vals, Float64Array, Float64),
5661 make_test_case!(i8_vals, Int8Array, Int8),
5662 make_test_case!(i16_vals, Int16Array, Int16),
5663 make_test_case!(i32_vals, Int32Array, Int32),
5664 make_test_case!(i64_vals, Int64Array, Int64),
5665 make_test_case!(u8_vals, UInt8Array, UInt8),
5666 make_test_case!(u16_vals, UInt16Array, UInt16),
5667 make_test_case!(u32_vals, UInt32Array, UInt32),
5668 make_test_case!(u64_vals, UInt64Array, UInt64),
5669 make_str_test_case!(str_vals, StringArray, Utf8),
5670 make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
5671 make_binary_test_case!(str_vals, BinaryArray, Binary),
5672 make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
5673 make_test_case!(i32_vals, Date32Array, Date32),
5674 make_test_case!(i64_vals, Date64Array, Date64),
5675 make_test_case!(i32_vals, Time32SecondArray, Time32Second),
5676 make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
5677 make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
5678 make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
5679 make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
5680 make_test_case!(
5681 i64_vals,
5682 TimestampSecondArray,
5683 TimestampSecond,
5684 Some("UTC".into())
5685 ),
5686 make_test_case!(
5687 i64_vals,
5688 TimestampMillisecondArray,
5689 TimestampMillisecond,
5690 None
5691 ),
5692 make_test_case!(
5693 i64_vals,
5694 TimestampMillisecondArray,
5695 TimestampMillisecond,
5696 Some("UTC".into())
5697 ),
5698 make_test_case!(
5699 i64_vals,
5700 TimestampMicrosecondArray,
5701 TimestampMicrosecond,
5702 None
5703 ),
5704 make_test_case!(
5705 i64_vals,
5706 TimestampMicrosecondArray,
5707 TimestampMicrosecond,
5708 Some("UTC".into())
5709 ),
5710 make_test_case!(
5711 i64_vals,
5712 TimestampNanosecondArray,
5713 TimestampNanosecond,
5714 None
5715 ),
5716 make_test_case!(
5717 i64_vals,
5718 TimestampNanosecondArray,
5719 TimestampNanosecond,
5720 Some("UTC".into())
5721 ),
5722 make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
5723 make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
5724 make_test_case!(
5725 interval_mdn_vals,
5726 IntervalMonthDayNanoArray,
5727 IntervalMonthDayNano
5728 ),
5729 make_str_dict_test_case!(str_vals, Int8Type),
5730 make_str_dict_test_case!(str_vals, Int16Type),
5731 make_str_dict_test_case!(str_vals, Int32Type),
5732 make_str_dict_test_case!(str_vals, Int64Type),
5733 make_str_dict_test_case!(str_vals, UInt8Type),
5734 make_str_dict_test_case!(str_vals, UInt16Type),
5735 make_str_dict_test_case!(str_vals, UInt32Type),
5736 make_str_dict_test_case!(str_vals, UInt64Type),
5737 ];
5738
5739 for case in cases {
5740 println!("**** Test Case *****");
5741 let TestCase { array, scalars } = case;
5742 println!("Input array type: {}", array.data_type());
5743 println!("Input scalars: {scalars:#?}");
5744 assert_eq!(array.len(), scalars.len());
5745
5746 for (index, scalar) in scalars.into_iter().enumerate() {
5747 assert!(
5748 scalar
5749 .eq_array(&array, index)
5750 .expect("Failed to compare arrays"),
5751 "Expected {scalar:?} to be equal to {array:?} at index {index}"
5752 );
5753
5754 for other_index in 0..array.len() {
5756 if index != other_index {
5757 assert!(
5758 !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
5759 "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
5760 );
5761 }
5762 }
5763 }
5764 }
5765 }
5766
5767 #[test]
5768 fn scalar_partial_ordering() {
5769 use ScalarValue::*;
5770
5771 assert_eq!(
5772 Int64(Some(33)).partial_cmp(&Int64(Some(0))),
5773 Some(Ordering::Greater)
5774 );
5775 assert_eq!(
5776 Int64(Some(0)).partial_cmp(&Int64(Some(33))),
5777 Some(Ordering::Less)
5778 );
5779 assert_eq!(
5780 Int64(Some(33)).partial_cmp(&Int64(Some(33))),
5781 Some(Ordering::Equal)
5782 );
5783 assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
5785 assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
5786
5787 assert_eq!(
5788 ScalarValue::from(vec![
5789 ("A", ScalarValue::from(1.0)),
5790 ("B", ScalarValue::from("Z")),
5791 ])
5792 .partial_cmp(&ScalarValue::from(vec![
5793 ("A", ScalarValue::from(2.0)),
5794 ("B", ScalarValue::from("A")),
5795 ])),
5796 Some(Ordering::Less)
5797 );
5798
5799 assert_eq!(
5801 ScalarValue::from(vec![
5802 ("A", ScalarValue::from(1.0)),
5803 ("B", ScalarValue::from("Z")),
5804 ])
5805 .partial_cmp(&ScalarValue::from(vec![
5806 ("a", ScalarValue::from(2.0)),
5807 ("b", ScalarValue::from("A")),
5808 ])),
5809 None
5810 );
5811 }
5812
5813 #[test]
5814 fn test_scalar_value_from_string() {
5815 let scalar = ScalarValue::from("foo");
5816 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5817 let scalar = ScalarValue::from("foo".to_string());
5818 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5819 let scalar = ScalarValue::from_str("foo").unwrap();
5820 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5821 }
5822
5823 #[test]
5824 fn test_scalar_struct() {
5825 let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5826 let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5827 let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5828
5829 let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5830 let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5831 let field_d = Arc::new(Field::new(
5832 "D",
5833 DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5834 false,
5835 ));
5836
5837 let struct_array = StructArray::from(vec![
5838 (
5839 Arc::clone(&field_e),
5840 Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5841 ),
5842 (
5843 Arc::clone(&field_f),
5844 Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5845 ),
5846 ]);
5847
5848 let struct_array = StructArray::from(vec![
5849 (
5850 Arc::clone(&field_a),
5851 Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5852 ),
5853 (
5854 Arc::clone(&field_b),
5855 Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5856 ),
5857 (
5858 Arc::clone(&field_c),
5859 Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5860 ),
5861 (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5862 ]);
5863 let scalar = ScalarValue::Struct(Arc::new(struct_array));
5864
5865 let array = scalar
5866 .to_array_of_size(2)
5867 .expect("Failed to convert to array of size");
5868
5869 let expected = Arc::new(StructArray::from(vec![
5870 (
5871 Arc::clone(&field_a),
5872 Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5873 ),
5874 (
5875 Arc::clone(&field_b),
5876 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5877 ),
5878 (
5879 Arc::clone(&field_c),
5880 Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5881 ),
5882 (
5883 Arc::clone(&field_d),
5884 Arc::new(StructArray::from(vec![
5885 (
5886 Arc::clone(&field_e),
5887 Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5888 ),
5889 (
5890 Arc::clone(&field_f),
5891 Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5892 ),
5893 ])) as ArrayRef,
5894 ),
5895 ])) as ArrayRef;
5896
5897 assert_eq!(&array, &expected);
5898
5899 let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5901 assert_eq!(constructed, scalar);
5902
5903 let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5905 assert!(none_scalar.is_null());
5906 assert_eq!(
5907 format!("{none_scalar:?}"),
5908 String::from("Struct({A:,B:,C:,D:})")
5909 );
5910
5911 let constructed = ScalarValue::from(vec![
5913 ("A", ScalarValue::from(23)),
5914 ("B", ScalarValue::from(false)),
5915 ("C", ScalarValue::from("Hello")),
5916 (
5917 "D",
5918 ScalarValue::from(vec![
5919 ("e", ScalarValue::from(2i16)),
5920 ("f", ScalarValue::from(3i64)),
5921 ]),
5922 ),
5923 ]);
5924 assert_eq!(constructed, scalar);
5925
5926 let scalars = vec![
5928 ScalarValue::from(vec![
5929 ("A", ScalarValue::from(23)),
5930 ("B", ScalarValue::from(false)),
5931 ("C", ScalarValue::from("Hello")),
5932 (
5933 "D",
5934 ScalarValue::from(vec![
5935 ("e", ScalarValue::from(2i16)),
5936 ("f", ScalarValue::from(3i64)),
5937 ]),
5938 ),
5939 ]),
5940 ScalarValue::from(vec![
5941 ("A", ScalarValue::from(7)),
5942 ("B", ScalarValue::from(true)),
5943 ("C", ScalarValue::from("World")),
5944 (
5945 "D",
5946 ScalarValue::from(vec![
5947 ("e", ScalarValue::from(4i16)),
5948 ("f", ScalarValue::from(5i64)),
5949 ]),
5950 ),
5951 ]),
5952 ScalarValue::from(vec![
5953 ("A", ScalarValue::from(-1000)),
5954 ("B", ScalarValue::from(true)),
5955 ("C", ScalarValue::from("!!!!!")),
5956 (
5957 "D",
5958 ScalarValue::from(vec![
5959 ("e", ScalarValue::from(6i16)),
5960 ("f", ScalarValue::from(7i64)),
5961 ]),
5962 ),
5963 ]),
5964 ];
5965 let array = ScalarValue::iter_to_array(scalars).unwrap();
5966
5967 let expected = Arc::new(StructArray::from(vec![
5968 (
5969 Arc::clone(&field_a),
5970 Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5971 ),
5972 (
5973 Arc::clone(&field_b),
5974 Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5975 ),
5976 (
5977 Arc::clone(&field_c),
5978 Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5979 ),
5980 (
5981 Arc::clone(&field_d),
5982 Arc::new(StructArray::from(vec![
5983 (
5984 Arc::clone(&field_e),
5985 Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5986 ),
5987 (
5988 Arc::clone(&field_f),
5989 Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5990 ),
5991 ])) as ArrayRef,
5992 ),
5993 ])) as ArrayRef;
5994
5995 assert_eq!(&array, &expected);
5996 }
5997
5998 #[test]
5999 fn round_trip() {
6000 let cases: Vec<ArrayRef> = vec![
6002 Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
6004 Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
6005 Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
6006 Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
6007 Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
6008 Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
6009 Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
6010 Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
6011 Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
6013 Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
6015 Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
6016 Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
6018 Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
6019 Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
6020 {
6022 let mut builder = StringDictionaryBuilder::<Int32Type>::new();
6023 builder.append("foo").unwrap();
6024 builder.append_null();
6025 builder.append("bar").unwrap();
6026 Arc::new(builder.finish())
6027 },
6028 Arc::new(BinaryArray::from_iter(vec![
6030 Some(b"foo"),
6031 None,
6032 Some(b"bar"),
6033 ])),
6034 Arc::new(LargeBinaryArray::from_iter(vec![
6035 Some(b"foo"),
6036 None,
6037 Some(b"bar"),
6038 ])),
6039 Arc::new(BinaryViewArray::from_iter(vec![
6040 Some(b"foo"),
6041 None,
6042 Some(b"bar"),
6043 ])),
6044 Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
6046 Arc::new(TimestampMillisecondArray::from(vec![
6047 Some(1),
6048 None,
6049 Some(3),
6050 ])),
6051 Arc::new(TimestampMicrosecondArray::from(vec![
6052 Some(1),
6053 None,
6054 Some(3),
6055 ])),
6056 Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
6057 Arc::new(
6059 TimestampSecondArray::from(vec![Some(1), None, Some(3)])
6060 .with_timezone_opt(Some("UTC")),
6061 ),
6062 Arc::new(
6063 TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
6064 .with_timezone_opt(Some("UTC")),
6065 ),
6066 Arc::new(
6067 TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
6068 .with_timezone_opt(Some("UTC")),
6069 ),
6070 Arc::new(
6071 TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
6072 .with_timezone_opt(Some("UTC")),
6073 ),
6074 Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
6076 Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
6077 Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
6079 Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
6080 Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
6081 Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
6082 Arc::new(NullArray::new(3)),
6084 {
6086 let mut builder = UnionBuilder::new_dense();
6087 builder.append::<Int32Type>("a", 1).unwrap();
6088 builder.append::<Float64Type>("b", 3.4).unwrap();
6089 Arc::new(builder.build().unwrap())
6090 },
6091 {
6093 let mut builder = UnionBuilder::new_sparse();
6094 builder.append::<Int32Type>("a", 1).unwrap();
6095 builder.append::<Float64Type>("b", 3.4).unwrap();
6096 Arc::new(builder.build().unwrap())
6097 },
6098 {
6100 let values_builder = StringBuilder::new();
6101 let mut builder = ListBuilder::new(values_builder);
6102 builder.values().append_value("A");
6104 builder.values().append_value("B");
6105 builder.append(true);
6106 builder.append(true);
6108 builder.values().append_value("?"); builder.append(false);
6111 Arc::new(builder.finish())
6112 },
6113 {
6115 let values_builder = StringBuilder::new();
6116 let mut builder = LargeListBuilder::new(values_builder);
6117 builder.values().append_value("A");
6119 builder.values().append_value("B");
6120 builder.append(true);
6121 builder.append(true);
6123 builder.append(false);
6125 Arc::new(builder.finish())
6126 },
6127 {
6129 let values_builder = Int32Builder::new();
6130 let mut builder = FixedSizeListBuilder::new(values_builder, 3);
6131
6132 builder.values().append_value(0);
6134 builder.values().append_value(1);
6135 builder.values().append_value(2);
6136 builder.append(true);
6137 builder.values().append_null();
6138 builder.values().append_null();
6139 builder.values().append_null();
6140 builder.append(false);
6141 builder.values().append_value(3);
6142 builder.values().append_null();
6143 builder.values().append_value(5);
6144 builder.append(true);
6145 Arc::new(builder.finish())
6146 },
6147 {
6149 let string_builder = StringBuilder::new();
6150 let int_builder = Int32Builder::with_capacity(4);
6151
6152 let mut builder = MapBuilder::new(None, string_builder, int_builder);
6153 builder.keys().append_value("joe");
6155 builder.values().append_value(1);
6156 builder.append(true).unwrap();
6157 builder.append(true).unwrap();
6159 builder.append(false).unwrap();
6161
6162 Arc::new(builder.finish())
6163 },
6164 ];
6165
6166 for arr in cases {
6167 round_trip_through_scalar(arr);
6168 }
6169 }
6170
6171 fn round_trip_through_scalar(arr: ArrayRef) {
6176 for i in 0..arr.len() {
6177 let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
6179 let array = scalar.to_array_of_size(1).unwrap();
6180 assert_eq!(array.len(), 1);
6181 assert_eq!(array.data_type(), arr.data_type());
6182 assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
6183 }
6184 }
6185
6186 #[test]
6187 fn test_scalar_union_sparse() {
6188 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6189 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6190 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6191 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6192
6193 let mut values_a = vec![None; 6];
6194 values_a[0] = Some(42);
6195 let mut values_b = vec![None; 6];
6196 values_b[1] = Some(true);
6197 let mut values_c = vec![None; 6];
6198 values_c[2] = Some("foo");
6199 let children: Vec<ArrayRef> = vec![
6200 Arc::new(Int32Array::from(values_a)),
6201 Arc::new(BooleanArray::from(values_b)),
6202 Arc::new(StringArray::from(values_c)),
6203 ];
6204
6205 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6206 let array: ArrayRef = Arc::new(
6207 UnionArray::try_new(fields.clone(), type_ids, None, children)
6208 .expect("UnionArray"),
6209 );
6210
6211 let expected = [
6212 (0, ScalarValue::from(42)),
6213 (1, ScalarValue::from(true)),
6214 (2, ScalarValue::from("foo")),
6215 (0, ScalarValue::Int32(None)),
6216 (1, ScalarValue::Boolean(None)),
6217 (2, ScalarValue::Utf8(None)),
6218 ];
6219
6220 for (i, (ti, value)) in expected.into_iter().enumerate() {
6221 let is_null = value.is_null();
6222 let value = Some((ti, Box::new(value)));
6223 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
6224 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6225
6226 assert_eq!(
6227 actual, expected,
6228 "[{i}] {actual} was not equal to {expected}"
6229 );
6230
6231 assert!(
6232 expected.eq_array(&array, i).expect("eq_array"),
6233 "[{i}] {expected}.eq_array was false"
6234 );
6235
6236 if is_null {
6237 assert!(actual.is_null(), "[{i}] {actual} was not null")
6238 }
6239 }
6240 }
6241
6242 #[test]
6243 fn test_scalar_union_dense() {
6244 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6245 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6246 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6247 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6248 let children: Vec<ArrayRef> = vec![
6249 Arc::new(Int32Array::from(vec![Some(42), None])),
6250 Arc::new(BooleanArray::from(vec![Some(true), None])),
6251 Arc::new(StringArray::from(vec![Some("foo"), None])),
6252 ];
6253
6254 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6255 let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
6256 let array: ArrayRef = Arc::new(
6257 UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
6258 .expect("UnionArray"),
6259 );
6260
6261 let expected = [
6262 (0, ScalarValue::from(42)),
6263 (1, ScalarValue::from(true)),
6264 (2, ScalarValue::from("foo")),
6265 (0, ScalarValue::Int32(None)),
6266 (1, ScalarValue::Boolean(None)),
6267 (2, ScalarValue::Utf8(None)),
6268 ];
6269
6270 for (i, (ti, value)) in expected.into_iter().enumerate() {
6271 let is_null = value.is_null();
6272 let value = Some((ti, Box::new(value)));
6273 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
6274 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6275
6276 assert_eq!(
6277 actual, expected,
6278 "[{i}] {actual} was not equal to {expected}"
6279 );
6280
6281 assert!(
6282 expected.eq_array(&array, i).expect("eq_array"),
6283 "[{i}] {expected}.eq_array was false"
6284 );
6285
6286 if is_null {
6287 assert!(actual.is_null(), "[{i}] {actual} was not null")
6288 }
6289 }
6290 }
6291
6292 #[test]
6293 fn test_lists_in_struct() {
6294 let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
6295 let field_primitive_list = Arc::new(Field::new(
6296 "primitive_list",
6297 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6298 false,
6299 ));
6300
6301 let l0 =
6303 ScalarValue::List(Arc::new(
6304 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6305 Some(1),
6306 Some(2),
6307 Some(3),
6308 ])]),
6309 ));
6310 let l1 =
6311 ScalarValue::List(Arc::new(
6312 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6313 Some(4),
6314 Some(5),
6315 ])]),
6316 ));
6317 let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
6318 Int32Type,
6319 _,
6320 _,
6321 >(vec![Some(vec![Some(6)])])));
6322
6323 let s0 = ScalarValue::from(vec![
6325 ("A", ScalarValue::from("First")),
6326 ("primitive_list", l0),
6327 ]);
6328
6329 let s1 = ScalarValue::from(vec![
6330 ("A", ScalarValue::from("Second")),
6331 ("primitive_list", l1),
6332 ]);
6333
6334 let s2 = ScalarValue::from(vec![
6335 ("A", ScalarValue::from("Third")),
6336 ("primitive_list", l2),
6337 ]);
6338
6339 let array =
6341 ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
6342
6343 let array = as_struct_array(&array).unwrap();
6344 let expected = StructArray::from(vec![
6345 (
6346 Arc::clone(&field_a),
6347 Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
6348 ),
6349 (
6350 Arc::clone(&field_primitive_list),
6351 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6352 Some(vec![Some(1), Some(2), Some(3)]),
6353 Some(vec![Some(4), Some(5)]),
6354 Some(vec![Some(6)]),
6355 ])),
6356 ),
6357 ]);
6358
6359 assert_eq!(array, &expected);
6360
6361 let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
6364 let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
6365
6366 let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
6367 let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
6368
6369 let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
6370 let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
6371
6372 let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
6374 let array = array.as_list::<i32>();
6375
6376 let field_a_builder = StringBuilder::with_capacity(4, 1024);
6378 let primitive_value_builder = Int32Array::builder(8);
6379 let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
6380
6381 let element_builder = StructBuilder::new(
6382 vec![field_a, field_primitive_list],
6383 vec![
6384 Box::new(field_a_builder),
6385 Box::new(field_primitive_list_builder),
6386 ],
6387 );
6388
6389 let mut list_builder = ListBuilder::new(element_builder);
6390
6391 list_builder
6392 .values()
6393 .field_builder::<StringBuilder>(0)
6394 .unwrap()
6395 .append_value("First");
6396 list_builder
6397 .values()
6398 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6399 .unwrap()
6400 .values()
6401 .append_value(1);
6402 list_builder
6403 .values()
6404 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6405 .unwrap()
6406 .values()
6407 .append_value(2);
6408 list_builder
6409 .values()
6410 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6411 .unwrap()
6412 .values()
6413 .append_value(3);
6414 list_builder
6415 .values()
6416 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6417 .unwrap()
6418 .append(true);
6419 list_builder.values().append(true);
6420
6421 list_builder
6422 .values()
6423 .field_builder::<StringBuilder>(0)
6424 .unwrap()
6425 .append_value("Second");
6426 list_builder
6427 .values()
6428 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6429 .unwrap()
6430 .values()
6431 .append_value(4);
6432 list_builder
6433 .values()
6434 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6435 .unwrap()
6436 .values()
6437 .append_value(5);
6438 list_builder
6439 .values()
6440 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6441 .unwrap()
6442 .append(true);
6443 list_builder.values().append(true);
6444 list_builder.append(true);
6445
6446 list_builder
6447 .values()
6448 .field_builder::<StringBuilder>(0)
6449 .unwrap()
6450 .append_value("Third");
6451 list_builder
6452 .values()
6453 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6454 .unwrap()
6455 .values()
6456 .append_value(6);
6457 list_builder
6458 .values()
6459 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6460 .unwrap()
6461 .append(true);
6462 list_builder.values().append(true);
6463 list_builder.append(true);
6464
6465 list_builder
6466 .values()
6467 .field_builder::<StringBuilder>(0)
6468 .unwrap()
6469 .append_value("Second");
6470 list_builder
6471 .values()
6472 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6473 .unwrap()
6474 .values()
6475 .append_value(4);
6476 list_builder
6477 .values()
6478 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6479 .unwrap()
6480 .values()
6481 .append_value(5);
6482 list_builder
6483 .values()
6484 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6485 .unwrap()
6486 .append(true);
6487 list_builder.values().append(true);
6488 list_builder.append(true);
6489
6490 let expected = list_builder.finish();
6491
6492 assert_eq!(array, &expected);
6493 }
6494
6495 fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
6496 let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
6497 ListArray::new(
6498 Arc::new(Field::new_list_field(
6499 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6500 true,
6501 )),
6502 OffsetBuffer::<i32>::from_lengths([1]),
6503 Arc::new(a1),
6504 None,
6505 )
6506 }
6507
6508 #[test]
6509 fn test_nested_lists() {
6510 let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
6512 let arr2 = build_2d_list(vec![Some(4), Some(5)]);
6513 let arr3 = build_2d_list(vec![Some(6)]);
6514
6515 let array = ScalarValue::iter_to_array(vec![
6516 ScalarValue::List(Arc::new(arr1)),
6517 ScalarValue::List(Arc::new(arr2)),
6518 ScalarValue::List(Arc::new(arr3)),
6519 ])
6520 .unwrap();
6521 let array = array.as_list::<i32>();
6522
6523 let inner_builder = Int32Array::builder(6);
6525 let middle_builder = ListBuilder::new(inner_builder);
6526 let mut outer_builder = ListBuilder::new(middle_builder);
6527
6528 outer_builder.values().values().append_value(1);
6529 outer_builder.values().values().append_value(2);
6530 outer_builder.values().values().append_value(3);
6531 outer_builder.values().append(true);
6532 outer_builder.append(true);
6533
6534 outer_builder.values().values().append_value(4);
6535 outer_builder.values().values().append_value(5);
6536 outer_builder.values().append(true);
6537 outer_builder.append(true);
6538
6539 outer_builder.values().values().append_value(6);
6540 outer_builder.values().append(true);
6541 outer_builder.append(true);
6542
6543 let expected = outer_builder.finish();
6544
6545 assert_eq!(array, &expected);
6546 }
6547
6548 #[test]
6549 fn scalar_timestamp_ns_utc_timezone() {
6550 let scalar = ScalarValue::TimestampNanosecond(
6551 Some(1599566400000000000),
6552 Some("UTC".into()),
6553 );
6554
6555 assert_eq!(
6556 scalar.data_type(),
6557 DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6558 );
6559
6560 let array = scalar.to_array().expect("Failed to convert to array");
6561 assert_eq!(array.len(), 1);
6562 assert_eq!(
6563 array.data_type(),
6564 &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6565 );
6566
6567 let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
6568 assert_eq!(
6569 new_scalar.data_type(),
6570 DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6571 );
6572 }
6573
6574 #[test]
6575 fn cast_round_trip() {
6576 check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
6577 check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
6578
6579 check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
6580
6581 check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
6582
6583 check_scalar_cast(
6584 ScalarValue::from("foo"),
6585 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6586 );
6587
6588 check_scalar_cast(
6589 ScalarValue::Utf8(None),
6590 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6591 );
6592
6593 check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
6594 check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
6595 check_scalar_cast(
6596 ScalarValue::from("larger than 12 bytes string"),
6597 DataType::Utf8View,
6598 );
6599 check_scalar_cast(
6600 {
6601 let element_field =
6602 Arc::new(Field::new("element", DataType::Int32, true));
6603
6604 let mut builder =
6605 ListBuilder::new(Int32Builder::new()).with_field(element_field);
6606 builder.append_value([Some(1)]);
6607 builder.append(true);
6608
6609 ScalarValue::List(Arc::new(builder.finish()))
6610 },
6611 DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
6612 );
6613 check_scalar_cast(
6614 {
6615 let element_field =
6616 Arc::new(Field::new("element", DataType::Int32, true));
6617
6618 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
6619 .with_field(element_field);
6620 builder.values().append_value(1);
6621 builder.append(true);
6622
6623 ScalarValue::FixedSizeList(Arc::new(builder.finish()))
6624 },
6625 DataType::FixedSizeList(
6626 Arc::new(Field::new("element", DataType::Int64, true)),
6627 1,
6628 ),
6629 );
6630 check_scalar_cast(
6631 {
6632 let element_field =
6633 Arc::new(Field::new("element", DataType::Int32, true));
6634
6635 let mut builder =
6636 LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
6637 builder.append_value([Some(1)]);
6638 builder.append(true);
6639
6640 ScalarValue::LargeList(Arc::new(builder.finish()))
6641 },
6642 DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
6643 );
6644 }
6645
6646 fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
6648 let scalar_array = scalar.to_array().expect("Failed to convert to array");
6650 let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
6652
6653 let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
6655 assert_eq!(cast_scalar.data_type(), desired_type);
6656
6657 let array = cast_scalar
6659 .to_array_of_size(10)
6660 .expect("Failed to convert to array of size");
6661
6662 assert_eq!(array.data_type(), &desired_type)
6664 }
6665
6666 #[test]
6667 fn test_scalar_negative() -> Result<()> {
6668 let value = ScalarValue::Int32(Some(12));
6670 assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
6671 let value = ScalarValue::Int32(None);
6672 assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
6673
6674 let value = ScalarValue::UInt8(Some(12));
6676 assert!(value.arithmetic_negate().is_err());
6677 let value = ScalarValue::Boolean(None);
6678 assert!(value.arithmetic_negate().is_err());
6679 Ok(())
6680 }
6681
6682 #[test]
6683 #[allow(arithmetic_overflow)] fn test_scalar_negative_overflows() -> Result<()> {
6685 macro_rules! test_overflow_on_value {
6686 ($($val:expr),* $(,)?) => {$(
6687 {
6688 let value: ScalarValue = $val;
6689 let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
6690 let root_err = err.find_root();
6691 match root_err{
6692 DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
6693 _ => return Err(err),
6694 };
6695 }
6696 )*};
6697 }
6698 test_overflow_on_value!(
6699 i8::MIN.into(),
6701 i16::MIN.into(),
6702 i32::MIN.into(),
6703 i64::MIN.into(),
6704 ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
6706 ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
6707 ScalarValue::IntervalYearMonth(Some(i32::MIN)),
6709 ScalarValue::new_interval_dt(i32::MIN, 999),
6710 ScalarValue::new_interval_dt(1, i32::MIN),
6711 ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
6712 ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
6713 ScalarValue::new_interval_mdn(12, 15, i64::MIN),
6714 ScalarValue::TimestampSecond(Some(i64::MIN), None),
6716 ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
6717 ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
6718 ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
6719 );
6720
6721 let float_cases = [
6722 (
6723 ScalarValue::Float16(Some(f16::MIN)),
6724 ScalarValue::Float16(Some(f16::MAX)),
6725 ),
6726 (
6727 ScalarValue::Float16(Some(f16::MAX)),
6728 ScalarValue::Float16(Some(f16::MIN)),
6729 ),
6730 (f32::MIN.into(), f32::MAX.into()),
6731 (f32::MAX.into(), f32::MIN.into()),
6732 (f64::MIN.into(), f64::MAX.into()),
6733 (f64::MAX.into(), f64::MIN.into()),
6734 ];
6735 for (test, expected) in float_cases.into_iter().skip(2) {
6737 assert_eq!(test.arithmetic_negate()?, expected);
6738 }
6739 Ok(())
6740 }
6741
/// Negating the extreme `Float16` values is expected to succeed and to map
/// `f16::MIN` onto `f16::MAX` and vice versa.
#[test]
fn f16_test_overflow() {
    let lowest = ScalarValue::Float16(Some(f16::MIN));
    let highest = ScalarValue::Float16(Some(f16::MAX));

    // (input, expected result of the negation)
    let pairs = [(lowest.clone(), highest.clone()), (highest, lowest)];

    for (input, negated) in pairs {
        assert_eq!(input.arithmetic_negate().unwrap(), negated);
    }
}
6760
/// Generates a `#[test]` named `$TEST_NAME` that calls the binary
/// `ScalarValue` method `$FUNCTION` with a `UInt64` left operand and an
/// `Int32` right operand, and checks that the call fails with an error whose
/// message contains `$EXPECTED_ERROR`.
macro_rules! expect_operation_error {
    ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
        #[test]
        fn $TEST_NAME() {
            let lhs = ScalarValue::UInt64(Some(12));
            let rhs = ScalarValue::Int32(Some(-3));

            // A successful result means the mismatched operands were accepted.
            let Err(failure) = lhs.$FUNCTION(&rhs) else {
                panic!(
                    "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                    lhs, rhs
                );
            };

            let rendered = failure.to_string();
            assert!(
                rendered.contains($EXPECTED_ERROR),
                "Expected error '{}' not found in actual error '{}'",
                $EXPECTED_ERROR,
                rendered
            );
        }
    };
}

// `UInt64 + Int32` must be rejected.
expect_operation_error!(
    expect_add_error,
    add,
    "Invalid arithmetic operation: UInt64 + Int32"
);
// `UInt64 - Int32` must be rejected.
expect_operation_error!(
    expect_sub_error,
    sub,
    "Invalid arithmetic operation: UInt64 - Int32"
);
6798
/// Runs `$OPERATION` on pairs of `Decimal128` scalars and asserts the result.
///
/// Each case is a nine-element row: left value / precision / scale, right
/// value / precision / scale, then the expected value / precision / scale.
macro_rules! decimal_op_test_cases {
    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
        $({
            let lhs = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
            let rhs = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
            let actual = lhs.$OPERATION(&rhs).unwrap();
            let expected = ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE);
            assert_eq!(expected, actual);
        })+
    };
}
6811
/// Adds non-null `Decimal128` scalars with equal and with differing
/// precision/scale and checks the resulting value, precision and scale.
#[test]
fn decimal_operations() {
    // Expected sums, expressed in the scale of the result.
    let same_scale_sum = Some(123 + 124);
    let rhs_rescaled_sum = Some(123 + 124 * 10_i128.pow(1));
    let lhs_rescaled_sum = Some(123 * 10_i128.pow(3 - 2) + 124);

    decimal_op_test_cases!(
        add,
        [
            // identical precision and scale
            [Some(123), 10, 2, Some(124), 10, 2, same_scale_sum, 11, 2],
            // left operand has the larger scale
            [Some(123), 10, 3, Some(124), 10, 2, rhs_rescaled_sum, 12, 3],
            // right operand has the larger precision and scale
            [Some(123), 10, 2, Some(124), 11, 3, lhs_rescaled_sum, 12, 3]
        ]
    );
}
6845
/// Adding a null `Decimal128` to a non-null one (in either order) is expected
/// to give a null result that still carries the combined precision and scale.
#[test]
fn decimal_operations_with_nulls() {
    let value = Some(123);

    decimal_op_test_cases!(
        add,
        [
            // same precision and scale, null on either side
            [None, 10, 2, value, 10, 2, None, 11, 2],
            [value, 10, 2, None, 10, 2, None, 11, 2],
            // null operand has the larger scale
            [value, 8, 2, None, 10, 3, None, 11, 3],
            [None, 8, 2, value, 10, 3, None, 11, 3],
            // non-null operand has the larger scale
            [value, 8, 4, None, 10, 3, None, 12, 4],
            [None, 10, 3, value, 8, 4, None, 12, 4]
        ]
    );
}
6866
/// Checks `ScalarValue::distance` for pairs of non-null scalars of the same
/// numeric type. The table pins the expected integer result, including the
/// values expected for fractional float differences.
#[test]
fn test_scalar_distance() {
    // Shorthand for a non-null scalar of the given variant.
    macro_rules! scalar {
        ($variant:ident, $value:expr) => {
            ScalarValue::$variant(Some($value))
        };
    }
    // `f16` values are produced by converting from `f32`.
    let half = |v: f32| scalar!(Float16, f16::from_f32(v));

    // (lhs, rhs, expected distance)
    let cases = [
        // signed integers
        (scalar!(Int8, 1), scalar!(Int8, 2), 1),
        (scalar!(Int8, 2), scalar!(Int8, 1), 1),
        (scalar!(Int16, -5), scalar!(Int16, 5), 10),
        (scalar!(Int16, 5), scalar!(Int16, -5), 10),
        (scalar!(Int32, 0), scalar!(Int32, 0), 0),
        (scalar!(Int32, -5), scalar!(Int32, -10), 5),
        (scalar!(Int64, -10), scalar!(Int64, -5), 5),
        // unsigned integers
        (scalar!(UInt8, 1), scalar!(UInt8, 2), 1),
        (scalar!(UInt8, 0), scalar!(UInt8, 0), 0),
        (scalar!(UInt16, 5), scalar!(UInt16, 10), 5),
        (scalar!(UInt32, 10), scalar!(UInt32, 5), 5),
        (scalar!(UInt64, 5), scalar!(UInt64, 10), 5),
        // half-precision floats
        (half(1.1), half(1.9), 1),
        (half(-5.3), half(-9.2), 4),
        (half(-5.3), half(-9.7), 4),
        // floats with whole-number differences
        (scalar!(Float32, 1.0), scalar!(Float32, 2.0), 1),
        (scalar!(Float32, 2.0), scalar!(Float32, 1.0), 1),
        (scalar!(Float64, 0.0), scalar!(Float64, 0.0), 0),
        (scalar!(Float64, -5.0), scalar!(Float64, -10.0), 5),
        (scalar!(Float64, -10.0), scalar!(Float64, -5.0), 5),
        // floats with fractional differences (0.1 -> 0, 0.8 -> 1, 4.6 -> 5)
        (scalar!(Float32, 1.2), scalar!(Float32, 1.3), 0),
        (scalar!(Float32, 1.1), scalar!(Float32, 1.9), 1),
        (scalar!(Float64, -5.3), scalar!(Float64, -9.2), 4),
        (scalar!(Float64, -5.3), scalar!(Float64, -9.7), 4),
        (scalar!(Float64, -5.3), scalar!(Float64, -9.9), 5),
    ];

    for (lhs, rhs, expected) in &cases {
        let actual = lhs.distance(rhs).unwrap();
        assert_eq!(actual, *expected);
    }
}
6986
/// Operand pairs for which `ScalarValue::distance` is expected to return
/// `None`: null operands, mismatched types, and the non-numeric types listed
/// below.
#[test]
fn test_scalar_distance_invalid() {
    // Shorthand for a non-null scalar of the given variant.
    macro_rules! scalar {
        ($variant:ident, $value:expr) => {
            ScalarValue::$variant(Some($value))
        };
    }
    let half_one = || scalar!(Float16, f16::from_f32(1.0));

    let cases = [
        // at least one operand is null
        (ScalarValue::Int8(None), ScalarValue::Int8(None)),
        (ScalarValue::Int8(None), scalar!(Int8, 1)),
        (scalar!(Int8, 1), ScalarValue::Int8(None)),
        // operand types differ
        (scalar!(Int8, 1), scalar!(Int16, 1)),
        (scalar!(Int8, 1), scalar!(Float32, 1.0)),
        (half_one(), scalar!(Float32, 1.0)),
        (half_one(), scalar!(Int32, 1)),
        (scalar!(Float64, 1.1), scalar!(Float32, 2.2)),
        (scalar!(UInt64, 777), scalar!(Int32, 111)),
        // operand types differ and one operand is null
        (ScalarValue::Int8(None), scalar!(Int16, 1)),
        (scalar!(Int8, 1), ScalarValue::Int16(None)),
        // strings, booleans, dates and decimals
        (ScalarValue::from("foo"), ScalarValue::from("bar")),
        (scalar!(Boolean, true), scalar!(Boolean, false)),
        (scalar!(Date32, 0), scalar!(Date32, 1)),
        (scalar!(Date64, 0), scalar!(Date64, 1)),
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 5, 5),
        ),
    ];

    for (lhs, rhs) in cases {
        assert!(lhs.distance(&rhs).is_none());
    }
}
7036
/// Negating an interval scalar is expected to negate every component, for
/// each of the three interval representations.
#[test]
fn test_scalar_interval_negate() {
    let year_month = (
        ScalarValue::new_interval_ym(1, 12),
        ScalarValue::new_interval_ym(-1, -12),
    );
    let day_time = (
        ScalarValue::new_interval_dt(1, 999),
        ScalarValue::new_interval_dt(-1, -999),
    );
    let month_day_nano = (
        ScalarValue::new_interval_mdn(12, 15, 123_456),
        ScalarValue::new_interval_mdn(-12, -15, -123_456),
    );

    // (input, expected negation)
    for (expr, expected) in &[year_month, day_time, month_day_nano] {
        let negated = expr.arithmetic_negate().unwrap();
        assert_eq!(*expected, negated, "-expr:{expr:?}");
    }
}
7058
/// Adding an interval scalar to itself is expected to double every component,
/// regardless of operand order.
#[test]
fn test_scalar_interval_add() {
    let year_month = ScalarValue::new_interval_ym(1, 12);
    let day_time = ScalarValue::new_interval_dt(1, 999);
    let month_day_nano = ScalarValue::new_interval_mdn(12, 15, 123_456);

    // (lhs, rhs, expected sum)
    let cases = [
        (
            year_month.clone(),
            year_month,
            ScalarValue::new_interval_ym(2, 24),
        ),
        (
            day_time.clone(),
            day_time,
            ScalarValue::new_interval_dt(2, 1998),
        ),
        (
            month_day_nano.clone(),
            month_day_nano,
            ScalarValue::new_interval_mdn(24, 30, 246_912),
        ),
    ];

    for (lhs, rhs, expected) in &cases {
        // Evaluate both operand orders before asserting on either.
        let forward = lhs.add(rhs).unwrap();
        let swapped = rhs.add(lhs).unwrap();
        assert_eq!(*expected, forward, "lhs:{lhs:?} + rhs:{rhs:?}");
        assert_eq!(*expected, swapped, "lhs:{rhs:?} + rhs:{lhs:?}");
    }
}
7085
/// Subtracting an interval scalar from itself is expected to give the zero
/// interval of the same representation.
#[test]
fn test_scalar_interval_sub() {
    let year_month = ScalarValue::new_interval_ym(1, 12);
    let day_time = ScalarValue::new_interval_dt(1, 999);
    let month_day_nano = ScalarValue::new_interval_mdn(12, 15, 123_456);

    // (lhs, rhs, expected difference)
    let cases = [
        (
            year_month.clone(),
            year_month,
            ScalarValue::new_interval_ym(0, 0),
        ),
        (
            day_time.clone(),
            day_time,
            ScalarValue::new_interval_dt(0, 0),
        ),
        (
            month_day_nano.clone(),
            month_day_nano,
            ScalarValue::new_interval_mdn(0, 0, 0),
        ),
    ];

    for (lhs, rhs, expected) in &cases {
        let difference = lhs.sub(rhs).unwrap();
        assert_eq!(*expected, difference, "lhs:{lhs:?} - rhs:{rhs:?}");
    }
}
7110
/// Round-trip check on random data: applying an interval to a timestamp and
/// then undoing it must give back the original timestamp.
///
/// Even indices add then subtract; odd indices subtract then add.
#[test]
fn timestamp_op_random_tests() {
    let sample_size = 1000;
    let timestamps1 = get_random_timestamps(sample_size);
    let intervals = get_random_intervals(sample_size);

    for (idx, ts1) in timestamps1.iter().enumerate() {
        let interval = &intervals[idx];
        let round_trip = if idx % 2 == 0 {
            let shifted = ts1.add(interval).unwrap();
            shifted.sub(interval).unwrap()
        } else {
            let shifted = ts1.sub(interval).unwrap();
            shifted.add(interval).unwrap()
        };
        assert_eq!(ts1, &round_trip);
    }
}
7133
/// Struct scalars with different null layouts must convert to arrays with the
/// expected validity and text rendering.
///
/// Three single-row struct scalars are built and combined into one array in
/// two ways (`ScalarValue::iter_to_array`, and `to_array` followed by
/// `concat`); both results are checked by the same closure.
#[test]
fn test_struct_nulls() {
    let fields_b = Fields::from(vec![
        Field::new("ba", DataType::UInt64, true),
        Field::new("bb", DataType::UInt64, true),
    ]);
    let fields = Fields::from(vec![
        Field::new("a", DataType::UInt64, true),
        Field::new("b", DataType::Struct(fields_b.clone()), true),
    ]);

    // Children `a = 1`, `b = {ba: 2, bb: 3}` with no nulls in the children.
    let struct_value = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from(vec![
                (
                    Arc::clone(&fields_b[0]),
                    Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                ),
                (
                    Arc::clone(&fields_b[1]),
                    Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                ),
            ])) as ArrayRef,
        ),
    ];

    // Same children, but the nested struct `b` gets a validity buffer of `0`,
    // which renders as a null `b` in the expected output below.
    let struct_value_with_nulls = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from((
                vec![
                    (
                        Arc::clone(&fields_b[0]),
                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                    ),
                    (
                        Arc::clone(&fields_b[1]),
                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                    ),
                ],
                Buffer::from(&[0]),
            ))) as ArrayRef,
        ),
    ];

    let scalars = vec![
        // whole struct is null
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[0]),
        )))),
        // struct is valid, nested field `b` is null
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value_with_nulls.clone(),
            Buffer::from(&[1]),
        )))),
        // everything is valid
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[1]),
        )))),
    ];

    let check_array = |array| {
        let is_null = is_null(&array).unwrap();
        assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));

        let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
        let formatted = formatted.split('\n').collect::<Vec<_>>();
        // Every row is padded to the column width set by the border lines.
        let expected = vec![
            "+---------------------------+",
            "| col                       |",
            "+---------------------------+",
            "|                           |",
            "| {a: 1, b: }               |",
            "| {a: 1, b: {ba: 2, bb: 3}} |",
            "+---------------------------+",
        ];
        assert_eq!(
            formatted, expected,
            "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
        );
    };

    // Path 1: build a single array directly from the scalars.
    let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
    check_array(array);

    // Path 2: convert each scalar to its own array, then concatenate.
    let arrays = scalars
        .iter()
        .map(ScalarValue::to_array)
        .collect::<Result<Vec<_>>>()
        .expect("Failed to convert to array");
    let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
    let array = arrow::compute::concat(&arrays).unwrap();
    check_array(array);
}
7241
/// Checks the `Display`, `Debug` and pretty-printed table rendering of a
/// struct scalar whose second field is null.
#[test]
fn test_struct_display() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Utf8, true);

    let s = ScalarStructBuilder::new()
        .with_scalar(field_a, ScalarValue::from(1i32))
        .with_scalar(field_b, ScalarValue::Utf8(None))
        .build()
        .unwrap();

    // The null field is expected to render as an empty value.
    assert_eq!(s.to_string(), "{a:1,b:}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // Every snapshot row is padded to the column width set by the borders.
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +-------------+
    | s           |
    +-------------+
    | {a: 1, b: } |
    +-------------+
    ");
}
7270
/// A struct scalar built with an explicit validity buffer that marks its
/// single row as non-null must render all of its field values.
#[test]
fn test_null_bug() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Int32, true);
    let fields = Fields::from(vec![field_a, field_b]);

    let array_a = Arc::new(Int32Array::from_iter_values([1]));
    let array_b = Arc::new(Int32Array::from_iter_values([2]));
    let arrays: Vec<ArrayRef> = vec![array_a, array_b];

    // Validity for one row, explicitly marked as non-null.
    let mut not_nulls = NullBufferBuilder::new(1);
    not_nulls.append_non_null();

    let ar = StructArray::new(fields, arrays, not_nulls.finish());
    let s = ScalarValue::Struct(Arc::new(ar));

    assert_eq!(s.to_string(), "{a:1,b:2}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // Every snapshot row is padded to the column width set by the borders.
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------+
    | s            |
    +--------------+
    | {a: 1, b: 2} |
    +--------------+
    ");
}
7305
/// `Display` for `Date64`: an ordinary millisecond value renders as a date,
/// and a huge negative value is expected to render as an empty string.
#[test]
fn test_display_date64_large_values() {
    let ordinary = ScalarValue::Date64(Some(790179464505));
    assert_eq!(ordinary.to_string(), "1995-01-15");

    let huge_negative = ScalarValue::Date64(Some(-790179464505600000));
    assert_eq!(huge_negative.to_string(), "");
}
7318
/// A null struct scalar displays as `NULL` and renders as an empty cell in
/// the pretty-printed table.
#[test]
fn test_struct_display_null() {
    let fields = vec![Field::new("a", DataType::Int32, false)];
    let s = ScalarStructBuilder::new_null(fields);
    assert_eq!(s.to_string(), "NULL");

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();

    // The empty cell is still padded to the column width set by the borders.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +---+
    | s |
    +---+
    |   |
    +---+
    ");
}
7340
/// Checks the `Display`, `Debug` and pretty-printed table rendering of a map
/// scalar holding four entries: one pair, two pairs, an empty map and a null.
#[test]
fn test_map_display_and_debug() {
    let string_builder = StringBuilder::new();
    let int_builder = Int32Builder::with_capacity(4);
    let mut builder = MapBuilder::new(None, string_builder, int_builder);

    // Row 0: {joe: 1}
    builder.keys().append_value("joe");
    builder.values().append_value(1);
    builder.append(true).unwrap();

    // Row 1: {blogs: 2, foo: 4}
    builder.keys().append_value("blogs");
    builder.values().append_value(2);
    builder.keys().append_value("foo");
    builder.values().append_value(4);
    builder.append(true).unwrap();
    // Row 2: valid but empty map; row 3: null map.
    builder.append(true).unwrap();
    builder.append(false).unwrap();

    let map_value = ScalarValue::Map(Arc::new(builder.finish()));

    assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
    assert_eq!(
        format!("{map_value:?}"),
        r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
    );

    let ScalarValue::Map(arr) = map_value else {
        panic!("Expected map");
    };

    // Every snapshot row is padded to the column width set by the borders.
    let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------------+
    | m                  |
    +--------------------+
    | {joe: 1}           |
    | {blogs: 2, foo: 4} |
    | {}                 |
    |                    |
    +--------------------+
    ");
}
7383
/// `Display` for the binary scalar variants: nulls print as `NULL`, bytes
/// print as upper-case hex, and the 11-byte input is expected to show its
/// first ten bytes followed by `...`.
#[test]
fn test_binary_display() {
    let three_bytes = vec![1u8, 2, 3];
    let eleven_bytes = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (scalar, expected `Display` output)
    let cases = [
        (ScalarValue::Binary(None), "NULL"),
        (ScalarValue::Binary(Some(vec![42u8])), "2A"),
        (ScalarValue::Binary(Some(three_bytes.clone())), "010203"),
        (
            ScalarValue::Binary(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::BinaryView(None), "NULL"),
        (ScalarValue::BinaryView(Some(three_bytes.clone())), "010203"),
        (
            ScalarValue::BinaryView(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::LargeBinary(None), "NULL"),
        (ScalarValue::LargeBinary(Some(three_bytes.clone())), "010203"),
        (
            ScalarValue::LargeBinary(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::FixedSizeBinary(3, None), "NULL"),
        (ScalarValue::FixedSizeBinary(3, Some(three_bytes)), "010203"),
        (
            ScalarValue::FixedSizeBinary(11, Some(eleven_bytes)),
            "0102030405060708090A...",
        ),
    ];

    for (value, expected) in cases {
        assert_eq!(value.to_string(), expected);
    }
}
7422
/// `Debug` for the binary scalar variants: the variant name wraps either
/// `NULL` or the quoted, comma-separated decimal byte values, with no
/// shortening of the 11-byte input.
#[test]
fn test_binary_debug() {
    let three_bytes = vec![1u8, 2, 3];
    let eleven_bytes = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (scalar, expected `Debug` output)
    let cases = [
        (ScalarValue::Binary(None), "Binary(NULL)"),
        (ScalarValue::Binary(Some(vec![42u8])), "Binary(\"42\")"),
        (
            ScalarValue::Binary(Some(three_bytes.clone())),
            "Binary(\"1,2,3\")",
        ),
        (
            ScalarValue::Binary(Some(eleven_bytes.clone())),
            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::BinaryView(None), "BinaryView(NULL)"),
        (
            ScalarValue::BinaryView(Some(three_bytes.clone())),
            "BinaryView(\"1,2,3\")",
        ),
        (
            ScalarValue::BinaryView(Some(eleven_bytes.clone())),
            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::LargeBinary(None), "LargeBinary(NULL)"),
        (
            ScalarValue::LargeBinary(Some(three_bytes.clone())),
            "LargeBinary(\"1,2,3\")",
        ),
        (
            ScalarValue::LargeBinary(Some(eleven_bytes.clone())),
            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (
            ScalarValue::FixedSizeBinary(3, None),
            "FixedSizeBinary(3, NULL)",
        ),
        (
            ScalarValue::FixedSizeBinary(3, Some(three_bytes)),
            "FixedSizeBinary(3, \"1,2,3\")",
        ),
        (
            ScalarValue::FixedSizeBinary(11, Some(eleven_bytes)),
            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
    ];

    for (value, expected) in cases {
        assert_eq!(format!("{value:?}"), expected);
    }
}
7476
/// Building a nullable list from one millisecond timestamp scalar yields an
/// array of length 1.
#[test]
fn test_build_timestamp_millisecond_list() {
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, None);
    let elements = [ScalarValue::TimestampMillisecond(Some(1), None)];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);

    assert_eq!(1, list.len());
}
7486
/// Building a nullable list from a timestamp scalar that carries a time zone
/// must keep that time zone in the list's element type.
#[test]
fn test_newlist_timestamp_zone() {
    let s: &'static str = "UTC";
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, Some(s.into()));
    let elements = [ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);
    assert_eq!(1, list.len());

    // The list wraps the zoned timestamp type in a nullable list field.
    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(element_type, true)));
    assert_eq!(list.data_type(), &expected_type);
}
7504
7505 fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
7506 let vector_size = sample_size;
7507 let mut timestamp = vec![];
7508 let mut rng = rand::rng();
7509 for i in 0..vector_size {
7510 let year = rng.random_range(1995..=2050);
7511 let month = rng.random_range(1..=12);
7512 let day = rng.random_range(1..=28); let hour = rng.random_range(0..=23);
7514 let minute = rng.random_range(0..=59);
7515 let second = rng.random_range(0..=59);
7516 if i % 4 == 0 {
7517 timestamp.push(ScalarValue::TimestampSecond(
7518 Some(
7519 NaiveDate::from_ymd_opt(year, month, day)
7520 .unwrap()
7521 .and_hms_opt(hour, minute, second)
7522 .unwrap()
7523 .and_utc()
7524 .timestamp(),
7525 ),
7526 None,
7527 ))
7528 } else if i % 4 == 1 {
7529 let millisec = rng.random_range(0..=999);
7530 timestamp.push(ScalarValue::TimestampMillisecond(
7531 Some(
7532 NaiveDate::from_ymd_opt(year, month, day)
7533 .unwrap()
7534 .and_hms_milli_opt(hour, minute, second, millisec)
7535 .unwrap()
7536 .and_utc()
7537 .timestamp_millis(),
7538 ),
7539 None,
7540 ))
7541 } else if i % 4 == 2 {
7542 let microsec = rng.random_range(0..=999_999);
7543 timestamp.push(ScalarValue::TimestampMicrosecond(
7544 Some(
7545 NaiveDate::from_ymd_opt(year, month, day)
7546 .unwrap()
7547 .and_hms_micro_opt(hour, minute, second, microsec)
7548 .unwrap()
7549 .and_utc()
7550 .timestamp_micros(),
7551 ),
7552 None,
7553 ))
7554 } else if i % 4 == 3 {
7555 let nanosec = rng.random_range(0..=999_999_999);
7556 timestamp.push(ScalarValue::TimestampNanosecond(
7557 Some(
7558 NaiveDate::from_ymd_opt(year, month, day)
7559 .unwrap()
7560 .and_hms_nano_opt(hour, minute, second, nanosec)
7561 .unwrap()
7562 .and_utc()
7563 .timestamp_nanos_opt()
7564 .unwrap(),
7565 ),
7566 None,
7567 ))
7568 }
7569 }
7570 timestamp
7571 }
7572
7573 fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
7574 const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
7575 const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
7576
7577 let vector_size = sample_size;
7578 let mut intervals = vec![];
7579 let mut rng = rand::rng();
7580 const SECS_IN_ONE_DAY: i32 = 86_400;
7581 const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
7582 for i in 0..vector_size {
7583 if i % 4 == 0 {
7584 let days = rng.random_range(0..5000);
7585 let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
7587 intervals.push(ScalarValue::new_interval_dt(days, millis));
7588 } else if i % 4 == 1 {
7589 let days = rng.random_range(0..5000);
7590 let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
7591 intervals.push(ScalarValue::new_interval_dt(days, millisec));
7592 } else if i % 4 == 2 {
7593 let days = rng.random_range(0..5000);
7594 let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
7596 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7597 } else {
7598 let days = rng.random_range(0..5000);
7599 let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
7600 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7601 }
7602 }
7603 intervals
7604 }
7605
7606 fn union_fields() -> UnionFields {
7607 [
7608 (0, Arc::new(Field::new("A", DataType::Int32, true))),
7609 (1, Arc::new(Field::new("B", DataType::Float64, true))),
7610 ]
7611 .into_iter()
7612 .collect()
7613 }
7614
/// A sparse union scalar whose selected child is a null `Int32` must report
/// itself as null.
#[test]
fn sparse_scalar_union_is_null() {
    let null_child = Box::new(ScalarValue::Int32(None));
    let sparse_scalar =
        ScalarValue::Union(Some((0_i8, null_child)), union_fields(), UnionMode::Sparse);
    assert!(sparse_scalar.is_null());
}
7624
/// A dense union scalar whose selected child is a null `Int32` must report
/// itself as null.
#[test]
fn dense_scalar_union_is_null() {
    let null_child = Box::new(ScalarValue::Int32(None));
    let dense_scalar =
        ScalarValue::Union(Some((0_i8, null_child)), union_fields(), UnionMode::Dense);
    assert!(dense_scalar.is_null());
}
7634
/// A dictionary scalar wrapping `ScalarValue::Null` is null itself, and the
/// array it converts to has a null first element.
#[test]
fn null_dictionary_scalar_produces_null_dictionary_array() {
    let key_type = Box::new(DataType::Int32);
    let null_value = Box::new(ScalarValue::Null);
    let dictionary_scalar = ScalarValue::Dictionary(key_type, null_value);
    assert!(dictionary_scalar.is_null());

    let dictionary_array = dictionary_scalar.to_array().unwrap();
    assert!(dictionary_array.is_null(0));
}
7645
/// `ScalarValue::try_new_null` must succeed and produce a null scalar for
/// every data type listed here, both flat and nested.
#[test]
fn test_scalar_value_try_new_null() {
    // Builds a null scalar for each data type, panicking if any is rejected.
    let nulls_of = |types: &[DataType]| -> Vec<ScalarValue> {
        types
            .iter()
            .map(|data_type| ScalarValue::try_new_null(data_type).unwrap())
            .collect()
    };

    let flat_types = [
        DataType::Boolean,
        DataType::Int8,
        DataType::Int16,
        DataType::Int32,
        DataType::Int64,
        DataType::UInt8,
        DataType::UInt16,
        DataType::UInt32,
        DataType::UInt64,
        DataType::Float16,
        DataType::Float32,
        DataType::Float64,
        DataType::Decimal128(42, 42),
        DataType::Decimal256(42, 42),
        DataType::Utf8,
        DataType::LargeUtf8,
        DataType::Utf8View,
        DataType::Binary,
        DataType::BinaryView,
        DataType::FixedSizeBinary(42),
        DataType::LargeBinary,
        DataType::Date32,
        DataType::Date64,
        DataType::Time32(TimeUnit::Second),
        DataType::Time32(TimeUnit::Millisecond),
        DataType::Time64(TimeUnit::Microsecond),
        DataType::Time64(TimeUnit::Nanosecond),
        DataType::Timestamp(TimeUnit::Second, None),
        DataType::Timestamp(TimeUnit::Millisecond, None),
        DataType::Timestamp(TimeUnit::Microsecond, None),
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        DataType::Interval(IntervalUnit::YearMonth),
        DataType::Interval(IntervalUnit::DayTime),
        DataType::Interval(IntervalUnit::MonthDayNano),
        DataType::Duration(TimeUnit::Second),
        DataType::Duration(TimeUnit::Microsecond),
        DataType::Duration(TimeUnit::Nanosecond),
        DataType::Null,
    ];
    let scalars = nulls_of(&flat_types);
    assert!(scalars.iter().all(|s| s.is_null()));

    let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
    let map_entries = DataType::Struct(Fields::from(vec![
        Field::new("bar", DataType::Utf8, true),
        Field::new("baz", DataType::Int32, true),
    ]));
    let map_field_ref = Arc::new(Field::new("foo", map_entries, true));

    let nested_types = [
        DataType::List(Arc::clone(&field_ref)),
        DataType::LargeList(Arc::clone(&field_ref)),
        DataType::FixedSizeList(Arc::clone(&field_ref), 42),
        DataType::Struct(vec![Arc::clone(&field_ref)].into()),
        DataType::Map(map_field_ref, false),
        DataType::Union(
            UnionFields::new(vec![42], vec![field_ref]),
            UnionMode::Dense,
        ),
    ];
    let scalars = nulls_of(&nested_types);
    assert!(scalars.iter().all(|s| s.is_null()));
}
7729
/// Panics unless `actual` begins with `expected_prefix`; the panic message
/// quotes both strings.
fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
    let (actual, expected_prefix) = (actual.as_ref(), expected_prefix.as_ref());
    if actual.strip_prefix(expected_prefix).is_none() {
        panic!("Expected '{actual}' to start with '{expected_prefix}'");
    }
}
7740}