1mod consts;
21mod struct_builder;
22
23use std::borrow::Borrow;
24use std::cmp::Ordering;
25use std::collections::{HashSet, VecDeque};
26use std::convert::Infallible;
27use std::fmt;
28use std::hash::Hash;
29use std::hash::Hasher;
30use std::iter::repeat_n;
31use std::mem::{size_of, size_of_val};
32use std::str::FromStr;
33use std::sync::Arc;
34
35use crate::arrow_datafusion_err;
36use crate::cast::{
37 as_decimal128_array, as_decimal256_array, as_dictionary_array,
38 as_fixed_size_binary_array, as_fixed_size_list_array,
39};
40use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
41use crate::format::DEFAULT_CAST_OPTIONS;
42use crate::hash_utils::create_hashes;
43use crate::utils::SingleRowListArrayBuilder;
44use arrow::array::{
45 types::{IntervalDayTime, IntervalMonthDayNano},
46 *,
47};
48use arrow::buffer::ScalarBuffer;
49use arrow::compute::kernels::{
50 cast::{cast_with_options, CastOptions},
51 numeric::*,
52};
53use arrow::datatypes::{
54 i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
55 Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
56 Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
57 IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
58 TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
59 UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
60};
61use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
62use half::f16;
63pub use struct_builder::ScalarStructBuilder;
64
/// A single, dynamically typed value with one variant per supported Arrow
/// [`DataType`].
///
/// For most variants the payload is an `Option`, where `None` is a typed null
/// (see [`ScalarValue::try_new_null`]). Nested variants instead hold an
/// `Arc`'d Arrow array; the constructors and list helpers in this file create
/// and expect such arrays with exactly one row.
#[derive(Clone)]
pub enum ScalarValue {
    /// Untyped null, the scalar for `DataType::Null`.
    Null,
    /// `DataType::Boolean` value.
    Boolean(Option<bool>),
    /// `DataType::Float16` value.
    Float16(Option<f16>),
    /// `DataType::Float32` value.
    Float32(Option<f32>),
    /// `DataType::Float64` value.
    Float64(Option<f64>),
    /// `DataType::Decimal128` value: raw `i128`, then precision, then scale.
    Decimal128(Option<i128>, u8, i8),
    /// `DataType::Decimal256` value: raw `i256`, then precision, then scale.
    Decimal256(Option<i256>, u8, i8),
    /// `DataType::Int8` value.
    Int8(Option<i8>),
    /// `DataType::Int16` value.
    Int16(Option<i16>),
    /// `DataType::Int32` value.
    Int32(Option<i32>),
    /// `DataType::Int64` value.
    Int64(Option<i64>),
    /// `DataType::UInt8` value.
    UInt8(Option<u8>),
    /// `DataType::UInt16` value.
    UInt16(Option<u16>),
    /// `DataType::UInt32` value.
    UInt32(Option<u32>),
    /// `DataType::UInt64` value.
    UInt64(Option<u64>),
    /// `DataType::Utf8` string value.
    Utf8(Option<String>),
    /// `DataType::Utf8View` string value.
    Utf8View(Option<String>),
    /// `DataType::LargeUtf8` string value.
    LargeUtf8(Option<String>),
    /// `DataType::Binary` value.
    Binary(Option<Vec<u8>>),
    /// `DataType::BinaryView` value.
    BinaryView(Option<Vec<u8>>),
    /// `DataType::FixedSizeBinary` value: the declared byte width, then the bytes.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// `DataType::LargeBinary` value.
    LargeBinary(Option<Vec<u8>>),
    /// `DataType::FixedSizeList` value, held as an Arrow array.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// `DataType::List` value, held as an Arrow array.
    List(Arc<ListArray>),
    /// `DataType::LargeList` value, held as an Arrow array.
    LargeList(Arc<LargeListArray>),
    /// `DataType::Struct` value, held as an Arrow array.
    Struct(Arc<StructArray>),
    /// `DataType::Map` value, held as an Arrow array.
    Map(Arc<MapArray>),
    /// `DataType::Date32` value.
    Date32(Option<i32>),
    /// `DataType::Date64` value.
    Date64(Option<i64>),
    /// `DataType::Time32(TimeUnit::Second)` value.
    Time32Second(Option<i32>),
    /// `DataType::Time32(TimeUnit::Millisecond)` value.
    Time32Millisecond(Option<i32>),
    /// `DataType::Time64(TimeUnit::Microsecond)` value.
    Time64Microsecond(Option<i64>),
    /// `DataType::Time64(TimeUnit::Nanosecond)` value.
    Time64Nanosecond(Option<i64>),
    /// `DataType::Timestamp(TimeUnit::Second, tz)` value and its optional timezone.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// `DataType::Timestamp(TimeUnit::Millisecond, tz)` value and its optional timezone.
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// `DataType::Timestamp(TimeUnit::Microsecond, tz)` value and its optional timezone.
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// `DataType::Timestamp(TimeUnit::Nanosecond, tz)` value and its optional timezone.
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// `DataType::Interval(IntervalUnit::YearMonth)` value.
    IntervalYearMonth(Option<i32>),
    /// `DataType::Interval(IntervalUnit::DayTime)` value.
    IntervalDayTime(Option<IntervalDayTime>),
    /// `DataType::Interval(IntervalUnit::MonthDayNano)` value.
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// `DataType::Duration(TimeUnit::Second)` value.
    DurationSecond(Option<i64>),
    /// `DataType::Duration(TimeUnit::Millisecond)` value.
    DurationMillisecond(Option<i64>),
    /// `DataType::Duration(TimeUnit::Microsecond)` value.
    DurationMicrosecond(Option<i64>),
    /// `DataType::Duration(TimeUnit::Nanosecond)` value.
    DurationNanosecond(Option<i64>),
    /// `DataType::Union` value: the optional selected value, the union's
    /// fields, and its mode.
    // NOTE(review): the `i8` is presumably the union type id of the selected
    // child; confirm against the code that builds this variant.
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// `DataType::Dictionary` value: the dictionary index (key) type, then the
    /// wrapped value.
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
300
301impl Hash for Fl<f16> {
302 fn hash<H: Hasher>(&self, state: &mut H) {
303 self.0.to_bits().hash(state);
304 }
305}
306
307impl PartialEq for ScalarValue {
309 fn eq(&self, other: &Self) -> bool {
310 use ScalarValue::*;
311 match (self, other) {
315 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
316 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
317 }
318 (Decimal128(_, _, _), _) => false,
319 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
320 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
321 }
322 (Decimal256(_, _, _), _) => false,
323 (Boolean(v1), Boolean(v2)) => v1.eq(v2),
324 (Boolean(_), _) => false,
325 (Float32(v1), Float32(v2)) => match (v1, v2) {
326 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
327 _ => v1.eq(v2),
328 },
329 (Float16(v1), Float16(v2)) => match (v1, v2) {
330 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
331 _ => v1.eq(v2),
332 },
333 (Float32(_), _) => false,
334 (Float16(_), _) => false,
335 (Float64(v1), Float64(v2)) => match (v1, v2) {
336 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
337 _ => v1.eq(v2),
338 },
339 (Float64(_), _) => false,
340 (Int8(v1), Int8(v2)) => v1.eq(v2),
341 (Int8(_), _) => false,
342 (Int16(v1), Int16(v2)) => v1.eq(v2),
343 (Int16(_), _) => false,
344 (Int32(v1), Int32(v2)) => v1.eq(v2),
345 (Int32(_), _) => false,
346 (Int64(v1), Int64(v2)) => v1.eq(v2),
347 (Int64(_), _) => false,
348 (UInt8(v1), UInt8(v2)) => v1.eq(v2),
349 (UInt8(_), _) => false,
350 (UInt16(v1), UInt16(v2)) => v1.eq(v2),
351 (UInt16(_), _) => false,
352 (UInt32(v1), UInt32(v2)) => v1.eq(v2),
353 (UInt32(_), _) => false,
354 (UInt64(v1), UInt64(v2)) => v1.eq(v2),
355 (UInt64(_), _) => false,
356 (Utf8(v1), Utf8(v2)) => v1.eq(v2),
357 (Utf8(_), _) => false,
358 (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
359 (Utf8View(_), _) => false,
360 (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
361 (LargeUtf8(_), _) => false,
362 (Binary(v1), Binary(v2)) => v1.eq(v2),
363 (Binary(_), _) => false,
364 (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
365 (BinaryView(_), _) => false,
366 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
367 (FixedSizeBinary(_, _), _) => false,
368 (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
369 (LargeBinary(_), _) => false,
370 (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
371 (FixedSizeList(_), _) => false,
372 (List(v1), List(v2)) => v1.eq(v2),
373 (List(_), _) => false,
374 (LargeList(v1), LargeList(v2)) => v1.eq(v2),
375 (LargeList(_), _) => false,
376 (Struct(v1), Struct(v2)) => v1.eq(v2),
377 (Struct(_), _) => false,
378 (Map(v1), Map(v2)) => v1.eq(v2),
379 (Map(_), _) => false,
380 (Date32(v1), Date32(v2)) => v1.eq(v2),
381 (Date32(_), _) => false,
382 (Date64(v1), Date64(v2)) => v1.eq(v2),
383 (Date64(_), _) => false,
384 (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
385 (Time32Second(_), _) => false,
386 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
387 (Time32Millisecond(_), _) => false,
388 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
389 (Time64Microsecond(_), _) => false,
390 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
391 (Time64Nanosecond(_), _) => false,
392 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
393 (TimestampSecond(_, _), _) => false,
394 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
395 (TimestampMillisecond(_, _), _) => false,
396 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
397 (TimestampMicrosecond(_, _), _) => false,
398 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
399 (TimestampNanosecond(_, _), _) => false,
400 (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
401 (DurationSecond(_), _) => false,
402 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
403 (DurationMillisecond(_), _) => false,
404 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
405 (DurationMicrosecond(_), _) => false,
406 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
407 (DurationNanosecond(_), _) => false,
408 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
409 (IntervalYearMonth(_), _) => false,
410 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
411 (IntervalDayTime(_), _) => false,
412 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
413 (IntervalMonthDayNano(_), _) => false,
414 (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
415 val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
416 }
417 (Union(_, _, _), _) => false,
418 (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
419 (Dictionary(_, _), _) => false,
420 (Null, Null) => true,
421 (Null, _) => false,
422 }
423 }
424}
425
426impl PartialOrd for ScalarValue {
428 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
429 use ScalarValue::*;
430 match (self, other) {
434 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
435 if p1.eq(p2) && s1.eq(s2) {
436 v1.partial_cmp(v2)
437 } else {
438 None
440 }
441 }
442 (Decimal128(_, _, _), _) => None,
443 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
444 if p1.eq(p2) && s1.eq(s2) {
445 v1.partial_cmp(v2)
446 } else {
447 None
449 }
450 }
451 (Decimal256(_, _, _), _) => None,
452 (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
453 (Boolean(_), _) => None,
454 (Float32(v1), Float32(v2)) => match (v1, v2) {
455 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
456 _ => v1.partial_cmp(v2),
457 },
458 (Float16(v1), Float16(v2)) => match (v1, v2) {
459 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
460 _ => v1.partial_cmp(v2),
461 },
462 (Float32(_), _) => None,
463 (Float16(_), _) => None,
464 (Float64(v1), Float64(v2)) => match (v1, v2) {
465 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
466 _ => v1.partial_cmp(v2),
467 },
468 (Float64(_), _) => None,
469 (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
470 (Int8(_), _) => None,
471 (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
472 (Int16(_), _) => None,
473 (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
474 (Int32(_), _) => None,
475 (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
476 (Int64(_), _) => None,
477 (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
478 (UInt8(_), _) => None,
479 (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
480 (UInt16(_), _) => None,
481 (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
482 (UInt32(_), _) => None,
483 (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
484 (UInt64(_), _) => None,
485 (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
486 (Utf8(_), _) => None,
487 (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
488 (LargeUtf8(_), _) => None,
489 (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
490 (Utf8View(_), _) => None,
491 (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
492 (Binary(_), _) => None,
493 (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
494 (BinaryView(_), _) => None,
495 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
496 (FixedSizeBinary(_, _), _) => None,
497 (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
498 (LargeBinary(_), _) => None,
499 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
501 (FixedSizeList(arr1), FixedSizeList(arr2)) => {
502 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
503 }
504 (LargeList(arr1), LargeList(arr2)) => {
505 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
506 }
507 (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
508 (Struct(struct_arr1), Struct(struct_arr2)) => {
509 partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
510 }
511 (Struct(_), _) => None,
512 (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
513 (Map(_), _) => None,
514 (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
515 (Date32(_), _) => None,
516 (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
517 (Date64(_), _) => None,
518 (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
519 (Time32Second(_), _) => None,
520 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
521 (Time32Millisecond(_), _) => None,
522 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
523 (Time64Microsecond(_), _) => None,
524 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
525 (Time64Nanosecond(_), _) => None,
526 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
527 (TimestampSecond(_, _), _) => None,
528 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
529 v1.partial_cmp(v2)
530 }
531 (TimestampMillisecond(_, _), _) => None,
532 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
533 v1.partial_cmp(v2)
534 }
535 (TimestampMicrosecond(_, _), _) => None,
536 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
537 v1.partial_cmp(v2)
538 }
539 (TimestampNanosecond(_, _), _) => None,
540 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
541 (IntervalYearMonth(_), _) => None,
542 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
543 (IntervalDayTime(_), _) => None,
544 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
545 (IntervalMonthDayNano(_), _) => None,
546 (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
547 (DurationSecond(_), _) => None,
548 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
549 (DurationMillisecond(_), _) => None,
550 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
551 (DurationMicrosecond(_), _) => None,
552 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
553 (DurationNanosecond(_), _) => None,
554 (Union(v1, t1, m1), Union(v2, t2, m2)) => {
555 if t1.eq(t2) && m1.eq(m2) {
556 v1.partial_cmp(v2)
557 } else {
558 None
559 }
560 }
561 (Union(_, _, _), _) => None,
562 (Dictionary(k1, v1), Dictionary(k2, v2)) => {
563 if k1 == k2 {
565 v1.partial_cmp(v2)
566 } else {
567 None
568 }
569 }
570 (Dictionary(_, _), _) => None,
571 (Null, Null) => Some(Ordering::Equal),
572 (Null, _) => None,
573 }
574 }
575}
576
577fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
580 assert_eq!(arr.len(), 1);
581 if let Some(arr) = arr.as_list_opt::<i32>() {
582 arr.value(0)
583 } else if let Some(arr) = arr.as_list_opt::<i64>() {
584 arr.value(0)
585 } else if let Some(arr) = arr.as_fixed_size_list_opt() {
586 arr.value(0)
587 } else {
588 unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
589 }
590}
591
592fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
594 if arr1.data_type() != arr2.data_type() {
595 return None;
596 }
597 let arr1 = first_array_for_list(arr1);
598 let arr2 = first_array_for_list(arr2);
599
600 let min_length = arr1.len().min(arr2.len());
601 let arr1_trimmed = arr1.slice(0, min_length);
602 let arr2_trimmed = arr2.slice(0, min_length);
603
604 let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
605 let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
606
607 for j in 0..lt_res.len() {
608 if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
616 return Some(Ordering::Greater);
617 }
618 if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
619 return Some(Ordering::Less);
620 }
621
622 if lt_res.is_valid(j) && lt_res.value(j) {
623 return Some(Ordering::Less);
624 }
625 if eq_res.is_valid(j) && !eq_res.value(j) {
626 return Some(Ordering::Greater);
627 }
628 }
629
630 Some(arr1.len().cmp(&arr2.len()))
631}
632
633fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
634 for i in 0..array.num_columns() {
635 let column = array.column(i);
636 if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
637 flatten(nested_struct, columns);
639 } else {
640 columns.push(column);
642 }
643 }
644}
645
646pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
647 if s1.len() != s2.len() {
648 return None;
649 }
650
651 if s1.data_type() != s2.data_type() {
652 return None;
653 }
654
655 let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
656 let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
657
658 flatten(s1, &mut expanded_columns1);
659 flatten(s2, &mut expanded_columns2);
660
661 for col_index in 0..expanded_columns1.len() {
662 let arr1 = expanded_columns1[col_index];
663 let arr2 = expanded_columns2[col_index];
664
665 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
666 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
667
668 for j in 0..lt_res.len() {
669 if lt_res.is_valid(j) && lt_res.value(j) {
670 return Some(Ordering::Less);
671 }
672 if eq_res.is_valid(j) && !eq_res.value(j) {
673 return Some(Ordering::Greater);
674 }
675 }
676 }
677 Some(Ordering::Equal)
678}
679
680fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
681 if m1.len() != m2.len() {
682 return None;
683 }
684
685 if m1.data_type() != m2.data_type() {
686 return None;
687 }
688
689 for col_index in 0..m1.len() {
690 let arr1 = m1.entries().column(col_index);
691 let arr2 = m2.entries().column(col_index);
692
693 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
694 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
695
696 for j in 0..lt_res.len() {
697 if lt_res.is_valid(j) && lt_res.value(j) {
698 return Some(Ordering::Less);
699 }
700 if eq_res.is_valid(j) && !eq_res.value(j) {
701 return Some(Ordering::Greater);
702 }
703 }
704 }
705 Some(Ordering::Equal)
706}
707
// `PartialEq` above compares floats by bit pattern rather than with IEEE
// `==`, so a float scalar always equals itself and `Eq` can be claimed.
impl Eq for ScalarValue {}
709
/// Newtype wrapper that gives float values a `Hash` implementation; the
/// `Hash` impls for `Fl<f16>`, `Fl<f32>` and `Fl<f64>` in this file hash the
/// float's raw bits.
struct Fl<T>(T);
712
713macro_rules! hash_float_value {
714 ($(($t:ty, $i:ty)),+) => {
715 $(impl std::hash::Hash for Fl<$t> {
716 #[inline]
717 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
718 state.write(&<$i>::from_ne_bytes(self.0.to_ne_bytes()).to_ne_bytes())
719 }
720 })+
721 };
722}
723
724hash_float_value!((f64, u64), (f32, u32));
725
726impl Hash for ScalarValue {
732 fn hash<H: Hasher>(&self, state: &mut H) {
733 use ScalarValue::*;
734 match self {
735 Decimal128(v, p, s) => {
736 v.hash(state);
737 p.hash(state);
738 s.hash(state)
739 }
740 Decimal256(v, p, s) => {
741 v.hash(state);
742 p.hash(state);
743 s.hash(state)
744 }
745 Boolean(v) => v.hash(state),
746 Float16(v) => v.map(Fl).hash(state),
747 Float32(v) => v.map(Fl).hash(state),
748 Float64(v) => v.map(Fl).hash(state),
749 Int8(v) => v.hash(state),
750 Int16(v) => v.hash(state),
751 Int32(v) => v.hash(state),
752 Int64(v) => v.hash(state),
753 UInt8(v) => v.hash(state),
754 UInt16(v) => v.hash(state),
755 UInt32(v) => v.hash(state),
756 UInt64(v) => v.hash(state),
757 Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
758 Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
759 v.hash(state)
760 }
761 List(arr) => {
762 hash_nested_array(arr.to_owned() as ArrayRef, state);
763 }
764 LargeList(arr) => {
765 hash_nested_array(arr.to_owned() as ArrayRef, state);
766 }
767 FixedSizeList(arr) => {
768 hash_nested_array(arr.to_owned() as ArrayRef, state);
769 }
770 Struct(arr) => {
771 hash_nested_array(arr.to_owned() as ArrayRef, state);
772 }
773 Map(arr) => {
774 hash_nested_array(arr.to_owned() as ArrayRef, state);
775 }
776 Date32(v) => v.hash(state),
777 Date64(v) => v.hash(state),
778 Time32Second(v) => v.hash(state),
779 Time32Millisecond(v) => v.hash(state),
780 Time64Microsecond(v) => v.hash(state),
781 Time64Nanosecond(v) => v.hash(state),
782 TimestampSecond(v, _) => v.hash(state),
783 TimestampMillisecond(v, _) => v.hash(state),
784 TimestampMicrosecond(v, _) => v.hash(state),
785 TimestampNanosecond(v, _) => v.hash(state),
786 DurationSecond(v) => v.hash(state),
787 DurationMillisecond(v) => v.hash(state),
788 DurationMicrosecond(v) => v.hash(state),
789 DurationNanosecond(v) => v.hash(state),
790 IntervalYearMonth(v) => v.hash(state),
791 IntervalDayTime(v) => v.hash(state),
792 IntervalMonthDayNano(v) => v.hash(state),
793 Union(v, t, m) => {
794 v.hash(state);
795 t.hash(state);
796 m.hash(state);
797 }
798 Dictionary(k, v) => {
799 k.hash(state);
800 v.hash(state);
801 }
802 Null => 1.hash(state),
804 }
805 }
806}
807
808fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
809 let arrays = vec![arr.to_owned()];
810 let hashes_buffer = &mut vec![0; arr.len()];
811 let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
812 let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
813 hashes.hash(state);
815}
816
817#[inline]
824pub fn get_dict_value<K: ArrowDictionaryKeyType>(
825 array: &dyn Array,
826 index: usize,
827) -> Result<(&ArrayRef, Option<usize>)> {
828 let dict_array = as_dictionary_array::<K>(array)?;
829 Ok((dict_array.values(), dict_array.key(index)))
830}
831
832fn dict_from_scalar<K: ArrowDictionaryKeyType>(
835 value: &ScalarValue,
836 size: usize,
837) -> Result<ArrayRef> {
838 let values_array = value.to_array_of_size(1)?;
840
841 let key_array: PrimitiveArray<K> = repeat_n(
843 if value.is_null() {
844 None
845 } else {
846 Some(K::default_value())
847 },
848 size,
849 )
850 .collect();
851
852 Ok(Arc::new(
858 DictionaryArray::<K>::try_new(key_array, values_array)?, ))
860}
861
862fn dict_from_values<K: ArrowDictionaryKeyType>(
864 values_array: ArrayRef,
865) -> Result<ArrayRef> {
866 let key_array: PrimitiveArray<K> = (0..values_array.len())
869 .map(|index| {
870 if values_array.is_valid(index) {
871 let native_index = K::Native::from_usize(index).ok_or_else(|| {
872 DataFusionError::Internal(format!(
873 "Can not create index of type {} from value {}",
874 K::DATA_TYPE,
875 index
876 ))
877 })?;
878 Ok(Some(native_index))
879 } else {
880 Ok(None)
881 }
882 })
883 .collect::<Result<Vec<_>>>()?
884 .into_iter()
885 .collect();
886
887 let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
893 Ok(Arc::new(dict_array))
894}
895
/// Reads element `$index` of `$array` (downcast to `$ARRAYTYPE`) into the
/// two-field variant `ScalarValue::$SCALAR`, pairing it with a clone of `$TZ`.
///
/// A null element becomes `None`. Evaluates to
/// `Result<ScalarValue, DataFusionError>`. A failed downcast leaves the
/// enclosing function early through `?` with an internal error.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{
        let downcast = $array.as_any().downcast_ref::<$ARRAYTYPE>();
        let array = downcast.ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                std::any::type_name::<$ARRAYTYPE>()
            ))
        })?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
            $TZ.clone(),
        ))
    }};
}
917
/// Reads element `$index` of `$array` (downcast to `$ARRAYTYPE`) into the
/// single-field variant `ScalarValue::$SCALAR`.
///
/// A null element becomes `None`. Evaluates to
/// `Result<ScalarValue, DataFusionError>`. A failed downcast leaves the
/// enclosing function early through `?` with an internal error.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
        let downcast = $array.as_any().downcast_ref::<$ARRAYTYPE>();
        let array = downcast.ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                std::any::type_name::<$ARRAYTYPE>()
            ))
        })?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
        ))
    }};
}
938
/// Builds an array of `$SIZE` rows from an optional value.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)`; `None`
/// yields an all-null array of the given data type. The first form takes a
/// unit `DataType` variant, the second a variant with one argument (`$ENUM`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(present) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*present, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE, $SIZE)
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(present) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*present, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE)
        }
    }};
}
953
/// Builds a timestamp array of `$SIZE` rows from an optional value.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` tagged with
/// timezone `$TZ`; `None` yields an all-null
/// `DataType::Timestamp($TIME_UNIT, $TZ)` array.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        if let Some(present) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*present, $SIZE).with_timezone_opt($TZ))
        } else {
            new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE)
        }
    };
}
964
/// Tests whether element `$index` of `$array` (downcast to `$ARRAYTYPE`)
/// equals the optional value `$VALUE`.
///
/// `Some(v)` matches a valid element equal to `v`; `None` matches a null
/// element. Evaluates to `Result<bool, DataFusionError>`. A failed downcast
/// leaves the enclosing function early through `?` with an internal error.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{
        let downcast = $array.as_any().downcast_ref::<$ARRAYTYPE>();
        let array = downcast.ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                std::any::type_name::<$ARRAYTYPE>()
            ))
        })?;
        let is_valid = array.is_valid($index);
        let is_match = if let Some(expected) = $VALUE {
            is_valid && &array.value($index) == expected
        } else {
            !is_valid
        };
        Ok::<bool, DataFusionError>(is_match)
    }};
}
984
985impl ScalarValue {
986 pub fn new_primitive<T: ArrowPrimitiveType>(
992 a: Option<T::Native>,
993 d: &DataType,
994 ) -> Result<Self> {
995 match a {
996 None => d.try_into(),
997 Some(v) => {
998 let array = PrimitiveArray::<T>::new(vec![v].into(), None)
999 .with_data_type(d.clone());
1000 Self::try_from_array(&array, 0)
1001 }
1002 }
1003 }
1004
1005 pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1007 if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1009 return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1010 }
1011 _internal_err!(
1012 "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1013 )
1014 }
1015
1016 pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1028 Ok(match data_type {
1029 DataType::Boolean => ScalarValue::Boolean(None),
1030 DataType::Float16 => ScalarValue::Float16(None),
1031 DataType::Float64 => ScalarValue::Float64(None),
1032 DataType::Float32 => ScalarValue::Float32(None),
1033 DataType::Int8 => ScalarValue::Int8(None),
1034 DataType::Int16 => ScalarValue::Int16(None),
1035 DataType::Int32 => ScalarValue::Int32(None),
1036 DataType::Int64 => ScalarValue::Int64(None),
1037 DataType::UInt8 => ScalarValue::UInt8(None),
1038 DataType::UInt16 => ScalarValue::UInt16(None),
1039 DataType::UInt32 => ScalarValue::UInt32(None),
1040 DataType::UInt64 => ScalarValue::UInt64(None),
1041 DataType::Decimal128(precision, scale) => {
1042 ScalarValue::Decimal128(None, *precision, *scale)
1043 }
1044 DataType::Decimal256(precision, scale) => {
1045 ScalarValue::Decimal256(None, *precision, *scale)
1046 }
1047 DataType::Utf8 => ScalarValue::Utf8(None),
1048 DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1049 DataType::Utf8View => ScalarValue::Utf8View(None),
1050 DataType::Binary => ScalarValue::Binary(None),
1051 DataType::BinaryView => ScalarValue::BinaryView(None),
1052 DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1053 DataType::LargeBinary => ScalarValue::LargeBinary(None),
1054 DataType::Date32 => ScalarValue::Date32(None),
1055 DataType::Date64 => ScalarValue::Date64(None),
1056 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1057 DataType::Time32(TimeUnit::Millisecond) => {
1058 ScalarValue::Time32Millisecond(None)
1059 }
1060 DataType::Time64(TimeUnit::Microsecond) => {
1061 ScalarValue::Time64Microsecond(None)
1062 }
1063 DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1064 DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1065 ScalarValue::TimestampSecond(None, tz_opt.clone())
1066 }
1067 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1068 ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1069 }
1070 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1071 ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1072 }
1073 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1074 ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1075 }
1076 DataType::Interval(IntervalUnit::YearMonth) => {
1077 ScalarValue::IntervalYearMonth(None)
1078 }
1079 DataType::Interval(IntervalUnit::DayTime) => {
1080 ScalarValue::IntervalDayTime(None)
1081 }
1082 DataType::Interval(IntervalUnit::MonthDayNano) => {
1083 ScalarValue::IntervalMonthDayNano(None)
1084 }
1085 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1086 DataType::Duration(TimeUnit::Millisecond) => {
1087 ScalarValue::DurationMillisecond(None)
1088 }
1089 DataType::Duration(TimeUnit::Microsecond) => {
1090 ScalarValue::DurationMicrosecond(None)
1091 }
1092 DataType::Duration(TimeUnit::Nanosecond) => {
1093 ScalarValue::DurationNanosecond(None)
1094 }
1095 DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1096 index_type.clone(),
1097 Box::new(value_type.as_ref().try_into()?),
1098 ),
1099 DataType::List(field_ref) => ScalarValue::List(Arc::new(
1101 GenericListArray::new_null(Arc::clone(field_ref), 1),
1102 )),
1103 DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1105 GenericListArray::new_null(Arc::clone(field_ref), 1),
1106 )),
1107 DataType::FixedSizeList(field_ref, fixed_length) => {
1109 ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1110 Arc::clone(field_ref),
1111 *fixed_length,
1112 1,
1113 )))
1114 }
1115 DataType::Struct(fields) => ScalarValue::Struct(
1116 new_null_array(&DataType::Struct(fields.to_owned()), 1)
1117 .as_struct()
1118 .to_owned()
1119 .into(),
1120 ),
1121 DataType::Map(fields, sorted) => ScalarValue::Map(
1122 new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1123 .as_map()
1124 .to_owned()
1125 .into(),
1126 ),
1127 DataType::Union(fields, mode) => {
1128 ScalarValue::Union(None, fields.clone(), *mode)
1129 }
1130 DataType::Null => ScalarValue::Null,
1131 _ => {
1132 return _not_impl_err!(
1133 "Can't create a null scalar from data_type \"{data_type:?}\""
1134 );
1135 }
1136 })
1137 }
1138
1139 pub fn new_utf8(val: impl Into<String>) -> Self {
1141 ScalarValue::from(val.into())
1142 }
1143
1144 pub fn new_utf8view(val: impl Into<String>) -> Self {
1146 ScalarValue::Utf8View(Some(val.into()))
1147 }
1148
1149 pub fn new_interval_ym(years: i32, months: i32) -> Self {
1152 let val = IntervalYearMonthType::make_value(years, months);
1153 ScalarValue::IntervalYearMonth(Some(val))
1154 }
1155
1156 pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1159 let val = IntervalDayTimeType::make_value(days, millis);
1160 Self::IntervalDayTime(Some(val))
1161 }
1162
1163 pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1166 let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1167 ScalarValue::IntervalMonthDayNano(Some(val))
1168 }
1169
1170 pub fn new_timestamp<T: ArrowTimestampType>(
1173 value: Option<i64>,
1174 tz_opt: Option<Arc<str>>,
1175 ) -> Self {
1176 match T::UNIT {
1177 TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1178 TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1179 TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1180 TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1181 }
1182 }
1183
1184 pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1186 match datatype {
1187 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1188 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1189 _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1190 }
1191 }
1192
1193 pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1195 match datatype {
1198 DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1199 DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1200 _ => {
1201 _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1202 }
1203 }
1204 }
1205
1206 pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1208 match datatype {
1209 DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1210 DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1211 _ => {
1212 _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1213 }
1214 }
1215 }
1216
1217 pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1219 match datatype {
1220 DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1221 DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1222 _ => {
1223 _internal_err!(
1224 "PI_UPPER/2 is not supported for data type: {:?}",
1225 datatype
1226 )
1227 }
1228 }
1229 }
1230
1231 pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1233 match datatype {
1234 DataType::Float32 => {
1235 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1236 }
1237 DataType::Float64 => {
1238 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1239 }
1240 _ => {
1241 _internal_err!(
1242 "-PI/2_LOWER is not supported for data type: {:?}",
1243 datatype
1244 )
1245 }
1246 }
1247 }
1248
1249 pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1251 match datatype {
1252 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1253 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1254 _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1255 }
1256 }
1257
1258 pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1260 match datatype {
1261 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1262 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1263 _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1264 }
1265 }
1266
1267 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1269 match datatype {
1270 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1271 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1272 _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1273 }
1274 }
1275
1276 pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1278 match datatype {
1279 DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1280 DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1281 _ => {
1282 _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1283 }
1284 }
1285 }
1286
1287 pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1289 match datatype {
1290 DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1291 DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1292 _ => {
1293 _internal_err!(
1294 "Negative Infinity is not supported for data type: {:?}",
1295 datatype
1296 )
1297 }
1298 }
1299 }
1300
1301 pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1303 Ok(match datatype {
1304 DataType::Boolean => ScalarValue::Boolean(Some(false)),
1305 DataType::Int8 => ScalarValue::Int8(Some(0)),
1306 DataType::Int16 => ScalarValue::Int16(Some(0)),
1307 DataType::Int32 => ScalarValue::Int32(Some(0)),
1308 DataType::Int64 => ScalarValue::Int64(Some(0)),
1309 DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1310 DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1311 DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1312 DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1313 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1314 DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1315 DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1316 DataType::Decimal128(precision, scale) => {
1317 ScalarValue::Decimal128(Some(0), *precision, *scale)
1318 }
1319 DataType::Decimal256(precision, scale) => {
1320 ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1321 }
1322 DataType::Timestamp(TimeUnit::Second, tz) => {
1323 ScalarValue::TimestampSecond(Some(0), tz.clone())
1324 }
1325 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1326 ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1327 }
1328 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1329 ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1330 }
1331 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1332 ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1333 }
1334 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1335 DataType::Time32(TimeUnit::Millisecond) => {
1336 ScalarValue::Time32Millisecond(Some(0))
1337 }
1338 DataType::Time64(TimeUnit::Microsecond) => {
1339 ScalarValue::Time64Microsecond(Some(0))
1340 }
1341 DataType::Time64(TimeUnit::Nanosecond) => {
1342 ScalarValue::Time64Nanosecond(Some(0))
1343 }
1344 DataType::Interval(IntervalUnit::YearMonth) => {
1345 ScalarValue::IntervalYearMonth(Some(0))
1346 }
1347 DataType::Interval(IntervalUnit::DayTime) => {
1348 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1349 }
1350 DataType::Interval(IntervalUnit::MonthDayNano) => {
1351 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1352 }
1353 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1354 DataType::Duration(TimeUnit::Millisecond) => {
1355 ScalarValue::DurationMillisecond(Some(0))
1356 }
1357 DataType::Duration(TimeUnit::Microsecond) => {
1358 ScalarValue::DurationMicrosecond(Some(0))
1359 }
1360 DataType::Duration(TimeUnit::Nanosecond) => {
1361 ScalarValue::DurationNanosecond(Some(0))
1362 }
1363 DataType::Date32 => ScalarValue::Date32(Some(0)),
1364 DataType::Date64 => ScalarValue::Date64(Some(0)),
1365 _ => {
1366 return _not_impl_err!(
1367 "Can't create a zero scalar from data_type \"{datatype:?}\""
1368 );
1369 }
1370 })
1371 }
1372
1373 pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1375 Ok(match datatype {
1376 DataType::Int8 => ScalarValue::Int8(Some(1)),
1377 DataType::Int16 => ScalarValue::Int16(Some(1)),
1378 DataType::Int32 => ScalarValue::Int32(Some(1)),
1379 DataType::Int64 => ScalarValue::Int64(Some(1)),
1380 DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1381 DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1382 DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1383 DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1384 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1385 DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1386 DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1387 _ => {
1388 return _not_impl_err!(
1389 "Can't create an one scalar from data_type \"{datatype:?}\""
1390 );
1391 }
1392 })
1393 }
1394
1395 pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1397 Ok(match datatype {
1398 DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1399 DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1400 DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1401 DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1402 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1403 DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1404 DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1405 _ => {
1406 return _not_impl_err!(
1407 "Can't create a negative one scalar from data_type \"{datatype:?}\""
1408 );
1409 }
1410 })
1411 }
1412
1413 pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1414 Ok(match datatype {
1415 DataType::Int8 => ScalarValue::Int8(Some(10)),
1416 DataType::Int16 => ScalarValue::Int16(Some(10)),
1417 DataType::Int32 => ScalarValue::Int32(Some(10)),
1418 DataType::Int64 => ScalarValue::Int64(Some(10)),
1419 DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1420 DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1421 DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1422 DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1423 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1424 DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1425 DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1426 _ => {
1427 return _not_impl_err!(
1428 "Can't create a ten scalar from data_type \"{datatype:?}\""
1429 );
1430 }
1431 })
1432 }
1433
1434 pub fn data_type(&self) -> DataType {
1436 match self {
1437 ScalarValue::Boolean(_) => DataType::Boolean,
1438 ScalarValue::UInt8(_) => DataType::UInt8,
1439 ScalarValue::UInt16(_) => DataType::UInt16,
1440 ScalarValue::UInt32(_) => DataType::UInt32,
1441 ScalarValue::UInt64(_) => DataType::UInt64,
1442 ScalarValue::Int8(_) => DataType::Int8,
1443 ScalarValue::Int16(_) => DataType::Int16,
1444 ScalarValue::Int32(_) => DataType::Int32,
1445 ScalarValue::Int64(_) => DataType::Int64,
1446 ScalarValue::Decimal128(_, precision, scale) => {
1447 DataType::Decimal128(*precision, *scale)
1448 }
1449 ScalarValue::Decimal256(_, precision, scale) => {
1450 DataType::Decimal256(*precision, *scale)
1451 }
1452 ScalarValue::TimestampSecond(_, tz_opt) => {
1453 DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1454 }
1455 ScalarValue::TimestampMillisecond(_, tz_opt) => {
1456 DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1457 }
1458 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1459 DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1460 }
1461 ScalarValue::TimestampNanosecond(_, tz_opt) => {
1462 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1463 }
1464 ScalarValue::Float16(_) => DataType::Float16,
1465 ScalarValue::Float32(_) => DataType::Float32,
1466 ScalarValue::Float64(_) => DataType::Float64,
1467 ScalarValue::Utf8(_) => DataType::Utf8,
1468 ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1469 ScalarValue::Utf8View(_) => DataType::Utf8View,
1470 ScalarValue::Binary(_) => DataType::Binary,
1471 ScalarValue::BinaryView(_) => DataType::BinaryView,
1472 ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1473 ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1474 ScalarValue::List(arr) => arr.data_type().to_owned(),
1475 ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1476 ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1477 ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1478 ScalarValue::Map(arr) => arr.data_type().to_owned(),
1479 ScalarValue::Date32(_) => DataType::Date32,
1480 ScalarValue::Date64(_) => DataType::Date64,
1481 ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1482 ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1483 ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1484 ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1485 ScalarValue::IntervalYearMonth(_) => {
1486 DataType::Interval(IntervalUnit::YearMonth)
1487 }
1488 ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1489 ScalarValue::IntervalMonthDayNano(_) => {
1490 DataType::Interval(IntervalUnit::MonthDayNano)
1491 }
1492 ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1493 ScalarValue::DurationMillisecond(_) => {
1494 DataType::Duration(TimeUnit::Millisecond)
1495 }
1496 ScalarValue::DurationMicrosecond(_) => {
1497 DataType::Duration(TimeUnit::Microsecond)
1498 }
1499 ScalarValue::DurationNanosecond(_) => {
1500 DataType::Duration(TimeUnit::Nanosecond)
1501 }
1502 ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1503 ScalarValue::Dictionary(k, v) => {
1504 DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1505 }
1506 ScalarValue::Null => DataType::Null,
1507 }
1508 }
1509
1510 pub fn arithmetic_negate(&self) -> Result<Self> {
1512 fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1513 v: T,
1514 ctx: impl Fn() -> String,
1515 ) -> Result<T> {
1516 v.neg_checked()
1517 .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1518 }
1519 match self {
1520 ScalarValue::Int8(None)
1521 | ScalarValue::Int16(None)
1522 | ScalarValue::Int32(None)
1523 | ScalarValue::Int64(None)
1524 | ScalarValue::Float16(None)
1525 | ScalarValue::Float32(None)
1526 | ScalarValue::Float64(None) => Ok(self.clone()),
1527 ScalarValue::Float16(Some(v)) => {
1528 Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1529 }
1530 ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1531 ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1532 ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1533 ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1534 ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1535 ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1536 ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1537 ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1538 format!("In negation of IntervalYearMonth({v})")
1539 })?)),
1540 ),
1541 ScalarValue::IntervalDayTime(Some(v)) => {
1542 let (days, ms) = IntervalDayTimeType::to_parts(*v);
1543 let val = IntervalDayTimeType::make_value(
1544 neg_checked_with_ctx(days, || {
1545 format!("In negation of days {days} in IntervalDayTime")
1546 })?,
1547 neg_checked_with_ctx(ms, || {
1548 format!("In negation of milliseconds {ms} in IntervalDayTime")
1549 })?,
1550 );
1551 Ok(ScalarValue::IntervalDayTime(Some(val)))
1552 }
1553 ScalarValue::IntervalMonthDayNano(Some(v)) => {
1554 let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1555 let val = IntervalMonthDayNanoType::make_value(
1556 neg_checked_with_ctx(months, || {
1557 format!("In negation of months {months} of IntervalMonthDayNano")
1558 })?,
1559 neg_checked_with_ctx(days, || {
1560 format!("In negation of days {days} of IntervalMonthDayNano")
1561 })?,
1562 neg_checked_with_ctx(nanos, || {
1563 format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1564 })?,
1565 );
1566 Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1567 }
1568 ScalarValue::Decimal128(Some(v), precision, scale) => {
1569 Ok(ScalarValue::Decimal128(
1570 Some(neg_checked_with_ctx(*v, || {
1571 format!("In negation of Decimal128({v}, {precision}, {scale})")
1572 })?),
1573 *precision,
1574 *scale,
1575 ))
1576 }
1577 ScalarValue::Decimal256(Some(v), precision, scale) => {
1578 Ok(ScalarValue::Decimal256(
1579 Some(neg_checked_with_ctx(*v, || {
1580 format!("In negation of Decimal256({v}, {precision}, {scale})")
1581 })?),
1582 *precision,
1583 *scale,
1584 ))
1585 }
1586 ScalarValue::TimestampSecond(Some(v), tz) => {
1587 Ok(ScalarValue::TimestampSecond(
1588 Some(neg_checked_with_ctx(*v, || {
1589 format!("In negation of TimestampSecond({v})")
1590 })?),
1591 tz.clone(),
1592 ))
1593 }
1594 ScalarValue::TimestampNanosecond(Some(v), tz) => {
1595 Ok(ScalarValue::TimestampNanosecond(
1596 Some(neg_checked_with_ctx(*v, || {
1597 format!("In negation of TimestampNanoSecond({v})")
1598 })?),
1599 tz.clone(),
1600 ))
1601 }
1602 ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1603 Ok(ScalarValue::TimestampMicrosecond(
1604 Some(neg_checked_with_ctx(*v, || {
1605 format!("In negation of TimestampMicroSecond({v})")
1606 })?),
1607 tz.clone(),
1608 ))
1609 }
1610 ScalarValue::TimestampMillisecond(Some(v), tz) => {
1611 Ok(ScalarValue::TimestampMillisecond(
1612 Some(neg_checked_with_ctx(*v, || {
1613 format!("In negation of TimestampMilliSecond({v})")
1614 })?),
1615 tz.clone(),
1616 ))
1617 }
1618 value => _internal_err!(
1619 "Can not run arithmetic negative on scalar value {value:?}"
1620 ),
1621 }
1622 }
1623
1624 pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1629 let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1630 Self::try_from_array(r.as_ref(), 0)
1631 }
1632 pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1637 let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1638 Self::try_from_array(r.as_ref(), 0)
1639 }
1640
1641 pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1646 let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1647 Self::try_from_array(r.as_ref(), 0)
1648 }
1649
1650 pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1655 let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1656 Self::try_from_array(r.as_ref(), 0)
1657 }
1658
1659 pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1664 let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1665 Self::try_from_array(r.as_ref(), 0)
1666 }
1667
1668 pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1673 let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1674 Self::try_from_array(r.as_ref(), 0)
1675 }
1676
1677 pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1685 let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1686 Self::try_from_array(r.as_ref(), 0)
1687 }
1688
1689 pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1697 let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1698 Self::try_from_array(r.as_ref(), 0)
1699 }
1700
1701 pub fn is_unsigned(&self) -> bool {
1702 matches!(
1703 self,
1704 ScalarValue::UInt8(_)
1705 | ScalarValue::UInt16(_)
1706 | ScalarValue::UInt32(_)
1707 | ScalarValue::UInt64(_)
1708 )
1709 }
1710
1711 pub fn is_null(&self) -> bool {
1713 match self {
1714 ScalarValue::Boolean(v) => v.is_none(),
1715 ScalarValue::Null => true,
1716 ScalarValue::Float16(v) => v.is_none(),
1717 ScalarValue::Float32(v) => v.is_none(),
1718 ScalarValue::Float64(v) => v.is_none(),
1719 ScalarValue::Decimal128(v, _, _) => v.is_none(),
1720 ScalarValue::Decimal256(v, _, _) => v.is_none(),
1721 ScalarValue::Int8(v) => v.is_none(),
1722 ScalarValue::Int16(v) => v.is_none(),
1723 ScalarValue::Int32(v) => v.is_none(),
1724 ScalarValue::Int64(v) => v.is_none(),
1725 ScalarValue::UInt8(v) => v.is_none(),
1726 ScalarValue::UInt16(v) => v.is_none(),
1727 ScalarValue::UInt32(v) => v.is_none(),
1728 ScalarValue::UInt64(v) => v.is_none(),
1729 ScalarValue::Utf8(v)
1730 | ScalarValue::Utf8View(v)
1731 | ScalarValue::LargeUtf8(v) => v.is_none(),
1732 ScalarValue::Binary(v)
1733 | ScalarValue::BinaryView(v)
1734 | ScalarValue::FixedSizeBinary(_, v)
1735 | ScalarValue::LargeBinary(v) => v.is_none(),
1736 ScalarValue::List(arr) => arr.len() == arr.null_count(),
1739 ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1740 ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1741 ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1742 ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1743 ScalarValue::Date32(v) => v.is_none(),
1744 ScalarValue::Date64(v) => v.is_none(),
1745 ScalarValue::Time32Second(v) => v.is_none(),
1746 ScalarValue::Time32Millisecond(v) => v.is_none(),
1747 ScalarValue::Time64Microsecond(v) => v.is_none(),
1748 ScalarValue::Time64Nanosecond(v) => v.is_none(),
1749 ScalarValue::TimestampSecond(v, _) => v.is_none(),
1750 ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1751 ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1752 ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1753 ScalarValue::IntervalYearMonth(v) => v.is_none(),
1754 ScalarValue::IntervalDayTime(v) => v.is_none(),
1755 ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1756 ScalarValue::DurationSecond(v) => v.is_none(),
1757 ScalarValue::DurationMillisecond(v) => v.is_none(),
1758 ScalarValue::DurationMicrosecond(v) => v.is_none(),
1759 ScalarValue::DurationNanosecond(v) => v.is_none(),
1760 ScalarValue::Union(v, _, _) => match v {
1761 Some((_, s)) => s.is_null(),
1762 None => true,
1763 },
1764 ScalarValue::Dictionary(_, v) => v.is_null(),
1765 }
1766 }
1767
1768 pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1776 match (self, other) {
1777 (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1778 (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1779 (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1780 (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1781 (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1782 (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1783 (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1784 (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1785 (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1787 Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1788 }
1789 (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1790 Some((l - r).abs().round() as _)
1791 }
1792 (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1793 Some((l - r).abs().round() as _)
1794 }
1795 _ => None,
1796 }
1797 }
1798
1799 pub fn to_array(&self) -> Result<ArrayRef> {
1805 self.to_array_of_size(1)
1806 }
1807
1808 pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
1841 Ok(Scalar::new(self.to_array_of_size(1)?))
1842 }
1843
1844 pub fn iter_to_array(
1881 scalars: impl IntoIterator<Item = ScalarValue>,
1882 ) -> Result<ArrayRef> {
1883 let mut scalars = scalars.into_iter().peekable();
1884
1885 let data_type = match scalars.peek() {
1887 None => {
1888 return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1889 }
1890 Some(sv) => sv.data_type(),
1891 };
1892
1893 macro_rules! build_array_primitive {
1896 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1897 {
1898 let array = scalars.map(|sv| {
1899 if let ScalarValue::$SCALAR_TY(v) = sv {
1900 Ok(v)
1901 } else {
1902 _exec_err!(
1903 "Inconsistent types in ScalarValue::iter_to_array. \
1904 Expected {:?}, got {:?}",
1905 data_type, sv
1906 )
1907 }
1908 })
1909 .collect::<Result<$ARRAY_TY>>()?;
1910 Arc::new(array)
1911 }
1912 }};
1913 }
1914
1915 macro_rules! build_array_primitive_tz {
1916 ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1917 {
1918 let array = scalars.map(|sv| {
1919 if let ScalarValue::$SCALAR_TY(v, _) = sv {
1920 Ok(v)
1921 } else {
1922 _exec_err!(
1923 "Inconsistent types in ScalarValue::iter_to_array. \
1924 Expected {:?}, got {:?}",
1925 data_type, sv
1926 )
1927 }
1928 })
1929 .collect::<Result<$ARRAY_TY>>()?;
1930 Arc::new(array.with_timezone_opt($TZ.clone()))
1931 }
1932 }};
1933 }
1934
1935 macro_rules! build_array_string {
1938 ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1939 {
1940 let array = scalars.map(|sv| {
1941 if let ScalarValue::$SCALAR_TY(v) = sv {
1942 Ok(v)
1943 } else {
1944 _exec_err!(
1945 "Inconsistent types in ScalarValue::iter_to_array. \
1946 Expected {:?}, got {:?}",
1947 data_type, sv
1948 )
1949 }
1950 })
1951 .collect::<Result<$ARRAY_TY>>()?;
1952 Arc::new(array)
1953 }
1954 }};
1955 }
1956
1957 let array: ArrayRef = match &data_type {
1958 DataType::Decimal128(precision, scale) => {
1959 let decimal_array =
1960 ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1961 Arc::new(decimal_array)
1962 }
1963 DataType::Decimal256(precision, scale) => {
1964 let decimal_array =
1965 ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1966 Arc::new(decimal_array)
1967 }
1968 DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1969 DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1970 DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1971 DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1972 DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1973 DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1974 DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1975 DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1976 DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1977 DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1978 DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1979 DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1980 DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1981 DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1982 DataType::Utf8 => build_array_string!(StringArray, Utf8),
1983 DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1984 DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1985 DataType::Binary => build_array_string!(BinaryArray, Binary),
1986 DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1987 DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1988 DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1989 DataType::Time32(TimeUnit::Second) => {
1990 build_array_primitive!(Time32SecondArray, Time32Second)
1991 }
1992 DataType::Time32(TimeUnit::Millisecond) => {
1993 build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1994 }
1995 DataType::Time64(TimeUnit::Microsecond) => {
1996 build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1997 }
1998 DataType::Time64(TimeUnit::Nanosecond) => {
1999 build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
2000 }
2001 DataType::Timestamp(TimeUnit::Second, tz) => {
2002 build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
2003 }
2004 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
2005 build_array_primitive_tz!(
2006 TimestampMillisecondArray,
2007 TimestampMillisecond,
2008 tz
2009 )
2010 }
2011 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2012 build_array_primitive_tz!(
2013 TimestampMicrosecondArray,
2014 TimestampMicrosecond,
2015 tz
2016 )
2017 }
2018 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2019 build_array_primitive_tz!(
2020 TimestampNanosecondArray,
2021 TimestampNanosecond,
2022 tz
2023 )
2024 }
2025 DataType::Duration(TimeUnit::Second) => {
2026 build_array_primitive!(DurationSecondArray, DurationSecond)
2027 }
2028 DataType::Duration(TimeUnit::Millisecond) => {
2029 build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2030 }
2031 DataType::Duration(TimeUnit::Microsecond) => {
2032 build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2033 }
2034 DataType::Duration(TimeUnit::Nanosecond) => {
2035 build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2036 }
2037 DataType::Interval(IntervalUnit::DayTime) => {
2038 build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2039 }
2040 DataType::Interval(IntervalUnit::YearMonth) => {
2041 build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2042 }
2043 DataType::Interval(IntervalUnit::MonthDayNano) => {
2044 build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2045 }
2046 DataType::FixedSizeList(_, _) => {
2047 let mut arrays =
2051 scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2052 let first_non_null_data_type = arrays
2053 .iter()
2054 .find(|sv| !sv.is_null(0))
2055 .map(|sv| sv.data_type().to_owned());
2056 if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2057 for array in arrays.iter_mut() {
2058 if array.is_null(0) {
2059 *array = Arc::new(FixedSizeListArray::new_null(
2060 Arc::clone(&f),
2061 l,
2062 1,
2063 ));
2064 }
2065 }
2066 }
2067 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2068 arrow::compute::concat(arrays.as_slice())?
2069 }
2070 DataType::List(_)
2071 | DataType::LargeList(_)
2072 | DataType::Map(_, _)
2073 | DataType::Struct(_)
2074 | DataType::Union(_, _) => {
2075 let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2076 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2077 arrow::compute::concat(arrays.as_slice())?
2078 }
2079 DataType::Dictionary(key_type, value_type) => {
2080 let value_scalars = scalars
2082 .map(|scalar| match scalar {
2083 ScalarValue::Dictionary(inner_key_type, scalar) => {
2084 if &inner_key_type == key_type {
2085 Ok(*scalar)
2086 } else {
2087 _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2088 }
2089 }
2090 _ => {
2091 _exec_err!(
2092 "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2093 )
2094 }
2095 })
2096 .collect::<Result<Vec<_>>>()?;
2097
2098 let values = Self::iter_to_array(value_scalars)?;
2099 assert_eq!(values.data_type(), value_type.as_ref());
2100
2101 match key_type.as_ref() {
2102 DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2103 DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2104 DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2105 DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2106 DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2107 DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2108 DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2109 DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2110 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2111 }
2112 }
2113 DataType::FixedSizeBinary(size) => {
2114 let array = scalars
2115 .map(|sv| {
2116 if let ScalarValue::FixedSizeBinary(_, v) = sv {
2117 Ok(v)
2118 } else {
2119 _exec_err!(
2120 "Inconsistent types in ScalarValue::iter_to_array. \
2121 Expected {data_type:?}, got {sv:?}"
2122 )
2123 }
2124 })
2125 .collect::<Result<Vec<_>>>()?;
2126 let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2127 array.into_iter(),
2128 *size,
2129 )?;
2130 Arc::new(array)
2131 }
2132 DataType::Time32(TimeUnit::Microsecond)
2138 | DataType::Time32(TimeUnit::Nanosecond)
2139 | DataType::Time64(TimeUnit::Second)
2140 | DataType::Time64(TimeUnit::Millisecond)
2141 | DataType::RunEndEncoded(_, _)
2142 | DataType::ListView(_)
2143 | DataType::LargeListView(_) => {
2144 return _not_impl_err!(
2145 "Unsupported creation of {:?} array from ScalarValue {:?}",
2146 data_type,
2147 scalars.peek()
2148 );
2149 }
2150 };
2151 Ok(array)
2152 }
2153
2154 fn iter_to_null_array(
2155 scalars: impl IntoIterator<Item = ScalarValue>,
2156 ) -> Result<ArrayRef> {
2157 let length = scalars.into_iter().try_fold(
2158 0usize,
2159 |r, element: ScalarValue| match element {
2160 ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2161 s => {
2162 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2163 }
2164 },
2165 )?;
2166 Ok(new_null_array(&DataType::Null, length))
2167 }
2168
2169 fn iter_to_decimal_array(
2170 scalars: impl IntoIterator<Item = ScalarValue>,
2171 precision: u8,
2172 scale: i8,
2173 ) -> Result<Decimal128Array> {
2174 let array = scalars
2175 .into_iter()
2176 .map(|element: ScalarValue| match element {
2177 ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2178 s => {
2179 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2180 }
2181 })
2182 .collect::<Result<Decimal128Array>>()?
2183 .with_precision_and_scale(precision, scale)?;
2184 Ok(array)
2185 }
2186
2187 fn iter_to_decimal256_array(
2188 scalars: impl IntoIterator<Item = ScalarValue>,
2189 precision: u8,
2190 scale: i8,
2191 ) -> Result<Decimal256Array> {
2192 let array = scalars
2193 .into_iter()
2194 .map(|element: ScalarValue| match element {
2195 ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2196 s => {
2197 _internal_err!(
2198 "Expected ScalarValue::Decimal256 element. Received {s:?}"
2199 )
2200 }
2201 })
2202 .collect::<Result<Decimal256Array>>()?
2203 .with_precision_and_scale(precision, scale)?;
2204 Ok(array)
2205 }
2206
2207 fn build_decimal_array(
2208 value: Option<i128>,
2209 precision: u8,
2210 scale: i8,
2211 size: usize,
2212 ) -> Result<Decimal128Array> {
2213 Ok(match value {
2214 Some(val) => Decimal128Array::from(vec![val; size])
2215 .with_precision_and_scale(precision, scale)?,
2216 None => {
2217 let mut builder = Decimal128Array::builder(size)
2218 .with_precision_and_scale(precision, scale)?;
2219 builder.append_nulls(size);
2220 builder.finish()
2221 }
2222 })
2223 }
2224
2225 fn build_decimal256_array(
2226 value: Option<i256>,
2227 precision: u8,
2228 scale: i8,
2229 size: usize,
2230 ) -> Result<Decimal256Array> {
2231 Ok(repeat_n(value, size)
2232 .collect::<Decimal256Array>()
2233 .with_precision_and_scale(precision, scale)?)
2234 }
2235
2236 pub fn new_list(
2262 values: &[ScalarValue],
2263 data_type: &DataType,
2264 nullable: bool,
2265 ) -> Arc<ListArray> {
2266 let values = if values.is_empty() {
2267 new_empty_array(data_type)
2268 } else {
2269 Self::iter_to_array(values.iter().cloned()).unwrap()
2270 };
2271 Arc::new(
2272 SingleRowListArrayBuilder::new(values)
2273 .with_nullable(nullable)
2274 .build_list_array(),
2275 )
2276 }
2277
2278 pub fn new_list_nullable(
2280 values: &[ScalarValue],
2281 data_type: &DataType,
2282 ) -> Arc<ListArray> {
2283 Self::new_list(values, data_type, true)
2284 }
2285
2286 pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2290 let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2291 Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2292 &data_type, null_len,
2293 ))))
2294 }
2295
2296 pub fn new_list_from_iter(
2322 values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2323 data_type: &DataType,
2324 nullable: bool,
2325 ) -> Arc<ListArray> {
2326 let values = if values.len() == 0 {
2327 new_empty_array(data_type)
2328 } else {
2329 Self::iter_to_array(values).unwrap()
2330 };
2331 Arc::new(
2332 SingleRowListArrayBuilder::new(values)
2333 .with_nullable(nullable)
2334 .build_list_array(),
2335 )
2336 }
2337
2338 pub fn new_large_list(
2364 values: &[ScalarValue],
2365 data_type: &DataType,
2366 ) -> Arc<LargeListArray> {
2367 let values = if values.is_empty() {
2368 new_empty_array(data_type)
2369 } else {
2370 Self::iter_to_array(values.iter().cloned()).unwrap()
2371 };
2372 Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2373 }
2374
2375 pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2385 Ok(match self {
2386 ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2387 ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2388 ),
2389 ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2390 ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2391 ),
2392 ScalarValue::Boolean(e) => {
2393 Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2394 }
2395 ScalarValue::Float64(e) => {
2396 build_array_from_option!(Float64, Float64Array, e, size)
2397 }
2398 ScalarValue::Float32(e) => {
2399 build_array_from_option!(Float32, Float32Array, e, size)
2400 }
2401 ScalarValue::Float16(e) => {
2402 build_array_from_option!(Float16, Float16Array, e, size)
2403 }
2404 ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2405 ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2406 ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2407 ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2408 ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2409 ScalarValue::UInt16(e) => {
2410 build_array_from_option!(UInt16, UInt16Array, e, size)
2411 }
2412 ScalarValue::UInt32(e) => {
2413 build_array_from_option!(UInt32, UInt32Array, e, size)
2414 }
2415 ScalarValue::UInt64(e) => {
2416 build_array_from_option!(UInt64, UInt64Array, e, size)
2417 }
2418 ScalarValue::TimestampSecond(e, tz_opt) => {
2419 build_timestamp_array_from_option!(
2420 TimeUnit::Second,
2421 tz_opt.clone(),
2422 TimestampSecondArray,
2423 e,
2424 size
2425 )
2426 }
2427 ScalarValue::TimestampMillisecond(e, tz_opt) => {
2428 build_timestamp_array_from_option!(
2429 TimeUnit::Millisecond,
2430 tz_opt.clone(),
2431 TimestampMillisecondArray,
2432 e,
2433 size
2434 )
2435 }
2436
2437 ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2438 build_timestamp_array_from_option!(
2439 TimeUnit::Microsecond,
2440 tz_opt.clone(),
2441 TimestampMicrosecondArray,
2442 e,
2443 size
2444 )
2445 }
2446 ScalarValue::TimestampNanosecond(e, tz_opt) => {
2447 build_timestamp_array_from_option!(
2448 TimeUnit::Nanosecond,
2449 tz_opt.clone(),
2450 TimestampNanosecondArray,
2451 e,
2452 size
2453 )
2454 }
2455 ScalarValue::Utf8(e) => match e {
2456 Some(value) => {
2457 Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2458 }
2459 None => new_null_array(&DataType::Utf8, size),
2460 },
2461 ScalarValue::Utf8View(e) => match e {
2462 Some(value) => {
2463 Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2464 }
2465 None => new_null_array(&DataType::Utf8View, size),
2466 },
2467 ScalarValue::LargeUtf8(e) => match e {
2468 Some(value) => {
2469 Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2470 }
2471 None => new_null_array(&DataType::LargeUtf8, size),
2472 },
2473 ScalarValue::Binary(e) => match e {
2474 Some(value) => Arc::new(
2475 repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2476 ),
2477 None => Arc::new(repeat_n(None::<&str>, size).collect::<BinaryArray>()),
2478 },
2479 ScalarValue::BinaryView(e) => match e {
2480 Some(value) => Arc::new(
2481 repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2482 ),
2483 None => {
2484 Arc::new(repeat_n(None::<&str>, size).collect::<BinaryViewArray>())
2485 }
2486 },
2487 ScalarValue::FixedSizeBinary(s, e) => match e {
2488 Some(value) => Arc::new(
2489 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2490 repeat_n(Some(value.as_slice()), size),
2491 *s,
2492 )
2493 .unwrap(),
2494 ),
2495 None => Arc::new(
2496 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2497 repeat_n(None::<&[u8]>, size),
2498 *s,
2499 )
2500 .unwrap(),
2501 ),
2502 },
2503 ScalarValue::LargeBinary(e) => match e {
2504 Some(value) => Arc::new(
2505 repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2506 ),
2507 None => {
2508 Arc::new(repeat_n(None::<&str>, size).collect::<LargeBinaryArray>())
2509 }
2510 },
2511 ScalarValue::List(arr) => {
2512 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2513 }
2514 ScalarValue::LargeList(arr) => {
2515 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2516 }
2517 ScalarValue::FixedSizeList(arr) => {
2518 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2519 }
2520 ScalarValue::Struct(arr) => {
2521 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2522 }
2523 ScalarValue::Map(arr) => {
2524 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2525 }
2526 ScalarValue::Date32(e) => {
2527 build_array_from_option!(Date32, Date32Array, e, size)
2528 }
2529 ScalarValue::Date64(e) => {
2530 build_array_from_option!(Date64, Date64Array, e, size)
2531 }
2532 ScalarValue::Time32Second(e) => {
2533 build_array_from_option!(
2534 Time32,
2535 TimeUnit::Second,
2536 Time32SecondArray,
2537 e,
2538 size
2539 )
2540 }
2541 ScalarValue::Time32Millisecond(e) => {
2542 build_array_from_option!(
2543 Time32,
2544 TimeUnit::Millisecond,
2545 Time32MillisecondArray,
2546 e,
2547 size
2548 )
2549 }
2550 ScalarValue::Time64Microsecond(e) => {
2551 build_array_from_option!(
2552 Time64,
2553 TimeUnit::Microsecond,
2554 Time64MicrosecondArray,
2555 e,
2556 size
2557 )
2558 }
2559 ScalarValue::Time64Nanosecond(e) => {
2560 build_array_from_option!(
2561 Time64,
2562 TimeUnit::Nanosecond,
2563 Time64NanosecondArray,
2564 e,
2565 size
2566 )
2567 }
2568 ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2569 Interval,
2570 IntervalUnit::DayTime,
2571 IntervalDayTimeArray,
2572 e,
2573 size
2574 ),
2575 ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2576 Interval,
2577 IntervalUnit::YearMonth,
2578 IntervalYearMonthArray,
2579 e,
2580 size
2581 ),
2582 ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2583 Interval,
2584 IntervalUnit::MonthDayNano,
2585 IntervalMonthDayNanoArray,
2586 e,
2587 size
2588 ),
2589 ScalarValue::DurationSecond(e) => build_array_from_option!(
2590 Duration,
2591 TimeUnit::Second,
2592 DurationSecondArray,
2593 e,
2594 size
2595 ),
2596 ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2597 Duration,
2598 TimeUnit::Millisecond,
2599 DurationMillisecondArray,
2600 e,
2601 size
2602 ),
2603 ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2604 Duration,
2605 TimeUnit::Microsecond,
2606 DurationMicrosecondArray,
2607 e,
2608 size
2609 ),
2610 ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2611 Duration,
2612 TimeUnit::Nanosecond,
2613 DurationNanosecondArray,
2614 e,
2615 size
2616 ),
2617 ScalarValue::Union(value, fields, mode) => match value {
2618 Some((v_id, value)) => {
2619 let mut new_fields = Vec::with_capacity(fields.len());
2620 let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2621 for (f_id, field) in fields.iter() {
2622 let ar = if f_id == *v_id {
2623 value.to_array_of_size(size)?
2624 } else {
2625 let dt = field.data_type();
2626 match mode {
2627 UnionMode::Sparse => new_null_array(dt, size),
2628 UnionMode::Dense => new_null_array(dt, 0),
2631 }
2632 };
2633 let field = (**field).clone();
2634 child_arrays.push(ar);
2635 new_fields.push(field.clone());
2636 }
2637 let type_ids = repeat_n(*v_id, size);
2638 let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2639 let value_offsets = match mode {
2640 UnionMode::Sparse => None,
2641 UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
2642 };
2643 let ar = UnionArray::try_new(
2644 fields.clone(),
2645 type_ids,
2646 value_offsets,
2647 child_arrays,
2648 )
2649 .map_err(|e| DataFusionError::ArrowError(e, None))?;
2650 Arc::new(ar)
2651 }
2652 None => {
2653 let dt = self.data_type();
2654 new_null_array(&dt, size)
2655 }
2656 },
2657 ScalarValue::Dictionary(key_type, v) => {
2658 match key_type.as_ref() {
2660 DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2661 DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2662 DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2663 DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2664 DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2665 DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2666 DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2667 DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2668 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2669 }
2670 }
2671 ScalarValue::Null => new_null_array(&DataType::Null, size),
2672 })
2673 }
2674
2675 fn get_decimal_value_from_array(
2676 array: &dyn Array,
2677 index: usize,
2678 precision: u8,
2679 scale: i8,
2680 ) -> Result<ScalarValue> {
2681 match array.data_type() {
2682 DataType::Decimal128(_, _) => {
2683 let array = as_decimal128_array(array)?;
2684 if array.is_null(index) {
2685 Ok(ScalarValue::Decimal128(None, precision, scale))
2686 } else {
2687 let value = array.value(index);
2688 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2689 }
2690 }
2691 DataType::Decimal256(_, _) => {
2692 let array = as_decimal256_array(array)?;
2693 if array.is_null(index) {
2694 Ok(ScalarValue::Decimal256(None, precision, scale))
2695 } else {
2696 let value = array.value(index);
2697 Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2698 }
2699 }
2700 _ => _internal_err!("Unsupported decimal type"),
2701 }
2702 }
2703
2704 fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2705 let arrays = repeat_n(arr, size).collect::<Vec<_>>();
2706 let ret = match !arrays.is_empty() {
2707 true => arrow::compute::concat(arrays.as_slice())?,
2708 false => arr.slice(0, 0),
2709 };
2710 Ok(ret)
2711 }
2712
2713 pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2781 let mut scalars = Vec::with_capacity(array.len());
2782
2783 for index in 0..array.len() {
2784 let nested_array = array.as_list::<i32>().value(index);
2785 let scalar_values = (0..nested_array.len())
2786 .map(|i| ScalarValue::try_from_array(&nested_array, i))
2787 .collect::<Result<Vec<_>>>()?;
2788 scalars.push(scalar_values);
2789 }
2790
2791 Ok(scalars)
2792 }
2793
2794 #[deprecated(
2795 since = "46.0.0",
2796 note = "This function is obsolete. Use `to_array` instead"
2797 )]
2798 pub fn raw_data(&self) -> Result<ArrayRef> {
2799 match self {
2800 ScalarValue::List(arr) => Ok(arr.to_owned()),
2801 _ => _internal_err!("ScalarValue is not a list"),
2802 }
2803 }
2804
2805 pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
2807 if !array.is_valid(index) {
2809 return array.data_type().try_into();
2810 }
2811
2812 Ok(match array.data_type() {
2813 DataType::Null => ScalarValue::Null,
2814 DataType::Decimal128(precision, scale) => {
2815 ScalarValue::get_decimal_value_from_array(
2816 array, index, *precision, *scale,
2817 )?
2818 }
2819 DataType::Decimal256(precision, scale) => {
2820 ScalarValue::get_decimal_value_from_array(
2821 array, index, *precision, *scale,
2822 )?
2823 }
2824 DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)?,
2825 DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)?,
2826 DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?,
2827 DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?,
2828 DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)?,
2829 DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?,
2830 DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?,
2831 DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?,
2832 DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)?,
2833 DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)?,
2834 DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?,
2835 DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?,
2836 DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?,
2837 DataType::LargeBinary => {
2838 typed_cast!(array, index, LargeBinaryArray, LargeBinary)?
2839 }
2840 DataType::BinaryView => {
2841 typed_cast!(array, index, BinaryViewArray, BinaryView)?
2842 }
2843 DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?,
2844 DataType::LargeUtf8 => {
2845 typed_cast!(array, index, LargeStringArray, LargeUtf8)?
2846 }
2847 DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?,
2848 DataType::List(field) => {
2849 let list_array = array.as_list::<i32>();
2850 let nested_array = list_array.value(index);
2851 SingleRowListArrayBuilder::new(nested_array)
2853 .with_field(field)
2854 .build_list_scalar()
2855 }
2856 DataType::LargeList(field) => {
2857 let list_array = as_large_list_array(array);
2858 let nested_array = list_array.value(index);
2859 SingleRowListArrayBuilder::new(nested_array)
2861 .with_field(field)
2862 .build_large_list_scalar()
2863 }
2864 DataType::FixedSizeList(field, _) => {
2866 let list_array = as_fixed_size_list_array(array)?;
2867 let nested_array = list_array.value(index);
2868 let list_size = nested_array.len();
2870 SingleRowListArrayBuilder::new(nested_array)
2871 .with_field(field)
2872 .build_fixed_size_list_scalar(list_size)
2873 }
2874 DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?,
2875 DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?,
2876 DataType::Time32(TimeUnit::Second) => {
2877 typed_cast!(array, index, Time32SecondArray, Time32Second)?
2878 }
2879 DataType::Time32(TimeUnit::Millisecond) => {
2880 typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)?
2881 }
2882 DataType::Time64(TimeUnit::Microsecond) => {
2883 typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)?
2884 }
2885 DataType::Time64(TimeUnit::Nanosecond) => {
2886 typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)?
2887 }
2888 DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
2889 array,
2890 index,
2891 TimestampSecondArray,
2892 TimestampSecond,
2893 tz_opt
2894 )?,
2895 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
2896 array,
2897 index,
2898 TimestampMillisecondArray,
2899 TimestampMillisecond,
2900 tz_opt
2901 )?,
2902 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
2903 array,
2904 index,
2905 TimestampMicrosecondArray,
2906 TimestampMicrosecond,
2907 tz_opt
2908 )?,
2909 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
2910 array,
2911 index,
2912 TimestampNanosecondArray,
2913 TimestampNanosecond,
2914 tz_opt
2915 )?,
2916 DataType::Dictionary(key_type, _) => {
2917 let (values_array, values_index) = match key_type.as_ref() {
2918 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
2919 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
2920 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
2921 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
2922 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
2923 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
2924 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
2925 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
2926 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2927 };
2928 let value = match values_index {
2930 Some(values_index) => {
2931 ScalarValue::try_from_array(values_array, values_index)
2932 }
2933 None => values_array.data_type().try_into(),
2935 }?;
2936
2937 Self::Dictionary(key_type.clone(), Box::new(value))
2938 }
2939 DataType::Struct(_) => {
2940 let a = array.slice(index, 1);
2941 Self::Struct(Arc::new(a.as_struct().to_owned()))
2942 }
2943 DataType::FixedSizeBinary(_) => {
2944 let array = as_fixed_size_binary_array(array)?;
2945 let size = match array.data_type() {
2946 DataType::FixedSizeBinary(size) => *size,
2947 _ => unreachable!(),
2948 };
2949 ScalarValue::FixedSizeBinary(
2950 size,
2951 match array.is_null(index) {
2952 true => None,
2953 false => Some(array.value(index).into()),
2954 },
2955 )
2956 }
2957 DataType::Interval(IntervalUnit::DayTime) => {
2958 typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)?
2959 }
2960 DataType::Interval(IntervalUnit::YearMonth) => {
2961 typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)?
2962 }
2963 DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!(
2964 array,
2965 index,
2966 IntervalMonthDayNanoArray,
2967 IntervalMonthDayNano
2968 )?,
2969
2970 DataType::Duration(TimeUnit::Second) => {
2971 typed_cast!(array, index, DurationSecondArray, DurationSecond)?
2972 }
2973 DataType::Duration(TimeUnit::Millisecond) => {
2974 typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)?
2975 }
2976 DataType::Duration(TimeUnit::Microsecond) => {
2977 typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)?
2978 }
2979 DataType::Duration(TimeUnit::Nanosecond) => {
2980 typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)?
2981 }
2982 DataType::Map(_, _) => {
2983 let a = array.slice(index, 1);
2984 Self::Map(Arc::new(a.as_map().to_owned()))
2985 }
2986 DataType::Union(fields, mode) => {
2987 let array = as_union_array(array);
2988 let ti = array.type_id(index);
2989 let index = array.value_offset(index);
2990 let value = ScalarValue::try_from_array(array.child(ti), index)?;
2991 ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
2992 }
2993 other => {
2994 return _not_impl_err!(
2995 "Can't create a scalar from array of type \"{other:?}\""
2996 );
2997 }
2998 })
2999 }
3000
3001 pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3003 ScalarValue::from(value).cast_to(target_type)
3004 }
3005
3006 pub fn try_as_str(&self) -> Option<Option<&str>> {
3040 let v = match self {
3041 ScalarValue::Utf8(v) => v,
3042 ScalarValue::LargeUtf8(v) => v,
3043 ScalarValue::Utf8View(v) => v,
3044 ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3045 _ => return None,
3046 };
3047 Some(v.as_ref().map(|v| v.as_str()))
3048 }
3049
3050 pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3052 self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3053 }
3054
3055 pub fn cast_to_with_options(
3057 &self,
3058 target_type: &DataType,
3059 cast_options: &CastOptions<'static>,
3060 ) -> Result<Self> {
3061 let scalar_array = match (self, target_type) {
3062 (
3063 ScalarValue::Float64(Some(float_ts)),
3064 DataType::Timestamp(TimeUnit::Nanosecond, None),
3065 ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
3066 .to_array()?,
3067 (
3068 ScalarValue::Decimal128(Some(decimal_value), _, scale),
3069 DataType::Timestamp(time_unit, None),
3070 ) => {
3071 let scale_factor = 10_i128.pow(*scale as u32);
3072 let seconds = decimal_value / scale_factor;
3073 let fraction = decimal_value % scale_factor;
3074
3075 let timestamp_value = match time_unit {
3076 TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)),
3077 TimeUnit::Millisecond => {
3078 let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor;
3079 ScalarValue::Int64(Some(millis as i64))
3080 }
3081 TimeUnit::Microsecond => {
3082 let micros =
3083 seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor;
3084 ScalarValue::Int64(Some(micros as i64))
3085 }
3086 TimeUnit::Nanosecond => {
3087 let nanos = seconds * 1_000_000_000
3088 + (fraction * 1_000_000_000) / scale_factor;
3089 ScalarValue::Int64(Some(nanos as i64))
3090 }
3091 };
3092
3093 timestamp_value.to_array()?
3094 }
3095 _ => self.to_array()?,
3096 };
3097
3098 let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3099 ScalarValue::try_from_array(&cast_arr, 0)
3100 }
3101
3102 fn eq_array_decimal(
3103 array: &ArrayRef,
3104 index: usize,
3105 value: Option<&i128>,
3106 precision: u8,
3107 scale: i8,
3108 ) -> Result<bool> {
3109 let array = as_decimal128_array(array)?;
3110 if array.precision() != precision || array.scale() != scale {
3111 return Ok(false);
3112 }
3113 let is_null = array.is_null(index);
3114 if let Some(v) = value {
3115 Ok(!array.is_null(index) && array.value(index) == *v)
3116 } else {
3117 Ok(is_null)
3118 }
3119 }
3120
3121 fn eq_array_decimal256(
3122 array: &ArrayRef,
3123 index: usize,
3124 value: Option<&i256>,
3125 precision: u8,
3126 scale: i8,
3127 ) -> Result<bool> {
3128 let array = as_decimal256_array(array)?;
3129 if array.precision() != precision || array.scale() != scale {
3130 return Ok(false);
3131 }
3132 let is_null = array.is_null(index);
3133 if let Some(v) = value {
3134 Ok(!array.is_null(index) && array.value(index) == *v)
3135 } else {
3136 Ok(is_null)
3137 }
3138 }
3139
3140 #[inline]
3167 pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
3168 Ok(match self {
3169 ScalarValue::Decimal128(v, precision, scale) => {
3170 ScalarValue::eq_array_decimal(
3171 array,
3172 index,
3173 v.as_ref(),
3174 *precision,
3175 *scale,
3176 )?
3177 }
3178 ScalarValue::Decimal256(v, precision, scale) => {
3179 ScalarValue::eq_array_decimal256(
3180 array,
3181 index,
3182 v.as_ref(),
3183 *precision,
3184 *scale,
3185 )?
3186 }
3187 ScalarValue::Boolean(val) => {
3188 eq_array_primitive!(array, index, BooleanArray, val)?
3189 }
3190 ScalarValue::Float16(val) => {
3191 eq_array_primitive!(array, index, Float16Array, val)?
3192 }
3193 ScalarValue::Float32(val) => {
3194 eq_array_primitive!(array, index, Float32Array, val)?
3195 }
3196 ScalarValue::Float64(val) => {
3197 eq_array_primitive!(array, index, Float64Array, val)?
3198 }
3199 ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?,
3200 ScalarValue::Int16(val) => {
3201 eq_array_primitive!(array, index, Int16Array, val)?
3202 }
3203 ScalarValue::Int32(val) => {
3204 eq_array_primitive!(array, index, Int32Array, val)?
3205 }
3206 ScalarValue::Int64(val) => {
3207 eq_array_primitive!(array, index, Int64Array, val)?
3208 }
3209 ScalarValue::UInt8(val) => {
3210 eq_array_primitive!(array, index, UInt8Array, val)?
3211 }
3212 ScalarValue::UInt16(val) => {
3213 eq_array_primitive!(array, index, UInt16Array, val)?
3214 }
3215 ScalarValue::UInt32(val) => {
3216 eq_array_primitive!(array, index, UInt32Array, val)?
3217 }
3218 ScalarValue::UInt64(val) => {
3219 eq_array_primitive!(array, index, UInt64Array, val)?
3220 }
3221 ScalarValue::Utf8(val) => {
3222 eq_array_primitive!(array, index, StringArray, val)?
3223 }
3224 ScalarValue::Utf8View(val) => {
3225 eq_array_primitive!(array, index, StringViewArray, val)?
3226 }
3227 ScalarValue::LargeUtf8(val) => {
3228 eq_array_primitive!(array, index, LargeStringArray, val)?
3229 }
3230 ScalarValue::Binary(val) => {
3231 eq_array_primitive!(array, index, BinaryArray, val)?
3232 }
3233 ScalarValue::BinaryView(val) => {
3234 eq_array_primitive!(array, index, BinaryViewArray, val)?
3235 }
3236 ScalarValue::FixedSizeBinary(_, val) => {
3237 eq_array_primitive!(array, index, FixedSizeBinaryArray, val)?
3238 }
3239 ScalarValue::LargeBinary(val) => {
3240 eq_array_primitive!(array, index, LargeBinaryArray, val)?
3241 }
3242 ScalarValue::List(arr) => {
3243 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3244 }
3245 ScalarValue::LargeList(arr) => {
3246 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3247 }
3248 ScalarValue::FixedSizeList(arr) => {
3249 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3250 }
3251 ScalarValue::Struct(arr) => {
3252 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3253 }
3254 ScalarValue::Map(arr) => {
3255 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3256 }
3257 ScalarValue::Date32(val) => {
3258 eq_array_primitive!(array, index, Date32Array, val)?
3259 }
3260 ScalarValue::Date64(val) => {
3261 eq_array_primitive!(array, index, Date64Array, val)?
3262 }
3263 ScalarValue::Time32Second(val) => {
3264 eq_array_primitive!(array, index, Time32SecondArray, val)?
3265 }
3266 ScalarValue::Time32Millisecond(val) => {
3267 eq_array_primitive!(array, index, Time32MillisecondArray, val)?
3268 }
3269 ScalarValue::Time64Microsecond(val) => {
3270 eq_array_primitive!(array, index, Time64MicrosecondArray, val)?
3271 }
3272 ScalarValue::Time64Nanosecond(val) => {
3273 eq_array_primitive!(array, index, Time64NanosecondArray, val)?
3274 }
3275 ScalarValue::TimestampSecond(val, _) => {
3276 eq_array_primitive!(array, index, TimestampSecondArray, val)?
3277 }
3278 ScalarValue::TimestampMillisecond(val, _) => {
3279 eq_array_primitive!(array, index, TimestampMillisecondArray, val)?
3280 }
3281 ScalarValue::TimestampMicrosecond(val, _) => {
3282 eq_array_primitive!(array, index, TimestampMicrosecondArray, val)?
3283 }
3284 ScalarValue::TimestampNanosecond(val, _) => {
3285 eq_array_primitive!(array, index, TimestampNanosecondArray, val)?
3286 }
3287 ScalarValue::IntervalYearMonth(val) => {
3288 eq_array_primitive!(array, index, IntervalYearMonthArray, val)?
3289 }
3290 ScalarValue::IntervalDayTime(val) => {
3291 eq_array_primitive!(array, index, IntervalDayTimeArray, val)?
3292 }
3293 ScalarValue::IntervalMonthDayNano(val) => {
3294 eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)?
3295 }
3296 ScalarValue::DurationSecond(val) => {
3297 eq_array_primitive!(array, index, DurationSecondArray, val)?
3298 }
3299 ScalarValue::DurationMillisecond(val) => {
3300 eq_array_primitive!(array, index, DurationMillisecondArray, val)?
3301 }
3302 ScalarValue::DurationMicrosecond(val) => {
3303 eq_array_primitive!(array, index, DurationMicrosecondArray, val)?
3304 }
3305 ScalarValue::DurationNanosecond(val) => {
3306 eq_array_primitive!(array, index, DurationNanosecondArray, val)?
3307 }
3308 ScalarValue::Union(value, _, _) => {
3309 let array = as_union_array(array);
3310 let ti = array.type_id(index);
3311 let index = array.value_offset(index);
3312 if let Some((ti_v, value)) = value {
3313 ti_v == &ti && value.eq_array(array.child(ti), index)?
3314 } else {
3315 array.child(ti).is_null(index)
3316 }
3317 }
3318 ScalarValue::Dictionary(key_type, v) => {
3319 let (values_array, values_index) = match key_type.as_ref() {
3320 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3321 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3322 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3323 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3324 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3325 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3326 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3327 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3328 _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
3329 };
3330 match values_index {
3332 Some(values_index) => v.eq_array(values_array, values_index)?,
3333 None => v.is_null(),
3334 }
3335 }
3336 ScalarValue::Null => array.is_null(index),
3337 })
3338 }
3339
3340 fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3341 let right = arr2.slice(index, 1);
3342 arr1 == &right
3343 }
3344
3345 pub fn size(&self) -> usize {
3348 size_of_val(self)
3349 + match self {
3350 ScalarValue::Null
3351 | ScalarValue::Boolean(_)
3352 | ScalarValue::Float16(_)
3353 | ScalarValue::Float32(_)
3354 | ScalarValue::Float64(_)
3355 | ScalarValue::Decimal128(_, _, _)
3356 | ScalarValue::Decimal256(_, _, _)
3357 | ScalarValue::Int8(_)
3358 | ScalarValue::Int16(_)
3359 | ScalarValue::Int32(_)
3360 | ScalarValue::Int64(_)
3361 | ScalarValue::UInt8(_)
3362 | ScalarValue::UInt16(_)
3363 | ScalarValue::UInt32(_)
3364 | ScalarValue::UInt64(_)
3365 | ScalarValue::Date32(_)
3366 | ScalarValue::Date64(_)
3367 | ScalarValue::Time32Second(_)
3368 | ScalarValue::Time32Millisecond(_)
3369 | ScalarValue::Time64Microsecond(_)
3370 | ScalarValue::Time64Nanosecond(_)
3371 | ScalarValue::IntervalYearMonth(_)
3372 | ScalarValue::IntervalDayTime(_)
3373 | ScalarValue::IntervalMonthDayNano(_)
3374 | ScalarValue::DurationSecond(_)
3375 | ScalarValue::DurationMillisecond(_)
3376 | ScalarValue::DurationMicrosecond(_)
3377 | ScalarValue::DurationNanosecond(_) => 0,
3378 ScalarValue::Utf8(s)
3379 | ScalarValue::LargeUtf8(s)
3380 | ScalarValue::Utf8View(s) => {
3381 s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3382 }
3383 ScalarValue::TimestampSecond(_, s)
3384 | ScalarValue::TimestampMillisecond(_, s)
3385 | ScalarValue::TimestampMicrosecond(_, s)
3386 | ScalarValue::TimestampNanosecond(_, s) => {
3387 s.as_ref().map(|s| s.len()).unwrap_or_default()
3388 }
3389 ScalarValue::Binary(b)
3390 | ScalarValue::FixedSizeBinary(_, b)
3391 | ScalarValue::LargeBinary(b)
3392 | ScalarValue::BinaryView(b) => {
3393 b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3394 }
3395 ScalarValue::List(arr) => arr.get_array_memory_size(),
3396 ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3397 ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3398 ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3399 ScalarValue::Map(arr) => arr.get_array_memory_size(),
3400 ScalarValue::Union(vals, fields, _mode) => {
3401 vals.as_ref()
3402 .map(|(_id, sv)| sv.size() - size_of_val(sv))
3403 .unwrap_or_default()
3404 + size_of_val(fields)
3406 + (size_of::<Field>() * fields.len())
3407 + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
3408 }
3409 ScalarValue::Dictionary(dt, sv) => {
3410 dt.size() + sv.size()
3412 }
3413 }
3414 }
3415
3416 pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3420 size_of_val(vec)
3421 + (size_of::<ScalarValue>() * vec.capacity())
3422 + vec
3423 .iter()
3424 .map(|sv| sv.size() - size_of_val(sv))
3425 .sum::<usize>()
3426 }
3427
3428 pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3432 size_of_val(vec_deque)
3433 + (size_of::<ScalarValue>() * vec_deque.capacity())
3434 + vec_deque
3435 .iter()
3436 .map(|sv| sv.size() - size_of_val(sv))
3437 .sum::<usize>()
3438 }
3439
3440 pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3444 size_of_val(set)
3445 + (size_of::<ScalarValue>() * set.capacity())
3446 + set
3447 .iter()
3448 .map(|sv| sv.size() - size_of_val(sv))
3449 .sum::<usize>()
3450 }
3451
3452 pub fn compact(&mut self) {
3458 match self {
3459 ScalarValue::Null
3460 | ScalarValue::Boolean(_)
3461 | ScalarValue::Float16(_)
3462 | ScalarValue::Float32(_)
3463 | ScalarValue::Float64(_)
3464 | ScalarValue::Decimal128(_, _, _)
3465 | ScalarValue::Decimal256(_, _, _)
3466 | ScalarValue::Int8(_)
3467 | ScalarValue::Int16(_)
3468 | ScalarValue::Int32(_)
3469 | ScalarValue::Int64(_)
3470 | ScalarValue::UInt8(_)
3471 | ScalarValue::UInt16(_)
3472 | ScalarValue::UInt32(_)
3473 | ScalarValue::UInt64(_)
3474 | ScalarValue::Date32(_)
3475 | ScalarValue::Date64(_)
3476 | ScalarValue::Time32Second(_)
3477 | ScalarValue::Time32Millisecond(_)
3478 | ScalarValue::Time64Microsecond(_)
3479 | ScalarValue::Time64Nanosecond(_)
3480 | ScalarValue::IntervalYearMonth(_)
3481 | ScalarValue::IntervalDayTime(_)
3482 | ScalarValue::IntervalMonthDayNano(_)
3483 | ScalarValue::DurationSecond(_)
3484 | ScalarValue::DurationMillisecond(_)
3485 | ScalarValue::DurationMicrosecond(_)
3486 | ScalarValue::DurationNanosecond(_)
3487 | ScalarValue::Utf8(_)
3488 | ScalarValue::LargeUtf8(_)
3489 | ScalarValue::Utf8View(_)
3490 | ScalarValue::TimestampSecond(_, _)
3491 | ScalarValue::TimestampMillisecond(_, _)
3492 | ScalarValue::TimestampMicrosecond(_, _)
3493 | ScalarValue::TimestampNanosecond(_, _)
3494 | ScalarValue::Binary(_)
3495 | ScalarValue::FixedSizeBinary(_, _)
3496 | ScalarValue::LargeBinary(_)
3497 | ScalarValue::BinaryView(_) => (),
3498 ScalarValue::FixedSizeList(arr) => {
3499 let array = copy_array_data(&arr.to_data());
3500 *Arc::make_mut(arr) = FixedSizeListArray::from(array);
3501 }
3502 ScalarValue::List(arr) => {
3503 let array = copy_array_data(&arr.to_data());
3504 *Arc::make_mut(arr) = ListArray::from(array);
3505 }
3506 ScalarValue::LargeList(arr) => {
3507 let array = copy_array_data(&arr.to_data());
3508 *Arc::make_mut(arr) = LargeListArray::from(array)
3509 }
3510 ScalarValue::Struct(arr) => {
3511 let array = copy_array_data(&arr.to_data());
3512 *Arc::make_mut(arr) = StructArray::from(array);
3513 }
3514 ScalarValue::Map(arr) => {
3515 let array = copy_array_data(&arr.to_data());
3516 *Arc::make_mut(arr) = MapArray::from(array);
3517 }
3518 ScalarValue::Union(val, _, _) => {
3519 if let Some((_, value)) = val.as_mut() {
3520 value.compact();
3521 }
3522 }
3523 ScalarValue::Dictionary(_, value) => {
3524 value.compact();
3525 }
3526 }
3527 }
3528}
3529
3530pub fn copy_array_data(data: &ArrayData) -> ArrayData {
3531 let mut copy = MutableArrayData::new(vec![&data], true, data.len());
3532 copy.extend(0, 0, data.len());
3533 copy.freeze()
3534}
3535
3536macro_rules! impl_scalar {
3537 ($ty:ty, $scalar:tt) => {
3538 impl From<$ty> for ScalarValue {
3539 fn from(value: $ty) -> Self {
3540 ScalarValue::$scalar(Some(value))
3541 }
3542 }
3543
3544 impl From<Option<$ty>> for ScalarValue {
3545 fn from(value: Option<$ty>) -> Self {
3546 ScalarValue::$scalar(value)
3547 }
3548 }
3549 };
3550}
3551
3552impl_scalar!(f64, Float64);
3553impl_scalar!(f32, Float32);
3554impl_scalar!(i8, Int8);
3555impl_scalar!(i16, Int16);
3556impl_scalar!(i32, Int32);
3557impl_scalar!(i64, Int64);
3558impl_scalar!(bool, Boolean);
3559impl_scalar!(u8, UInt8);
3560impl_scalar!(u16, UInt16);
3561impl_scalar!(u32, UInt32);
3562impl_scalar!(u64, UInt64);
3563
3564impl From<&str> for ScalarValue {
3565 fn from(value: &str) -> Self {
3566 Some(value).into()
3567 }
3568}
3569
3570impl From<Option<&str>> for ScalarValue {
3571 fn from(value: Option<&str>) -> Self {
3572 let value = value.map(|s| s.to_string());
3573 ScalarValue::Utf8(value)
3574 }
3575}
3576
3577impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3579 fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3580 value
3581 .into_iter()
3582 .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3583 builder.with_name_and_scalar(name, value)
3584 })
3585 .build()
3586 .unwrap()
3587 }
3588}
3589
3590impl FromStr for ScalarValue {
3591 type Err = Infallible;
3592
3593 fn from_str(s: &str) -> Result<Self, Self::Err> {
3594 Ok(s.into())
3595 }
3596}
3597
3598impl From<String> for ScalarValue {
3599 fn from(value: String) -> Self {
3600 ScalarValue::Utf8(Some(value))
3601 }
3602}
3603
3604macro_rules! impl_try_from {
3605 ($SCALAR:ident, $NATIVE:ident) => {
3606 impl TryFrom<ScalarValue> for $NATIVE {
3607 type Error = DataFusionError;
3608
3609 fn try_from(value: ScalarValue) -> Result<Self> {
3610 match value {
3611 ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
3612 _ => _internal_err!(
3613 "Cannot convert {:?} to {}",
3614 value,
3615 std::any::type_name::<Self>()
3616 ),
3617 }
3618 }
3619 }
3620 };
3621}
3622
3623impl_try_from!(Int8, i8);
3624impl_try_from!(Int16, i16);
3625
3626impl TryFrom<ScalarValue> for i32 {
3628 type Error = DataFusionError;
3629
3630 fn try_from(value: ScalarValue) -> Result<Self> {
3631 match value {
3632 ScalarValue::Int32(Some(inner_value))
3633 | ScalarValue::Date32(Some(inner_value))
3634 | ScalarValue::Time32Second(Some(inner_value))
3635 | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3636 _ => _internal_err!(
3637 "Cannot convert {:?} to {}",
3638 value,
3639 std::any::type_name::<Self>()
3640 ),
3641 }
3642 }
3643}
3644
3645impl TryFrom<ScalarValue> for i64 {
3647 type Error = DataFusionError;
3648
3649 fn try_from(value: ScalarValue) -> Result<Self> {
3650 match value {
3651 ScalarValue::Int64(Some(inner_value))
3652 | ScalarValue::Date64(Some(inner_value))
3653 | ScalarValue::Time64Microsecond(Some(inner_value))
3654 | ScalarValue::Time64Nanosecond(Some(inner_value))
3655 | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3656 | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3657 | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3658 | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3659 _ => _internal_err!(
3660 "Cannot convert {:?} to {}",
3661 value,
3662 std::any::type_name::<Self>()
3663 ),
3664 }
3665 }
3666}
3667
3668impl TryFrom<ScalarValue> for i128 {
3670 type Error = DataFusionError;
3671
3672 fn try_from(value: ScalarValue) -> Result<Self> {
3673 match value {
3674 ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3675 _ => _internal_err!(
3676 "Cannot convert {:?} to {}",
3677 value,
3678 std::any::type_name::<Self>()
3679 ),
3680 }
3681 }
3682}
3683
3684impl TryFrom<ScalarValue> for i256 {
3686 type Error = DataFusionError;
3687
3688 fn try_from(value: ScalarValue) -> Result<Self> {
3689 match value {
3690 ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3691 _ => _internal_err!(
3692 "Cannot convert {:?} to {}",
3693 value,
3694 std::any::type_name::<Self>()
3695 ),
3696 }
3697 }
3698}
3699
// Single-variant conversions: each native type is extracted only from the
// one `ScalarValue` variant named alongside it.
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
3707
3708impl TryFrom<DataType> for ScalarValue {
3709 type Error = DataFusionError;
3710
3711 fn try_from(datatype: DataType) -> Result<Self> {
3713 (&datatype).try_into()
3714 }
3715}
3716
3717impl TryFrom<&DataType> for ScalarValue {
3718 type Error = DataFusionError;
3719
3720 fn try_from(data_type: &DataType) -> Result<Self> {
3722 Self::try_new_null(data_type)
3723 }
3724}
3725
// Writes an optional value to the formatter `$F`: the value's `Display`
// output when `$EXPR` is `Some`, the literal text `NULL` when it is `None`.
// Evaluates to the `fmt::Result` of the `write!` that was performed.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
3734
3735impl fmt::Display for ScalarValue {
3741 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3742 match self {
3743 ScalarValue::Decimal128(v, p, s) => {
3744 write!(f, "{v:?},{p:?},{s:?}")?;
3745 }
3746 ScalarValue::Decimal256(v, p, s) => {
3747 write!(f, "{v:?},{p:?},{s:?}")?;
3748 }
3749 ScalarValue::Boolean(e) => format_option!(f, e)?,
3750 ScalarValue::Float16(e) => format_option!(f, e)?,
3751 ScalarValue::Float32(e) => format_option!(f, e)?,
3752 ScalarValue::Float64(e) => format_option!(f, e)?,
3753 ScalarValue::Int8(e) => format_option!(f, e)?,
3754 ScalarValue::Int16(e) => format_option!(f, e)?,
3755 ScalarValue::Int32(e) => format_option!(f, e)?,
3756 ScalarValue::Int64(e) => format_option!(f, e)?,
3757 ScalarValue::UInt8(e) => format_option!(f, e)?,
3758 ScalarValue::UInt16(e) => format_option!(f, e)?,
3759 ScalarValue::UInt32(e) => format_option!(f, e)?,
3760 ScalarValue::UInt64(e) => format_option!(f, e)?,
3761 ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3762 ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3763 ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3764 ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3765 ScalarValue::Utf8(e)
3766 | ScalarValue::LargeUtf8(e)
3767 | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3768 ScalarValue::Binary(e)
3769 | ScalarValue::FixedSizeBinary(_, e)
3770 | ScalarValue::LargeBinary(e)
3771 | ScalarValue::BinaryView(e) => match e {
3772 Some(bytes) => {
3773 for b in bytes.iter().take(10) {
3775 write!(f, "{b:02X}")?;
3776 }
3777 if bytes.len() > 10 {
3778 write!(f, "...")?;
3779 }
3780 }
3781 None => write!(f, "NULL")?,
3782 },
3783 ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3784 ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3785 ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3786 ScalarValue::Date32(e) => {
3787 format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
3788 }
3789 ScalarValue::Date64(e) => {
3790 format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))?
3791 }
3792 ScalarValue::Time32Second(e) => format_option!(f, e)?,
3793 ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3794 ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3795 ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3796 ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3797 ScalarValue::IntervalMonthDayNano(e) => {
3798 format_option!(f, e.map(|v| format!("{v:?}")))?
3799 }
3800 ScalarValue::IntervalDayTime(e) => {
3801 format_option!(f, e.map(|v| format!("{v:?}")))?;
3802 }
3803 ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3804 ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3805 ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3806 ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3807 ScalarValue::Struct(struct_arr) => {
3808 assert_eq!(struct_arr.len(), 1);
3810
3811 if struct_arr.null_count() == struct_arr.len() {
3812 write!(f, "NULL")?;
3813 return Ok(());
3814 }
3815
3816 let columns = struct_arr.columns();
3817 let fields = struct_arr.fields();
3818 let nulls = struct_arr.nulls();
3819
3820 write!(
3821 f,
3822 "{{{}}}",
3823 columns
3824 .iter()
3825 .zip(fields.iter())
3826 .map(|(column, field)| {
3827 if nulls.is_some_and(|b| b.is_null(0)) {
3828 format!("{}:NULL", field.name())
3829 } else if let DataType::Struct(_) = field.data_type() {
3830 let sv = ScalarValue::Struct(Arc::new(
3831 column.as_struct().to_owned(),
3832 ));
3833 format!("{}:{sv}", field.name())
3834 } else {
3835 let sv = array_value_to_string(column, 0).unwrap();
3836 format!("{}:{sv}", field.name())
3837 }
3838 })
3839 .collect::<Vec<_>>()
3840 .join(",")
3841 )?
3842 }
3843 ScalarValue::Map(map_arr) => {
3844 if map_arr.null_count() == map_arr.len() {
3845 write!(f, "NULL")?;
3846 return Ok(());
3847 }
3848
3849 write!(
3850 f,
3851 "[{}]",
3852 map_arr
3853 .iter()
3854 .map(|struct_array| {
3855 if let Some(arr) = struct_array {
3856 let mut buffer = VecDeque::new();
3857 for i in 0..arr.len() {
3858 let key =
3859 array_value_to_string(arr.column(0), i).unwrap();
3860 let value =
3861 array_value_to_string(arr.column(1), i).unwrap();
3862 buffer.push_back(format!("{key}:{value}"));
3863 }
3864 format!(
3865 "{{{}}}",
3866 buffer
3867 .into_iter()
3868 .collect::<Vec<_>>()
3869 .join(",")
3870 .as_str()
3871 )
3872 } else {
3873 "NULL".to_string()
3874 }
3875 })
3876 .collect::<Vec<_>>()
3877 .join(",")
3878 )?
3879 }
3880 ScalarValue::Union(val, _fields, _mode) => match val {
3881 Some((id, val)) => write!(f, "{id}:{val}")?,
3882 None => write!(f, "NULL")?,
3883 },
3884 ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3885 ScalarValue::Null => write!(f, "NULL")?,
3886 };
3887 Ok(())
3888 }
3889}
3890
3891fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3892 assert_eq!(arr.len(), 1);
3894 let options = FormatOptions::default().with_display_error(true);
3895 let formatter =
3896 ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3897 let value_formatter = formatter.value(0);
3898 write!(f, "{value_formatter}")
3899}
3900
/// Writes `data` as decimal byte values separated by commas, e.g. `1,2,255`.
/// An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (position, byte) in data.iter().enumerate() {
        // Every byte except the first is preceded by a separator.
        if position > 0 {
            write!(f, ",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
3912
3913impl fmt::Debug for ScalarValue {
3914 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3915 match self {
3916 ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
3917 ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
3918 ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
3919 ScalarValue::Float16(_) => write!(f, "Float16({self})"),
3920 ScalarValue::Float32(_) => write!(f, "Float32({self})"),
3921 ScalarValue::Float64(_) => write!(f, "Float64({self})"),
3922 ScalarValue::Int8(_) => write!(f, "Int8({self})"),
3923 ScalarValue::Int16(_) => write!(f, "Int16({self})"),
3924 ScalarValue::Int32(_) => write!(f, "Int32({self})"),
3925 ScalarValue::Int64(_) => write!(f, "Int64({self})"),
3926 ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
3927 ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
3928 ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
3929 ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
3930 ScalarValue::TimestampSecond(_, tz_opt) => {
3931 write!(f, "TimestampSecond({self}, {tz_opt:?})")
3932 }
3933 ScalarValue::TimestampMillisecond(_, tz_opt) => {
3934 write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
3935 }
3936 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
3937 write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
3938 }
3939 ScalarValue::TimestampNanosecond(_, tz_opt) => {
3940 write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
3941 }
3942 ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
3943 ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
3944 ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
3945 ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
3946 ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
3947 ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
3948 ScalarValue::Binary(None) => write!(f, "Binary({self})"),
3949 ScalarValue::Binary(Some(b)) => {
3950 write!(f, "Binary(\"")?;
3951 fmt_binary(b.as_slice(), f)?;
3952 write!(f, "\")")
3953 }
3954 ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
3955 ScalarValue::BinaryView(Some(b)) => {
3956 write!(f, "BinaryView(\"")?;
3957 fmt_binary(b.as_slice(), f)?;
3958 write!(f, "\")")
3959 }
3960 ScalarValue::FixedSizeBinary(size, None) => {
3961 write!(f, "FixedSizeBinary({size}, {self})")
3962 }
3963 ScalarValue::FixedSizeBinary(size, Some(b)) => {
3964 write!(f, "FixedSizeBinary({size}, \"")?;
3965 fmt_binary(b.as_slice(), f)?;
3966 write!(f, "\")")
3967 }
3968 ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
3969 ScalarValue::LargeBinary(Some(b)) => {
3970 write!(f, "LargeBinary(\"")?;
3971 fmt_binary(b.as_slice(), f)?;
3972 write!(f, "\")")
3973 }
3974 ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
3975 ScalarValue::List(_) => write!(f, "List({self})"),
3976 ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
3977 ScalarValue::Struct(struct_arr) => {
3978 assert_eq!(struct_arr.len(), 1);
3980
3981 let columns = struct_arr.columns();
3982 let fields = struct_arr.fields();
3983
3984 write!(
3985 f,
3986 "Struct({{{}}})",
3987 columns
3988 .iter()
3989 .zip(fields.iter())
3990 .map(|(column, field)| {
3991 let sv = array_value_to_string(column, 0).unwrap();
3992 let name = field.name();
3993 format!("{name}:{sv}")
3994 })
3995 .collect::<Vec<_>>()
3996 .join(",")
3997 )
3998 }
3999 ScalarValue::Map(map_arr) => {
4000 write!(
4001 f,
4002 "Map([{}])",
4003 map_arr
4004 .iter()
4005 .map(|struct_array| {
4006 if let Some(arr) = struct_array {
4007 let buffer: Vec<String> = (0..arr.len())
4008 .map(|i| {
4009 let key = array_value_to_string(arr.column(0), i)
4010 .unwrap();
4011 let value =
4012 array_value_to_string(arr.column(1), i)
4013 .unwrap();
4014 format!("{key:?}:{value:?}")
4015 })
4016 .collect();
4017 format!("{{{}}}", buffer.join(","))
4018 } else {
4019 "NULL".to_string()
4020 }
4021 })
4022 .collect::<Vec<_>>()
4023 .join(",")
4024 )
4025 }
4026 ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
4027 ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
4028 ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
4029 ScalarValue::Time32Millisecond(_) => {
4030 write!(f, "Time32Millisecond(\"{self}\")")
4031 }
4032 ScalarValue::Time64Microsecond(_) => {
4033 write!(f, "Time64Microsecond(\"{self}\")")
4034 }
4035 ScalarValue::Time64Nanosecond(_) => {
4036 write!(f, "Time64Nanosecond(\"{self}\")")
4037 }
4038 ScalarValue::IntervalDayTime(_) => {
4039 write!(f, "IntervalDayTime(\"{self}\")")
4040 }
4041 ScalarValue::IntervalYearMonth(_) => {
4042 write!(f, "IntervalYearMonth(\"{self}\")")
4043 }
4044 ScalarValue::IntervalMonthDayNano(_) => {
4045 write!(f, "IntervalMonthDayNano(\"{self}\")")
4046 }
4047 ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
4048 ScalarValue::DurationMillisecond(_) => {
4049 write!(f, "DurationMillisecond(\"{self}\")")
4050 }
4051 ScalarValue::DurationMicrosecond(_) => {
4052 write!(f, "DurationMicrosecond(\"{self}\")")
4053 }
4054 ScalarValue::DurationNanosecond(_) => {
4055 write!(f, "DurationNanosecond(\"{self}\")")
4056 }
4057 ScalarValue::Union(val, _fields, _mode) => match val {
4058 Some((id, val)) => write!(f, "Union {id}:{val}"),
4059 None => write!(f, "Union(NULL)"),
4060 },
4061 ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
4062 ScalarValue::Null => write!(f, "NULL"),
4063 }
4064 }
4065}
4066
/// Implemented by Arrow data-type marker types to construct the
/// [`ScalarValue`] variant that corresponds to them from a native value `T`.
pub trait ScalarType<T: ArrowNativeType> {
    /// Wraps the optional native value `r` in a [`ScalarValue`]; `None`
    /// is passed through as the variant's null.
    fn scalar(r: Option<T>) -> ScalarValue;
}
4072
4073impl ScalarType<f32> for Float32Type {
4074 fn scalar(r: Option<f32>) -> ScalarValue {
4075 ScalarValue::Float32(r)
4076 }
4077}
4078
4079impl ScalarType<i64> for TimestampSecondType {
4080 fn scalar(r: Option<i64>) -> ScalarValue {
4081 ScalarValue::TimestampSecond(r, None)
4082 }
4083}
4084
4085impl ScalarType<i64> for TimestampMillisecondType {
4086 fn scalar(r: Option<i64>) -> ScalarValue {
4087 ScalarValue::TimestampMillisecond(r, None)
4088 }
4089}
4090
4091impl ScalarType<i64> for TimestampMicrosecondType {
4092 fn scalar(r: Option<i64>) -> ScalarValue {
4093 ScalarValue::TimestampMicrosecond(r, None)
4094 }
4095}
4096
4097impl ScalarType<i64> for TimestampNanosecondType {
4098 fn scalar(r: Option<i64>) -> ScalarValue {
4099 ScalarValue::TimestampNanosecond(r, None)
4100 }
4101}
4102
4103impl ScalarType<i32> for Date32Type {
4104 fn scalar(r: Option<i32>) -> ScalarValue {
4105 ScalarValue::Date32(r)
4106 }
4107}
4108
4109#[cfg(test)]
4110mod tests {
4111
4112 use super::*;
4113 use crate::cast::{
4114 as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
4115 };
4116
4117 use crate::test_util::batches_to_string;
4118 use arrow::array::{types::Float64Type, NullBufferBuilder};
4119 use arrow::buffer::{Buffer, OffsetBuffer};
4120 use arrow::compute::{is_null, kernels};
4121 use arrow::datatypes::Fields;
4122 use arrow::error::ArrowError;
4123 use arrow::util::pretty::pretty_format_columns;
4124 use chrono::NaiveDate;
4125 use insta::assert_snapshot;
4126 use rand::Rng;
4127
/// A `ScalarValue::Map` wrapping a `MapArray` must hand back an equal
/// array from `to_array`.
#[test]
fn test_scalar_value_from_for_map() {
    let string_builder = StringBuilder::new();
    let int_builder = Int32Builder::with_capacity(4);
    let mut builder = MapBuilder::new(None, string_builder, int_builder);
    // First map row: a single entry.
    builder.keys().append_value("joe");
    builder.values().append_value(1);
    builder.append(true).unwrap();

    // Second map row: two entries.
    builder.keys().append_value("blogs");
    builder.values().append_value(2);
    builder.keys().append_value("foo");
    builder.values().append_value(4);
    builder.append(true).unwrap();
    // Two more rows with no entries appended: one flagged `true`, one `false`.
    builder.append(true).unwrap();
    builder.append(false).unwrap();

    let expected = builder.finish();

    let sv = ScalarValue::Map(Arc::new(expected.clone()));
    let map_arr = sv.to_array().unwrap();
    let actual = as_map_array(&map_arr).unwrap();
    assert_eq!(actual, &expected);
}
4152
/// A struct scalar built with `ScalarStructBuilder` from two one-row
/// arrays must convert to the same `StructArray` as one built directly.
#[test]
fn test_scalar_value_from_for_struct() {
    let boolean = Arc::new(BooleanArray::from(vec![false]));
    let int = Arc::new(Int32Array::from(vec![42]));

    // Reference array built directly from (field, column) pairs.
    let expected = StructArray::from(vec![
        (
            Arc::new(Field::new("b", DataType::Boolean, false)),
            Arc::clone(&boolean) as ArrayRef,
        ),
        (
            Arc::new(Field::new("c", DataType::Int32, false)),
            Arc::clone(&int) as ArrayRef,
        ),
    ]);

    // Same fields and columns, but going through the scalar builder.
    let sv = ScalarStructBuilder::new()
        .with_array(Field::new("b", DataType::Boolean, false), boolean)
        .with_array(Field::new("c", DataType::Int32, false), int)
        .build()
        .unwrap();

    let struct_arr = sv.to_array().unwrap();
    let actual = as_struct_array(&struct_arr).unwrap();
    assert_eq!(actual, &expected);
}
4179
/// Building a struct scalar from four-row columns is expected to fail;
/// unwrapping the result must panic with the length-mismatch message.
#[test]
#[should_panic(
    expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
)]
fn test_scalar_value_from_for_struct_should_panic() {
    let _ = ScalarStructBuilder::new()
        .with_array(
            Field::new("bool", DataType::Boolean, false),
            Arc::new(BooleanArray::from(vec![false, true, false, false])),
        )
        .with_array(
            Field::new("i32", DataType::Int32, false),
            Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
        )
        .build()
        .unwrap();
}
4197
/// `to_array_of_size(2)` on struct and list scalars must return the
/// scalar's rows repeated twice.
#[test]
fn test_to_array_of_size_for_nested() {
    let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    // Note: this struct array has four rows and is wrapped directly in
    // `ScalarValue::Struct` without going through the builder.
    let struct_array = StructArray::from(vec![
        (
            Arc::new(Field::new("b", DataType::Boolean, false)),
            Arc::clone(&boolean) as ArrayRef,
        ),
        (
            Arc::new(Field::new("c", DataType::Int32, false)),
            Arc::clone(&int) as ArrayRef,
        ),
    ]);
    let sv = ScalarValue::Struct(Arc::new(struct_array));
    let actual_arr = sv.to_array_of_size(2).unwrap();

    // Expected result: the four rows above, twice in a row.
    let boolean = Arc::new(BooleanArray::from(vec![
        false, false, true, true, false, false, true, true,
    ]));
    let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));

    let struct_array = StructArray::from(vec![
        (
            Arc::new(Field::new("b", DataType::Boolean, false)),
            Arc::clone(&boolean) as ArrayRef,
        ),
        (
            Arc::new(Field::new("c", DataType::Int32, false)),
            Arc::clone(&int) as ArrayRef,
        ),
    ]);

    let actual = as_struct_array(&actual_arr).unwrap();
    assert_eq!(actual, &struct_array);

    // Same check for a single-row list scalar containing a null element.
    let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
        Some(1),
        None,
        Some(2),
    ])]);

    let sv = ScalarValue::List(Arc::new(arr));
    let actual_arr = sv
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let actual_list_arr = actual_arr.as_list::<i32>();

    let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), None, Some(2)]),
        Some(vec![Some(1), None, Some(2)]),
    ]);

    assert_eq!(&arr, actual_list_arr);
}
4256
/// `to_array_of_size` on a fixed-size-list scalar: size 2 repeats the
/// single row twice, size 0 yields an empty array.
#[test]
fn test_to_array_of_size_for_fsl() {
    let values = Int32Array::from_iter([Some(1), None, Some(2)]);
    let field = Arc::new(Field::new_list_field(DataType::Int32, true));
    // One row holding three child values.
    let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
    let sv = ScalarValue::FixedSizeList(Arc::new(arr));
    let actual_arr = sv
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");

    // Two rows: the child values appear twice.
    let expected_values =
        Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
    let expected_arr =
        FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);

    assert_eq!(
        &expected_arr,
        as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
    );

    let empty_array = sv
        .to_array_of_size(0)
        .expect("Failed to convert to empty array");

    assert_eq!(empty_array.len(), 0);
}
4283
/// `new_list_nullable` over three string scalars must equal a single-row
/// list array holding the same three strings.
#[test]
fn test_list_to_array_string() {
    let scalars = vec![
        ScalarValue::from("rust"),
        ScalarValue::from("arrow"),
        ScalarValue::from("data-fusion"),
    ];

    let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);

    let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
    assert_eq!(*result, expected);
}
4297
/// Test helper: builds a `ListArray` from `items` via
/// `SingleRowListArrayBuilder`, using a `StringArray` as the child values.
fn single_row_list_array(items: Vec<&str>) -> ListArray {
    SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
        .build_list_array()
}
4302
/// Test helper: turns each entry of `values` into a one-row list scalar.
///
/// `Some(..)` entries become a one-row `GenericListArray<O>` of `Int64`
/// values; `None` entries become a one-row null array of the matching
/// list type. The result is wrapped in `ScalarValue::LargeList` when
/// `O::IS_LARGE`, otherwise in `ScalarValue::List`.
fn build_list<O: OffsetSizeTrait>(
    values: Vec<Option<Vec<Option<i64>>>>,
) -> Vec<ScalarValue> {
    values
        .into_iter()
        .map(|v| {
            let arr = if v.is_some() {
                Arc::new(
                    GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
                        vec![v],
                    ),
                )
            } else if O::IS_LARGE {
                new_null_array(
                    &DataType::LargeList(Arc::new(Field::new_list_field(
                        DataType::Int64,
                        true,
                    ))),
                    1,
                )
            } else {
                new_null_array(
                    &DataType::List(Arc::new(Field::new_list_field(
                        DataType::Int64,
                        true,
                    ))),
                    1,
                )
            };

            // Pick the scalar variant that matches the offset width.
            if O::IS_LARGE {
                ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
            } else {
                ScalarValue::List(arr.as_list::<i32>().to_owned().into())
            }
        })
        .collect()
}
4341
/// `iter_to_array` over fixed-size-list scalars (null, two value rows,
/// null) must produce one four-row fixed-size-list array with the nulls
/// in the first and last positions.
#[test]
fn test_iter_to_array_fixed_size_list() {
    let field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let f1 = Arc::new(FixedSizeListArray::new(
        Arc::clone(&field),
        3,
        Arc::new(Int32Array::from(vec![1, 2, 3])),
        None,
    ));
    let f2 = Arc::new(FixedSizeListArray::new(
        Arc::clone(&field),
        3,
        Arc::new(Int32Array::from(vec![4, 5, 6])),
        None,
    ));
    // Note: the null scalar is created with list size 1, unlike f1/f2.
    let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));

    let scalars = vec![
        ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
        ScalarValue::FixedSizeList(f1),
        ScalarValue::FixedSizeList(f2),
        ScalarValue::FixedSizeList(f_nulls),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
        vec![
            None,
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(4), Some(5), Some(6)]),
            None,
        ],
        3,
    );
    assert_eq!(array.as_ref(), &expected);
}
4379
/// `iter_to_array` over two identical one-row struct scalars must produce
/// a two-row struct array with the same fields.
#[test]
fn test_iter_to_array_struct() {
    let s1 = StructArray::from(vec![
        (
            Arc::new(Field::new("A", DataType::Boolean, false)),
            Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
        ),
        (
            Arc::new(Field::new("B", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![42])) as ArrayRef,
        ),
    ]);

    let s2 = StructArray::from(vec![
        (
            Arc::new(Field::new("A", DataType::Boolean, false)),
            Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
        ),
        (
            Arc::new(Field::new("B", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![42])) as ArrayRef,
        ),
    ]);

    let scalars = vec![
        ScalarValue::Struct(Arc::new(s1)),
        ScalarValue::Struct(Arc::new(s2)),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();

    // Both rows carry the same values, so each column simply doubles.
    let expected = StructArray::from(vec![
        (
            Arc::new(Field::new("A", DataType::Boolean, false)),
            Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
        ),
        (
            Arc::new(Field::new("B", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
        ),
    ]);
    assert_eq!(array.as_ref(), &expected);
}
4423
/// `iter_to_array` must carry struct-level validity through: the first
/// input row is valid and the second is null in the combined array.
#[test]
fn test_iter_to_array_struct_with_nulls() {
    // Struct row built with validity byte 1 (asserted valid below).
    let s1 = StructArray::from((
        vec![
            (
                Arc::new(Field::new("A", DataType::Int32, false)),
                Arc::new(Int32Array::from(vec![1])) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int64, false)),
                Arc::new(Int64Array::from(vec![2])) as ArrayRef,
            ),
        ],
        Buffer::from(&[1]),
    ));

    // Struct row built with validity byte 0 (asserted null below).
    let s2 = StructArray::from((
        vec![
            (
                Arc::new(Field::new("A", DataType::Int32, false)),
                Arc::new(Int32Array::from(vec![3])) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int64, false)),
                Arc::new(Int64Array::from(vec![4])) as ArrayRef,
            ),
        ],
        Buffer::from(&[0]),
    ));

    let scalars = vec![
        ScalarValue::Struct(Arc::new(s1)),
        ScalarValue::Struct(Arc::new(s2)),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let struct_array = array.as_struct();
    assert!(struct_array.is_valid(0));
    assert!(struct_array.is_null(1));
}
4467
/// `iter_to_array` over list scalars of `Int64` values, including a null
/// list, for both offset widths (`List` and `LargeList`).
#[test]
fn iter_to_array_primitive_test() {
    // 32-bit offsets: scalars are `ScalarValue::List`.
    let scalars = build_list::<i32>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), Some(5)]),
    ]);

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let list_array = as_list_array(&array);
    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), Some(5)]),
    ]);
    assert_eq!(list_array, &expected);

    // 64-bit offsets: scalars are `ScalarValue::LargeList`.
    let scalars = build_list::<i64>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), Some(5)]),
    ]);

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let list_array = as_large_list_array(&array);
    let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), Some(5)]),
    ]);
    assert_eq!(list_array, &expected);
}
4502
/// `iter_to_array` over two single-row string-list scalars must equal a
/// two-row list array built with `ListBuilder`.
#[test]
fn iter_to_array_string_test() {
    let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
    let arr2 = single_row_list_array(vec!["rust", "world"]);

    let scalars = vec![
        ScalarValue::List(Arc::new(arr1)),
        ScalarValue::List(Arc::new(arr2)),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let result = array.as_list::<i32>();

    // Build the expected array by hand: 5 strings in total across 2 rows.
    let string_builder = StringBuilder::with_capacity(5, 25);
    let mut list_of_string_builder = ListBuilder::new(string_builder);

    list_of_string_builder.values().append_value("foo");
    list_of_string_builder.values().append_value("bar");
    list_of_string_builder.values().append_value("baz");
    list_of_string_builder.append(true);

    list_of_string_builder.values().append_value("rust");
    list_of_string_builder.values().append_value("world");
    list_of_string_builder.append(true);
    let expected = list_of_string_builder.finish();

    assert_eq!(result, &expected);
}
4532
/// For every row of two list arrays, a `ScalarValue::List` built from the
/// one-row slice at that index must satisfy `eq_array` at the same index.
#[test]
fn test_list_scalar_eq_to_array() {
    let list_array: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ]));

    // NOTE(review): despite its name this is also built as a `ListArray`
    // (with equal-length non-null rows), not a `FixedSizeListArray` —
    // confirm whether that is intended.
    let fsl_array: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(5)]),
        ]));

    for arr in [list_array, fsl_array] {
        for i in 0..arr.len() {
            let scalar =
                ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
            assert!(scalar.eq_array(&arr, i).unwrap());
        }
    }
}
4557
/// `add` on two `Float64` scalars returns their sum, whether the
/// right-hand side is passed by reference or by value.
#[test]
fn scalar_add_trait_test() -> Result<()> {
    let float_value = ScalarValue::Float64(Some(123.));
    let float_value_2 = ScalarValue::Float64(Some(123.));
    assert_eq!(
        (float_value.add(&float_value_2))?,
        ScalarValue::Float64(Some(246.))
    );
    assert_eq!(
        (float_value.add(float_value_2))?,
        ScalarValue::Float64(Some(246.))
    );
    Ok(())
}
4572
/// `ScalarValue::sub` accepts its right-hand side both by reference and by
/// value and yields the same difference either way.
#[test]
fn scalar_sub_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));

    let diff_by_ref = lhs.sub(&rhs)?;
    assert_eq!(diff_by_ref, ScalarValue::Float64(Some(0.)));

    let diff_by_value = lhs.sub(rhs)?;
    assert_eq!(diff_by_value, ScalarValue::Float64(Some(0.)));

    Ok(())
}
4587
/// Int32 subtraction works in both directions, including a negative result.
#[test]
fn scalar_sub_trait_int32_test() -> Result<()> {
    let forty_two = ScalarValue::Int32(Some(42));
    let hundred = ScalarValue::Int32(Some(100));

    let small_minus_large = forty_two.sub(&hundred)?;
    assert_eq!(small_minus_large, ScalarValue::Int32(Some(-58)));

    let large_minus_small = hundred.sub(forty_two)?;
    assert_eq!(large_minus_small, ScalarValue::Int32(Some(58)));

    Ok(())
}
4596
/// `sub_checked` on `i32::MAX - i32::MIN` must fail with arrow's overflow
/// message rather than wrapping.
#[test]
fn scalar_sub_trait_int32_overflow_test() {
    let max = ScalarValue::Int32(Some(i32::MAX));
    let min = ScalarValue::Int32(Some(i32::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
    )
}
4610
/// Int64 subtraction works in both directions, including a negative result.
#[test]
fn scalar_sub_trait_int64_test() -> Result<()> {
    let forty_two = ScalarValue::Int64(Some(42));
    let hundred = ScalarValue::Int64(Some(100));

    let small_minus_large = forty_two.sub(&hundred)?;
    assert_eq!(small_minus_large, ScalarValue::Int64(Some(-58)));

    let large_minus_small = hundred.sub(forty_two)?;
    assert_eq!(large_minus_small, ScalarValue::Int64(Some(58)));

    Ok(())
}
4619
/// `sub_checked` on `i64::MAX - i64::MIN` must fail with arrow's overflow
/// message rather than wrapping.
#[test]
fn scalar_sub_trait_int64_overflow_test() {
    let max = ScalarValue::Int64(Some(i64::MAX));
    let min = ScalarValue::Int64(Some(i64::MIN));

    let failure = max.sub_checked(&min).unwrap_err();
    let message = failure.strip_backtrace();

    assert_eq!(message, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
}
4630
/// For every integer width, adding `MAX + MAX` through `ScalarValue` must
/// succeed or fail exactly as the arrow kernel does
/// (see `check_scalar_add_overflow`).
#[test]
fn scalar_add_overflow_test() -> Result<()> {
    // Expands to one `check_scalar_add_overflow` call with `MAX + MAX` for
    // the given arrow type / scalar variant / native type triple.
    macro_rules! check_max_plus_max {
        ($ARROW_TY:ident, $VARIANT:ident, $NATIVE:ident) => {
            check_scalar_add_overflow::<$ARROW_TY>(
                ScalarValue::$VARIANT(Some($NATIVE::MAX)),
                ScalarValue::$VARIANT(Some($NATIVE::MAX)),
            )
        };
    }

    check_max_plus_max!(Int8Type, Int8, i8);
    check_max_plus_max!(UInt8Type, UInt8, u8);
    check_max_plus_max!(Int16Type, Int16, i16);
    check_max_plus_max!(UInt16Type, UInt16, u16);
    check_max_plus_max!(Int32Type, Int32, i32);
    check_max_plus_max!(UInt32Type, UInt32, u32);
    check_max_plus_max!(Int64Type, Int64, i64);
    check_max_plus_max!(UInt64Type, UInt64, u64);

    Ok(())
}
4668
4669 fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4671 where
4672 T: ArrowNumericType,
4673 {
4674 let scalar_result = left.add_checked(&right);
4675
4676 let left_array = left.to_array().expect("Failed to convert to array");
4677 let right_array = right.to_array().expect("Failed to convert to array");
4678 let arrow_left_array = left_array.as_primitive::<T>();
4679 let arrow_right_array = right_array.as_primitive::<T>();
4680 let arrow_result = add(arrow_left_array, arrow_right_array);
4681
4682 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4683 }
4684
/// Adding an interval to a timestamp must be commutative: `interval + ts`
/// and `ts + interval` yield the same scalar for every interval flavour.
#[test]
fn test_interval_add_timestamp() -> Result<()> {
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);

    let intervals = [
        ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
            months: 1,
            days: 2,
            nanoseconds: 3,
        })),
        ScalarValue::IntervalYearMonth(Some(123)),
        ScalarValue::IntervalDayTime(Some(IntervalDayTime {
            days: 1,
            milliseconds: 23,
        })),
    ];

    for interval in intervals {
        // The right-hand side is borrowed (`&timestamp`); the argument had
        // been corrupted into `×tamp`, which is not valid Rust.
        let result = interval.add(&timestamp)?;
        let expect = timestamp.add(&interval)?;
        assert_eq!(result, expect);
    }

    Ok(())
}
4713
/// End-to-end checks for `ScalarValue::Decimal128`: type reporting,
/// conversion to `i128`, negation, array round-trips, ordering, and
/// `iter_to_array` with and without a null entry.
#[test]
fn scalar_decimal_test() -> Result<()> {
    let scale_one = DataType::Decimal128(10, 1);
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
    assert_eq!(scale_one, decimal_value.data_type());

    let unscaled: i128 = decimal_value.clone().try_into().unwrap();
    assert_eq!(123_i128, unscaled);
    assert!(!decimal_value.is_null());

    let negated = decimal_value.arithmetic_negate()?;
    if let ScalarValue::Decimal128(unscaled, _, _) = negated {
        assert_eq!(-123, unscaled.unwrap());
    } else {
        unreachable!();
    }

    // Single-element array round-trip.
    let single = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single = as_decimal128_array(&single)?;
    assert_eq!(1, single.len());
    assert_eq!(scale_one, single.data_type().clone());
    assert_eq!(123i128, single.value(0));

    // Ten-element array round-trip.
    let repeated = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated_decimal = as_decimal128_array(&repeated)?;
    assert_eq!(10, repeated.len());
    assert_eq!(scale_one, repeated.data_type().clone());
    for row in [0, 9] {
        assert_eq!(123i128, repeated_decimal.value(row));
    }
    for row in [1, 5] {
        assert!(decimal_value
            .eq_array(&repeated, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated, 5).unwrap()
    );

    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // Ordering is defined when precision and scale match ...
    let smaller = ScalarValue::Decimal128(Some(123), 10, 2);
    let larger = ScalarValue::Decimal128(Some(124), 10, 2);
    assert!(!smaller.eq(&larger));
    assert!(smaller < larger);
    assert!(smaller <= larger);
    // ... and yields `None` when the scales differ.
    let other_scale = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, smaller.partial_cmp(&other_scale));

    let scale_two = DataType::Decimal128(10, 2);
    let mut decimals: Vec<ScalarValue> = (1_i128..=3)
        .map(|unscaled| ScalarValue::Decimal128(Some(unscaled), 10, 2))
        .collect();
    let array = ScalarValue::iter_to_array(decimals.clone()).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(scale_two, array.data_type().clone());

    // Same values followed by one null entry.
    decimals.push(ScalarValue::Decimal128(None, 10, 2));
    let array = ScalarValue::iter_to_array(decimals).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(scale_two, array.data_type().clone());

    for (row, unscaled) in (1_i128..=3).enumerate() {
        assert!(ScalarValue::try_new_decimal128(unscaled, 10, 2)
            .unwrap()
            .eq_array(&array, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
4819
/// `partial_cmp` on list scalars: equal lists, lists differing at the first
/// or last item, lists of different length, and lists with a leading null
/// item, for `List`, `LargeList` and `FixedSizeList`.
#[test]
fn test_list_partial_cmp() {
    // Wraps one row of Int64 items into a `ScalarValue::List`.
    let list = |items: Vec<Option<i64>>| {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(items)]),
        ))
    };

    // (left items, right items, expected ordering of left vs right)
    let cases: [(Vec<Option<i64>>, Vec<Option<i64>>, Ordering); 7] = [
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Equal,
        ),
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(30)],
            Ordering::Greater,
        ),
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(10), Some(2), Some(30)],
            Ordering::Less,
        ),
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(2), Some(3)],
            Ordering::Less,
        ),
        (
            vec![Some(2), Some(3), Some(4)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        (
            vec![None, Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Greater,
        ),
    ];

    for (left_items, right_items, expected) in cases {
        let a = list(left_items);
        let b = list(right_items);
        assert_eq!(a.partial_cmp(&b), Some(expected));
    }

    // Leading null vs. non-null, LargeList flavour.
    let a = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
            None,
            Some(2),
            Some(3),
        ])]),
    ));
    let b = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
            Some(1),
            Some(2),
            Some(3),
        ])]),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // Leading null vs. non-null, FixedSizeList flavour.
    let a = ScalarValue::FixedSizeList(Arc::new(
        FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
            vec![Some(vec![None, Some(2), Some(3)])],
            3,
        ),
    ));
    let b = ScalarValue::FixedSizeList(Arc::new(
        FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
            vec![Some(vec![Some(1), Some(2), Some(3)])],
            3,
        ),
    ));
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
}
4979
/// `to_array` on a UInt64 scalar yields a one-element array that is valid
/// for `Some` and null for `None`.
#[test]
fn scalar_value_to_array_u64() -> Result<()> {
    let present = ScalarValue::UInt64(Some(13u64));
    let present_ref = present.to_array().expect("Failed to convert to array");
    let present_array = as_uint64_array(&present_ref)?;
    assert_eq!(present_array.len(), 1);
    assert!(!present_array.is_null(0));
    assert_eq!(present_array.value(0), 13);

    let missing = ScalarValue::UInt64(None);
    let missing_ref = missing.to_array().expect("Failed to convert to array");
    let missing_array = as_uint64_array(&missing_ref)?;
    assert_eq!(missing_array.len(), 1);
    assert!(missing_array.is_null(0));

    Ok(())
}
4996
/// `to_array` on a UInt32 scalar yields a one-element array that is valid
/// for `Some` and null for `None`.
#[test]
fn scalar_value_to_array_u32() -> Result<()> {
    let present = ScalarValue::UInt32(Some(13u32));
    let present_ref = present.to_array().expect("Failed to convert to array");
    let present_array = as_uint32_array(&present_ref)?;
    assert_eq!(present_array.len(), 1);
    assert!(!present_array.is_null(0));
    assert_eq!(present_array.value(0), 13);

    let missing = ScalarValue::UInt32(None);
    let missing_ref = missing.to_array().expect("Failed to convert to array");
    let missing_array = as_uint32_array(&missing_ref)?;
    assert_eq!(missing_array.len(), 1);
    assert!(missing_array.is_null(0));

    Ok(())
}
5013
/// `new_list_nullable` with no values produces one list row that has zero
/// child values.
#[test]
fn scalar_list_null_to_array() {
    let no_values: &[ScalarValue] = &[];
    let empty_list = ScalarValue::new_list_nullable(no_values, &DataType::UInt64);

    let row_count = empty_list.len();
    assert_eq!(row_count, 1);

    let child_count = empty_list.values().len();
    assert_eq!(child_count, 0);
}
5021
/// `new_large_list` with no values produces one list row that has zero
/// child values.
#[test]
fn scalar_large_list_null_to_array() {
    let no_values: &[ScalarValue] = &[];
    let empty_list = ScalarValue::new_large_list(no_values, &DataType::UInt64);

    let row_count = empty_list.len();
    assert_eq!(row_count, 1);

    let child_count = empty_list.values().len();
    assert_eq!(child_count, 0);
}
5029
/// `new_list_nullable` packs the given scalars into a single list row and
/// preserves values and nulls in order.
#[test]
fn scalar_list_to_array() -> Result<()> {
    let list_array = ScalarValue::new_list_nullable(
        &[
            ScalarValue::UInt64(Some(100)),
            ScalarValue::UInt64(None),
            ScalarValue::UInt64(Some(101)),
        ],
        &DataType::UInt64,
    );
    assert_eq!(list_array.len(), 1);
    assert_eq!(list_array.values().len(), 3);

    // Inspect the only row's items.
    let first_row = list_array.value(0);
    let items = as_uint64_array(&first_row)?;
    assert_eq!(items.len(), 3);
    assert_eq!(items.value(0), 100);
    assert!(items.is_null(1));
    assert_eq!(items.value(2), 101);

    Ok(())
}
5049
/// `new_large_list` packs the given scalars into a single list row and
/// preserves values and nulls in order.
#[test]
fn scalar_large_list_to_array() -> Result<()> {
    let list_array = ScalarValue::new_large_list(
        &[
            ScalarValue::UInt64(Some(100)),
            ScalarValue::UInt64(None),
            ScalarValue::UInt64(Some(101)),
        ],
        &DataType::UInt64,
    );
    assert_eq!(list_array.len(), 1);
    assert_eq!(list_array.values().len(), 3);

    // Inspect the only row's items.
    let first_row = list_array.value(0);
    let items = as_uint64_array(&first_row)?;
    assert_eq!(items.len(), 3);
    assert_eq!(items.value(0), 100);
    assert!(items.is_null(1));
    assert_eq!(items.value(2), 101);

    Ok(())
}
5069
// Wraps every element of `$INPUT` in `ScalarValue::$SCALAR_T`, runs the
// scalars through `ScalarValue::iter_to_array`, and asserts the result equals
// `$ARRAYTYPE::from($INPUT)`. `$INPUT` is expanded twice.
macro_rules! check_scalar_iter {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let actual = ScalarValue::iter_to_array(
            $INPUT.iter().map(|value| ScalarValue::$SCALAR_T(*value)),
        )
        .unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&actual, &expected);
    }};
}
5083
// Like `check_scalar_iter`, but for scalar variants that carry a second
// (time zone) field, which is always set to `None` here.
// `$INPUT` is expanded twice.
macro_rules! check_scalar_iter_tz {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let actual = ScalarValue::iter_to_array(
            $INPUT
                .iter()
                .map(|value| ScalarValue::$SCALAR_T(*value, None)),
        )
        .unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&actual, &expected);
    }};
}
5100
// Like `check_scalar_iter`, but each optional element of `$INPUT` is turned
// into an owned `String` before being wrapped in `ScalarValue::$SCALAR_T`.
// `$INPUT` is expanded twice.
macro_rules! check_scalar_iter_string {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let actual = ScalarValue::iter_to_array($INPUT.iter().map(|item| {
            ScalarValue::$SCALAR_T(item.map(|text| text.to_string()))
        }))
        .unwrap();

        let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

        assert_eq!(&actual, &expected);
    }};
}
5117
// Like `check_scalar_iter`, but each optional element of `$INPUT` is copied
// into a `Vec` via `to_vec()`, both for the scalars and for the expected
// `$ARRAYTYPE`, which is collected from those optional vectors.
// `$INPUT` is expanded twice.
macro_rules! check_scalar_iter_binary {
    ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
        let actual = ScalarValue::iter_to_array($INPUT.iter().map(|item| {
            ScalarValue::$SCALAR_T(item.map(|bytes| bytes.to_vec()))
        }))
        .unwrap();

        let expected_typed: $ARRAYTYPE = $INPUT
            .iter()
            .map(|item| item.map(|bytes| bytes.to_vec()))
            .collect();
        let expected: ArrayRef = Arc::new(expected_typed);

        assert_eq!(&actual, &expected);
    }};
}
5137
/// Despite the name, this exercises `ScalarValue::iter_to_array` for
/// booleans, floats, signed and unsigned integers, timestamps, strings and
/// binary values, each with a null in the middle.
#[test]
#[allow(clippy::useless_vec)]
fn scalar_iter_to_array_boolean() {
    // Input literals are produced by macros rather than variables so that each
    // expansion is a fresh `vec![..]` whose element type is inferred per call.
    macro_rules! floats {
        () => {
            vec![Some(1.9), None, Some(-2.1)]
        };
    }
    macro_rules! ints {
        () => {
            vec![Some(1), None, Some(3)]
        };
    }
    macro_rules! words {
        () => {
            vec![Some("foo"), None, Some("bar")]
        };
    }
    macro_rules! byte_words {
        () => {
            vec![Some(b"foo"), None, Some(b"bar")]
        };
    }

    check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
    check_scalar_iter!(Float32, Float32Array, floats!());
    check_scalar_iter!(Float64, Float64Array, floats!());

    check_scalar_iter!(Int8, Int8Array, ints!());
    check_scalar_iter!(Int16, Int16Array, ints!());
    check_scalar_iter!(Int32, Int32Array, ints!());
    check_scalar_iter!(Int64, Int64Array, ints!());

    check_scalar_iter!(UInt8, UInt8Array, ints!());
    check_scalar_iter!(UInt16, UInt16Array, ints!());
    check_scalar_iter!(UInt32, UInt32Array, ints!());
    check_scalar_iter!(UInt64, UInt64Array, ints!());

    check_scalar_iter_tz!(TimestampSecond, TimestampSecondArray, ints!());
    check_scalar_iter_tz!(TimestampMillisecond, TimestampMillisecondArray, ints!());
    check_scalar_iter_tz!(TimestampMicrosecond, TimestampMicrosecondArray, ints!());
    check_scalar_iter_tz!(TimestampNanosecond, TimestampNanosecondArray, ints!());

    check_scalar_iter_string!(Utf8, StringArray, words!());
    check_scalar_iter_string!(LargeUtf8, LargeStringArray, words!());
    check_scalar_iter_binary!(Binary, BinaryArray, byte_words!());
    check_scalar_iter_binary!(LargeBinary, LargeBinaryArray, byte_words!());
}
5198
/// `iter_to_array` rejects an empty iterator with a descriptive error.
#[test]
fn scalar_iter_to_array_empty() {
    let no_scalars: Vec<ScalarValue> = Vec::new();

    let err = ScalarValue::iter_to_array(no_scalars).unwrap_err();
    let rendered = err.to_string();

    assert!(
        rendered.contains("Empty iterator passed to ScalarValue::iter_to_array"),
        "{}",
        err
    );
}
5212
/// Dictionary scalars (Int32 keys, Utf8 values) converted through
/// `iter_to_array` decode back to the original optional strings.
#[test]
fn scalar_iter_to_dictionary() {
    let expected = vec![Some("Foo"), None, Some("Bar")];

    // Wraps an optional string in a dictionary scalar keyed by Int32.
    let dict_scalar = |text: Option<&str>| {
        ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Utf8(text.map(String::from))),
        )
    };
    let scalars = [
        dict_scalar(Some("Foo")),
        dict_scalar(None),
        dict_scalar(Some("Bar")),
    ];

    let erased = ScalarValue::iter_to_array(scalars).unwrap();
    let dictionary = as_dictionary_array::<Int32Type>(&erased).unwrap();
    let strings = as_string_array(dictionary.values()).unwrap();

    // Resolve every key through the dictionary's value array.
    let mut decoded = Vec::new();
    for key in dictionary.keys_iter() {
        decoded.push(key.map(|index| {
            assert!(strings.is_valid(index));
            strings.value(index)
        }));
    }

    assert_eq!(decoded, expected);
}
5244
/// `iter_to_array` reports an error when the scalars are not all of the
/// same variant.
#[test]
fn scalar_iter_to_array_mismatched_types() {
    let mixed = [
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Int32(Some(5)),
    ];

    let err = ScalarValue::iter_to_array(mixed).unwrap_err();
    let rendered = err.to_string();

    assert!(
        rendered.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
        "{}",
        err
    );
}
5255
/// `try_from_array` maps a valid Int64 slot to `Some` and a null slot to
/// `None`.
#[test]
fn scalar_try_from_array_null() {
    let typed = vec![Some(33), None].into_iter().collect::<Int64Array>();
    let array: ArrayRef = Arc::new(typed);

    let valid_slot = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(ScalarValue::Int64(Some(33)), valid_slot);

    let null_slot = ScalarValue::try_from_array(&array, 1).unwrap();
    assert_eq!(ScalarValue::Int64(None), null_slot);
}
5270
/// Scalars extracted from both a valid and a null row of a list array
/// report the same list data type.
#[test]
fn scalar_try_from_array_list_array_null() {
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        None,
    ]);

    let from_valid_row = ScalarValue::try_from_array(&list, 0).unwrap();
    let from_null_row = ScalarValue::try_from_array(&list, 1).unwrap();

    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));

    for scalar in [from_valid_row, from_null_row] {
        assert_eq!(scalar.data_type(), expected_type);
    }
}
5287
/// Converting a `List`, `LargeList` or `FixedSizeList` data type into a
/// `ScalarValue` yields a null scalar equal to one wrapping a one-row
/// null array of that type.
#[test]
fn scalar_try_from_list_datatypes() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // List
    let list_type = DataType::List(Arc::clone(&item_field));
    let scalar = ScalarValue::try_from(&list_type).unwrap();
    let null_row = new_null_array(&list_type, 1);
    let expected = ScalarValue::List(Arc::new(null_row.as_list::<i32>().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());

    // LargeList
    let large_list_type = DataType::LargeList(Arc::clone(&item_field));
    let scalar = ScalarValue::try_from(&large_list_type).unwrap();
    let null_row = new_null_array(&large_list_type, 1);
    let expected =
        ScalarValue::LargeList(Arc::new(null_row.as_list::<i64>().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());

    // FixedSizeList of width 5
    let fixed_list_type = DataType::FixedSizeList(Arc::clone(&item_field), 5);
    let scalar = ScalarValue::try_from(&fixed_list_type).unwrap();
    let null_row = new_null_array(&fixed_list_type, 1);
    let expected =
        ScalarValue::FixedSizeList(Arc::new(null_row.as_fixed_size_list().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());
}
5328
/// Converting a list-of-list data type into a `ScalarValue` equals a list
/// scalar wrapping a one-row null array of that nested type.
#[test]
fn scalar_try_from_list_of_list() {
    let int_list =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let list_of_lists = DataType::List(Arc::new(Field::new_list_field(int_list, true)));

    let scalar = ScalarValue::try_from(&list_of_lists).unwrap();

    let null_row = new_null_array(&list_of_lists, 1);
    let expected = ScalarValue::List(Arc::new(null_row.as_list::<i32>().clone()));

    assert_eq!(expected, scalar)
}
5356
/// Null scalars created from `List<Int32>` and `List<List<Int32>>` must not
/// compare equal.
#[test]
fn scalar_try_from_not_equal_list_nested_list() {
    let int_list =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let flat_scalar = ScalarValue::try_from(&int_list).unwrap();

    let nested_list =
        DataType::List(Arc::new(Field::new_list_field(int_list.clone(), true)));
    let nested_scalar = ScalarValue::try_from(&nested_list).unwrap();

    assert_ne!(flat_scalar, nested_scalar);
}
5373
/// Converting `Dictionary(Int8, Utf8)` into a `ScalarValue` yields a
/// dictionary scalar with the same key type and a null Utf8 value.
#[test]
fn scalar_try_from_dict_datatype() {
    let dictionary_type =
        DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));

    let expected = ScalarValue::Dictionary(
        Box::new(DataType::Int8),
        Box::new(ScalarValue::Utf8(None)),
    );
    let actual = ScalarValue::try_from(&dictionary_type).unwrap();

    assert_eq!(expected, actual)
}
5385
/// Pins the in-memory size of `ScalarValue` at 64 bytes, so a change to the
/// enum that alters its size fails this test.
#[test]
fn size_of_scalar() {
    let scalar_bytes = size_of::<ScalarValue>();
    assert_eq!(scalar_bytes, 64);
}
5400
/// Checks `size`, `size_of_vec` and `size_of_hashset` against hand-computed
/// totals for a binary scalar holding a 10-byte-capacity buffer.
#[test]
fn memory_size() {
    let scalar_bytes = size_of::<ScalarValue>();

    // The scalar itself plus the 10 bytes reserved by its inner buffer.
    let binary = ScalarValue::Binary(Some(Vec::with_capacity(10)));
    assert_eq!(binary.size(), scalar_bytes + 10,);
    let binary_bytes = binary.size();

    // A vec with capacity 10 holding one element: nine unused slots plus the
    // full size of the stored scalar.
    let mut scalars = Vec::with_capacity(10);
    scalars.push(binary);
    assert_eq!(scalars.capacity(), 10);
    let expected_vec_bytes =
        size_of::<Vec<ScalarValue>>() + (9 * scalar_bytes) + binary_bytes;
    assert_eq!(ScalarValue::size_of_vec(&scalars), expected_vec_bytes);

    // Same accounting for a hash set; its capacity after the insert is read
    // back rather than assumed.
    let mut set = HashSet::with_capacity(0);
    set.insert(scalars.pop().unwrap());
    let unused_slots = set.capacity() - 1;
    let expected_set_bytes =
        size_of::<HashSet<ScalarValue>>() + (unused_slots * scalar_bytes) + binary_bytes;
    assert_eq!(ScalarValue::size_of_hashset(&set), expected_set_bytes);
}
5428
/// Exhaustively checks `ScalarValue::eq_array` across many array types.
///
/// Every test case pairs an array with one scalar per row, both built from
/// the same three-element input (`Some`, `None`, `Some`). Each scalar must
/// equal the array at its own index and differ from it at every other index.
#[test]
fn scalar_eq_array() {
    // Casts every `Some` value of `$INPUT` to `$TYPE` with `as`, keeping
    // `None` entries, and collects the result into a `Vec`.
    macro_rules! make_typed_vec {
        ($INPUT:expr, $TYPE:ident) => {{
            $INPUT
                .iter()
                .map(|v| v.map(|v| v as $TYPE))
                .collect::<Vec<_>>()
        }};
    }

    // Base inputs; the wider numeric variants are derived from the narrow
    // ones so all widths test the same three logical values.
    let bool_vals = [Some(true), None, Some(false)];
    let f32_vals = [Some(-1.0), None, Some(1.0)];
    let f64_vals = make_typed_vec!(f32_vals, f64);

    let i8_vals = [Some(-1), None, Some(1)];
    let i16_vals = make_typed_vec!(i8_vals, i16);
    let i32_vals = make_typed_vec!(i8_vals, i32);
    let i64_vals = make_typed_vec!(i8_vals, i64);

    let u8_vals = [Some(0), None, Some(1)];
    let u16_vals = make_typed_vec!(u8_vals, u16);
    let u32_vals = make_typed_vec!(u8_vals, u32);
    let u64_vals = make_typed_vec!(u8_vals, u64);

    let str_vals = [Some("foo"), None, Some("bar")];

    let interval_dt_vals = [
        Some(IntervalDayTime::MINUS_ONE),
        None,
        Some(IntervalDayTime::ONE),
    ];
    let interval_mdn_vals = [
        Some(IntervalMonthDayNano::MINUS_ONE),
        None,
        Some(IntervalMonthDayNano::ONE),
    ];

    // An array together with the scalars expected to match it row by row.
    #[derive(Debug)]
    struct TestCase {
        array: ArrayRef,
        scalars: Vec<ScalarValue>,
    }

    // Builds a `TestCase` whose array is collected from `$INPUT` and whose
    // scalars wrap each element in `ScalarValue::$SCALAR_TY`. The four-argument
    // form additionally passes a clone of `$TZ` as the variant's second field.
    macro_rules! make_test_case {
        ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
            TestCase {
                array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
                scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
            }
        }};

        ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
            let tz = $TZ;
            TestCase {
                array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
                scalars: $INPUT
                    .iter()
                    .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
                    .collect(),
            }
        }};
    }

    // Same as `make_test_case`, but the scalars hold owned `String`s made
    // from the optional string slices in `$INPUT`.
    macro_rules! make_str_test_case {
        ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
            TestCase {
                array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
                scalars: $INPUT
                    .iter()
                    .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
                    .collect(),
            }
        }};
    }

    // Same as `make_str_test_case`, but the scalars hold the strings' bytes.
    macro_rules! make_binary_test_case {
        ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
            TestCase {
                array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
                scalars: $INPUT
                    .iter()
                    .map(|v| {
                        ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
                    })
                    .collect(),
            }
        }};
    }

    // Builds a dictionary-array test case keyed by `$INDEX_TY`; each scalar is
    // a `ScalarValue::Dictionary` wrapping a `Utf8` value.
    macro_rules! make_str_dict_test_case {
        ($INPUT:expr, $INDEX_TY:ident) => {{
            TestCase {
                array: Arc::new(
                    $INPUT
                        .iter()
                        .cloned()
                        .collect::<DictionaryArray<$INDEX_TY>>(),
                ),
                scalars: $INPUT
                    .iter()
                    .map(|v| {
                        ScalarValue::Dictionary(
                            Box::new($INDEX_TY::DATA_TYPE),
                            Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
                        )
                    })
                    .collect(),
            }
        }};
    }

    let cases = vec![
        make_test_case!(bool_vals, BooleanArray, Boolean),
        make_test_case!(f32_vals, Float32Array, Float32),
        make_test_case!(f64_vals, Float64Array, Float64),
        make_test_case!(i8_vals, Int8Array, Int8),
        make_test_case!(i16_vals, Int16Array, Int16),
        make_test_case!(i32_vals, Int32Array, Int32),
        make_test_case!(i64_vals, Int64Array, Int64),
        make_test_case!(u8_vals, UInt8Array, UInt8),
        make_test_case!(u16_vals, UInt16Array, UInt16),
        make_test_case!(u32_vals, UInt32Array, UInt32),
        make_test_case!(u64_vals, UInt64Array, UInt64),
        make_str_test_case!(str_vals, StringArray, Utf8),
        make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
        make_binary_test_case!(str_vals, BinaryArray, Binary),
        make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
        make_test_case!(i32_vals, Date32Array, Date32),
        make_test_case!(i64_vals, Date64Array, Date64),
        make_test_case!(i32_vals, Time32SecondArray, Time32Second),
        make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
        make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
        make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
        make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
        make_test_case!(
            i64_vals,
            TimestampSecondArray,
            TimestampSecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            Some("UTC".into())
        ),
        make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
        make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
        make_test_case!(
            interval_mdn_vals,
            IntervalMonthDayNanoArray,
            IntervalMonthDayNano
        ),
        make_str_dict_test_case!(str_vals, Int8Type),
        make_str_dict_test_case!(str_vals, Int16Type),
        make_str_dict_test_case!(str_vals, Int32Type),
        make_str_dict_test_case!(str_vals, Int64Type),
        make_str_dict_test_case!(str_vals, UInt8Type),
        make_str_dict_test_case!(str_vals, UInt16Type),
        make_str_dict_test_case!(str_vals, UInt32Type),
        make_str_dict_test_case!(str_vals, UInt64Type),
    ];

    for case in cases {
        // Print the case up front so a failing assertion can be tied to its input.
        println!("**** Test Case *****");
        let TestCase { array, scalars } = case;
        println!("Input array type: {}", array.data_type());
        println!("Input scalars: {scalars:#?}");
        assert_eq!(array.len(), scalars.len());

        for (index, scalar) in scalars.into_iter().enumerate() {
            // The scalar must match the row it was built from ...
            assert!(
                scalar
                    .eq_array(&array, index)
                    .expect("Failed to compare arrays"),
                "Expected {scalar:?} to be equal to {array:?} at index {index}"
            );

            // ... and must not match any other row (all three inputs differ).
            for other_index in 0..array.len() {
                if index != other_index {
                    assert!(
                        !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
                        "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
                    );
                }
            }
        }
    }
}
5656
/// `partial_cmp` orders scalars of the same variant, returns `None` across
/// different integer widths, and compares struct scalars only when their
/// field names match.
#[test]
fn scalar_partial_ordering() {
    // Builds a two-field struct scalar from (name, f64) and (name, &str).
    fn two_field_struct(first: (&str, f64), second: (&str, &str)) -> ScalarValue {
        ScalarValue::from(vec![
            (first.0, ScalarValue::from(first.1)),
            (second.0, ScalarValue::from(second.1)),
        ])
    }

    // Same-variant Int64 comparisons: (left, right, expected ordering).
    let int64_cases = [
        (33, 0, Ordering::Greater),
        (0, 33, Ordering::Less),
        (33, 33, Ordering::Equal),
    ];
    for (left, right, expected) in int64_cases {
        assert_eq!(
            ScalarValue::Int64(Some(left)).partial_cmp(&ScalarValue::Int64(Some(right))),
            Some(expected)
        );
    }

    // Different variants are incomparable, in either direction.
    assert_eq!(
        ScalarValue::Int64(Some(33)).partial_cmp(&ScalarValue::Int32(Some(33))),
        None
    );
    assert_eq!(
        ScalarValue::Int32(Some(33)).partial_cmp(&ScalarValue::Int64(Some(33))),
        None
    );

    // Structs with identical field names compare as `Less` here.
    assert_eq!(
        two_field_struct(("A", 1.0), ("B", "Z"))
            .partial_cmp(&two_field_struct(("A", 2.0), ("B", "A"))),
        Some(Ordering::Less)
    );

    // Structs whose field names differ only by case are incomparable.
    assert_eq!(
        two_field_struct(("A", 1.0), ("B", "Z"))
            .partial_cmp(&two_field_struct(("a", 2.0), ("b", "A"))),
        None
    );
}
5702
5703 #[test]
5704 fn test_scalar_value_from_string() {
5705 let scalar = ScalarValue::from("foo");
5706 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5707 let scalar = ScalarValue::from("foo".to_string());
5708 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5709 let scalar = ScalarValue::from_str("foo").unwrap();
5710 assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5711 }
5712
5713 #[test]
5714 fn test_scalar_struct() {
5715 let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5716 let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5717 let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5718
5719 let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5720 let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5721 let field_d = Arc::new(Field::new(
5722 "D",
5723 DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5724 false,
5725 ));
5726
5727 let struct_array = StructArray::from(vec![
5728 (
5729 Arc::clone(&field_e),
5730 Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5731 ),
5732 (
5733 Arc::clone(&field_f),
5734 Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5735 ),
5736 ]);
5737
5738 let struct_array = StructArray::from(vec![
5739 (
5740 Arc::clone(&field_a),
5741 Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5742 ),
5743 (
5744 Arc::clone(&field_b),
5745 Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5746 ),
5747 (
5748 Arc::clone(&field_c),
5749 Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5750 ),
5751 (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5752 ]);
5753 let scalar = ScalarValue::Struct(Arc::new(struct_array));
5754
5755 let array = scalar
5756 .to_array_of_size(2)
5757 .expect("Failed to convert to array of size");
5758
5759 let expected = Arc::new(StructArray::from(vec![
5760 (
5761 Arc::clone(&field_a),
5762 Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5763 ),
5764 (
5765 Arc::clone(&field_b),
5766 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5767 ),
5768 (
5769 Arc::clone(&field_c),
5770 Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5771 ),
5772 (
5773 Arc::clone(&field_d),
5774 Arc::new(StructArray::from(vec![
5775 (
5776 Arc::clone(&field_e),
5777 Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5778 ),
5779 (
5780 Arc::clone(&field_f),
5781 Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5782 ),
5783 ])) as ArrayRef,
5784 ),
5785 ])) as ArrayRef;
5786
5787 assert_eq!(&array, &expected);
5788
5789 let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5791 assert_eq!(constructed, scalar);
5792
5793 let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5795 assert!(none_scalar.is_null());
5796 assert_eq!(
5797 format!("{none_scalar:?}"),
5798 String::from("Struct({A:,B:,C:,D:})")
5799 );
5800
5801 let constructed = ScalarValue::from(vec![
5803 ("A", ScalarValue::from(23)),
5804 ("B", ScalarValue::from(false)),
5805 ("C", ScalarValue::from("Hello")),
5806 (
5807 "D",
5808 ScalarValue::from(vec![
5809 ("e", ScalarValue::from(2i16)),
5810 ("f", ScalarValue::from(3i64)),
5811 ]),
5812 ),
5813 ]);
5814 assert_eq!(constructed, scalar);
5815
5816 let scalars = vec![
5818 ScalarValue::from(vec![
5819 ("A", ScalarValue::from(23)),
5820 ("B", ScalarValue::from(false)),
5821 ("C", ScalarValue::from("Hello")),
5822 (
5823 "D",
5824 ScalarValue::from(vec![
5825 ("e", ScalarValue::from(2i16)),
5826 ("f", ScalarValue::from(3i64)),
5827 ]),
5828 ),
5829 ]),
5830 ScalarValue::from(vec![
5831 ("A", ScalarValue::from(7)),
5832 ("B", ScalarValue::from(true)),
5833 ("C", ScalarValue::from("World")),
5834 (
5835 "D",
5836 ScalarValue::from(vec![
5837 ("e", ScalarValue::from(4i16)),
5838 ("f", ScalarValue::from(5i64)),
5839 ]),
5840 ),
5841 ]),
5842 ScalarValue::from(vec![
5843 ("A", ScalarValue::from(-1000)),
5844 ("B", ScalarValue::from(true)),
5845 ("C", ScalarValue::from("!!!!!")),
5846 (
5847 "D",
5848 ScalarValue::from(vec![
5849 ("e", ScalarValue::from(6i16)),
5850 ("f", ScalarValue::from(7i64)),
5851 ]),
5852 ),
5853 ]),
5854 ];
5855 let array = ScalarValue::iter_to_array(scalars).unwrap();
5856
5857 let expected = Arc::new(StructArray::from(vec![
5858 (
5859 Arc::clone(&field_a),
5860 Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5861 ),
5862 (
5863 Arc::clone(&field_b),
5864 Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5865 ),
5866 (
5867 Arc::clone(&field_c),
5868 Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5869 ),
5870 (
5871 Arc::clone(&field_d),
5872 Arc::new(StructArray::from(vec![
5873 (
5874 Arc::clone(&field_e),
5875 Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5876 ),
5877 (
5878 Arc::clone(&field_f),
5879 Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5880 ),
5881 ])) as ArrayRef,
5882 ),
5883 ])) as ArrayRef;
5884
5885 assert_eq!(&array, &expected);
5886 }
5887
5888 #[test]
5889 fn round_trip() {
5890 let cases: Vec<ArrayRef> = vec![
5892 Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
5894 Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
5895 Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
5896 Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
5897 Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
5898 Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
5899 Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
5900 Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
5901 Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
5903 Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
5905 Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
5906 Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
5908 Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
5909 Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
5910 {
5912 let mut builder = StringDictionaryBuilder::<Int32Type>::new();
5913 builder.append("foo").unwrap();
5914 builder.append_null();
5915 builder.append("bar").unwrap();
5916 Arc::new(builder.finish())
5917 },
5918 Arc::new(BinaryArray::from_iter(vec![
5920 Some(b"foo"),
5921 None,
5922 Some(b"bar"),
5923 ])),
5924 Arc::new(LargeBinaryArray::from_iter(vec![
5925 Some(b"foo"),
5926 None,
5927 Some(b"bar"),
5928 ])),
5929 Arc::new(BinaryViewArray::from_iter(vec![
5930 Some(b"foo"),
5931 None,
5932 Some(b"bar"),
5933 ])),
5934 Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
5936 Arc::new(TimestampMillisecondArray::from(vec![
5937 Some(1),
5938 None,
5939 Some(3),
5940 ])),
5941 Arc::new(TimestampMicrosecondArray::from(vec![
5942 Some(1),
5943 None,
5944 Some(3),
5945 ])),
5946 Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
5947 Arc::new(
5949 TimestampSecondArray::from(vec![Some(1), None, Some(3)])
5950 .with_timezone_opt(Some("UTC")),
5951 ),
5952 Arc::new(
5953 TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
5954 .with_timezone_opt(Some("UTC")),
5955 ),
5956 Arc::new(
5957 TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
5958 .with_timezone_opt(Some("UTC")),
5959 ),
5960 Arc::new(
5961 TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
5962 .with_timezone_opt(Some("UTC")),
5963 ),
5964 Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
5966 Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
5967 Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
5969 Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
5970 Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
5971 Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
5972 Arc::new(NullArray::new(3)),
5974 {
5976 let mut builder = UnionBuilder::new_dense();
5977 builder.append::<Int32Type>("a", 1).unwrap();
5978 builder.append::<Float64Type>("b", 3.4).unwrap();
5979 Arc::new(builder.build().unwrap())
5980 },
5981 {
5983 let mut builder = UnionBuilder::new_sparse();
5984 builder.append::<Int32Type>("a", 1).unwrap();
5985 builder.append::<Float64Type>("b", 3.4).unwrap();
5986 Arc::new(builder.build().unwrap())
5987 },
5988 {
5990 let values_builder = StringBuilder::new();
5991 let mut builder = ListBuilder::new(values_builder);
5992 builder.values().append_value("A");
5994 builder.values().append_value("B");
5995 builder.append(true);
5996 builder.append(true);
5998 builder.values().append_value("?"); builder.append(false);
6001 Arc::new(builder.finish())
6002 },
6003 {
6005 let values_builder = StringBuilder::new();
6006 let mut builder = LargeListBuilder::new(values_builder);
6007 builder.values().append_value("A");
6009 builder.values().append_value("B");
6010 builder.append(true);
6011 builder.append(true);
6013 builder.append(false);
6015 Arc::new(builder.finish())
6016 },
6017 {
6019 let values_builder = Int32Builder::new();
6020 let mut builder = FixedSizeListBuilder::new(values_builder, 3);
6021
6022 builder.values().append_value(0);
6024 builder.values().append_value(1);
6025 builder.values().append_value(2);
6026 builder.append(true);
6027 builder.values().append_null();
6028 builder.values().append_null();
6029 builder.values().append_null();
6030 builder.append(false);
6031 builder.values().append_value(3);
6032 builder.values().append_null();
6033 builder.values().append_value(5);
6034 builder.append(true);
6035 Arc::new(builder.finish())
6036 },
6037 {
6039 let string_builder = StringBuilder::new();
6040 let int_builder = Int32Builder::with_capacity(4);
6041
6042 let mut builder = MapBuilder::new(None, string_builder, int_builder);
6043 builder.keys().append_value("joe");
6045 builder.values().append_value(1);
6046 builder.append(true).unwrap();
6047 builder.append(true).unwrap();
6049 builder.append(false).unwrap();
6051
6052 Arc::new(builder.finish())
6053 },
6054 ];
6055
6056 for arr in cases {
6057 round_trip_through_scalar(arr);
6058 }
6059 }
6060
6061 fn round_trip_through_scalar(arr: ArrayRef) {
6066 for i in 0..arr.len() {
6067 let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
6069 let array = scalar.to_array_of_size(1).unwrap();
6070 assert_eq!(array.len(), 1);
6071 assert_eq!(array.data_type(), arr.data_type());
6072 assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
6073 }
6074 }
6075
6076 #[test]
6077 fn test_scalar_union_sparse() {
6078 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6079 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6080 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6081 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6082
6083 let mut values_a = vec![None; 6];
6084 values_a[0] = Some(42);
6085 let mut values_b = vec![None; 6];
6086 values_b[1] = Some(true);
6087 let mut values_c = vec![None; 6];
6088 values_c[2] = Some("foo");
6089 let children: Vec<ArrayRef> = vec![
6090 Arc::new(Int32Array::from(values_a)),
6091 Arc::new(BooleanArray::from(values_b)),
6092 Arc::new(StringArray::from(values_c)),
6093 ];
6094
6095 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6096 let array: ArrayRef = Arc::new(
6097 UnionArray::try_new(fields.clone(), type_ids, None, children)
6098 .expect("UnionArray"),
6099 );
6100
6101 let expected = [
6102 (0, ScalarValue::from(42)),
6103 (1, ScalarValue::from(true)),
6104 (2, ScalarValue::from("foo")),
6105 (0, ScalarValue::Int32(None)),
6106 (1, ScalarValue::Boolean(None)),
6107 (2, ScalarValue::Utf8(None)),
6108 ];
6109
6110 for (i, (ti, value)) in expected.into_iter().enumerate() {
6111 let is_null = value.is_null();
6112 let value = Some((ti, Box::new(value)));
6113 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
6114 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6115
6116 assert_eq!(
6117 actual, expected,
6118 "[{i}] {actual} was not equal to {expected}"
6119 );
6120
6121 assert!(
6122 expected.eq_array(&array, i).expect("eq_array"),
6123 "[{i}] {expected}.eq_array was false"
6124 );
6125
6126 if is_null {
6127 assert!(actual.is_null(), "[{i}] {actual} was not null")
6128 }
6129 }
6130 }
6131
6132 #[test]
6133 fn test_scalar_union_dense() {
6134 let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6135 let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6136 let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6137 let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6138 let children: Vec<ArrayRef> = vec![
6139 Arc::new(Int32Array::from(vec![Some(42), None])),
6140 Arc::new(BooleanArray::from(vec![Some(true), None])),
6141 Arc::new(StringArray::from(vec![Some("foo"), None])),
6142 ];
6143
6144 let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6145 let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
6146 let array: ArrayRef = Arc::new(
6147 UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
6148 .expect("UnionArray"),
6149 );
6150
6151 let expected = [
6152 (0, ScalarValue::from(42)),
6153 (1, ScalarValue::from(true)),
6154 (2, ScalarValue::from("foo")),
6155 (0, ScalarValue::Int32(None)),
6156 (1, ScalarValue::Boolean(None)),
6157 (2, ScalarValue::Utf8(None)),
6158 ];
6159
6160 for (i, (ti, value)) in expected.into_iter().enumerate() {
6161 let is_null = value.is_null();
6162 let value = Some((ti, Box::new(value)));
6163 let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
6164 let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6165
6166 assert_eq!(
6167 actual, expected,
6168 "[{i}] {actual} was not equal to {expected}"
6169 );
6170
6171 assert!(
6172 expected.eq_array(&array, i).expect("eq_array"),
6173 "[{i}] {expected}.eq_array was false"
6174 );
6175
6176 if is_null {
6177 assert!(actual.is_null(), "[{i}] {actual} was not null")
6178 }
6179 }
6180 }
6181
6182 #[test]
6183 fn test_lists_in_struct() {
6184 let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
6185 let field_primitive_list = Arc::new(Field::new(
6186 "primitive_list",
6187 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6188 false,
6189 ));
6190
6191 let l0 =
6193 ScalarValue::List(Arc::new(
6194 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6195 Some(1),
6196 Some(2),
6197 Some(3),
6198 ])]),
6199 ));
6200 let l1 =
6201 ScalarValue::List(Arc::new(
6202 ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6203 Some(4),
6204 Some(5),
6205 ])]),
6206 ));
6207 let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
6208 Int32Type,
6209 _,
6210 _,
6211 >(vec![Some(vec![Some(6)])])));
6212
6213 let s0 = ScalarValue::from(vec![
6215 ("A", ScalarValue::from("First")),
6216 ("primitive_list", l0),
6217 ]);
6218
6219 let s1 = ScalarValue::from(vec![
6220 ("A", ScalarValue::from("Second")),
6221 ("primitive_list", l1),
6222 ]);
6223
6224 let s2 = ScalarValue::from(vec![
6225 ("A", ScalarValue::from("Third")),
6226 ("primitive_list", l2),
6227 ]);
6228
6229 let array =
6231 ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
6232
6233 let array = as_struct_array(&array).unwrap();
6234 let expected = StructArray::from(vec![
6235 (
6236 Arc::clone(&field_a),
6237 Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
6238 ),
6239 (
6240 Arc::clone(&field_primitive_list),
6241 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6242 Some(vec![Some(1), Some(2), Some(3)]),
6243 Some(vec![Some(4), Some(5)]),
6244 Some(vec![Some(6)]),
6245 ])),
6246 ),
6247 ]);
6248
6249 assert_eq!(array, &expected);
6250
6251 let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
6254 let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
6255
6256 let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
6257 let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
6258
6259 let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
6260 let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
6261
6262 let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
6264 let array = array.as_list::<i32>();
6265
6266 let field_a_builder = StringBuilder::with_capacity(4, 1024);
6268 let primitive_value_builder = Int32Array::builder(8);
6269 let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
6270
6271 let element_builder = StructBuilder::new(
6272 vec![field_a, field_primitive_list],
6273 vec![
6274 Box::new(field_a_builder),
6275 Box::new(field_primitive_list_builder),
6276 ],
6277 );
6278
6279 let mut list_builder = ListBuilder::new(element_builder);
6280
6281 list_builder
6282 .values()
6283 .field_builder::<StringBuilder>(0)
6284 .unwrap()
6285 .append_value("First");
6286 list_builder
6287 .values()
6288 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6289 .unwrap()
6290 .values()
6291 .append_value(1);
6292 list_builder
6293 .values()
6294 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6295 .unwrap()
6296 .values()
6297 .append_value(2);
6298 list_builder
6299 .values()
6300 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6301 .unwrap()
6302 .values()
6303 .append_value(3);
6304 list_builder
6305 .values()
6306 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6307 .unwrap()
6308 .append(true);
6309 list_builder.values().append(true);
6310
6311 list_builder
6312 .values()
6313 .field_builder::<StringBuilder>(0)
6314 .unwrap()
6315 .append_value("Second");
6316 list_builder
6317 .values()
6318 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6319 .unwrap()
6320 .values()
6321 .append_value(4);
6322 list_builder
6323 .values()
6324 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6325 .unwrap()
6326 .values()
6327 .append_value(5);
6328 list_builder
6329 .values()
6330 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6331 .unwrap()
6332 .append(true);
6333 list_builder.values().append(true);
6334 list_builder.append(true);
6335
6336 list_builder
6337 .values()
6338 .field_builder::<StringBuilder>(0)
6339 .unwrap()
6340 .append_value("Third");
6341 list_builder
6342 .values()
6343 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6344 .unwrap()
6345 .values()
6346 .append_value(6);
6347 list_builder
6348 .values()
6349 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6350 .unwrap()
6351 .append(true);
6352 list_builder.values().append(true);
6353 list_builder.append(true);
6354
6355 list_builder
6356 .values()
6357 .field_builder::<StringBuilder>(0)
6358 .unwrap()
6359 .append_value("Second");
6360 list_builder
6361 .values()
6362 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6363 .unwrap()
6364 .values()
6365 .append_value(4);
6366 list_builder
6367 .values()
6368 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6369 .unwrap()
6370 .values()
6371 .append_value(5);
6372 list_builder
6373 .values()
6374 .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6375 .unwrap()
6376 .append(true);
6377 list_builder.values().append(true);
6378 list_builder.append(true);
6379
6380 let expected = list_builder.finish();
6381
6382 assert_eq!(array, &expected);
6383 }
6384
6385 fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
6386 let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
6387 ListArray::new(
6388 Arc::new(Field::new_list_field(
6389 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6390 true,
6391 )),
6392 OffsetBuffer::<i32>::from_lengths([1]),
6393 Arc::new(a1),
6394 None,
6395 )
6396 }
6397
6398 #[test]
6399 fn test_nested_lists() {
6400 let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
6402 let arr2 = build_2d_list(vec![Some(4), Some(5)]);
6403 let arr3 = build_2d_list(vec![Some(6)]);
6404
6405 let array = ScalarValue::iter_to_array(vec![
6406 ScalarValue::List(Arc::new(arr1)),
6407 ScalarValue::List(Arc::new(arr2)),
6408 ScalarValue::List(Arc::new(arr3)),
6409 ])
6410 .unwrap();
6411 let array = array.as_list::<i32>();
6412
6413 let inner_builder = Int32Array::builder(6);
6415 let middle_builder = ListBuilder::new(inner_builder);
6416 let mut outer_builder = ListBuilder::new(middle_builder);
6417
6418 outer_builder.values().values().append_value(1);
6419 outer_builder.values().values().append_value(2);
6420 outer_builder.values().values().append_value(3);
6421 outer_builder.values().append(true);
6422 outer_builder.append(true);
6423
6424 outer_builder.values().values().append_value(4);
6425 outer_builder.values().values().append_value(5);
6426 outer_builder.values().append(true);
6427 outer_builder.append(true);
6428
6429 outer_builder.values().values().append_value(6);
6430 outer_builder.values().append(true);
6431 outer_builder.append(true);
6432
6433 let expected = outer_builder.finish();
6434
6435 assert_eq!(array, &expected);
6436 }
6437
6438 #[test]
6439 fn scalar_timestamp_ns_utc_timezone() {
6440 let scalar = ScalarValue::TimestampNanosecond(
6441 Some(1599566400000000000),
6442 Some("UTC".into()),
6443 );
6444
6445 assert_eq!(
6446 scalar.data_type(),
6447 DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6448 );
6449
6450 let array = scalar.to_array().expect("Failed to convert to array");
6451 assert_eq!(array.len(), 1);
6452 assert_eq!(
6453 array.data_type(),
6454 &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6455 );
6456
6457 let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
6458 assert_eq!(
6459 new_scalar.data_type(),
6460 DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6461 );
6462 }
6463
6464 #[test]
6465 fn cast_round_trip() {
6466 check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
6467 check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
6468
6469 check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
6470
6471 check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
6472
6473 check_scalar_cast(
6474 ScalarValue::from("foo"),
6475 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6476 );
6477
6478 check_scalar_cast(
6479 ScalarValue::Utf8(None),
6480 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6481 );
6482
6483 check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
6484 check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
6485 check_scalar_cast(
6486 ScalarValue::from("larger than 12 bytes string"),
6487 DataType::Utf8View,
6488 );
6489 check_scalar_cast(
6490 {
6491 let element_field =
6492 Arc::new(Field::new("element", DataType::Int32, true));
6493
6494 let mut builder =
6495 ListBuilder::new(Int32Builder::new()).with_field(element_field);
6496 builder.append_value([Some(1)]);
6497 builder.append(true);
6498
6499 ScalarValue::List(Arc::new(builder.finish()))
6500 },
6501 DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
6502 );
6503 check_scalar_cast(
6504 {
6505 let element_field =
6506 Arc::new(Field::new("element", DataType::Int32, true));
6507
6508 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
6509 .with_field(element_field);
6510 builder.values().append_value(1);
6511 builder.append(true);
6512
6513 ScalarValue::FixedSizeList(Arc::new(builder.finish()))
6514 },
6515 DataType::FixedSizeList(
6516 Arc::new(Field::new("element", DataType::Int64, true)),
6517 1,
6518 ),
6519 );
6520 check_scalar_cast(
6521 {
6522 let element_field =
6523 Arc::new(Field::new("element", DataType::Int32, true));
6524
6525 let mut builder =
6526 LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
6527 builder.append_value([Some(1)]);
6528 builder.append(true);
6529
6530 ScalarValue::LargeList(Arc::new(builder.finish()))
6531 },
6532 DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
6533 );
6534 }
6535
6536 fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
6538 let scalar_array = scalar.to_array().expect("Failed to convert to array");
6540 let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
6542
6543 let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
6545 assert_eq!(cast_scalar.data_type(), desired_type);
6546
6547 let array = cast_scalar
6549 .to_array_of_size(10)
6550 .expect("Failed to convert to array of size");
6551
6552 assert_eq!(array.data_type(), &desired_type)
6554 }
6555
6556 #[test]
6557 fn test_scalar_negative() -> Result<()> {
6558 let value = ScalarValue::Int32(Some(12));
6560 assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
6561 let value = ScalarValue::Int32(None);
6562 assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
6563
6564 let value = ScalarValue::UInt8(Some(12));
6566 assert!(value.arithmetic_negate().is_err());
6567 let value = ScalarValue::Boolean(None);
6568 assert!(value.arithmetic_negate().is_err());
6569 Ok(())
6570 }
6571
6572 #[test]
6573 #[allow(arithmetic_overflow)] fn test_scalar_negative_overflows() -> Result<()> {
6575 macro_rules! test_overflow_on_value {
6576 ($($val:expr),* $(,)?) => {$(
6577 {
6578 let value: ScalarValue = $val;
6579 let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
6580 let root_err = err.find_root();
6581 match root_err{
6582 DataFusionError::ArrowError(
6583 ArrowError::ArithmeticOverflow(_),
6584 _,
6585 ) => {}
6586 _ => return Err(err),
6587 };
6588 }
6589 )*};
6590 }
6591 test_overflow_on_value!(
6592 i8::MIN.into(),
6594 i16::MIN.into(),
6595 i32::MIN.into(),
6596 i64::MIN.into(),
6597 ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
6599 ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
6600 ScalarValue::IntervalYearMonth(Some(i32::MIN)),
6602 ScalarValue::new_interval_dt(i32::MIN, 999),
6603 ScalarValue::new_interval_dt(1, i32::MIN),
6604 ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
6605 ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
6606 ScalarValue::new_interval_mdn(12, 15, i64::MIN),
6607 ScalarValue::TimestampSecond(Some(i64::MIN), None),
6609 ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
6610 ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
6611 ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
6612 );
6613
6614 let float_cases = [
6615 (
6616 ScalarValue::Float16(Some(f16::MIN)),
6617 ScalarValue::Float16(Some(f16::MAX)),
6618 ),
6619 (
6620 ScalarValue::Float16(Some(f16::MAX)),
6621 ScalarValue::Float16(Some(f16::MIN)),
6622 ),
6623 (f32::MIN.into(), f32::MAX.into()),
6624 (f32::MAX.into(), f32::MIN.into()),
6625 (f64::MIN.into(), f64::MAX.into()),
6626 (f64::MAX.into(), f64::MIN.into()),
6627 ];
6628 for (test, expected) in float_cases.into_iter().skip(2) {
6630 assert_eq!(test.arithmetic_negate()?, expected);
6631 }
6632 Ok(())
6633 }
6634
6635 #[test]
6636 fn f16_test_overflow() {
6637 let cases = [
6639 (
6640 ScalarValue::Float16(Some(f16::MIN)),
6641 ScalarValue::Float16(Some(f16::MAX)),
6642 ),
6643 (
6644 ScalarValue::Float16(Some(f16::MAX)),
6645 ScalarValue::Float16(Some(f16::MIN)),
6646 ),
6647 ];
6648
6649 for (test, expected) in cases {
6650 assert_eq!(test.arithmetic_negate().unwrap(), expected);
6651 }
6652 }
6653
6654 macro_rules! expect_operation_error {
6655 ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
6656 #[test]
6657 fn $TEST_NAME() {
6658 let lhs = ScalarValue::UInt64(Some(12));
6659 let rhs = ScalarValue::Int32(Some(-3));
6660 match lhs.$FUNCTION(&rhs) {
6661 Ok(_result) => {
6662 panic!(
6663 "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
6664 lhs, rhs
6665 );
6666 }
6667 Err(e) => {
6668 let error_message = e.to_string();
6669 assert!(
6670 error_message.contains($EXPECTED_ERROR),
6671 "Expected error '{}' not found in actual error '{}'",
6672 $EXPECTED_ERROR,
6673 error_message
6674 );
6675 }
6676 }
6677 }
6678 };
6679 }
6680
6681 expect_operation_error!(
6682 expect_add_error,
6683 add,
6684 "Invalid arithmetic operation: UInt64 + Int32"
6685 );
6686 expect_operation_error!(
6687 expect_sub_error,
6688 sub,
6689 "Invalid arithmetic operation: UInt64 - Int32"
6690 );
6691
6692 macro_rules! decimal_op_test_cases {
6693 ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
6694 $(
6695
6696 let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
6697 let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
6698 let result = left.$OPERATION(&right).unwrap();
6699 assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
6700
6701 )+
6702 };
6703 }
6704
6705 #[test]
6706 fn decimal_operations() {
6707 decimal_op_test_cases!(
6708 add,
6709 [
6710 [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
6711 [
6713 Some(123),
6714 10,
6715 3,
6716 Some(124),
6717 10,
6718 2,
6719 Some(123 + 124 * 10_i128.pow(1)),
6720 12,
6721 3
6722 ],
6723 [
6725 Some(123),
6726 10,
6727 2,
6728 Some(124),
6729 11,
6730 3,
6731 Some(123 * 10_i128.pow(3 - 2) + 124),
6732 12,
6733 3
6734 ]
6735 ]
6736 );
6737 }
6738
6739 #[test]
6740 fn decimal_operations_with_nulls() {
6741 decimal_op_test_cases!(
6742 add,
6743 [
6744 [None, 10, 2, Some(123), 10, 2, None, 11, 2],
6746 [Some(123), 10, 2, None, 10, 2, None, 11, 2],
6748 [Some(123), 8, 2, None, 10, 3, None, 11, 3],
6750 [None, 8, 2, Some(123), 10, 3, None, 11, 3],
6752 [Some(123), 8, 4, None, 10, 3, None, 12, 4],
6754 [None, 10, 3, Some(123), 8, 4, None, 12, 4]
6756 ]
6757 );
6758 }
6759
6760 #[test]
6761 fn test_scalar_distance() {
6762 let cases = [
6763 (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
6766 (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
6767 (
6768 ScalarValue::Int16(Some(-5)),
6769 ScalarValue::Int16(Some(5)),
6770 10,
6771 ),
6772 (
6773 ScalarValue::Int16(Some(5)),
6774 ScalarValue::Int16(Some(-5)),
6775 10,
6776 ),
6777 (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
6778 (
6779 ScalarValue::Int32(Some(-5)),
6780 ScalarValue::Int32(Some(-10)),
6781 5,
6782 ),
6783 (
6784 ScalarValue::Int64(Some(-10)),
6785 ScalarValue::Int64(Some(-5)),
6786 5,
6787 ),
6788 (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
6789 (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
6790 (
6791 ScalarValue::UInt16(Some(5)),
6792 ScalarValue::UInt16(Some(10)),
6793 5,
6794 ),
6795 (
6796 ScalarValue::UInt32(Some(10)),
6797 ScalarValue::UInt32(Some(5)),
6798 5,
6799 ),
6800 (
6801 ScalarValue::UInt64(Some(5)),
6802 ScalarValue::UInt64(Some(10)),
6803 5,
6804 ),
6805 (
6806 ScalarValue::Float16(Some(f16::from_f32(1.1))),
6807 ScalarValue::Float16(Some(f16::from_f32(1.9))),
6808 1,
6809 ),
6810 (
6811 ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6812 ScalarValue::Float16(Some(f16::from_f32(-9.2))),
6813 4,
6814 ),
6815 (
6816 ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6817 ScalarValue::Float16(Some(f16::from_f32(-9.7))),
6818 4,
6819 ),
6820 (
6821 ScalarValue::Float32(Some(1.0)),
6822 ScalarValue::Float32(Some(2.0)),
6823 1,
6824 ),
6825 (
6826 ScalarValue::Float32(Some(2.0)),
6827 ScalarValue::Float32(Some(1.0)),
6828 1,
6829 ),
6830 (
6831 ScalarValue::Float64(Some(0.0)),
6832 ScalarValue::Float64(Some(0.0)),
6833 0,
6834 ),
6835 (
6836 ScalarValue::Float64(Some(-5.0)),
6837 ScalarValue::Float64(Some(-10.0)),
6838 5,
6839 ),
6840 (
6841 ScalarValue::Float64(Some(-10.0)),
6842 ScalarValue::Float64(Some(-5.0)),
6843 5,
6844 ),
6845 (
6849 ScalarValue::Float32(Some(1.2)),
6850 ScalarValue::Float32(Some(1.3)),
6851 0,
6852 ),
6853 (
6854 ScalarValue::Float32(Some(1.1)),
6855 ScalarValue::Float32(Some(1.9)),
6856 1,
6857 ),
6858 (
6859 ScalarValue::Float64(Some(-5.3)),
6860 ScalarValue::Float64(Some(-9.2)),
6861 4,
6862 ),
6863 (
6864 ScalarValue::Float64(Some(-5.3)),
6865 ScalarValue::Float64(Some(-9.7)),
6866 4,
6867 ),
6868 (
6869 ScalarValue::Float64(Some(-5.3)),
6870 ScalarValue::Float64(Some(-9.9)),
6871 5,
6872 ),
6873 ];
6874 for (lhs, rhs, expected) in cases.iter() {
6875 let distance = lhs.distance(rhs).unwrap();
6876 assert_eq!(distance, *expected);
6877 }
6878 }
6879
6880 #[test]
6881 fn test_scalar_distance_invalid() {
6882 let cases = [
6883 (ScalarValue::Int8(None), ScalarValue::Int8(None)),
6887 (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
6888 (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
6889 (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
6891 (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
6892 (
6893 ScalarValue::Float16(Some(f16::from_f32(1.0))),
6894 ScalarValue::Float32(Some(1.0)),
6895 ),
6896 (
6897 ScalarValue::Float16(Some(f16::from_f32(1.0))),
6898 ScalarValue::Int32(Some(1)),
6899 ),
6900 (
6901 ScalarValue::Float64(Some(1.1)),
6902 ScalarValue::Float32(Some(2.2)),
6903 ),
6904 (
6905 ScalarValue::UInt64(Some(777)),
6906 ScalarValue::Int32(Some(111)),
6907 ),
6908 (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
6910 (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
6911 (ScalarValue::from("foo"), ScalarValue::from("bar")),
6913 (
6914 ScalarValue::Boolean(Some(true)),
6915 ScalarValue::Boolean(Some(false)),
6916 ),
6917 (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
6918 (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
6919 (
6920 ScalarValue::Decimal128(Some(123), 5, 5),
6921 ScalarValue::Decimal128(Some(120), 5, 5),
6922 ),
6923 ];
6924 for (lhs, rhs) in cases {
6925 let distance = lhs.distance(&rhs);
6926 assert!(distance.is_none());
6927 }
6928 }
6929
6930 #[test]
6931 fn test_scalar_interval_negate() {
6932 let cases = [
6933 (
6934 ScalarValue::new_interval_ym(1, 12),
6935 ScalarValue::new_interval_ym(-1, -12),
6936 ),
6937 (
6938 ScalarValue::new_interval_dt(1, 999),
6939 ScalarValue::new_interval_dt(-1, -999),
6940 ),
6941 (
6942 ScalarValue::new_interval_mdn(12, 15, 123_456),
6943 ScalarValue::new_interval_mdn(-12, -15, -123_456),
6944 ),
6945 ];
6946 for (expr, expected) in cases.iter() {
6947 let result = expr.arithmetic_negate().unwrap();
6948 assert_eq!(*expected, result, "-expr:{expr:?}");
6949 }
6950 }
6951
6952 #[test]
6953 fn test_scalar_interval_add() {
6954 let cases = [
6955 (
6956 ScalarValue::new_interval_ym(1, 12),
6957 ScalarValue::new_interval_ym(1, 12),
6958 ScalarValue::new_interval_ym(2, 24),
6959 ),
6960 (
6961 ScalarValue::new_interval_dt(1, 999),
6962 ScalarValue::new_interval_dt(1, 999),
6963 ScalarValue::new_interval_dt(2, 1998),
6964 ),
6965 (
6966 ScalarValue::new_interval_mdn(12, 15, 123_456),
6967 ScalarValue::new_interval_mdn(12, 15, 123_456),
6968 ScalarValue::new_interval_mdn(24, 30, 246_912),
6969 ),
6970 ];
6971 for (lhs, rhs, expected) in cases.iter() {
6972 let result = lhs.add(rhs).unwrap();
6973 let result_commute = rhs.add(lhs).unwrap();
6974 assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
6975 assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
6976 }
6977 }
6978
6979 #[test]
6980 fn test_scalar_interval_sub() {
6981 let cases = [
6982 (
6983 ScalarValue::new_interval_ym(1, 12),
6984 ScalarValue::new_interval_ym(1, 12),
6985 ScalarValue::new_interval_ym(0, 0),
6986 ),
6987 (
6988 ScalarValue::new_interval_dt(1, 999),
6989 ScalarValue::new_interval_dt(1, 999),
6990 ScalarValue::new_interval_dt(0, 0),
6991 ),
6992 (
6993 ScalarValue::new_interval_mdn(12, 15, 123_456),
6994 ScalarValue::new_interval_mdn(12, 15, 123_456),
6995 ScalarValue::new_interval_mdn(0, 0, 0),
6996 ),
6997 ];
6998 for (lhs, rhs, expected) in cases.iter() {
6999 let result = lhs.sub(rhs).unwrap();
7000 assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
7001 }
7002 }
7003
7004 #[test]
7005 fn timestamp_op_random_tests() {
7006 let sample_size = 1000;
7009 let timestamps1 = get_random_timestamps(sample_size);
7010 let intervals = get_random_intervals(sample_size);
7011 for (idx, ts1) in timestamps1.iter().enumerate() {
7015 if idx % 2 == 0 {
7016 let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
7017 let back = timestamp2.sub(intervals[idx].clone()).unwrap();
7018 assert_eq!(ts1, &back);
7019 } else {
7020 let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
7021 let back = timestamp2.add(intervals[idx].clone()).unwrap();
7022 assert_eq!(ts1, &back);
7023 };
7024 }
7025 }
7026
7027 #[test]
7028 fn test_struct_nulls() {
7029 let fields_b = Fields::from(vec![
7030 Field::new("ba", DataType::UInt64, true),
7031 Field::new("bb", DataType::UInt64, true),
7032 ]);
7033 let fields = Fields::from(vec![
7034 Field::new("a", DataType::UInt64, true),
7035 Field::new("b", DataType::Struct(fields_b.clone()), true),
7036 ]);
7037
7038 let struct_value = vec![
7039 (
7040 Arc::clone(&fields[0]),
7041 Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
7042 ),
7043 (
7044 Arc::clone(&fields[1]),
7045 Arc::new(StructArray::from(vec![
7046 (
7047 Arc::clone(&fields_b[0]),
7048 Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
7049 ),
7050 (
7051 Arc::clone(&fields_b[1]),
7052 Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
7053 ),
7054 ])) as ArrayRef,
7055 ),
7056 ];
7057
7058 let struct_value_with_nulls = vec![
7059 (
7060 Arc::clone(&fields[0]),
7061 Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
7062 ),
7063 (
7064 Arc::clone(&fields[1]),
7065 Arc::new(StructArray::from((
7066 vec![
7067 (
7068 Arc::clone(&fields_b[0]),
7069 Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
7070 ),
7071 (
7072 Arc::clone(&fields_b[1]),
7073 Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
7074 ),
7075 ],
7076 Buffer::from(&[0]),
7077 ))) as ArrayRef,
7078 ),
7079 ];
7080
7081 let scalars = vec![
7082 ScalarValue::Struct(Arc::new(StructArray::from((
7084 struct_value.clone(),
7085 Buffer::from(&[0]),
7086 )))),
7087 ScalarValue::Struct(Arc::new(StructArray::from((
7089 struct_value_with_nulls.clone(),
7090 Buffer::from(&[1]),
7091 )))),
7092 ScalarValue::Struct(Arc::new(StructArray::from((
7094 struct_value.clone(),
7095 Buffer::from(&[1]),
7096 )))),
7097 ];
7098
7099 let check_array = |array| {
7100 let is_null = is_null(&array).unwrap();
7101 assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
7102
7103 let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
7104 let formatted = formatted.split('\n').collect::<Vec<_>>();
7105 let expected = vec![
7106 "+---------------------------+",
7107 "| col |",
7108 "+---------------------------+",
7109 "| |",
7110 "| {a: 1, b: } |",
7111 "| {a: 1, b: {ba: 2, bb: 3}} |",
7112 "+---------------------------+",
7113 ];
7114 assert_eq!(
7115 formatted, expected,
7116 "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
7117 );
7118 };
7119
7120 let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
7122 check_array(array);
7123
7124 let arrays = scalars
7126 .iter()
7127 .map(ScalarValue::to_array)
7128 .collect::<Result<Vec<_>>>()
7129 .expect("Failed to convert to array");
7130 let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
7131 let array = arrow::compute::concat(&arrays).unwrap();
7132 check_array(array);
7133 }
7134
7135 #[test]
7136 fn test_struct_display() {
7137 let field_a = Field::new("a", DataType::Int32, true);
7138 let field_b = Field::new("b", DataType::Utf8, true);
7139
7140 let s = ScalarStructBuilder::new()
7141 .with_scalar(field_a, ScalarValue::from(1i32))
7142 .with_scalar(field_b, ScalarValue::Utf8(None))
7143 .build()
7144 .unwrap();
7145
7146 assert_eq!(s.to_string(), "{a:1,b:}");
7147 assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
7148
7149 let ScalarValue::Struct(arr) = s else {
7150 panic!("Expected struct");
7151 };
7152
7153 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7155 assert_snapshot!(batches_to_string(&[batch]), @r"
7156 +-------------+
7157 | s |
7158 +-------------+
7159 | {a: 1, b: } |
7160 +-------------+
7161 ");
7162 }
7163
7164 #[test]
7165 fn test_null_bug() {
7166 let field_a = Field::new("a", DataType::Int32, true);
7167 let field_b = Field::new("b", DataType::Int32, true);
7168 let fields = Fields::from(vec![field_a, field_b]);
7169
7170 let array_a = Arc::new(Int32Array::from_iter_values([1]));
7171 let array_b = Arc::new(Int32Array::from_iter_values([2]));
7172 let arrays: Vec<ArrayRef> = vec![array_a, array_b];
7173
7174 let mut not_nulls = NullBufferBuilder::new(1);
7175
7176 not_nulls.append_non_null();
7177
7178 let ar = StructArray::new(fields, arrays, not_nulls.finish());
7179 let s = ScalarValue::Struct(Arc::new(ar));
7180
7181 assert_eq!(s.to_string(), "{a:1,b:2}");
7182 assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
7183
7184 let ScalarValue::Struct(arr) = s else {
7185 panic!("Expected struct");
7186 };
7187
7188 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7190 assert_snapshot!(batches_to_string(&[batch]), @r"
7191 +--------------+
7192 | s |
7193 +--------------+
7194 | {a: 1, b: 2} |
7195 +--------------+
7196 ");
7197 }
7198
7199 #[test]
7200 fn test_struct_display_null() {
7201 let fields = vec![Field::new("a", DataType::Int32, false)];
7202 let s = ScalarStructBuilder::new_null(fields);
7203 assert_eq!(s.to_string(), "NULL");
7204
7205 let ScalarValue::Struct(arr) = s else {
7206 panic!("Expected struct");
7207 };
7208
7209 let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7211
7212 assert_snapshot!(batches_to_string(&[batch]), @r"
7213 +---+
7214 | s |
7215 +---+
7216 | |
7217 +---+
7218 ");
7219 }
7220
7221 #[test]
7222 fn test_map_display_and_debug() {
7223 let string_builder = StringBuilder::new();
7224 let int_builder = Int32Builder::with_capacity(4);
7225 let mut builder = MapBuilder::new(None, string_builder, int_builder);
7226 builder.keys().append_value("joe");
7227 builder.values().append_value(1);
7228 builder.append(true).unwrap();
7229
7230 builder.keys().append_value("blogs");
7231 builder.values().append_value(2);
7232 builder.keys().append_value("foo");
7233 builder.values().append_value(4);
7234 builder.append(true).unwrap();
7235 builder.append(true).unwrap();
7236 builder.append(false).unwrap();
7237
7238 let map_value = ScalarValue::Map(Arc::new(builder.finish()));
7239
7240 assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
7241 assert_eq!(
7242 format!("{map_value:?}"),
7243 r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
7244 );
7245
7246 let ScalarValue::Map(arr) = map_value else {
7247 panic!("Expected map");
7248 };
7249
7250 let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
7252 assert_snapshot!(batches_to_string(&[batch]), @r"
7253 +--------------------+
7254 | m |
7255 +--------------------+
7256 | {joe: 1} |
7257 | {blogs: 2, foo: 4} |
7258 | {} |
7259 | |
7260 +--------------------+
7261 ");
7262 }
7263
7264 #[test]
7265 fn test_binary_display() {
7266 let no_binary_value = ScalarValue::Binary(None);
7267 assert_eq!(format!("{no_binary_value}"), "NULL");
7268 let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7269 assert_eq!(format!("{single_binary_value}"), "2A");
7270 let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7271 assert_eq!(format!("{small_binary_value}"), "010203");
7272 let large_binary_value =
7273 ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7274 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7275
7276 let no_binary_value = ScalarValue::BinaryView(None);
7277 assert_eq!(format!("{no_binary_value}"), "NULL");
7278 let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7279 assert_eq!(format!("{small_binary_value}"), "010203");
7280 let large_binary_value =
7281 ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7282 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7283
7284 let no_binary_value = ScalarValue::LargeBinary(None);
7285 assert_eq!(format!("{no_binary_value}"), "NULL");
7286 let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7287 assert_eq!(format!("{small_binary_value}"), "010203");
7288 let large_binary_value =
7289 ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7290 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7291
7292 let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7293 assert_eq!(format!("{no_binary_value}"), "NULL");
7294 let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7295 assert_eq!(format!("{small_binary_value}"), "010203");
7296 let large_binary_value = ScalarValue::FixedSizeBinary(
7297 11,
7298 Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7299 );
7300 assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7301 }
7302
7303 #[test]
7304 fn test_binary_debug() {
7305 let no_binary_value = ScalarValue::Binary(None);
7306 assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
7307 let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7308 assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
7309 let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7310 assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
7311 let large_binary_value =
7312 ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7313 assert_eq!(
7314 format!("{large_binary_value:?}"),
7315 "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7316 );
7317
7318 let no_binary_value = ScalarValue::BinaryView(None);
7319 assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
7320 let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7321 assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
7322 let large_binary_value =
7323 ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7324 assert_eq!(
7325 format!("{large_binary_value:?}"),
7326 "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
7327 );
7328
7329 let no_binary_value = ScalarValue::LargeBinary(None);
7330 assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
7331 let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7332 assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
7333 let large_binary_value =
7334 ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7335 assert_eq!(
7336 format!("{large_binary_value:?}"),
7337 "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7338 );
7339
7340 let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7341 assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
7342 let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7343 assert_eq!(
7344 format!("{small_binary_value:?}"),
7345 "FixedSizeBinary(3, \"1,2,3\")"
7346 );
7347 let large_binary_value = ScalarValue::FixedSizeBinary(
7348 11,
7349 Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7350 );
7351 assert_eq!(
7352 format!("{large_binary_value:?}"),
7353 "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
7354 );
7355 }
7356
7357 #[test]
7358 fn test_build_timestamp_millisecond_list() {
7359 let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
7360 let arr = ScalarValue::new_list_nullable(
7361 &values,
7362 &DataType::Timestamp(TimeUnit::Millisecond, None),
7363 );
7364 assert_eq!(1, arr.len());
7365 }
7366
7367 #[test]
7368 fn test_newlist_timestamp_zone() {
7369 let s: &'static str = "UTC";
7370 let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
7371 let arr = ScalarValue::new_list_nullable(
7372 &values,
7373 &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7374 );
7375 assert_eq!(1, arr.len());
7376 assert_eq!(
7377 arr.data_type(),
7378 &DataType::List(Arc::new(Field::new_list_field(
7379 DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7380 true,
7381 )))
7382 );
7383 }
7384
7385 fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
7386 let vector_size = sample_size;
7387 let mut timestamp = vec![];
7388 let mut rng = rand::rng();
7389 for i in 0..vector_size {
7390 let year = rng.random_range(1995..=2050);
7391 let month = rng.random_range(1..=12);
7392 let day = rng.random_range(1..=28); let hour = rng.random_range(0..=23);
7394 let minute = rng.random_range(0..=59);
7395 let second = rng.random_range(0..=59);
7396 if i % 4 == 0 {
7397 timestamp.push(ScalarValue::TimestampSecond(
7398 Some(
7399 NaiveDate::from_ymd_opt(year, month, day)
7400 .unwrap()
7401 .and_hms_opt(hour, minute, second)
7402 .unwrap()
7403 .and_utc()
7404 .timestamp(),
7405 ),
7406 None,
7407 ))
7408 } else if i % 4 == 1 {
7409 let millisec = rng.random_range(0..=999);
7410 timestamp.push(ScalarValue::TimestampMillisecond(
7411 Some(
7412 NaiveDate::from_ymd_opt(year, month, day)
7413 .unwrap()
7414 .and_hms_milli_opt(hour, minute, second, millisec)
7415 .unwrap()
7416 .and_utc()
7417 .timestamp_millis(),
7418 ),
7419 None,
7420 ))
7421 } else if i % 4 == 2 {
7422 let microsec = rng.random_range(0..=999_999);
7423 timestamp.push(ScalarValue::TimestampMicrosecond(
7424 Some(
7425 NaiveDate::from_ymd_opt(year, month, day)
7426 .unwrap()
7427 .and_hms_micro_opt(hour, minute, second, microsec)
7428 .unwrap()
7429 .and_utc()
7430 .timestamp_micros(),
7431 ),
7432 None,
7433 ))
7434 } else if i % 4 == 3 {
7435 let nanosec = rng.random_range(0..=999_999_999);
7436 timestamp.push(ScalarValue::TimestampNanosecond(
7437 Some(
7438 NaiveDate::from_ymd_opt(year, month, day)
7439 .unwrap()
7440 .and_hms_nano_opt(hour, minute, second, nanosec)
7441 .unwrap()
7442 .and_utc()
7443 .timestamp_nanos_opt()
7444 .unwrap(),
7445 ),
7446 None,
7447 ))
7448 }
7449 }
7450 timestamp
7451 }
7452
7453 fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
7454 const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
7455 const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
7456
7457 let vector_size = sample_size;
7458 let mut intervals = vec![];
7459 let mut rng = rand::rng();
7460 const SECS_IN_ONE_DAY: i32 = 86_400;
7461 const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
7462 for i in 0..vector_size {
7463 if i % 4 == 0 {
7464 let days = rng.random_range(0..5000);
7465 let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
7467 intervals.push(ScalarValue::new_interval_dt(days, millis));
7468 } else if i % 4 == 1 {
7469 let days = rng.random_range(0..5000);
7470 let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
7471 intervals.push(ScalarValue::new_interval_dt(days, millisec));
7472 } else if i % 4 == 2 {
7473 let days = rng.random_range(0..5000);
7474 let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
7476 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7477 } else {
7478 let days = rng.random_range(0..5000);
7479 let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
7480 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7481 }
7482 }
7483 intervals
7484 }
7485
7486 fn union_fields() -> UnionFields {
7487 [
7488 (0, Arc::new(Field::new("A", DataType::Int32, true))),
7489 (1, Arc::new(Field::new("B", DataType::Float64, true))),
7490 ]
7491 .into_iter()
7492 .collect()
7493 }
7494
7495 #[test]
7496 fn sparse_scalar_union_is_null() {
7497 let sparse_scalar = ScalarValue::Union(
7498 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7499 union_fields(),
7500 UnionMode::Sparse,
7501 );
7502 assert!(sparse_scalar.is_null());
7503 }
7504
7505 #[test]
7506 fn dense_scalar_union_is_null() {
7507 let dense_scalar = ScalarValue::Union(
7508 Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7509 union_fields(),
7510 UnionMode::Dense,
7511 );
7512 assert!(dense_scalar.is_null());
7513 }
7514
7515 #[test]
7516 fn null_dictionary_scalar_produces_null_dictionary_array() {
7517 let dictionary_scalar = ScalarValue::Dictionary(
7518 Box::new(DataType::Int32),
7519 Box::new(ScalarValue::Null),
7520 );
7521 assert!(dictionary_scalar.is_null());
7522 let dictionary_array = dictionary_scalar.to_array().unwrap();
7523 assert!(dictionary_array.is_null(0));
7524 }
7525
7526 #[test]
7527 fn test_scalar_value_try_new_null() {
7528 let scalars = vec![
7529 ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
7530 ScalarValue::try_new_null(&DataType::Int8).unwrap(),
7531 ScalarValue::try_new_null(&DataType::Int16).unwrap(),
7532 ScalarValue::try_new_null(&DataType::Int32).unwrap(),
7533 ScalarValue::try_new_null(&DataType::Int64).unwrap(),
7534 ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
7535 ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
7536 ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
7537 ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
7538 ScalarValue::try_new_null(&DataType::Float16).unwrap(),
7539 ScalarValue::try_new_null(&DataType::Float32).unwrap(),
7540 ScalarValue::try_new_null(&DataType::Float64).unwrap(),
7541 ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
7542 ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
7543 ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
7544 ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
7545 ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
7546 ScalarValue::try_new_null(&DataType::Binary).unwrap(),
7547 ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
7548 ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
7549 ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
7550 ScalarValue::try_new_null(&DataType::Date32).unwrap(),
7551 ScalarValue::try_new_null(&DataType::Date64).unwrap(),
7552 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
7553 ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
7554 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
7555 ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
7556 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
7557 .unwrap(),
7558 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
7559 .unwrap(),
7560 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
7561 .unwrap(),
7562 ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
7563 .unwrap(),
7564 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
7565 .unwrap(),
7566 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
7567 .unwrap(),
7568 ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
7569 .unwrap(),
7570 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
7571 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
7572 .unwrap(),
7573 ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
7574 ScalarValue::try_new_null(&DataType::Null).unwrap(),
7575 ];
7576 assert!(scalars.iter().all(|s| s.is_null()));
7577
7578 let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
7579 let map_field_ref = Arc::new(Field::new(
7580 "foo",
7581 DataType::Struct(Fields::from(vec![
7582 Field::new("bar", DataType::Utf8, true),
7583 Field::new("baz", DataType::Int32, true),
7584 ])),
7585 true,
7586 ));
7587 let scalars = vec![
7588 ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
7589 ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
7590 .unwrap(),
7591 ScalarValue::try_new_null(&DataType::FixedSizeList(
7592 Arc::clone(&field_ref),
7593 42,
7594 ))
7595 .unwrap(),
7596 ScalarValue::try_new_null(&DataType::Struct(
7597 vec![Arc::clone(&field_ref)].into(),
7598 ))
7599 .unwrap(),
7600 ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
7601 ScalarValue::try_new_null(&DataType::Union(
7602 UnionFields::new(vec![42], vec![field_ref]),
7603 UnionMode::Dense,
7604 ))
7605 .unwrap(),
7606 ];
7607 assert!(scalars.iter().all(|s| s.is_null()));
7608 }
7609}