1use std::sync::Arc;
5
6use vortex_buffer::BitBuffer;
7use vortex_buffer::Buffer;
8use vortex_buffer::buffer;
9use vortex_dtype::DType;
10use vortex_dtype::DecimalType;
11use vortex_dtype::Nullability;
12use vortex_dtype::match_each_decimal_value;
13use vortex_dtype::match_each_decimal_value_type;
14use vortex_dtype::match_each_native_ptype;
15use vortex_error::VortexExpect;
16use vortex_scalar::BinaryScalar;
17use vortex_scalar::BoolScalar;
18use vortex_scalar::DecimalValue;
19use vortex_scalar::ExtScalar;
20use vortex_scalar::ListScalar;
21use vortex_scalar::Scalar;
22use vortex_scalar::StructScalar;
23use vortex_scalar::Utf8Scalar;
24use vortex_vector::binaryview::BinaryView;
25
26use crate::Canonical;
27use crate::IntoArray;
28use crate::arrays::BoolArray;
29use crate::arrays::ConstantVTable;
30use crate::arrays::DecimalArray;
31use crate::arrays::ExtensionArray;
32use crate::arrays::FixedSizeListArray;
33use crate::arrays::ListViewArray;
34use crate::arrays::NullArray;
35use crate::arrays::StructArray;
36use crate::arrays::VarBinViewArray;
37use crate::arrays::constant::ConstantArray;
38use crate::arrays::primitive::PrimitiveArray;
39use crate::builders::builder_with_capacity;
40use crate::validity::Validity;
41use crate::vtable::CanonicalVTable;
42
43impl CanonicalVTable<ConstantVTable> for ConstantVTable {
44 fn canonicalize(array: &ConstantArray) -> Canonical {
45 let scalar = array.scalar();
46
47 let validity = match array.dtype().nullability() {
48 Nullability::NonNullable => Validity::NonNullable,
49 Nullability::Nullable => match scalar.is_null() {
50 true => Validity::AllInvalid,
51 false => Validity::AllValid,
52 },
53 };
54
55 match array.dtype() {
56 DType::Null => Canonical::Null(NullArray::new(array.len())),
57 DType::Bool(..) => Canonical::Bool(BoolArray::from_bit_buffer(
58 if BoolScalar::try_from(scalar)
59 .vortex_expect("must be bool")
60 .value()
61 .unwrap_or_default()
62 {
63 BitBuffer::new_set(array.len())
64 } else {
65 BitBuffer::new_unset(array.len())
66 },
67 validity,
68 )),
69 DType::Primitive(ptype, ..) => {
70 match_each_native_ptype!(ptype, |P| {
71 Canonical::Primitive(PrimitiveArray::new(
72 if scalar.is_valid() {
73 Buffer::full(
74 P::try_from(scalar)
75 .vortex_expect("Couldn't unwrap scalar to primitive"),
76 array.len(),
77 )
78 } else {
79 Buffer::zeroed(array.len())
80 },
81 validity,
82 ))
83 })
84 }
85 DType::Decimal(decimal_type, ..) => {
86 let size = DecimalType::smallest_decimal_value_type(decimal_type);
87 let decimal = scalar.as_decimal();
88 let Some(value) = decimal.decimal_value() else {
89 let all_null = match_each_decimal_value_type!(size, |D| {
90 unsafe {
92 DecimalArray::new_unchecked(
93 Buffer::<D>::zeroed(array.len()),
94 *decimal_type,
95 validity,
96 )
97 }
98 });
99 return Canonical::Decimal(all_null);
100 };
101
102 let decimal_array = match_each_decimal_value!(value, |value| {
103 unsafe {
105 DecimalArray::new_unchecked(
106 Buffer::full(value, array.len()),
107 *decimal_type,
108 validity,
109 )
110 }
111 });
112 Canonical::Decimal(decimal_array)
113 }
114 DType::Utf8(_) => {
115 let value = Utf8Scalar::try_from(scalar)
116 .vortex_expect("Must be a utf8 scalar")
117 .value();
118 let const_value = value.as_ref().map(|v| v.as_bytes());
119 Canonical::VarBinView(constant_canonical_byte_view(
120 const_value,
121 array.dtype(),
122 array.len(),
123 ))
124 }
125 DType::Binary(_) => {
126 let value = BinaryScalar::try_from(scalar)
127 .vortex_expect("must be a binary scalar")
128 .value();
129 let const_value = value.as_ref().map(|v| v.as_slice());
130 Canonical::VarBinView(constant_canonical_byte_view(
131 const_value,
132 array.dtype(),
133 array.len(),
134 ))
135 }
136 DType::Struct(struct_dtype, _) => {
137 let value = StructScalar::try_from(scalar).vortex_expect("must be struct");
138 let fields: Vec<_> = match value.fields() {
139 Some(fields) => fields
140 .into_iter()
141 .map(|s| ConstantArray::new(s, array.len()).into_array())
142 .collect(),
143 None => {
144 assert!(validity.all_invalid(array.len()));
145 struct_dtype
146 .fields()
147 .map(|dt| {
148 let scalar = Scalar::default_value(dt);
149 ConstantArray::new(scalar, array.len()).into_array()
150 })
151 .collect()
152 }
153 };
154 Canonical::Struct(unsafe {
157 StructArray::new_unchecked(fields, struct_dtype.clone(), array.len(), validity)
158 })
159 }
160 DType::List(..) => Canonical::List(constant_canonical_list_array(scalar, array.len())),
161 DType::FixedSizeList(element_dtype, list_size, _) => {
162 let value = ListScalar::try_from(scalar).vortex_expect("must be list");
163
164 Canonical::FixedSizeList(constant_canonical_fixed_size_list_array(
165 value.elements(),
166 element_dtype,
167 *list_size,
168 value.dtype().nullability(),
169 array.len(),
170 ))
171 }
172 DType::Extension(ext_dtype) => {
173 let s = ExtScalar::try_from(scalar).vortex_expect("must be an extension scalar");
174
175 let storage_scalar = s.storage();
176 let storage_self = ConstantArray::new(storage_scalar, array.len()).into_array();
177 Canonical::Extension(ExtensionArray::new(ext_dtype.clone(), storage_self))
178 }
179 }
180 }
181}
182
183fn constant_canonical_byte_view(
184 scalar_bytes: Option<&[u8]>,
185 dtype: &DType,
186 len: usize,
187) -> VarBinViewArray {
188 match scalar_bytes {
189 None => {
190 let views = buffer![BinaryView::empty_view(); len];
191
192 unsafe {
194 VarBinViewArray::new_unchecked(
195 views,
196 Default::default(),
197 dtype.clone(),
198 Validity::AllInvalid,
199 )
200 }
201 }
202 Some(scalar_bytes) => {
203 let view = BinaryView::make_view(scalar_bytes, 0, 0);
206 let mut buffers = Vec::new();
207 if scalar_bytes.len() >= BinaryView::MAX_INLINED_SIZE {
208 buffers.push(Buffer::copy_from(scalar_bytes));
209 }
210
211 let views = buffer![view; len];
213
214 unsafe {
216 VarBinViewArray::new_unchecked(
217 views,
218 Arc::from(buffers),
219 dtype.clone(),
220 Validity::from(dtype.nullability()),
221 )
222 }
223 }
224 }
225}
226
227fn constant_canonical_list_array(scalar: &Scalar, len: usize) -> ListViewArray {
232 let list = ListScalar::try_from(scalar).vortex_expect("must be list");
233
234 let elements = if let Some(elements) = list.elements() {
237 let mut builder = builder_with_capacity(
239 list.dtype()
240 .as_list_element_opt()
241 .vortex_expect("list scalar somehow did not have a list DType"),
242 list.len(),
243 );
244 for scalar in &elements {
245 builder
246 .append_scalar(scalar)
247 .vortex_expect("list element scalar was invalid");
248 }
249 builder.finish()
250 } else {
251 Canonical::empty(list.element_dtype()).into_array()
253 };
254
255 let validity = if scalar.dtype().is_nullable() {
256 if list.is_null() {
257 Validity::AllInvalid
258 } else {
259 Validity::AllValid
260 }
261 } else {
262 debug_assert!(!list.is_null());
263 Validity::NonNullable
264 };
265
266 let offsets = ConstantArray::new::<u64>(0, len).into_array();
268 let sizes = ConstantArray::new::<u64>(list.len() as u64, len).into_array();
269
270 debug_assert!(!offsets.dtype().is_nullable());
271 debug_assert!(!sizes.dtype().is_nullable());
272
273 unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity) }
277}
278
279fn constant_canonical_fixed_size_list_array(
280 values: Option<Vec<Scalar>>,
281 element_dtype: &DType,
282 list_size: u32,
283 list_nullability: Nullability,
284 len: usize,
285) -> FixedSizeListArray {
286 match values {
287 None => {
288 let elements_len = list_size as usize * len;
291 let mut element_builder = builder_with_capacity(element_dtype, elements_len);
292 element_builder.append_defaults(elements_len);
293 let elements = element_builder.finish();
294
295 unsafe {
298 FixedSizeListArray::new_unchecked(elements, list_size, Validity::AllInvalid, len)
299 }
300 }
301 Some(values) => {
302 let mut elements_builder = builder_with_capacity(element_dtype, len * values.len());
303
304 for _ in 0..len {
305 for v in &values {
306 elements_builder
307 .append_scalar(v)
308 .vortex_expect("must be a same dtype");
309 }
310 }
311
312 let elements = elements_builder.finish();
313 let validity = Validity::from(list_nullability);
314
315 unsafe { FixedSizeListArray::new_unchecked(elements, list_size, validity, len) }
318 }
319 }
320}
321
// Unit tests covering canonicalization of constant arrays for each supported dtype family.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use enum_iterator::all;
    use itertools::Itertools;
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;
    use vortex_dtype::PType;
    use vortex_dtype::half::f16;
    use vortex_scalar::Scalar;

    use crate::Array;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::ListViewRebuildMode;
    use crate::canonical::ToCanonical;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProvider;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;

    /// A constant null array canonicalizes to a null array of the same length.
    #[test]
    fn test_canonicalize_null() {
        let constant = ConstantArray::new(Scalar::null(DType::Null), 42);
        let canonical = constant.to_null();
        assert_eq!(canonical.len(), 42);
        assert_eq!(canonical.scalar_at(33), Scalar::null(DType::Null));
    }

    /// A constant string array yields the same string at every position.
    #[test]
    fn test_canonicalize_const_str() {
        let constant = ConstantArray::new("four".to_string(), 4);

        let canonical = constant.to_varbinview();

        assert_eq!(canonical.len(), 4);

        for idx in 0..canonical.len() {
            assert_eq!(canonical.scalar_at(idx), "four".into());
        }
    }

    /// Statistics computed on the constant array match those reported by its canonical form.
    #[test]
    fn test_canonicalize_propagates_stats() {
        let bool_scalar = Scalar::bool(true, Nullability::NonNullable);
        let constant = ConstantArray::new(bool_scalar, 4).into_array();
        let stats = constant
            .statistics()
            .compute_all(&all::<Stat>().collect_vec())
            .unwrap();
        let canonical = constant.to_canonical();
        let canonical_stats = canonical.as_ref().statistics();
        let canonical_dtype = canonical.as_ref().dtype();

        let stats_ref = stats.as_typed_ref(canonical_dtype);

        // Only stats that have a dtype for this array's dtype are compared.
        for stat in all::<Stat>().filter(|s| s.dtype(canonical_dtype).is_some()) {
            assert_eq!(
                canonical_stats.get(stat),
                stats_ref.get(stat),
                "stat mismatch {stat}"
            );
        }
    }

    /// An `f16` scalar survives canonicalization to a primitive array unchanged.
    #[test]
    fn test_canonicalize_scalar_values() {
        let half = f16::from_f32(5.722046e-6);
        let half_scalar = Scalar::primitive(half, Nullability::NonNullable);

        let constant = ConstantArray::new(half_scalar.clone(), 1).into_array();
        let canonical = constant.to_primitive();

        assert_eq!(canonical.scalar_at(0), half_scalar);
    }

    /// A constant list, once rebuilt for zero-copy-to-list, repeats its elements per row.
    #[test]
    fn test_canonicalize_lists() {
        let element_dtype = Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable));
        let list_scalar = Scalar::list(
            element_dtype,
            vec![1u64.into(), 2u64.into()],
            Nullability::NonNullable,
        );
        let constant = ConstantArray::new(list_scalar, 2).into_array();
        let list_view = constant.to_listview();
        let rebuilt = list_view.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
        assert_eq!(
            rebuilt.elements().to_primitive().as_slice::<u64>(),
            [1u64, 2, 1, 2]
        );
        assert_eq!(
            rebuilt.offsets().to_primitive().as_slice::<u64>(),
            [0u64, 2]
        );
        assert_eq!(rebuilt.sizes().to_primitive().as_slice::<u64>(), [2u64, 2]);
    }

    /// An empty, non-null list produces no elements and zero-sized views.
    #[test]
    fn test_canonicalize_empty_list() {
        let element_dtype = Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable));
        let list_scalar = Scalar::list(element_dtype, vec![], Nullability::NonNullable);
        let constant = ConstantArray::new(list_scalar, 2).into_array();
        let list_view = constant.to_listview();
        assert!(list_view.elements().to_primitive().is_empty());
        assert_eq!(
            list_view.offsets().to_primitive().as_slice::<u64>(),
            [0u64, 0]
        );
        assert_eq!(
            list_view.sizes().to_primitive().as_slice::<u64>(),
            [0u64, 0]
        );
    }

    /// A null list produces no elements and zero-sized views.
    #[test]
    fn test_canonicalize_null_list() {
        let element_dtype = Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable));
        let list_scalar = Scalar::null(DType::List(element_dtype, Nullability::Nullable));
        let constant = ConstantArray::new(list_scalar, 2).into_array();
        let list_view = constant.to_listview();
        assert!(list_view.elements().to_primitive().is_empty());
        assert_eq!(
            list_view.offsets().to_primitive().as_slice::<u64>(),
            [0u64, 0]
        );
        assert_eq!(
            list_view.sizes().to_primitive().as_slice::<u64>(),
            [0u64, 0]
        );
    }

    /// A null struct keeps its non-nullable field dtype while reporting zero valid rows.
    #[test]
    fn test_canonicalize_nullable_struct() {
        let field_dtype = DType::Primitive(PType::I8, Nullability::NonNullable);
        let struct_dtype = DType::struct_(
            [("non_null_field", field_dtype.clone())],
            Nullability::Nullable,
        );
        let constant = ConstantArray::new(Scalar::null(struct_dtype), 3);

        let struct_array = constant.to_struct();
        assert_eq!(struct_array.len(), 3);
        assert_eq!(struct_array.valid_count(), 0);

        let field = struct_array.field_by_name("non_null_field").unwrap();

        assert_eq!(field.dtype(), &field_dtype);
    }

    /// A non-nullable fixed-size list repeats its three elements in every row.
    #[test]
    fn test_canonicalize_fixed_size_list_non_null() {
        let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
        let fsl_scalar = Scalar::fixed_size_list(
            element_dtype,
            vec![
                Scalar::primitive(10i32, Nullability::NonNullable),
                Scalar::primitive(20i32, Nullability::NonNullable),
                Scalar::primitive(30i32, Nullability::NonNullable),
            ],
            Nullability::NonNullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 4).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 4);
        assert_eq!(canonical.list_size(), 3);
        assert_eq!(canonical.validity(), &Validity::NonNullable);

        // Each of the four rows must hold the same three values.
        for row in 0..canonical.len() {
            let row_values = canonical.fixed_size_list_elements_at(row).to_primitive();
            assert_eq!(row_values.as_slice::<i32>(), [10, 20, 30]);
        }
    }

    /// A nullable but non-null fixed-size list is all-valid with repeated elements.
    #[test]
    fn test_canonicalize_fixed_size_list_nullable() {
        let element_dtype = Arc::new(DType::Primitive(PType::F64, Nullability::NonNullable));
        let fsl_scalar = Scalar::fixed_size_list(
            element_dtype,
            vec![
                Scalar::primitive(1.5f64, Nullability::NonNullable),
                Scalar::primitive(2.5f64, Nullability::NonNullable),
            ],
            Nullability::Nullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 3).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 3);
        assert_eq!(canonical.list_size(), 2);
        assert_eq!(canonical.validity(), &Validity::AllValid);

        let flat = canonical.elements().to_primitive();
        assert_eq!(flat.as_slice::<f64>(), [1.5, 2.5, 1.5, 2.5, 1.5, 2.5]);
    }

    /// A null fixed-size list is all-invalid and backed by zeroed default elements.
    #[test]
    fn test_canonicalize_fixed_size_list_null() {
        let element_dtype = Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable));
        let fsl_scalar =
            Scalar::null(DType::FixedSizeList(element_dtype, 4, Nullability::Nullable));

        let constant = ConstantArray::new(fsl_scalar, 5).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 5);
        assert_eq!(canonical.list_size(), 4);
        assert_eq!(canonical.validity(), &Validity::AllInvalid);

        // 5 rows of 4 slots each, all zero.
        let flat = canonical.elements().to_primitive();
        assert_eq!(flat.len(), 20);
        assert!(flat.as_slice::<u64>().iter().all(|&x| x == 0));
    }

    /// A zero-width fixed-size list has rows but no elements.
    #[test]
    fn test_canonicalize_fixed_size_list_empty() {
        let element_dtype = Arc::new(DType::Primitive(PType::I8, Nullability::NonNullable));
        let fsl_scalar =
            Scalar::fixed_size_list(element_dtype, vec![], Nullability::NonNullable);

        let constant = ConstantArray::new(fsl_scalar, 10).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 10);
        assert_eq!(canonical.list_size(), 0);
        assert_eq!(canonical.validity(), &Validity::NonNullable);

        assert!(canonical.elements().is_empty());
    }

    /// String elements of a fixed-size list are repeated per row.
    #[test]
    fn test_canonicalize_fixed_size_list_nested() {
        let fsl_scalar = Scalar::fixed_size_list(
            Arc::new(DType::Utf8(Nullability::NonNullable)),
            vec![Scalar::from("hello"), Scalar::from("world")],
            Nullability::NonNullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 2).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 2);
        assert_eq!(canonical.list_size(), 2);

        // Two rows of ["hello", "world"] laid out back to back.
        let strings = canonical.elements().to_varbinview();
        assert_eq!(strings.scalar_at(0), "hello".into());
        assert_eq!(strings.scalar_at(1), "world".into());
        assert_eq!(strings.scalar_at(2), "hello".into());
        assert_eq!(strings.scalar_at(3), "world".into());
    }

    /// The smallest case: one row holding one element.
    #[test]
    fn test_canonicalize_fixed_size_list_single_element() {
        let element_dtype = Arc::new(DType::Primitive(PType::I16, Nullability::NonNullable));
        let fsl_scalar = Scalar::fixed_size_list(
            element_dtype,
            vec![Scalar::primitive(42i16, Nullability::NonNullable)],
            Nullability::NonNullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 1).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 1);
        assert_eq!(canonical.list_size(), 1);

        let flat = canonical.elements().to_primitive();
        assert_eq!(flat.as_slice::<i16>(), [42]);
    }

    /// Null elements inside a non-null list keep their per-element validity in every row.
    #[test]
    fn test_canonicalize_fixed_size_list_with_null_elements() {
        let nullable_i32 = DType::Primitive(PType::I32, Nullability::Nullable);
        let fsl_scalar = Scalar::fixed_size_list(
            Arc::new(nullable_i32.clone()),
            vec![
                Scalar::primitive(100i32, Nullability::Nullable),
                Scalar::null(nullable_i32),
                Scalar::primitive(200i32, Nullability::Nullable),
            ],
            Nullability::NonNullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 3).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 3);
        assert_eq!(canonical.list_size(), 3);
        assert_eq!(canonical.validity(), &Validity::NonNullable);

        // The null slot is expected to hold 0 in the raw buffer.
        let flat = canonical.elements().to_primitive();
        let raw = flat.as_slice::<i32>();
        assert_eq!(raw[0], 100);
        assert_eq!(raw[1], 0);
        assert_eq!(raw[2], 200);

        // The validity pattern valid/null/valid is checked for the first two rows.
        let element_validity = flat.validity();
        let expected_valid = [true, false, true, true, false, true];
        for (idx, expected) in expected_valid.into_iter().enumerate() {
            assert_eq!(element_validity.is_valid(idx), expected);
        }
    }

    /// A longer constant array repeats its five elements for each of the 1000 rows.
    #[test]
    fn test_canonicalize_fixed_size_list_large() {
        let element_dtype = Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable));
        let fsl_scalar = Scalar::fixed_size_list(
            element_dtype,
            vec![
                Scalar::primitive(1u8, Nullability::NonNullable),
                Scalar::primitive(2u8, Nullability::NonNullable),
                Scalar::primitive(3u8, Nullability::NonNullable),
                Scalar::primitive(4u8, Nullability::NonNullable),
                Scalar::primitive(5u8, Nullability::NonNullable),
            ],
            Nullability::NonNullable,
        );

        let constant = ConstantArray::new(fsl_scalar, 1000).into_array();
        let canonical = constant.to_fixed_size_list();

        assert_eq!(canonical.len(), 1000);
        assert_eq!(canonical.list_size(), 5);

        let flat = canonical.elements().to_primitive();
        assert_eq!(flat.len(), 5000);

        // With 5000 elements, `chunks_exact(5)` visits exactly the 1000 rows.
        for row in flat.as_slice::<u8>().chunks_exact(5) {
            assert_eq!(row, [1, 2, 3, 4, 5]);
        }
    }
}