1use std::fmt::Debug;
7use std::ops::Range;
8
9use vortex_buffer::BitBuffer;
10use vortex_error::VortexExpect as _;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_mask::AllOr;
16use vortex_mask::Mask;
17use vortex_mask::MaskValues;
18
19use crate::ArrayRef;
20use crate::Canonical;
21use crate::ExecutionCtx;
22use crate::IntoArray;
23use crate::LEGACY_SESSION;
24use crate::VortexSessionExecute;
25use crate::arrays::BoolArray;
26use crate::arrays::ConstantArray;
27use crate::arrays::scalar_fn::ScalarFnFactoryExt;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::Nullability;
31use crate::optimizer::ArrayOptimizer;
32use crate::patches::Patches;
33use crate::scalar::Scalar;
34use crate::scalar_fn::fns::binary::Binary;
35use crate::scalar_fn::fns::operators::Operator;
36
/// Validity of an array's elements: which positions hold a value and which are null.
#[derive(Clone)]
pub enum Validity {
    /// The data is not nullable; [`Validity::nullability`] reports `NonNullable` and every
    /// position is treated as valid.
    NonNullable,
    /// Nullable, but every position is valid.
    AllValid,
    /// Nullable, and every position is invalid (null).
    AllInvalid,
    /// Per-position validity stored in an array, where `true` means valid. The array is
    /// expected to be a non-nullable boolean array (see `Validity::DTYPE`).
    Array(ArrayRef),
}
51
52impl Debug for Validity {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 Self::NonNullable => write!(f, "NonNullable"),
56 Self::AllValid => write!(f, "AllValid"),
57 Self::AllInvalid => write!(f, "AllInvalid"),
58 Self::Array(arr) => write!(f, "SomeValid({})", arr.display_values()),
59 }
60 }
61}
62
63impl Validity {
64 pub fn execute(self, ctx: &mut ExecutionCtx) -> VortexResult<Validity> {
66 match self {
67 v @ Validity::NonNullable | v @ Validity::AllValid | v @ Validity::AllInvalid => Ok(v),
68 Validity::Array(a) => Ok(Validity::Array(a.execute::<Canonical>(ctx)?.into_array())),
69 }
70 }
71}
72
73impl Validity {
    /// The non-nullable boolean dtype; `from_array` panics on arrays whose dtype is not this.
    pub const DTYPE: DType = DType::Bool(Nullability::NonNullable);
76
77 pub fn to_array(&self, len: usize) -> ArrayRef {
79 match self {
80 Self::NonNullable | Self::AllValid => ConstantArray::new(true, len).into_array(),
81 Self::AllInvalid => ConstantArray::new(false, len).into_array(),
82 Self::Array(a) => a.clone(),
83 }
84 }
85
86 #[inline]
88 pub fn into_array(self) -> Option<ArrayRef> {
89 if let Self::Array(a) = self {
90 Some(a)
91 } else {
92 None
93 }
94 }
95
96 #[inline]
98 pub fn as_array(&self) -> Option<&ArrayRef> {
99 if let Self::Array(a) = self {
100 Some(a)
101 } else {
102 None
103 }
104 }
105
106 #[inline]
107 pub fn nullability(&self) -> Nullability {
108 if matches!(self, Self::NonNullable) {
109 Nullability::NonNullable
110 } else {
111 Nullability::Nullable
112 }
113 }
114
115 #[inline]
117 pub fn union_nullability(self, nullability: Nullability) -> Self {
118 match nullability {
119 Nullability::NonNullable => self,
120 Nullability::Nullable => self.into_nullable(),
121 }
122 }
123
124 #[inline]
126 pub fn is_valid(&self, index: usize) -> VortexResult<bool> {
127 Ok(match self {
128 Self::NonNullable | Self::AllValid => true,
129 Self::AllInvalid => false,
130 Self::Array(a) => a
131 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
132 .vortex_expect("Validity array must support execute_scalar")
133 .as_bool()
134 .value()
135 .vortex_expect("Validity must be non-nullable"),
136 })
137 }
138
139 #[inline]
140 pub fn is_null(&self, index: usize) -> VortexResult<bool> {
141 Ok(!self.is_valid(index)?)
142 }
143
144 #[inline]
145 pub fn slice(&self, range: Range<usize>) -> VortexResult<Self> {
146 match self {
147 Self::Array(a) => Ok(Self::Array(a.slice(range)?)),
148 Self::NonNullable | Self::AllValid | Self::AllInvalid => Ok(self.clone()),
149 }
150 }
151
152 pub fn take(&self, indices: &ArrayRef) -> VortexResult<Self> {
153 match self {
154 Self::NonNullable => {
155 let len = indices.len();
156 let indices_mask = indices
157 .validity()?
158 .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
159 match indices_mask.bit_buffer() {
160 AllOr::All => {
161 if indices.dtype().is_nullable() {
162 Ok(Self::AllValid)
163 } else {
164 Ok(Self::NonNullable)
165 }
166 }
167 AllOr::None => Ok(Self::AllInvalid),
168 AllOr::Some(buf) => Ok(Validity::from(buf.clone())),
169 }
170 }
171 Self::AllValid => {
172 let len = indices.len();
173 let indices_mask = indices
174 .validity()?
175 .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
176 match indices_mask.bit_buffer() {
177 AllOr::All => Ok(Self::AllValid),
178 AllOr::None => Ok(Self::AllInvalid),
179 AllOr::Some(buf) => Ok(Validity::from(buf.clone())),
180 }
181 }
182 Self::AllInvalid => Ok(Self::AllInvalid),
183 Self::Array(is_valid) => {
184 let maybe_is_valid = is_valid.take(indices.clone())?;
185 let is_valid = maybe_is_valid.fill_null(Scalar::from(false))?;
187 Ok(Self::Array(is_valid))
188 }
189 }
190 }
191
192 pub fn not(&self) -> VortexResult<Self> {
194 match self {
195 Validity::NonNullable => Ok(Validity::NonNullable),
196 Validity::AllValid => Ok(Validity::AllInvalid),
197 Validity::AllInvalid => Ok(Validity::AllValid),
198 Validity::Array(arr) => Ok(Validity::Array(arr.not()?)),
199 }
200 }
201
202 pub fn filter(&self, mask: &Mask) -> VortexResult<Self> {
210 match self {
213 v @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => {
214 Ok(v.clone())
215 }
216 Validity::Array(arr) => Ok(Validity::Array(arr.filter(mask.clone())?)),
217 }
218 }
219
220 pub fn to_mask(&self, length: usize, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
224 match self {
225 Self::NonNullable | Self::AllValid => Ok(Mask::new_true(length)),
226 Self::AllInvalid => Ok(Mask::new_false(length)),
227 Self::Array(arr) => arr.clone().execute::<Mask>(ctx),
228 }
229 }
230
231 pub fn execute_mask(&self, length: usize, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
232 match self {
233 Self::NonNullable | Self::AllValid => Ok(Mask::AllTrue(length)),
234 Self::AllInvalid => Ok(Mask::AllFalse(length)),
235 Self::Array(arr) => {
236 assert_eq!(
237 arr.len(),
238 length,
239 "Validity::Array length must equal to_logical's argument: {}, {}.",
240 arr.len(),
241 length,
242 );
243 arr.clone().execute::<Mask>(ctx)
246 }
247 }
248 }
249
250 pub fn mask_eq(&self, other: &Validity, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
252 match (self, other) {
253 (Validity::NonNullable, Validity::NonNullable) => Ok(true),
254 (Validity::AllValid, Validity::AllValid) => Ok(true),
255 (Validity::AllInvalid, Validity::AllInvalid) => Ok(true),
256 (Validity::Array(a), Validity::Array(b)) => {
257 let a = a.clone().execute::<Mask>(ctx)?;
258 let b = b.clone().execute::<Mask>(ctx)?;
259 Ok(a == b)
260 }
261 _ => Ok(false),
262 }
263 }
264
265 #[inline]
267 pub fn and(self, rhs: Validity) -> VortexResult<Validity> {
268 Ok(match (self, rhs) {
269 (Validity::NonNullable, Validity::NonNullable) => Validity::NonNullable,
271 (Validity::AllInvalid, _) | (_, Validity::AllInvalid) => Validity::AllInvalid,
273 (Validity::Array(a), Validity::AllValid)
275 | (Validity::Array(a), Validity::NonNullable)
276 | (Validity::NonNullable, Validity::Array(a))
277 | (Validity::AllValid, Validity::Array(a)) => Validity::Array(a),
278 (Validity::NonNullable, Validity::AllValid)
280 | (Validity::AllValid, Validity::NonNullable)
281 | (Validity::AllValid, Validity::AllValid) => Validity::AllValid,
282 (Validity::Array(lhs), Validity::Array(rhs)) => Validity::Array(
284 Binary
285 .try_new_array(lhs.len(), Operator::And, [lhs, rhs])?
286 .optimize()?,
287 ),
288 })
289 }
290
291 pub fn patch(
292 self,
293 len: usize,
294 indices_offset: usize,
295 indices: &ArrayRef,
296 patches: &Validity,
297 ctx: &mut ExecutionCtx,
298 ) -> VortexResult<Self> {
299 match (&self, patches) {
300 (Validity::NonNullable, Validity::NonNullable) => return Ok(Validity::NonNullable),
301 (Validity::NonNullable, _) => {
302 vortex_bail!("Can't patch a non-nullable validity with nullable validity")
303 }
304 (_, Validity::NonNullable) => {
305 vortex_bail!("Can't patch a nullable validity with non-nullable validity")
306 }
307 (Validity::AllValid, Validity::AllValid) => return Ok(Validity::AllValid),
308 (Validity::AllInvalid, Validity::AllInvalid) => return Ok(Validity::AllInvalid),
309 _ => {}
310 };
311
312 let own_nullability = if matches!(self, Validity::NonNullable) {
313 Nullability::NonNullable
314 } else {
315 Nullability::Nullable
316 };
317
318 let source = match self {
319 Validity::NonNullable => BoolArray::from(BitBuffer::new_set(len)),
320 Validity::AllValid => BoolArray::from(BitBuffer::new_set(len)),
321 Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(len)),
322 Validity::Array(a) => a.execute::<BoolArray>(ctx)?,
323 };
324
325 let patch_values = match patches {
326 Validity::NonNullable => BoolArray::from(BitBuffer::new_set(indices.len())),
327 Validity::AllValid => BoolArray::from(BitBuffer::new_set(indices.len())),
328 Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(indices.len())),
329 Validity::Array(a) => a.clone().execute::<BoolArray>(ctx)?,
330 };
331
332 let patches = Patches::new(
333 len,
334 indices_offset,
335 indices.clone(),
336 patch_values.into_array(),
337 None,
339 )?;
340
341 Ok(Self::from_array(
342 source.patch(&patches, ctx)?.into_array(),
343 own_nullability,
344 ))
345 }
346
347 #[inline]
349 pub fn into_nullable(self) -> Validity {
350 match self {
351 Self::NonNullable => Self::AllValid,
352 Self::AllValid | Self::AllInvalid | Self::Array(_) => self,
353 }
354 }
355
356 #[inline]
358 pub fn into_non_nullable(self, len: usize) -> Option<Validity> {
359 match self {
360 _ if len == 0 => Some(Validity::NonNullable),
361 Self::NonNullable => Some(Self::NonNullable),
362 Self::AllValid => Some(Self::NonNullable),
363 Self::AllInvalid => None,
364 Self::Array(is_valid) => {
365 is_valid
366 .statistics()
367 .compute_min::<bool>(&mut LEGACY_SESSION.create_execution_ctx())
368 .vortex_expect("validity array must support min")
369 .then(|| {
370 Self::NonNullable
372 })
373 }
374 }
375 }
376
377 #[inline]
379 pub fn cast_nullability(self, nullability: Nullability, len: usize) -> VortexResult<Validity> {
380 match nullability {
381 Nullability::NonNullable => self.into_non_nullable(len).ok_or_else(|| {
382 vortex_err!(InvalidArgument: "Cannot cast array with invalid values to non-nullable type.")
383 }),
384 Nullability::Nullable => Ok(self.into_nullable()),
385 }
386 }
387
388 #[inline]
390 pub fn copy_from_array(array: &ArrayRef) -> VortexResult<Self> {
391 let len = array.len();
392 let mask = array
393 .validity()?
394 .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
395 Ok(Validity::from_mask(mask, array.dtype().nullability()))
396 }
397
398 fn from_array(value: ArrayRef, nullability: Nullability) -> Self {
403 if !matches!(value.dtype(), DType::Bool(Nullability::NonNullable)) {
404 vortex_panic!("Expected a non-nullable boolean array")
405 }
406 match nullability {
407 Nullability::NonNullable => Self::NonNullable,
408 Nullability::Nullable => Self::Array(value),
409 }
410 }
411
412 #[inline]
414 pub fn maybe_len(&self) -> Option<usize> {
415 match self {
416 Self::NonNullable | Self::AllValid | Self::AllInvalid => None,
417 Self::Array(a) => Some(a.len()),
418 }
419 }
420
421 #[inline]
422 pub fn uncompressed_size(&self) -> usize {
423 if let Validity::Array(a) = self {
424 a.len().div_ceil(8)
425 } else {
426 0
427 }
428 }
429}
430
431impl From<BitBuffer> for Validity {
432 #[inline]
433 fn from(value: BitBuffer) -> Self {
434 let true_count = value.true_count();
435 if true_count == value.len() {
436 Self::AllValid
437 } else if true_count == 0 {
438 Self::AllInvalid
439 } else {
440 Self::Array(BoolArray::from(value).into_array())
441 }
442 }
443}
444
445impl FromIterator<Mask> for Validity {
446 #[inline]
447 fn from_iter<T: IntoIterator<Item = Mask>>(iter: T) -> Self {
448 Validity::from_mask(iter.into_iter().collect(), Nullability::Nullable)
449 }
450}
451
452impl FromIterator<bool> for Validity {
453 #[inline]
454 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
455 Validity::from(BitBuffer::from_iter(iter))
456 }
457}
458
459impl From<Nullability> for Validity {
460 #[inline]
461 fn from(value: Nullability) -> Self {
462 Validity::from(&value)
463 }
464}
465
466impl From<&Nullability> for Validity {
467 #[inline]
468 fn from(value: &Nullability) -> Self {
469 match *value {
470 Nullability::NonNullable => Validity::NonNullable,
471 Nullability::Nullable => Validity::AllValid,
472 }
473 }
474}
475
476impl Validity {
477 pub fn from_bit_buffer(buffer: BitBuffer, nullability: Nullability) -> Self {
478 if buffer.true_count() == buffer.len() {
479 nullability.into()
480 } else if buffer.true_count() == 0 {
481 Validity::AllInvalid
482 } else {
483 Validity::Array(BoolArray::new(buffer, Validity::NonNullable).into_array())
484 }
485 }
486
487 pub fn from_mask(mask: Mask, nullability: Nullability) -> Self {
488 assert!(
489 nullability == Nullability::Nullable || matches!(mask, Mask::AllTrue(_)),
490 "NonNullable validity must be AllValid",
491 );
492 match mask {
493 Mask::AllTrue(_) => match nullability {
494 Nullability::NonNullable => Validity::NonNullable,
495 Nullability::Nullable => Validity::AllValid,
496 },
497 Mask::AllFalse(_) => Validity::AllInvalid,
498 Mask::Values(values) => Validity::Array(values.into_array()),
499 }
500 }
501}
502
503impl IntoArray for Mask {
504 #[inline]
505 fn into_array(self) -> ArrayRef {
506 match self {
507 Self::AllTrue(len) => ConstantArray::new(true, len).into_array(),
508 Self::AllFalse(len) => ConstantArray::new(false, len).into_array(),
509 Self::Values(a) => a.into_array(),
510 }
511 }
512}
513
514impl IntoArray for &MaskValues {
515 #[inline]
516 fn into_array(self) -> ArrayRef {
517 BoolArray::new(self.bit_buffer().clone(), Validity::NonNullable).into_array()
518 }
519}
520
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_buffer::Buffer;
    use vortex_buffer::buffer;
    use vortex_mask::Mask;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::dtype::Nullability;
    use crate::validity::Validity;

    /// Patching positions 2 and 4 of a length-5 validity, for every nullable combination
    /// of source and patch validity.
    #[rstest]
    #[case(Validity::AllValid, 5, &[2, 4], Validity::AllValid, Validity::AllValid)]
    #[case(
        Validity::AllValid,
        5,
        &[2, 4],
        Validity::AllInvalid,
        Validity::Array(BoolArray::from_iter([true, true, false, true, false]).into_array())
    )]
    #[case(
        Validity::AllValid,
        5,
        &[2, 4],
        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
        Validity::Array(BoolArray::from_iter([true, true, true, true, false]).into_array())
    )]
    #[case(
        Validity::AllInvalid,
        5,
        &[2, 4],
        Validity::AllValid,
        Validity::Array(BoolArray::from_iter([false, false, true, false, true]).into_array())
    )]
    #[case(Validity::AllInvalid, 5, &[2, 4], Validity::AllInvalid, Validity::AllInvalid)]
    #[case(
        Validity::AllInvalid,
        5,
        &[2, 4],
        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
        Validity::Array(BoolArray::from_iter([false, false, true, false, false]).into_array())
    )]
    #[case(
        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
        5,
        &[2, 4],
        Validity::AllValid,
        Validity::Array(BoolArray::from_iter([false, true, true, true, true]).into_array())
    )]
    #[case(
        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
        5,
        &[2, 4],
        Validity::AllInvalid,
        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array())
    )]
    #[case(
        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
        5,
        &[2, 4],
        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
        Validity::Array(BoolArray::from_iter([false, true, true, true, false]).into_array())
    )]
    fn patch_validity(
        #[case] validity: Validity,
        #[case] len: usize,
        #[case] positions: &[u64],
        #[case] patches: Validity,
        #[case] expected: Validity,
    ) {
        let indices =
            PrimitiveArray::new(Buffer::copy_from(positions), Validity::NonNullable).into_array();

        // One context serves both the patch and the comparison.
        let mut ctx = LEGACY_SESSION.create_execution_ctx();

        let patched = validity
            .patch(len, 0, &indices, &patches, &mut ctx)
            .unwrap();
        assert!(patched.mask_eq(&expected, &mut ctx).unwrap());
    }

    /// A `NonNullable` validity cannot be patched with a nullable one; this is reported as
    /// an error before any index is examined.
    #[test]
    fn patch_non_nullable_with_nullable_is_an_error() {
        let result = Validity::NonNullable.patch(
            2,
            0,
            &buffer![0u64].into_array(),
            &Validity::AllInvalid,
            &mut LEGACY_SESSION.create_execution_ctx(),
        );
        assert!(result.is_err());
    }

    /// Index 4 does not fit in a validity of length 2. The source is nullable so that the
    /// nullability checks pass and the out-of-range index is what gets rejected.
    #[test]
    #[should_panic]
    fn out_of_bounds_patch() {
        Validity::AllValid
            .patch(
                2,
                0,
                &buffer![4u64].into_array(),
                &Validity::AllInvalid,
                &mut LEGACY_SESSION.create_execution_ctx(),
            )
            .unwrap();
    }

    /// An all-false mask cannot be represented as a non-nullable validity.
    #[test]
    #[should_panic]
    fn into_validity_nullable() {
        Validity::from_mask(Mask::AllFalse(10), Nullability::NonNullable);
    }

    /// A mask with mixed values cannot be represented as a non-nullable validity.
    #[test]
    #[should_panic]
    fn into_validity_nullable_array() {
        Validity::from_mask(Mask::from_iter(vec![true, false]), Nullability::NonNullable);
    }

    /// Taking from a constant-valid validity yields the validity of the indices themselves.
    #[rstest]
    #[case(
        Validity::AllValid,
        PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(),
        Validity::from_iter(vec![true, false])
    )]
    #[case(Validity::AllValid, buffer![0, 1].into_array(), Validity::AllValid)]
    #[case(
        Validity::AllValid,
        PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(),
        Validity::AllInvalid
    )]
    #[case(
        Validity::NonNullable,
        PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(),
        Validity::from_iter(vec![true, false])
    )]
    #[case(Validity::NonNullable, buffer![0, 1].into_array(), Validity::NonNullable)]
    #[case(
        Validity::NonNullable,
        PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(),
        Validity::AllInvalid
    )]
    fn validity_take(
        #[case] validity: Validity,
        #[case] indices: ArrayRef,
        #[case] expected: Validity,
    ) {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let taken = validity.take(&indices).unwrap();
        assert!(taken.mask_eq(&expected, &mut ctx).unwrap());
    }
}