Skip to main content

vortex_array/
validity.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Array validity and nullability behavior, used by arrays and compute functions.
5
6use std::fmt::Debug;
7use std::ops::Range;
8
9use vortex_buffer::BitBuffer;
10use vortex_error::VortexExpect as _;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_mask::AllOr;
16use vortex_mask::Mask;
17use vortex_mask::MaskValues;
18
19use crate::ArrayRef;
20use crate::Canonical;
21use crate::ExecutionCtx;
22use crate::IntoArray;
23use crate::LEGACY_SESSION;
24use crate::VortexSessionExecute;
25use crate::arrays::BoolArray;
26use crate::arrays::ConstantArray;
27use crate::arrays::scalar_fn::ScalarFnFactoryExt;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::Nullability;
31use crate::optimizer::ArrayOptimizer;
32use crate::patches::Patches;
33use crate::scalar::Scalar;
34use crate::scalar_fn::fns::binary::Binary;
35use crate::scalar_fn::fns::operators::Operator;
36
/// Validity information for an array.
///
/// The three unit variants describe every position of an array at once and carry no
/// length of their own; only [`Validity::Array`] stores per-position information.
#[derive(Clone)]
pub enum Validity {
    /// Items *can't* be null
    NonNullable,
    /// All items are valid
    AllValid,
    /// All items are null
    AllInvalid,
    /// The validity of each position in the array is determined by a boolean array.
    ///
    /// True values are valid, false values are invalid ("null").
    Array(ArrayRef),
}
51
52impl Debug for Validity {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            Self::NonNullable => write!(f, "NonNullable"),
56            Self::AllValid => write!(f, "AllValid"),
57            Self::AllInvalid => write!(f, "AllInvalid"),
58            Self::Array(arr) => write!(f, "SomeValid({})", arr.display_values()),
59        }
60    }
61}
62
63impl Validity {
64    /// Make a step towards canonicalising validity if necessary
65    pub fn execute(self, ctx: &mut ExecutionCtx) -> VortexResult<Validity> {
66        match self {
67            v @ Validity::NonNullable | v @ Validity::AllValid | v @ Validity::AllInvalid => Ok(v),
68            Validity::Array(a) => Ok(Validity::Array(a.execute::<Canonical>(ctx)?.into_array())),
69        }
70    }
71}
72
73impl Validity {
    /// The [`DType`] of the underlying validity array (if it exists): a non-nullable boolean.
    pub const DTYPE: DType = DType::Bool(Nullability::NonNullable);
76
77    /// Convert the validity to an array representation.
78    pub fn to_array(&self, len: usize) -> ArrayRef {
79        match self {
80            Self::NonNullable | Self::AllValid => ConstantArray::new(true, len).into_array(),
81            Self::AllInvalid => ConstantArray::new(false, len).into_array(),
82            Self::Array(a) => a.clone(),
83        }
84    }
85
86    /// If Validity is [`Validity::Array`], returns the array, otherwise returns `None`.
87    #[inline]
88    pub fn into_array(self) -> Option<ArrayRef> {
89        if let Self::Array(a) = self {
90            Some(a)
91        } else {
92            None
93        }
94    }
95
96    /// If Validity is [`Validity::Array`], returns a reference to the array array, otherwise returns `None`.
97    #[inline]
98    pub fn as_array(&self) -> Option<&ArrayRef> {
99        if let Self::Array(a) = self {
100            Some(a)
101        } else {
102            None
103        }
104    }
105
106    #[inline]
107    pub fn nullability(&self) -> Nullability {
108        if matches!(self, Self::NonNullable) {
109            Nullability::NonNullable
110        } else {
111            Nullability::Nullable
112        }
113    }
114
115    /// The union nullability and validity.
116    #[inline]
117    pub fn union_nullability(self, nullability: Nullability) -> Self {
118        match nullability {
119            Nullability::NonNullable => self,
120            Nullability::Nullable => self.into_nullable(),
121        }
122    }
123
124    /// Returns whether the `index` item is valid.
125    #[inline]
126    pub fn is_valid(&self, index: usize) -> VortexResult<bool> {
127        Ok(match self {
128            Self::NonNullable | Self::AllValid => true,
129            Self::AllInvalid => false,
130            Self::Array(a) => a
131                .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
132                .vortex_expect("Validity array must support execute_scalar")
133                .as_bool()
134                .value()
135                .vortex_expect("Validity must be non-nullable"),
136        })
137    }
138
139    #[inline]
140    pub fn is_null(&self, index: usize) -> VortexResult<bool> {
141        Ok(!self.is_valid(index)?)
142    }
143
144    #[inline]
145    pub fn slice(&self, range: Range<usize>) -> VortexResult<Self> {
146        match self {
147            Self::Array(a) => Ok(Self::Array(a.slice(range)?)),
148            Self::NonNullable | Self::AllValid | Self::AllInvalid => Ok(self.clone()),
149        }
150    }
151
152    pub fn take(&self, indices: &ArrayRef) -> VortexResult<Self> {
153        match self {
154            Self::NonNullable => {
155                let len = indices.len();
156                let indices_mask = indices
157                    .validity()?
158                    .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
159                match indices_mask.bit_buffer() {
160                    AllOr::All => {
161                        if indices.dtype().is_nullable() {
162                            Ok(Self::AllValid)
163                        } else {
164                            Ok(Self::NonNullable)
165                        }
166                    }
167                    AllOr::None => Ok(Self::AllInvalid),
168                    AllOr::Some(buf) => Ok(Validity::from(buf.clone())),
169                }
170            }
171            Self::AllValid => {
172                let len = indices.len();
173                let indices_mask = indices
174                    .validity()?
175                    .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
176                match indices_mask.bit_buffer() {
177                    AllOr::All => Ok(Self::AllValid),
178                    AllOr::None => Ok(Self::AllInvalid),
179                    AllOr::Some(buf) => Ok(Validity::from(buf.clone())),
180                }
181            }
182            Self::AllInvalid => Ok(Self::AllInvalid),
183            Self::Array(is_valid) => {
184                let maybe_is_valid = is_valid.take(indices.clone())?;
185                // Null indices invalidate that position.
186                let is_valid = maybe_is_valid.fill_null(Scalar::from(false))?;
187                Ok(Self::Array(is_valid))
188            }
189        }
190    }
191
192    // Invert the validity
193    pub fn not(&self) -> VortexResult<Self> {
194        match self {
195            Validity::NonNullable => Ok(Validity::NonNullable),
196            Validity::AllValid => Ok(Validity::AllInvalid),
197            Validity::AllInvalid => Ok(Validity::AllValid),
198            Validity::Array(arr) => Ok(Validity::Array(arr.not()?)),
199        }
200    }
201
202    /// Lazily filters a [`Validity`] with a selection mask, which keeps only the entries for which
203    /// the mask is true.
204    ///
205    /// The result has length equal to the number of true values in mask.
206    ///
207    /// If the validity is a [`Validity::Array`], then this lazily wraps it in a `FilterArray`
208    /// instead of eagerly filtering the values immediately.
209    pub fn filter(&self, mask: &Mask) -> VortexResult<Self> {
210        // NOTE(ngates): we take the mask as a reference to avoid the caller cloning unnecessarily
211        //  if we happen to be NonNullable, AllValid, or AllInvalid.
212        match self {
213            v @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => {
214                Ok(v.clone())
215            }
216            Validity::Array(arr) => Ok(Validity::Array(arr.filter(mask.clone())?)),
217        }
218    }
219
220    /// Converts this validity into a [`Mask`] of the given length.
221    ///
222    /// Valid elements are `true` and invalid elements are `false`.
223    pub fn to_mask(&self, length: usize, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
224        match self {
225            Self::NonNullable | Self::AllValid => Ok(Mask::new_true(length)),
226            Self::AllInvalid => Ok(Mask::new_false(length)),
227            Self::Array(arr) => arr.clone().execute::<Mask>(ctx),
228        }
229    }
230
231    pub fn execute_mask(&self, length: usize, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
232        match self {
233            Self::NonNullable | Self::AllValid => Ok(Mask::AllTrue(length)),
234            Self::AllInvalid => Ok(Mask::AllFalse(length)),
235            Self::Array(arr) => {
236                assert_eq!(
237                    arr.len(),
238                    length,
239                    "Validity::Array length must equal to_logical's argument: {}, {}.",
240                    arr.len(),
241                    length,
242                );
243                // TODO(ngates): I'm not sure execution should take arrays by ownership.
244                //  If so we should fix call sites to clone and this function takes self.
245                arr.clone().execute::<Mask>(ctx)
246            }
247        }
248    }
249
250    /// Compare two Validity values of the same length by executing them into masks if necessary.
251    pub fn mask_eq(&self, other: &Validity, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
252        match (self, other) {
253            (Validity::NonNullable, Validity::NonNullable) => Ok(true),
254            (Validity::AllValid, Validity::AllValid) => Ok(true),
255            (Validity::AllInvalid, Validity::AllInvalid) => Ok(true),
256            (Validity::Array(a), Validity::Array(b)) => {
257                let a = a.clone().execute::<Mask>(ctx)?;
258                let b = b.clone().execute::<Mask>(ctx)?;
259                Ok(a == b)
260            }
261            _ => Ok(false),
262        }
263    }
264
265    /// Logically & two Validity values of the same length
266    #[inline]
267    pub fn and(self, rhs: Validity) -> VortexResult<Validity> {
268        Ok(match (self, rhs) {
269            // Should be pretty clear
270            (Validity::NonNullable, Validity::NonNullable) => Validity::NonNullable,
271            // Any `AllInvalid` makes the output all invalid values
272            (Validity::AllInvalid, _) | (_, Validity::AllInvalid) => Validity::AllInvalid,
273            // All truthy values on one side, which makes no effect on an `Array` variant
274            (Validity::Array(a), Validity::AllValid)
275            | (Validity::Array(a), Validity::NonNullable)
276            | (Validity::NonNullable, Validity::Array(a))
277            | (Validity::AllValid, Validity::Array(a)) => Validity::Array(a),
278            // Both sides are all valid
279            (Validity::NonNullable, Validity::AllValid)
280            | (Validity::AllValid, Validity::NonNullable)
281            | (Validity::AllValid, Validity::AllValid) => Validity::AllValid,
282            // Here we actually have to do some work
283            (Validity::Array(lhs), Validity::Array(rhs)) => Validity::Array(
284                Binary
285                    .try_new_array(lhs.len(), Operator::And, [lhs, rhs])?
286                    .optimize()?,
287            ),
288        })
289    }
290
291    pub fn patch(
292        self,
293        len: usize,
294        indices_offset: usize,
295        indices: &ArrayRef,
296        patches: &Validity,
297        ctx: &mut ExecutionCtx,
298    ) -> VortexResult<Self> {
299        match (&self, patches) {
300            (Validity::NonNullable, Validity::NonNullable) => return Ok(Validity::NonNullable),
301            (Validity::NonNullable, _) => {
302                vortex_bail!("Can't patch a non-nullable validity with nullable validity")
303            }
304            (_, Validity::NonNullable) => {
305                vortex_bail!("Can't patch a nullable validity with non-nullable validity")
306            }
307            (Validity::AllValid, Validity::AllValid) => return Ok(Validity::AllValid),
308            (Validity::AllInvalid, Validity::AllInvalid) => return Ok(Validity::AllInvalid),
309            _ => {}
310        };
311
312        let own_nullability = if matches!(self, Validity::NonNullable) {
313            Nullability::NonNullable
314        } else {
315            Nullability::Nullable
316        };
317
318        let source = match self {
319            Validity::NonNullable => BoolArray::from(BitBuffer::new_set(len)),
320            Validity::AllValid => BoolArray::from(BitBuffer::new_set(len)),
321            Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(len)),
322            Validity::Array(a) => a.execute::<BoolArray>(ctx)?,
323        };
324
325        let patch_values = match patches {
326            Validity::NonNullable => BoolArray::from(BitBuffer::new_set(indices.len())),
327            Validity::AllValid => BoolArray::from(BitBuffer::new_set(indices.len())),
328            Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(indices.len())),
329            Validity::Array(a) => a.clone().execute::<BoolArray>(ctx)?,
330        };
331
332        let patches = Patches::new(
333            len,
334            indices_offset,
335            indices.clone(),
336            patch_values.into_array(),
337            // TODO(0ax1): chunk offsets
338            None,
339        )?;
340
341        Ok(Self::from_array(
342            source.patch(&patches, ctx)?.into_array(),
343            own_nullability,
344        ))
345    }
346
347    /// Convert into a nullable variant
348    #[inline]
349    pub fn into_nullable(self) -> Validity {
350        match self {
351            Self::NonNullable => Self::AllValid,
352            Self::AllValid | Self::AllInvalid | Self::Array(_) => self,
353        }
354    }
355
356    /// Convert into a non-nullable variant
357    #[inline]
358    pub fn into_non_nullable(self, len: usize) -> Option<Validity> {
359        match self {
360            _ if len == 0 => Some(Validity::NonNullable),
361            Self::NonNullable => Some(Self::NonNullable),
362            Self::AllValid => Some(Self::NonNullable),
363            Self::AllInvalid => None,
364            Self::Array(is_valid) => {
365                is_valid
366                    .statistics()
367                    .compute_min::<bool>(&mut LEGACY_SESSION.create_execution_ctx())
368                    .vortex_expect("validity array must support min")
369                    .then(|| {
370                        // min true => all true
371                        Self::NonNullable
372                    })
373            }
374        }
375    }
376
377    /// Convert into a variant compatible with the given nullability, if possible.
378    #[inline]
379    pub fn cast_nullability(self, nullability: Nullability, len: usize) -> VortexResult<Validity> {
380        match nullability {
381            Nullability::NonNullable => self.into_non_nullable(len).ok_or_else(|| {
382                vortex_err!(InvalidArgument: "Cannot cast array with invalid values to non-nullable type.")
383            }),
384            Nullability::Nullable => Ok(self.into_nullable()),
385        }
386    }
387
388    /// Create Validity by copying the given array's validity.
389    #[inline]
390    pub fn copy_from_array(array: &ArrayRef) -> VortexResult<Self> {
391        let len = array.len();
392        let mask = array
393            .validity()?
394            .to_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;
395        Ok(Validity::from_mask(mask, array.dtype().nullability()))
396    }
397
398    /// Create Validity from boolean array with given nullability of the array.
399    ///
400    /// Note: You want to pass the nullability of parent array and not the nullability of the validity array itself
401    ///     as that is always nonnullable
402    fn from_array(value: ArrayRef, nullability: Nullability) -> Self {
403        if !matches!(value.dtype(), DType::Bool(Nullability::NonNullable)) {
404            vortex_panic!("Expected a non-nullable boolean array")
405        }
406        match nullability {
407            Nullability::NonNullable => Self::NonNullable,
408            Nullability::Nullable => Self::Array(value),
409        }
410    }
411
412    /// Returns the length of the validity array, if it exists.
413    #[inline]
414    pub fn maybe_len(&self) -> Option<usize> {
415        match self {
416            Self::NonNullable | Self::AllValid | Self::AllInvalid => None,
417            Self::Array(a) => Some(a.len()),
418        }
419    }
420
421    #[inline]
422    pub fn uncompressed_size(&self) -> usize {
423        if let Validity::Array(a) = self {
424            a.len().div_ceil(8)
425        } else {
426            0
427        }
428    }
429}
430
431impl From<BitBuffer> for Validity {
432    #[inline]
433    fn from(value: BitBuffer) -> Self {
434        let true_count = value.true_count();
435        if true_count == value.len() {
436            Self::AllValid
437        } else if true_count == 0 {
438            Self::AllInvalid
439        } else {
440            Self::Array(BoolArray::from(value).into_array())
441        }
442    }
443}
444
445impl FromIterator<Mask> for Validity {
446    #[inline]
447    fn from_iter<T: IntoIterator<Item = Mask>>(iter: T) -> Self {
448        Validity::from_mask(iter.into_iter().collect(), Nullability::Nullable)
449    }
450}
451
452impl FromIterator<bool> for Validity {
453    #[inline]
454    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
455        Validity::from(BitBuffer::from_iter(iter))
456    }
457}
458
459impl From<Nullability> for Validity {
460    #[inline]
461    fn from(value: Nullability) -> Self {
462        Validity::from(&value)
463    }
464}
465
466impl From<&Nullability> for Validity {
467    #[inline]
468    fn from(value: &Nullability) -> Self {
469        match *value {
470            Nullability::NonNullable => Validity::NonNullable,
471            Nullability::Nullable => Validity::AllValid,
472        }
473    }
474}
475
476impl Validity {
477    pub fn from_bit_buffer(buffer: BitBuffer, nullability: Nullability) -> Self {
478        if buffer.true_count() == buffer.len() {
479            nullability.into()
480        } else if buffer.true_count() == 0 {
481            Validity::AllInvalid
482        } else {
483            Validity::Array(BoolArray::new(buffer, Validity::NonNullable).into_array())
484        }
485    }
486
487    pub fn from_mask(mask: Mask, nullability: Nullability) -> Self {
488        assert!(
489            nullability == Nullability::Nullable || matches!(mask, Mask::AllTrue(_)),
490            "NonNullable validity must be AllValid",
491        );
492        match mask {
493            Mask::AllTrue(_) => match nullability {
494                Nullability::NonNullable => Validity::NonNullable,
495                Nullability::Nullable => Validity::AllValid,
496            },
497            Mask::AllFalse(_) => Validity::AllInvalid,
498            Mask::Values(values) => Validity::Array(values.into_array()),
499        }
500    }
501}
502
503impl IntoArray for Mask {
504    #[inline]
505    fn into_array(self) -> ArrayRef {
506        match self {
507            Self::AllTrue(len) => ConstantArray::new(true, len).into_array(),
508            Self::AllFalse(len) => ConstantArray::new(false, len).into_array(),
509            Self::Values(a) => a.into_array(),
510        }
511    }
512}
513
514impl IntoArray for &MaskValues {
515    #[inline]
516    fn into_array(self) -> ArrayRef {
517        BoolArray::new(self.bit_buffer().clone(), Validity::NonNullable).into_array()
518    }
519}
520
521#[cfg(test)]
522mod tests {
523    use rstest::rstest;
524    use vortex_buffer::Buffer;
525    use vortex_buffer::buffer;
526    use vortex_mask::Mask;
527
528    use crate::ArrayRef;
529    use crate::IntoArray;
530    use crate::LEGACY_SESSION;
531    use crate::VortexSessionExecute;
532    use crate::arrays::PrimitiveArray;
533    use crate::dtype::Nullability;
534    use crate::validity::BoolArray;
535    use crate::validity::Validity;
536
537    #[rstest]
538    #[case(Validity::AllValid, 5, &[2, 4], Validity::AllValid, Validity::AllValid)]
539    #[case(
540        Validity::AllValid,
541        5,
542        &[2, 4],
543        Validity::AllInvalid,
544        Validity::Array(BoolArray::from_iter([true, true, false, true, false]).into_array())
545    )]
546    #[case(
547        Validity::AllValid,
548        5,
549        &[2, 4],
550        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
551        Validity::Array(BoolArray::from_iter([true, true, true, true, false]).into_array())
552    )]
553    #[case(
554        Validity::AllInvalid,
555        5,
556        &[2, 4],
557        Validity::AllValid,
558        Validity::Array(BoolArray::from_iter([false, false, true, false, true]).into_array())
559    )]
560    #[case(Validity::AllInvalid, 5, &[2, 4], Validity::AllInvalid, Validity::AllInvalid)]
561    #[case(
562        Validity::AllInvalid,
563        5,
564        &[2, 4],
565        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
566        Validity::Array(BoolArray::from_iter([false, false, true, false, false]).into_array())
567    )]
568    #[case(
569        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
570        5,
571        &[2, 4],
572        Validity::AllValid,
573        Validity::Array(BoolArray::from_iter([false, true, true, true, true]).into_array())
574    )]
575    #[case(
576        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
577        5,
578        &[2, 4],
579        Validity::AllInvalid,
580        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array())
581    )]
582    #[case(
583        Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()),
584        5,
585        &[2, 4],
586        Validity::Array(BoolArray::from_iter([true, false]).into_array()),
587        Validity::Array(BoolArray::from_iter([false, true, true, true, false]).into_array())
588    )]
589
590    fn patch_validity(
591        #[case] validity: Validity,
592        #[case] len: usize,
593        #[case] positions: &[u64],
594        #[case] patches: Validity,
595        #[case] expected: Validity,
596    ) {
597        let indices =
598            PrimitiveArray::new(Buffer::copy_from(positions), Validity::NonNullable).into_array();
599
600        let mut ctx = LEGACY_SESSION.create_execution_ctx();
601
602        assert!(
603            validity
604                .patch(
605                    len,
606                    0,
607                    &indices,
608                    &patches,
609                    &mut LEGACY_SESSION.create_execution_ctx(),
610                )
611                .unwrap()
612                .mask_eq(&expected, &mut ctx)
613                .unwrap()
614        );
615    }
616
617    #[test]
618    #[should_panic]
619    fn out_of_bounds_patch() {
620        Validity::NonNullable
621            .patch(
622                2,
623                0,
624                &buffer![4].into_array(),
625                &Validity::AllInvalid,
626                &mut LEGACY_SESSION.create_execution_ctx(),
627            )
628            .unwrap();
629    }
630
631    #[test]
632    #[should_panic]
633    fn into_validity_nullable() {
634        Validity::from_mask(Mask::AllFalse(10), Nullability::NonNullable);
635    }
636
637    #[test]
638    #[should_panic]
639    fn into_validity_nullable_array() {
640        Validity::from_mask(Mask::from_iter(vec![true, false]), Nullability::NonNullable);
641    }
642
643    #[rstest]
644    #[case(
645        Validity::AllValid,
646        PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(),
647        Validity::from_iter(vec![true, false])
648    )]
649    #[case(Validity::AllValid, buffer![0, 1].into_array(), Validity::AllValid)]
650    #[case(
651        Validity::AllValid,
652        PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(),
653        Validity::AllInvalid
654    )]
655    #[case(
656        Validity::NonNullable,
657        PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(),
658        Validity::from_iter(vec![true, false])
659    )]
660    #[case(Validity::NonNullable, buffer![0, 1].into_array(), Validity::NonNullable)]
661    #[case(
662        Validity::NonNullable,
663        PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(),
664        Validity::AllInvalid
665    )]
666    fn validity_take(
667        #[case] validity: Validity,
668        #[case] indices: ArrayRef,
669        #[case] expected: Validity,
670    ) {
671        let mut ctx = LEGACY_SESSION.create_execution_ctx();
672        assert!(
673            validity
674                .take(&indices)
675                .unwrap()
676                .mask_eq(&expected, &mut ctx)
677                .unwrap()
678        );
679    }
680}