Skip to main content

vortex_array/
mask.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5use vortex_error::vortex_bail;
6use vortex_mask::Mask;
7
8use crate::ArrayRef;
9use crate::Executable;
10use crate::ExecutionCtx;
11use crate::IntoArray;
12use crate::arrays::BoolArray;
13use crate::columnar::Columnar;
14use crate::dtype::DType;
15use crate::dtype::Nullability;
16use crate::validity::Validity;
17
18impl Executable for Mask {
19    /// Executes a boolean array into a [`Mask`].
20    ///
21    /// The array must have a non-nullable boolean dtype. To execute a nullable boolean array,
22    /// coercing null elements to `false`, first call
23    /// [`ArrayRef::fill_null(false)`](crate::builtins::ArrayBuiltins::fill_null).
24    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
25        if !matches!(array.dtype(), DType::Bool(Nullability::NonNullable)) {
26            vortex_bail!(
27                "Mask array must have boolean(NonNullable) dtype, not {}",
28                array.dtype()
29            );
30        }
31
32        let array_len = array.len();
33        Ok(match array.execute(ctx)? {
34            Columnar::Constant(s) => {
35                Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false))
36            }
37            Columnar::Canonical(a) => {
38                let bool = a.into_array().execute::<BoolArray>(ctx)?;
39                Mask::from(bool.into_bit_buffer())
40            }
41        })
42    }
43}
44
45/// An adapter that coerces null elements of a boolean array to `false` before executing it into a
46/// [`Mask`]. Created by [`ArrayRef::null_as_false`].
47///
48/// Use for filter and pruning predicates over nullable data, where SQL semantics treat `NULL` as
49/// not matching.
50///
51/// Prefer `array.null_as_false().execute(ctx)` over `array.fill_null(false)?.execute::<Mask>(ctx)`:
52/// `fill_null` on a lazy `ScalarFn` array (e.g. the result of `apply(<predicate>)`) is currently
53/// slow because its `validity()` executes the predicate expression.
54pub struct NullAsFalse(ArrayRef);
55
56impl ArrayRef {
57    /// Returns an adapter that treats null elements of this boolean array as `false` when executed
58    /// into a [`Mask`]. See [`NullAsFalse`].
59    pub fn null_as_false(self) -> NullAsFalse {
60        NullAsFalse(self)
61    }
62}
63
64impl NullAsFalse {
65    /// Executes the boolean array into a [`Mask`], coercing null elements to `false`.
66    ///
67    /// Canonicalizes the (possibly lazy) array exactly once and folds validity into the value bits
68    /// with a single `AND` that reuses the value buffer when it is uniquely owned.
69    pub fn execute(self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
70        let array = self.0;
71        if !matches!(array.dtype(), DType::Bool(_)) {
72            vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype());
73        }
74        // Non-nullable input needs no coercion; defer to the strict `Mask` execution.
75        if !array.dtype().is_nullable() {
76            return array.execute::<Mask>(ctx);
77        }
78
79        let len = array.len();
80        Ok(match array.execute::<Columnar>(ctx)? {
81            Columnar::Constant(c) => Mask::new(len, c.scalar().as_bool().value().unwrap_or(false)),
82            Columnar::Canonical(c) => {
83                let bool = c.into_array().execute::<BoolArray>(ctx)?;
84                match bool.as_ref().validity()? {
85                    Validity::NonNullable | Validity::AllValid => {
86                        Mask::from_buffer(bool.into_bit_buffer())
87                    }
88                    Validity::AllInvalid => Mask::new_false(len),
89                    Validity::Array(v) => {
90                        let validity_bits = v.execute::<BoolArray>(ctx)?.into_bit_buffer();
91                        Mask::from_buffer(bool.into_bit_buffer() & &validity_bits)
92                    }
93                }
94            }
95        })
96    }
97}
98
#[cfg(test)]
mod tests {
    //! Tests for executing boolean arrays into a [`Mask`], both through the strict
    //! `Executable for Mask` path and through the `NullAsFalse` adapter.

    use vortex_error::VortexResult;
    use vortex_mask::Mask;

    use crate::ExecutionCtx;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::array_session;
    use crate::arrays::BoolArray;
    use crate::arrays::ConstantArray;
    use crate::builtins::ArrayBuiltins;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::scalar::Scalar;

    /// Creates a fresh execution context for a single test.
    fn ctx() -> ExecutionCtx {
        array_session().create_execution_ctx()
    }

    #[test]
    fn mask_non_nullable() -> VortexResult<()> {
        let array = BoolArray::from_iter([true, false, true]).into_array();
        let mask = array.execute::<Mask>(&mut ctx())?;
        assert_eq!(mask, Mask::from_iter([true, false, true]));
        Ok(())
    }

    #[test]
    fn mask_rejects_nullable() {
        let array = BoolArray::from_iter([Some(true), None]).into_array();
        assert!(array.execute::<Mask>(&mut ctx()).is_err());
    }

    /// The strict path must refuse arrays whose dtype is not boolean at all.
    #[test]
    fn mask_rejects_non_bool() {
        let array =
            ConstantArray::new(Scalar::null(DType::Utf8(Nullability::Nullable)), 4).into_array();
        assert!(array.execute::<Mask>(&mut ctx()).is_err());
    }

    #[test]
    fn fill_null_then_mask_coerces_nulls() -> VortexResult<()> {
        let array = BoolArray::from_iter([Some(true), None, Some(false), None]).into_array();
        let mask = array.fill_null(false)?.execute::<Mask>(&mut ctx())?;
        assert_eq!(mask, Mask::from_iter([true, false, false, false]));
        Ok(())
    }

    #[test]
    fn fill_null_then_mask_null_constant() -> VortexResult<()> {
        let array =
            ConstantArray::new(Scalar::null(DType::Bool(Nullability::Nullable)), 4).into_array();
        let mask = array.fill_null(false)?.execute::<Mask>(&mut ctx())?;
        assert_eq!(mask, Mask::new_false(4));
        Ok(())
    }

    #[test]
    fn null_as_false_non_nullable() -> VortexResult<()> {
        let array = BoolArray::from_iter([true, false, true]).into_array();
        let mask = array.null_as_false().execute(&mut ctx())?;
        assert_eq!(mask, Mask::from_iter([true, false, true]));
        Ok(())
    }

    #[test]
    fn null_as_false_treats_null_as_false() -> VortexResult<()> {
        let array = BoolArray::from_iter([Some(true), None, Some(false), None]).into_array();
        let mask = array.null_as_false().execute(&mut ctx())?;
        assert_eq!(mask, Mask::from_iter([true, false, false, false]));
        Ok(())
    }

    /// A nullable array that happens to contain no nulls must keep its values untouched.
    #[test]
    fn null_as_false_all_valid_nullable() -> VortexResult<()> {
        let array = BoolArray::from_iter([Some(true), Some(false), Some(true)]).into_array();
        let mask = array.null_as_false().execute(&mut ctx())?;
        assert_eq!(mask, Mask::from_iter([true, false, true]));
        Ok(())
    }

    #[test]
    fn null_as_false_null_constant() -> VortexResult<()> {
        let array =
            ConstantArray::new(Scalar::null(DType::Bool(Nullability::Nullable)), 4).into_array();
        let mask = array.null_as_false().execute(&mut ctx())?;
        assert_eq!(mask, Mask::new_false(4));
        Ok(())
    }

    /// The adapter only relaxes nullability; a non-boolean dtype is still an error.
    #[test]
    fn null_as_false_rejects_non_bool() {
        let array =
            ConstantArray::new(Scalar::null(DType::Utf8(Nullability::Nullable)), 4).into_array();
        assert!(array.null_as_false().execute(&mut ctx()).is_err());
    }

    #[test]
    fn null_as_false_matches_fill_null_then_mask() -> VortexResult<()> {
        let array =
            BoolArray::from_iter([Some(true), None, Some(false), Some(true), None]).into_array();
        let via_fill_null = array.fill_null(false)?.execute::<Mask>(&mut ctx())?;
        let via_coerce = array.null_as_false().execute(&mut ctx())?;
        assert_eq!(via_coerce, via_fill_null);
        Ok(())
    }
}