Skip to main content

vortex_array/
columnar.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::env::VarError;
5use std::sync::LazyLock;
6
7use vortex_dtype::DType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_panic;
11
12use crate::AnyCanonical;
13use crate::Array;
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::CanonicalView;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::arrays::ConstantArray;
21use crate::arrays::ConstantVTable;
22use crate::matcher::Matcher;
23use crate::scalar::Scalar;
24
/// Represents a columnar array of data, either in canonical form or as a constant array.
///
/// Since the [`Canonical`] enum has one variant per logical data type, it is inefficient for
/// representing constant arrays. The [`Columnar`] enum allows holding an array of data in either
/// canonical or constant form, enabling efficient handling of constants during execution.
pub enum Columnar {
    /// A columnar array in canonical form, stored as a [`Canonical`].
    Canonical(Canonical),
    /// A columnar array in constant form, stored as a [`ConstantArray`].
    Constant(ConstantArray),
}
36
37impl Columnar {
38    /// Creates a new columnar array from a scalar.
39    pub fn constant<S: Into<Scalar>>(scalar: S, len: usize) -> Self {
40        Columnar::Constant(ConstantArray::new(scalar.into(), len))
41    }
42
43    /// Returns the length of this columnar array.
44    pub fn len(&self) -> usize {
45        match self {
46            Columnar::Canonical(canonical) => canonical.len(),
47            Columnar::Constant(constant) => constant.len(),
48        }
49    }
50
51    /// Returns true if this columnar array has length zero.
52    pub fn is_empty(&self) -> bool {
53        self.len() == 0
54    }
55
56    /// Returns the data type of this columnar array.
57    pub fn dtype(&self) -> &DType {
58        match self {
59            Columnar::Canonical(canonical) => canonical.dtype(),
60            Columnar::Constant(constant) => constant.dtype(),
61        }
62    }
63}
64
65impl IntoArray for Columnar {
66    fn into_array(self) -> ArrayRef {
67        match self {
68            Columnar::Canonical(canonical) => canonical.into_array(),
69            Columnar::Constant(constant) => constant.into_array(),
70        }
71    }
72}
73
74/// Executing into a [`Columnar`] is implemented by repeatedly executing the array until we
75/// converge on either a constant or canonical.
76///
77/// For safety, we will error when the number of execution iterations reaches 128. We may want this
78/// to be configurable in the future in case of highly complex array trees, but in practice we
79/// don't expect to ever reach this limit.
80impl Executable for Columnar {
81    fn execute(mut array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
82        static MAX_ITERATIONS: LazyLock<usize> =
83            LazyLock::new(|| match std::env::var("VORTEX_MAX_ITERATIONS") {
84                Ok(val) => val.parse::<usize>().unwrap_or_else(|e| {
85                    vortex_panic!("VORTEX_MAX_ITERATIONS is not a valid usize: {e}")
86                }),
87                Err(VarError::NotPresent) => 128,
88                Err(VarError::NotUnicode(_)) => {
89                    vortex_panic!("VORTEX_MAX_ITERATIONS is not a valid unicode string")
90                }
91            });
92
93        for _ in 0..*MAX_ITERATIONS {
94            // Check for termination conditions
95            if let Some(constant) = array.as_opt::<ConstantVTable>() {
96                ctx.log(format_args!("-> constant({})", constant.scalar()));
97                return Ok(Columnar::Constant(constant.clone()));
98            }
99            if let Some(canonical) = array.as_opt::<AnyCanonical>() {
100                ctx.log(format_args!("-> canonical {}", array));
101                return Ok(Columnar::Canonical(canonical.into()));
102            }
103
104            // Otherwise execute the array one step
105            array = array.execute(ctx)?;
106        }
107
108        // If we reach here, we exceeded the maximum number of iterations, so error.
109        vortex_bail!("Exceeded maximum execution iterations while executing to Columnar")
110    }
111}
112
/// A borrowed counterpart to [`Columnar`]: a view over an array that is either canonical or
/// constant.
///
/// This is the match type produced by the [`AnyColumnar`] matcher.
pub enum ColumnarView<'a> {
    /// A view over an array in canonical form.
    Canonical(CanonicalView<'a>),
    /// A reference to an array in constant form.
    Constant(&'a ConstantArray),
}
117
118impl<'a> AsRef<dyn Array> for ColumnarView<'a> {
119    fn as_ref(&self) -> &dyn Array {
120        match self {
121            ColumnarView::Canonical(canonical) => canonical.as_ref(),
122            ColumnarView::Constant(constant) => constant.as_ref(),
123        }
124    }
125}
126
127pub struct AnyColumnar;
128impl Matcher for AnyColumnar {
129    type Match<'a> = ColumnarView<'a>;
130
131    fn try_match<'a>(array: &'a dyn Array) -> Option<Self::Match<'a>> {
132        if let Some(constant) = array.as_opt::<ConstantVTable>() {
133            Some(ColumnarView::Constant(constant))
134        } else {
135            array.as_opt::<AnyCanonical>().map(ColumnarView::Canonical)
136        }
137    }
138}