Skip to main content

vortex_array/scalar_fn/internal/
row_count.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use vortex_array::ArrayRef;
7use vortex_array::ExecutionCtx;
8use vortex_array::arrays::ScalarFn;
9use vortex_array::arrays::scalar_fn::ExactScalarFn;
10use vortex_array::arrays::scalar_fn::ScalarFnArrayExt;
11use vortex_array::dtype::DType;
12use vortex_array::dtype::Nullability;
13use vortex_array::dtype::PType;
14use vortex_array::expr::Expression;
15use vortex_array::scalar_fn::Arity;
16use vortex_array::scalar_fn::ChildName;
17use vortex_array::scalar_fn::EmptyOptions;
18use vortex_array::scalar_fn::ExecutionArgs;
19use vortex_array::scalar_fn::ScalarFnId;
20use vortex_array::scalar_fn::ScalarFnVTable;
21use vortex_error::VortexResult;
22use vortex_error::vortex_bail;
23use vortex_error::vortex_ensure;
24
/// Marker expression standing in for the number of rows in the scope being evaluated.
///
/// `RowCount` takes no arguments and carries no data of its own. It is emitted
/// while building pruning predicates that need a scope-level quantity which is
/// not kept as an ordinary stats column — for example the row count of the
/// current file or zone.
///
/// The placeholder *MUST NOT* reach evaluation: whichever layer owns the scope
/// is responsible for swapping every occurrence for a concrete array with
/// [`substitute_row_count`] first. At present that rewrite only happens in the
/// context of stats pruning.
///
/// Because the node is purely a marker inside a lazy expression tree, invoking
/// [`ScalarFnVTable::execute`] on it directly returns an error.
#[derive(Clone)]
pub struct RowCount;
40
41impl ScalarFnVTable for RowCount {
42    type Options = EmptyOptions;
43
44    fn id(&self) -> ScalarFnId {
45        ScalarFnId::from("vortex.row_count")
46    }
47
48    fn arity(&self, _options: &Self::Options) -> Arity {
49        Arity::Exact(0)
50    }
51
52    fn child_name(&self, _options: &Self::Options, _child_idx: usize) -> ChildName {
53        unreachable!("RowCount has arity 0")
54    }
55
56    fn fmt_sql(
57        &self,
58        _options: &Self::Options,
59        _expr: &Expression,
60        f: &mut Formatter<'_>,
61    ) -> std::fmt::Result {
62        write!(f, "row_count()")
63    }
64
65    fn return_dtype(&self, _options: &Self::Options, _args: &[DType]) -> VortexResult<DType> {
66        Ok(DType::Primitive(PType::U64, Nullability::NonNullable))
67    }
68
69    fn execute(
70        &self,
71        _options: &Self::Options,
72        _args: &dyn ExecutionArgs,
73        _ctx: &mut ExecutionCtx,
74    ) -> VortexResult<ArrayRef> {
75        vortex_bail!("RowCount must be substituted before evaluation")
76    }
77
78    fn is_null_sensitive(&self, _options: &Self::Options) -> bool {
79        false
80    }
81
82    fn is_fallible(&self, _options: &Self::Options) -> bool {
83        false
84    }
85}
86
87/// Returns whether `array` contains a [`RowCount`] placeholder.
88///
89/// Traversal is limited to lazy [`ScalarFnArray`] nodes produced by
90/// [`ArrayRef::apply`][crate::ArrayRef::apply]. Other arrays are evaluation
91/// leaves and cannot contain unevaluated placeholders.
92///
93/// [`ScalarFnArray`]: vortex_array::arrays::ScalarFnArray
94pub fn contains_row_count(array: &ArrayRef) -> bool {
95    if array.is::<ExactScalarFn<RowCount>>() {
96        return true;
97    }
98    match array.as_opt::<ScalarFn>() {
99        Some(view) => view.iter_children().any(contains_row_count),
100        None => false,
101    }
102}
103
104/// Replaces every [`RowCount`] placeholder with `replacement`.
105///
106/// The replacement must have the same dtype and length as each placeholder.
107/// Lazy [`ScalarFnArray`] ancestors are rewritten through slot take/put so
108/// unaffected children are preserved, while non-[`ScalarFn`] arrays are returned
109/// unchanged.
110///
111/// [`ScalarFnArray`]: vortex_array::arrays::ScalarFnArray
112pub fn substitute_row_count(array: ArrayRef, replacement: &ArrayRef) -> VortexResult<ArrayRef> {
113    if array.is::<ExactScalarFn<RowCount>>() {
114        vortex_ensure!(
115            replacement.len() == array.len(),
116            "RowCount replacement length {} does not match scope length {}",
117            replacement.len(),
118            array.len(),
119        );
120        vortex_ensure!(
121            replacement.dtype() == array.dtype(),
122            "RowCount replacement dtype {} does not match scope dtype {}",
123            replacement.dtype(),
124            array.dtype(),
125        );
126        return Ok(replacement.clone());
127    }
128
129    if !array.is::<ScalarFn>() {
130        return Ok(array);
131    }
132
133    let nchildren = array.nchildren();
134    let mut array = array;
135    for slot_idx in 0..nchildren {
136        // SAFETY: `substitute_row_count` always returns an array with the same dtype and
137        // length as its input — `RowCount` placeholders are replaced with a checked
138        // replacement (same dtype and length), and `ScalarFn` recursion preserves both by
139        // operating on each slot in place.
140        let (taken, child) = unsafe { array.take_slot_unchecked(slot_idx)? };
141        let new_child = substitute_row_count(child, replacement)?;
142        array = unsafe { taken.put_slot_unchecked(slot_idx, new_child)? };
143    }
144    Ok(array)
145}
146
#[cfg(test)]
mod tests {
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;

    use crate::scalar_fn::EmptyOptions;
    use crate::scalar_fn::internal::row_count::RowCount;
    use crate::scalar_fn::vtable::ScalarFnVTableExt;

    /// A `RowCount` expression resolves to a non-nullable `u64` even when the
    /// scope dtype it is resolved against is something unrelated.
    #[test]
    fn row_count_helper_dtype() {
        let scope = DType::Primitive(PType::I32, Nullability::Nullable);
        let expected = DType::Primitive(PType::U64, Nullability::NonNullable);

        let actual = RowCount
            .new_expr(EmptyOptions, [])
            .return_dtype(&scope)
            .unwrap();

        assert_eq!(actual, expected);
    }
}