vortex_array/expr/
expression.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt;
6use std::fmt::Debug;
7use std::fmt::Display;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::sync::Arc;
12
13use itertools::Itertools;
14use vortex_dtype::DType;
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_vector::Vector;
18use vortex_vector::VectorOps;
19
20use crate::ArrayRef;
21use crate::expr::ChildName;
22use crate::expr::ExecutionArgs;
23use crate::expr::ExprId;
24use crate::expr::ExprVTable;
25use crate::expr::ExpressionView;
26use crate::expr::Root;
27use crate::expr::StatsCatalog;
28use crate::expr::VTable;
29use crate::expr::display::DisplayTreeExpr;
30use crate::expr::stats::Stat;
31
32/// A node in a Vortex expression tree.
33///
34/// Expressions represent scalar computations that can be performed on data. Each
35/// expression consists of an encoding (vtable), heap-allocated metadata, and child expressions.
36#[derive(Clone)]
37pub struct Expression {
38    /// The vtable for this expression.
39    vtable: ExprVTable,
40    /// The instance data for this expression.
41    data: Arc<dyn Any + Send + Sync>,
42    /// Any children of this expression.
43    children: Arc<[Expression]>,
44}
45
46impl Expression {
47    /// Create a new expression from a vtable.
48    pub fn try_new<V: VTable>(
49        vtable: V,
50        data: V::Instance,
51        children: impl Into<Arc<[Expression]>>,
52    ) -> VortexResult<Self> {
53        let vtable = ExprVTable::new::<V>(vtable);
54        let data = Arc::new(data);
55        Self::try_new_erased(vtable, data, children.into())
56    }
57
58    /// Create a new expression from a static vtable.
59    pub fn new_static<V: VTable>(
60        vtable: &'static V,
61        data: V::Instance,
62        children: impl Into<Arc<[Expression]>>,
63    ) -> Self {
64        let vtable = ExprVTable::new_static::<V>(vtable);
65        let data = Arc::new(data);
66        Self {
67            vtable,
68            data,
69            children: children.into(),
70        }
71    }
72
73    /// Creates a new expression with the given encoding, metadata, and children.
74    ///
75    /// # Errors
76    ///
77    /// Returns an error if the provided `encoding` is not compatible with the
78    /// `metadata` and `children` or the encoding's own validation logic fails.
79    pub(super) fn try_new_erased(
80        vtable: ExprVTable,
81        data: Arc<dyn Any + Send + Sync>,
82        children: Arc<[Expression]>,
83    ) -> VortexResult<Self> {
84        let this = Self {
85            vtable,
86            data,
87            children,
88        };
89        // Validate that the encoding is compatible with the metadata and children.
90        this.vtable.as_dyn().validate(&this)?;
91        Ok(this)
92    }
93
94    /// Returns if the expression is an instance of the given vtable.
95    pub fn is<V: VTable>(&self) -> bool {
96        self.vtable.is::<V>()
97    }
98
99    /// Returns a typed view of this expression for the given vtable.
100    ///
101    /// # Panics
102    ///
103    /// Panics if the expression's encoding or metadata cannot be cast to the specified vtable.
104    pub fn as_<V: VTable>(&self) -> ExpressionView<'_, V> {
105        ExpressionView::maybe_new(self).vortex_expect("Failed to downcast expression {} to {}")
106    }
107
108    /// Returns a typed view of this expression for the given vtable, if the types match.
109    pub fn as_opt<V: VTable>(&self) -> Option<ExpressionView<'_, V>> {
110        ExpressionView::maybe_new(self)
111    }
112
113    /// Returns the expression ID.
114    pub fn id(&self) -> ExprId {
115        self.vtable.as_dyn().id()
116    }
117
118    /// Returns the expression's vtable.
119    pub fn vtable(&self) -> &ExprVTable {
120        &self.vtable
121    }
122
123    /// Returns the opaque data of the expression.
124    pub fn data(&self) -> &Arc<dyn Any + Send + Sync> {
125        &self.data
126    }
127
128    /// Returns the children of this expression.
129    pub fn children(&self) -> &Arc<[Expression]> {
130        &self.children
131    }
132
133    /// Returns the n'th child of this expression.
134    pub fn child(&self, n: usize) -> &Expression {
135        &self.children[n]
136    }
137
138    /// Returns the name of the n'th child of this expression.
139    pub fn child_name(&self, n: usize) -> ChildName {
140        self.vtable.as_dyn().child_name(self.data().as_ref(), n)
141    }
142
143    /// Replace the children of this expression with the provided new children.
144    pub fn with_children(mut self, children: impl Into<Arc<[Expression]>>) -> VortexResult<Self> {
145        self.children = children.into();
146        self.vtable.as_dyn().validate(&self)?;
147        Ok(self)
148    }
149
150    /// Returns the serialized metadata for this expression.
151    pub fn serialize_metadata(&self) -> VortexResult<Option<Vec<u8>>> {
152        self.vtable.as_dyn().serialize(self.data.as_ref())
153    }
154
155    /// Computes the return dtype of this expression given the input dtype.
156    pub fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
157        self.vtable.as_dyn().return_dtype(self, scope)
158    }
159
160    /// Evaluates the expression in the given scope.
161    pub fn evaluate(&self, scope: &ArrayRef) -> VortexResult<ArrayRef> {
162        self.vtable.as_dyn().evaluate(self, scope)
163    }
164
165    /// Executes the expression over the given vector input scope.
166    pub fn execute(&self, vector: &Vector, dtype: &DType) -> VortexResult<Vector> {
167        // We special-case the "root" expression that must extract that scope vector directly.
168        if self.is::<Root>() {
169            return Ok(vector.clone());
170        }
171
172        let return_dtype = self.return_dtype(dtype)?;
173        let child_dtypes: Vec<_> = self
174            .children
175            .iter()
176            .map(|child| child.return_dtype(dtype))
177            .try_collect()?;
178        let child_vectors: Vec<_> = self
179            .children
180            .iter()
181            .map(|child| child.execute(vector, dtype))
182            .try_collect()?;
183
184        let args = ExecutionArgs {
185            vectors: child_vectors,
186            dtypes: child_dtypes,
187            row_count: vector.len(),
188            return_dtype,
189        };
190
191        self.vtable.as_dyn().execute(&self.data, args)
192    }
193
194    /// An expression over zone-statistics which implies all records in the zone evaluate to false.
195    ///
196    /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
197    /// that `e` evaluates to false on all records in the zone. However, the inverse is not
198    /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
199    /// true on all records.
200    ///
201    /// The [`StatsCatalog`] can be used to constrain or rename stats used in the final expr.
202    ///
203    /// # Examples
204    ///
205    /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
206    ///   value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
207    /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
208    /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
209    ///
210    /// Some expressions, in theory, have falsifications but this function does not support them
211    /// such as `x < (y < z)` or `x LIKE "needle%"`.
212    pub fn stat_falsification(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
213        self.vtable.as_dyn().stat_falsification(self, catalog)
214    }
215
216    /// Returns an expression representing the zoned statistic for the given stat, if available.
217    ///
218    /// The [`StatsCatalog`] returns expressions that can be evaluated using the zone map as a
219    /// scope. Expressions can implement this function to propagate such statistics through the
220    /// expression tree. For example, the `a + 10` expression could propagate `min: min(a) + 10`.
221    ///
222    /// NOTE(gatesn): we currently cannot represent statistics over nested fields. Please file an
223    /// issue to discuss a solution to this.
224    pub fn stat_expression(&self, stat: Stat, catalog: &dyn StatsCatalog) -> Option<Expression> {
225        self.vtable.as_dyn().stat_expression(self, stat, catalog)
226    }
227
228    /// Returns an expression representing the zoned maximum statistic, if available.
229    ///
230    /// See [`Self::stat_expression`] for details.
231    pub fn stat_min(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
232        self.stat_expression(Stat::Min, catalog)
233    }
234
235    /// Returns an expression representing the zoned maximum statistic, if available.
236    ///
237    /// See [`Self::stat_expression`] for details.
238    pub fn stat_max(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
239        self.stat_expression(Stat::Max, catalog)
240    }
241
242    /// Returns whether this expression itself is null-sensitive.
243    /// See [`VTable::is_null_sensitive`].
244    pub fn is_null_sensitive(&self) -> bool {
245        self.vtable.as_dyn().is_null_sensitive(self.data.as_ref())
246    }
247
248    /// Returns whether this expression itself is fallible.
249    /// See [`VTable::is_fallible`].
250    pub fn is_fallible(&self) -> bool {
251        self.vtable.as_dyn().is_fallible(self.data.as_ref())
252    }
253
254    /// Format the expression as a compact string.
255    ///
256    /// Since this is a recursive formatter, it is exposed on the public Expression type.
257    /// See fmt_data that is only implemented on the vtable trait.
258    pub fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result {
259        self.vtable.as_dyn().fmt_sql(self, f)
260    }
261
262    /// Format the instance data of the expression as a string for rendering..
263    pub fn fmt_data(&self, f: &mut Formatter<'_>) -> fmt::Result {
264        self.vtable.as_dyn().fmt_data(self.data().as_ref(), f)
265    }
266
267    /// Display the expression as a formatted tree structure.
268    ///
269    /// This provides a hierarchical view of the expression that shows the relationships
270    /// between parent and child expressions, making complex nested expressions easier
271    /// to understand and debug.
272    ///
273    /// # Example
274    ///
275    /// ```rust
276    /// # use vortex_array::compute::LikeOptions;
277    /// # use vortex_array::expr::VTableExt;
278    /// # use vortex_dtype::{DType, Nullability, PType};
279    /// # use vortex_array::expr::{and, cast, eq, get_item, gt, lit, not, root, select, Like};
280    /// // Build a complex nested expression
281    /// let complex_expr = select(
282    ///     ["result"],
283    ///     and(
284    ///         not(eq(get_item("status", root()), lit("inactive"))),
285    ///         and(
286    ///             Like.new_expr(LikeOptions::default(), [get_item("name", root()), lit("%admin%")]),
287    ///             gt(
288    ///                 cast(get_item("score", root()), DType::Primitive(PType::F64, Nullability::NonNullable)),
289    ///                 lit(75.0)
290    ///             )
291    ///         )
292    ///     )
293    /// );
294    ///
295    /// println!("{}", complex_expr.display_tree());
296    /// ```
297    ///
298    /// This produces output like:
299    ///
300    /// ```text
301    /// Select(include): {result}
302    /// └── Binary(and)
303    ///     ├── lhs: Not
304    ///     │   └── Binary(=)
305    ///     │       ├── lhs: GetItem(status)
306    ///     │       │   └── Root
307    ///     │       └── rhs: Literal(value: "inactive", dtype: utf8)
308    ///     └── rhs: Binary(and)
309    ///         ├── lhs: Like
310    ///         │   ├── child: GetItem(name)
311    ///         │   │   └── Root
312    ///         │   └── pattern: Literal(value: "%admin%", dtype: utf8)
313    ///         └── rhs: Binary(>)
314    ///             ├── lhs: Cast(target: f64)
315    ///             │   └── GetItem(score)
316    ///             │       └── Root
317    ///             └── rhs: Literal(value: 75f64, dtype: f64)
318    /// ```
319    pub fn display_tree(&self) -> impl Display {
320        DisplayTreeExpr(self)
321    }
322}
323
324/// The default display implementation for expressions uses the 'SQL'-style format.
325impl Display for Expression {
326    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
327        self.fmt_sql(f)
328    }
329}
330
331struct FormatExpressionData<'a> {
332    vtable: &'a ExprVTable,
333    data: &'a Arc<dyn Any + Send + Sync>,
334}
335
336impl<'a> Debug for FormatExpressionData<'a> {
337    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
338        self.vtable.as_dyn().fmt_data(self.data.as_ref(), f)
339    }
340}
341
342impl Debug for Expression {
343    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
344        f.debug_struct("Expression")
345            .field("vtable", &self.vtable)
346            .field(
347                "data",
348                &FormatExpressionData {
349                    vtable: &self.vtable,
350                    data: &self.data,
351                },
352            )
353            .field("children", &self.children)
354            .finish()
355    }
356}
357
358impl PartialEq for Expression {
359    fn eq(&self, other: &Self) -> bool {
360        self.vtable.as_dyn().id() == other.vtable.as_dyn().id()
361            && self
362                .vtable
363                .as_dyn()
364                .dyn_eq(self.data.as_ref(), other.data.as_ref())
365            && self.children.eq(&other.children)
366    }
367}
368impl Eq for Expression {}
369
370impl Hash for Expression {
371    fn hash<H: Hasher>(&self, state: &mut H) {
372        self.vtable.as_dyn().id().hash(state);
373        self.vtable.as_dyn().dyn_hash(self.data.as_ref(), state);
374        self.children.hash(state);
375    }
376}