vortex_array/expr/
expression.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt;
6use std::fmt::{Display, Formatter};
7use std::hash::{Hash, Hasher};
8use std::sync::Arc;
9
10use vortex_dtype::{DType, FieldPath};
11use vortex_error::{VortexExpect, VortexResult};
12
13use crate::ArrayRef;
14use crate::expr::display::DisplayTreeExpr;
15use crate::expr::{ChildName, ExprId, ExprVTable, ExpressionView, StatsCatalog, VTable};
16
17/// A node in a Vortex expression tree.
18///
19/// Expressions represent scalar computations that can be performed on data. Each
20/// expression consists of an encoding (vtable), heap-allocated metadata, and child expressions.
21#[derive(Clone, Debug)]
22pub struct Expression {
23    /// The vtable for this expression.
24    vtable: ExprVTable,
25    /// The instance data for this expression.
26    data: Arc<dyn Any + Send + Sync>,
27    /// Any children of this expression.
28    children: Arc<[Expression]>,
29}
30
31impl Expression {
32    /// Creates a new expression with the given encoding, metadata, and children.
33    ///
34    /// # Errors
35    ///
36    /// Returns an error if the provided `encoding` is not compatible with the
37    /// `metadata` and `children` or the encoding's own validation logic fails.
38    pub fn try_new(
39        vtable: ExprVTable,
40        data: Arc<dyn Any + Send + Sync>,
41        children: Arc<[Expression]>,
42    ) -> VortexResult<Self> {
43        let this = Self {
44            vtable,
45            data,
46            children,
47        };
48        // Validate that the encoding is compatible with the metadata and children.
49        this.vtable.as_dyn().validate(&this)?;
50        Ok(this)
51    }
52
53    /// Creates a new expression with the given encoding, metadata, and children.
54    ///
55    /// # Safety
56    ///
57    /// The caller must ensure that the provided `encoding` is compatible with the
58    /// `metadata` and `children`. Failure to do so may lead to undefined behavior
59    ///  when the expression is used.
60    pub unsafe fn new_unchecked(
61        vtable: ExprVTable,
62        data: Arc<dyn Any + Send + Sync>,
63        children: Arc<[Expression]>,
64    ) -> Self {
65        Self {
66            vtable,
67            data,
68            children,
69        }
70    }
71
72    /// Returns if the expression is an instance of the given vtable.
73    pub fn is<V: VTable>(&self) -> bool {
74        self.vtable.is::<V>()
75    }
76
77    /// Returns a typed view of this expression for the given vtable.
78    ///
79    /// # Panics
80    ///
81    /// Panics if the expression's encoding or metadata cannot be cast to the specified vtable.
82    pub fn as_<V: VTable>(&self) -> ExpressionView<'_, V> {
83        ExpressionView::maybe_new(self).vortex_expect("Failed to downcast expression {} to {}")
84    }
85
86    /// Returns a typed view of this expression for the given vtable, if the types match.
87    pub fn as_opt<V: VTable>(&self) -> Option<ExpressionView<'_, V>> {
88        ExpressionView::maybe_new(self)
89    }
90
91    /// Returns the expression ID.
92    pub fn id(&self) -> ExprId {
93        self.vtable.as_dyn().id()
94    }
95
96    /// Returns the expression's vtable.
97    pub fn vtable(&self) -> &ExprVTable {
98        &self.vtable
99    }
100
101    /// Returns the opaque data of the expression.
102    pub fn data(&self) -> &Arc<dyn Any + Send + Sync> {
103        &self.data
104    }
105
106    /// Returns the children of this expression.
107    pub fn children(&self) -> &Arc<[Expression]> {
108        &self.children
109    }
110
111    /// Returns the n'th child of this expression.
112    pub fn child(&self, n: usize) -> &Expression {
113        &self.children[n]
114    }
115
116    /// Returns the name of the n'th child of this expression.
117    pub fn child_name(&self, n: usize) -> ChildName {
118        self.vtable.as_dyn().child_name(self.data().as_ref(), n)
119    }
120
121    /// Replace the children of this expression with the provided new children.
122    pub fn with_children(mut self, children: impl Into<Arc<[Expression]>>) -> VortexResult<Self> {
123        self.children = children.into();
124        self.vtable.as_dyn().validate(&self)?;
125        Ok(self)
126    }
127
128    /// Returns the serialized metadata for this expression.
129    pub fn serialize_metadata(&self) -> VortexResult<Option<Vec<u8>>> {
130        self.vtable.as_dyn().serialize(self.data.as_ref())
131    }
132
133    /// Computes the return dtype of this expression given the input dtype.
134    pub fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
135        self.vtable.as_dyn().return_dtype(self, scope)
136    }
137
138    /// Evaluates the expression in the given scope.
139    pub fn evaluate(&self, scope: &ArrayRef) -> VortexResult<ArrayRef> {
140        self.vtable.as_dyn().evaluate(self, scope)
141    }
142
143    /// An expression over zone-statistics which implies all records in the zone evaluate to false.
144    ///
145    /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
146    /// that `e` evaluates to false on all records in the zone. However, the inverse is not
147    /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
148    /// true on all records.
149    ///
150    /// The [`StatsCatalog`] can be used to constrain or rename stats used in the final expr.
151    ///
152    /// # Examples
153    ///
154    /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
155    ///   value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
156    /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
157    /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
158    ///
159    /// Some expressions, in theory, have falsifications but this function does not support them
160    /// such as `x < (y < z)` or `x LIKE "needle%"`.
161    pub fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
162        self.vtable.as_dyn().stat_falsification(self, catalog)
163    }
164
165    /// An expression for the upper non-null bound of this expression, if available.
166    ///
167    /// This function returns None if there is no upper bound, or it is difficult to compute.
168    ///
169    /// The returned expression evaluates to null if the maximum value is unknown. In that case, you
170    /// _must not_ assume the array is empty _nor_ may you assume the array only contains non-null
171    /// values.
172    pub fn stat_max(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
173        self.vtable.as_dyn().stat_max(self, catalog)
174    }
175
176    /// An expression for the lower non-null bound of this expression, if available.
177    ///
178    /// See [`Expression::stat_max`] for important details.
179    pub fn stat_min(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
180        self.vtable.as_dyn().stat_min(self, catalog)
181    }
182
183    /// An expression for the NaN count for a column, if available.
184    ///
185    /// This method returns `None` if the NaNCount stat is unknown.
186    pub fn stat_nan_count(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
187        self.vtable.as_dyn().stat_nan_count(self, catalog)
188    }
189
190    // TODO(ngates): I'm not sure what this is really for? We need to clean up stats compute for
191    //  expressions.
192    pub fn stat_field_path(&self) -> Option<FieldPath> {
193        self.vtable.as_dyn().stat_field_path(self)
194    }
195
196    /// Format the expression as a compact string.
197    ///
198    /// Since this is a recursive formatter, it is exposed on the public Expression type.
199    /// See fmt_data that is only implemented on the vtable trait.
200    pub fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result {
201        self.vtable.as_dyn().fmt_sql(self, f)
202    }
203
204    /// Format the instance data of the expression as a string for rendering..
205    pub fn fmt_data(&self, f: &mut Formatter<'_>) -> fmt::Result {
206        self.vtable.as_dyn().fmt_data(self.data().as_ref(), f)
207    }
208
209    /// Display the expression as a formatted tree structure.
210    ///
211    /// This provides a hierarchical view of the expression that shows the relationships
212    /// between parent and child expressions, making complex nested expressions easier
213    /// to understand and debug.
214    ///
215    /// # Example
216    ///
217    /// ```rust
218    /// # use vortex_array::compute::LikeOptions;
219    /// # use vortex_array::expr::VTableExt;
220    /// # use vortex_dtype::{DType, Nullability, PType};
221    /// # use vortex_array::expr::{and, cast, eq, get_item, gt, lit, not, root, select, Like};
222    /// // Build a complex nested expression
223    /// let complex_expr = select(
224    ///     ["result"],
225    ///     and(
226    ///         not(eq(get_item("status", root()), lit("inactive"))),
227    ///         and(
228    ///             Like.new_expr(LikeOptions::default(), [get_item("name", root()), lit("%admin%")]),
229    ///             gt(
230    ///                 cast(get_item("score", root()), DType::Primitive(PType::F64, Nullability::NonNullable)),
231    ///                 lit(75.0)
232    ///             )
233    ///         )
234    ///     )
235    /// );
236    ///
237    /// println!("{}", complex_expr.display_tree());
238    /// ```
239    ///
240    /// This produces output like:
241    ///
242    /// ```text
243    /// Select(include): {result}
244    /// └── Binary(and)
245    ///     ├── lhs: Not
246    ///     │   └── Binary(=)
247    ///     │       ├── lhs: GetItem(status)
248    ///     │       │   └── Root
249    ///     │       └── rhs: Literal(value: "inactive", dtype: utf8)
250    ///     └── rhs: Binary(and)
251    ///         ├── lhs: Like
252    ///         │   ├── child: GetItem(name)
253    ///         │   │   └── Root
254    ///         │   └── pattern: Literal(value: "%admin%", dtype: utf8)
255    ///         └── rhs: Binary(>)
256    ///             ├── lhs: Cast(target: f64)
257    ///             │   └── GetItem(score)
258    ///             │       └── Root
259    ///             └── rhs: Literal(value: 75f64, dtype: f64)
260    /// ```
261    pub fn display_tree(&self) -> impl Display {
262        DisplayTreeExpr(self)
263    }
264}
265
266/// The default display implementation for expressions uses the 'SQL'-style format.
267impl Display for Expression {
268    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
269        self.fmt_sql(f)
270    }
271}
272
273impl PartialEq for Expression {
274    fn eq(&self, other: &Self) -> bool {
275        self.vtable.as_dyn().id() == other.vtable.as_dyn().id()
276            && self
277                .vtable
278                .as_dyn()
279                .dyn_eq(self.data.as_ref(), other.data.as_ref())
280            && self.children.eq(&other.children)
281    }
282}
283impl Eq for Expression {}
284
285impl Hash for Expression {
286    fn hash<H: Hasher>(&self, state: &mut H) {
287        self.vtable.as_dyn().id().hash(state);
288        self.vtable.as_dyn().dyn_hash(self.data.as_ref(), state);
289        self.children.hash(state);
290    }
291}