vortex_array/expr/expression.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt;
6use std::fmt::{Display, Formatter};
7use std::hash::{Hash, Hasher};
8use std::sync::Arc;
9
10use vortex_dtype::{DType, FieldPath};
11use vortex_error::{VortexExpect, VortexResult};
12
13use crate::ArrayRef;
14use crate::expr::display::DisplayTreeExpr;
15use crate::expr::{ChildName, ExprId, ExprVTable, ExpressionView, StatsCatalog, VTable};
16
17/// A node in a Vortex expression tree.
18///
19/// Expressions represent scalar computations that can be performed on data. Each
20/// expression consists of an encoding (vtable), heap-allocated metadata, and child expressions.
21#[derive(Clone, Debug)]
22pub struct Expression {
23 /// The vtable for this expression.
24 vtable: ExprVTable,
25 /// The instance data for this expression.
26 data: Arc<dyn Any + Send + Sync>,
27 /// Any children of this expression.
28 children: Arc<[Expression]>,
29}
30
31impl Expression {
32 /// Creates a new expression with the given encoding, metadata, and children.
33 ///
34 /// # Errors
35 ///
36 /// Returns an error if the provided `encoding` is not compatible with the
37 /// `metadata` and `children` or the encoding's own validation logic fails.
38 pub fn try_new(
39 vtable: ExprVTable,
40 data: Arc<dyn Any + Send + Sync>,
41 children: Arc<[Expression]>,
42 ) -> VortexResult<Self> {
43 let this = Self {
44 vtable,
45 data,
46 children,
47 };
48 // Validate that the encoding is compatible with the metadata and children.
49 this.vtable.as_dyn().validate(&this)?;
50 Ok(this)
51 }
52
53 /// Creates a new expression with the given encoding, metadata, and children.
54 ///
55 /// # Safety
56 ///
57 /// The caller must ensure that the provided `encoding` is compatible with the
58 /// `metadata` and `children`. Failure to do so may lead to undefined behavior
59 /// when the expression is used.
60 pub unsafe fn new_unchecked(
61 vtable: ExprVTable,
62 data: Arc<dyn Any + Send + Sync>,
63 children: Arc<[Expression]>,
64 ) -> Self {
65 Self {
66 vtable,
67 data,
68 children,
69 }
70 }
71
72 /// Returns if the expression is an instance of the given vtable.
73 pub fn is<V: VTable>(&self) -> bool {
74 self.vtable.is::<V>()
75 }
76
77 /// Returns a typed view of this expression for the given vtable.
78 ///
79 /// # Panics
80 ///
81 /// Panics if the expression's encoding or metadata cannot be cast to the specified vtable.
82 pub fn as_<V: VTable>(&self) -> ExpressionView<'_, V> {
83 ExpressionView::maybe_new(self).vortex_expect("Failed to downcast expression {} to {}")
84 }
85
86 /// Returns a typed view of this expression for the given vtable, if the types match.
87 pub fn as_opt<V: VTable>(&self) -> Option<ExpressionView<'_, V>> {
88 ExpressionView::maybe_new(self)
89 }
90
91 /// Returns the expression ID.
92 pub fn id(&self) -> ExprId {
93 self.vtable.as_dyn().id()
94 }
95
96 /// Returns the expression's vtable.
97 pub fn vtable(&self) -> &ExprVTable {
98 &self.vtable
99 }
100
101 /// Returns the opaque data of the expression.
102 pub fn data(&self) -> &Arc<dyn Any + Send + Sync> {
103 &self.data
104 }
105
106 /// Returns the children of this expression.
107 pub fn children(&self) -> &Arc<[Expression]> {
108 &self.children
109 }
110
111 /// Returns the n'th child of this expression.
112 pub fn child(&self, n: usize) -> &Expression {
113 &self.children[n]
114 }
115
116 /// Returns the name of the n'th child of this expression.
117 pub fn child_name(&self, n: usize) -> ChildName {
118 self.vtable.as_dyn().child_name(self.data().as_ref(), n)
119 }
120
121 /// Replace the children of this expression with the provided new children.
122 pub fn with_children(mut self, children: impl Into<Arc<[Expression]>>) -> VortexResult<Self> {
123 self.children = children.into();
124 self.vtable.as_dyn().validate(&self)?;
125 Ok(self)
126 }
127
128 /// Returns the serialized metadata for this expression.
129 pub fn serialize_metadata(&self) -> VortexResult<Option<Vec<u8>>> {
130 self.vtable.as_dyn().serialize(self.data.as_ref())
131 }
132
133 /// Computes the return dtype of this expression given the input dtype.
134 pub fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
135 self.vtable.as_dyn().return_dtype(self, scope)
136 }
137
138 /// Evaluates the expression in the given scope.
139 pub fn evaluate(&self, scope: &ArrayRef) -> VortexResult<ArrayRef> {
140 self.vtable.as_dyn().evaluate(self, scope)
141 }
142
143 /// An expression over zone-statistics which implies all records in the zone evaluate to false.
144 ///
145 /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
146 /// that `e` evaluates to false on all records in the zone. However, the inverse is not
147 /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
148 /// true on all records.
149 ///
150 /// The [`StatsCatalog`] can be used to constrain or rename stats used in the final expr.
151 ///
152 /// # Examples
153 ///
154 /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
155 /// value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
156 /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
157 /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
158 ///
159 /// Some expressions, in theory, have falsifications but this function does not support them
160 /// such as `x < (y < z)` or `x LIKE "needle%"`.
161 pub fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
162 self.vtable.as_dyn().stat_falsification(self, catalog)
163 }
164
165 /// An expression for the upper non-null bound of this expression, if available.
166 ///
167 /// This function returns None if there is no upper bound, or it is difficult to compute.
168 ///
169 /// The returned expression evaluates to null if the maximum value is unknown. In that case, you
170 /// _must not_ assume the array is empty _nor_ may you assume the array only contains non-null
171 /// values.
172 pub fn stat_max(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
173 self.vtable.as_dyn().stat_max(self, catalog)
174 }
175
176 /// An expression for the lower non-null bound of this expression, if available.
177 ///
178 /// See [`Expression::stat_max`] for important details.
179 pub fn stat_min(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
180 self.vtable.as_dyn().stat_min(self, catalog)
181 }
182
183 /// An expression for the NaN count for a column, if available.
184 ///
185 /// This method returns `None` if the NaNCount stat is unknown.
186 pub fn stat_nan_count(&self, catalog: &mut dyn StatsCatalog) -> Option<Expression> {
187 self.vtable.as_dyn().stat_nan_count(self, catalog)
188 }
189
190 // TODO(ngates): I'm not sure what this is really for? We need to clean up stats compute for
191 // expressions.
192 pub fn stat_field_path(&self) -> Option<FieldPath> {
193 self.vtable.as_dyn().stat_field_path(self)
194 }
195
196 /// Format the expression as a compact string.
197 ///
198 /// Since this is a recursive formatter, it is exposed on the public Expression type.
199 /// See fmt_data that is only implemented on the vtable trait.
200 pub fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result {
201 self.vtable.as_dyn().fmt_sql(self, f)
202 }
203
204 /// Format the instance data of the expression as a string for rendering..
205 pub fn fmt_data(&self, f: &mut Formatter<'_>) -> fmt::Result {
206 self.vtable.as_dyn().fmt_data(self.data().as_ref(), f)
207 }
208
209 /// Display the expression as a formatted tree structure.
210 ///
211 /// This provides a hierarchical view of the expression that shows the relationships
212 /// between parent and child expressions, making complex nested expressions easier
213 /// to understand and debug.
214 ///
215 /// # Example
216 ///
217 /// ```rust
218 /// # use vortex_array::compute::LikeOptions;
219 /// # use vortex_array::expr::VTableExt;
220 /// # use vortex_dtype::{DType, Nullability, PType};
221 /// # use vortex_array::expr::{and, cast, eq, get_item, gt, lit, not, root, select, Like};
222 /// // Build a complex nested expression
223 /// let complex_expr = select(
224 /// ["result"],
225 /// and(
226 /// not(eq(get_item("status", root()), lit("inactive"))),
227 /// and(
228 /// Like.new_expr(LikeOptions::default(), [get_item("name", root()), lit("%admin%")]),
229 /// gt(
230 /// cast(get_item("score", root()), DType::Primitive(PType::F64, Nullability::NonNullable)),
231 /// lit(75.0)
232 /// )
233 /// )
234 /// )
235 /// );
236 ///
237 /// println!("{}", complex_expr.display_tree());
238 /// ```
239 ///
240 /// This produces output like:
241 ///
242 /// ```text
243 /// Select(include): {result}
244 /// └── Binary(and)
245 /// ├── lhs: Not
246 /// │ └── Binary(=)
247 /// │ ├── lhs: GetItem(status)
248 /// │ │ └── Root
249 /// │ └── rhs: Literal(value: "inactive", dtype: utf8)
250 /// └── rhs: Binary(and)
251 /// ├── lhs: Like
252 /// │ ├── child: GetItem(name)
253 /// │ │ └── Root
254 /// │ └── pattern: Literal(value: "%admin%", dtype: utf8)
255 /// └── rhs: Binary(>)
256 /// ├── lhs: Cast(target: f64)
257 /// │ └── GetItem(score)
258 /// │ └── Root
259 /// └── rhs: Literal(value: 75f64, dtype: f64)
260 /// ```
261 pub fn display_tree(&self) -> impl Display {
262 DisplayTreeExpr(self)
263 }
264}
265
266/// The default display implementation for expressions uses the 'SQL'-style format.
267impl Display for Expression {
268 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
269 self.fmt_sql(f)
270 }
271}
272
273impl PartialEq for Expression {
274 fn eq(&self, other: &Self) -> bool {
275 self.vtable.as_dyn().id() == other.vtable.as_dyn().id()
276 && self
277 .vtable
278 .as_dyn()
279 .dyn_eq(self.data.as_ref(), other.data.as_ref())
280 && self.children.eq(&other.children)
281 }
282}
283impl Eq for Expression {}
284
285impl Hash for Expression {
286 fn hash<H: Hasher>(&self, state: &mut H) {
287 self.vtable.as_dyn().id().hash(state);
288 self.vtable.as_dyn().dyn_hash(self.data.as_ref(), state);
289 self.children.hash(state);
290 }
291}