vortex_array/expr/expression.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt;
6use std::fmt::Debug;
7use std::fmt::Display;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::sync::Arc;
12
13use itertools::Itertools;
14use vortex_dtype::DType;
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_vector::Vector;
18use vortex_vector::VectorOps;
19
20use crate::ArrayRef;
21use crate::expr::ChildName;
22use crate::expr::ExecutionArgs;
23use crate::expr::ExprId;
24use crate::expr::ExprVTable;
25use crate::expr::ExpressionView;
26use crate::expr::Root;
27use crate::expr::StatsCatalog;
28use crate::expr::VTable;
29use crate::expr::display::DisplayTreeExpr;
30use crate::expr::stats::Stat;
31
32/// A node in a Vortex expression tree.
33///
34/// Expressions represent scalar computations that can be performed on data. Each
35/// expression consists of an encoding (vtable), heap-allocated metadata, and child expressions.
36#[derive(Clone)]
37pub struct Expression {
38 /// The vtable for this expression.
39 vtable: ExprVTable,
40 /// The instance data for this expression.
41 data: Arc<dyn Any + Send + Sync>,
42 /// Any children of this expression.
43 children: Arc<[Expression]>,
44}
45
46impl Expression {
47 /// Create a new expression from a vtable.
48 pub fn try_new<V: VTable>(
49 vtable: V,
50 data: V::Instance,
51 children: impl Into<Arc<[Expression]>>,
52 ) -> VortexResult<Self> {
53 let vtable = ExprVTable::new::<V>(vtable);
54 let data = Arc::new(data);
55 Self::try_new_erased(vtable, data, children.into())
56 }
57
58 /// Create a new expression from a static vtable.
59 pub fn new_static<V: VTable>(
60 vtable: &'static V,
61 data: V::Instance,
62 children: impl Into<Arc<[Expression]>>,
63 ) -> Self {
64 let vtable = ExprVTable::new_static::<V>(vtable);
65 let data = Arc::new(data);
66 Self {
67 vtable,
68 data,
69 children: children.into(),
70 }
71 }
72
73 /// Creates a new expression with the given encoding, metadata, and children.
74 ///
75 /// # Errors
76 ///
77 /// Returns an error if the provided `encoding` is not compatible with the
78 /// `metadata` and `children` or the encoding's own validation logic fails.
79 pub(super) fn try_new_erased(
80 vtable: ExprVTable,
81 data: Arc<dyn Any + Send + Sync>,
82 children: Arc<[Expression]>,
83 ) -> VortexResult<Self> {
84 let this = Self {
85 vtable,
86 data,
87 children,
88 };
89 // Validate that the encoding is compatible with the metadata and children.
90 this.vtable.as_dyn().validate(&this)?;
91 Ok(this)
92 }
93
94 /// Returns if the expression is an instance of the given vtable.
95 pub fn is<V: VTable>(&self) -> bool {
96 self.vtable.is::<V>()
97 }
98
99 /// Returns a typed view of this expression for the given vtable.
100 ///
101 /// # Panics
102 ///
103 /// Panics if the expression's encoding or metadata cannot be cast to the specified vtable.
104 pub fn as_<V: VTable>(&self) -> ExpressionView<'_, V> {
105 ExpressionView::maybe_new(self).vortex_expect("Failed to downcast expression {} to {}")
106 }
107
108 /// Returns a typed view of this expression for the given vtable, if the types match.
109 pub fn as_opt<V: VTable>(&self) -> Option<ExpressionView<'_, V>> {
110 ExpressionView::maybe_new(self)
111 }
112
113 /// Returns the expression ID.
114 pub fn id(&self) -> ExprId {
115 self.vtable.as_dyn().id()
116 }
117
118 /// Returns the expression's vtable.
119 pub fn vtable(&self) -> &ExprVTable {
120 &self.vtable
121 }
122
123 /// Returns the opaque data of the expression.
124 pub fn data(&self) -> &Arc<dyn Any + Send + Sync> {
125 &self.data
126 }
127
128 /// Returns the children of this expression.
129 pub fn children(&self) -> &Arc<[Expression]> {
130 &self.children
131 }
132
133 /// Returns the n'th child of this expression.
134 pub fn child(&self, n: usize) -> &Expression {
135 &self.children[n]
136 }
137
138 /// Returns the name of the n'th child of this expression.
139 pub fn child_name(&self, n: usize) -> ChildName {
140 self.vtable.as_dyn().child_name(self.data().as_ref(), n)
141 }
142
143 /// Replace the children of this expression with the provided new children.
144 pub fn with_children(mut self, children: impl Into<Arc<[Expression]>>) -> VortexResult<Self> {
145 self.children = children.into();
146 self.vtable.as_dyn().validate(&self)?;
147 Ok(self)
148 }
149
150 /// Returns the serialized metadata for this expression.
151 pub fn serialize_metadata(&self) -> VortexResult<Option<Vec<u8>>> {
152 self.vtable.as_dyn().serialize(self.data.as_ref())
153 }
154
155 /// Computes the return dtype of this expression given the input dtype.
156 pub fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
157 self.vtable.as_dyn().return_dtype(self, scope)
158 }
159
160 /// Evaluates the expression in the given scope.
161 pub fn evaluate(&self, scope: &ArrayRef) -> VortexResult<ArrayRef> {
162 self.vtable.as_dyn().evaluate(self, scope)
163 }
164
165 /// Executes the expression over the given vector input scope.
166 pub fn execute(&self, vector: &Vector, dtype: &DType) -> VortexResult<Vector> {
167 // We special-case the "root" expression that must extract that scope vector directly.
168 if self.is::<Root>() {
169 return Ok(vector.clone());
170 }
171
172 let return_dtype = self.return_dtype(dtype)?;
173 let child_dtypes: Vec<_> = self
174 .children
175 .iter()
176 .map(|child| child.return_dtype(dtype))
177 .try_collect()?;
178 let child_vectors: Vec<_> = self
179 .children
180 .iter()
181 .map(|child| child.execute(vector, dtype))
182 .try_collect()?;
183
184 let args = ExecutionArgs {
185 vectors: child_vectors,
186 dtypes: child_dtypes,
187 row_count: vector.len(),
188 return_dtype,
189 };
190
191 self.vtable.as_dyn().execute(&self.data, args)
192 }
193
194 /// An expression over zone-statistics which implies all records in the zone evaluate to false.
195 ///
196 /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
197 /// that `e` evaluates to false on all records in the zone. However, the inverse is not
198 /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
199 /// true on all records.
200 ///
201 /// The [`StatsCatalog`] can be used to constrain or rename stats used in the final expr.
202 ///
203 /// # Examples
204 ///
205 /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
206 /// value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
207 /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
208 /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
209 ///
210 /// Some expressions, in theory, have falsifications but this function does not support them
211 /// such as `x < (y < z)` or `x LIKE "needle%"`.
212 pub fn stat_falsification(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
213 self.vtable.as_dyn().stat_falsification(self, catalog)
214 }
215
216 /// Returns an expression representing the zoned statistic for the given stat, if available.
217 ///
218 /// The [`StatsCatalog`] returns expressions that can be evaluated using the zone map as a
219 /// scope. Expressions can implement this function to propagate such statistics through the
220 /// expression tree. For example, the `a + 10` expression could propagate `min: min(a) + 10`.
221 ///
222 /// NOTE(gatesn): we currently cannot represent statistics over nested fields. Please file an
223 /// issue to discuss a solution to this.
224 pub fn stat_expression(&self, stat: Stat, catalog: &dyn StatsCatalog) -> Option<Expression> {
225 self.vtable.as_dyn().stat_expression(self, stat, catalog)
226 }
227
228 /// Returns an expression representing the zoned maximum statistic, if available.
229 ///
230 /// See [`Self::stat_expression`] for details.
231 pub fn stat_min(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
232 self.stat_expression(Stat::Min, catalog)
233 }
234
235 /// Returns an expression representing the zoned maximum statistic, if available.
236 ///
237 /// See [`Self::stat_expression`] for details.
238 pub fn stat_max(&self, catalog: &dyn StatsCatalog) -> Option<Expression> {
239 self.stat_expression(Stat::Max, catalog)
240 }
241
242 /// Returns whether this expression itself is null-sensitive.
243 /// See [`VTable::is_null_sensitive`].
244 pub fn is_null_sensitive(&self) -> bool {
245 self.vtable.as_dyn().is_null_sensitive(self.data.as_ref())
246 }
247
248 /// Returns whether this expression itself is fallible.
249 /// See [`VTable::is_fallible`].
250 pub fn is_fallible(&self) -> bool {
251 self.vtable.as_dyn().is_fallible(self.data.as_ref())
252 }
253
254 /// Format the expression as a compact string.
255 ///
256 /// Since this is a recursive formatter, it is exposed on the public Expression type.
257 /// See fmt_data that is only implemented on the vtable trait.
258 pub fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result {
259 self.vtable.as_dyn().fmt_sql(self, f)
260 }
261
262 /// Format the instance data of the expression as a string for rendering..
263 pub fn fmt_data(&self, f: &mut Formatter<'_>) -> fmt::Result {
264 self.vtable.as_dyn().fmt_data(self.data().as_ref(), f)
265 }
266
267 /// Display the expression as a formatted tree structure.
268 ///
269 /// This provides a hierarchical view of the expression that shows the relationships
270 /// between parent and child expressions, making complex nested expressions easier
271 /// to understand and debug.
272 ///
273 /// # Example
274 ///
275 /// ```rust
276 /// # use vortex_array::compute::LikeOptions;
277 /// # use vortex_array::expr::VTableExt;
278 /// # use vortex_dtype::{DType, Nullability, PType};
279 /// # use vortex_array::expr::{and, cast, eq, get_item, gt, lit, not, root, select, Like};
280 /// // Build a complex nested expression
281 /// let complex_expr = select(
282 /// ["result"],
283 /// and(
284 /// not(eq(get_item("status", root()), lit("inactive"))),
285 /// and(
286 /// Like.new_expr(LikeOptions::default(), [get_item("name", root()), lit("%admin%")]),
287 /// gt(
288 /// cast(get_item("score", root()), DType::Primitive(PType::F64, Nullability::NonNullable)),
289 /// lit(75.0)
290 /// )
291 /// )
292 /// )
293 /// );
294 ///
295 /// println!("{}", complex_expr.display_tree());
296 /// ```
297 ///
298 /// This produces output like:
299 ///
300 /// ```text
301 /// Select(include): {result}
302 /// └── Binary(and)
303 /// ├── lhs: Not
304 /// │ └── Binary(=)
305 /// │ ├── lhs: GetItem(status)
306 /// │ │ └── Root
307 /// │ └── rhs: Literal(value: "inactive", dtype: utf8)
308 /// └── rhs: Binary(and)
309 /// ├── lhs: Like
310 /// │ ├── child: GetItem(name)
311 /// │ │ └── Root
312 /// │ └── pattern: Literal(value: "%admin%", dtype: utf8)
313 /// └── rhs: Binary(>)
314 /// ├── lhs: Cast(target: f64)
315 /// │ └── GetItem(score)
316 /// │ └── Root
317 /// └── rhs: Literal(value: 75f64, dtype: f64)
318 /// ```
319 pub fn display_tree(&self) -> impl Display {
320 DisplayTreeExpr(self)
321 }
322}
323
324/// The default display implementation for expressions uses the 'SQL'-style format.
325impl Display for Expression {
326 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
327 self.fmt_sql(f)
328 }
329}
330
331struct FormatExpressionData<'a> {
332 vtable: &'a ExprVTable,
333 data: &'a Arc<dyn Any + Send + Sync>,
334}
335
336impl<'a> Debug for FormatExpressionData<'a> {
337 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
338 self.vtable.as_dyn().fmt_data(self.data.as_ref(), f)
339 }
340}
341
342impl Debug for Expression {
343 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
344 f.debug_struct("Expression")
345 .field("vtable", &self.vtable)
346 .field(
347 "data",
348 &FormatExpressionData {
349 vtable: &self.vtable,
350 data: &self.data,
351 },
352 )
353 .field("children", &self.children)
354 .finish()
355 }
356}
357
358impl PartialEq for Expression {
359 fn eq(&self, other: &Self) -> bool {
360 self.vtable.as_dyn().id() == other.vtable.as_dyn().id()
361 && self
362 .vtable
363 .as_dyn()
364 .dyn_eq(self.data.as_ref(), other.data.as_ref())
365 && self.children.eq(&other.children)
366 }
367}
368impl Eq for Expression {}
369
370impl Hash for Expression {
371 fn hash<H: Hasher>(&self, state: &mut H) {
372 self.vtable.as_dyn().id().hash(state);
373 self.vtable.as_dyn().dyn_hash(self.data.as_ref(), state);
374 self.children.hash(state);
375 }
376}