vortex_expr/analysis.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::stats::Stat;
5use vortex_dtype::FieldPath;
6
7use crate::ExprRef;
8
9/// A catalog of available stats that are associated with field paths.
10pub trait StatsCatalog {
11 /// Given a field path and statistic, return an expression that when evaluated over the catalog
12 /// will return that stat for the referenced field.
13 ///
14 /// This is likely to be a column expression, or a literal.
15 ///
16 /// Returns `None` if the stat is not available for the field path.
17 fn stats_ref(&mut self, _field_path: &FieldPath, _stat: Stat) -> Option<ExprRef> {
18 None
19 }
20}
21
22/// This can be used by expression to plug into vortex expression analysis, such as
23/// pruning or expression simplification
24pub trait AnalysisExpr {
25 /// An expression over zone-statistics which implies all records in the zone evaluate to false.
26 ///
27 /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
28 /// that `e` evaluates to false on all records in the zone. However, the inverse is not
29 /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
30 /// true on all records.
31 ///
32 /// The `StatsCatalog` can be used to constrain or rename stats used in the final expr.
33 ///
34 /// # Examples
35 ///
36 /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
37 /// value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
38 /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
39 /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
40 ///
41 /// Some expressions, in theory, have falsifications but this function does not support them
42 /// such as `x < (y < z)` or `x LIKE "needle%"`.
43 fn stat_falsification(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
44 None
45 }
46
47 /// An expression for the upper non-null bound of this expression, if available.
48 ///
49 /// This function returns None if there is no upper bound or it is difficult to compute.
50 ///
51 /// The returned expression evaluates to null if the maximum value is unknown. In that case, you
52 /// _must not_ assume the array is empty _nor_ may you assume the array only contains non-null
53 /// values.
54 fn max(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
55 None
56 }
57
58 /// An expression for the lower non-null bound of this expression, if available.
59 ///
60 /// See [AnalysisExpr::max] for important details.
61 fn min(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
62 None
63 }
64
65 /// An expression for the NaN count for a column, if available.
66 ///
67 /// This method returns `None` if the NaNCount stat is unknown.
68 fn nan_count(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
69 None
70 }
71
72 fn field_path(&self) -> Option<FieldPath> {
73 None
74 }
75
76 // TODO: add containment
77}