vortex_expr/
analysis.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::stats::Stat;
5use vortex_dtype::FieldPath;
6
7use crate::ExprRef;
8
9/// A catalog of available stats that are associated with field paths.
10pub trait StatsCatalog {
11    /// Given a field path and statistic, return an expression that when evaluated over the catalog
12    /// will return that stat for the referenced field.
13    ///
14    /// This is likely to be a column expression, or a literal.
15    ///
16    /// Returns `None` if the stat is not available for the field path.
17    fn stats_ref(&mut self, _field_path: &FieldPath, _stat: Stat) -> Option<ExprRef> {
18        None
19    }
20}
21
22/// This can be used by expression to plug into vortex expression analysis, such as
23/// pruning or expression simplification
24pub trait AnalysisExpr {
25    /// An expression over zone-statistics which implies all records in the zone evaluate to false.
26    ///
27    /// Given an expression, `e`, if `e.stat_falsification(..)` evaluates to true, it is guaranteed
28    /// that `e` evaluates to false on all records in the zone. However, the inverse is not
29    /// necessarily true: even if the falsification evaluates to false, `e` need not evaluate to
30    /// true on all records.
31    ///
32    /// The `StatsCatalog` can be used to constrain or rename stats used in the final expr.
33    ///
34    /// # Examples
35    ///
36    /// - An expression over one variable: `x > 0` is false for all records in a zone if the maximum
37    ///   value of the column `x` in that zone is less than or equal to zero: `max(x) <= 0`.
38    /// - An expression over two variables: `x > y` becomes `max(x) <= min(y)`.
39    /// - A conjunctive expression: `x > y AND z < x` becomes `max(x) <= min(y) OR min(z) >= max(x).
40    ///
41    /// Some expressions, in theory, have falsifications but this function does not support them
42    /// such as `x < (y < z)` or `x LIKE "needle%"`.
43    fn stat_falsification(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
44        None
45    }
46
47    /// An expression for the upper non-null bound of this expression, if available.
48    ///
49    /// This function returns None if there is no upper bound or it is difficult to compute.
50    ///
51    /// The returned expression evaluates to null if the maximum value is unknown. In that case, you
52    /// _must not_ assume the array is empty _nor_ may you assume the array only contains non-null
53    /// values.
54    fn max(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
55        None
56    }
57
58    /// An expression for the lower non-null bound of this expression, if available.
59    ///
60    /// See [AnalysisExpr::max] for important details.
61    fn min(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
62        None
63    }
64
65    /// An expression for the NaN count for a column, if available.
66    ///
67    /// This method returns `None` if the NaNCount stat is unknown.
68    fn nan_count(&self, _catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
69        None
70    }
71
72    fn field_path(&self) -> Option<FieldPath> {
73        None
74    }
75
76    // TODO: add containment
77}