use crate::expressions::{
BinaryExpression, BinaryOperator, ColumnName, Expression as Expr, Scalar, UnaryOperator,
VariadicExpression, VariadicOperator,
};
use crate::predicates::{
DataSkippingPredicateEvaluator, PredicateEvaluator, PredicateEvaluatorDefaults,
};
use crate::schema::DataType;
use std::cmp::Ordering;
#[cfg(test)]
mod tests;
/// Source of the four per-file statistics (min, max, null count, row count) used for data
/// skipping.
///
/// Every implementor automatically receives a [`DataSkippingPredicateEvaluator`] through the
/// blanket impl in this module, which forwards its stat getters to the methods below.
///
/// NOTE(review): judging by the method names, the stats are presumably read from parquet file
/// metadata -- confirm against the implementors.
pub(crate) trait ParquetStatsProvider {
/// Returns the min-value stat of `col`, interpreted as `data_type`, or `None` when no such
/// stat can be produced (presumably: column or stat missing, or type mismatch -- TODO confirm).
fn get_parquet_min_stat(&self, col: &ColumnName, data_type: &DataType) -> Option<Scalar>;
/// Returns the max-value stat of `col`, interpreted as `data_type`, or `None` when no such
/// stat can be produced (presumably: column or stat missing, or type mismatch -- TODO confirm).
fn get_parquet_max_stat(&self, col: &ColumnName, data_type: &DataType) -> Option<Scalar>;
/// Returns the null-count stat of `col`, or `None` when it is not available.
fn get_parquet_nullcount_stat(&self, col: &ColumnName) -> Option<i64>;
/// Returns the row-count stat. Unlike the other getters, this one is not optional.
fn get_parquet_rowcount_stat(&self) -> i64;
}
/// Blanket implementation: any [`ParquetStatsProvider`] can act as a
/// [`DataSkippingPredicateEvaluator`] whose output is a plain `bool`.
///
/// Stat lookups are forwarded to the provider; scalar-level evaluation is forwarded to
/// [`PredicateEvaluatorDefaults`].
impl<T: ParquetStatsProvider> DataSkippingPredicateEvaluator for T {
    type Output = bool;
    type TypedStat = Scalar;
    type IntStat = i64;

    /// Forwards to [`ParquetStatsProvider::get_parquet_min_stat`].
    fn get_min_stat(&self, col: &ColumnName, data_type: &DataType) -> Option<Scalar> {
        self.get_parquet_min_stat(col, data_type)
    }

    /// Forwards to [`ParquetStatsProvider::get_parquet_max_stat`].
    fn get_max_stat(&self, col: &ColumnName, data_type: &DataType) -> Option<Scalar> {
        self.get_parquet_max_stat(col, data_type)
    }

    /// Forwards to [`ParquetStatsProvider::get_parquet_nullcount_stat`].
    fn get_nullcount_stat(&self, col: &ColumnName) -> Option<i64> {
        self.get_parquet_nullcount_stat(col)
    }

    /// Forwards to [`ParquetStatsProvider::get_parquet_rowcount_stat`]. The provider's row count
    /// is not optional, so this always yields `Some`.
    fn get_rowcount_stat(&self) -> Option<i64> {
        let rowcount = self.get_parquet_rowcount_stat();
        Some(rowcount)
    }

    /// Compares the column stat `col` against the literal `val` by delegating to
    /// [`PredicateEvaluatorDefaults::partial_cmp_scalars`].
    fn eval_partial_cmp(
        &self,
        ord: Ordering,
        col: Scalar,
        val: &Scalar,
        inverted: bool,
    ) -> Option<bool> {
        let stat = &col;
        PredicateEvaluatorDefaults::partial_cmp_scalars(ord, stat, val, inverted)
    }

    /// Evaluates a bare scalar by delegating to [`PredicateEvaluatorDefaults::eval_scalar`].
    fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option<bool> {
        PredicateEvaluatorDefaults::eval_scalar(val, inverted)
    }

    /// Evaluates a null check on `col` using only the null-count and row-count stats.
    ///
    /// The result is `Some(false)` exactly when the null count equals the one value that rules
    /// the check out:
    /// * `inverted == false` (`IS NULL`): a null count of zero, i.e. no nulls at all.
    /// * `inverted == true` (`IS NOT NULL`): a null count equal to the row count, i.e. all nulls.
    ///
    /// Returns `None` when a required stat is unavailable.
    fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option<bool> {
        // The row count is only consulted (and only allowed to bail out) for the inverted check.
        let ruling_out_nullcount = if inverted {
            self.get_rowcount_stat()?
        } else {
            0
        };
        let nullcount = self.get_nullcount_stat(col)?;
        Some(nullcount != ruling_out_nullcount)
    }

    /// Evaluates a binary operation over two literals by delegating to
    /// [`PredicateEvaluatorDefaults::eval_binary_scalars`].
    fn eval_binary_scalars(
        &self,
        op: BinaryOperator,
        left: &Scalar,
        right: &Scalar,
        inverted: bool,
    ) -> Option<bool> {
        PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted)
    }

    /// Combines already-evaluated operands of a variadic operation by delegating to
    /// [`PredicateEvaluatorDefaults::finish_eval_variadic`].
    fn finish_eval_variadic(
        &self,
        op: VariadicOperator,
        exprs: impl IntoIterator<Item = Option<bool>>,
        inverted: bool,
    ) -> Option<bool> {
        PredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted)
    }
}
/// Filtering entry points layered on top of a boolean-valued
/// [`DataSkippingPredicateEvaluator`]; the blanket impl in this module provides them for every
/// such evaluator.
pub(crate) trait ParquetStatsSkippingFilter {
/// Evaluates `filter` as a WHERE-style predicate.
///
/// In the blanket impl, `AND` operands are evaluated recursively through this same method,
/// binary expressions go through [`Self::eval_binary_nullsafe`], and everything else is
/// evaluated as an ordinary (non-inverted) expression. `None` means no verdict was reached.
fn eval_sql_where(&self, filter: &Expr) -> Option<bool>;
/// Evaluates `left op right`, but first runs an inverted `IsNull` check on each operand.
///
/// In the blanket impl, if either check yields `Some(false)` the result is `Some(false)` and
/// the comparison itself is not evaluated; otherwise the comparison is evaluated normally.
fn eval_binary_nullsafe(&self, op: BinaryOperator, left: &Expr, right: &Expr) -> Option<bool>;
}
/// Blanket implementation of [`ParquetStatsSkippingFilter`] for every boolean-valued
/// [`DataSkippingPredicateEvaluator`].
impl<T: DataSkippingPredicateEvaluator<Output = bool>> ParquetStatsSkippingFilter for T {
    /// Evaluates `filter` as a WHERE-style predicate.
    ///
    /// * `AND`: each operand is evaluated recursively with this method, the outcome is turned
    ///   back into a boolean literal (a NULL boolean literal when there is no verdict), and the
    ///   literals are combined with a regular `AND` evaluation.
    /// * Binary expression: routed through [`Self::eval_binary_nullsafe`].
    /// * Anything else: evaluated as an ordinary, non-inverted expression.
    fn eval_sql_where(&self, filter: &Expr) -> Option<bool> {
        match filter {
            Expr::Variadic(VariadicExpression {
                op: VariadicOperator::And,
                exprs: conjuncts,
            }) => {
                // Recurse into this method (not the generic expression evaluator) for every
                // operand, then feed the per-operand verdicts to the normal AND logic.
                let verdicts: Vec<_> = conjuncts
                    .iter()
                    .map(|conjunct| match self.eval_sql_where(conjunct) {
                        Some(verdict) => Expr::literal(verdict),
                        None => Expr::null_literal(DataType::BOOLEAN),
                    })
                    .collect();
                self.eval_variadic(VariadicOperator::And, &verdicts, false)
            }
            Expr::Binary(BinaryExpression { op, left, right }) => {
                self.eval_binary_nullsafe(*op, left, right)
            }
            other => self.eval_expr(other, false),
        }
    }

    /// Evaluates `left op right` behind a pair of inverted `IsNull` checks.
    ///
    /// The operands are checked in order (left, then right). As soon as one check evaluates to
    /// `Some(false)`, the result is `Some(false)` and nothing further is evaluated. Only when
    /// neither check short-circuits is the comparison itself evaluated.
    fn eval_binary_nullsafe(&self, op: BinaryOperator, left: &Expr, right: &Expr) -> Option<bool> {
        use UnaryOperator::IsNull;

        // True when the inverted null check on `operand` is definitively false.
        let fails_not_null_check =
            |operand: &Expr| matches!(self.eval_unary(IsNull, operand, true), Some(false));

        // `||` keeps the original ordering and skips the right-hand check on a left-hand hit.
        if fails_not_null_check(left) || fails_not_null_check(right) {
            return Some(false);
        }
        self.eval_binary(op, left, right, false)
    }
}