use std::fs::File;
use super::*;
use crate::expressions::{column_name, column_pred};
use crate::kernel_predicates::DataSkippingPredicateEvaluator as _;
use crate::parquet::arrow::arrow_reader::ArrowReaderMetadata;
use crate::Predicate;
#[test]
fn test_get_stat_values() {
let file = File::open("./tests/data/parquet_row_group_skipping/part-00000-b92e017a-50ba-4676-8322-48fc371c2b59-c000.snappy.parquet").unwrap();
let metadata = ArrowReaderMetadata::load(&file, Default::default()).unwrap();
let columns = Predicate::and_from(vec![
column_pred!("varlen.utf8"),
column_pred!("numeric.ints.int64"),
column_pred!("numeric.ints.int32"),
column_pred!("numeric.ints.int16"),
column_pred!("numeric.ints.int8"),
column_pred!("numeric.floats.float32"),
column_pred!("numeric.floats.float64"),
column_pred!("bool"),
column_pred!("varlen.binary"),
column_pred!("numeric.decimals.decimal32"),
column_pred!("numeric.decimals.decimal64"),
column_pred!("numeric.decimals.decimal128"),
column_pred!("chrono.date32"),
column_pred!("chrono.timestamp"),
column_pred!("chrono.timestamp_ntz"),
]);
let filter = RowGroupFilter::new(metadata.metadata().row_group(0), &columns);
assert_eq!(filter.get_rowcount_stat(), Some(5i64.into()));
assert_eq!(
filter.get_nullcount_stat(&column_name!("bool")),
Some(3i64.into())
);
assert_eq!(
filter.get_nullcount_stat(&column_name!("varlen.utf8")),
None );
assert_eq!(
filter.get_min_stat(&column_name!("varlen.utf8"), &DataType::STRING),
Some("a".into())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::STRING
),
Some("\0\0\0\0\0\0\0\0\0\0\0\0+x".into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.ints.int64"), &DataType::LONG),
Some(1000000000i64.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.ints.int32"), &DataType::LONG),
Some(1000000i64.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.ints.int32"), &DataType::INTEGER),
Some(1000000i32.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.ints.int16"), &DataType::SHORT),
Some(1000i16.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.ints.int8"), &DataType::BYTE),
Some(0i8.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.floats.float64"), &DataType::DOUBLE),
Some(1147f64.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.floats.float32"), &DataType::DOUBLE),
Some(139f64.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("numeric.floats.float32"), &DataType::FLOAT),
Some(139f32.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("bool"), &DataType::BOOLEAN),
Some(false.into())
);
assert_eq!(
filter.get_min_stat(&column_name!("varlen.binary"), &DataType::BINARY),
Some([].as_slice().into())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::BINARY
),
Some(
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2b, 0x78]
.as_slice()
.into()
)
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(8, 3).unwrap()
),
Some(Scalar::decimal(11032, 8, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal64"),
&DataType::decimal(16, 3).unwrap()
),
Some(Scalar::decimal(11064, 16, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(16, 3).unwrap()
),
Some(Scalar::decimal(11032, 16, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(11128, 32, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal64"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(11064, 32, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(11032, 32, 3).unwrap())
);
assert_eq!(
filter.get_min_stat(&column_name!("chrono.date32"), &DataType::DATE),
Some(PrimitiveType::Date.parse_scalar("1971-01-01").unwrap())
);
assert_eq!(
filter.get_min_stat(&column_name!("chrono.timestamp"), &DataType::TIMESTAMP),
None );
assert_eq!(
filter.get_min_stat(
&column_name!("chrono.date32"),
&DataType::unshredded_variant()
),
None
);
assert_eq!(
filter.get_min_stat(&column_name!("chrono.timestamp_ntz"), &DataType::TIMESTAMP),
Some(
PrimitiveType::Timestamp
.parse_scalar("1970-01-02 00:00:00.000000")
.unwrap()
)
);
assert_eq!(
filter.get_min_stat(
&column_name!("chrono.timestamp_ntz"),
&DataType::TIMESTAMP_NTZ
),
Some(
PrimitiveType::TimestampNtz
.parse_scalar("1970-01-02 00:00:00.000000")
.unwrap()
)
);
assert_eq!(
filter.get_min_stat(&column_name!("chrono.date32"), &DataType::TIMESTAMP_NTZ),
Some(
PrimitiveType::TimestampNtz
.parse_scalar("1971-01-01 00:00:00.000000")
.unwrap()
)
);
assert_eq!(
filter.get_max_stat(&column_name!("varlen.utf8"), &DataType::STRING),
Some("e".into())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::STRING
),
Some("\0\0\0\0\0\0\0\0\0\0\0\0;\u{18}".into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.ints.int64"), &DataType::LONG),
Some(1000000004i64.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.ints.int32"), &DataType::LONG),
Some(1000004i64.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.ints.int32"), &DataType::INTEGER),
Some(1000004.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.ints.int16"), &DataType::SHORT),
Some(1004i16.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.ints.int8"), &DataType::BYTE),
Some(4i8.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.floats.float64"), &DataType::DOUBLE),
Some(1125899906842747f64.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.floats.float32"), &DataType::DOUBLE),
Some(1048699f64.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("numeric.floats.float32"), &DataType::FLOAT),
Some(1048699f32.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("bool"), &DataType::BOOLEAN),
Some(true.into())
);
assert_eq!(
filter.get_max_stat(&column_name!("varlen.binary"), &DataType::BINARY),
Some([0, 0, 0, 0].as_slice().into())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::BINARY
),
Some(
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x3b, 0x18]
.as_slice()
.into()
)
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(8, 3).unwrap()
),
Some(Scalar::decimal(15032, 8, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal64"),
&DataType::decimal(16, 3).unwrap()
),
Some(Scalar::decimal(15064, 16, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(16, 3).unwrap()
),
Some(Scalar::decimal(15032, 16, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal128"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(15128, 32, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal64"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(15064, 32, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(
&column_name!("numeric.decimals.decimal32"),
&DataType::decimal(32, 3).unwrap()
),
Some(Scalar::decimal(15032, 32, 3).unwrap())
);
assert_eq!(
filter.get_max_stat(&column_name!("chrono.date32"), &DataType::DATE),
Some(PrimitiveType::Date.parse_scalar("1971-01-05").unwrap())
);
assert_eq!(
filter.get_max_stat(&column_name!("chrono.timestamp"), &DataType::TIMESTAMP),
None );
assert_eq!(
filter.get_max_stat(
&column_name!("chrono.date32"),
&DataType::unshredded_variant()
),
None
);
assert_eq!(
filter.get_max_stat(&column_name!("chrono.timestamp_ntz"), &DataType::TIMESTAMP),
Some(
PrimitiveType::Timestamp
.parse_scalar("1970-01-02 00:04:00.000000")
.unwrap()
)
);
assert_eq!(
filter.get_max_stat(
&column_name!("chrono.timestamp_ntz"),
&DataType::TIMESTAMP_NTZ
),
Some(
PrimitiveType::TimestampNtz
.parse_scalar("1970-01-02 00:04:00.000000")
.unwrap()
)
);
assert_eq!(
filter.get_max_stat(&column_name!("chrono.date32"), &DataType::TIMESTAMP_NTZ),
Some(
PrimitiveType::TimestampNtz
.parse_scalar("1971-01-05 00:00:00.000000")
.unwrap()
)
);
}