use std::any::Any;
use parquet2::metadata::ColumnChunkMetaData;
use parquet2::schema::types::PhysicalType;
use parquet2::statistics::PrimitiveStatistics as ParquetPrimitiveStatistics;
use parquet2::statistics::Statistics as ParquetStatistics;
use crate::datatypes::DataType;
use crate::datatypes::Field;
use crate::error::ArrowError;
use crate::error::Result;
mod primitive;
pub use primitive::*;
mod binary;
pub use binary::*;
mod boolean;
pub use boolean::*;
mod fixlen;
pub use fixlen::*;
use super::get_field_columns;
/// Common interface of the statistics types exposed by this module.
///
/// Implementors must also implement [`std::fmt::Debug`].
pub trait Statistics: std::fmt::Debug {
/// Returns a reference to the [`DataType`] associated with these statistics.
fn data_type(&self) -> &DataType;
/// Returns `self` as [`Any`]; presumably intended for downcasting to the
/// concrete statistics type (confirm against implementors).
fn as_any(&self) -> &dyn Any;
/// Returns the null count, if available.
// NOTE(review): `None` presumably means the count was not recorded — confirm.
fn null_count(&self) -> Option<i64>;
}
/// Equality between borrowed statistics trait objects.
///
/// Only the data types are compared; no other method of [`Statistics`] is
/// consulted.
impl PartialEq for &dyn Statistics {
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.data_type();
        let rhs = other.data_type();
        lhs == rhs
    }
}
/// Equality between boxed statistics trait objects.
///
/// Two boxes are considered equal when the data types they report are equal;
/// nothing else is inspected.
impl PartialEq for Box<dyn Statistics> {
    fn eq(&self, other: &Self) -> bool {
        let (mine, theirs) = (self.data_type(), other.data_type());
        mine == theirs
    }
}
/// Converts parquet statistics into their arrow counterpart for `data_type`.
///
/// Dispatches on the physical type reported by `stats`, downcasts `stats` to
/// the matching concrete parquet statistics type and hands it to the
/// type-specific converter.
///
/// # Errors
/// Returns [`ArrowError::NotYetImplemented`] for physical types that have no
/// conversion here, and forwards any error returned by the type-specific
/// converters.
///
/// # Panics
/// Panics if the concrete type behind `stats` does not match the physical
/// type it reports.
fn _deserialize_statistics(
    stats: &dyn ParquetStatistics,
    data_type: DataType,
) -> Result<Box<dyn Statistics>> {
    // A failed downcast means the statistics object lies about its physical
    // type, which is a broken invariant rather than a recoverable error.
    const MISMATCH: &str = "parquet statistics must match their reported physical type";

    match stats.physical_type() {
        PhysicalType::Int32 => {
            let stats = stats.as_any().downcast_ref().expect(MISMATCH);
            primitive::statistics_from_i32(stats, data_type)
        }
        PhysicalType::Int64 => {
            let stats = stats.as_any().downcast_ref().expect(MISMATCH);
            primitive::statistics_from_i64(stats, data_type)
        }
        PhysicalType::ByteArray => {
            let stats = stats.as_any().downcast_ref().expect(MISMATCH);
            binary::statistics_from_byte_array(stats, data_type)
        }
        PhysicalType::Boolean => {
            let stats = stats.as_any().downcast_ref().expect(MISMATCH);
            Ok(Box::new(BooleanStatistics::from(stats)))
        }
        PhysicalType::Float => {
            let stats = stats
                .as_any()
                .downcast_ref::<ParquetPrimitiveStatistics<f32>>()
                .expect(MISMATCH);
            Ok(Box::new(PrimitiveStatistics::<f32>::from((
                stats, data_type,
            ))))
        }
        PhysicalType::Double => {
            let stats = stats
                .as_any()
                .downcast_ref::<ParquetPrimitiveStatistics<f64>>()
                .expect(MISMATCH);
            Ok(Box::new(PrimitiveStatistics::<f64>::from((
                stats, data_type,
            ))))
        }
        PhysicalType::FixedLenByteArray(_) => {
            let stats = stats.as_any().downcast_ref().expect(MISMATCH);
            fixlen::statistics_from_fix_len(stats, data_type)
        }
        // Fixed-len byte arrays are handled above, so name the physical type
        // that actually reached this arm instead of blaming fixed-len arrays.
        other => Err(ArrowError::NotYetImplemented(format!(
            "Reading statistics of physical type {:?} is not yet supported",
            other
        ))),
    }
}
/// Flattens `field` into the list of its leaf fields.
///
/// List and large-list fields recurse into their inner field, struct fields
/// recurse into every child in declaration order, and any other field is
/// returned as a single-element vector containing itself.
fn get_fields(field: &Field) -> Vec<&Field> {
    match field.data_type.to_logical_type() {
        DataType::List(inner) | DataType::LargeList(inner) => get_fields(inner),
        DataType::Struct(children) => {
            let mut leaves = Vec::new();
            for child in children.iter() {
                leaves.extend(get_fields(child));
            }
            leaves
        }
        _ => vec![field],
    }
}
/// Deserializes the statistics of the column chunks belonging to `field`.
///
/// The columns selected by `get_field_columns` for `field.name` are paired,
/// in order, with the leaf fields of `field`. Each pair yields `None` when
/// the column carries no statistics, or the deserialized statistics typed
/// with the leaf's data type otherwise.
///
/// # Errors
/// Returns the first error encountered, either while reading a column's
/// statistics or while converting them.
pub fn deserialize_statistics(
    field: &Field,
    columns: &[ColumnChunkMetaData],
) -> Result<Vec<Option<Box<dyn Statistics>>>> {
    let field_columns = get_field_columns(columns, field.name.as_ref());
    let leaf_fields = get_fields(field);

    let mut deserialized = Vec::new();
    // `zip` stops at the shorter of the two sequences.
    for (column, leaf) in field_columns.into_iter().zip(leaf_fields) {
        let entry = match column.statistics() {
            Some(raw) => Some(_deserialize_statistics(
                raw?.as_ref(),
                leaf.data_type.clone(),
            )?),
            None => None,
        };
        deserialized.push(entry);
    }
    Ok(deserialized)
}