use crate::{error, proto};
/// Summary statistics for a single column.
///
/// Instances are produced by converting a `proto::ColumnStatistics` message;
/// the fields are private and exposed through read-only accessors.
#[derive(Debug, Clone)]
pub struct ColumnStatistics {
    /// Value count copied from the source message.
    number_of_values: u64,
    /// Null flag copied from the source message.
    has_null: bool,
    /// Type-specific details; `None` when the source message carried none.
    type_statistics: Option<TypeStatistics>,
}
impl ColumnStatistics {
    /// Returns the stored value count for the column.
    pub fn number_of_values(&self) -> u64 {
        self.number_of_values
    }

    /// Returns the stored null flag for the column.
    pub fn has_null(&self) -> bool {
        self.has_null
    }

    /// Borrows the type-specific statistics, or `None` when the column has none.
    pub fn type_statistics(&self) -> Option<&TypeStatistics> {
        self.type_statistics.as_ref()
    }
}
/// Statistics whose shape depends on the kind of data stored in a column.
///
/// Each variant corresponds to one of the optional `*_statistics` fields of
/// `proto::ColumnStatistics`.
#[derive(Debug, Clone)]
pub enum TypeStatistics {
    /// Built from `int_statistics`.
    Integer {
        /// Smallest value.
        min: i64,
        /// Largest value.
        max: i64,
        /// Sum of the values, when the source message provides one.
        sum: Option<i64>,
    },
    /// Built from `double_statistics`.
    Double {
        /// Smallest value.
        min: f64,
        /// Largest value.
        max: f64,
        /// Sum of the values, when the source message provides one.
        sum: Option<f64>,
    },
    /// Built from `string_statistics`.
    String {
        /// Smallest value.
        min: String,
        /// Largest value.
        max: String,
        /// Integer sum reported by the source message.
        // NOTE(review): presumably the total length of all strings, per the
        // ORC specification; confirm against the proto definition.
        sum: i64,
    },
    /// Built from `bucket_statistics`.
    Bucket {
        /// First entry of the source message's `count` list.
        true_count: u64,
    },
    /// Built from `decimal_statistics`; all values are kept in textual form.
    Decimal {
        /// Smallest value.
        min: String,
        /// Largest value.
        max: String,
        /// Sum of the values.
        sum: String,
    },
    /// Built from `date_statistics`.
    // NOTE(review): the unit of these integers is not visible here
    // (presumably days since the Unix epoch); confirm before relying on it.
    Date {
        /// Smallest value.
        min: i32,
        /// Largest value.
        max: i32,
    },
    /// Built from `binary_statistics`.
    Binary {
        /// Integer sum reported by the source message.
        // NOTE(review): presumably the total byte length; confirm.
        sum: i64,
    },
    /// Built from `timestamp_statistics`.
    // NOTE(review): the unit and epoch of these integers are not visible
    // here; confirm against the proto definition.
    Timestamp {
        /// Smallest value.
        min: i64,
        /// Largest value.
        max: i64,
        /// Smallest value, UTC variant.
        min_utc: i64,
        /// Largest value, UTC variant.
        max_utc: i64,
    },
    /// Built from `collection_statistics`.
    Collection {
        /// Minimum child count.
        min_children: u64,
        /// Maximum child count.
        max_children: u64,
        /// Total child count.
        total_children: u64,
    },
}
impl TryFrom<&proto::ColumnStatistics> for ColumnStatistics {
    type Error = error::OrcError;

    /// Builds a [`ColumnStatistics`] from its protobuf counterpart.
    ///
    /// The type-specific payload comes from the first populated field, probed
    /// in this order: integer, double, string, bucket, decimal, date, binary,
    /// timestamp, collection. If none is populated, `type_statistics` is
    /// `None`.
    ///
    /// Bucket statistics whose `count` list is empty are also reported as
    /// `None`: there is no true-count to read, and indexing the list
    /// unconditionally would panic on such input.
    ///
    /// # Errors
    ///
    /// There is currently no failure path; the conversion always yields `Ok`.
    fn try_from(value: &proto::ColumnStatistics) -> Result<Self, Self::Error> {
        let type_statistics = if let Some(stats) = &value.int_statistics {
            Some(TypeStatistics::Integer {
                min: stats.minimum(),
                max: stats.maximum(),
                sum: stats.sum,
            })
        } else if let Some(stats) = &value.double_statistics {
            Some(TypeStatistics::Double {
                min: stats.minimum(),
                max: stats.maximum(),
                sum: stats.sum,
            })
        } else if let Some(stats) = &value.string_statistics {
            Some(TypeStatistics::String {
                min: stats.minimum().to_owned(),
                max: stats.maximum().to_owned(),
                sum: stats.sum(),
            })
        } else if let Some(stats) = &value.bucket_statistics {
            // `count` is a list and may be empty in malformed or unusual
            // metadata, so read the first entry with a checked accessor
            // rather than `count[0]`.
            stats
                .count
                .first()
                .map(|&true_count| TypeStatistics::Bucket { true_count })
        } else if let Some(stats) = &value.decimal_statistics {
            Some(TypeStatistics::Decimal {
                min: stats.minimum().to_owned(),
                max: stats.maximum().to_owned(),
                sum: stats.sum().to_owned(),
            })
        } else if let Some(stats) = &value.date_statistics {
            Some(TypeStatistics::Date {
                min: stats.minimum(),
                max: stats.maximum(),
            })
        } else if let Some(stats) = &value.binary_statistics {
            Some(TypeStatistics::Binary { sum: stats.sum() })
        } else if let Some(stats) = &value.timestamp_statistics {
            Some(TypeStatistics::Timestamp {
                min: stats.minimum(),
                max: stats.maximum(),
                min_utc: stats.minimum_utc(),
                max_utc: stats.maximum_utc(),
            })
        } else {
            // Last candidate: collection statistics, or nothing at all.
            value
                .collection_statistics
                .as_ref()
                .map(|stats| TypeStatistics::Collection {
                    min_children: stats.min_children(),
                    max_children: stats.max_children(),
                    total_children: stats.total_children(),
                })
        };

        Ok(Self {
            number_of_values: value.number_of_values(),
            has_null: value.has_null(),
            type_statistics,
        })
    }
}