use serde::{Deserialize, Serialize};
/// Newtype wrapper around a `u8` level number.
///
/// `#[repr(transparent)]` gives it the same in-memory layout as the wrapped
/// `u8`. The derived ordering and equality compare the wrapped number
/// directly, and the derived serde impls make it usable as a serialized field.
// NOTE(review): presumably an LSM / compaction level index — confirm against callers.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Level(pub u8);
/// The level whose wrapped number is `0`.
pub const L0: Level = Level(0);
impl Level {
    /// Returns the level whose number is one greater than `self`.
    ///
    /// # Panics
    ///
    /// Panics if `self` already holds `u8::MAX`. The previous `self.0 + 1`
    /// panicked only in builds with overflow checks enabled and silently
    /// wrapped back to level 0 otherwise; the explicit check makes the
    /// behaviour identical in every build profile.
    pub fn next(self) -> Self {
        let bumped = self
            .0
            .checked_add(1)
            .expect("Level::next overflow: already at the maximum level (255)");
        Level(bumped)
    }
}
impl std::fmt::Display for Level {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "L{}", self.0)
}
}
/// Metadata record describing one file.
///
/// The record round-trips through JSON via [`ParquetFileMeta::serialize`] and
/// [`ParquetFileMeta::deserialize`]. Field order and the serde attributes
/// below define the wire format, so change them with care.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ParquetFileMeta {
/// The [`Level`] recorded for this file.
pub level: Level,
// NOTE(review): `seq_min` / `seq_max` presumably bound the sequence numbers
// contained in the file — confirm inclusivity against the writer.
/// Lower end of the recorded sequence range.
pub seq_min: u64,
/// Upper end of the recorded sequence range.
pub seq_max: u64,
/// Minimum key as raw bytes; written to JSON as a hex string.
#[serde(with = "hex_bytes")]
pub key_min: Vec<u8>,
/// Maximum key as raw bytes; written to JSON as a hex string.
#[serde(with = "hex_bytes")]
pub key_max: Vec<u8>,
/// Recorded row count.
pub num_rows: u64,
/// Recorded file size (presumably in bytes — TODO confirm).
pub file_size: u64,
// NOTE(review): the `dv_` prefix presumably stands for "deletion vector";
// the three fields look like a (path, offset, length) locator — confirm.
/// Optional `dv` path.
pub dv_path: Option<String>,
/// Optional `dv` offset.
pub dv_offset: Option<i64>,
/// Optional `dv` length.
pub dv_length: Option<i64>,
/// Optional [`FileFormat`]; a missing JSON key deserializes to `None`.
#[serde(default)]
pub format: Option<FileFormat>,
/// Optional per-column statistics; a missing JSON key deserializes to `None`.
#[serde(default)]
pub column_stats: Option<Vec<ColumnStats>>,
}
/// Statistics recorded for a single column, identified by `field_id`.
///
/// Serialized as part of [`ParquetFileMeta::column_stats`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ColumnStats {
/// Identifier of the column these statistics belong to.
pub field_id: i32,
/// Recorded compressed size (presumably in bytes — TODO confirm).
pub compressed_bytes: u64,
/// Recorded number of values.
pub value_count: u64,
/// Recorded number of nulls.
pub null_count: u64,
/// Optional lower bound as raw bytes; written to JSON as a hex string or
/// `null`, and a missing key deserializes to `None`.
#[serde(with = "hex_bytes_opt", default)]
pub lower_bound: Option<Vec<u8>>,
/// Optional upper bound as raw bytes; written to JSON as a hex string or
/// `null`, and a missing key deserializes to `None`.
#[serde(with = "hex_bytes_opt", default)]
pub upper_bound: Option<Vec<u8>>,
}
/// File format tag.
///
/// With `rename_all = "snake_case"` the variants serialize as the strings
/// `"columnar"` and `"dual"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileFormat {
/// Format for which [`FileFormat::has_value_blob`] returns `false`.
Columnar,
/// Format for which [`FileFormat::has_value_blob`] returns `true`.
Dual,
}
impl FileFormat {
#[inline]
pub fn default_for_level(level: Level) -> Self {
if level.0 == 0 {
FileFormat::Dual
} else {
FileFormat::Columnar
}
}
#[inline]
pub fn has_value_blob(self) -> bool {
matches!(self, FileFormat::Dual)
}
}
impl ParquetFileMeta {
    /// The string `"merutable.meta"`.
    // NOTE(review): presumably the footer key-value entry under which the
    // serialized record is stored — confirm against the writer.
    pub const FOOTER_KEY: &'static str = "merutable.meta";

    /// The string `"merutable.schema"`.
    // NOTE(review): presumably the footer key-value entry holding the table
    // schema — confirm against the writer.
    pub const SCHEMA_KEY: &'static str = "merutable.schema";

    /// Encodes this record as a JSON string.
    ///
    /// # Errors
    ///
    /// Returns `MeruError::Corruption` carrying the `serde_json` error text
    /// if encoding fails.
    pub fn serialize(&self) -> crate::types::Result<String> {
        match serde_json::to_string(self) {
            Ok(json) => Ok(json),
            Err(err) => Err(crate::types::MeruError::Corruption(err.to_string())),
        }
    }

    /// Decodes a record from the JSON string `s`.
    ///
    /// # Errors
    ///
    /// Returns `MeruError::Corruption` carrying the `serde_json` error text
    /// if `s` is not a valid encoding of this type.
    pub fn deserialize(s: &str) -> crate::types::Result<Self> {
        match serde_json::from_str::<Self>(s) {
            Ok(meta) => Ok(meta),
            Err(err) => Err(crate::types::MeruError::Corruption(err.to_string())),
        }
    }
}
/// Module-level alias for [`ParquetFileMeta::FOOTER_KEY`].
pub const FOOTER_KEY: &str = ParquetFileMeta::FOOTER_KEY;
/// Module-level alias for [`ParquetFileMeta::SCHEMA_KEY`].
pub const SCHEMA_KEY: &str = ParquetFileMeta::SCHEMA_KEY;
mod hex_bytes {
use serde::{Deserialize, Deserializer, Serializer};
pub fn serialize<S: Serializer>(bytes: &[u8], ser: S) -> Result<S::Ok, S::Error> {
ser.serialize_str(&hex::encode(bytes))
}
pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Vec<u8>, D::Error> {
let s = String::deserialize(de)?;
hex::decode(&s).map_err(serde::de::Error::custom)
}
}
mod hex_bytes_opt {
use serde::{Deserialize, Deserializer, Serializer};
pub fn serialize<S: Serializer>(bytes: &Option<Vec<u8>>, ser: S) -> Result<S::Ok, S::Error> {
match bytes {
Some(b) => ser.serialize_str(&hex::encode(b)),
None => ser.serialize_none(),
}
}
pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Option<Vec<u8>>, D::Error> {
let opt = Option::<String>::deserialize(de)?;
match opt {
Some(s) => hex::decode(&s).map(Some).map_err(serde::de::Error::custom),
None => Ok(None),
}
}
}