use serde::{Deserialize, Serialize};
use zerompk::{FromMessagePack, ToMessagePack};
use crate::error::CodecError;
/// Codec selector for a column, including the not-yet-decided [`ColumnCodec::Auto`].
///
/// Discriminants are fixed `u8` values (`#[repr(u8)]`); every variant except `Auto` has a
/// same-named, same-valued counterpart in `ResolvedColumnCodec`.
///
/// # Serialized names
///
/// serde's `rename_all = "snake_case"` inserts `_` before every uppercase letter, so the
/// `FastLanes` variants serialize as `..fast_lanes..` (e.g. `alp_fast_lanes_lz4`), whereas
/// [`ColumnCodec::as_str`] and [`parse_codec_name`] spell them `..fastlanes..`
/// (e.g. `alp_fastlanes_lz4`). The affected variants carry a `#[serde(alias = ...)]` so that
/// deserialization accepts both spellings; serialized output is unchanged.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ToMessagePack, FromMessagePack,
)]
#[serde(rename_all = "snake_case")]
#[repr(u8)]
#[msgpack(c_enum)]
pub enum ColumnCodec {
    /// Placeholder that must be replaced by a concrete codec; `try_resolve` rejects it with
    /// `CodecError::UnresolvedAuto`.
    Auto = 0,

    // Variants 1-9 are the ones `is_cascading` reports as cascading.
    #[serde(alias = "alp_fastlanes_lz4")]
    AlpFastLanesLz4 = 1,
    AlpRdLz4 = 2,
    PcodecLz4 = 3,
    #[serde(alias = "delta_fastlanes_lz4")]
    DeltaFastLanesLz4 = 4,
    #[serde(alias = "fastlanes_lz4")]
    FastLanesLz4 = 5,
    FsstLz4 = 6,

    // Variants 7-9 are additionally reported by `is_cold_tier`.
    #[serde(alias = "alp_fastlanes_rans")]
    AlpFastLanesRans = 7,
    #[serde(alias = "delta_fastlanes_rans")]
    DeltaFastLanesRans = 8,
    FsstRans = 9,

    // Variants 10-15 are neither cascading nor cold-tier.
    Gorilla = 10,
    DoubleDelta = 11,
    Delta = 12,
    Lz4 = 13,
    Zstd = 14,
    /// Reported as not compressed by `is_compressed`.
    Raw = 15,
}
impl ColumnCodec {
pub fn is_compressed(&self) -> bool {
!matches!(self, Self::Raw | Self::Auto)
}
pub fn is_cascading(&self) -> bool {
matches!(
self,
Self::AlpFastLanesLz4
| Self::AlpRdLz4
| Self::PcodecLz4
| Self::DeltaFastLanesLz4
| Self::FastLanesLz4
| Self::FsstLz4
| Self::AlpFastLanesRans
| Self::DeltaFastLanesRans
| Self::FsstRans
)
}
pub fn is_cold_tier(&self) -> bool {
matches!(
self,
Self::AlpFastLanesRans | Self::DeltaFastLanesRans | Self::FsstRans
)
}
pub fn as_str(&self) -> &'static str {
match self {
Self::Auto => "auto",
Self::AlpFastLanesLz4 => "alp_fastlanes_lz4",
Self::AlpRdLz4 => "alp_rd_lz4",
Self::PcodecLz4 => "pcodec_lz4",
Self::DeltaFastLanesLz4 => "delta_fastlanes_lz4",
Self::FastLanesLz4 => "fastlanes_lz4",
Self::FsstLz4 => "fsst_lz4",
Self::AlpFastLanesRans => "alp_fastlanes_rans",
Self::DeltaFastLanesRans => "delta_fastlanes_rans",
Self::FsstRans => "fsst_rans",
Self::Gorilla => "gorilla",
Self::DoubleDelta => "double_delta",
Self::Delta => "delta",
Self::Lz4 => "lz4",
Self::Zstd => "zstd",
Self::Raw => "raw",
}
}
pub fn try_resolve(self) -> Result<ResolvedColumnCodec, CodecError> {
match self {
Self::Auto => Err(CodecError::UnresolvedAuto),
Self::AlpFastLanesLz4 => Ok(ResolvedColumnCodec::AlpFastLanesLz4),
Self::AlpRdLz4 => Ok(ResolvedColumnCodec::AlpRdLz4),
Self::PcodecLz4 => Ok(ResolvedColumnCodec::PcodecLz4),
Self::DeltaFastLanesLz4 => Ok(ResolvedColumnCodec::DeltaFastLanesLz4),
Self::FastLanesLz4 => Ok(ResolvedColumnCodec::FastLanesLz4),
Self::FsstLz4 => Ok(ResolvedColumnCodec::FsstLz4),
Self::AlpFastLanesRans => Ok(ResolvedColumnCodec::AlpFastLanesRans),
Self::DeltaFastLanesRans => Ok(ResolvedColumnCodec::DeltaFastLanesRans),
Self::FsstRans => Ok(ResolvedColumnCodec::FsstRans),
Self::Gorilla => Ok(ResolvedColumnCodec::Gorilla),
Self::DoubleDelta => Ok(ResolvedColumnCodec::DoubleDelta),
Self::Delta => Ok(ResolvedColumnCodec::Delta),
Self::Lz4 => Ok(ResolvedColumnCodec::Lz4),
Self::Zstd => Ok(ResolvedColumnCodec::Zstd),
Self::Raw => Ok(ResolvedColumnCodec::Raw),
}
}
}
impl std::fmt::Display for ColumnCodec {
    /// Writes the canonical name returned by `as_str`; width, fill and precision flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_str())
    }
}
/// Parses a canonical codec name (as produced by `ColumnCodec::as_str`) into a [`ColumnCodec`].
///
/// Matching is exact: case-sensitive, with no trimming.
///
/// # Errors
///
/// Returns [`CodecError::UnknownCodec`] carrying the rejected input and the list of accepted
/// names when `s` is not one of them.
pub fn parse_codec_name(s: &str) -> Result<ColumnCodec, CodecError> {
    // Human-readable list of accepted names, reported back on failure.
    const VALID_NAMES: &str = "auto, alp_fastlanes_lz4, alp_rd_lz4, pcodec_lz4, delta_fastlanes_lz4, \
                               fastlanes_lz4, fsst_lz4, alp_fastlanes_rans, delta_fastlanes_rans, \
                               fsst_rans, gorilla, double_delta, delta, lz4, zstd, raw";

    let codec = match s {
        "auto" => ColumnCodec::Auto,
        "alp_fastlanes_lz4" => ColumnCodec::AlpFastLanesLz4,
        "alp_rd_lz4" => ColumnCodec::AlpRdLz4,
        "pcodec_lz4" => ColumnCodec::PcodecLz4,
        "delta_fastlanes_lz4" => ColumnCodec::DeltaFastLanesLz4,
        "fastlanes_lz4" => ColumnCodec::FastLanesLz4,
        "fsst_lz4" => ColumnCodec::FsstLz4,
        "alp_fastlanes_rans" => ColumnCodec::AlpFastLanesRans,
        "delta_fastlanes_rans" => ColumnCodec::DeltaFastLanesRans,
        "fsst_rans" => ColumnCodec::FsstRans,
        "gorilla" => ColumnCodec::Gorilla,
        "double_delta" => ColumnCodec::DoubleDelta,
        "delta" => ColumnCodec::Delta,
        "lz4" => ColumnCodec::Lz4,
        "zstd" => ColumnCodec::Zstd,
        "raw" => ColumnCodec::Raw,
        unknown => {
            return Err(CodecError::UnknownCodec {
                name: unknown.to_owned(),
                valid: VALID_NAMES,
            })
        }
    };
    Ok(codec)
}
/// A concrete column codec: the same variants and `u8` discriminants as `ColumnCodec`, minus
/// `Auto` (discriminant 0 is therefore unused).
///
/// # Serialized names
///
/// serde's `rename_all = "snake_case"` inserts `_` before every uppercase letter, so the
/// `FastLanes` variants serialize as `..fast_lanes..` (e.g. `alp_fast_lanes_lz4`), whereas
/// [`ResolvedColumnCodec::as_str`] spells them `..fastlanes..` (e.g. `alp_fastlanes_lz4`).
/// The affected variants carry a `#[serde(alias = ...)]` so that deserialization accepts both
/// spellings; serialized output is unchanged.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ToMessagePack, FromMessagePack,
)]
#[serde(rename_all = "snake_case")]
#[repr(u8)]
#[msgpack(c_enum)]
pub enum ResolvedColumnCodec {
    #[serde(alias = "alp_fastlanes_lz4")]
    AlpFastLanesLz4 = 1,
    AlpRdLz4 = 2,
    PcodecLz4 = 3,
    #[serde(alias = "delta_fastlanes_lz4")]
    DeltaFastLanesLz4 = 4,
    #[serde(alias = "fastlanes_lz4")]
    FastLanesLz4 = 5,
    FsstLz4 = 6,
    #[serde(alias = "alp_fastlanes_rans")]
    AlpFastLanesRans = 7,
    #[serde(alias = "delta_fastlanes_rans")]
    DeltaFastLanesRans = 8,
    FsstRans = 9,
    Gorilla = 10,
    DoubleDelta = 11,
    Delta = 12,
    Lz4 = 13,
    Zstd = 14,
    Raw = 15,
}
impl ResolvedColumnCodec {
    /// Widens this codec into the [`ColumnCodec`] variant of the same name.
    ///
    /// The result is never `ColumnCodec::Auto`.
    pub fn into_column_codec(self) -> ColumnCodec {
        match self {
            // Composite codecs ending in LZ4.
            Self::AlpFastLanesLz4 => ColumnCodec::AlpFastLanesLz4,
            Self::AlpRdLz4 => ColumnCodec::AlpRdLz4,
            Self::PcodecLz4 => ColumnCodec::PcodecLz4,
            Self::DeltaFastLanesLz4 => ColumnCodec::DeltaFastLanesLz4,
            Self::FastLanesLz4 => ColumnCodec::FastLanesLz4,
            Self::FsstLz4 => ColumnCodec::FsstLz4,

            // Composite codecs ending in rANS.
            Self::AlpFastLanesRans => ColumnCodec::AlpFastLanesRans,
            Self::DeltaFastLanesRans => ColumnCodec::DeltaFastLanesRans,
            Self::FsstRans => ColumnCodec::FsstRans,

            // Standalone codecs.
            Self::Gorilla => ColumnCodec::Gorilla,
            Self::DoubleDelta => ColumnCodec::DoubleDelta,
            Self::Delta => ColumnCodec::Delta,
            Self::Lz4 => ColumnCodec::Lz4,
            Self::Zstd => ColumnCodec::Zstd,
            Self::Raw => ColumnCodec::Raw,
        }
    }

    /// Returns the canonical lowercase name of the codec.
    ///
    /// Delegates to `ColumnCodec::as_str` on the widened value, which uses the same name for
    /// every variant the two enums share.
    pub fn as_str(self) -> &'static str {
        let widened = self.into_column_codec();
        widened.as_str()
    }
}
impl std::fmt::Display for ResolvedColumnCodec {
    /// Writes the canonical name returned by `as_str`; width, fill and precision flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_str())
    }
}
/// Coarse tag for the kind of data a column holds.
///
/// NOTE(review): no consumer of this enum is visible in this part of the file; presumably it
/// guides which concrete codec replaces `ColumnCodec::Auto` — confirm against the caller.
///
/// The enum is `#[non_exhaustive]`, so code in other crates must include a wildcard arm when
/// matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ColumnTypeHint {
Timestamp,
Float64,
Int64,
Symbol,
String,
}
/// Summary of one column: the codec used, the value count, optional numeric aggregates, and
/// byte sizes before and after compression.
///
/// The `Option` fields are omitted from serde output when they are `None`
/// (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize, ToMessagePack, FromMessagePack)]
pub struct ColumnStatistics {
/// Codec recorded for this column.
pub codec: ResolvedColumnCodec,
/// Number of values summarized (the input slice length in the `from_*` constructors).
pub count: u64,
/// Minimum value as `f64`; populated by `from_i64` / `from_f64` for non-empty input.
#[serde(skip_serializing_if = "Option::is_none")]
pub min: Option<f64>,
/// Maximum value as `f64`; populated by `from_i64` / `from_f64` for non-empty input.
#[serde(skip_serializing_if = "Option::is_none")]
pub max: Option<f64>,
/// Sum of all values as `f64`; populated by `from_i64` / `from_f64` for non-empty input.
#[serde(skip_serializing_if = "Option::is_none")]
pub sum: Option<f64>,
/// Caller-supplied cardinality; populated only by `from_symbols`.
#[serde(skip_serializing_if = "Option::is_none")]
pub cardinality: Option<u32>,
/// Size of the encoded column in bytes, as reported by the caller.
pub compressed_bytes: u64,
/// Size of the raw values in bytes (8 per `i64`/`f64` value, 4 per `u32` symbol).
pub uncompressed_bytes: u64,
}
impl ColumnStatistics {
    /// Creates empty statistics for `codec`: zero count, zero byte sizes, and no aggregates.
    pub fn new(codec: ResolvedColumnCodec) -> Self {
        Self {
            codec,
            count: 0,
            min: None,
            max: None,
            sum: None,
            cardinality: None,
            compressed_bytes: 0,
            uncompressed_bytes: 0,
        }
    }

    /// Builds statistics for a column of `i64` values.
    ///
    /// For empty input this returns [`ColumnStatistics::new`], so `compressed_bytes` is not
    /// recorded in that case. The sum is accumulated in `i128`, so it cannot overflow, and is
    /// then converted to `f64`; `min`, `max` and `sum` may be rounded by that conversion for
    /// magnitudes above 2^53.
    pub fn from_i64(values: &[i64], codec: ResolvedColumnCodec, compressed_bytes: u64) -> Self {
        let (&first, rest) = match values.split_first() {
            Some(parts) => parts,
            None => return Self::new(codec),
        };

        // Single pass over the remaining values, seeded with the first one.
        let (min, max, sum) = rest.iter().fold(
            (first, first, i128::from(first)),
            |(lo, hi, acc), &v| (lo.min(v), hi.max(v), acc + i128::from(v)),
        );

        Self {
            codec,
            count: values.len() as u64,
            min: Some(min as f64),
            max: Some(max as f64),
            sum: Some(sum as f64),
            cardinality: None,
            compressed_bytes,
            uncompressed_bytes: std::mem::size_of_val(values) as u64,
        }
    }

    /// Builds statistics for a column of `f64` values.
    ///
    /// For empty input this returns [`ColumnStatistics::new`], so `compressed_bytes` is not
    /// recorded in that case.
    ///
    /// NaN handling: `min` and `max` ignore NaN values regardless of where they appear in the
    /// slice, and are NaN only when every value is NaN. `sum` is a plain left-to-right
    /// floating-point sum, so any NaN input makes it NaN.
    pub fn from_f64(values: &[f64], codec: ResolvedColumnCodec, compressed_bytes: u64) -> Self {
        let (&first, rest) = match values.split_first() {
            Some(parts) => parts,
            None => return Self::new(codec),
        };

        // `f64::min` / `f64::max` return the non-NaN operand when exactly one side is NaN, which
        // makes the result independent of NaN position (a plain `<` / `>` scan is not).
        // The sum is seeded as `0.0 + first` to keep the same left-to-right accumulation order.
        let (min, max, sum) = rest.iter().fold(
            (first, first, 0.0 + first),
            |(lo, hi, acc), &v| (lo.min(v), hi.max(v), acc + v),
        );

        Self {
            codec,
            count: values.len() as u64,
            min: Some(min),
            max: Some(max),
            sum: Some(sum),
            cardinality: None,
            compressed_bytes,
            uncompressed_bytes: std::mem::size_of_val(values) as u64,
        }
    }

    /// Builds statistics for a column of `u32` symbol values.
    ///
    /// `cardinality` is stored as given; it is not derived from `values`. No numeric
    /// aggregates are computed. Unlike the numeric constructors, empty input still records
    /// `compressed_bytes` and `cardinality`.
    pub fn from_symbols(
        values: &[u32],
        cardinality: u32,
        codec: ResolvedColumnCodec,
        compressed_bytes: u64,
    ) -> Self {
        Self {
            codec,
            count: values.len() as u64,
            min: None,
            max: None,
            sum: None,
            cardinality: Some(cardinality),
            compressed_bytes,
            uncompressed_bytes: std::mem::size_of_val(values) as u64,
        }
    }

    /// Returns `uncompressed_bytes / compressed_bytes`, or `1.0` when `compressed_bytes` is
    /// zero (avoiding a division by zero).
    pub fn compression_ratio(&self) -> f64 {
        if self.compressed_bytes == 0 {
            return 1.0;
        }
        self.uncompressed_bytes as f64 / self.compressed_bytes as f64
    }
}