use std::fmt::Debug;
use std::hash::Hash;
use std::hash::Hasher;
use vortex_array::ArrayRef;
use vortex_error::VortexResult;
use crate::BtrBlocksCompressor;
use crate::CompressorContext;
use crate::CompressorStats;
use crate::sample::sample_count_approx_one_percent;
use crate::stats::SAMPLE_SIZE;
pub trait Scheme: Debug {
type StatsType: CompressorStats;
type CodeType: Copy + Eq + Hash + Ord;
fn code(&self) -> Self::CodeType;
fn is_constant(&self) -> bool {
false
}
fn expected_compression_ratio(
&self,
compressor: &BtrBlocksCompressor,
stats: &Self::StatsType,
ctx: CompressorContext,
excludes: &[Self::CodeType],
) -> VortexResult<f64> {
self.estimate_compression_ratio_with_sampling(compressor, stats, ctx, excludes)
}
fn compress(
&self,
compressor: &BtrBlocksCompressor,
stats: &Self::StatsType,
ctx: CompressorContext,
excludes: &[Self::CodeType],
) -> VortexResult<ArrayRef>;
}
/// Two scheme trait objects are equal exactly when their codes are equal.
impl<C, V> PartialEq for dyn Scheme<CodeType = C, StatsType = V>
where
    C: Copy + Eq + Hash + Ord,
    V: CompressorStats,
{
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.code(), other.code());
        lhs == rhs
    }
}
/// Equality on scheme trait objects is total because it is defined by the code,
/// whose type is itself `Eq`.
impl<C, V> Eq for dyn Scheme<CodeType = C, StatsType = V>
where
    C: Copy + Eq + Hash + Ord,
    V: CompressorStats,
{
}
/// Hashes a scheme trait object by feeding only its code into the hasher, which
/// keeps hashing consistent with the code-based equality.
impl<C, V> Hash for dyn Scheme<CodeType = C, StatsType = V>
where
    C: Copy + Eq + Hash + Ord,
    V: CompressorStats,
{
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.code(), state);
    }
}
/// Extension methods available on every [`Scheme`].
pub trait SchemeExt: Scheme {
    /// Estimates this scheme's compression ratio by compressing a sample.
    ///
    /// When `ctx.is_sample` is set, `stats` is used as-is (cloned). Otherwise a
    /// sample is drawn with `stats.sample(SAMPLE_SIZE, count)`, where `count` comes
    /// from `sample_count_approx_one_percent` applied to the source length.
    ///
    /// The sample is compressed with `ctx.as_sample()`, and the result is the
    /// sample's source byte size divided by the compressed byte size.
    ///
    /// Note that there is no guard for a zero-byte compressed result: the
    /// floating-point division then yields infinity (or NaN when both sizes are zero).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`Scheme::compress`].
    fn estimate_compression_ratio_with_sampling(
        &self,
        btr_blocks_compressor: &BtrBlocksCompressor,
        stats: &Self::StatsType,
        ctx: CompressorContext,
        excludes: &[Self::CodeType],
    ) -> VortexResult<f64> {
        let sampled_stats = if ctx.is_sample {
            // Already operating on a sample: do not sample again.
            stats.clone()
        } else {
            let total_len = stats.source().len();
            let chunk_count = sample_count_approx_one_percent(total_len);

            tracing::trace!(
                "Sampling {} values out of {}",
                SAMPLE_SIZE as u64 * chunk_count as u64,
                total_len
            );

            stats.sample(SAMPLE_SIZE, chunk_count)
        };

        // Compress first, then measure the input, matching the original ordering.
        let compressed_bytes = self
            .compress(btr_blocks_compressor, &sampled_stats, ctx.as_sample(), excludes)?
            .nbytes();
        let original_bytes = sampled_stats.source().nbytes();
        let ratio = original_bytes as f64 / compressed_bytes as f64;

        tracing::debug!(
            "estimate_compression_ratio_with_sampling(compressor={self:#?} ctx={ctx:?}) = {}",
            ratio
        );

        Ok(ratio)
    }
}
/// Blanket implementation: every [`Scheme`], including unsized ones such as
/// trait objects, gets the [`SchemeExt`] methods.
impl<T> SchemeExt for T where T: Scheme + ?Sized {}