use std::fmt;
use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_error::VortexResult;
use crate::CascadingCompressor;
use crate::ctx::CompressorContext;
use crate::sample::SAMPLE_SIZE;
use crate::sample::sample;
use crate::sample::sample_count_approx_one_percent;
use crate::scheme::Scheme;
use crate::stats::ArrayAndStats;
/// Signature of a deferred estimation callback.
///
/// The callback is one-shot (`FnOnce`). It receives a shared reference to the
/// [`CascadingCompressor`], a mutable reference to the [`ArrayAndStats`] under consideration, and
/// an owned [`CompressorContext`], and it produces an [`EstimateVerdict`] or an error.
///
/// The trait object is required to be `Send + Sync`. It is stored boxed inside
/// [`DeferredEstimate::Callback`].
#[rustfmt::skip]
pub type EstimateFn = dyn FnOnce(
&CascadingCompressor,
&mut ArrayAndStats,
CompressorContext,
) -> VortexResult<EstimateVerdict>
+ Send
+ Sync;
/// An estimate of how worthwhile a compression scheme is, either available immediately or
/// requiring further work to resolve.
#[derive(Debug)]
pub enum CompressionEstimate {
/// The verdict is already known; no further work is needed to obtain it.
Verdict(EstimateVerdict),
/// The verdict is not known yet; the wrapped [`DeferredEstimate`] describes how to obtain it.
Deferred(DeferredEstimate),
}
/// The resolved outcome of an estimate.
///
/// NOTE(review): how each variant is acted upon is decided by the caller, which is not visible
/// here; the variant descriptions below follow the names and should be confirmed against it.
#[derive(Debug)]
pub enum EstimateVerdict {
/// Presumably: do not consider this scheme for the array.
Skip,
/// Presumably: select this scheme without comparing ratios.
AlwaysUse,
/// An estimated compression ratio. Presumably on the same scale as the value computed by
/// `estimate_compression_ratio_with_sampling` (input bytes divided by compressed bytes), where
/// values above `1.0` mean the data got smaller.
Ratio(f64),
}
/// Describes how a not-yet-known estimate should be obtained.
///
/// `Debug` is implemented by hand for this type (rather than derived) and prints the callback
/// payload as `..`.
pub enum DeferredEstimate {
/// Presumably: resolve the estimate by compressing a sample of the data (see
/// `estimate_compression_ratio_with_sampling`) — confirm against the caller.
Sample,
/// Resolve the estimate by invoking the boxed [`EstimateFn`] callback.
Callback(Box<EstimateFn>),
}
/// Returns `true` when `ratio` should replace the current `best` candidate.
///
/// A ratio qualifies only if it is finite, not subnormal, and strictly greater than `1.0`. When
/// `best` already holds a candidate, `ratio` must also strictly exceed that candidate's ratio.
pub(super) fn is_better_ratio(ratio: f64, best: &Option<(&'static dyn Scheme, f64)>) -> bool {
    // NaN, infinities and subnormal values are never acceptable ratios.
    if !ratio.is_finite() || ratio.is_subnormal() {
        return false;
    }

    // NaN was excluded above, so this is exactly the negation of `ratio > 1.0`.
    if ratio <= 1.0 {
        return false;
    }

    match best {
        // Nothing to beat yet: any valid ratio wins.
        None => true,
        // Ties keep the incumbent.
        Some((_, best_ratio)) => ratio > *best_ratio,
    }
}
/// Estimates how well `scheme` compresses `array` by compressing a sample of it.
///
/// If `ctx` reports that the input is already a sample, `array` is used as-is. Otherwise a sample
/// is drawn with [`sample`], using [`SAMPLE_SIZE`] and the count returned by
/// [`sample_count_approx_one_percent`], and executed into canonical form.
///
/// The sample is wrapped in a new [`ArrayAndStats`] built with the scheme's stats options, and the
/// scheme is run on it with a context derived via `ctx.with_sampling()`.
///
/// The returned value is the sample's `nbytes()` divided by the compressed result's `nbytes()`.
/// The division is performed in `f64` and is not guarded, so a zero-byte result produces a
/// non-finite ratio.
///
/// # Errors
///
/// Propagates any error from executing the sample or from `scheme.compress`.
pub(super) fn estimate_compression_ratio_with_sampling<S: Scheme + ?Sized>(
    scheme: &S,
    compressor: &CascadingCompressor,
    array: &ArrayRef,
    ctx: CompressorContext,
) -> VortexResult<f64> {
    let sampled = if ctx.is_sample() {
        // The input is already a sample; do not sample it again.
        array.clone()
    } else {
        let total_len = array.len();
        let num_samples = sample_count_approx_one_percent(total_len);
        tracing::trace!(
            "Sampling {} values out of {}",
            SAMPLE_SIZE as u64 * num_samples as u64,
            total_len
        );

        let canonical_sample: Canonical =
            sample(array, SAMPLE_SIZE, num_samples).execute(&mut compressor.execution_ctx())?;
        canonical_sample.into_array()
    };

    // Build the stats holder from the sample itself rather than from the full input.
    let mut sampled_stats = ArrayAndStats::new(sampled, scheme.stats_options());

    // Measure the compressed size first, then the size of the sample that was fed in.
    let compressed_nbytes = scheme
        .compress(compressor, &mut sampled_stats, ctx.with_sampling())?
        .nbytes();
    let sample_nbytes = sampled_stats.array().nbytes();

    let ratio = sample_nbytes as f64 / compressed_nbytes as f64;
    tracing::debug!("estimate_compression_ratio_with_sampling(compressor={scheme:#?}) = {ratio}",);

    Ok(ratio)
}
impl fmt::Debug for DeferredEstimate {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
DeferredEstimate::Sample => write!(f, "Sample"),
DeferredEstimate::Callback(_) => write!(f, "Callback(..)"),
}
}
}