vortex_btrblocks/stats.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Compression statistics types.
5
6use std::fmt::Debug;
7
8use vortex_array::vtable::VTable;
9
/// Configures how stats are generated.
///
/// The type is a small bag of flags, so it derives the common value-type traits: it can be
/// copied freely, compared, and printed in debug output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GenerateStatsOptions {
    /// Whether distinct values should be counted during stats generation.
    pub count_distinct_values: bool,
    // NOTE: a `count_runs: bool` option is deliberately not exposed yet; it is an open question
    // whether run counting should be configured per scheme instead.
}
17
18impl Default for GenerateStatsOptions {
19 fn default() -> Self {
20 Self {
21 count_distinct_values: true,
22 // count_runs: true,
23 }
24 }
25}
26
/// The size of each sampled run.
///
/// Multiplied by [`SAMPLE_COUNT`] (currently 16) this gives `64 * 16 = 1024`.
pub(crate) const SAMPLE_SIZE: u32 = 64;
/// The number of sampled runs.
///
/// # Warning
///
/// The product of [`SAMPLE_SIZE`] and [`SAMPLE_COUNT`] should be (roughly) a multiple of 1024 so
/// that fastlanes bitpacking of sampled vectors does not introduce (large amounts of) padding.
/// With the current values the product is exactly `64 * 16 = 1024`.
pub(crate) const SAMPLE_COUNT: u32 = 16;
36
37/// Stats for the compressor.
38pub trait CompressorStats: Debug + Clone {
39 /// The type of the underlying source array vtable.
40 type ArrayVTable: VTable;
41
42 /// Generates stats with default options.
43 fn generate(input: &<Self::ArrayVTable as VTable>::Array) -> Self {
44 Self::generate_opts(input, GenerateStatsOptions::default())
45 }
46
47 /// Generates stats with provided options.
48 fn generate_opts(
49 input: &<Self::ArrayVTable as VTable>::Array,
50 opts: GenerateStatsOptions,
51 ) -> Self;
52
53 /// Returns the underlying source array that statistics were generated from.
54 fn source(&self) -> &<Self::ArrayVTable as VTable>::Array;
55
56 /// Sample the array with default options.
57 fn sample(&self, sample_size: u32, sample_count: u32) -> Self {
58 self.sample_opts(sample_size, sample_count, GenerateStatsOptions::default())
59 }
60
61 /// Sample the array with provided options.
62 fn sample_opts(&self, sample_size: u32, sample_count: u32, opts: GenerateStatsOptions) -> Self;
63}