Skip to main content

vortex_btrblocks/
stats.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Compression statistics types.
5
6use std::fmt::Debug;
7
8use vortex_array::vtable::VTable;
9
/// Configures how stats are generated.
///
/// The options are a small plain-data value that is passed by value into the
/// stats-generation and sampling entry points, so the type derives the common
/// value traits (`Debug`, `Clone`, `Copy`, equality) for ease of reuse and logging.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GenerateStatsOptions {
    /// Whether distinct values should be counted during stats generation.
    pub count_distinct_values: bool,
    // NOTE: a `count_runs: bool` option was sketched here but is not enabled.
    // Open question from the original author: should this be scheme-specific?
}
17
18impl Default for GenerateStatsOptions {
19    fn default() -> Self {
20        Self {
21            count_distinct_values: true,
22            // count_runs: true,
23        }
24    }
25}
26
/// Size of each sampled run.
pub(crate) const SAMPLE_SIZE: u32 = 64;
/// Number of sampled runs.
///
/// # Warning
///
/// Keep the product `SAMPLE_SIZE * SAMPLE_COUNT` at (roughly) a multiple of 1024, so that
/// fastlanes bitpacking of sampled vectors does not introduce (large amounts of) padding.
/// With the current values the product is `64 * 16 = 1024`.
pub(crate) const SAMPLE_COUNT: u32 = 16;
36
37/// Stats for the compressor.
38pub trait CompressorStats: Debug + Clone {
39    /// The type of the underlying source array vtable.
40    type ArrayVTable: VTable;
41
42    /// Generates stats with default options.
43    fn generate(input: &<Self::ArrayVTable as VTable>::Array) -> Self {
44        Self::generate_opts(input, GenerateStatsOptions::default())
45    }
46
47    /// Generates stats with provided options.
48    fn generate_opts(
49        input: &<Self::ArrayVTable as VTable>::Array,
50        opts: GenerateStatsOptions,
51    ) -> Self;
52
53    /// Returns the underlying source array that statistics were generated from.
54    fn source(&self) -> &<Self::ArrayVTable as VTable>::Array;
55
56    /// Sample the array with default options.
57    fn sample(&self, sample_size: u32, sample_count: u32) -> Self {
58        self.sample_opts(sample_size, sample_count, GenerateStatsOptions::default())
59    }
60
61    /// Sample the array with provided options.
62    fn sample_opts(&self, sample_size: u32, sample_count: u32, opts: GenerateStatsOptions) -> Self;
63}