Skip to main content

vortex_compressor/
ctx.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Compression context for recursive compression.
5
6use vortex_error::VortexExpect;
7
8use crate::scheme::SchemeId;
9use crate::stats::GenerateStatsOptions;
10
11// TODO(connor): Why is this 3??? This doesn't seem smart or adaptive.
/// Upper bound on how many cascade levels a single compression chain may descend.
///
/// A freshly created [`CompressorContext`] starts with exactly this many levels available.
pub const MAX_CASCADE: usize = 3;
14
15/// Context passed through recursive compression calls.
16///
17/// Tracks the cascade history (which schemes and child indices have been applied in the current
18/// chain) so the compressor can enforce exclusion rules and prevent cycles.
19#[derive(Debug, Clone)]
20pub struct CompressorContext {
21    /// Whether we're compressing a sample (for ratio estimation).
22    is_sample: bool,
23
24    /// Remaining cascade depth allowed.
25    allowed_cascading: usize,
26
27    /// Merged stats options from all eligible schemes at this compression site.
28    merged_stats_options: GenerateStatsOptions,
29
30    /// The cascade chain: `(scheme_id, child_index)` pairs from root to current depth.
31    /// Used for self-exclusion, push rules ([`descendant_exclusions`]), and pull rules
32    /// ([`ancestor_exclusions`]).
33    ///
34    /// [`descendant_exclusions`]: crate::scheme::Scheme::descendant_exclusions
35    /// [`ancestor_exclusions`]: crate::scheme::Scheme::ancestor_exclusions
36    cascade_history: Vec<(SchemeId, usize)>,
37}
38
39impl CompressorContext {
40    /// Creates a new `CompressorContext`.
41    ///
42    /// This should **only** be created by the compressor.
43    pub(super) fn new() -> Self {
44        Self {
45            is_sample: false,
46            allowed_cascading: MAX_CASCADE,
47            merged_stats_options: GenerateStatsOptions::default(),
48            cascade_history: Vec::new(),
49        }
50    }
51}
52
// Test-only convenience: lets test code obtain a context through `Default::default()`.
#[cfg(test)]
impl Default for CompressorContext {
    /// Equivalent to [`CompressorContext::new`].
    fn default() -> Self {
        CompressorContext::new()
    }
}
59
60impl CompressorContext {
61    /// Whether this context is for sample compression (ratio estimation).
62    pub fn is_sample(&self) -> bool {
63        self.is_sample
64    }
65
66    /// Returns the merged stats generation options for this compression site.
67    pub fn merged_stats_options(&self) -> GenerateStatsOptions {
68        self.merged_stats_options
69    }
70
71    /// Returns the cascade chain of `(scheme_id, child_index)` pairs.
72    pub fn cascade_history(&self) -> &[(SchemeId, usize)] {
73        &self.cascade_history
74    }
75
76    /// Whether cascading is exhausted (no further cascade levels allowed).
77    ///
78    /// This should only be used in the implementation of a [`Scheme`](crate::scheme::Scheme) if the
79    /// scheme knows that it's child _must_ be compressed for it to make any sense being chosen.
80    pub fn finished_cascading(&self) -> bool {
81        self.allowed_cascading == 0
82    }
83
84    /// Returns a context that disallows further cascading.
85    pub fn as_leaf(mut self) -> Self {
86        self.allowed_cascading = 0;
87        self
88    }
89
90    /// Returns a context with the given stats options.
91    pub(super) fn with_merged_stats_options(mut self, opts: GenerateStatsOptions) -> Self {
92        self.merged_stats_options = opts;
93        self
94    }
95
96    /// Returns a context marked as sample compression.
97    pub(super) fn with_sampling(mut self) -> Self {
98        self.is_sample = true;
99        self
100    }
101
102    /// Descends one level in the cascade, recording the current scheme and which child is
103    /// being compressed.
104    ///
105    /// The `child_index` identifies which child of the scheme is being compressed (e.g. for
106    /// Dict: values=0, codes=1).
107    pub(super) fn descend_with_scheme(mut self, id: SchemeId, child_index: usize) -> Self {
108        self.allowed_cascading = self
109            .allowed_cascading
110            .checked_sub(1)
111            .vortex_expect("cannot descend: cascade depth exhausted");
112        self.cascade_history.push((id, child_index));
113        self
114    }
115}