vortex_compressor/ctx.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Compression context for recursive compression.
5
6use vortex_error::VortexExpect;
7
8use crate::scheme::SchemeId;
9use crate::stats::GenerateStatsOptions;
10
// TODO(connor): Revisit this limit. A fixed depth of 3 is neither tuned nor adaptive.
/// Upper bound on how many cascade levels a single compression chain may descend.
///
/// A freshly created [`CompressorContext`] starts with this many levels available, and every
/// descent into a child consumes one of them.
pub const MAX_CASCADE: usize = 3;
14
15/// Context passed through recursive compression calls.
16///
17/// Tracks the cascade history (which schemes and child indices have been applied in the current
18/// chain) so the compressor can enforce exclusion rules and prevent cycles.
19#[derive(Debug, Clone)]
20pub struct CompressorContext {
21 /// Whether we're compressing a sample (for ratio estimation).
22 is_sample: bool,
23
24 /// Remaining cascade depth allowed.
25 allowed_cascading: usize,
26
27 /// Merged stats options from all eligible schemes at this compression site.
28 merged_stats_options: GenerateStatsOptions,
29
30 /// The cascade chain: `(scheme_id, child_index)` pairs from root to current depth.
31 /// Used for self-exclusion, push rules ([`descendant_exclusions`]), and pull rules
32 /// ([`ancestor_exclusions`]).
33 ///
34 /// [`descendant_exclusions`]: crate::scheme::Scheme::descendant_exclusions
35 /// [`ancestor_exclusions`]: crate::scheme::Scheme::ancestor_exclusions
36 cascade_history: Vec<(SchemeId, usize)>,
37}
38
39impl CompressorContext {
40 /// Creates a new `CompressorContext`.
41 ///
42 /// This should **only** be created by the compressor.
43 pub(super) fn new() -> Self {
44 Self {
45 is_sample: false,
46 allowed_cascading: MAX_CASCADE,
47 merged_stats_options: GenerateStatsOptions::default(),
48 cascade_history: Vec::new(),
49 }
50 }
51}
52
#[cfg(test)]
impl Default for CompressorContext {
    /// Test-only convenience: the default context is the same root context that
    /// [`CompressorContext::new`] produces.
    fn default() -> Self {
        CompressorContext::new()
    }
}
59
60impl CompressorContext {
61 /// Whether this context is for sample compression (ratio estimation).
62 pub fn is_sample(&self) -> bool {
63 self.is_sample
64 }
65
66 /// Returns the merged stats generation options for this compression site.
67 pub fn merged_stats_options(&self) -> GenerateStatsOptions {
68 self.merged_stats_options
69 }
70
71 /// Returns the cascade chain of `(scheme_id, child_index)` pairs.
72 pub fn cascade_history(&self) -> &[(SchemeId, usize)] {
73 &self.cascade_history
74 }
75
76 /// Whether cascading is exhausted (no further cascade levels allowed).
77 ///
78 /// This should only be used in the implementation of a [`Scheme`](crate::scheme::Scheme) if the
79 /// scheme knows that it's child _must_ be compressed for it to make any sense being chosen.
80 pub fn finished_cascading(&self) -> bool {
81 self.allowed_cascading == 0
82 }
83
84 /// Returns a context that disallows further cascading.
85 pub fn as_leaf(mut self) -> Self {
86 self.allowed_cascading = 0;
87 self
88 }
89
90 /// Returns a context with the given stats options.
91 pub(super) fn with_merged_stats_options(mut self, opts: GenerateStatsOptions) -> Self {
92 self.merged_stats_options = opts;
93 self
94 }
95
96 /// Returns a context marked as sample compression.
97 pub(super) fn with_sampling(mut self) -> Self {
98 self.is_sample = true;
99 self
100 }
101
102 /// Descends one level in the cascade, recording the current scheme and which child is
103 /// being compressed.
104 ///
105 /// The `child_index` identifies which child of the scheme is being compressed (e.g. for
106 /// Dict: values=0, codes=1).
107 pub(super) fn descend_with_scheme(mut self, id: SchemeId, child_index: usize) -> Self {
108 self.allowed_cascading = self
109 .allowed_cascading
110 .checked_sub(1)
111 .vortex_expect("cannot descend: cascade depth exhausted");
112 self.cascade_history.push((id, child_index));
113 self
114 }
115}