Skip to main content

vortex_btrblocks/schemes/integer/
zigzag.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! ZigZag integer encoding for signed integers.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_compressor::builtins::BinaryDictScheme;
12use vortex_compressor::builtins::FloatDictScheme;
13use vortex_compressor::builtins::IntDictScheme;
14use vortex_compressor::builtins::StringDictScheme;
15use vortex_compressor::estimate::CompressionEstimate;
16use vortex_compressor::estimate::DeferredEstimate;
17use vortex_compressor::estimate::EstimateVerdict;
18use vortex_compressor::scheme::AncestorExclusion;
19use vortex_compressor::scheme::ChildSelection;
20use vortex_compressor::scheme::DescendantExclusion;
21use vortex_error::VortexResult;
22use vortex_zigzag::ZigZag;
23use vortex_zigzag::ZigZagArrayExt;
24use vortex_zigzag::zigzag_encode;
25
26use super::RunEndScheme;
27use super::SparseScheme;
28use crate::ArrayAndStats;
29use crate::CascadingCompressor;
30use crate::CompressorContext;
31use crate::Scheme;
32use crate::SchemeExt;
33
34/// ZigZag encoding for negative integers.
35#[derive(Debug, Copy, Clone, PartialEq, Eq)]
36pub struct ZigZagScheme;
37
38impl Scheme for ZigZagScheme {
39    fn scheme_name(&self) -> &'static str {
40        "vortex.int.zigzag"
41    }
42
43    fn matches(&self, canonical: &Canonical) -> bool {
44        canonical.dtype().is_int()
45    }
46
47    /// Children: encoded=0.
48    fn num_children(&self) -> usize {
49        1
50    }
51
52    /// ZigZag is a bijective value transform that preserves cardinality, run patterns, and value
53    /// dominance. If Dict, RunEnd, or Sparse lost on the original array, they will lose on ZigZag's
54    /// output too, so we skip evaluating them.
55    fn descendant_exclusions(&self) -> Vec<DescendantExclusion> {
56        vec![
57            DescendantExclusion {
58                excluded: IntDictScheme.id(),
59                children: ChildSelection::All,
60            },
61            DescendantExclusion {
62                excluded: RunEndScheme.id(),
63                children: ChildSelection::All,
64            },
65            DescendantExclusion {
66                excluded: SparseScheme.id(),
67                children: ChildSelection::All,
68            },
69        ]
70    }
71
72    /// Dict codes are unsigned integers (0..cardinality). ZigZag only helps negatives.
73    fn ancestor_exclusions(&self) -> Vec<AncestorExclusion> {
74        vec![
75            AncestorExclusion {
76                ancestor: IntDictScheme.id(),
77                children: ChildSelection::One(1),
78            },
79            AncestorExclusion {
80                ancestor: FloatDictScheme.id(),
81                children: ChildSelection::One(1),
82            },
83            AncestorExclusion {
84                ancestor: StringDictScheme.id(),
85                children: ChildSelection::One(1),
86            },
87            AncestorExclusion {
88                ancestor: BinaryDictScheme.id(),
89                children: ChildSelection::One(1),
90            },
91        ]
92    }
93
94    fn expected_compression_ratio(
95        &self,
96        data: &ArrayAndStats,
97        compress_ctx: CompressorContext,
98        exec_ctx: &mut ExecutionCtx,
99    ) -> CompressionEstimate {
100        // ZigZag only transforms negative values to positive. Without further compression,
101        // the output is the same size.
102        if compress_ctx.finished_cascading() {
103            return CompressionEstimate::Verdict(EstimateVerdict::Skip);
104        }
105        let stats = data.integer_stats(exec_ctx);
106
107        // ZigZag is only useful when there are negative values.
108        if !stats.erased().min_is_negative() {
109            return CompressionEstimate::Verdict(EstimateVerdict::Skip);
110        }
111
112        CompressionEstimate::Deferred(DeferredEstimate::Sample)
113    }
114
115    fn compress(
116        &self,
117        compressor: &CascadingCompressor,
118        data: &ArrayAndStats,
119        compress_ctx: CompressorContext,
120        exec_ctx: &mut ExecutionCtx,
121    ) -> VortexResult<ArrayRef> {
122        // Zigzag encode the values, then recursively compress the inner values.
123        let zag = zigzag_encode(data.array_as_primitive())?;
124        let encoded = zag.encoded().clone().execute::<PrimitiveArray>(exec_ctx)?;
125
126        let compressed = compressor.compress_child(
127            &encoded.into_array(),
128            &compress_ctx,
129            self.id(),
130            0,
131            exec_ctx,
132        )?;
133
134        Ok(ZigZag::try_new(compressed)?.into_array())
135    }
136}