tsz_compress/v2/
mod.rs

1pub mod consts;
2pub mod decode;
3pub mod encode;
4pub mod halfvec;
5pub mod queue;
6pub use decode::*;
7pub use encode::*;
8pub use queue::*;
9
10///
11/// An enumeration representing the possible errors that can occur during the decoding process.
12///
13#[derive(Debug)]
14pub enum CodingError {
15    /// There is not enough data to decode a single value.
16    Empty,
17    /// There were not enough bits to finish decoding an expected value.
18    NotEnoughBits,
19    /// There were bits that indicated an invalid value.
20    InvalidBits,
21    /// The first column tag was invalid.
22    InvalidInitialColumnTag,
23    /// A non-first column tag was invalid.
24    InvalidColumnTag,
25    /// The number of rows decoded did not match the expected number of rows.
26    ColumnLengthMismatch(ColumnLengths),
27    /// The number of rows to decode cannot be valid
28    InvalidRowCount(usize),
29}
30
///
/// A struct representing the expected and actual lengths of columns in a data set.
///
/// Carried by `CodingError::ColumnLengthMismatch` to describe the mismatch.
/// It is a plain data holder, so it can be cloned and compared for equality.
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnLengths {
    /// The number of rows that every column was expected to contain.
    pub expected_rows: usize,
    /// The lengths of the columns, one entry per column.
    pub column_lengths: ::alloc::vec::Vec<usize>,
}
39
///
/// High-level interface for compression.
///
pub trait TszCompressV2 {
    /// The row type accepted by the compressor.
    type T: Copy;

    ///
    /// Creates a new compressor.
    ///
    /// `prealloc_rows` is a capacity hint for the internal buffers: a guess at
    /// how many rows will be compressed, not a hard limit.
    ///
    fn new(prealloc_rows: usize) -> Self;

    ///
    /// Lazily compresses a single row.
    ///
    fn compress(&mut self, row: Self::T);

    ///
    /// The number of bits that have been compressed.
    ///
    /// This is an estimate, because the last few samples may not have been
    /// emitted yet and their size is estimated.
    ///
    fn len(&self) -> usize;

    ///
    /// Returns true if no bits have been compressed.
    ///
    fn is_empty(&self) -> bool {
        let compressed_bits = self.len();
        compressed_bits == 0
    }

    ///
    /// Returns an estimate of the bits per column value, computed as the number of
    /// compressed bits / count of column values compressed / columns per row.
    ///
    fn bit_rate(&self) -> usize;

    ///
    /// The number of rows that have been compressed.
    ///
    /// This count is exact for the rows consumed, including rows that may not
    /// have been emitted yet.
    ///
    fn row_count(&self) -> usize;

    ///
    /// Consumes the compressor state, appending the compressed bytes
    /// to `output_bytes` and reserving space if needed.
    ///
    /// The intermediate buffers are left in a reserved, cleared state.
    ///
    fn finish_into(&mut self, output_bytes: &mut ::alloc::vec::Vec<u8>);

    ///
    /// Convenience method that calls `finish_into` on a fresh vector and
    /// returns the compressed bytes.
    ///
    fn finish(&mut self) -> ::alloc::vec::Vec<u8> {
        let mut compressed = ::alloc::vec::Vec::new();
        Self::finish_into(self, &mut compressed);
        compressed
    }

    ///
    /// Consumes the compressor state the same way as `finish_into`, but
    /// writes directly into a `ThinVec`.
    ///
    #[cfg(feature = "thin-vec")]
    fn finish_into_thin(&mut self, output_bytes: &mut ::thin_vec::ThinVec<u8>);

    ///
    /// Convenience method that calls `finish_into_thin` on a fresh `ThinVec`
    /// and returns it.
    ///
    #[cfg(feature = "thin-vec")]
    fn finish_thin(&mut self) -> ::thin_vec::ThinVec<u8> {
        let mut compressed = ::thin_vec::ThinVec::new();
        Self::finish_into_thin(self, &mut compressed);
        compressed
    }
}
119
120///
121/// High-level interface for decompression.
122///
123pub trait TszDecompressV2 {
124    type T: Copy;
125
126    ///
127    /// Initializes a new instance of the Decompressor.
128    ///
129    fn new() -> Self;
130
131    ///
132    /// Decompress all of the rows into columnar buffers.
133    ///
134    /// This operation will not overwrite existing data in the buffers.
135    ///
136    /// # Arguments
137    /// * `bits` - The compressed data from a TszCmopressV2 instance.
138    ///
139    fn decompress(&mut self, bits: &[u8]) -> Result<(), CodingError>;
140
141    ///
142    /// Rotate the decompressed values into a vector of rows.
143    ///
144    /// If columnar data is desired, each implementation derived
145    /// via macro will include an accessor for each column vector by name.
146    ///
147    /// For example, if derived for a struct with fields `a: i8` and `b: i32`
148    /// then the following accessors will be generated:
149    ///
150    /// rust
151    /// fn col_a(&self) -> &[i8];
152    /// fn col_b(&self) -> &[i32];
153    ///
154    ///
155    /// # Returns
156    /// A vector of rows, where each row is a struct with the same fields as the original.
157    ///
158    fn rows(&self) -> ::alloc::vec::Vec<Self::T>;
159
160    ///
161    /// Clears the internal state of the decompressor.
162    ///
163    /// This is useful for reusing the decompressor instance for multiple decompression operations.
164    ///
165    fn clear(&mut self);
166}