tsz_compress/v2/
encode.rs

1use core::fmt::Binary;
2use num_traits::PrimInt;
3
4use crate::prelude::*;
5use crate::v2::consts::headers;
6
7use super::halfvec::{HalfVec, HalfWord};
8
9///
10/// A trait for types that can be represented as bits.
11///
12/// This trait is for types that can be used in binary operations and have a constant size in bits.
13/// It provides methods for zigzag encoding, which is a form of variable-length encoding that
14/// efficiently encodes signed integers.
15///
16pub trait Bits: PrimInt + Binary {
17    const BITS: usize;
18
19    /// Language limitations prevent us from writing simple math expressions
20    /// ((self << 1) ^ self >> (Self::BITS - 1)) as u32
21    fn zigzag(self) -> usize;
22
23    /// Return the zigzag encoding and number of bits required to represent the value
24    #[inline(always)]
25    fn zigzag_bits(self) -> (usize, usize) {
26        let zbits = self.zigzag();
27        (zbits, (usize::BITS - zbits.leading_zeros()) as usize)
28    }
29}
30
31impl Bits for i8 {
32    const BITS: usize = 8;
33
34    #[inline(always)]
35    fn zigzag(self) -> usize {
36        ((self << 1) ^ self >> (Self::BITS - 1)) as u8 as usize
37    }
38}
39
40impl Bits for i16 {
41    const BITS: usize = 16;
42
43    #[inline(always)]
44    fn zigzag(self) -> usize {
45        ((self << 1) ^ self >> (Self::BITS - 1)) as u16 as usize
46    }
47}
48
49impl Bits for i32 {
50    const BITS: usize = 32;
51
52    #[inline(always)]
53    fn zigzag(self) -> usize {
54        ((self << 1) ^ self >> (Self::BITS - 1)) as u32 as usize
55    }
56}
57
58#[cfg(target_pointer_width = "64")]
59impl Bits for i64 {
60    const BITS: usize = 64;
61
62    #[inline(always)]
63    fn zigzag(self) -> usize {
64        ((self << 1) ^ self >> (Self::BITS - 1)) as u64 as usize
65    }
66}
67
68#[inline(always)]
69fn push_three_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
70    const N: usize = 10;
71    const N1: usize = N - 1;
72    buf.push(HalfWord::Half(headers::THREE_BITS_TEN_SAMPLES));
73    let mut word: usize = 0;
74    let values = q.pop_n::<N>();
75    for value in values.iter().take(N1) {
76        word |= value;
77        word <<= 3;
78    }
79    word |= values[N1];
80    buf.push(HalfWord::Full(word as u32));
81}
82
83#[inline(always)]
84fn push_six_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
85    const N: usize = 5;
86    const N1: usize = N - 1;
87    buf.push(HalfWord::Half(headers::SIX_BITS_FIVE_SAMPLES));
88    let mut word: usize = 0;
89    let values = q.pop_n::<N>();
90    for value in values.iter().take(N1) {
91        word |= value;
92        word <<= 6;
93    }
94    word |= values[N1];
95    buf.push(HalfWord::Full(word as u32));
96}
97
98#[inline(always)]
99fn push_eight_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
100    const N: usize = 4;
101    const N1: usize = N - 1;
102    buf.push(HalfWord::Half(headers::EIGHT_BITS_FOUR_SAMPLES));
103    let mut word: usize = 0;
104    let values = q.pop_n::<N>();
105    for value in values.iter().take(N1) {
106        word |= value;
107        word <<= 8;
108    }
109    word |= values[N1];
110    buf.push(HalfWord::Full(word as u32));
111}
112
113#[inline(always)]
114fn push_ten_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
115    const N: usize = 3;
116    const N1: usize = N - 1;
117    buf.push(HalfWord::Half(headers::TEN_BITS_THREE_SAMPLES));
118    let mut word: usize = 0b00 << 10;
119    let values = q.pop_n::<N>();
120    for value in values.iter().take(N1) {
121        word |= value;
122        word <<= 10;
123    }
124    word |= values[N1];
125    buf.push(HalfWord::Full(word as u32));
126}
127
128#[inline(always)]
129fn push_sixteen_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
130    const N: usize = 2;
131    const N1: usize = N - 1;
132    buf.push(HalfWord::Half(headers::SIXTEEN_BITS_TWO_SAMPLES));
133    let mut word: usize = 0b00 << 10;
134    let values = q.pop_n::<N>();
135    for value in values.iter().take(N1) {
136        word |= value;
137        word <<= 16;
138    }
139    word |= values[N1];
140    buf.push(HalfWord::Full(word as u32));
141}
142
143#[inline(always)]
144unsafe fn push_32_or_64_bits(q: &mut CompressionQueue<10>, buf: &mut HalfVec) {
145    let value = q.pop().unwrap_unchecked();
146    if value <= u32::MAX as usize {
147        buf.push(HalfWord::Half(headers::THIRTY_TWO_BITS_ONE_SAMPLE));
148    } else {
149        buf.push(HalfWord::Half(headers::SIXTY_FOUR_BITS_ONE_SAMPLE));
150        buf.push(HalfWord::Full((value >> 32) as u32));
151    }
152    buf.push(HalfWord::Full(value as u32));
153}
154
155///
156/// A trait that emits bits according to the most efficient case of Delta Compression.
157///
158/// This trait provides methods for emitting bits and flushing the remaining bits in the queue.
159/// The methods return the number of elements popped from the queue.
160///
161pub trait EmitDeltaBits {
162    /// Emits bits according to the most efficient case of Delta Compression.
163    /// Returns the number of elements popped from the queue.
164    fn emit_delta_bits(&mut self, out: &mut HalfVec) -> usize;
165    fn flush_delta_bits(&mut self, out: &mut HalfVec) -> usize;
166}
167
168impl EmitDeltaBits for CompressionQueue<10> {
169    #[inline(always)]
170    fn emit_delta_bits(&mut self, out: &mut HalfVec) -> usize {
171        let mut fits = [true; 5];
172
173        // Check if the values will fit in the cases
174        let values = self.peak_bitcounts::<10>();
175        for (index, bits_required) in values.into_iter().enumerate() {
176            if (index < 2) & (bits_required > 16) {
177                fits[4] = false;
178            }
179            if (index < 3) & (bits_required > 10) {
180                fits[3] = false;
181            }
182            if (index < 4) & (bits_required > 8) {
183                fits[2] = false;
184            }
185            if (index < 5) & (bits_required > 6) {
186                fits[1] = false;
187            }
188            if (index < 10) & (bits_required > 3) {
189                fits[0] = false;
190            }
191        }
192
193        // Emit according to priority of cases
194        if fits[0] {
195            push_three_bits(self, out);
196            10
197        } else if fits[1] {
198            push_six_bits(self, out);
199            5
200        } else if fits[2] {
201            push_eight_bits(self, out);
202            4
203        } else if fits[3] {
204            push_ten_bits(self, out);
205            3
206        } else if fits[4] {
207            push_sixteen_bits(self, out);
208            2
209        } else {
210            unsafe {
211                push_32_or_64_bits(self, out);
212            }
213            1
214        }
215    }
216
217    #[inline(always)]
218    fn flush_delta_bits(&mut self, out: &mut HalfVec) -> usize {
219        let mut fits = [true; 5];
220
221        // Can not emit with any case of delta compression if queue is empty
222        if self.is_empty() {
223            return 0;
224        }
225
226        // Can not emit with case v of delta compression if number of samples < 10
227        if self.len() < 10 {
228            fits[0] = false;
229        }
230
231        // Can not emit with case iv of delta compression if number of samples < 5.
232        if self.len() < 5 {
233            fits[1] = false;
234        }
235
236        // Can not emit with case iii of delta compression if number of samples < 4
237        if self.len() < 4 {
238            fits[2] = false;
239        }
240
241        // Can not emit with case ii of delta compression if number of samples < 3
242        if self.len() < 3 {
243            fits[3] = false;
244        }
245
246        // Can not emit with case ii of delta compression if number of samples < 2
247        if self.len() < 2 {
248            fits[4] = false;
249        }
250
251        // Check if the values will fit in the cases
252        let values = self.peak_bitcounts::<10>();
253        for (index, bits_required) in values.into_iter().enumerate() {
254            if (index < 2) & (bits_required > 16) {
255                fits[4] = false;
256            }
257            if (index < 3) & (bits_required > 10) {
258                fits[3] = false;
259            }
260            if (index < 4) & (bits_required > 8) {
261                fits[2] = false;
262            }
263            if (index < 5) & (bits_required > 6) {
264                fits[1] = false;
265            }
266            if (index < 10) & (bits_required > 3) {
267                fits[0] = false;
268            }
269        }
270
271        // Emit according to priority of cases
272        if fits[0] {
273            push_three_bits(self, out);
274            10
275        } else if fits[1] {
276            push_six_bits(self, out);
277            5
278        } else if fits[2] {
279            push_eight_bits(self, out);
280            4
281        } else if fits[3] {
282            push_ten_bits(self, out);
283            3
284        } else if fits[4] {
285            push_sixteen_bits(self, out);
286            2
287        } else {
288            unsafe {
289                push_32_or_64_bits(self, out);
290            }
291            1
292        }
293    }
294}
295
296// Delta-Delta Encoding
297///
298/// A trait that provides method for emitting bits according to the most efficient case of Delta-Delta Compression.
299///
300pub trait EmitDeltaDeltaBits {
301    /// Emits bits according to the most efficient case of Delta-Delta Compression.
302    /// Returns the number of elements popped from the queue.
303    fn emit_delta_delta_bits(&mut self, out: &mut HalfVec) -> usize;
304}
305
306///
307/// A helper function that emits bits according to the most efficient case of Delta-Delta Compression.
308fn emit_popped_values<const N: usize>(
309    bitcounts: &[usize; N],
310    values: &[usize; N],
311    out: &mut HalfVec,
312) {
313    for (bits, value) in bitcounts.iter().zip(values.iter()) {
314        match bits {
315            0 => out.push(HalfWord::Half(0b0000)),
316            1..=5 => {
317                let zigzag = (value & 0b1_1111) as u8;
318                out.push(HalfWord::Byte(0b0010_0000 | zigzag));
319            }
320            6..=9 => {
321                let zigzag = (value & 0b1_1111_1111) as u16;
322                out.push(HalfWord::Half(0b0100 | (zigzag >> 8) as u8));
323                out.push(HalfWord::Byte(zigzag as u8));
324            }
325            10..=16 => {
326                let zigzag = (value & 0b1111_1111_1111_1111) as u16;
327                out.push(HalfWord::Half(0b0110));
328                out.push(HalfWord::Byte((zigzag >> 8) as u8));
329                out.push(HalfWord::Byte(zigzag as u8));
330            }
331            _ => {
332                out.push(HalfWord::Half(0b0111));
333                out.push(HalfWord::Full(*value as u32));
334            }
335        }
336    }
337}
338
339impl EmitDeltaDeltaBits for CompressionQueue<2> {
340    fn emit_delta_delta_bits(&mut self, out: &mut HalfVec) -> usize {
341        match self.len() {
342            2 => {
343                let bitcounts = self.peak_bitcounts::<2>();
344                let values = self.pop_n::<2>();
345                emit_popped_values(&bitcounts, &values, out);
346                2
347            }
348            1 => {
349                let bitcounts = self.peak_bitcounts::<1>();
350                let values = self.pop_n::<1>();
351                emit_popped_values(&bitcounts, &values, out);
352                1
353            }
354            _ => 0,
355        }
356    }
357}
358
359///
360/// Writes a 128-bit integer to a HalfVec.
361///
362/// This function takes a mutable reference to a HalfVec and a 128-bit integer.
363/// It converts the integer to a 128-bit unsigned integer and pushes it to the HalfVec in 32-bit chunks.
364///
365pub fn write_i128_bits(buf: &mut HalfVec, i: i128) {
366    let i = i as u128;
367    buf.push(HalfWord::Full((i >> 96) as u32));
368    buf.push(HalfWord::Full((i >> 64) as u32));
369    buf.push(HalfWord::Full((i >> 32) as u32));
370    buf.push(HalfWord::Full(i as u32));
371}
372
373///
374/// Writes a 64-bit integer to a HalfVec.
375///
376/// This function takes a mutable reference to a HalfVec and a 64-bit integer.
377/// It converts the integer to a 64-bit unsigned integer and pushes it to the HalfVec in 32-bit chunks.
378///
379pub fn write_i64_bits(buf: &mut HalfVec, i: i64) {
380    let i = i as u64;
381    buf.push(HalfWord::Full((i >> 32) as u32));
382    buf.push(HalfWord::Full(i as u32));
383}
384
385///
386/// Writes a 32-bit integer to a HalfVec.
387///
388/// This function takes a mutable reference to a HalfVec and a 32-bit integer.
389/// It pushes the integer to the HalfVec as a 32-bit unsigned integer.
390///
391pub fn write_i32_bits(buf: &mut HalfVec, i: i32) {
392    buf.push(HalfWord::Full(i as u32));
393}
394
395///
396/// Writes a 16-bit integer to a HalfVec.
397///
398/// This function takes a mutable reference to a HalfVec and a 16-bit integer.
399/// It converts the integer to a 16-bit unsigned integer and pushes it to the HalfVec in 8-bit chunks.
400///
401pub fn write_i16_bits(buf: &mut HalfVec, i: i16) {
402    let i = i as u16;
403    buf.push(HalfWord::Byte((i >> 8) as u8));
404    buf.push(HalfWord::Byte(i as u8));
405}
406
407///
408/// Writes an 8-bit integer to a HalfVec.
409///
410/// This function takes a mutable reference to a HalfVec and an 8-bit integer.
411/// It pushes the integer to the HalfVec as an 8-bit unsigned integer.
412///
413pub fn write_i8_bits(buf: &mut HalfVec, i: i8) {
414    buf.push(HalfWord::Byte(i as u8));
415}