q_compress/data_types/mod.rs
1use std::fmt::{Debug, Display};
2use std::ops::{Add, BitAnd, BitOr, BitOrAssign, Div, Mul, RemAssign, Shl, Shr, Sub};
3
4use crate::bit_reader::BitReader;
5use crate::bit_writer::BitWriter;
6use crate::bits;
7use crate::errors::QCompressResult;
8
9pub use timestamps::{TimestampMicros, TimestampNanos};
10
11mod boolean;
12mod floats;
13mod signeds;
14mod timestamps;
15mod unsigneds;
16
17#[cfg(feature = "timestamps_96")]
18mod timestamps_96;
19#[cfg(feature = "timestamps_96")]
20pub use timestamps_96::{TimestampMicros96, TimestampNanos96};
21
22/// Trait for data types that behave like signed integers.
23///
24/// This is used for delta encoding/decoding; i.e. the difference
25/// between consecutive numbers must be a `SignedLike`.
26/// For example,
27/// * The deltas between consecutive `u64`s are `i64`.
28/// * The deltas between consecutive `i64`s are `i64`.
29/// * The deltas between consecutive timestamps are `i128`.
30/// * The deltas between consecutive `bool`s are `bool`s (basically 1 bit
31/// signed integers under XOR).
32///
33/// This is important because deltas like +1 and -1 are numerically close to
34/// each other and easily compressible, which would not be the case with
35/// unsigned integers.
36/// Note: API stability of `SignedLike` is not guaranteed.
37pub trait SignedLike: NumberLike<Signed = Self> {
38 const ZERO: Self;
39
40 fn wrapping_add(self, other: Self) -> Self;
41 fn wrapping_sub(self, other: Self) -> Self;
42}
43
/// Trait for data types that behave like unsigned integers.
///
/// This is used extensively in `q_compress` to guarantee that bitwise
/// operations like `>>` and `|=` are available and that certain properties
/// hold.
/// Under the hood, when numbers are encoded or decoded, they go through their
/// corresponding `UnsignedLike` representation.
///
/// Note: API stability of `UnsignedLike` is not guaranteed.
pub trait UnsignedLike:
    Add<Output = Self>
    + BitAnd<Output = Self>
    + BitOr<Output = Self>
    + BitOrAssign
    + Copy
    + Debug
    + Display
    + Div<Output = Self>
    + Mul<Output = Self>
    + Ord
    + PartialOrd
    + RemAssign
    + Shl<usize, Output = Self>
    + Shr<usize, Output = Self>
    + Sub<Output = Self>
{
    /// The zero value of this type.
    const ZERO: Self;
    /// The value one in this type.
    const ONE: Self;
    /// The maximum value of this type.
    const MAX: Self;
    /// The number of bits in this type.
    const BITS: usize;

    /// Converts a `usize` into this type. Panics if the conversion is
    /// impossible.
    fn from_word(word: usize) -> Self;

    /// Converts this number to an `f64`.
    fn to_f64(self) -> f64;

    /// Shifts the unsigned integer right and returns its lowest bits as a
    /// `usize`.
    /// For example,
    /// ```
    /// use q_compress::data_types::UnsignedLike;
    /// assert_eq!(6_u8.rshift_word(1), 3_usize);
    /// assert_eq!(((1_u128 << 100) + (1_u128 << 4)).rshift_word(1), 8_usize);
    /// ```
    ///
    /// Used for some bit arithmetic operations during compression.
    fn rshift_word(self, shift: usize) -> usize;

    /// Shifts the unsigned integer left and returns its lowest bits as a
    /// `usize`.
    /// For example,
    /// ```
    /// use q_compress::data_types::UnsignedLike;
    /// assert_eq!(6_u8.lshift_word(1), 12_usize);
    /// assert_eq!(((1_u128 << 100) + (1_u128 << 4)).lshift_word(1), 32_usize);
    /// ```
    ///
    /// Used for some bit arithmetic operations during compression.
    fn lshift_word(self, shift: usize) -> usize;
}
105
106/// Trait for data types supported for compression/decompression.
107///
108/// If you have a new data type you would like to add to the library or
109/// implement as custom in your own, these are the questions you need to
110/// answer:
111/// * What are the corresponding signed integer and unsigned integer types?
112/// These are usually the next-larger signed and unsigned integers.
113/// * How can I convert to these signed and unsigned representations and back
114/// in *a way that preserves ordering*? For instance, converting `f32` to `i32`
115/// can be done trivially by transmuting the bytes in memory, but converting
116/// from `f32`
117/// to `u32` in an order-preserving way requires flipping the sign bit and, if
118/// negative, the rest of the bits.
119/// * How can I encode and decode this number in an uncompressed way? This
120/// uncompressed representation is used to store metadata in each chunk of the
121/// Quantile Compression format.
122///
123/// Note: API stability of `NumberLike` is not guaranteed.
124pub trait NumberLike: Copy + Debug + Display + Default + PartialEq + 'static {
125 /// A number from 0-255 that corresponds to the number's data type.
126 ///
127 /// Each `NumberLike` implementation should have a different `HEADER_BYTE`.
128 /// This byte gets written into the file's header during compression, and
129 /// if the wrong header byte shows up during decompression, the decompressor
130 /// will return an error.
131 ///
132 /// To choose a header byte for a new data type, review all header bytes in
133 /// the library and pick an unused one. For instance, as of writing, bytes
134 /// 1 through 15 are used, so 16 would be a good choice for another
135 /// `q_compress`-supported data type, and 255 would be a good choice for a
136 /// custom data type.
137 const HEADER_BYTE: u8;
138 /// The number of bits in the number's uncompressed representation.
139 /// This must match the number of bytes in the `to_bytes` and `from_bytes`
140 /// implementations.
141 /// Note that booleans have 8 physical bits (not 1)
142 /// and timestamps have 96 (not 128).
143 const PHYSICAL_BITS: usize;
144
145 /// The signed integer this type can convert between to do wrapped
146 /// subtraction and addition for delta encoding/decoding.
147 /// Must be another `NumberLike` with the same `Signed` and `Unsigned` as
148 /// this type; in this way, if we take 7th order deltas, they are ensured to
149 /// have the same type as 1st order deltas.
150 type Signed: SignedLike + NumberLike<Signed = Self::Signed, Unsigned = Self::Unsigned>;
151 /// The unsigned integer this type can convert between to do
152 /// bitwise logic and such.
153 type Unsigned: UnsignedLike;
154
155 // TODO in 1.0 remove this
156 /// This is no longer important and will go away in a future release.
157 fn num_eq(&self, other: &Self) -> bool {
158 self.to_unsigned() == other.to_unsigned()
159 }
160
161 /// Used during compression to convert to an unsigned integer.
162 fn to_unsigned(self) -> Self::Unsigned;
163
164 /// Used during decompression to convert back from an unsigned integer.
165 fn from_unsigned(off: Self::Unsigned) -> Self;
166
167 /// Used during delta encoding to convert to a signed integer.
168 fn to_signed(self) -> Self::Signed;
169
170 /// Used during delta decoding to convert back from a signed integer.
171 fn from_signed(signed: Self::Signed) -> Self;
172
173 /// Returns an uncompressed representation for the number.
174 fn to_bytes(self) -> Vec<u8>;
175
176 /// Creates a number from an uncompressed representation.
177 fn from_bytes(bytes: &[u8]) -> QCompressResult<Self>;
178
179 /// Parses an uncompressed representation of the number from the
180 /// `BitReader`.
181 fn read_from(reader: &mut BitReader) -> QCompressResult<Self> {
182 let bools = reader.read(Self::PHYSICAL_BITS)?;
183 Self::from_bytes(&bits::bits_to_bytes(bools))
184 }
185
186 /// Appends an uncompressed representation of the number to the
187 /// `BitWriter`.
188 fn write_to(self, writer: &mut BitWriter) {
189 writer.write(&bits::bytes_to_bits(self.to_bytes()));
190 }
191}