q_compress/data_types/
mod.rs

1use std::fmt::{Debug, Display};
2use std::ops::{Add, BitAnd, BitOr, BitOrAssign, Div, Mul, RemAssign, Shl, Shr, Sub};
3
4use crate::bit_reader::BitReader;
5use crate::bit_writer::BitWriter;
6use crate::bits;
7use crate::errors::QCompressResult;
8
9pub use timestamps::{TimestampMicros, TimestampNanos};
10
11mod boolean;
12mod floats;
13mod signeds;
14mod timestamps;
15mod unsigneds;
16
17#[cfg(feature = "timestamps_96")]
18mod timestamps_96;
19#[cfg(feature = "timestamps_96")]
20pub use timestamps_96::{TimestampMicros96, TimestampNanos96};
21
22/// Trait for data types that behave like signed integers.
23///
24/// This is used for delta encoding/decoding; i.e. the difference
25/// between consecutive numbers must be a `SignedLike`.
26/// For example,
27/// * The deltas between consecutive `u64`s are `i64`.
28/// * The deltas between consecutive `i64`s are `i64`.
29/// * The deltas between consecutive timestamps are `i128`.
30/// * The deltas between consecutive `bool`s are `bool`s (basically 1 bit
31/// signed integers under XOR).
32///
33/// This is important because deltas like +1 and -1 are numerically close to
34/// each other and easily compressible, which would not be the case with
35/// unsigned integers.
36/// Note: API stability of `SignedLike` is not guaranteed.
37pub trait SignedLike: NumberLike<Signed = Self> {
38  const ZERO: Self;
39
40  fn wrapping_add(self, other: Self) -> Self;
41  fn wrapping_sub(self, other: Self) -> Self;
42}
43
/// Trait for number types that act like unsigned integers.
///
/// `q_compress` leans on this trait heavily: it guarantees that bitwise
/// operations such as `>>` and `|=` exist and that certain properties hold.
/// Internally, every number passes through its corresponding `UnsignedLike`
/// representation when it is encoded or decoded.
///
/// Note: API stability of `UnsignedLike` is not guaranteed.
pub trait UnsignedLike:
  // general-purpose traits
  Copy
  + Debug
  + Display
  + Ord
  + PartialOrd
  // arithmetic
  + Add<Output = Self>
  + Sub<Output = Self>
  + Mul<Output = Self>
  + Div<Output = Self>
  + RemAssign
  // bitwise logic and shifts
  + BitAnd<Output = Self>
  + BitOr<Output = Self>
  + BitOrAssign
  + Shl<usize, Output = Self>
  + Shr<usize, Output = Self>
{
  /// The value zero in this type.
  const ZERO: Self;
  /// The value one in this type.
  const ONE: Self;
  /// The maximum value of this type.
  const MAX: Self;
  /// The width of this type in bits.
  const BITS: usize;

  /// Builds a value of this type out of a `usize`. Panics when the
  /// conversion is impossible.
  fn from_word(word: usize) -> Self;

  /// Converts this number into an `f64`.
  fn to_f64(self) -> f64;

  /// Right-shifts the unsigned integer by `shift` and returns the lowest
  /// bits of the result as a `usize`.
  /// For example,
  /// ```
  /// use q_compress::data_types::UnsignedLike;
  /// assert_eq!(6_u8.rshift_word(1), 3_usize);
  /// assert_eq!(((1_u128 << 100) + (1_u128 << 4)).rshift_word(1), 8_usize);
  /// ```
  ///
  /// Used for some bit arithmetic operations during compression.
  fn rshift_word(self, shift: usize) -> usize;

  /// Left-shifts the unsigned integer by `shift` and returns the lowest
  /// bits of the result as a `usize`.
  /// For example,
  /// ```
  /// use q_compress::data_types::UnsignedLike;
  /// assert_eq!(6_u8.lshift_word(1), 12_usize);
  /// assert_eq!(((1_u128 << 100) + (1_u128 << 4)).lshift_word(1), 32_usize);
  /// ```
  ///
  /// Used for some bit arithmetic operations during compression.
  fn lshift_word(self, shift: usize) -> usize;
}
105
106/// Trait for data types supported for compression/decompression.
107///
108/// If you have a new data type you would like to add to the library or
109/// implement as custom in your own, these are the questions you need to
110/// answer:
111/// * What are the corresponding signed integer and unsigned integer types?
112/// These are usually the next-larger signed and unsigned integers.
113/// * How can I convert to these signed and unsigned representations and back
114/// in *a way that preserves ordering*? For instance, converting `f32` to `i32`
115/// can be done trivially by transmuting the bytes in memory, but converting
116/// from `f32`
117/// to `u32` in an order-preserving way requires flipping the sign bit and, if
118/// negative, the rest of the bits.
119/// * How can I encode and decode this number in an uncompressed way? This
120/// uncompressed representation is used to store metadata in each chunk of the
121/// Quantile Compression format.
122///
123/// Note: API stability of `NumberLike` is not guaranteed.
124pub trait NumberLike: Copy + Debug + Display + Default + PartialEq + 'static {
125  /// A number from 0-255 that corresponds to the number's data type.
126  ///
127  /// Each `NumberLike` implementation should have a different `HEADER_BYTE`.
128  /// This byte gets written into the file's header during compression, and
129  /// if the wrong header byte shows up during decompression, the decompressor
130  /// will return an error.
131  ///
132  /// To choose a header byte for a new data type, review all header bytes in
133  /// the library and pick an unused one. For instance, as of writing, bytes
134  /// 1 through 15 are used, so 16 would be a good choice for another
135  /// `q_compress`-supported data type, and 255 would be a good choice for a
136  /// custom data type.
137  const HEADER_BYTE: u8;
138  /// The number of bits in the number's uncompressed representation.
139  /// This must match the number of bytes in the `to_bytes` and `from_bytes`
140  /// implementations.
141  /// Note that booleans have 8 physical bits (not 1)
142  /// and timestamps have 96 (not 128).
143  const PHYSICAL_BITS: usize;
144
145  /// The signed integer this type can convert between to do wrapped
146  /// subtraction and addition for delta encoding/decoding.
147  /// Must be another `NumberLike` with the same `Signed` and `Unsigned` as
148  /// this type; in this way, if we take 7th order deltas, they are ensured to
149  /// have the same type as 1st order deltas.
150  type Signed: SignedLike + NumberLike<Signed = Self::Signed, Unsigned = Self::Unsigned>;
151  /// The unsigned integer this type can convert between to do
152  /// bitwise logic and such.
153  type Unsigned: UnsignedLike;
154
155  // TODO in 1.0 remove this
156  /// This is no longer important and will go away in a future release.
157  fn num_eq(&self, other: &Self) -> bool {
158    self.to_unsigned() == other.to_unsigned()
159  }
160
161  /// Used during compression to convert to an unsigned integer.
162  fn to_unsigned(self) -> Self::Unsigned;
163
164  /// Used during decompression to convert back from an unsigned integer.
165  fn from_unsigned(off: Self::Unsigned) -> Self;
166
167  /// Used during delta encoding to convert to a signed integer.
168  fn to_signed(self) -> Self::Signed;
169
170  /// Used during delta decoding to convert back from a signed integer.
171  fn from_signed(signed: Self::Signed) -> Self;
172
173  /// Returns an uncompressed representation for the number.
174  fn to_bytes(self) -> Vec<u8>;
175
176  /// Creates a number from an uncompressed representation.
177  fn from_bytes(bytes: &[u8]) -> QCompressResult<Self>;
178
179  /// Parses an uncompressed representation of the number from the
180  /// `BitReader`.
181  fn read_from(reader: &mut BitReader) -> QCompressResult<Self> {
182    let bools = reader.read(Self::PHYSICAL_BITS)?;
183    Self::from_bytes(&bits::bits_to_bytes(bools))
184  }
185
186  /// Appends an uncompressed representation of the number to the
187  /// `BitWriter`.
188  fn write_to(self, writer: &mut BitWriter) {
189    writer.write(&bits::bytes_to_bits(self.to_bytes()));
190  }
191}