tree_buf/internal/types/
float.rs

// Promising Compressors:
// Gorilla - https://crates.io/crates/tsz   http://www.vldb.org/pvldb/vol8/p1816-teller.pdf
// FPC
// Akumuli - https://akumuli.org/akumuli/2017/02/05/compression_part2/
// ? http://blog.omega-prime.co.uk/2016/01/25/compression-of-floating-point-timeseries/
// https://www.cs.unc.edu/~isenburg/lcpfpv/
// dfcm - https://userweb.cs.txstate.edu/~mb92/papers/dcc06.pdf

// TODO: Lowerings
// Interesting reading: https://internals.rust-lang.org/t/tryfrom-for-f64/9793/35
// A useful starting point is that all possible down-cast through up-cast round trips
// must preserve bit-for-bit the original value. That's not quite enough though, since this
// is true for some values due to saturating rounding that one wouldn't want to downcast.
// https://floating-point-gui.de/formats/fp/
// f64 -> u64
// f64 -> f32
// f32 -> u32

// TODO: More compressors
macro_rules! impl_float {
    ($T:ident, $id:ident) => {
        //use crate::encodings::zfp;
        use crate::prelude::*;
        use num_traits::AsPrimitive as _;
        use std::convert::TryInto;
        use std::mem::size_of;
        use std::vec::IntoIter;

        // TODO: Check for lowering - f64 -> f63
        /// Appends the little-endian bytes of one float to the output buffer.
        #[cfg(feature = "encode")]
        fn encode_item(item: $T, bytes: &mut Vec<u8>) {
            let b = item.to_le_bytes();
            bytes.extend_from_slice(&b);
        }

        /// Decodes one little-endian float from `bytes`, advancing `offset`
        /// past the consumed bytes.
        #[cfg(feature = "decode")]
        pub(super) fn decode_item(bytes: &[u8], offset: &mut usize) -> DecodeResult<$T> {
            let bytes = decode_bytes(size_of::<$T>(), bytes, offset)?;
            // This unwrap is ok, because we just read exactly size_of::<T> bytes on the line above.
            Ok(<$T>::from_le_bytes(bytes.try_into().unwrap()))
        }

        #[cfg(feature = "encode")]
        impl Encodable for $T {
            type EncoderArray = Vec<$T>;
            /// Encodes a single root value, folding the common values
            /// +0.0, 1.0, -1.0 and NaN into the type id so they need no payload bytes.
            fn encode_root<O: EncodeOptions>(&self, stream: &mut EncoderStream<'_, O>) -> RootTypeId {
                let value = *self;

                // Check for positive sign so that -0.0 goes through
                // the unhappy path but round-trips bit-for-bit
                if value == 0.0 && value.is_sign_positive() {
                    RootTypeId::Zero
                } else if value == 1.0 {
                    RootTypeId::One
                } else if value == -1.0 {
                    RootTypeId::NegOne
                } else if value.is_nan() {
                    // FIXME: Check for canonical NaN,
                    // so that other NaN round trip bit-for-bit
                    RootTypeId::NaN
                } else {
                    encode_item(value, stream.bytes);
                    RootTypeId::$id
                }
            }
        }

        #[cfg(feature = "decode")]
        impl Decodable for $T {
            type DecoderArray = IntoIter<$T>;
            /// Decodes a single root value. Integer roots are accepted only when
            /// exactly representable by this float type; other branch kinds are a
            /// schema mismatch.
            fn decode(sticks: DynRootBranch<'_>, _options: &impl DecodeOptions) -> DecodeResult<Self> {
                profile_method!(decode);
                match sticks {
                    DynRootBranch::Integer(root_integer) => {
                        // Every integer with magnitude up to 2^MANTISSA_DIGITS (inclusive)
                        // round-trips through this float type bit-for-bit, so that is the
                        // cutoff for accepting an integer under a float schema. Anything
                        // larger may silently lose precision in the `as` cast, so reject it.
                        match root_integer {
                            RootInteger::U(u) => {
                                if u <= (1 << std::$T::MANTISSA_DIGITS) {
                                    Ok(u as $T)
                                } else {
                                    Err(DecodeError::SchemaMismatch)
                                }
                            }
                            RootInteger::S(s) => {
                                if s <= (1 << std::$T::MANTISSA_DIGITS) && s >= -(1 << std::$T::MANTISSA_DIGITS) {
                                    Ok(s as $T)
                                } else {
                                    Err(DecodeError::SchemaMismatch)
                                }
                            }
                        }
                    }
                    DynRootBranch::Float(root_float) => {
                        match root_float {
                            // FIXME: Macro here - should be schema mismatch for f64 -> f32
                            RootFloat::F64(v) => Ok(v as $T),
                            RootFloat::NaN => Ok(std::$T::NAN),
                            // This should be safe to cast without loss of information.
                            // Double-check that the meaning of various NaN values
                            // is preserved though (signaling, non-signaling, etc)
                            // https://stackoverflow.com/a/59795029/11837266
                            RootFloat::F32(v) => Ok(v as $T),
                        }
                    }
                    _ => Err(DecodeError::SchemaMismatch),
                }
            }
        }

        #[cfg(feature = "decode")]
        impl InfallibleDecoderArray for IntoIter<$T> {
            type Decode = $T;
            /// Eagerly decodes a whole float array branch into a buffered iterator.
            fn new_infallible(sticks: DynArrayBranch<'_>, _options: &impl DecodeOptions) -> DecodeResult<Self> {
                profile_method!(new_infallible);

                match sticks {
                    DynArrayBranch::Float(float) => {
                        match float {
                            ArrayFloat::F64(bytes) => {
                                profile_section!(array_f64);

                                // FIXME: Should do schema mismatch for f32 -> f64
                                let values = decode_all(&bytes, |bytes, offset| Ok(super::_f64::decode_item(bytes, offset)?.as_()))?;
                                Ok(values.into_iter())
                            }
                            ArrayFloat::F32(bytes) => {
                                profile_section!(array_f32);

                                let values = decode_all(&bytes, |bytes, offset| Ok(super::_f32::decode_item(bytes, offset)?.as_()))?;
                                Ok(values.into_iter())
                            }
                            ArrayFloat::DoubleGorilla(bytes) => gorilla::decompress::<$T>(&bytes).map(|f| f.into_iter()),
                            /*
                            ArrayFloat::Zfp32(bytes) => {
                                // FIXME: This is likely a bug switching between 32 and 64 might just get garbage data out
                                let values = zfp::decompress::<f32>(&bytes)?;
                                // TODO: (Performance) unnecessary copy in some cases
                                let values: Vec<_> = values.iter().map(|v| v.as_()).collect();
                                Ok(values.into_iter())
                            }
                            ArrayFloat::Zfp64(bytes) => {
                                let values = zfp::decompress::<f64>(&bytes)?;
                                // TODO: (Performance) unnecessary copy in some cases
                                let values: Vec<_> = values.iter().map(|v| v.as_()).collect();
                                Ok(values.into_iter())
                            }
                            */
                            ArrayFloat::Zfp32(_bytes) => unimplemented!("zfp32"),
                            ArrayFloat::Zfp64(_bytes) => unimplemented!("zfp64"),
                        }
                    }
                    // TODO: There are some conversions that are infallible.
                    // Eg: Simple16.
                    _ => Err(DecodeError::SchemaMismatch),
                }
            }
            /// Infallible by contract: once the iterator is exhausted,
            /// further reads yield the default value (0.0).
            fn decode_next_infallible(&mut self) -> Self::Decode {
                self.next().unwrap_or_default()
            }
        }

        #[cfg(feature = "encode")]
        impl EncoderArray<$T> for Vec<$T> {
            fn buffer_one<'a, 'b: 'a>(&'a mut self, value: &'b $T) {
                self.push(*value);
            }
            fn buffer_many<'a, 'b: 'a>(&'a mut self, values: &'b [$T]) {
                profile_method!(buffer_many);
                self.extend_from_slice(values);
            }
            /// Lets `compress` pick the best of the available compressors for this batch.
            fn encode_all<O: EncodeOptions>(values: &[$T], stream: &mut EncoderStream<'_, O>) -> ArrayTypeId {
                profile_method!(encode_all);

                // See also 558c24b8-dc75-4f08-8ea2-0f839af4da2e
                let compressors = (
                    Fixed, //Zfp,
                    Gorilla,
                );

                compress(values, stream, &compressors)
            }
            fn flush<O: EncodeOptions>(self, stream: &mut EncoderStream<'_, O>) -> ArrayTypeId {
                Self::encode_all(&self[..], stream)
            }
        }

        impl PrimitiveEncoderArray<$T> for Vec<$T> {
            /// Fast size estimate. The compressor list must stay in sync with
            /// `encode_all` so the estimate matches what actually gets written.
            fn fast_size_for_all<O: EncodeOptions>(values: &[$T], options: &O) -> usize {
                // See also 558c24b8-dc75-4f08-8ea2-0f839af4da2e
                let compressors = (
                    Fixed, //Zfp,
                    Gorilla,
                );
                fast_size_for(values, &compressors, options)
            }
        }

        /// Baseline compressor: raw little-endian bytes behind a length varint.
        struct Fixed;
        impl Compressor<$T> for Fixed {
            fn fast_size_for<O: EncodeOptions>(&self, data: &[$T], _options: &O) -> Result<usize, ()> {
                let arr_size = size_of::<$T>() * data.len();
                Ok(arr_size + size_for_varint(arr_size as u64))
            }
            fn compress<O: EncodeOptions>(&self, data: &[$T], stream: &mut EncoderStream<'_, O>) -> Result<ArrayTypeId, ()> {
                profile_method!(compress);
                stream.encode_with_len(|stream| {
                    for item in data {
                        encode_item(*item, &mut stream.bytes);
                    }
                });
                Ok(ArrayTypeId::$id)
            }
        }

        /// Gorilla XOR-based compressor (see the paper linked at the top of the file).
        /// Values are widened to f64 before compression.
        struct Gorilla;
        impl Compressor<$T> for Gorilla {
            fn fast_size_for<O: EncodeOptions>(&self, data: &[$T], options: &O) -> Result<usize, ()> {
                profile_method!(fast_size_for);

                if let Some(tolerance) = options.lossy_float_tolerance() {
                    // TODO: This is a hack (albeit a surprisingly effective one) to get lossy compression
                    // before a real lossy compressor (Eg: fzip) is used.
                    // Quantizes each value down to a multiple of 2^tolerance.
                    let multiplier = (2.0 as $T).powi(-tolerance);
                    let data = data.iter().map(|f| ((f * multiplier).floor() / multiplier) as f64);
                    gorilla::size_for(data)
                } else {
                    let data = data.iter().map(|f| *f as f64);
                    gorilla::size_for(data)
                }
            }

            fn compress<O: EncodeOptions>(&self, data: &[$T], stream: &mut EncoderStream<'_, O>) -> Result<ArrayTypeId, ()> {
                profile_method!(compress);

                stream.encode_with_len(|stream| {
                    if let Some(tolerance) = stream.options.lossy_float_tolerance() {
                        // TODO: This is a hack (albeit a surprisingly effective one) to get lossy compression
                        // before a real lossy compressor (Eg: fzip) is used.
                        // Keep this quantization in sync with fast_size_for above.
                        let multiplier = (2.0 as $T).powi(-tolerance);
                        let data = data.iter().map(|f| ((f * multiplier).floor() / multiplier) as f64);
                        gorilla::compress(data, stream.bytes)
                    } else {
                        let data = data.iter().map(|f| *f as f64);
                        gorilla::compress(data, stream.bytes)
                    }
                })
            }
        }
    };
}
254
/*
struct Zfp64 {
    tolerance: Option<i32>,
}
impl Compressor<f64> for Zfp64 {
    fn compress<O: EncodeOptions>(&self, data: &[f64], stream: &mut EncoderStream<'_, O>) -> Result<ArrayTypeId, ()> {
        profile_method!(compress);
        stream.encode_with_len(|stream| zfp::compress(data, &mut stream.bytes, self.tolerance));
    }
}

struct Zfp32 {
    tolerance: Option<i32>,
}
impl Compressor<f32> for Zfp32 {
    fn compress<O: EncodeOptions>(&self, data: &[f32], stream: &mut EncoderStream<'_, O>) -> Result<ArrayTypeId, ()> {
        profile_method!(compress);
        stream.encode_with_len(|stream| zfp::compress(data, bytes, self.tolerance));
    }
}
*/
276
// One concrete expansion of `impl_float!` per float width. Each expansion
// lives in its own named module because the generated items reference each
// other across modules (e.g. `super::_f64::decode_item`); the leading
// underscore keeps the module names distinct from the primitive type names.
mod _f64 {
    impl_float!(f64, F64);
}
mod _f32 {
    impl_float!(f32, F32);
}