locustdb_compression_utils/xor_float/
double.rs1use bitbuffer::{BitReadBuffer, BitReadStream, BitWriteStream, LittleEndian};
2
3use super::Error;
4
5pub fn encode(floats: &[f64], max_regret: u32, mantissa: Option<u32>) -> Vec<u8> {
6 let mut write_bytes = vec![];
7 let mut writer = BitWriteStream::new(&mut write_bytes, LittleEndian);
8 writer.write_int(floats.len() as u64, 64).unwrap();
9 match floats.first() {
10 Some(first) => writer.write_int(first.to_bits(), 64).unwrap(),
11 None => return write_bytes,
12 };
13 let mut last_value = floats[0];
14 let mut last_leading_zeros = 65;
15 let mut last_trailing_zeros = 65;
16 let mut last_significant_bits = 0;
17 let mut regret = 0;
18 let mask = match mantissa {
19 Some(mantissa) => {
20 assert!(mantissa <= 52, "f64 has at most 52 bits of mantissa");
21 u64::MAX - ((1 << (52 - mantissa)) - 1)
22 }
23 None => u64::MAX,
24 };
25 for &f in floats.iter().skip(1) {
26 let xor = (f.to_bits() ^ last_value.to_bits()) & mask;
27 let leading_zeros = xor.leading_zeros().min(31);
28 let trailing_zeros = xor.trailing_zeros();
29
30 if trailing_zeros == 64 {
31 writer.write_int(0, 1).unwrap();
32 } else {
33 let significant_bits = 64 - leading_zeros - trailing_zeros;
34 if leading_zeros >= last_leading_zeros
35 && trailing_zeros >= last_trailing_zeros
36 && (regret < max_regret || significant_bits == last_significant_bits)
37 {
38 writer.write_int(0b01, 2).unwrap();
41 let xor = xor >> last_trailing_zeros;
42 writer
43 .write_int(xor, last_significant_bits as usize)
44 .unwrap();
45 regret += last_significant_bits - significant_bits;
46 } else {
47 last_leading_zeros = leading_zeros;
48 last_trailing_zeros = trailing_zeros;
49 last_significant_bits = significant_bits;
50 regret = 0;
51 writer.write_int(0b11, 2).unwrap();
52 writer.write_int(leading_zeros, 5).unwrap();
53 writer.write_int(significant_bits - 1, 6).unwrap();
54 let xor = xor >> last_trailing_zeros;
55 writer.write_int(xor, significant_bits as usize).unwrap();
56 }
57 }
58 last_value = f;
59 }
60 write_bytes
61}
62
63pub fn decode(data: &[u8]) -> Result<Vec<f64>, Error> {
64 let buffer = BitReadBuffer::new(data, LittleEndian);
65 let mut reader = BitReadStream::new(buffer);
66 let length = reader.read_int::<u64>(64).map_err(|_| Error::Eof)? as usize;
67
68 let mut decoded = vec![f64::from_bits(0u64); length];
69
70 if length == 0 {
71 return Ok(decoded);
72 }
73
74 let first = reader.read_int(64).map_err(|_| Error::Eof)?;
75 decoded[0] = f64::from_bits(first);
76
77 let mut last = first;
78 let mut last_leading_zeros: u32;
79 let mut last_trailing_zeros = 65u32;
80 let mut last_significant_bits = 0;
81 for decoded in &mut decoded[1..length] {
82 match reader.read_int::<u8>(1).map_err(|_| Error::Eof)? {
83 0 => {
84 *decoded = f64::from_bits(last);
85 }
86 1 => {
87 if reader.read_int::<u8>(1).map_err(|_| Error::Eof)? == 1u8 {
88 last_leading_zeros = reader.read_int(5).map_err(|_| Error::Eof)?;
89 last_significant_bits = reader.read_int::<u32>(6).map_err(|_| Error::Eof)? + 1;
90 last_trailing_zeros = 64 - last_leading_zeros - last_significant_bits;
91 }
92 let xor: u64 = reader
93 .read_int(last_significant_bits as usize)
94 .map_err(|_| Error::Eof)?;
95 last ^= xor << last_trailing_zeros;
96 *decoded = f64::from_bits(last);
97 }
98 _ => {
99 return Err(Error::Eof);
100 }
101 }
102 }
103
104 Ok(decoded)
105}
106
107pub fn verbose_encode(
108 name: &str,
109 floats: &[f64],
110 max_regret: u32,
111 mantissa: Option<u32>,
112 verbose: bool,
113) -> Vec<u8> {
114 let mut write_bytes = vec![];
115 let mut writer = BitWriteStream::new(&mut write_bytes, LittleEndian);
116
117 let mask = match mantissa {
118 Some(mantissa) => {
119 assert!(mantissa <= 52, "f64 has at most 52 bits of mantissa");
120 let mask = u64::MAX - ((1 << (52 - mantissa)) - 1);
121 if verbose {
122 println!("MASK: {:064b}", mask);
123 }
124 mask
125 }
126 None => u64::MAX,
127 };
128
129 if verbose {
131 println!(
132 "\x1b[1m{:23}\x1b[0m \x1b[1m{:64}\x1b[0m \x1b[1m{:64}\x1b[0m \x1b[1m{:10}\x1b[0m",
133 "DECIMAL", "BITS IN", "XOR", "BITS OUT"
134 );
135 println!(
136 "{:<23} {:64} {:64} {}",
137 floats[0],
138 format_f64_bits(floats[0].to_bits()),
139 "",
140 format_f64_bits(floats[0].to_bits())
141 );
142 }
143
144 writer.write_int(floats.len(), 64).unwrap();
145 writer.write_int(floats[0].to_bits(), 64).unwrap();
146 let mut last_value = floats[0];
147 let mut last_leading_zeros = 65;
148 let mut last_trailing_zeros = 65;
149 let mut last_significant_bits = 0;
150 let mut regret = 0;
151
152 for &f in floats.iter().skip(1) {
153 let xor = (f.to_bits() ^ last_value.to_bits()) & mask;
154
155 let leading_zeros = xor.leading_zeros().min(31);
156 let trailing_zeros = xor.trailing_zeros();
157
158 let mut bits_string = String::new();
159 if trailing_zeros == 64 {
160 writer.write_int(0, 1).unwrap();
161 bits_string.push_str("\x1b[1;31m0\x1b[0m");
162 } else {
163 let significant_bits = 64 - leading_zeros - trailing_zeros;
164 if leading_zeros >= last_leading_zeros
165 && trailing_zeros >= last_trailing_zeros
166 && (regret < max_regret || significant_bits == last_significant_bits)
167 {
168 writer.write_int(0b01, 2).unwrap();
169 bits_string.push_str("\x1b[1;31m10\x1b[0m");
170 let xor = xor >> last_trailing_zeros;
171 writer
172 .write_int(xor, last_significant_bits as usize)
173 .unwrap();
174 if verbose {
175 bits_string.push_str(&format!(
176 "\x1b[1;33m{:0width$b}\x1b[0m",
177 xor,
178 width = last_significant_bits as usize
179 ));
180 }
181 regret += last_significant_bits - significant_bits;
182 } else {
183 last_leading_zeros = leading_zeros;
184 last_trailing_zeros = trailing_zeros;
185 last_significant_bits = significant_bits;
186 regret = 0;
187
188 writer.write_int(0b11, 2).unwrap();
189 bits_string.push_str("\x1b[1;31m11\x1b[0m");
190 writer.write_int(leading_zeros, 5).unwrap();
191 bits_string.push_str(&format!("\x1b[1;32m{:05b}\x1b[0m", leading_zeros));
192 writer.write_int(significant_bits - 1, 6).unwrap();
193 bits_string.push_str(&format!("\x1b[1;34m{:06b}\x1b[0m", significant_bits - 1));
194 let xor = xor >> last_trailing_zeros;
195 writer.write_int(xor, significant_bits as usize).unwrap();
196 if verbose {
197 bits_string.push_str(&format!(
198 "\x1b[1;33m{:0width$b}\x1b[0m",
199 xor,
200 width = significant_bits as usize
201 ));
202 }
203 }
204 }
205 if verbose {
206 println!(
207 "{:<23} {:64} {:64} {:32}",
208 f,
209 format_f64_bits(f.to_bits()),
210 format_f64_bits_highlight_remainder(xor),
211 bits_string
212 );
213 }
214 last_value = f;
215 }
216
217 let uncompressed_size = std::mem::size_of_val(floats) + 8;
219 println!(
220 "Compression ratio of {:.2} for {name} (max_regret={max_regret})",
221 uncompressed_size as f64 / write_bytes.len() as f64,
222 );
223 write_bytes
224}
225
226fn format_f64_bits(bits: u64) -> String {
227 let bits_str = format!("{:064b}", bits);
228 format!(
230 "\x1b[1;31m{}\x1b[0m\x1b[1;32m{}\x1b[0m\x1b[1;34m{}\x1b[0m",
231 &bits_str[0..1],
232 &bits_str[1..12],
233 &bits_str[12..]
234 )
235}
236
237fn format_f64_bits_highlight_remainder(bits: u64) -> String {
238 let mut bits_str = format!("{:064b}", bits);
239 if let (Some(start_bold), Some(end_bold)) = (bits_str.find('1'), bits_str.rfind('1')) {
241 bits_str = format!(
242 "{}\x1b[1;33m{}\x1b[0m{}",
243 &bits_str[..start_bold],
244 &bits_str[start_bold..end_bold + 1],
245 &bits_str[end_bold + 1..]
246 );
247 }
248 bits_str
249}
250
251#[cfg(test)]
252mod test {
253 use super::{decode, encode};
254 use crate::test_data::FLOATS;
255
256 #[test]
257 fn test_xor_float_encode_decode() {
258 for &(floats, _) in FLOATS {
259 for max_regret in [0, 30, 100, 1000] {
260 let encoded = encode(floats, max_regret, None);
261 let decoded = decode(&encoded).unwrap();
262 assert_eq!(floats, decoded);
263 }
264 }
265 }
266}