locustdb_compression_utils/xor_float/
single.rs1use bitbuffer::{BigEndian, BitWriteStream};
2
3pub fn encode(floats: &[f32], max_regret: u32, mantissa: Option<u32>) -> Vec<u8> {
4 let mut write_bytes = vec![];
5 let mut writer = BitWriteStream::new(&mut write_bytes, BigEndian);
6 writer.write_int(floats.len(), 64).unwrap();
7 match floats.first() {
8 Some(first) => writer.write_int(first.to_bits(), 32).unwrap(),
9 None => return write_bytes,
10 }
11 let mut last_value = floats[0];
12 let mut last_leading_zeros = 65;
13 let mut last_trailing_zeros = 65;
14 let mut last_significant_bits = 0;
15 let mut regret = 0;
16 let mask = match mantissa {
17 Some(mantissa) => {
18 assert!(mantissa <= 23, "f32 has at most 23 bits of mantissa");
19 u32::MAX - ((1 << (32 - mantissa)) - 1)
20 }
21 None => u32::MAX,
22 };
23 for &f in floats.iter().skip(1) {
24 let xor = f.to_bits() ^ last_value.to_bits() & mask;
25 let leading_zeros = xor.leading_zeros();
26 let trailing_zeros = xor.trailing_zeros();
27 if trailing_zeros == 32 {
28 writer.write_int(0, 1).unwrap();
29 } else {
30 let significant_bits = 32 - leading_zeros - trailing_zeros;
31 if leading_zeros >= last_leading_zeros
32 && trailing_zeros >= last_trailing_zeros
33 && (regret < max_regret || significant_bits == last_significant_bits)
34 {
35 writer.write_int(0b10, 2).unwrap();
36 let xor = xor >> last_trailing_zeros;
37 writer.write_int(xor, last_significant_bits as usize).unwrap();
38 regret += last_significant_bits - significant_bits;
39 } else {
40 last_leading_zeros = leading_zeros;
41 last_trailing_zeros = trailing_zeros;
42 last_significant_bits = significant_bits;
43 regret = 0;
44 writer.write_int(0b11, 2).unwrap();
45 writer.write_int(leading_zeros as u64, 5).unwrap();
46 writer.write_int(significant_bits as u64 - 1, 6).unwrap();
47 let xor = xor >> last_trailing_zeros;
48 writer.write_int(xor, significant_bits as usize).unwrap();
49 }
50 }
51 last_value = f;
52 }
53 write_bytes
54}
55
56
57pub fn verbose_encode(name: &str, floats: &[f32], max_regret: u32, mantissa: Option<u32>, verbose: bool) -> Vec<u8> {
58 let mut write_bytes = vec![];
59 let mut writer = BitWriteStream::new(&mut write_bytes, BigEndian);
60
61 let mask = match mantissa {
62 Some(mantissa) => {
63 assert!(mantissa <= 23, "f32 has at most 23 bits of mantissa");
64 let mask = u32::MAX - ((1 << (23 - mantissa)) - 1);
65 if verbose {
66 println!("MASK: {:032b}", mask);
67 }
68 mask
69 }
70 None => u32::MAX,
71 };
72
73 if verbose {
75 println!(
76 "\x1b[1m{:23}\x1b[0m \x1b[1m{:32}\x1b[0m \x1b[1m{:32}\x1b[0m \x1b[1m{:10}\x1b[0m",
77 "DECIMAL",
78 "BITS IN",
79 "XOR",
80 "BITS OUT"
81 );
82 println!(
83 "{:<23} {:32} {:32} {}",
84 floats[0],
85 format_f32_bits(floats[0].to_bits()),
86 "",
87 format_f32_bits(floats[0].to_bits())
88 );
89 }
90
91 writer.write_int(floats.len(), 64).unwrap();
92 writer.write_int(floats[0].to_bits(), 32).unwrap();
93 let mut last_value = floats[0];
94 let mut last_leading_zeros = 65;
95 let mut last_trailing_zeros = 65;
96 let mut last_significant_bits = 0;
97 let mut regret = 0;
98
99 for &f in floats.iter().skip(1) {
100 let xor = (f.to_bits() ^ last_value.to_bits()) & mask;
101
102 let leading_zeros = xor.leading_zeros().min(31);
103 let trailing_zeros = xor.trailing_zeros();
104
105 let mut bits_string = String::new();
106 if trailing_zeros == 32 {
107 writer.write_int(0, 1).unwrap();
108 bits_string.push_str("\x1b[1;31m0\x1b[0m");
109 } else {
110 let significant_bits = 32 - leading_zeros - trailing_zeros;
111 if leading_zeros >= last_leading_zeros && trailing_zeros >= last_trailing_zeros && (regret < max_regret || significant_bits == last_significant_bits) {
112 writer.write_int(0b10, 2).unwrap();
113 bits_string.push_str("\x1b[1;31m10\x1b[0m");
114 let xor = xor >> last_trailing_zeros;
115 writer.write_int(xor, last_significant_bits as usize).unwrap();
116 if verbose {
117 bits_string.push_str(&format!(
118 "\x1b[1;33m{:0width$b}\x1b[0m",
119 xor,
120 width = last_significant_bits as usize
121 ));
122 }
123 regret += last_significant_bits - significant_bits;
124 } else {
125 last_leading_zeros = leading_zeros;
126 last_trailing_zeros = trailing_zeros;
127 last_significant_bits = significant_bits;
128 regret = 0;
129
130 writer.write_int(0b11, 2).unwrap();
131 bits_string.push_str("\x1b[1;31m11\x1b[0m");
132 writer.write_int(leading_zeros as u64, 5).unwrap();
133 bits_string.push_str(&format!("\x1b[1;32m{:05b}\x1b[0m", leading_zeros));
134 writer.write_int(significant_bits as u64 - 1, 6).unwrap();
135 bits_string.push_str(&format!("\x1b[1;34m{:05b}\x1b[0m", significant_bits - 1));
136 let xor = xor >> last_trailing_zeros;
137 writer.write_int(xor, significant_bits as usize).unwrap();
138 if verbose {
139 bits_string.push_str(&format!(
140 "\x1b[1;33m{:0width$b}\x1b[0m",
141 xor,
142 width = significant_bits as usize
143 ));
144 }
145 }
146 }
147 if verbose {
148 println!(
149 "{:<23} {:32} {:32} {:32}",
150 f,
151 format_f32_bits(f.to_bits()),
152 format_f32_bits_highlight_remainder(xor),
153 bits_string
154 );
155 }
156 last_value = f;
157 }
158
159 let uncompressed_size = std::mem::size_of_val(floats) + 8;
161 println!(
162 "Compression ratio of {:.2} for {name} (max_regret={max_regret})",
163 uncompressed_size as f64 / write_bytes.len() as f64,
164 );
165 write_bytes
166}
167
168
169fn format_f32_bits(bits: u32) -> String {
170 let bits_str = format!("{:032b}", bits);
171 format!(
173 "\x1b[1;31m{}\x1b[0m\x1b[1;32m{}\x1b[0m\x1b[1;34m{}\x1b[0m",
174 &bits_str[0..1],
175 &bits_str[1..9],
176 &bits_str[9..]
177 )
178}
179
180fn format_f32_bits_highlight_remainder(bits: u32) -> String {
181 let mut bits_str = format!("{:032b}", bits);
182 if let (Some(start_bold), Some(end_bold)) = (bits_str.find('1'), bits_str.rfind('1')) {
184 bits_str = format!(
185 "{}\x1b[1;33m{}\x1b[0m{}",
186 &bits_str[..start_bold],
187 &bits_str[start_bold..end_bold + 1],
188 &bits_str[end_bold + 1..]
189 );
190 }
191 bits_str
192}