locustdb_compression_utils/xor_float/
single.rs

1use bitbuffer::{BigEndian, BitWriteStream};
2
3pub fn encode(floats: &[f32], max_regret: u32, mantissa: Option<u32>) -> Vec<u8> {
4    let mut write_bytes = vec![];
5    let mut writer = BitWriteStream::new(&mut write_bytes, BigEndian);
6    writer.write_int(floats.len(), 64).unwrap();
7    match floats.first() {
8        Some(first) => writer.write_int(first.to_bits(), 32).unwrap(),
9        None => return write_bytes,
10    }
11    let mut last_value = floats[0];
12    let mut last_leading_zeros = 65;
13    let mut last_trailing_zeros = 65;
14    let mut last_significant_bits = 0;
15    let mut regret = 0;
16    let mask = match mantissa {
17        Some(mantissa) => {
18            assert!(mantissa <= 23, "f32 has at most 23 bits of mantissa");
19            u32::MAX - ((1 << (32 - mantissa)) - 1)
20        }
21        None => u32::MAX,
22    };
23    for &f in floats.iter().skip(1) {
24        let xor = f.to_bits() ^ last_value.to_bits() & mask;
25        let leading_zeros = xor.leading_zeros();
26        let trailing_zeros = xor.trailing_zeros();
27        if trailing_zeros == 32 {
28            writer.write_int(0, 1).unwrap();
29        } else {
30            let significant_bits = 32 - leading_zeros - trailing_zeros;
31            if leading_zeros >= last_leading_zeros
32                && trailing_zeros >= last_trailing_zeros
33                && (regret < max_regret || significant_bits == last_significant_bits)
34            {
35                writer.write_int(0b10, 2).unwrap();
36                let xor = xor >> last_trailing_zeros;
37                writer.write_int(xor, last_significant_bits as usize).unwrap();
38                regret += last_significant_bits - significant_bits;
39            } else {
40                last_leading_zeros = leading_zeros;
41                last_trailing_zeros = trailing_zeros;
42                last_significant_bits = significant_bits;
43                regret = 0;
44                writer.write_int(0b11, 2).unwrap();
45                writer.write_int(leading_zeros as u64, 5).unwrap();
46                writer.write_int(significant_bits as u64 - 1, 6).unwrap();
47                let xor = xor >> last_trailing_zeros;
48                writer.write_int(xor, significant_bits as usize).unwrap();
49            }
50        }
51        last_value = f;
52    }
53    write_bytes
54}
55
56
57pub fn verbose_encode(name: &str, floats: &[f32], max_regret: u32, mantissa: Option<u32>, verbose: bool) -> Vec<u8> {
58    let mut write_bytes = vec![];
59    let mut writer = BitWriteStream::new(&mut write_bytes, BigEndian);
60
61    let mask = match mantissa {
62        Some(mantissa) => {
63            assert!(mantissa <= 23, "f32 has at most 23 bits of mantissa");
64            let mask = u32::MAX - ((1 << (23 - mantissa)) - 1);
65            if verbose {
66                println!("MASK: {:032b}", mask);
67            }
68            mask
69        }
70        None => u32::MAX,
71    };
72
73    // ANSI bold escape code
74    if verbose {
75        println!(
76            "\x1b[1m{:23}\x1b[0m  \x1b[1m{:32}\x1b[0m  \x1b[1m{:32}\x1b[0m  \x1b[1m{:10}\x1b[0m",
77            "DECIMAL",
78            "BITS IN",
79            "XOR",
80            "BITS OUT"
81        );
82        println!(
83            "{:<23}  {:32}  {:32}  {}",
84            floats[0],
85            format_f32_bits(floats[0].to_bits()),
86            "",
87            format_f32_bits(floats[0].to_bits())
88        );
89    }
90
91    writer.write_int(floats.len(), 64).unwrap();
92    writer.write_int(floats[0].to_bits(), 32).unwrap();
93    let mut last_value = floats[0];
94    let mut last_leading_zeros = 65;
95    let mut last_trailing_zeros = 65;
96    let mut last_significant_bits = 0;
97    let mut regret = 0;
98
99    for &f in floats.iter().skip(1) {
100        let xor = (f.to_bits() ^ last_value.to_bits()) & mask;
101
102        let leading_zeros = xor.leading_zeros().min(31);
103        let trailing_zeros = xor.trailing_zeros();
104
105        let mut bits_string = String::new();
106        if trailing_zeros == 32 {
107            writer.write_int(0, 1).unwrap();
108            bits_string.push_str("\x1b[1;31m0\x1b[0m");
109        } else {
110            let significant_bits = 32 - leading_zeros - trailing_zeros;
111            if leading_zeros >= last_leading_zeros && trailing_zeros >= last_trailing_zeros && (regret < max_regret || significant_bits == last_significant_bits) {
112                writer.write_int(0b10, 2).unwrap();
113                bits_string.push_str("\x1b[1;31m10\x1b[0m");
114                let xor = xor >> last_trailing_zeros;
115                writer.write_int(xor, last_significant_bits as usize).unwrap();
116                if verbose {
117                    bits_string.push_str(&format!(
118                        "\x1b[1;33m{:0width$b}\x1b[0m",
119                        xor,
120                        width = last_significant_bits as usize
121                    ));
122                }
123                regret += last_significant_bits - significant_bits;
124            } else {
125                last_leading_zeros = leading_zeros;
126                last_trailing_zeros = trailing_zeros;
127                last_significant_bits = significant_bits;
128                regret = 0;
129
130                writer.write_int(0b11, 2).unwrap();
131                bits_string.push_str("\x1b[1;31m11\x1b[0m");
132                writer.write_int(leading_zeros as u64, 5).unwrap();
133                bits_string.push_str(&format!("\x1b[1;32m{:05b}\x1b[0m", leading_zeros));
134                writer.write_int(significant_bits as u64 - 1, 6).unwrap();
135                bits_string.push_str(&format!("\x1b[1;34m{:05b}\x1b[0m", significant_bits - 1));
136                let xor = xor >> last_trailing_zeros;
137                writer.write_int(xor, significant_bits as usize).unwrap();
138                if verbose {
139                    bits_string.push_str(&format!(
140                        "\x1b[1;33m{:0width$b}\x1b[0m",
141                        xor,
142                        width = significant_bits as usize
143                    ));
144                }
145            }
146        }
147        if verbose {
148            println!(
149                "{:<23}  {:32}  {:32}  {:32}",
150                f,
151                format_f32_bits(f.to_bits()),
152                format_f32_bits_highlight_remainder(xor),
153                bits_string
154            );
155        }
156        last_value = f;
157    }
158
159    // 8 bytes per value and 8 addtional bytes for the length
160    let uncompressed_size = std::mem::size_of_val(floats) + 8;
161    println!(
162        "Compression ratio of {:.2} for {name} (max_regret={max_regret})",
163        uncompressed_size as f64 / write_bytes.len() as f64,
164    );
165    write_bytes
166}
167
168
169fn format_f32_bits(bits: u32) -> String {
170    let bits_str = format!("{:032b}", bits);
171    // ANSI escape codes for color on sign, exponent, and mantissa
172    format!(
173        "\x1b[1;31m{}\x1b[0m\x1b[1;32m{}\x1b[0m\x1b[1;34m{}\x1b[0m",
174        &bits_str[0..1],
175        &bits_str[1..9],
176        &bits_str[9..]
177    )
178}
179
180fn format_f32_bits_highlight_remainder(bits: u32) -> String {
181    let mut bits_str = format!("{:032b}", bits);
182    // Highlight the bits that are different from the previous value
183    if let (Some(start_bold), Some(end_bold)) = (bits_str.find('1'), bits_str.rfind('1')) {
184        bits_str = format!(
185            "{}\x1b[1;33m{}\x1b[0m{}",
186            &bits_str[..start_bold],
187            &bits_str[start_bold..end_bold + 1],
188            &bits_str[end_bold + 1..]
189        );
190    }
191    bits_str
192}