nodedb-codec 0.1.1

Compression codecs for NodeDB timeseries columnar storage
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
// SPDX-License-Identifier: Apache-2.0

//! RaBitQ — 1-bit quantization with O(1/√D) MSE error bound (SIGMOD 2024).
//!
//! Algorithm outline
//! -----------------
//! 1. **Calibration**: compute centroid `c` over training vectors.
//! 2. **Rotation**: apply a randomised Walsh-Hadamard transform (WHT) with a
//!    seed-derived signed-diagonal matrix `D` to the residual `v - c`.
//!    - WHT requires a power-of-2 length; dimensions that are not pow2 are
//!      zero-padded before the transform then truncated after.
//!    - `D` is a vector of `±1` scalars derived deterministically from
//!      `rotation_seed` using an xorshift64 generator.
//! 3. **Encoding**: `code = sign(R·(v-c))`, packed 1-bit-per-dimension.
//! 4. **Distance estimation** (L2):
//!    `‖q-v‖² ≈ ‖v-c‖² + ‖q-c‖² − 2‖v-c‖‖q-c‖·(1 − 2·hamming/D)`
//!    with `O(1/√D)` MSE — see SIGMOD 2024 Theorem 4.
//!
//! IP / raw inner-product bias
//! ---------------------------
//! The angular estimator above is MSE-optimal for L2 and cosine. For raw IP
//! it carries a systematic bias. When `bias_correct = true` the codec
//! subtracts `dot_quantized` (stored in the [`QuantHeader`]) from the
//! asymmetric distance to partially compensate, following the TurboQuant-style
//! QJL residual correction. This is off by default; consumers that use raw IP
//! (RAG, attention scores) should enable it.
//!
//! [`rand_xoshiro`] is **not** a dependency of `nodedb-codec`. The signed-
//! diagonal flip vector is derived from `rotation_seed` via an inline
//! xorshift64 generator — no external crate required.

use crate::vector_quant::codec::VectorCodec;
use crate::vector_quant::hamming::hamming_distance;
use crate::vector_quant::layout::{QuantHeader, QuantMode, UnifiedQuantizedVector};

// ── Xorshift64 (inline PRNG) ────────────────────────────────────────────────

/// Minimal xorshift64 PRNG for deterministic signed-diagonal generation.
#[inline]
fn xorshift64(state: &mut u64) -> u64 {
    let mut x = *state;
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    *state = x;
    x
}

// ── WHT helpers ──────────────────────────────────────────────────────────────

/// Next power-of-two ≥ `n`, returning `n` itself if already pow2.
#[inline]
fn next_pow2(n: usize) -> usize {
    if n.is_power_of_two() {
        n
    } else {
        n.next_power_of_two()
    }
}

/// In-place Walsh-Hadamard Transform of a power-of-2 length slice.
/// O(N log N) butterfly. Does not normalise by 1/√N (sign-only code
/// does not require normalisation).
fn wht_inplace(buf: &mut [f32]) {
    let n = buf.len();
    debug_assert!(n.is_power_of_two());
    let mut step = 1usize;
    while step < n {
        let mut i = 0usize;
        while i < n {
            for j in i..i + step {
                let a = buf[j];
                let b = buf[j + step];
                buf[j] = a + b;
                buf[j + step] = a - b;
            }
            i += step * 2;
        }
        step *= 2;
    }
}

// ── Sign-pack / unpack helpers ───────────────────────────────────────────────

/// Pack `dim` signs from `rotated` (negative = 1-bit, non-negative = 0-bit)
/// into ceil(dim/8) bytes, LSB-first.
fn sign_pack(rotated: &[f32], dim: usize) -> Vec<u8> {
    let nbytes = dim.div_ceil(8);
    let mut out = vec![0u8; nbytes];
    for (i, &v) in rotated.iter().take(dim).enumerate() {
        if v < 0.0 {
            out[i / 8] |= 1 << (i % 8);
        }
    }
    out
}

/// Dequantize sign-packed bits back to ±1 values (for dot_quantized calc).
fn sign_unpack(packed: &[u8], dim: usize) -> Vec<f32> {
    (0..dim)
        .map(|i| {
            if packed[i / 8] & (1 << (i % 8)) != 0 {
                -1.0f32
            } else {
                1.0f32
            }
        })
        .collect()
}

// ── RaBitQCodec ──────────────────────────────────────────────────────────────

/// RaBitQ codec: 1-bit quantization with O(1/√D) MSE error bound.
///
/// See module-level documentation for algorithm details.
pub struct RaBitQCodec {
    pub dim: usize,
    /// Mean of training vectors; subtracted from each vector before rotation.
    centroid: Vec<f32>,
    /// Seed for the signed-diagonal flip vector used by the WHT rotation.
    rotation_seed: u64,
    /// If `true`, subtract `dot_quantized` from the asymmetric distance
    /// estimate as a TurboQuant-style IP-bias correction term.
    pub bias_correct: bool,
}

impl RaBitQCodec {
    /// Calibrate a new codec from a set of training vectors.
    ///
    /// - Computes the centroid as the coordinate-wise mean of `vectors`.
    /// - Stores `rotation_seed` for reproducible signed-diagonal generation.
    ///
    /// # Errors (none — returns Self directly)
    ///
    /// Returns a zero-centroid codec if `vectors` is empty.
    pub fn calibrate(vectors: &[&[f32]], dim: usize, rotation_seed: u64) -> Self {
        let centroid = if vectors.is_empty() {
            vec![0.0f32; dim]
        } else {
            let n = vectors.len() as f32;
            let mut c = vec![0.0f32; dim];
            for v in vectors {
                for (ci, &vi) in c.iter_mut().zip(v.iter()) {
                    *ci += vi;
                }
            }
            c.iter_mut().for_each(|x| *x /= n);
            c
        };
        Self {
            dim,
            centroid,
            rotation_seed,
            bias_correct: false,
        }
    }

    /// Apply the randomised WHT rotation to a residual vector.
    ///
    /// Steps:
    /// 1. Apply signed-diagonal `D` (deterministic from `rotation_seed`).
    /// 2. Zero-pad to the next power of two if `dim` is not pow2.
    /// 3. WHT in-place.
    /// 4. Truncate back to `dim`.
    pub fn apply_rotation(&self, v: &[f32]) -> Vec<f32> {
        let dim = self.dim;
        let pow2 = next_pow2(dim);

        // Signed-diagonal multiply: generate ±1 per dim from seed.
        let mut seed = self.rotation_seed;
        let mut buf = vec![0.0f32; pow2];
        for (i, &vi) in v.iter().take(dim).enumerate() {
            let flip = if xorshift64(&mut seed) & 1 == 0 {
                1.0f32
            } else {
                -1.0f32
            };
            buf[i] = vi * flip;
        }
        // Trailing elements remain zero (pad).

        wht_inplace(&mut buf);
        buf.truncate(dim);
        buf
    }

    /// Encode a single vector into a [`UnifiedQuantizedVector`] with
    /// `QuantMode::RaBitQ`.
    ///
    /// The header fields populated are:
    /// - `global_scale` = `residual_norm` (‖v−c‖); both store the same value
    ///   so that consumers that use either field without context still have the
    ///   magnitude available.
    /// - `residual_norm` = ‖v−c‖.
    /// - `dot_quantized` = ⟨residual, dequantised_sign_vector⟩ / ‖v−c‖;
    ///   used for IP-bias correction when `bias_correct = true`.
    fn encode_inner(&self, v: &[f32]) -> UnifiedQuantizedVector {
        let dim = self.dim;

        // Step 1: residual = v - centroid
        let residual: Vec<f32> = v
            .iter()
            .zip(self.centroid.iter())
            .map(|(&vi, &ci)| vi - ci)
            .collect();

        // Step 2: ‖residual‖
        let residual_norm = residual.iter().map(|x| x * x).sum::<f32>().sqrt();

        // Step 3: rotate
        let rotated = self.apply_rotation(&residual);

        // Step 4: sign-pack → 1-bit code
        let packed = sign_pack(&rotated, dim);

        // Step 5: compute dot_quantized = ⟨residual, R⁻¹·sign(rotated)⟩ / ‖residual‖
        // Inverse rotation of the sign vector, then dot with original residual.
        let signs_fp = sign_unpack(&packed, dim);
        // Inverse WHT rotation: apply WHT again then re-apply D⁻¹ = D (since D² = I).
        let pow2 = next_pow2(dim);
        let mut sign_buf = vec![0.0f32; pow2];
        for (i, &s) in signs_fp.iter().enumerate() {
            sign_buf[i] = s;
        }
        wht_inplace(&mut sign_buf);
        // Re-apply signed diagonal (D is its own inverse since flips are ±1)
        let mut seed = self.rotation_seed;
        #[allow(clippy::needless_range_loop)]
        for i in 0..dim {
            let flip = if xorshift64(&mut seed) & 1 == 0 {
                1.0f32
            } else {
                -1.0f32
            };
            sign_buf[i] *= flip;
        }
        let dot_raw: f32 = residual
            .iter()
            .zip(sign_buf.iter().take(dim))
            .map(|(&r, &s)| r * s)
            .sum();
        let dot_quantized = if residual_norm > 0.0 {
            dot_raw / residual_norm
        } else {
            0.0
        };

        let header = QuantHeader {
            quant_mode: QuantMode::RaBitQ as u16,
            dim: dim as u16,
            global_scale: residual_norm,
            residual_norm,
            dot_quantized,
            outlier_bitmask: 0,
            reserved: [0u8; 8],
        };

        UnifiedQuantizedVector::new(header, &packed, &[])
            .expect("RaBitQ encode: layout construction must succeed")
    }
}

// ── Quantized / Query newtypes ───────────────────────────────────────────────

/// Packed 1-bit quantized vector produced by [`RaBitQCodec::encode`].
pub struct RaBitQQuantized(UnifiedQuantizedVector);

impl AsRef<UnifiedQuantizedVector> for RaBitQQuantized {
    #[inline]
    fn as_ref(&self) -> &UnifiedQuantizedVector {
        &self.0
    }
}

/// Prepared query for [`RaBitQCodec`] distance computations.
pub struct RaBitQQuery {
    /// Sign-packed rotated query (same bit layout as the stored codes).
    pub rotated_signs: Vec<u8>,
    /// ‖q − centroid‖.
    pub query_norm: f32,
}

// ── VectorCodec impl ─────────────────────────────────────────────────────────

impl VectorCodec for RaBitQCodec {
    type Quantized = RaBitQQuantized;
    type Query = RaBitQQuery;

    fn encode(&self, v: &[f32]) -> Self::Quantized {
        RaBitQQuantized(self.encode_inner(v))
    }

    fn prepare_query(&self, q: &[f32]) -> Self::Query {
        let dim = self.dim;
        let residual: Vec<f32> = q
            .iter()
            .zip(self.centroid.iter())
            .map(|(&qi, &ci)| qi - ci)
            .collect();
        let query_norm = residual.iter().map(|x| x * x).sum::<f32>().sqrt();
        let rotated = self.apply_rotation(&residual);
        let rotated_signs = sign_pack(&rotated, dim);
        RaBitQQuery {
            rotated_signs,
            query_norm,
        }
    }

    /// Symmetric distance estimate: both `q` and `v` are quantized.
    ///
    /// `approx_l2 = ‖v-c‖² + ‖q-c‖² − 2·‖v-c‖·‖q-c‖·(1 − 2·hamming/D)`
    ///
    /// The angular factor `1 − 2·hamming/D` approximates `cos(θ)` between
    /// the two sign-vectors. Error bound: O(1/√D) MSE.
    fn fast_symmetric_distance(&self, q: &Self::Quantized, v: &Self::Quantized) -> f32 {
        let qh = q.0.header();
        let vh = v.0.header();
        let qb = q.0.packed_bits();
        let vb = v.0.packed_bits();
        let h = hamming_distance(qb, vb);
        let dim = self.dim as f32;
        let dot_estimate = 1.0 - 2.0 * h as f32 / dim;
        let approx = qh.residual_norm * qh.residual_norm + vh.residual_norm * vh.residual_norm
            - 2.0 * qh.residual_norm * vh.residual_norm * dot_estimate;
        approx.max(0.0)
    }

    /// Asymmetric distance estimate: `q` is a prepared [`RaBitQQuery`], `v`
    /// is a stored quantized vector.
    ///
    /// Uses `query_norm` (exact ‖q−c‖) against `v.residual_norm` (exact ‖v−c‖)
    /// for higher fidelity than the symmetric variant.
    ///
    /// If `self.bias_correct = true`, subtract `v.dot_quantized` as a first-
    /// order IP-bias correction term (TurboQuant-style).
    fn exact_asymmetric_distance(&self, q: &Self::Query, v: &Self::Quantized) -> f32 {
        let vh = v.0.header();
        let vb = v.0.packed_bits();
        let h = hamming_distance(&q.rotated_signs, vb);
        let dim = self.dim as f32;
        let dot_estimate = 1.0 - 2.0 * h as f32 / dim;
        let mut approx = q.query_norm * q.query_norm + vh.residual_norm * vh.residual_norm
            - 2.0 * q.query_norm * vh.residual_norm * dot_estimate;
        if self.bias_correct {
            approx -= vh.dot_quantized;
        }
        approx.max(0.0)
    }
}

// ── Tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    fn random_vec(seed: u64, dim: usize) -> Vec<f32> {
        let mut s = seed | 1;
        (0..dim)
            .map(|_| {
                let v = xorshift64(&mut s);
                // Map to [-1, 1]
                (v as f32 / u64::MAX as f32) * 2.0 - 1.0
            })
            .collect()
    }

    #[test]
    fn apply_rotation_different_seeds_differ() {
        let dim = 64;
        let v: Vec<f32> = (0..dim).map(|i| i as f32 * 0.1).collect();
        let codec_a = RaBitQCodec::calibrate(&[], dim, 0xDEAD_BEEF_1234_5678);
        let codec_b = RaBitQCodec::calibrate(&[], dim, 0xCAFE_BABE_0000_0001);
        let rot_a = codec_a.apply_rotation(&v);
        let rot_b = codec_b.apply_rotation(&v);
        // Different seeds → different rotation outputs.
        let differ = rot_a
            .iter()
            .zip(rot_b.iter())
            .any(|(a, b)| (a - b).abs() > 1e-6);
        assert!(differ, "different seeds must produce different rotations");
    }

    #[test]
    fn encode_roundtrip_preserves_residual_norm() {
        let dim = 128;
        let vecs: Vec<Vec<f32>> = (0..16).map(|i| random_vec(i as u64, dim)).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let codec = RaBitQCodec::calibrate(&refs, dim, 42);
        let v = random_vec(99, dim);
        let q = codec.encode(&v);
        let h = q.0.header();
        // residual_norm should be finite and equal to global_scale.
        assert!(h.residual_norm.is_finite() && h.residual_norm >= 0.0);
        assert!((h.global_scale - h.residual_norm).abs() < 1e-6);
    }

    #[test]
    fn distance_non_negative_finite() {
        let dim = 64;
        let vecs: Vec<Vec<f32>> = (0..8).map(|i| random_vec(i as u64, dim)).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let codec = RaBitQCodec::calibrate(&refs, dim, 7);
        let v1 = codec.encode(&random_vec(100, dim));
        let v2 = codec.encode(&random_vec(200, dim));
        let sym = codec.fast_symmetric_distance(&v1, &v2);
        assert!(sym.is_finite() && sym >= 0.0, "sym distance: {sym}");
        let q = codec.prepare_query(&random_vec(300, dim));
        let asym = codec.exact_asymmetric_distance(&q, &v2);
        assert!(asym.is_finite() && asym >= 0.0, "asym distance: {asym}");
    }

    #[test]
    fn calibrate_identical_vectors_zero_residual() {
        let dim = 32;
        let v: Vec<f32> = (0..dim).map(|i| i as f32).collect();
        let refs = vec![v.as_slice(); 16];
        let codec = RaBitQCodec::calibrate(&refs, dim, 1);
        // Centroid == v, so residual = 0 → residual_norm = 0.
        let q = codec.encode(&v);
        assert!(
            q.0.header().residual_norm < 1e-5,
            "residual_norm should be ~0 for vector equal to centroid"
        );
    }
}