//! turboquant 0.1.1
//!
//! Implementation of Google's TurboQuant algorithm for vector quantization.
use serde::{Deserialize, Serialize};

use crate::error::{Result, TurboQuantError};
use crate::utils::{beta_pdf, sample_beta_marginal};

/// A scalar quantization codebook: a set of centroids and decision boundaries
/// optimized for the Beta distribution (marginal distribution of coordinates
/// of unit-sphere-uniform vectors).
///
/// Invariants upheld by [`generate_codebook`]:
/// - `centroids` is sorted ascending and has `2^bit_width` entries;
/// - `boundaries` has `centroids.len() - 1` entries, each the midpoint of a
///   pair of adjacent centroids.
///
/// NOTE(review): the fields are `pub`, so hand-built codebooks may violate
/// these invariants; the quantization fast paths assume they hold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Codebook {
    /// The k = 2^b quantization centroids (reconstruction values), sorted ascending.
    pub centroids: Vec<f64>,
    /// The k-1 decision boundaries (thresholds between centroids), sorted ascending.
    pub boundaries: Vec<f64>,
    /// Number of bits per coordinate (1..=8).
    pub bit_width: u8,
}

impl Codebook {
    /// Return the number of levels (centroids) = 2^bit_width.
    pub fn num_levels(&self) -> usize {
        self.centroids.len()
    }

    /// Validate that a codebook index is in range for this codebook.
    ///
    /// # Errors
    /// Returns [`TurboQuantError::InvalidQuantizationIndex`] when
    /// `index >= num_levels()`.
    pub fn validate_index(&self, index: u8) -> Result<()> {
        let max = self.num_levels().saturating_sub(1) as u8;
        if index as usize >= self.num_levels() {
            return Err(TurboQuantError::InvalidQuantizationIndex {
                index,
                max,
                bit_width: self.bit_width,
            });
        }

        Ok(())
    }

    /// Find the quantization index for a scalar value using binary search
    /// on the pre-computed decision boundaries.
    ///
    /// Region `i` is the half-open interval `[boundaries[i-1], boundaries[i])`,
    /// so a value landing exactly on a boundary belongs to the region on its
    /// *right*.
    ///
    /// This is a low-level fast path used by validated callers. Non-finite
    /// values are not rejected here; NaN maps to index 0.
    pub fn quantize_scalar(&self, value: f64) -> u8 {
        if value.is_nan() {
            return 0;
        }
        // boundaries.len() == num_levels - 1; boundary i separates centroid i
        // from centroid i + 1. `partial_cmp` only returns `None` for NaN,
        // which was handled above, so the `unwrap_or` arm is unreachable.
        match self
            .boundaries
            .binary_search_by(|b| b.partial_cmp(&value).unwrap_or(std::cmp::Ordering::Equal))
        {
            // Exact boundary hit: boundary i is the left edge of region i + 1,
            // so pick the right side, matching the half-open convention above.
            // (Previously this returned `i`, the left centroid, contradicting
            // the documented convention.) For a well-formed codebook
            // i + 1 <= boundaries.len() == num_levels - 1, so the clamp is
            // purely defensive against hand-built codebooks.
            Ok(i) => (i + 1).min(self.centroids.len() - 1) as u8,
            // `i` is the insertion point — the count of boundaries strictly
            // below `value` — which is exactly the region index.
            Err(i) => i.min(self.centroids.len() - 1) as u8,
        }
    }

    /// Checked variant of [`Codebook::quantize_scalar`] that rejects non-finite input.
    ///
    /// # Errors
    /// Returns [`TurboQuantError::InvalidValue`] for NaN or ±infinity.
    pub fn checked_quantize_scalar(&self, value: f64) -> Result<u8> {
        if !value.is_finite() {
            return Err(TurboQuantError::InvalidValue {
                context: "codebook scalar value".into(),
                value,
            });
        }

        Ok(self.quantize_scalar(value))
    }

    /// Decode a quantization index back to its centroid value.
    ///
    /// # Panics
    /// Panics if `index >= num_levels()`; use
    /// [`Codebook::checked_dequantize_scalar`] for untrusted indices.
    pub fn dequantize_scalar(&self, index: u8) -> f64 {
        self.centroids[index as usize]
    }

    /// Checked variant of [`Codebook::dequantize_scalar`] that validates the index.
    ///
    /// # Errors
    /// Returns [`TurboQuantError::InvalidQuantizationIndex`] for an
    /// out-of-range index.
    pub fn checked_dequantize_scalar(&self, index: u8) -> Result<f64> {
        self.validate_index(index)?;
        Ok(self.dequantize_scalar(index))
    }
}

/// Build a Lloyd-Max optimal codebook for the per-coordinate marginal of a
/// `dim`-dimensional unit-sphere-uniform vector.
///
/// The marginal PDF of each coordinate is:
///   f(x) ∝ (1 - x²)^((d-3)/2)  for |x| < 1
///
/// For d > 50 this is well approximated by N(0, 1/d).
///
/// Starting from quantile-spaced centroids, each iteration applies the two
/// Lloyd-Max optimality conditions in turn:
///   * every boundary is the midpoint of its neighbouring centroids, and
///   * every centroid is the conditional mean of its region:
///     c_k = E[X | b_{k-1} ≤ X < b_k].
/// The loop stops early once the centroids stop moving.
///
/// # Errors
/// Fails when `bit_width` is outside 1..=8 or `dim` is zero.
pub fn generate_codebook(dim: usize, bit_width: u8, iterations: usize) -> Result<Codebook> {
    if !(1..=8).contains(&bit_width) {
        return Err(TurboQuantError::InvalidBitWidth(bit_width));
    }
    if dim == 0 {
        return Err(TurboQuantError::InvalidDimension(dim));
    }

    // Number of quantization levels.
    let levels = 1usize << bit_width;

    // Seed the centroids at evenly spaced quantiles of the marginal
    // distribution, then sort (makes no ordering assumption about the
    // quantile helper).
    let mut centroids: Vec<f64> = (0..levels)
        .map(|i| sample_beta_marginal(dim, (i as f64 + 0.5) / levels as f64))
        .collect();
    centroids.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    for _ in 0..iterations {
        // Condition 1: boundaries are midpoints of adjacent centroids.
        let mids: Vec<f64> = centroids
            .windows(2)
            .map(|pair| (pair[0] + pair[1]) / 2.0)
            .collect();

        // Condition 2: centroids are conditional means over each Voronoi
        // region (Simpson's-rule integration inside).
        let refined = compute_centroids(&centroids, &mids, dim);

        let settled = centroids
            .iter()
            .zip(refined.iter())
            .all(|(old, new)| (old - new).abs() < 1e-12);

        centroids = refined;

        if settled {
            break;
        }
    }

    // Final boundaries from the converged centroids.
    let boundaries: Vec<f64> = centroids
        .windows(2)
        .map(|pair| (pair[0] + pair[1]) / 2.0)
        .collect();

    Ok(Codebook {
        centroids,
        boundaries,
        bit_width,
    })
}

/// One Lloyd-Max refinement pass: for every Voronoi region
/// [b_{k-1}, b_k), return the conditional mean E[X | b_{k-1} ≤ X < b_k]
/// under the Beta marginal PDF for dimension `dim`.
fn compute_centroids(old_centroids: &[f64], boundaries: &[f64], dim: usize) -> Vec<f64> {
    // Simpson integration points used per region.
    const POINTS_PER_REGION: usize = 200;
    let k = old_centroids.len();

    (0..k)
        .map(|i| {
            // Region edges: the outermost regions extend to the support
            // limits ±1, clamped slightly inward to keep the PDF well-behaved.
            let lo = if i == 0 { -1.0_f64 } else { boundaries[i - 1] }.max(-0.9999);
            let hi = if i + 1 == k { 1.0_f64 } else { boundaries[i] }.min(0.9999);

            // Degenerate (empty) region: keep the previous centroid.
            if hi <= lo {
                return old_centroids[i];
            }

            // Conditional mean = ∫ x·f(x) dx / ∫ f(x) dx over [lo, hi],
            // both integrals computed in a single pass.
            let (moment, mass) = simpson_integrate(lo, hi, POINTS_PER_REGION, |x| {
                let density = beta_pdf(x, dim);
                (x * density, density)
            });

            // Numerically empty region: fall back to the previous centroid.
            if mass.abs() < 1e-15 {
                old_centroids[i]
            } else {
                moment / mass
            }
        })
        .collect()
}

/// Composite Simpson's rule over [a, b], integrating two functions at once.
///
/// `f(x)` returns the pair `(f(x), g(x))`; the result is `(∫f dx, ∫g dx)`
/// computed from the same sample points, halving the number of (potentially
/// expensive) integrand evaluations for the centroid update.
///
/// `n` is the requested number of subintervals. Simpson's rule needs an
/// even, positive count, so odd values are rounded up and `n == 0` is
/// promoted to 2 (previously `n == 0` divided by zero, yielding a non-finite
/// step size and garbage results).
fn simpson_integrate<F>(a: f64, b: f64, n: usize, f: F) -> (f64, f64)
where
    F: Fn(f64) -> (f64, f64),
{
    // Normalize to an even number of subintervals, at least 2.
    let n = match n {
        0 | 1 => 2,
        n if n % 2 != 0 => n + 1,
        n => n,
    };
    let h = (b - a) / n as f64;

    // Endpoints carry weight 1.
    let (f0, g0) = f(a);
    let (fn_, gn) = f(b);
    let mut sum_f = f0 + fn_;
    let mut sum_g = g0 + gn;

    // Interior points alternate Simpson weights: 4 at odd, 2 at even indices.
    for i in 1..n {
        let x = a + i as f64 * h;
        let (fi, gi) = f(x);
        let w = if i % 2 == 0 { 2.0 } else { 4.0 };
        sum_f += w * fi;
        sum_g += w * gi;
    }

    (sum_f * h / 3.0, sum_g * h / 3.0)
}

/// Pre-computed codebook cache for common configurations.
///
/// Keyed by `(dim, bit_width)`; codebooks are generated lazily on first
/// request via [`CodebookCache::get_or_generate`].
pub struct CodebookCache {
    // Map from (dimension, bits-per-coordinate) to its generated codebook.
    books: std::collections::HashMap<(usize, u8), Codebook>,
}

impl CodebookCache {
    /// Create an empty cache.
    pub fn new() -> Self {
        Self {
            books: std::collections::HashMap::new(),
        }
    }

    /// Get or generate a codebook for `(dim, bit_width)`. Generated books use
    /// 100 Lloyd-Max iterations and are cached for subsequent calls.
    ///
    /// # Errors
    /// Propagates [`generate_codebook`] errors (invalid dimension or bit
    /// width); failed generations are not cached.
    pub fn get_or_generate(&mut self, dim: usize, bit_width: u8) -> Result<&Codebook> {
        use std::collections::hash_map::Entry;

        // Single hash lookup via the entry API: the slot is resolved once and
        // the inserted or existing value is returned directly. (The previous
        // implementation re-indexed the map after insertion — a second lookup
        // plus an unnecessary panic path.)
        match self.books.entry((dim, bit_width)) {
            Entry::Occupied(entry) => Ok(entry.into_mut()),
            Entry::Vacant(entry) => {
                let codebook = generate_codebook(dim, bit_width, 100)?;
                Ok(entry.insert(codebook))
            }
        }
    }
}

impl Default for CodebookCache {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    // Covers: codebook shape/symmetry for several bit widths, quantize ↔
    // dequantize behavior (including NaN and out-of-range inputs), parameter
    // validation, and the codebook cache.
    use super::*;

    #[test]
    fn test_codebook_1bit() {
        let cb = generate_codebook(128, 1, 50).unwrap();
        assert_eq!(cb.centroids.len(), 2);
        assert_eq!(cb.boundaries.len(), 1);
        // Centroids should be symmetric around 0
        assert!((cb.centroids[0] + cb.centroids[1]).abs() < 1e-6);
        // Boundary should be near 0
        assert!(cb.boundaries[0].abs() < 1e-6);
    }

    #[test]
    fn test_codebook_4bit() {
        let cb = generate_codebook(128, 4, 100).unwrap();
        assert_eq!(cb.centroids.len(), 16);
        assert_eq!(cb.boundaries.len(), 15);
        // Centroids should be sorted
        for w in cb.centroids.windows(2) {
            assert!(w[0] < w[1]);
        }
    }

    #[test]
    fn test_quantize_dequantize_roundtrip() {
        let cb = generate_codebook(64, 3, 50).unwrap();
        for &val in &[-0.5, -0.1, 0.0, 0.1, 0.5] {
            let idx = cb.quantize_scalar(val);
            let recon = cb.dequantize_scalar(idx);
            // Reconstruction should be "close" to original (within half the codebook range)
            assert!(
                (val - recon).abs() < 0.5,
                "val={}, recon={}, idx={}",
                val,
                recon,
                idx
            );
        }
    }

    #[test]
    fn test_invalid_bit_width() {
        // Only bit widths 1..=8 are accepted.
        assert!(generate_codebook(64, 0, 50).is_err());
        assert!(generate_codebook(64, 9, 50).is_err());
    }

    #[test]
    fn test_invalid_dimension() {
        assert!(generate_codebook(0, 4, 50).is_err());
    }

    #[test]
    fn test_quantize_nan_returns_zero() {
        // NaN is documented to map to index 0 on the unchecked fast path.
        let cb = generate_codebook(64, 2, 50).unwrap();
        let idx = cb.quantize_scalar(f64::NAN);
        assert_eq!(idx, 0);
    }

    #[test]
    fn test_codebook_centroids_in_range() {
        // The marginal distribution has support (-1, 1).
        let cb = generate_codebook(32, 2, 50).unwrap();
        for c in &cb.centroids {
            assert!(*c > -1.0 && *c < 1.0, "centroid {} out of range", c);
        }
    }

    #[test]
    fn test_codebook_cache_get_or_generate() {
        let mut cache = CodebookCache::new();
        // First call generates the codebook
        let cb1 = cache.get_or_generate(64, 2).unwrap();
        let centroids1 = cb1.centroids.clone();
        // Second call returns the cached version (same centroids)
        let cb2 = cache.get_or_generate(64, 2).unwrap();
        assert_eq!(centroids1, cb2.centroids);
    }

    #[test]
    fn test_codebook_cache_different_configs() {
        let mut cache = CodebookCache::new();
        let len1 = cache.get_or_generate(64, 2).unwrap().centroids.len();
        let len2 = cache.get_or_generate(64, 4).unwrap().centroids.len();
        // Different bit widths → different codebooks
        assert_ne!(len1, len2);
    }

    #[test]
    fn test_codebook_cache_default() {
        let mut cache = CodebookCache::default();
        assert!(cache.get_or_generate(32, 2).is_ok());
    }

    #[test]
    fn test_codebook_cache_invalid_params() {
        // Invalid parameters propagate the generation error through the cache.
        let mut cache = CodebookCache::new();
        assert!(cache.get_or_generate(0, 2).is_err());
        assert!(cache.get_or_generate(64, 0).is_err());
    }

    #[test]
    fn test_codebook_large_dim_gaussian_path() {
        // dim > 50 uses Gaussian approximation in beta_pdf
        let cb = generate_codebook(256, 4, 100).unwrap();
        assert_eq!(cb.centroids.len(), 16);
        // Centroids should be sorted
        for w in cb.centroids.windows(2) {
            assert!(w[0] < w[1], "centroids not sorted: {} >= {}", w[0], w[1]);
        }
        // Centroids should be tightly clustered around 0 for large dim
        for c in &cb.centroids {
            assert!(c.abs() < 0.5, "centroid {} too far from 0 for dim=256", c);
        }
    }

    #[test]
    fn test_codebook_boundary_midpoints() {
        let cb = generate_codebook(64, 2, 50).unwrap();
        // Boundaries should be between adjacent centroids
        for (i, &b) in cb.boundaries.iter().enumerate() {
            assert!(
                b > cb.centroids[i] && b < cb.centroids[i + 1],
                "boundary {} not between centroids {} and {}",
                b,
                cb.centroids[i],
                cb.centroids[i + 1]
            );
        }
    }

    #[test]
    fn test_codebook_8bit() {
        // Largest supported bit width: 2^8 = 256 levels.
        let cb = generate_codebook(128, 8, 50).unwrap();
        assert_eq!(cb.centroids.len(), 256);
        assert_eq!(cb.boundaries.len(), 255);
    }

    #[test]
    fn test_quantize_extreme_values() {
        // Values beyond the outermost boundaries clamp to the end centroids.
        let cb = generate_codebook(64, 4, 50).unwrap();
        // Very large positive → should map to highest centroid
        let idx_pos = cb.quantize_scalar(100.0);
        assert_eq!(idx_pos as usize, cb.centroids.len() - 1);
        // Very large negative → should map to lowest centroid
        let idx_neg = cb.quantize_scalar(-100.0);
        assert_eq!(idx_neg, 0);
    }

    #[test]
    fn test_checked_quantize_rejects_non_finite() {
        let cb = generate_codebook(64, 4, 50).unwrap();
        assert!(matches!(
            cb.checked_quantize_scalar(f64::NAN),
            Err(TurboQuantError::InvalidValue { .. })
        ));
        assert!(matches!(
            cb.checked_quantize_scalar(f64::INFINITY),
            Err(TurboQuantError::InvalidValue { .. })
        ));
    }

    #[test]
    fn test_checked_dequantize_rejects_invalid_index() {
        // 2-bit codebook has indices 0..=3, so 4 is out of range.
        let cb = generate_codebook(64, 2, 50).unwrap();
        assert!(matches!(
            cb.checked_dequantize_scalar(4),
            Err(TurboQuantError::InvalidQuantizationIndex { .. })
        ));
    }
}