turbo_quant/
lib.rs

1//! # turbo-quant
2//!
3//! Rust implementation of **TurboQuant**, **PolarQuant**, and **QJL** —
4//! the vector quantization algorithms from Google Research (ICLR 2026).
5//!
6//! These algorithms compress high-dimensional vectors (embeddings, KV cache
7//! entries) to 3–8 bits per value with **zero accuracy loss** and
8//! **no dataset-specific calibration**.
9//!
10//! ## Key Properties
11//!
12//! - **Data-oblivious**: no k-means training, no codebook, no calibration set.
13//!   The rotation is seeded once and works on any distribution.
14//! - **Deterministic**: identical `(dim, bits, seed)` always produces the same
15//!   quantizer. State can be fully reconstructed from four integers.
16//! - **Zero accuracy loss**: at 3+ bits, inner product estimates are provably
17//!   unbiased and achieve near-optimal distortion (within ~2.7× of Shannon limit).
18//! - **Instant indexing**: unlike Product Quantization, there is no offline
19//!   training phase. Vectors can be indexed as they arrive.
20//!
21//! ## Quick Start
22//!
23//! ```rust
24//! use turbo_quant::{TurboQuantizer, PolarQuantizer};
25//!
26//! // Compress 1536-dimensional embeddings (OpenAI/sentence-transformer size).
27//! let dim = 64; // use 1536 in production
28//! let q = TurboQuantizer::new(dim, 8, 32, /* seed */ 42).unwrap();
29//!
30//! let database_vector: Vec<f32> = vec![0.1; dim]; // your embedding here
31//! let query_vector: Vec<f32> = vec![0.1; dim];    // your query here
32//!
33//! // Compress the database vector (store this, not the raw f32 array).
34//! let code = q.encode(&database_vector).unwrap();
35//!
36//! // At query time: estimate inner product without decompressing.
37//! let score = q.inner_product_estimate(&code, &query_vector).unwrap();
38//!
39//! // Or just use PolarQuant for a simpler single-stage compressor.
40//! let pq = PolarQuantizer::new(dim, 8, 42).unwrap();
41//! let polar_code = pq.encode(&database_vector).unwrap();
42//! let polar_score = pq.inner_product_estimate(&polar_code, &query_vector).unwrap();
43//! ```
44//!
45//! ## Choosing Parameters
46//!
47//! | Use case | Recommended bits | Recommended projections |
48//! |---|---|---|
49//! | Semantic search (recall@10) | 8 | dim / 4 |
50//! | KV cache compression | 4–6 | dim / 8 |
51//! | Maximum compression | 3 | dim / 16 |
52//!
53//! ## References
54//!
55//! - TurboQuant: Zandieh et al., ICLR 2026
56//! - PolarQuant: Zandieh et al., AISTATS 2026
57//! - QJL: Zandieh et al., AAAI 2025
58
59pub mod error;
60pub mod kv;
61pub mod polar;
62pub mod qjl;
63pub mod rotation;
64pub mod turbo;
65
66pub use error::{Result, TurboQuantError};
67pub use kv::{CompressedToken, KvCacheCompressor, KvCacheConfig};
68pub use polar::{PolarCode, PolarQuantizer};
69pub use qjl::{QjlQuantizer, QjlSketch};
70pub use rotation::{Rotation, StoredRotation};
71pub use turbo::{BatchStats, TurboCode, TurboQuantizer};
turbo_quant/lib.rs

turbo_quant/
lib.rs