Expand description
§TurboQuant
A Rust implementation of Google’s TurboQuant algorithm for high-performance vector quantization, optimized for LLM KV cache compression.
§Overview
TurboQuant provides three quantizers:
- TurboQuantMSE: Minimizes mean squared reconstruction error via random rotation + Lloyd-Max scalar quantization.
- TurboQuantProd: Minimizes inner product estimation error via two-stage quantization (MSE stage + QJL residual correction).
- PolarQuant: Hierarchical polar-coordinate quantizer for KV caches.
§Example
use turboquant::turboquant_mse::TurboQuantMSE;
use turboquant::utils::normalize;
let dim = 128;
let tq = TurboQuantMSE::new(dim, 4, 42).unwrap();
let x: Vec<f64> = (0..dim).map(|i| i as f64).collect();
let x_norm = normalize(&x).unwrap();
let q = tq.quantize(&x_norm).unwrap();
let x_approx = tq.dequantize(&q).unwrap();
println!("Compression ratio: {:.1}x", q.compression_ratio());
§Re-exports
pub use backend::ExecutionBackend;
pub use batch::batch_attention_scores_mse;
pub use batch::batch_attention_scores_mse_with_backend;
pub use batch::batch_dequantize_mse;
pub use batch::batch_dequantize_mse_with_backend;
pub use batch::batch_estimate_inner_products;
pub use batch::batch_estimate_inner_products_with_backend;
pub use batch::batch_ip_error;
pub use batch::batch_mse;
pub use batch::batch_quantize_mse;
pub use batch::batch_quantize_mse_with_backend;
pub use batch::batch_quantize_prod;
pub use batch::batch_quantize_prod_with_backend;
pub use batch::BatchQuantizedMSE;
pub use batch::BatchQuantizedProd;
pub use batch::BatchStats;
pub use bitpack::BitPackedVector;
pub use codebook::Codebook;
pub use error::Result;
pub use error::TurboQuantError;
pub use kv_cache::CacheStats;
pub use kv_cache::KVCacheConfig;
pub use kv_cache::MultiHeadCacheStats;
pub use kv_cache::MultiHeadConfig;
pub use kv_cache::MultiHeadKVCache;
pub use kv_cache::QuantStrategy;
pub use kv_cache::QuantizedKVCache;
pub use polar::PolarQuant;
pub use polar::PolarQuantized;
pub use qjl::QJLQuantized;
pub use qjl::QJL;
pub use real_model::KvCacheUsage;
pub use real_model::RealModelGenerationConfig;
pub use real_model::RealModelQuantizationConfig;
pub use real_model::RealModelRunner;
pub use real_model::RealModelTrace;
pub use real_model::SupportedRealModel;
pub use rotation::RandomRotation;
pub use scalar_quant::ScalarQuantizer;
pub use trace::KvTrace;
pub use trace::TraceMetadata;
pub use trace::TraceSample;
pub use turboquant_mse::QuantizedVector;
pub use turboquant_mse::TurboQuantMSE;
pub use turboquant_prod::ProdQuantized;
pub use turboquant_prod::TurboQuantProd;