Skip to main content

Crate turboquant

Crate turboquant 

Source
Expand description

§TurboQuant

A Rust implementation of Google’s TurboQuant algorithm for high-performance vector quantization, optimized for LLM KV cache compression.

§Overview

TurboQuant provides three complementary quantizers:

  • TurboQuantMSE: Minimizes mean squared reconstruction error via random rotation + Lloyd-Max scalar quantization.
  • TurboQuantProd: Minimizes inner product estimation error via two-stage quantization (MSE stage + QJL residual correction).
  • PolarQuant: Hierarchical polar-coordinate quantizer for KV caches.

§Example

use turboquant::turboquant_mse::TurboQuantMSE;
use turboquant::utils::normalize;

let dim = 128;
let tq = TurboQuantMSE::new(dim, 4, 42).unwrap();
let x: Vec<f64> = (0..dim).map(|i| i as f64).collect();
let x_norm = normalize(&x).unwrap();

let q = tq.quantize(&x_norm).unwrap();
let x_approx = tq.dequantize(&q).unwrap();
println!("Compression ratio: {:.1}x", q.compression_ratio());

Re-exports§

pub use backend::ExecutionBackend;
pub use batch::batch_attention_scores_mse;
pub use batch::batch_attention_scores_mse_with_backend;
pub use batch::batch_dequantize_mse;
pub use batch::batch_dequantize_mse_with_backend;
pub use batch::batch_estimate_inner_products;
pub use batch::batch_estimate_inner_products_with_backend;
pub use batch::batch_ip_error;
pub use batch::batch_mse;
pub use batch::batch_quantize_mse;
pub use batch::batch_quantize_mse_with_backend;
pub use batch::batch_quantize_prod;
pub use batch::batch_quantize_prod_with_backend;
pub use batch::BatchQuantizedMSE;
pub use batch::BatchQuantizedProd;
pub use batch::BatchStats;
pub use bitpack::BitPackedVector;
pub use codebook::Codebook;
pub use error::Result;
pub use error::TurboQuantError;
pub use kv_cache::CacheStats;
pub use kv_cache::KVCacheConfig;
pub use kv_cache::MultiHeadCacheStats;
pub use kv_cache::MultiHeadConfig;
pub use kv_cache::MultiHeadKVCache;
pub use kv_cache::QuantStrategy;
pub use kv_cache::QuantizedKVCache;
pub use polar::PolarQuant;
pub use polar::PolarQuantized;
pub use qjl::QJLQuantized;
pub use qjl::QJL;
pub use real_model::KvCacheUsage;
pub use real_model::RealModelGenerationConfig;
pub use real_model::RealModelQuantizationConfig;
pub use real_model::RealModelRunner;
pub use real_model::RealModelTrace;
pub use real_model::SupportedRealModel;
pub use rotation::RandomRotation;
pub use scalar_quant::ScalarQuantizer;
pub use trace::KvTrace;
pub use trace::TraceMetadata;
pub use trace::TraceSample;
pub use turboquant_mse::QuantizedVector;
pub use turboquant_mse::TurboQuantMSE;
pub use turboquant_prod::ProdQuantized;
pub use turboquant_prod::TurboQuantProd;

Modules§

backend
batch
bitpack
codebook
error
kv_cache
polar
qjl
real_model
rotation
scalar_quant
trace
turboquant_mse
turboquant_prod
utils