Skip to main content

turboquant/
lib.rs

1//! TurboQuant -- KV-Cache quantization with zero accuracy loss.
2//!
3//! Implements Google's TurboQuant algorithm (Zandieh et al., ICLR 2026)
4//! for compressing LLM key-value caches to 3-4 bits per value.
5
6pub mod attention;
7pub mod codebook;
8pub mod error;
9pub mod math;
10pub mod packed;
11pub mod qjl;
12pub mod quantize;
13pub mod rotation;
14
15#[cfg(feature = "candle")]
16pub mod cache;
17
18/// Test helpers shared by integration tests and benches. Declared `pub mod`
19/// so cross-file test code can import them, and `#[doc(hidden)]` to keep
20/// them out of rustdoc — but note that this module *is* part of the crate's
21/// public API surface for SemVer purposes.
22#[doc(hidden)]
23pub mod test_utils;
24
25pub use attention::{PackedImport, QuantizedKVCache};
26pub use error::{Result, TurboQuantError};
27pub use packed::PackedBlock;
28pub use packed::TurboQuantConfig;
29pub use qjl::{
30    compute_qjl_signs, estimate_inner_product, estimate_inner_product_single,
31    estimate_inner_product_with_codebook, precompute_query_projections, quantize_with_qjl,
32    quantize_with_qjl_resources, EstimationContext, QjlBatchResources, QjlBlock,
33};
34pub use quantize::{
35    dequantize_into_with_codebook, dequantize_vec, dequantize_vec_with_codebook, quantize_vec,
36    quantize_vec_with_codebook,
37};
38pub use rotation::RotationOrder;