turbo-quant 0.2.2

Experimental vector compression sidecars with PolarQuant, TurboQuant, QJL sketches, wire formats, and benchmark receipts
Documentation
use turbo_quant::{KvCacheCompressor, KvQuantPolicy, KvRuntimeConfig};

/// Shadow-scoring example for the KV cache compressor.
///
/// Builds a `KvCacheCompressor` with a quantized key policy and an exact value
/// policy, feeds it eight synthetic key/value tokens, requests the shadow
/// attention scores for one synthetic query, and prints a single-line JSON
/// summary to stdout.
///
/// # Errors
///
/// Propagates any error returned by `KvCacheCompressor::new_runtime`,
/// `compress_token`, or `shadow_attention_scores`.
fn main() -> turbo_quant::Result<()> {
    let dim = 64;

    // NOTE(review): `keep_exact_shadow` presumably is what makes the later
    // `shadow_attention_scores` call meaningful — confirm against the crate docs.
    let config = KvRuntimeConfig {
        head_dim: dim,
        key_policy: KvQuantPolicy::quantized(8, 16),
        value_policy: KvQuantPolicy::Exact,
        seed: 42,
        keep_exact_shadow: true,
    };
    let mut kv_cache = KvCacheCompressor::new_runtime(config)?;

    for step in 0..8 {
        // Each token continues the same sine/cosine ramp where the previous
        // one stopped, so the flat sample position is `step * dim + i`.
        let offset = step * dim;
        let key: Vec<f32> = (0..dim)
            .map(|i| ((offset + i) as f32 * 0.017).sin())
            .collect();
        let value: Vec<f32> = (0..dim)
            .map(|i| ((offset + i) as f32 * 0.019).cos())
            .collect();
        kv_cache.compress_token(&key, &value)?;
    }

    let query: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.023).sin()).collect();

    // Average the per-row absolute error reported by the shadow comparison.
    let rows = kv_cache.shadow_attention_scores(&query)?;
    let abs_error_total: f32 = rows.iter().map(|row| row.abs_error).sum();
    let mean_abs_error = abs_error_total / rows.len() as f32;

    let report = serde_json::json!({
        "schema": "KvShadowExampleV1",
        "tokens": kv_cache.len(),
        "mean_abs_score_error": mean_abs_error,
        "compressed_bytes": kv_cache.compressed_bytes(),
        "uncompressed_bytes": kv_cache.uncompressed_bytes()
    });
    println!("{}", report);

    Ok(())
}