use crate::error::{Result, RuvLLMError};
use crate::quantize::hadamard::HadamardTransform;
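/// Target bit-widths for TurboQuant scalar quantization.
///
/// Each variant maps to a number of uniform quantization levels via
/// [`scalar_levels`](TurboQuantBits::scalar_levels); values are bit-packed
/// using `ceil(log2(levels))` bits each, with per-block scale/offset metadata
/// and, optionally, one QJL residual sign bit per element on top.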
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TurboQuantBits {
Bits2_5,
Bits3_0,
Bits3_5,
Bits4_0,
}
impl TurboQuantBits {
pub fn scalar_levels(&self) -> u32 {
match self {
            TurboQuantBits::Bits2_5 => 4,
            TurboQuantBits::Bits3_0 => 6,
            TurboQuantBits::Bits3_5 => 8,
            TurboQuantBits::Bits4_0 => 12,
        }
}
pub fn effective_bits(&self) -> f32 {
match self {
TurboQuantBits::Bits2_5 => 2.5,
TurboQuantBits::Bits3_0 => 3.0,
TurboQuantBits::Bits3_5 => 3.5,
TurboQuantBits::Bits4_0 => 4.0,
}
}
pub fn compression_ratio(&self) -> f32 {
32.0 / self.effective_bits()
}
pub fn compression_ratio_vs_fp16(&self) -> f32 {
16.0 / self.effective_bits()
}
}
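/// Configuration for [`TurboQuantCompressor`].
///
/// `block_size` must be a power of two: vectors are zero-padded to a multiple
/// of `block_size`, rotated blockwise with a Hadamard transform seeded by
/// `rotation_seed`, and quantized per block. `enable_qjl_residual` additionally
/// stores the sign of each element's quantization error for use as a
/// correction term at dequantization time.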
#[derive(Debug, Clone)]
pub struct TurboQuantConfig {
pub bits: TurboQuantBits,
pub rotation_seed: u64,
pub enable_qjl_residual: bool,
pub block_size: usize,
}
impl Default for TurboQuantConfig {
fn default() -> Self {
Self {
bits: TurboQuantBits::Bits3_5,
rotation_seed: 42,
enable_qjl_residual: true,
block_size: 128,
}
}
}
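/// A batch of vectors compressed by [`TurboQuantCompressor`].
///
/// `quantized_values` holds the bit-packed codes, `qjl_signs` packs the
/// optional residual sign bits (64 per `u64` word), and `scales`/`offsets`
/// carry one entry per quantization block.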
#[derive(Debug, Clone)]
pub struct TurboQuantized {
pub quantized_values: Vec<u8>,
pub qjl_signs: Vec<u64>,
pub scales: Vec<f32>,
pub offsets: Vec<f32>,
pub dim: usize,
pub num_vectors: usize,
pub bits: TurboQuantBits,
pub has_qjl: bool,
}
impl TurboQuantized {
pub fn memory_bytes(&self) -> usize {
self.quantized_values.len()
+ self.qjl_signs.len() * 8
+ self.scales.len() * 4
+ self.offsets.len() * 4
}
pub fn compression_ratio(&self) -> f32 {
        let original_bytes = self.num_vectors * self.dim * 4;
        if self.memory_bytes() == 0 {
return 0.0;
}
original_bytes as f32 / self.memory_bytes() as f32
}
}
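/// Blockwise scalar quantizer that applies a randomized Hadamard rotation
/// before quantization and an optional one-bit (QJL-style) residual
/// correction after dequantization.
///
/// Illustrative round trip (marked `ignore` so it is not compiled as a
/// doctest; the `use` path depends on where this module sits in the crate):
///
/// ```ignore
/// let compressor = TurboQuantCompressor::with_defaults()?;
/// let data: Vec<f32> = vec![0.5; 128];
/// let compressed = compressor.compress(&data)?;
/// let restored = compressor.decompress(&compressed)?;
/// assert_eq!(restored[0].len(), data.len());
/// ```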
#[derive(Debug)]
pub struct TurboQuantCompressor {
config: TurboQuantConfig,
hadamard: HadamardTransform,
log_block_size: u32,
}
impl TurboQuantCompressor {
pub fn new(config: TurboQuantConfig) -> Result<Self> {
let block_size = config.block_size;
if block_size == 0 || (block_size & (block_size - 1)) != 0 {
return Err(RuvLLMError::Quantization(format!(
"TurboQuant block_size must be power of 2, got {}",
block_size
)));
}
let log_block_size = block_size.trailing_zeros();
let hadamard = HadamardTransform::new(log_block_size, Some(config.rotation_seed))?;
Ok(Self {
config,
hadamard,
log_block_size,
})
}
pub fn with_defaults() -> Result<Self> {
Self::new(TurboQuantConfig::default())
}
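    /// Compresses a single vector; shorthand for `compress_batch(&[data])`.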
pub fn compress(&self, data: &[f32]) -> Result<TurboQuantized> {
self.compress_batch(&[data])
}
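    /// Compresses a batch of equal-length vectors.
    ///
    /// Each vector is zero-padded to a multiple of `block_size`, rotated with
    /// the Hadamard transform, and quantized block by block to
    /// `bits.scalar_levels()` uniform levels. When QJL residuals are enabled,
    /// the sign of each element's quantization error in the rotated domain is
    /// recorded as well.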
pub fn compress_batch(&self, vectors: &[&[f32]]) -> Result<TurboQuantized> {
if vectors.is_empty() {
return Err(RuvLLMError::Quantization(
"Cannot compress empty batch".to_string(),
));
}
        let dim = vectors[0].len();
        if vectors.iter().any(|v| v.len() != dim) {
            return Err(RuvLLMError::Quantization(
                "All vectors in a batch must have the same length".to_string(),
            ));
        }
        let num_vectors = vectors.len();
let block_size = self.config.block_size;
let levels = self.config.bits.scalar_levels();
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let num_blocks_per_vector = padded_dim / block_size;
let total_blocks = num_vectors * num_blocks_per_vector;
let mut scales = Vec::with_capacity(total_blocks);
let mut offsets = Vec::with_capacity(total_blocks);
let bits_per_value = (levels as f32).log2().ceil() as usize;
let bytes_per_block = (block_size * bits_per_value + 7) / 8;
let mut quantized_values = Vec::with_capacity(total_blocks * bytes_per_block);
let qjl_u64s_per_vector = (padded_dim + 63) / 64;
let mut qjl_signs = if self.config.enable_qjl_residual {
Vec::with_capacity(num_vectors * qjl_u64s_per_vector)
} else {
Vec::new()
};
for &vec in vectors {
let mut padded = vec.to_vec();
padded.resize(padded_dim, 0.0);
let mut rotated = padded.clone();
self.rotate_forward(&mut rotated)?;
for block_idx in 0..num_blocks_per_vector {
let start = block_idx * block_size;
let end = start + block_size;
let block = &rotated[start..end];
let (min_val, max_val) = block_min_max(block);
let range = max_val - min_val;
let scale = if range > f32::EPSILON {
range / (levels - 1) as f32
} else {
1.0
};
let offset = min_val;
scales.push(scale);
offsets.push(offset);
let block_start = quantized_values.len();
quantized_values.resize(block_start + bytes_per_block, 0u8);
let mask = (1u8 << bits_per_value) - 1;
let mut global_bit = 0usize;
for &val in block {
let normalized = if scale > f32::EPSILON {
((val - offset) / scale)
.round()
.clamp(0.0, (levels - 1) as f32) as u8
} else {
0u8
};
let qval = normalized & mask;
let byte_idx = block_start + global_bit / 8;
let bit_offset = global_bit % 8;
quantized_values[byte_idx] |= qval << bit_offset;
if bit_offset + bits_per_value > 8 && byte_idx + 1 < quantized_values.len() {
quantized_values[byte_idx + 1] |= qval >> (8 - bit_offset);
}
global_bit += bits_per_value;
}
}
if self.config.enable_qjl_residual {
let reconstructed = self.dequantize_rotated(
&quantized_values
[quantized_values.len() - num_blocks_per_vector * bytes_per_block..],
&scales[scales.len() - num_blocks_per_vector..],
&offsets[offsets.len() - num_blocks_per_vector..],
padded_dim,
);
let residual: Vec<f32> = rotated
.iter()
.zip(reconstructed.iter())
.map(|(r, q)| r - q)
.collect();
let mut sign_idx = 0u64;
let mut bit_count = 0;
for &r in &residual {
if r >= 0.0 {
sign_idx |= 1u64 << bit_count;
}
bit_count += 1;
if bit_count == 64 {
qjl_signs.push(sign_idx);
sign_idx = 0;
bit_count = 0;
}
}
if bit_count > 0 {
qjl_signs.push(sign_idx);
}
}
}
Ok(TurboQuantized {
quantized_values,
qjl_signs,
scales,
offsets,
dim,
num_vectors,
bits: self.config.bits,
has_qjl: self.config.enable_qjl_residual,
})
}
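    /// Decompresses every vector in the batch: unpacks and rescales the codes,
    /// applies the QJL sign correction if present, and inverts the Hadamard
    /// rotation.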
pub fn decompress(&self, compressed: &TurboQuantized) -> Result<Vec<Vec<f32>>> {
let dim = compressed.dim;
let block_size = self.config.block_size;
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let num_blocks_per_vector = padded_dim / block_size;
let levels = compressed.bits.scalar_levels();
let bits_per_value = (levels as f32).log2().ceil() as usize;
let bytes_per_block = (block_size * bits_per_value + 7) / 8;
let mut result = Vec::with_capacity(compressed.num_vectors);
let qjl_u64s_per_vector = (padded_dim + 63) / 64;
for vec_idx in 0..compressed.num_vectors {
let qv_offset = vec_idx * num_blocks_per_vector * bytes_per_block;
let scale_offset = vec_idx * num_blocks_per_vector;
let mut rotated = self.dequantize_rotated(
&compressed.quantized_values
[qv_offset..qv_offset + num_blocks_per_vector * bytes_per_block],
&compressed.scales[scale_offset..scale_offset + num_blocks_per_vector],
&compressed.offsets[scale_offset..scale_offset + num_blocks_per_vector],
padded_dim,
);
if compressed.has_qjl && !compressed.qjl_signs.is_empty() {
let qjl_offset = vec_idx * qjl_u64s_per_vector;
let qjl_slice = &compressed.qjl_signs[qjl_offset..qjl_offset + qjl_u64s_per_vector];
for block_idx in 0..num_blocks_per_vector {
let scale = compressed.scales[scale_offset + block_idx];
let correction_magnitude = scale / (2.0 * (levels as f32).sqrt());
let start = block_idx * block_size;
for k in 0..block_size {
let global_idx = start + k;
let word_idx = global_idx / 64;
let bit_idx = global_idx % 64;
if word_idx < qjl_slice.len() {
let sign = if (qjl_slice[word_idx] >> bit_idx) & 1 == 1 {
1.0
} else {
-1.0
};
rotated[global_idx] += sign * correction_magnitude;
}
}
}
}
self.rotate_inverse(&mut rotated)?;
rotated.truncate(dim);
result.push(rotated);
}
Ok(result)
}
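    /// Decompresses a single vector by its index within the batch.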
pub fn decompress_single(&self, compressed: &TurboQuantized, index: usize) -> Result<Vec<f32>> {
if index >= compressed.num_vectors {
return Err(RuvLLMError::Quantization(format!(
"Vector index {} out of range ({})",
index, compressed.num_vectors
)));
}
let dim = compressed.dim;
let block_size = self.config.block_size;
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let num_blocks_per_vector = padded_dim / block_size;
let levels = compressed.bits.scalar_levels();
let bits_per_value = (levels as f32).log2().ceil() as usize;
let bytes_per_block = (block_size * bits_per_value + 7) / 8;
let qjl_u64s_per_vector = (padded_dim + 63) / 64;
let qv_offset = index * num_blocks_per_vector * bytes_per_block;
let scale_offset = index * num_blocks_per_vector;
let mut rotated = self.dequantize_rotated(
&compressed.quantized_values
[qv_offset..qv_offset + num_blocks_per_vector * bytes_per_block],
&compressed.scales[scale_offset..scale_offset + num_blocks_per_vector],
&compressed.offsets[scale_offset..scale_offset + num_blocks_per_vector],
padded_dim,
);
if compressed.has_qjl && !compressed.qjl_signs.is_empty() {
let qjl_offset = index * qjl_u64s_per_vector;
let qjl_slice = &compressed.qjl_signs[qjl_offset..qjl_offset + qjl_u64s_per_vector];
for block_idx in 0..num_blocks_per_vector {
let scale = compressed.scales[scale_offset + block_idx];
let correction_magnitude = scale / (2.0 * (levels as f32).sqrt());
let start = block_idx * block_size;
for k in 0..block_size {
let global_idx = start + k;
let word_idx = global_idx / 64;
let bit_idx = global_idx % 64;
if word_idx < qjl_slice.len() {
let sign = if (qjl_slice[word_idx] >> bit_idx) & 1 == 1 {
1.0
} else {
-1.0
};
rotated[global_idx] += sign * correction_magnitude;
}
}
}
}
self.rotate_inverse(&mut rotated)?;
rotated.truncate(dim);
Ok(rotated)
}
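    /// Approximate inner product between an uncompressed `query` and the
    /// compressed vector at `index`, computed by decompressing that vector
    /// first.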
pub fn inner_product_asymmetric(
&self,
query: &[f32],
compressed: &TurboQuantized,
index: usize,
) -> Result<f32> {
let decompressed = self.decompress_single(compressed, index)?;
let dot: f32 = query
.iter()
.zip(decompressed.iter())
.map(|(a, b)| a * b)
.sum();
Ok(dot)
}
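    /// Approximate inner products between `query` and every vector in the
    /// batch.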
pub fn inner_product_batch(
&self,
query: &[f32],
compressed: &TurboQuantized,
) -> Result<Vec<f32>> {
let mut results = Vec::with_capacity(compressed.num_vectors);
for i in 0..compressed.num_vectors {
results.push(self.inner_product_asymmetric(query, compressed, i)?);
}
Ok(results)
}
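    /// Like [`inner_product_asymmetric`](Self::inner_product_asymmetric), but
    /// rotates the query once and computes the dot product directly in the
    /// rotated domain, skipping the inverse Hadamard transform. Because the
    /// rotation preserves inner products, the result agrees with the
    /// unoptimized path up to floating-point error (see
    /// `test_optimized_inner_product_matches_original`).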
pub fn inner_product_asymmetric_optimized(
&self,
query: &[f32],
compressed: &TurboQuantized,
index: usize,
) -> Result<f32> {
if index >= compressed.num_vectors {
return Err(RuvLLMError::Quantization(format!(
"Vector index {} out of range ({})",
index, compressed.num_vectors
)));
}
let dim = compressed.dim;
let block_size = self.config.block_size;
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let mut rotated_query = query.to_vec();
rotated_query.resize(padded_dim, 0.0);
self.rotate_forward(&mut rotated_query)?;
self.dot_in_rotated_space(&rotated_query, compressed, index)
}
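    /// Batch variant of
    /// [`inner_product_asymmetric_optimized`](Self::inner_product_asymmetric_optimized):
    /// the query is rotated once and reused for every compressed vector.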
pub fn inner_product_batch_optimized(
&self,
query: &[f32],
compressed: &TurboQuantized,
) -> Result<Vec<f32>> {
let dim = compressed.dim;
let block_size = self.config.block_size;
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let mut rotated_query = query.to_vec();
rotated_query.resize(padded_dim, 0.0);
self.rotate_forward(&mut rotated_query)?;
let mut results = Vec::with_capacity(compressed.num_vectors);
for i in 0..compressed.num_vectors {
results.push(self.dot_in_rotated_space(&rotated_query, compressed, i)?);
}
Ok(results)
}
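    /// Dot product between an already-rotated query and the rotated-domain
    /// reconstruction (codes plus QJL correction, no inverse rotation) of the
    /// compressed vector at `index`.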
fn dot_in_rotated_space(
&self,
rotated_query: &[f32],
compressed: &TurboQuantized,
index: usize,
) -> Result<f32> {
let block_size = self.config.block_size;
let dim = compressed.dim;
let padded_dim = ((dim + block_size - 1) / block_size) * block_size;
let num_blocks_per_vector = padded_dim / block_size;
let levels = compressed.bits.scalar_levels();
let bits_per_value = (levels as f32).log2().ceil() as usize;
let bytes_per_block = (block_size * bits_per_value + 7) / 8;
let qjl_u64s_per_vector = (padded_dim + 63) / 64;
let qv_offset = index * num_blocks_per_vector * bytes_per_block;
let scale_offset = index * num_blocks_per_vector;
let mut rotated_key = self.dequantize_rotated(
&compressed.quantized_values
[qv_offset..qv_offset + num_blocks_per_vector * bytes_per_block],
&compressed.scales[scale_offset..scale_offset + num_blocks_per_vector],
&compressed.offsets[scale_offset..scale_offset + num_blocks_per_vector],
padded_dim,
);
if compressed.has_qjl && !compressed.qjl_signs.is_empty() {
let qjl_offset = index * qjl_u64s_per_vector;
let qjl_slice = &compressed.qjl_signs[qjl_offset..qjl_offset + qjl_u64s_per_vector];
for block_idx in 0..num_blocks_per_vector {
let scale = compressed.scales[scale_offset + block_idx];
let correction_magnitude = scale / (2.0 * (levels as f32).sqrt());
let start = block_idx * block_size;
for k in 0..block_size {
let global_idx = start + k;
let word_idx = global_idx / 64;
let bit_idx = global_idx % 64;
if word_idx < qjl_slice.len() {
let sign = if (qjl_slice[word_idx] >> bit_idx) & 1 == 1 {
1.0
} else {
-1.0
};
rotated_key[global_idx] += sign * correction_magnitude;
}
}
}
}
let dot: f32 = rotated_query
.iter()
.zip(rotated_key.iter())
.map(|(a, b)| a * b)
.sum();
Ok(dot)
}
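    /// Applies the forward Hadamard rotation to each block in place.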
fn rotate_forward(&self, data: &mut [f32]) -> Result<()> {
let block_size = self.config.block_size;
let num_blocks = data.len() / block_size;
for i in 0..num_blocks {
let start = i * block_size;
let end = start + block_size;
self.hadamard.forward_inplace(&mut data[start..end]);
}
Ok(())
}
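    /// Applies the inverse Hadamard rotation to each block in place.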
fn rotate_inverse(&self, data: &mut [f32]) -> Result<()> {
let block_size = self.config.block_size;
let num_blocks = data.len() / block_size;
for i in 0..num_blocks {
let start = i * block_size;
let end = start + block_size;
self.hadamard.inverse_inplace(&mut data[start..end]);
}
Ok(())
}
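    /// Unpacks the bit-packed codes and maps them back to f32 values in the
    /// rotated domain (no QJL correction, no inverse rotation).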
fn dequantize_rotated(
&self,
quantized_data: &[u8],
scales: &[f32],
offsets: &[f32],
padded_dim: usize,
) -> Vec<f32> {
let block_size = self.config.block_size;
let num_blocks = padded_dim / block_size;
let levels = self.config.bits.scalar_levels();
let bits_per_value = (levels as f32).log2().ceil() as usize;
let bytes_per_block = (block_size * bits_per_value + 7) / 8;
let mask = (1u8 << bits_per_value) - 1;
let mut result = vec![0.0f32; padded_dim];
for block_idx in 0..num_blocks {
let scale = scales[block_idx];
let offset = offsets[block_idx];
let byte_start = block_idx * bytes_per_block;
let mut global_bit = 0usize;
for k in 0..block_size {
let byte_idx = byte_start + global_bit / 8;
let bit_offset = global_bit % 8;
let mut quantized_val = 0u8;
if byte_idx < quantized_data.len() {
quantized_val = (quantized_data[byte_idx] >> bit_offset) & mask;
if bit_offset + bits_per_value > 8 && byte_idx + 1 < quantized_data.len() {
let overflow_bits = quantized_data[byte_idx + 1] << (8 - bit_offset);
quantized_val = (quantized_val | overflow_bits) & mask;
}
}
result[block_idx * block_size + k] = quantized_val as f32 * scale + offset;
global_bit += bits_per_value;
}
}
result
}
}
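/// One compressed key/value pair in a [`TurboQuantCacheTier`], tagged with its
/// sequence position.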
#[derive(Debug, Clone)]
pub struct TurboQuantKvPair {
pub key: TurboQuantized,
pub value: TurboQuantized,
pub position: usize,
}
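/// KV-cache tier that stores each pushed key/value vector in TurboQuant
/// compressed form and decompresses on access.
///
/// Illustrative usage (marked `ignore`; `key_vec` and `value_vec` are
/// placeholder inputs):
///
/// ```ignore
/// let mut tier = TurboQuantCacheTier::with_defaults()?;
/// tier.push(&key_vec, &value_vec, 0)?;
/// let (key, value, position) = tier.get(0)?;
/// ```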
#[derive(Debug)]
pub struct TurboQuantCacheTier {
compressor: TurboQuantCompressor,
pairs: Vec<TurboQuantKvPair>,
config: TurboQuantConfig,
}
impl TurboQuantCacheTier {
pub fn new(config: TurboQuantConfig) -> Result<Self> {
let compressor = TurboQuantCompressor::new(config.clone())?;
Ok(Self {
compressor,
pairs: Vec::new(),
config,
})
}
pub fn with_defaults() -> Result<Self> {
Self::new(TurboQuantConfig::default())
}
pub fn push(&mut self, keys: &[f32], values: &[f32], position: usize) -> Result<()> {
let compressed_key = self.compressor.compress(keys)?;
let compressed_value = self.compressor.compress(values)?;
self.pairs.push(TurboQuantKvPair {
key: compressed_key,
value: compressed_value,
position,
});
Ok(())
}
pub fn get(&self, index: usize) -> Result<(Vec<f32>, Vec<f32>, usize)> {
let pair = self.pairs.get(index).ok_or_else(|| {
RuvLLMError::Quantization(format!("KV pair index {} out of range", index))
})?;
let keys = self.compressor.decompress_single(&pair.key, 0)?;
let values = self.compressor.decompress_single(&pair.value, 0)?;
Ok((keys, values, pair.position))
}
pub fn get_all_kv(&self) -> Result<(Vec<f32>, Vec<f32>)> {
let mut all_keys = Vec::new();
let mut all_values = Vec::new();
for pair in &self.pairs {
let keys = self.compressor.decompress_single(&pair.key, 0)?;
let values = self.compressor.decompress_single(&pair.value, 0)?;
all_keys.extend(keys);
all_values.extend(values);
}
Ok((all_keys, all_values))
}
pub fn len(&self) -> usize {
self.pairs.len()
}
pub fn is_empty(&self) -> bool {
self.pairs.is_empty()
}
pub fn memory_bytes(&self) -> usize {
self.pairs
.iter()
.map(|p| p.key.memory_bytes() + p.value.memory_bytes())
.sum()
}
pub fn evict_oldest(&mut self, count: usize) {
let drain_count = count.min(self.pairs.len());
self.pairs.drain(0..drain_count);
}
pub fn clear(&mut self) {
self.pairs.clear();
}
pub fn stats(&self) -> TurboQuantStats {
let total_compressed = self.memory_bytes();
let dim = self.pairs.first().map(|p| p.key.dim).unwrap_or(0);
let original_bytes = self.pairs.len() * dim * 4 * 2;
TurboQuantStats {
num_pairs: self.pairs.len(),
dim,
compressed_bytes: total_compressed,
original_bytes,
compression_ratio: if total_compressed > 0 {
original_bytes as f32 / total_compressed as f32
} else {
0.0
},
bits_per_value: self.config.bits.effective_bits(),
}
}
}
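/// Summary statistics for a [`TurboQuantCacheTier`]: pair count, vector
/// dimension, compressed vs. original f32 byte counts, the resulting
/// compression ratio, and the configured effective bits per value.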
#[derive(Debug, Clone)]
pub struct TurboQuantStats {
pub num_pairs: usize,
pub dim: usize,
pub compressed_bytes: usize,
pub original_bytes: usize,
pub compression_ratio: f32,
pub bits_per_value: f32,
}
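/// Returns the (min, max) of a block; callers ensure `data` is non-empty.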
#[inline]
fn block_min_max(data: &[f32]) -> (f32, f32) {
let mut min = f32::MAX;
let mut max = f32::MIN;
for &v in data {
if v < min {
min = v;
}
if v > max {
max = v;
}
}
(min, max)
}
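/// Embedding store that keeps an entire batch of embeddings in TurboQuant
/// compressed form and supports lookup by ID plus brute-force inner-product
/// search over the compressed batch.
///
/// Illustrative usage (marked `ignore`; `embeddings`, `ids`, and `query` are
/// placeholder inputs):
///
/// ```ignore
/// let mut store = TurboQuantEmbeddingStore::new(128, TurboQuantConfig::default())?;
/// store.build_from_batch(&embeddings, &ids)?;
/// let top_matches = store.search(&query, 10)?;
/// ```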
#[derive(Debug)]
pub struct TurboQuantEmbeddingStore {
compressor: TurboQuantCompressor,
compressed: Option<TurboQuantized>,
dim: usize,
id_to_index: Vec<u64>,
}
impl TurboQuantEmbeddingStore {
pub fn new(dim: usize, config: TurboQuantConfig) -> Result<Self> {
let compressor = TurboQuantCompressor::new(config)?;
Ok(Self {
compressor,
compressed: None,
dim,
id_to_index: Vec::new(),
})
}
pub fn build_from_batch(&mut self, embeddings: &[Vec<f32>], ids: &[u64]) -> Result<()> {
if embeddings.len() != ids.len() {
return Err(RuvLLMError::Quantization(
"Embedding and ID count mismatch".to_string(),
));
}
if embeddings.is_empty() {
return Ok(());
}
let refs: Vec<&[f32]> = embeddings.iter().map(|v| v.as_slice()).collect();
self.compressed = Some(self.compressor.compress_batch(&refs)?);
self.id_to_index = ids.to_vec();
Ok(())
}
pub fn get(&self, id: u64) -> Result<Vec<f32>> {
let index = self
.id_to_index
.iter()
.position(|&i| i == id)
.ok_or_else(|| RuvLLMError::Quantization(format!("Embedding ID {} not found", id)))?;
let compressed = self
.compressed
.as_ref()
.ok_or_else(|| RuvLLMError::Quantization("Store is empty".to_string()))?;
self.compressor.decompress_single(compressed, index)
}
pub fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<(u64, f32)>> {
let compressed = self
.compressed
.as_ref()
.ok_or_else(|| RuvLLMError::Quantization("Store is empty".to_string()))?;
let scores = self.compressor.inner_product_batch(query, compressed)?;
let mut scored: Vec<(u64, f32)> = self
.id_to_index
.iter()
.zip(scores.iter())
.map(|(&id, &score)| (id, score))
.collect();
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.truncate(top_k);
Ok(scored)
}
pub fn len(&self) -> usize {
self.id_to_index.len()
}
pub fn is_empty(&self) -> bool {
self.id_to_index.is_empty()
}
pub fn memory_bytes(&self) -> usize {
self.compressed
.as_ref()
.map(|c| c.memory_bytes())
.unwrap_or(0)
+ self.id_to_index.len() * 8
}
pub fn compression_ratio(&self) -> f32 {
let original = self.id_to_index.len() * self.dim * 4;
let compressed = self.memory_bytes();
if compressed == 0 {
return 0.0;
}
original as f32 / compressed as f32
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_turbo_quant_roundtrip_3_5bit() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let data: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 32.0).collect();
let compressed = compressor.compress(&data).unwrap();
let decompressed = compressor.decompress(&compressed).unwrap();
assert_eq!(decompressed.len(), 1);
assert_eq!(decompressed[0].len(), data.len());
let mse: f32 = data
.iter()
.zip(decompressed[0].iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f32>()
/ data.len() as f32;
assert!(mse < 0.1, "MSE too high: {}", mse);
}
#[test]
fn test_turbo_quant_roundtrip_4bit() {
let config = TurboQuantConfig {
bits: TurboQuantBits::Bits4_0,
..Default::default()
};
let compressor = TurboQuantCompressor::new(config).unwrap();
let data: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 32.0).collect();
let compressed = compressor.compress(&data).unwrap();
let decompressed = compressor.decompress(&compressed).unwrap();
let mse: f32 = data
.iter()
.zip(decompressed[0].iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f32>()
/ data.len() as f32;
assert!(mse < 0.05, "4-bit MSE too high: {}", mse);
}
#[test]
fn test_compression_ratio() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let data: Vec<f32> = (0..256).map(|i| (i as f32) / 256.0).collect();
let compressed = compressor.compress(&data).unwrap();
let ratio = compressed.compression_ratio();
assert!(ratio > 4.0, "Compression ratio too low: {}", ratio);
}
#[test]
fn test_inner_product_preservation() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let a: Vec<f32> = (0..128).map(|i| (i as f32) / 128.0).collect();
let b: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
let true_ip: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let compressed_b = compressor.compress(&b).unwrap();
let approx_ip = compressor
.inner_product_asymmetric(&a, &compressed_b, 0)
.unwrap();
let relative_error = ((true_ip - approx_ip) / true_ip).abs();
assert!(
relative_error < 0.15,
"Inner product relative error too high: {} (true={}, approx={})",
relative_error,
true_ip,
approx_ip
);
}
#[test]
fn test_batch_compression() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let v1: Vec<f32> = (0..128).map(|i| i as f32 / 128.0).collect();
let v2: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
let v3: Vec<f32> = (0..128).map(|i| ((i * 7) % 128) as f32 / 128.0).collect();
let compressed = compressor.compress_batch(&[&v1, &v2, &v3]).unwrap();
assert_eq!(compressed.num_vectors, 3);
let decompressed = compressor.decompress(&compressed).unwrap();
assert_eq!(decompressed.len(), 3);
for (original, restored) in [&v1, &v2, &v3].iter().zip(decompressed.iter()) {
let mse: f32 = original
.iter()
.zip(restored.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f32>()
/ original.len() as f32;
assert!(mse < 0.1, "Batch MSE too high: {}", mse);
}
}
#[test]
fn test_kv_cache_tier() {
let mut tier = TurboQuantCacheTier::with_defaults().unwrap();
let key: Vec<f32> = (0..128).map(|i| i as f32 / 128.0).collect();
let value: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
for pos in 0..10 {
tier.push(&key, &value, pos).unwrap();
}
assert_eq!(tier.len(), 10);
let (k, v, pos) = tier.get(5).unwrap();
assert_eq!(pos, 5);
assert_eq!(k.len(), 128);
assert_eq!(v.len(), 128);
let stats = tier.stats();
assert_eq!(stats.num_pairs, 10);
assert!(stats.compression_ratio > 3.0);
}
#[test]
fn test_kv_cache_eviction() {
let mut tier = TurboQuantCacheTier::with_defaults().unwrap();
let key: Vec<f32> = vec![1.0; 128];
let value: Vec<f32> = vec![0.5; 128];
for pos in 0..20 {
tier.push(&key, &value, pos).unwrap();
}
assert_eq!(tier.len(), 20);
tier.evict_oldest(5);
assert_eq!(tier.len(), 15);
}
#[test]
fn test_non_power_of_2_dimension() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let data: Vec<f32> = (0..100).map(|i| i as f32 / 100.0).collect();
let compressed = compressor.compress(&data).unwrap();
let decompressed = compressor.decompress(&compressed).unwrap();
        assert_eq!(decompressed[0].len(), 100);
    }
#[test]
fn test_bit_configurations() {
for bits in [
TurboQuantBits::Bits2_5,
TurboQuantBits::Bits3_0,
TurboQuantBits::Bits3_5,
TurboQuantBits::Bits4_0,
] {
let config = TurboQuantConfig {
bits,
..Default::default()
};
let compressor = TurboQuantCompressor::new(config).unwrap();
let data: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 32.0).collect();
let compressed = compressor.compress(&data).unwrap();
let decompressed = compressor.decompress(&compressed).unwrap();
assert_eq!(decompressed[0].len(), 128);
assert_eq!(compressed.bits, bits);
}
}
#[test]
fn test_without_qjl() {
let config = TurboQuantConfig {
enable_qjl_residual: false,
..Default::default()
};
let compressor = TurboQuantCompressor::new(config).unwrap();
let data: Vec<f32> = (0..128).map(|i| i as f32 / 128.0).collect();
let compressed = compressor.compress(&data).unwrap();
assert!(!compressed.has_qjl);
assert!(compressed.qjl_signs.is_empty());
let decompressed = compressor.decompress(&compressed).unwrap();
assert_eq!(decompressed[0].len(), 128);
}
#[test]
fn test_memory_bytes() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let data: Vec<f32> = vec![1.0; 256];
let compressed = compressor.compress(&data).unwrap();
let mem = compressed.memory_bytes();
let original = 256 * 4;
assert!(
mem < original,
"Compressed {} >= original {}",
mem,
original
);
}
#[test]
fn test_embedding_store() {
let config = TurboQuantConfig::default();
let mut store = TurboQuantEmbeddingStore::new(128, config).unwrap();
let embeddings: Vec<Vec<f32>> = (0..10)
.map(|i| (0..128).map(|j| ((i * 128 + j) as f32) / 1280.0).collect())
.collect();
let ids: Vec<u64> = (0..10).collect();
store.build_from_batch(&embeddings, &ids).unwrap();
assert_eq!(store.len(), 10);
assert!(store.compression_ratio() > 3.0);
let retrieved = store.get(5).unwrap();
assert_eq!(retrieved.len(), 128);
let mse: f32 = embeddings[5]
.iter()
.zip(retrieved.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f32>()
/ 128.0;
assert!(mse < 0.1, "Embedding retrieval MSE too high: {}", mse);
}
#[test]
fn test_embedding_search() {
let config = TurboQuantConfig::default();
let mut store = TurboQuantEmbeddingStore::new(128, config).unwrap();
let embeddings: Vec<Vec<f32>> = (0..5)
.map(|i| {
let mut v = vec![0.0f32; 128];
                v[i * 25] = 1.0;
                for j in 0..128 {
v[j] += 0.01;
}
v
})
.collect();
let ids: Vec<u64> = (100..105).collect();
store.build_from_batch(&embeddings, &ids).unwrap();
let mut query = vec![0.01f32; 128];
query[50] = 1.0;
let results = store.search(&query, 3).unwrap();
assert!(!results.is_empty());
assert_eq!(
results[0].0, 102,
"Expected top result to be ID 102, got {}",
results[0].0
);
}
#[test]
fn test_optimized_inner_product_matches_original() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let a: Vec<f32> = (0..128).map(|i| (i as f32) / 128.0).collect();
let b: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
let compressed_b = compressor.compress(&b).unwrap();
let original = compressor
.inner_product_asymmetric(&a, &compressed_b, 0)
.unwrap();
let optimized = compressor
.inner_product_asymmetric_optimized(&a, &compressed_b, 0)
.unwrap();
let diff = (original - optimized).abs();
assert!(
diff < 1e-4,
"Optimized inner product diverges from original: original={}, optimized={}, diff={}",
original,
optimized,
diff
);
}
#[test]
fn test_optimized_batch_matches_original() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let query: Vec<f32> = (0..128).map(|i| ((i * 3) % 128) as f32 / 128.0).collect();
let v1: Vec<f32> = (0..128).map(|i| i as f32 / 128.0).collect();
let v2: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
let v3: Vec<f32> = (0..128).map(|i| ((i * 7) % 128) as f32 / 128.0).collect();
let compressed = compressor.compress_batch(&[&v1, &v2, &v3]).unwrap();
let original_results = compressor.inner_product_batch(&query, &compressed).unwrap();
let optimized_results = compressor
.inner_product_batch_optimized(&query, &compressed)
.unwrap();
assert_eq!(original_results.len(), optimized_results.len());
for (i, (orig, opt)) in original_results
.iter()
.zip(optimized_results.iter())
.enumerate()
{
let diff = (orig - opt).abs();
assert!(
diff < 1e-4,
"Batch result {} diverges: original={}, optimized={}, diff={}",
i,
orig,
opt,
diff
);
}
}
#[test]
fn test_optimized_inner_product_without_qjl() {
let config = TurboQuantConfig {
enable_qjl_residual: false,
..Default::default()
};
let compressor = TurboQuantCompressor::new(config).unwrap();
let a: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 64.0).collect();
let b: Vec<f32> = (0..128).map(|i| (i as f32) / 128.0).collect();
let compressed_b = compressor.compress(&b).unwrap();
let original = compressor
.inner_product_asymmetric(&a, &compressed_b, 0)
.unwrap();
let optimized = compressor
.inner_product_asymmetric_optimized(&a, &compressed_b, 0)
.unwrap();
let diff = (original - optimized).abs();
assert!(
diff < 1e-4,
"No-QJL optimized diverges: original={}, optimized={}, diff={}",
original,
optimized,
diff
);
}
#[test]
fn test_optimized_inner_product_all_bit_widths() {
for bits in [
TurboQuantBits::Bits2_5,
TurboQuantBits::Bits3_0,
TurboQuantBits::Bits3_5,
TurboQuantBits::Bits4_0,
] {
let config = TurboQuantConfig {
bits,
..Default::default()
};
let compressor = TurboQuantCompressor::new(config).unwrap();
let query: Vec<f32> = (0..128).map(|i| (i as f32) / 128.0).collect();
let key: Vec<f32> = (0..128).map(|i| (127 - i) as f32 / 128.0).collect();
let compressed = compressor.compress(&key).unwrap();
let original = compressor
.inner_product_asymmetric(&query, &compressed, 0)
.unwrap();
let optimized = compressor
.inner_product_asymmetric_optimized(&query, &compressed, 0)
.unwrap();
let diff = (original - optimized).abs();
assert!(
diff < 1e-3,
"Bits {:?}: original={}, optimized={}, diff={}",
bits,
original,
optimized,
diff
);
}
}
#[test]
fn test_optimized_non_power_of_2_dimension() {
let compressor = TurboQuantCompressor::with_defaults().unwrap();
let query: Vec<f32> = (0..100).map(|i| i as f32 / 100.0).collect();
let key: Vec<f32> = (0..100).map(|i| (99 - i) as f32 / 100.0).collect();
let compressed = compressor.compress(&key).unwrap();
let original = compressor
.inner_product_asymmetric(&query, &compressed, 0)
.unwrap();
let optimized = compressor
.inner_product_asymmetric_optimized(&query, &compressed, 0)
.unwrap();
let diff = (original - optimized).abs();
assert!(
diff < 1e-3,
"Non-pow2 dim: original={}, optimized={}, diff={}",
original,
optimized,
diff
);
}
}