use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use crate::simd::quantized_manhattan_distance_simd;
/// Settings for scalar quantization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizationConfig {
/// Requested bit width per component (8 by default).
/// NOTE(review): `ScalarQuantizer` in this file never reads this field; codes are always stored as `u8`.
pub bits: u8,
/// When true, `ScalarQuantizer::fit` scales each dimension to 0..=127; otherwise to 0..=255.
pub signed: bool,
}
impl Default for QuantizationConfig {
fn default() -> Self {
Self {
bits: 8,
signed: false, }
}
}
/// Per-dimension min/max scalar quantizer that encodes each `f32` component as one `u8`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalarQuantizer {
// Settings supplied at construction; `fit` consults `signed` to pick the top level.
config: QuantizationConfig,
// Smallest value seen per dimension during `fit`.
min_vals: Vec<f32>,
// Largest value seen per dimension during `fit`.
max_vals: Vec<f32>,
// Per-dimension multiplier from value offset to level (1.0 when the range is <= 1e-10).
scales: Vec<f32>,
// Component count of the training vectors; 0 until `fit` succeeds.
dimensions: usize,
// Set to true at the end of a successful `fit`.
is_fitted: bool,
}
impl ScalarQuantizer {
    /// Builds an unfitted quantizer holding `config`.
    ///
    /// The per-dimension tables start empty; [`fit`](Self::fit) must succeed
    /// before `quantize` or `dequantize` may be called.
    pub fn new(config: QuantizationConfig) -> Self {
        Self {
            is_fitted: false,
            dimensions: 0,
            scales: Vec::new(),
            max_vals: Vec::new(),
            min_vals: Vec::new(),
            config,
        }
    }

    /// Learns the per-dimension value range from `vectors`.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the vectors differ in length.
    /// Nothing is modified in either case.
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        let Some(first) = vectors.first() else {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        };
        let dim = first.len();
        if !vectors.iter().all(|v| v.len() == dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        // Running extrema, one slot per dimension.
        let mut lows = vec![f32::INFINITY; dim];
        let mut highs = vec![f32::NEG_INFINITY; dim];
        for sample in vectors {
            for (i, &component) in sample.iter().enumerate() {
                lows[i] = lows[i].min(component);
                highs[i] = highs[i].max(component);
            }
        }

        // Highest level a component may map to.
        let ceiling: f32 = if self.config.signed { 127.0 } else { 255.0 };
        let scales: Vec<f32> = lows
            .iter()
            .zip(&highs)
            .map(|(&lo, &hi)| {
                let span = hi - lo;
                // A (near-)constant dimension gets a neutral scale to avoid dividing by ~0.
                if span > 1e-10 {
                    ceiling / span
                } else {
                    1.0
                }
            })
            .collect();

        self.dimensions = dim;
        self.min_vals = lows;
        self.max_vals = highs;
        self.scales = scales;
        self.is_fitted = true;
        Ok(())
    }

    /// Encodes `vector` as one byte per component.
    ///
    /// Components are first clipped to the fitted range of their dimension.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `vector` has the wrong length.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        let mut codes = Vec::with_capacity(vector.len());
        for (i, &component) in vector.iter().enumerate() {
            let lo = self.min_vals[i];
            let hi = self.max_vals[i];
            let bounded = component.max(lo).min(hi);
            let level = ((bounded - lo) * self.scales[i]).round();
            codes.push(level.clamp(0.0, 255.0) as u8);
        }
        codes
    }

    /// Maps byte codes back to approximate `f32` values.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `quantized` has the wrong length.
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(
            quantized.len(),
            self.dimensions,
            "Quantized vector dimension mismatch"
        );
        let mut restored = Vec::with_capacity(quantized.len());
        for (i, &code) in quantized.iter().enumerate() {
            let offset = code as f32 / self.scales[i];
            restored.push(offset + self.min_vals[i]);
        }
        restored
    }

    /// Applies [`quantize`](Self::quantize) to every vector, preserving order.
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        let mut encoded = Vec::with_capacity(vectors.len());
        for v in vectors {
            encoded.push(self.quantize(v));
        }
        encoded
    }

    /// Applies [`dequantize`](Self::dequantize) to every vector, preserving order.
    pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
        let mut decoded = Vec::with_capacity(quantized.len());
        for q in quantized {
            decoded.push(self.dequantize(q));
        }
        decoded
    }

    /// Distance between two code vectors, delegated to
    /// `quantized_manhattan_distance_simd` and converted to `f32`.
    ///
    /// # Panics
    /// Panics if `a` and `b` differ in length.
    pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector dimension mismatch");
        let raw = quantized_manhattan_distance_simd(a, b);
        raw as f32
    }

    /// Fixed size ratio reported for this scheme (4 bytes of `f32` per 1-byte code).
    pub fn compression_ratio(&self) -> f32 {
        const RATIO: f32 = 4.0;
        RATIO
    }

    /// Fixed fraction of memory reported as saved.
    pub fn memory_savings(&self) -> f32 {
        const SAVED: f32 = 0.75;
        SAVED
    }

    /// Whether [`fit`](Self::fit) has completed successfully.
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Number of components per vector (0 before fitting).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
/// Brute-force search index over vectors encoded by a `ScalarQuantizer`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedVectorIndex {
// Quantizer fitted on the vectors passed to `build`.
quantizer: ScalarQuantizer,
// One code vector per indexed entity, in `build` input order.
quantized_vectors: Vec<Vec<u8>>,
// Entity identifiers, parallel to `quantized_vectors`.
entity_ids: Vec<String>,
}
impl QuantizedVectorIndex {
    /// Creates an empty index whose quantizer uses `config`.
    pub fn new(config: QuantizationConfig) -> Self {
        Self {
            quantizer: ScalarQuantizer::new(config),
            quantized_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Fits the quantizer on `vectors` and stores their encoded form.
    ///
    /// Any previously indexed content is replaced.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the quantizer rejects the data
    /// (for example, vectors of differing length).
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
        self.quantizer.fit(&float_vecs)?;
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);
        Ok(())
    }

    /// Returns up to `k` `(entity_id, distance)` pairs, nearest first.
    ///
    /// The query is quantized and compared against every stored vector.
    ///
    /// # Errors
    /// Fails when the index has not been built, or when `query` does not have
    /// the dimension the index was built with. The latter used to panic inside
    /// the quantizer even though this function returns a `Result`.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }
        let expected = self.quantizer.dimensions();
        if query.len() != expected {
            return Err(anyhow!(
                "Query dimension mismatch: expected {}, got {}",
                expected,
                query.len()
            ));
        }
        let quantized_query = self.quantizer.quantize(query);
        let mut distances: Vec<(usize, f32)> = self
            .quantized_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
            .collect();
        // Stable sort: entries with equal distance keep their insertion order.
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Summarises the index size and the quantizer's reported compression figures.
    pub fn stats(&self) -> QuantizedIndexStats {
        let num_vectors = self.quantized_vectors.len();
        let dimensions = self.quantizer.dimensions();
        // 4 bytes per f32 component before quantization, 1 byte per component after.
        let original_bytes = num_vectors * dimensions * 4;
        let quantized_bytes = num_vectors * dimensions;
        QuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            quantized_bytes,
        }
    }
}
/// Size summary returned by `QuantizedVectorIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedIndexStats {
/// Number of stored vectors.
pub num_vectors: usize,
/// Components per vector.
pub dimensions: usize,
/// Ratio reported by the quantizer's `compression_ratio()`.
pub compression_ratio: f32,
/// Fraction reported by the quantizer's `memory_savings()`.
pub memory_savings: f32,
/// `num_vectors * dimensions * 4`.
pub original_bytes: usize,
/// `num_vectors * dimensions`.
pub quantized_bytes: usize,
}
/// Settings for binary (one bit per component) quantization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizationConfig {
/// When true, `BinaryQuantizer::fit` uses each dimension's mean as its threshold; otherwise every threshold is 0.0.
pub use_mean_threshold: bool,
}
impl Default for BinaryQuantizationConfig {
fn default() -> Self {
Self {
use_mean_threshold: true,
}
}
}
/// Quantizer that reduces each component to one bit by comparing it with a per-dimension threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizer {
// Settings supplied at construction.
config: BinaryQuantizationConfig,
// Per-dimension cut-off; a component strictly above it encodes as 1.
thresholds: Vec<f32>,
// Component count of the training vectors; 0 until `fit` succeeds.
dimensions: usize,
// Set to true at the end of a successful `fit`.
is_fitted: bool,
}
impl BinaryQuantizer {
    /// Creates an unfitted quantizer holding `config`.
    pub fn new(config: BinaryQuantizationConfig) -> Self {
        Self {
            config,
            thresholds: Vec::new(),
            dimensions: 0,
            is_fitted: false,
        }
    }

    /// Computes the per-dimension thresholds from `vectors`.
    ///
    /// With `use_mean_threshold` the threshold is the arithmetic mean of each
    /// dimension; otherwise every threshold is 0.0.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the vectors differ in length.
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        }
        let dim = vectors[0].len();
        if vectors.iter().any(|v| v.len() != dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }
        self.dimensions = dim;
        self.thresholds = vec![0.0; dim];
        if self.config.use_mean_threshold {
            // Sum each dimension, then divide by the sample count.
            for vector in vectors {
                for (i, &val) in vector.iter().enumerate() {
                    self.thresholds[i] += val;
                }
            }
            let count = vectors.len() as f32;
            for threshold in &mut self.thresholds {
                *threshold /= count;
            }
        }
        self.is_fitted = true;
        Ok(())
    }

    /// Packs `vector` into bits, eight components per byte.
    ///
    /// Component `i` sets bit `i % 8` of byte `i / 8` when it is strictly
    /// greater than its threshold. Unused bits of the last byte stay 0.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `vector` has the wrong length.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        let num_bytes = self.dimensions.div_ceil(8);
        let mut binary = vec![0u8; num_bytes];
        for (i, &val) in vector.iter().enumerate() {
            if val > self.thresholds[i] {
                binary[i / 8] |= 1u8 << (i % 8);
            }
        }
        binary
    }

    /// Expands a packed bit vector into coarse `f32` values.
    ///
    /// A set bit becomes `threshold + 1.0`, a clear bit `threshold - 1.0`.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `binary` has the wrong byte length.
    pub fn dequantize(&self, binary: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        let expected_bytes = self.dimensions.div_ceil(8);
        assert_eq!(binary.len(), expected_bytes, "Binary vector size mismatch");
        let mut vector = Vec::with_capacity(self.dimensions);
        for i in 0..self.dimensions {
            let bit_set = (binary[i / 8] >> (i % 8)) & 1 == 1;
            let val = if bit_set {
                self.thresholds[i] + 1.0
            } else {
                self.thresholds[i] - 1.0
            };
            vector.push(val);
        }
        vector
    }

    /// Applies [`quantize`](Self::quantize) to every vector, preserving order.
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        vectors.iter().map(|v| self.quantize(v)).collect()
    }

    /// Applies [`dequantize`](Self::dequantize) to every vector, preserving order.
    pub fn dequantize_batch(&self, binary: &[Vec<u8>]) -> Vec<Vec<f32>> {
        binary.iter().map(|v| self.dequantize(v)).collect()
    }

    /// Counts the bit positions in which `a` and `b` differ.
    ///
    /// # Panics
    /// Panics if `a` and `b` differ in length.
    #[inline]
    pub fn hamming_distance(&self, a: &[u8], b: &[u8]) -> u32 {
        assert_eq!(a.len(), b.len(), "Binary vector size mismatch");
        a.iter()
            .zip(b.iter())
            .map(|(&x, &y)| (x ^ y).count_ones())
            .sum()
    }

    /// Returns `1 - hamming_distance / dimensions`.
    ///
    /// When `dimensions` is 0 (unfitted, or fitted on zero-length vectors) the
    /// division would produce NaN or an infinity. In that case the result is
    /// 1.0 when the inputs are bit-identical and 0.0 otherwise.
    ///
    /// # Panics
    /// Panics if `a` and `b` differ in length.
    #[inline]
    pub fn hamming_similarity(&self, a: &[u8], b: &[u8]) -> f32 {
        let distance = self.hamming_distance(a, b);
        if self.dimensions == 0 {
            return if distance == 0 { 1.0 } else { 0.0 };
        }
        1.0 - (distance as f32 / self.dimensions as f32)
    }

    /// Fixed size ratio reported for this scheme (32-bit float down to 1 bit).
    pub fn compression_ratio(&self) -> f32 {
        32.0
    }

    /// Fixed fraction of memory reported as saved.
    pub fn memory_savings(&self) -> f32 {
        0.96875
    }

    /// Whether [`fit`](Self::fit) has completed successfully.
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Number of components per vector (0 before fitting).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
/// Brute-force search index over bit-packed vectors produced by a `BinaryQuantizer`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndex {
// Quantizer fitted on the vectors passed to `build`.
quantizer: BinaryQuantizer,
// One packed bit vector per indexed entity, in `build` input order.
binary_vectors: Vec<Vec<u8>>,
// Entity identifiers, parallel to `binary_vectors`.
entity_ids: Vec<String>,
}
impl BinaryQuantizedIndex {
    /// Creates an empty index whose quantizer uses `config`.
    pub fn new(config: BinaryQuantizationConfig) -> Self {
        Self {
            quantizer: BinaryQuantizer::new(config),
            binary_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Fits the quantizer on `vectors` and stores their bit-packed form.
    ///
    /// Any previously indexed content is replaced.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the quantizer rejects the data
    /// (for example, vectors of differing length).
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
        self.quantizer.fit(&float_vecs)?;
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.binary_vectors = self.quantizer.quantize_batch(&float_vecs);
        Ok(())
    }

    /// Returns up to `k` `(entity_id, similarity)` pairs, most similar first.
    ///
    /// Similarity is the quantizer's Hamming similarity between the packed
    /// query and each stored vector.
    ///
    /// # Errors
    /// Fails when the index has not been built, or when `query` does not have
    /// the dimension the index was built with. The latter used to panic inside
    /// the quantizer even though this function returns a `Result`.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }
        let expected = self.quantizer.dimensions();
        if query.len() != expected {
            return Err(anyhow!(
                "Query dimension mismatch: expected {}, got {}",
                expected,
                query.len()
            ));
        }
        let binary_query = self.quantizer.quantize(query);
        let mut similarities: Vec<(usize, f32)> = self
            .binary_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.hamming_similarity(&binary_query, v)))
            .collect();
        // Descending, stable: entries with equal similarity keep insertion order.
        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        Ok(similarities
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, sim)| (self.entity_ids[*idx].clone(), *sim))
            .collect())
    }

    /// Summarises the index size and the quantizer's reported compression figures.
    pub fn stats(&self) -> BinaryQuantizedIndexStats {
        let num_vectors = self.binary_vectors.len();
        let dimensions = self.quantizer.dimensions();
        // 4 bytes per f32 component before; one bit per component, rounded up to whole bytes, after.
        let original_bytes = num_vectors * dimensions * 4;
        let binary_bytes = num_vectors * dimensions.div_ceil(8);
        BinaryQuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            binary_bytes,
        }
    }
}
/// Size summary returned by `BinaryQuantizedIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndexStats {
/// Number of stored vectors.
pub num_vectors: usize,
/// Components per vector.
pub dimensions: usize,
/// Ratio reported by the quantizer's `compression_ratio()`.
pub compression_ratio: f32,
/// Fraction reported by the quantizer's `memory_savings()`.
pub memory_savings: f32,
/// `num_vectors * dimensions * 4`.
pub original_bytes: usize,
/// `num_vectors * ceil(dimensions / 8)`.
pub binary_bytes: usize,
}
/// Per-dimension min/max quantizer that encodes each component as a 4-bit level, two per byte.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizer {
// Smallest value seen per dimension during `fit`.
min_vals: Vec<f32>,
// Largest value seen per dimension during `fit`.
max_vals: Vec<f32>,
// Per-dimension multiplier `15 / range` (1.0 when the range is <= 1e-10).
scales: Vec<f32>,
// Component count of the training vectors; 0 until `fit` succeeds.
dimensions: usize,
// Set to true at the end of a successful `fit`.
is_fitted: bool,
}
impl FourBitQuantizer {
    /// Builds an unfitted quantizer with empty per-dimension tables.
    pub fn new() -> Self {
        Self {
            is_fitted: false,
            dimensions: 0,
            scales: Vec::new(),
            max_vals: Vec::new(),
            min_vals: Vec::new(),
        }
    }

    /// Reads the 4-bit level stored for component `i` of a packed vector.
    ///
    /// Even components live in the low nibble of byte `i / 2`, odd ones in the high nibble.
    fn nibble_at(packed: &[u8], i: usize) -> u8 {
        (packed[i / 2] >> (4 * (i % 2))) & 0x0F
    }

    /// Learns the per-dimension value range from `vectors`.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the vectors differ in length.
    /// Nothing is modified in either case.
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        let Some(first) = vectors.first() else {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        };
        let dim = first.len();
        if !vectors.iter().all(|v| v.len() == dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        // Running extrema, one slot per dimension.
        let mut lows = vec![f32::INFINITY; dim];
        let mut highs = vec![f32::NEG_INFINITY; dim];
        for sample in vectors {
            for (i, &component) in sample.iter().enumerate() {
                lows[i] = lows[i].min(component);
                highs[i] = highs[i].max(component);
            }
        }

        // Sixteen levels (0..=15) span each dimension's range.
        let scales: Vec<f32> = lows
            .iter()
            .zip(&highs)
            .map(|(&lo, &hi)| {
                let span = hi - lo;
                if span > 1e-10 {
                    15.0 / span
                } else {
                    1.0
                }
            })
            .collect();

        self.dimensions = dim;
        self.min_vals = lows;
        self.max_vals = highs;
        self.scales = scales;
        self.is_fitted = true;
        Ok(())
    }

    /// Encodes `vector` as 4-bit levels packed two per byte.
    ///
    /// Components are first clipped to the fitted range of their dimension.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `vector` has the wrong length.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        let mut packed = vec![0u8; self.dimensions.div_ceil(2)];
        for (i, &component) in vector.iter().enumerate() {
            let lo = self.min_vals[i];
            let bounded = component.max(lo).min(self.max_vals[i]);
            let level = ((bounded - lo) * self.scales[i]).round().clamp(0.0, 15.0) as u8;
            // Low nibble for even components, high nibble for odd ones.
            let shift = if i % 2 == 0 { 0 } else { 4 };
            packed[i / 2] |= level << shift;
        }
        packed
    }

    /// Unpacks 4-bit levels back to approximate `f32` values.
    ///
    /// # Panics
    /// Panics if the quantizer is not fitted or `quantized` has the wrong byte length.
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        let expected_bytes = self.dimensions.div_ceil(2);
        assert_eq!(
            quantized.len(),
            expected_bytes,
            "Quantized vector size mismatch"
        );
        (0..self.dimensions)
            .map(|i| {
                let level = Self::nibble_at(quantized, i);
                level as f32 / self.scales[i] + self.min_vals[i]
            })
            .collect()
    }

    /// Applies [`quantize`](Self::quantize) to every vector, preserving order.
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        let mut encoded = Vec::with_capacity(vectors.len());
        for v in vectors {
            encoded.push(self.quantize(v));
        }
        encoded
    }

    /// Applies [`dequantize`](Self::dequantize) to every vector, preserving order.
    pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
        let mut decoded = Vec::with_capacity(quantized.len());
        for q in quantized {
            decoded.push(self.dequantize(q));
        }
        decoded
    }

    /// Sum of absolute level differences over the first `dimensions` components.
    ///
    /// # Panics
    /// Panics if `a` and `b` differ in length.
    #[inline]
    pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector size mismatch");
        (0..self.dimensions).fold(0.0f32, |total, i| {
            let lhs = i32::from(Self::nibble_at(a, i));
            let rhs = i32::from(Self::nibble_at(b, i));
            total + (lhs - rhs).abs() as f32
        })
    }

    /// Fixed size ratio reported for this scheme (32-bit float down to 4 bits).
    pub fn compression_ratio(&self) -> f32 {
        const RATIO: f32 = 8.0;
        RATIO
    }

    /// Fixed fraction of memory reported as saved.
    pub fn memory_savings(&self) -> f32 {
        const SAVED: f32 = 0.875;
        SAVED
    }

    /// Whether [`fit`](Self::fit) has completed successfully.
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Number of components per vector (0 before fitting).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
impl Default for FourBitQuantizer {
fn default() -> Self {
Self::new()
}
}
/// Brute-force search index over nibble-packed vectors produced by a `FourBitQuantizer`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndex {
// Quantizer fitted on the vectors passed to `build`.
quantizer: FourBitQuantizer,
// One packed vector per indexed entity, in `build` input order.
quantized_vectors: Vec<Vec<u8>>,
// Entity identifiers, parallel to `quantized_vectors`.
entity_ids: Vec<String>,
}
impl FourBitQuantizedIndex {
    /// Creates an empty index with an unfitted quantizer.
    pub fn new() -> Self {
        Self {
            quantizer: FourBitQuantizer::new(),
            quantized_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Fits the quantizer on `vectors` and stores their nibble-packed form.
    ///
    /// Any previously indexed content is replaced.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the quantizer rejects the data
    /// (for example, vectors of differing length).
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
        self.quantizer.fit(&float_vecs)?;
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);
        Ok(())
    }

    /// Returns up to `k` `(entity_id, distance)` pairs, nearest first.
    ///
    /// The query is quantized and compared against every stored vector.
    ///
    /// # Errors
    /// Fails when the index has not been built, or when `query` does not have
    /// the dimension the index was built with. The latter used to panic inside
    /// the quantizer even though this function returns a `Result`.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }
        let expected = self.quantizer.dimensions();
        if query.len() != expected {
            return Err(anyhow!(
                "Query dimension mismatch: expected {}, got {}",
                expected,
                query.len()
            ));
        }
        let quantized_query = self.quantizer.quantize(query);
        let mut distances: Vec<(usize, f32)> = self
            .quantized_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
            .collect();
        // Stable sort: entries with equal distance keep their insertion order.
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Summarises the index size and the quantizer's reported compression figures.
    pub fn stats(&self) -> FourBitQuantizedIndexStats {
        let num_vectors = self.quantized_vectors.len();
        let dimensions = self.quantizer.dimensions();
        // 4 bytes per f32 component before; two components per byte, rounded up, after.
        let original_bytes = num_vectors * dimensions * 4;
        let quantized_bytes = num_vectors * dimensions.div_ceil(2);
        FourBitQuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            quantized_bytes,
        }
    }
}
impl Default for FourBitQuantizedIndex {
fn default() -> Self {
Self::new()
}
}
/// Size summary returned by `FourBitQuantizedIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndexStats {
/// Number of stored vectors.
pub num_vectors: usize,
/// Components per vector.
pub dimensions: usize,
/// Ratio reported by the quantizer's `compression_ratio()`.
pub compression_ratio: f32,
/// Fraction reported by the quantizer's `memory_savings()`.
pub memory_savings: f32,
/// `num_vectors * dimensions * 4`.
pub original_bytes: usize,
/// `num_vectors * ceil(dimensions / 2)`.
pub quantized_bytes: usize,
}
#[cfg(feature = "fp16")]
use half::f16;
/// Converts `f32` components to half-precision bit patterns (`u16`) and back.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16Quantizer {
// Expected vector length; 0 disables the length checks in `quantize`/`dequantize`.
dimensions: usize,
}
#[cfg(feature = "fp16")]
impl Fp16Quantizer {
    /// Builds a quantizer with no expected dimension (length checks disabled).
    pub fn new() -> Self {
        Fp16Quantizer { dimensions: 0 }
    }

    /// Sets the vector length enforced by `quantize` and `dequantize`; 0 disables the check.
    pub fn set_dimensions(&mut self, dimensions: usize) {
        self.dimensions = dimensions;
    }

    /// Converts each component to the bit pattern of its `f16` representation.
    ///
    /// # Panics
    /// Panics if a non-zero dimension is set and `vector` has a different length.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u16> {
        if self.dimensions > 0 {
            assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        }
        let mut bits = Vec::with_capacity(vector.len());
        for &component in vector {
            bits.push(f16::from_f32(component).to_bits());
        }
        bits
    }

    /// Converts `f16` bit patterns back to `f32` values.
    ///
    /// # Panics
    /// Panics if a non-zero dimension is set and `quantized` has a different length.
    pub fn dequantize(&self, quantized: &[u16]) -> Vec<f32> {
        if self.dimensions > 0 {
            assert_eq!(
                quantized.len(),
                self.dimensions,
                "Quantized vector dimension mismatch"
            );
        }
        let mut values = Vec::with_capacity(quantized.len());
        for &pattern in quantized {
            values.push(f16::from_bits(pattern).to_f32());
        }
        values
    }

    /// Applies [`quantize`](Self::quantize) to every vector, preserving order.
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u16>> {
        let mut encoded = Vec::with_capacity(vectors.len());
        for v in vectors {
            encoded.push(self.quantize(v));
        }
        encoded
    }

    /// Applies [`dequantize`](Self::dequantize) to every vector, preserving order.
    pub fn dequantize_batch(&self, quantized: &[Vec<u16>]) -> Vec<Vec<f32>> {
        let mut decoded = Vec::with_capacity(quantized.len());
        for q in quantized {
            decoded.push(self.dequantize(q));
        }
        decoded
    }

    /// Euclidean distance between two vectors given as `f16` bit patterns.
    ///
    /// Each component is widened to `f32` before the squared difference is accumulated.
    ///
    /// # Panics
    /// Panics if `a` and `b` differ in length.
    #[inline]
    pub fn fp16_distance(&self, a: &[u16], b: &[u16]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector dimension mismatch");
        // Explicit fold from 0.0 keeps the accumulation order and start value fixed.
        let sum_sq = a.iter().zip(b).fold(0.0f32, |acc, (&lhs, &rhs)| {
            let lhs_val = f16::from_bits(lhs).to_f32();
            let rhs_val = f16::from_bits(rhs).to_f32();
            let delta = lhs_val - rhs_val;
            acc + delta * delta
        });
        sum_sq.sqrt()
    }

    /// Fixed size ratio reported for this scheme (32-bit float down to 16 bits).
    pub fn compression_ratio(&self) -> f32 {
        const RATIO: f32 = 2.0;
        RATIO
    }

    /// Fixed fraction of memory reported as saved.
    pub fn memory_savings(&self) -> f32 {
        const SAVED: f32 = 0.5;
        SAVED
    }

    /// Currently configured vector length (0 when unset).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
#[cfg(feature = "fp16")]
impl Default for Fp16Quantizer {
    /// Same as [`Fp16Quantizer::new`]: no expected dimension.
    fn default() -> Self {
        Fp16Quantizer::new()
    }
}
/// Brute-force search index over vectors stored as half-precision bit patterns.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndex {
// Converter whose dimension is set from the first vector passed to `build`.
quantizer: Fp16Quantizer,
// One `f16`-bit vector per indexed entity, in `build` input order.
fp16_vectors: Vec<Vec<u16>>,
// Entity identifiers, parallel to `fp16_vectors`.
entity_ids: Vec<String>,
}
#[cfg(feature = "fp16")]
impl Fp16QuantizedIndex {
    /// Creates an empty index with an unconfigured quantizer.
    pub fn new() -> Self {
        Self {
            quantizer: Fp16Quantizer::new(),
            fp16_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Stores `vectors` in half precision, replacing any previous content.
    ///
    /// All inputs are validated before any state is touched, so a failed
    /// build leaves the index unchanged.
    ///
    /// # Errors
    /// Fails when `vectors` is empty or when the vectors differ in length.
    /// Ragged input used to panic inside the quantizer, or was silently
    /// accepted when the first vector was empty.
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        let Some((_, first)) = vectors.first() else {
            return Err(anyhow!("Cannot build index from empty vectors"));
        };
        let dimensions = first.len();
        if vectors.iter().any(|(_, v)| v.len() != dimensions) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }
        self.quantizer.set_dimensions(dimensions);
        // Encode straight from the input; no intermediate copy of the f32 data is needed.
        let encoded: Vec<Vec<u16>> = vectors
            .iter()
            .map(|(_, v)| self.quantizer.quantize(v))
            .collect();
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.fp16_vectors = encoded;
        Ok(())
    }

    /// Returns up to `k` `(entity_id, distance)` pairs, nearest first.
    ///
    /// Distance is the quantizer's Euclidean distance on half-precision values.
    ///
    /// # Errors
    /// Fails when the index holds no vectors, or when `query` does not have
    /// the dimension the index was built with. The latter used to panic even
    /// though this function returns a `Result`.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if self.fp16_vectors.is_empty() {
            return Err(anyhow!("Index not built"));
        }
        let expected = self.quantizer.dimensions();
        if query.len() != expected {
            return Err(anyhow!(
                "Query dimension mismatch: expected {}, got {}",
                expected,
                query.len()
            ));
        }
        let fp16_query = self.quantizer.quantize(query);
        let mut distances: Vec<(usize, f32)> = self
            .fp16_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.fp16_distance(&fp16_query, v)))
            .collect();
        // Stable sort; incomparable (NaN) distances are treated as equal.
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Summarises the index size and the quantizer's reported compression figures.
    pub fn stats(&self) -> Fp16QuantizedIndexStats {
        let num_vectors = self.fp16_vectors.len();
        let dimensions = self.quantizer.dimensions();
        // 4 bytes per f32 component before, 2 bytes per component after.
        let original_bytes = num_vectors * dimensions * 4;
        let fp16_bytes = num_vectors * dimensions * 2;
        Fp16QuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            fp16_bytes,
        }
    }
}
#[cfg(feature = "fp16")]
impl Default for Fp16QuantizedIndex {
    /// Same as [`Fp16QuantizedIndex::new`]: an empty, unbuilt index.
    fn default() -> Self {
        Fp16QuantizedIndex::new()
    }
}
/// Size summary returned by `Fp16QuantizedIndex::stats`.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndexStats {
/// Number of stored vectors.
pub num_vectors: usize,
/// Components per vector.
pub dimensions: usize,
/// Ratio reported by the quantizer's `compression_ratio()`.
pub compression_ratio: f32,
/// Fraction reported by the quantizer's `memory_savings()`.
pub memory_savings: f32,
/// `num_vectors * dimensions * 4`.
pub original_bytes: usize,
/// `num_vectors * dimensions * 2`.
pub fp16_bytes: usize,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_quantizer_fit() {
    // Rows are [0,1,2], [1,2,3], [2,3,4].
    let samples: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    let outcome = sq.fit(&samples);
    assert!(outcome.is_ok());
    assert!(sq.is_fitted());
    assert_eq!(sq.dimensions(), 3);
}
#[test]
fn test_quantize_dequantize() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    sq.fit(&training).unwrap();

    let probe = vec![1.0, 2.0, 3.0];
    let codes = sq.quantize(&probe);
    let restored = sq.dequantize(&codes);
    assert_eq!(codes.len(), 3);
    assert_eq!(restored.len(), 3);
    // The round trip must reproduce every component to within 0.1.
    assert!(probe
        .iter()
        .zip(&restored)
        .all(|(orig, deq)| (orig - deq).abs() < 0.1));
}
#[test]
fn test_quantize_batch() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    sq.fit(&training).unwrap();
    // One code vector per input, one byte per component.
    let encoded = sq.quantize_batch(&training);
    assert_eq!(encoded.len(), 3);
    assert_eq!(encoded[0].len(), 3);
}
#[test]
fn test_quantized_distance() {
    let samples = vec![
        vec![0.0, 0.0, 0.0],
        vec![1.0, 1.0, 1.0],
        vec![2.0, 2.0, 2.0],
    ];
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    sq.fit(&samples).unwrap();

    let origin = sq.quantize(&samples[0]);
    let near = sq.quantize(&samples[1]);
    let far = sq.quantize(&samples[2]);
    let to_near = sq.quantized_distance(&origin, &near);
    let to_far = sq.quantized_distance(&origin, &far);
    // The farther sample must also be farther in code space.
    assert!(to_far > to_near);
}
#[test]
fn test_compression_ratio() {
    // The reported figures are constants and need no fitting.
    let sq = ScalarQuantizer::new(QuantizationConfig::default());
    let (ratio, savings) = (sq.compression_ratio(), sq.memory_savings());
    assert_eq!(ratio, 4.0);
    assert_eq!(savings, 0.75);
}
#[test]
fn test_quantized_index_build() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 1.0, 2.0]),
        (String::from("doc2"), vec![1.0, 2.0, 3.0]),
        (String::from("doc3"), vec![2.0, 3.0, 4.0]),
    ];
    let mut idx = QuantizedVectorIndex::new(QuantizationConfig::default());
    let outcome = idx.build(&entries);
    assert!(outcome.is_ok());
}
#[test]
fn test_quantized_index_search() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 0.0, 0.0]),
        (String::from("doc2"), vec![1.0, 1.0, 1.0]),
        (String::from("doc3"), vec![2.0, 2.0, 2.0]),
    ];
    let mut idx = QuantizedVectorIndex::new(QuantizationConfig::default());
    idx.build(&entries).unwrap();

    let probe = vec![1.0, 1.0, 1.0];
    let hits = idx.search(&probe, 2).unwrap();
    assert_eq!(hits.len(), 2);
    // The exact match must rank first.
    assert_eq!(hits[0].0, "doc2");
}
#[test]
fn test_quantized_index_stats() {
    // Three constant 768-component vectors filled with 0.0, 1.0 and 2.0.
    let entries: Vec<(String, Vec<f32>)> = [("doc1", 0.0f32), ("doc2", 1.0), ("doc3", 2.0)]
        .iter()
        .map(|&(id, fill)| (id.to_string(), vec![fill; 768]))
        .collect();
    let mut idx = QuantizedVectorIndex::new(QuantizationConfig::default());
    idx.build(&entries).unwrap();

    let summary = idx.stats();
    assert_eq!(summary.num_vectors, 3);
    assert_eq!(summary.dimensions, 768);
    assert_eq!(summary.original_bytes, 3 * 768 * 4);
    assert_eq!(summary.quantized_bytes, 3 * 768);
    assert_eq!(summary.compression_ratio, 4.0);
}
#[test]
fn test_fit_empty_vectors() {
    // Fitting on no data must be rejected.
    let nothing: Vec<Vec<f32>> = Vec::new();
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    let outcome = sq.fit(&nothing);
    assert!(outcome.is_err());
}
#[test]
#[should_panic(expected = "Quantizer must be fitted")]
fn test_quantize_unfitted() {
    // Quantizing before `fit` must trip the fitted assertion.
    let sq = ScalarQuantizer::new(QuantizationConfig::default());
    let probe = [1.0, 2.0, 3.0];
    sq.quantize(&probe);
}
#[test]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_quantize_dimension_mismatch() {
    let training = vec![vec![0.0, 1.0, 2.0]];
    let mut sq = ScalarQuantizer::new(QuantizationConfig::default());
    sq.fit(&training).unwrap();
    // Two components against a three-dimensional fit.
    let too_short = [1.0, 2.0];
    sq.quantize(&too_short);
}
#[test]
fn test_binary_quantizer_fit() {
    // Rows are [0,1,2], [1,2,3], [2,3,4].
    let samples: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let outcome = bq.fit(&samples);
    assert!(outcome.is_ok());
    assert!(bq.is_fitted());
    assert_eq!(bq.dimensions(), 3);
}
#[test]
fn test_binary_quantize_dequantize() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&training).unwrap();

    let probe = vec![1.0, 2.0, 3.0];
    let packed = bq.quantize(&probe);
    let restored = bq.dequantize(&packed);
    // Three bits fit in a single byte.
    assert_eq!(packed.len(), 1);
    assert_eq!(restored.len(), 3);
}
#[test]
fn test_binary_quantize_large_vector() {
    // Ten copies of the ramp 0, 1, ..., 127.
    let ramp: Vec<f32> = (0..128).map(|i| i as f32).collect();
    let training: Vec<Vec<f32>> = vec![ramp.clone(); 10];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&training).unwrap();

    let packed = bq.quantize(&ramp);
    // 128 bits occupy 16 bytes.
    assert_eq!(packed.len(), 16);
}
#[test]
fn test_binary_hamming_distance() {
    let samples = vec![
        vec![0.0, 0.0, 0.0, 0.0],
        vec![1.0, 1.0, 1.0, 1.0],
        vec![2.0, 2.0, 2.0, 2.0],
    ];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&samples).unwrap();

    let first = bq.quantize(&samples[0]);
    let second = bq.quantize(&samples[1]);
    let third = bq.quantize(&samples[2]);
    let first_second = bq.hamming_distance(&first, &second);
    let first_third = bq.hamming_distance(&first, &third);
    // With four dimensions at most four bits can differ.
    assert!(first_second <= 4);
    assert!(first_third <= 4);
}
#[test]
fn test_binary_hamming_similarity() {
    let samples = vec![
        vec![0.0, 0.0, 0.0, 0.0],
        vec![1.0, 1.0, 1.0, 1.0],
        vec![2.0, 2.0, 2.0, 2.0],
    ];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&samples).unwrap();

    let first = bq.quantize(&samples[0]);
    let second = bq.quantize(&samples[1]);
    let similarity = bq.hamming_similarity(&first, &second);
    // Similarity is a fraction of matching bits.
    assert!((0.0..=1.0).contains(&similarity));
}
#[test]
fn test_binary_compression_ratio() {
    // The reported figures are constants and need no fitting.
    let bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let (ratio, savings) = (bq.compression_ratio(), bq.memory_savings());
    assert_eq!(ratio, 32.0);
    assert_eq!(savings, 0.96875);
}
#[test]
fn test_binary_quantize_batch() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&training).unwrap();
    let packed = bq.quantize_batch(&training);
    assert_eq!(packed.len(), 3);
    // Three bits per vector fit in one byte.
    assert_eq!(packed[0].len(), 1);
}
#[test]
fn test_binary_quantized_index_build() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 1.0, 2.0]),
        (String::from("doc2"), vec![1.0, 2.0, 3.0]),
        (String::from("doc3"), vec![2.0, 3.0, 4.0]),
    ];
    let mut idx = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
    let outcome = idx.build(&entries);
    assert!(outcome.is_ok());
}
#[test]
fn test_binary_quantized_index_search() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 0.0, 0.0]),
        (String::from("doc2"), vec![1.0, 1.0, 1.0]),
        (String::from("doc3"), vec![2.0, 2.0, 2.0]),
    ];
    let mut idx = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
    idx.build(&entries).unwrap();

    let probe = vec![1.0, 1.0, 1.0];
    let hits = idx.search(&probe, 2).unwrap();
    assert_eq!(hits.len(), 2);
    // Results must come back in non-increasing similarity order.
    let (best, runner_up) = (hits[0].1, hits[1].1);
    assert!(best >= runner_up);
}
#[test]
fn test_binary_quantized_index_stats() {
    // Three constant 768-component vectors filled with 0.0, 1.0 and 2.0.
    let entries: Vec<(String, Vec<f32>)> = [("doc1", 0.0f32), ("doc2", 1.0), ("doc3", 2.0)]
        .iter()
        .map(|&(id, fill)| (id.to_string(), vec![fill; 768]))
        .collect();
    let mut idx = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
    idx.build(&entries).unwrap();

    let summary = idx.stats();
    assert_eq!(summary.num_vectors, 3);
    assert_eq!(summary.dimensions, 768);
    assert_eq!(summary.original_bytes, 3 * 768 * 4);
    // 768 bits pack into 96 bytes per vector.
    assert_eq!(summary.binary_bytes, 3 * 96);
    assert_eq!(summary.compression_ratio, 32.0);
}
#[test]
fn test_binary_fit_empty_vectors() {
    // Fitting on no data must be rejected.
    let nothing: Vec<Vec<f32>> = Vec::new();
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let outcome = bq.fit(&nothing);
    assert!(outcome.is_err());
}
#[test]
#[should_panic(expected = "Quantizer must be fitted")]
fn test_binary_quantize_unfitted() {
    // Quantizing before `fit` must trip the fitted assertion.
    let bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let probe = [1.0, 2.0, 3.0];
    bq.quantize(&probe);
}
#[test]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_binary_quantize_dimension_mismatch() {
    let training = vec![vec![0.0, 1.0, 2.0]];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&training).unwrap();
    // Two components against a three-dimensional fit.
    let too_short = [1.0, 2.0];
    bq.quantize(&too_short);
}
#[test]
fn test_binary_zero_threshold() {
    let training = vec![vec![-1.0, 0.0, 1.0], vec![-2.0, 0.0, 2.0]];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig {
        use_mean_threshold: false,
    });
    bq.fit(&training).unwrap();

    let probe = vec![-1.0, 0.0, 1.0];
    let packed = bq.quantize(&probe);
    // Only the third component is strictly above the 0.0 threshold, so only bit 2 is set.
    let low_three_bits = packed[0] & 0b00000111;
    assert_eq!(low_three_bits, 0b00000100);
}
#[test]
fn test_binary_identical_vectors() {
    let twins = vec![vec![1.0, 2.0, 3.0], vec![1.0, 2.0, 3.0]];
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&twins).unwrap();

    let left = bq.quantize(&twins[0]);
    let right = bq.quantize(&twins[1]);
    // Identical inputs must be at distance 0 and similarity 1.
    assert_eq!(bq.hamming_distance(&left, &right), 0);
    assert_eq!(bq.hamming_similarity(&left, &right), 1.0);
}
#[test]
fn test_fourbit_quantizer_fit() {
    // Rows are [0,1,2], [1,2,3], [2,3,4].
    let samples: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();
    let mut fq = FourBitQuantizer::new();
    let outcome = fq.fit(&samples);
    assert!(outcome.is_ok());
    assert!(fq.is_fitted());
    assert_eq!(fq.dimensions(), 3);
}
#[test]
fn test_fourbit_quantize_dequantize() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&training).unwrap();

    let probe = vec![1.0, 2.0, 3.0];
    let packed = fq.quantize(&probe);
    let restored = fq.dequantize(&packed);
    // Three nibbles need two bytes.
    assert_eq!(packed.len(), 2);
    assert_eq!(restored.len(), 3);
    // Sixteen levels are coarser than a byte, hence the looser 0.3 tolerance.
    assert!(probe
        .iter()
        .zip(&restored)
        .all(|(orig, deq)| (orig - deq).abs() < 0.3));
}
#[test]
fn test_fourbit_quantize_large_vector() {
    // Ten copies of the ramp 0, 1, ..., 99.
    let ramp: Vec<f32> = (0..100).map(|i| i as f32).collect();
    let training: Vec<Vec<f32>> = vec![ramp.clone(); 10];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&training).unwrap();

    let packed = fq.quantize(&ramp);
    // 100 nibbles occupy 50 bytes.
    assert_eq!(packed.len(), 50);
}
#[test]
fn test_fourbit_odd_dimensions() {
    let training = vec![
        vec![0.0, 1.0, 2.0, 3.0, 4.0],
        vec![1.0, 2.0, 3.0, 4.0, 5.0],
    ];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&training).unwrap();

    let probe = vec![1.5, 2.5, 3.5, 4.5, 5.5];
    let packed = fq.quantize(&probe);
    // Five nibbles round up to three bytes.
    assert_eq!(packed.len(), 3);
}
#[test]
fn test_fourbit_nibble_packing() {
    let training = vec![vec![0.0, 0.0], vec![15.0, 15.0]];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&training).unwrap();

    let probe = vec![0.0, 15.0];
    let packed = fq.quantize(&probe);
    assert_eq!(packed.len(), 1);
    // Component 0 (level 0) is the low nibble; component 1 (level 15) is the high nibble.
    assert_eq!(packed[0], 0xF0);
}
#[test]
fn test_fourbit_compression_ratio() {
    // The reported figures are constants and need no fitting.
    let fq = FourBitQuantizer::new();
    let (ratio, savings) = (fq.compression_ratio(), fq.memory_savings());
    assert_eq!(ratio, 8.0);
    assert_eq!(savings, 0.875);
}
#[test]
fn test_fourbit_quantize_batch() {
    let training = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&training).unwrap();
    let packed = fq.quantize_batch(&training);
    assert_eq!(packed.len(), 3);
    // Three nibbles per vector need two bytes.
    assert_eq!(packed[0].len(), 2);
}
#[test]
fn test_fourbit_quantized_distance() {
    let samples = vec![
        vec![0.0, 0.0, 0.0],
        vec![1.0, 1.0, 1.0],
        vec![2.0, 2.0, 2.0],
    ];
    let mut fq = FourBitQuantizer::new();
    fq.fit(&samples).unwrap();

    let origin = fq.quantize(&samples[0]);
    let near = fq.quantize(&samples[1]);
    let far = fq.quantize(&samples[2]);
    let to_near = fq.quantized_distance(&origin, &near);
    let to_far = fq.quantized_distance(&origin, &far);
    // The farther sample must also be farther in code space.
    assert!(to_far > to_near);
}
#[test]
fn test_fourbit_quantized_index_build() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 1.0, 2.0]),
        (String::from("doc2"), vec![1.0, 2.0, 3.0]),
        (String::from("doc3"), vec![2.0, 3.0, 4.0]),
    ];
    let mut idx = FourBitQuantizedIndex::new();
    let outcome = idx.build(&entries);
    assert!(outcome.is_ok());
}
#[test]
fn test_fourbit_quantized_index_search() {
    let entries = vec![
        (String::from("doc1"), vec![0.0, 0.0, 0.0]),
        (String::from("doc2"), vec![1.0, 1.0, 1.0]),
        (String::from("doc3"), vec![2.0, 2.0, 2.0]),
    ];
    let mut idx = FourBitQuantizedIndex::new();
    idx.build(&entries).unwrap();

    let probe = vec![1.0, 1.0, 1.0];
    let hits = idx.search(&probe, 2).unwrap();
    assert_eq!(hits.len(), 2);
    // The exact match must rank first.
    assert_eq!(hits[0].0, "doc2");
}
/// Index statistics for three 768-dimensional vectors: counts, byte sizes
/// and the resulting compression ratio.
#[test]
fn test_fourbit_quantized_index_stats() {
    let docs = vec![
        (String::from("doc1"), vec![0.0; 768]),
        (String::from("doc2"), vec![1.0; 768]),
        (String::from("doc3"), vec![2.0; 768]),
    ];
    let mut index = FourBitQuantizedIndex::new();
    index.build(&docs).unwrap();

    let report = index.stats();
    assert_eq!(report.num_vectors, 3);
    assert_eq!(report.dimensions, 768);
    // 3 vectors x 768 dimensions x 4 bytes each before quantization.
    assert_eq!(report.original_bytes, 3 * 768 * 4);
    // 3 vectors x 384 bytes, i.e. half a byte per dimension.
    assert_eq!(report.quantized_bytes, 3 * 384);
    assert_eq!(report.compression_ratio, 8.0);
}
/// Fitting on an empty training set must return an error.
#[test]
fn test_fourbit_fit_empty_vectors() {
    let no_data: Vec<Vec<f32>> = Vec::new();
    let mut q = FourBitQuantizer::new();

    let outcome = q.fit(&no_data);
    assert!(outcome.is_err());
}
/// Quantizing with a quantizer that was never fitted must panic with a
/// message containing "Quantizer must be fitted".
#[test]
#[should_panic(expected = "Quantizer must be fitted")]
fn test_fourbit_quantize_unfitted() {
    let unfitted = FourBitQuantizer::new();
    // The call is expected to panic; its result is irrelevant.
    let _ = unfitted.quantize(&[1.0, 2.0, 3.0]);
}
/// Quantizing a 2-dimensional vector with a quantizer fitted on
/// 3-dimensional data must panic with "Vector dimension mismatch".
#[test]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_fourbit_quantize_dimension_mismatch() {
    let training = vec![vec![0.0, 1.0, 2.0]];
    let mut q = FourBitQuantizer::new();
    q.fit(&training).unwrap();

    // One component short of the fitted dimensionality.
    let _ = q.quantize(&[1.0, 2.0]);
}
/// `set_dimensions` followed by `dimensions` must round-trip the value.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantizer_basic() {
    let mut q = Fp16Quantizer::new();
    q.set_dimensions(3);

    let reported = q.dimensions();
    assert_eq!(reported, 3);
}
/// Quantize/dequantize round trip: lengths are preserved and every
/// component is recovered to within an absolute error of 0.001.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_dequantize() {
    let q = Fp16Quantizer::new();
    let input = vec![1.0, 2.0, 3.0];

    let halves = q.quantize(&input);
    let restored = q.dequantize(&halves);

    assert_eq!(halves.len(), 3);
    assert_eq!(restored.len(), 3);
    input
        .iter()
        .zip(restored.iter())
        .for_each(|(orig, deq)| {
            assert!((orig - deq).abs() < 0.001);
        });
}
/// Quantizing a 768-component vector must yield 768 output elements.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_large_vector() {
    let q = Fp16Quantizer::new();
    // Ramp of values 0.0, 0.1, 0.2, ... (index scaled by 0.1).
    let ramp = (0..768).map(|i| i as f32 * 0.1).collect::<Vec<f32>>();

    let halves = q.quantize(&ramp);
    assert_eq!(halves.len(), 768);
}
/// Batch quantization yields one row per input vector, and the first row
/// keeps all three components.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_batch() {
    let q = Fp16Quantizer::new();
    let batch = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];

    let rows = q.quantize_batch(&batch);

    assert_eq!(rows.len(), 3);
    assert_eq!(rows[0].len(), 3);
}
/// The fp16 distance must grow with the separation of the original
/// vectors: all-zeros is farther from all-twos than from all-ones.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_distance() {
    let q = Fp16Quantizer::new();
    let origin = vec![0.0; 3];
    let ones = vec![1.0; 3];
    let twos = vec![2.0; 3];

    let origin_q = q.quantize(&origin);
    let ones_q = q.quantize(&ones);
    let twos_q = q.quantize(&twos);

    let near = q.fp16_distance(&origin_q, &ones_q);
    let far = q.fp16_distance(&origin_q, &twos_q);
    assert!(far > near);
}
/// The fp16 quantizer reports a compression ratio of 2.0 and memory
/// savings of 0.5.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_compression_ratio() {
    let q = Fp16Quantizer::new();

    let ratio = q.compression_ratio();
    assert_eq!(ratio, 2.0);

    let savings = q.memory_savings();
    assert_eq!(savings, 0.5);
}
/// Building an fp16 index from three labelled 3-dimensional vectors must
/// succeed.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_build() {
    let docs = vec![
        (String::from("doc1"), vec![0.0, 1.0, 2.0]),
        (String::from("doc2"), vec![1.0, 2.0, 3.0]),
        (String::from("doc3"), vec![2.0, 3.0, 4.0]),
    ];
    let mut index = Fp16QuantizedIndex::new();

    let outcome = index.build(&docs);
    assert!(outcome.is_ok());
}
/// Searching for a vector identical to "doc2" with k = 2 must return two
/// hits with "doc2" ranked first.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_search() {
    let docs = vec![
        (String::from("doc1"), vec![0.0; 3]),
        (String::from("doc2"), vec![1.0; 3]),
        (String::from("doc3"), vec![2.0; 3]),
    ];
    let mut index = Fp16QuantizedIndex::new();
    index.build(&docs).unwrap();

    // The probe coincides with the vector stored under "doc2".
    let probe = vec![1.0; 3];
    let hits = index.search(&probe, 2).unwrap();

    assert_eq!(hits.len(), 2);
    assert_eq!(hits[0].0, "doc2");
}
/// Index statistics for three 768-dimensional vectors: counts, byte sizes
/// and the resulting compression ratio.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_stats() {
    let docs = vec![
        (String::from("doc1"), vec![0.0; 768]),
        (String::from("doc2"), vec![1.0; 768]),
        (String::from("doc3"), vec![2.0; 768]),
    ];
    let mut index = Fp16QuantizedIndex::new();
    index.build(&docs).unwrap();

    let report = index.stats();
    assert_eq!(report.num_vectors, 3);
    assert_eq!(report.dimensions, 768);
    // 3 vectors x 768 dimensions x 4 bytes each before quantization.
    assert_eq!(report.original_bytes, 3 * 768 * 4);
    // 3 vectors x 768 dimensions x 2 bytes each after quantization.
    assert_eq!(report.fp16_bytes, 3 * 768 * 2);
    assert_eq!(report.compression_ratio, 2.0);
}
/// Round-trips vectors of several magnitudes and signs and checks that the
/// relative error of every component stays below 0.001.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_high_precision() {
    let q = Fp16Quantizer::new();
    let samples = vec![
        vec![0.1, 0.2, 0.3],
        vec![1.5, 2.5, 3.5],
        vec![100.0, 200.0, 300.0],
        vec![-1.0, -2.0, -3.0],
    ];

    for sample in samples.iter() {
        let halves = q.quantize(sample);
        let restored = q.dequantize(&halves);

        for (&expected, &actual) in sample.iter().zip(restored.iter()) {
            let relative_error = ((expected - actual) / expected).abs();
            // Components with magnitude below 0.01 are exempt from the
            // relative-error bound.
            assert!(relative_error < 0.001 || expected.abs() < 0.01);
        }
    }
}
/// After `set_dimensions(3)`, quantizing a 2-component vector must panic
/// with "Vector dimension mismatch".
#[test]
#[cfg(feature = "fp16")]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_fp16_quantize_dimension_mismatch() {
    let mut q = Fp16Quantizer::new();
    q.set_dimensions(3);

    // A vector of the configured length is quantized first.
    let matching = vec![1.0, 2.0, 3.0];
    let _ = q.quantize(&matching);

    // One component short: this call is expected to panic.
    let _ = q.quantize(&[1.0, 2.0]);
}
}