use crate::error::Result;
use serde::{Deserialize, Serialize};
pub trait QuantizedVector: Send + Sync {
fn quantize(vector: &[f32]) -> Self;
fn distance(&self, other: &Self) -> f32;
fn reconstruct(&self) -> Vec<f32>;
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalarQuantized {
pub data: Vec<u8>,
pub min: f32,
pub scale: f32,
}
impl QuantizedVector for ScalarQuantized {
fn quantize(vector: &[f32]) -> Self {
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let scale = if (max - min).abs() < f32::EPSILON {
1.0 } else {
(max - min) / 255.0
};
let data = vector
.iter()
.map(|&v| ((v - min) / scale).round().clamp(0.0, 255.0) as u8)
.collect();
Self { data, min, scale }
}
fn distance(&self, other: &Self) -> f32 {
let avg_scale = (self.scale + other.scale) / 2.0;
#[cfg(target_arch = "aarch64")]
{
if self.data.len() >= 16 {
return unsafe { scalar_distance_neon(&self.data, &other.data) }.sqrt() * avg_scale;
}
}
#[cfg(target_arch = "x86_64")]
{
if self.data.len() >= 32 && is_x86_feature_detected!("avx2") {
return unsafe { scalar_distance_avx2(&self.data, &other.data) }.sqrt() * avg_scale;
}
}
scalar_distance_scalar(&self.data, &other.data).sqrt() * avg_scale
}
fn reconstruct(&self) -> Vec<f32> {
self.data
.iter()
.map(|&v| self.min + (v as f32) * self.scale)
.collect()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductQuantized {
pub codes: Vec<u8>,
pub codebooks: Vec<Vec<Vec<f32>>>,
}
impl ProductQuantized {
pub fn train(
vectors: &[Vec<f32>],
num_subspaces: usize,
codebook_size: usize,
iterations: usize,
) -> Result<Self> {
if vectors.is_empty() {
return Err(crate::error::RuvectorError::InvalidInput(
"Cannot train on empty vector set".into(),
));
}
if vectors[0].is_empty() {
return Err(crate::error::RuvectorError::InvalidInput(
"Cannot train on vectors with zero dimensions".into(),
));
}
if codebook_size > 256 {
return Err(crate::error::RuvectorError::InvalidParameter(format!(
"Codebook size {} exceeds u8 maximum of 256",
codebook_size
)));
}
let dimensions = vectors[0].len();
let subspace_dim = dimensions / num_subspaces;
let mut codebooks = Vec::with_capacity(num_subspaces);
for subspace_idx in 0..num_subspaces {
let start = subspace_idx * subspace_dim;
let end = start + subspace_dim;
let subspace_vectors: Vec<Vec<f32>> =
vectors.iter().map(|v| v[start..end].to_vec()).collect();
let codebook = kmeans_clustering(&subspace_vectors, codebook_size, iterations);
codebooks.push(codebook);
}
Ok(Self {
codes: vec![],
codebooks,
})
}
pub fn encode(&self, vector: &[f32]) -> Vec<u8> {
let num_subspaces = self.codebooks.len();
let subspace_dim = vector.len() / num_subspaces;
let mut codes = Vec::with_capacity(num_subspaces);
for (subspace_idx, codebook) in self.codebooks.iter().enumerate() {
let start = subspace_idx * subspace_dim;
let end = start + subspace_dim;
let subvector = &vector[start..end];
let code = codebook
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| {
let dist_a = euclidean_squared(subvector, a);
let dist_b = euclidean_squared(subvector, b);
dist_a.partial_cmp(&dist_b).unwrap()
})
.map(|(idx, _)| idx as u8)
.unwrap_or(0);
codes.push(code);
}
codes
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Int4Quantized {
pub data: Vec<u8>,
pub min: f32,
pub scale: f32,
pub dimensions: usize,
}
impl Int4Quantized {
pub fn quantize(vector: &[f32]) -> Self {
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let scale = if (max - min).abs() < f32::EPSILON {
1.0
} else {
(max - min) / 15.0 };
let dimensions = vector.len();
let num_bytes = dimensions.div_ceil(2);
let mut data = vec![0u8; num_bytes];
for (i, &v) in vector.iter().enumerate() {
let quantized = ((v - min) / scale).round().clamp(0.0, 15.0) as u8;
let byte_idx = i / 2;
if i % 2 == 0 {
data[byte_idx] |= quantized;
} else {
data[byte_idx] |= quantized << 4;
}
}
Self {
data,
min,
scale,
dimensions,
}
}
pub fn distance(&self, other: &Self) -> f32 {
assert_eq!(self.dimensions, other.dimensions);
let avg_scale = (self.scale + other.scale) / 2.0;
let _avg_min = (self.min + other.min) / 2.0;
let mut sum_sq = 0i32;
for i in 0..self.dimensions {
let byte_idx = i / 2;
let shift = if i % 2 == 0 { 0 } else { 4 };
let a = ((self.data[byte_idx] >> shift) & 0x0F) as i32;
let b = ((other.data[byte_idx] >> shift) & 0x0F) as i32;
let diff = a - b;
sum_sq += diff * diff;
}
(sum_sq as f32).sqrt() * avg_scale
}
pub fn reconstruct(&self) -> Vec<f32> {
let mut result = Vec::with_capacity(self.dimensions);
for i in 0..self.dimensions {
let byte_idx = i / 2;
let shift = if i % 2 == 0 { 0 } else { 4 };
let quantized = (self.data[byte_idx] >> shift) & 0x0F;
result.push(self.min + (quantized as f32) * self.scale);
}
result
}
pub fn compression_ratio() -> f32 {
8.0 }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantized {
pub bits: Vec<u8>,
pub dimensions: usize,
}
impl QuantizedVector for BinaryQuantized {
fn quantize(vector: &[f32]) -> Self {
let dimensions = vector.len();
let num_bytes = dimensions.div_ceil(8);
let mut bits = vec![0u8; num_bytes];
for (i, &v) in vector.iter().enumerate() {
if v > 0.0 {
let byte_idx = i / 8;
let bit_idx = i % 8;
bits[byte_idx] |= 1 << bit_idx;
}
}
Self { bits, dimensions }
}
fn distance(&self, other: &Self) -> f32 {
Self::hamming_distance_fast(&self.bits, &other.bits) as f32
}
fn reconstruct(&self) -> Vec<f32> {
let mut result = Vec::with_capacity(self.dimensions);
for i in 0..self.dimensions {
let byte_idx = i / 8;
let bit_idx = i % 8;
let bit = (self.bits[byte_idx] >> bit_idx) & 1;
result.push(if bit == 1 { 1.0 } else { -1.0 });
}
result
}
}
impl BinaryQuantized {
pub fn hamming_distance_fast(a: &[u8], b: &[u8]) -> u32 {
#[cfg(target_arch = "aarch64")]
{
if a.len() >= 16 {
return unsafe { hamming_distance_neon(a, b) };
}
}
#[cfg(target_arch = "x86_64")]
{
if a.len() >= 8 && is_x86_feature_detected!("popcnt") {
return unsafe { hamming_distance_simd_x86(a, b) };
}
}
let mut distance = 0u32;
let chunks_a = a.chunks_exact(8);
let chunks_b = b.chunks_exact(8);
let remainder_a = chunks_a.remainder();
let remainder_b = chunks_b.remainder();
for (chunk_a, chunk_b) in chunks_a.zip(chunks_b) {
let a_u64 = u64::from_le_bytes(chunk_a.try_into().unwrap());
let b_u64 = u64::from_le_bytes(chunk_b.try_into().unwrap());
distance += (a_u64 ^ b_u64).count_ones();
}
for (&a_byte, &b_byte) in remainder_a.iter().zip(remainder_b) {
distance += (a_byte ^ b_byte).count_ones();
}
distance
}
pub fn similarity(&self, other: &Self) -> f32 {
let distance = self.distance(other);
1.0 - (distance / self.dimensions as f32)
}
pub fn compression_ratio() -> f32 {
32.0 }
pub fn to_bytes(&self) -> &[u8] {
&self.bits
}
pub fn from_bytes(bits: Vec<u8>, dimensions: usize) -> Self {
Self { bits, dimensions }
}
}
fn scalar_distance_scalar(a: &[u8], b: &[u8]) -> f32 {
let mut sum_sq = 0i32;
let chunks = a.len() / 4;
for i in 0..chunks {
let idx = i * 4;
let d0 = (a[idx] as i32) - (b[idx] as i32);
let d1 = (a[idx + 1] as i32) - (b[idx + 1] as i32);
let d2 = (a[idx + 2] as i32) - (b[idx + 2] as i32);
let d3 = (a[idx + 3] as i32) - (b[idx + 3] as i32);
sum_sq += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
}
for i in (chunks * 4)..a.len() {
let diff = (a[i] as i32) - (b[i] as i32);
sum_sq += diff * diff;
}
sum_sq as f32
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn scalar_distance_neon(a: &[u8], b: &[u8]) -> f32 {
use std::arch::aarch64::*;
let len = a.len();
let a_ptr = a.as_ptr();
let b_ptr = b.as_ptr();
let mut sum = vdupq_n_s32(0);
let chunks = len / 8;
let mut idx = 0usize;
for _ in 0..chunks {
let va = vld1_u8(a_ptr.add(idx));
let vb = vld1_u8(b_ptr.add(idx));
let va_u16 = vmovl_u8(va);
let vb_u16 = vmovl_u8(vb);
let va_s16 = vreinterpretq_s16_u16(va_u16);
let vb_s16 = vreinterpretq_s16_u16(vb_u16);
let diff = vsubq_s16(va_s16, vb_s16);
let prod_lo = vmull_s16(vget_low_s16(diff), vget_low_s16(diff));
let prod_hi = vmull_s16(vget_high_s16(diff), vget_high_s16(diff));
sum = vaddq_s32(sum, prod_lo);
sum = vaddq_s32(sum, prod_hi);
idx += 8;
}
let mut total = vaddvq_s32(sum);
for i in (chunks * 8)..len {
let diff = (*a.get_unchecked(i) as i32) - (*b.get_unchecked(i) as i32);
total += diff * diff;
}
total as f32
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn scalar_distance_avx2(a: &[u8], b: &[u8]) -> f32 {
use std::arch::x86_64::*;
let len = a.len();
let mut sum = _mm256_setzero_si256();
let chunks = len / 16;
for i in 0..chunks {
let idx = i * 16;
let va = _mm_loadu_si128(a.as_ptr().add(idx) as *const __m128i);
let vb = _mm_loadu_si128(b.as_ptr().add(idx) as *const __m128i);
let va_lo = _mm256_cvtepu8_epi16(va);
let vb_lo = _mm256_cvtepu8_epi16(vb);
let diff = _mm256_sub_epi16(va_lo, vb_lo);
let prod = _mm256_madd_epi16(diff, diff);
sum = _mm256_add_epi32(sum, prod);
}
let sum_lo = _mm256_castsi256_si128(sum);
let sum_hi = _mm256_extracti128_si256(sum, 1);
let sum_128 = _mm_add_epi32(sum_lo, sum_hi);
let shuffle = _mm_shuffle_epi32(sum_128, 0b10_11_00_01);
let sum_64 = _mm_add_epi32(sum_128, shuffle);
let shuffle2 = _mm_shuffle_epi32(sum_64, 0b00_00_10_10);
let final_sum = _mm_add_epi32(sum_64, shuffle2);
let mut total = _mm_cvtsi128_si32(final_sum);
for i in (chunks * 16)..len {
let diff = (a[i] as i32) - (b[i] as i32);
total += diff * diff;
}
total as f32
}
fn euclidean_squared(a: &[f32], b: &[f32]) -> f32 {
a.iter()
.zip(b)
.map(|(&x, &y)| {
let diff = x - y;
diff * diff
})
.sum()
}
fn kmeans_clustering(vectors: &[Vec<f32>], k: usize, iterations: usize) -> Vec<Vec<f32>> {
use rand::seq::SliceRandom;
use rand::thread_rng;
let mut rng = thread_rng();
let mut centroids: Vec<Vec<f32>> = vectors.choose_multiple(&mut rng, k).cloned().collect();
for _ in 0..iterations {
let mut assignments = vec![Vec::new(); k];
for vector in vectors {
let nearest = centroids
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| {
let dist_a = euclidean_squared(vector, a);
let dist_b = euclidean_squared(vector, b);
dist_a.partial_cmp(&dist_b).unwrap()
})
.map(|(idx, _)| idx)
.unwrap_or(0);
assignments[nearest].push(vector.clone());
}
for (centroid, assigned) in centroids.iter_mut().zip(&assignments) {
if !assigned.is_empty() {
let dim = centroid.len();
*centroid = vec![0.0; dim];
for vector in assigned {
for (i, &v) in vector.iter().enumerate() {
centroid[i] += v;
}
}
let count = assigned.len() as f32;
for v in centroid.iter_mut() {
*v /= count;
}
}
}
}
centroids
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "popcnt")]
#[inline]
unsafe fn hamming_distance_simd_x86(a: &[u8], b: &[u8]) -> u32 {
use std::arch::x86_64::*;
let mut distance = 0u64;
let chunks_a = a.chunks_exact(8);
let chunks_b = b.chunks_exact(8);
let remainder_a = chunks_a.remainder();
let remainder_b = chunks_b.remainder();
for (chunk_a, chunk_b) in chunks_a.zip(chunks_b) {
let a_u64 = u64::from_le_bytes(chunk_a.try_into().unwrap());
let b_u64 = u64::from_le_bytes(chunk_b.try_into().unwrap());
distance += _popcnt64((a_u64 ^ b_u64) as i64) as u64;
}
for (&a_byte, &b_byte) in remainder_a.iter().zip(remainder_b) {
distance += (a_byte ^ b_byte).count_ones() as u64;
}
distance as u32
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn hamming_distance_neon(a: &[u8], b: &[u8]) -> u32 {
use std::arch::aarch64::*;
let len = a.len();
let a_ptr = a.as_ptr();
let b_ptr = b.as_ptr();
let chunks = len / 16;
let mut idx = 0usize;
let mut sum = vdupq_n_u8(0);
for _ in 0..chunks {
let a_vec = vld1q_u8(a_ptr.add(idx));
let b_vec = vld1q_u8(b_ptr.add(idx));
let xor_result = veorq_u8(a_vec, b_vec);
let bits = vcntq_u8(xor_result);
sum = vaddq_u8(sum, bits);
idx += 16;
}
let sum_val = vaddvq_u8(sum) as u32;
let mut remainder_sum = 0u32;
let start = chunks * 16;
for i in start..len {
remainder_sum += (*a.get_unchecked(i) ^ *b.get_unchecked(i)).count_ones();
}
sum_val + remainder_sum
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_scalar_quantization() {
let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let quantized = ScalarQuantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
for (orig, recon) in vector.iter().zip(&reconstructed) {
assert!((orig - recon).abs() < 0.1);
}
}
#[test]
fn test_binary_quantization() {
let vector = vec![1.0, -1.0, 2.0, -2.0, 0.5];
let quantized = BinaryQuantized::quantize(&vector);
assert_eq!(quantized.dimensions, 5);
assert_eq!(quantized.bits.len(), 1); }
#[test]
fn test_binary_distance() {
let v1 = vec![1.0, 1.0, 1.0, 1.0];
let v2 = vec![1.0, 1.0, -1.0, -1.0];
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let dist = q1.distance(&q2);
assert_eq!(dist, 2.0); }
#[test]
fn test_scalar_quantization_roundtrip() {
let test_vectors = vec![
vec![1.0, 2.0, 3.0, 4.0, 5.0],
vec![-10.0, -5.0, 0.0, 5.0, 10.0],
vec![0.1, 0.2, 0.3, 0.4, 0.5],
vec![100.0, 200.0, 300.0, 400.0, 500.0],
];
for vector in test_vectors {
let quantized = ScalarQuantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
assert_eq!(vector.len(), reconstructed.len());
for (orig, recon) in vector.iter().zip(reconstructed.iter()) {
let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
let max_error = (max - min) / 255.0 * 2.0;
assert!(
(orig - recon).abs() < max_error,
"Roundtrip error too large: orig={}, recon={}, error={}",
orig,
recon,
(orig - recon).abs()
);
}
}
}
#[test]
fn test_scalar_distance_symmetry() {
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let v2 = vec![2.0, 3.0, 4.0, 5.0, 6.0];
let q1 = ScalarQuantized::quantize(&v1);
let q2 = ScalarQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Distance is not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_scalar_distance_different_scales() {
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0]; let v2 = vec![10.0, 20.0, 30.0, 40.0, 50.0];
let q1 = ScalarQuantized::quantize(&v1);
let q2 = ScalarQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Distance with different scales not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_scalar_quantization_edge_cases() {
let same_values = vec![5.0, 5.0, 5.0, 5.0];
let quantized = ScalarQuantized::quantize(&same_values);
let reconstructed = quantized.reconstruct();
for (orig, recon) in same_values.iter().zip(reconstructed.iter()) {
assert!((orig - recon).abs() < 0.01);
}
let extreme = vec![f32::MIN / 1e10, 0.0, f32::MAX / 1e10];
let quantized = ScalarQuantized::quantize(&extreme);
let reconstructed = quantized.reconstruct();
assert_eq!(extreme.len(), reconstructed.len());
}
#[test]
fn test_binary_distance_symmetry() {
let v1 = vec![1.0, -1.0, 1.0, -1.0];
let v2 = vec![1.0, 1.0, -1.0, -1.0];
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert_eq!(
dist_ab, dist_ba,
"Binary distance not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab, dist_ba
);
}
#[test]
fn test_int4_quantization() {
let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let quantized = Int4Quantized::quantize(&vector);
let reconstructed = quantized.reconstruct();
assert_eq!(quantized.dimensions, 5);
assert_eq!(quantized.data.len(), 3);
for (orig, recon) in vector.iter().zip(&reconstructed) {
let max_error = (5.0 - 1.0) / 15.0 * 2.0;
assert!(
(orig - recon).abs() < max_error,
"Int4 roundtrip error too large: orig={}, recon={}",
orig,
recon
);
}
}
#[test]
fn test_int4_distance() {
let v1 = vec![0.0, 5.0, 10.0, 15.0];
let v2 = vec![0.0, 3.0, 12.0, 15.0];
let q1 = Int4Quantized::quantize(&v1);
let q2 = Int4Quantized::quantize(&v2);
let dist = q1.distance(&q2);
assert!(
dist > 0.0,
"Distance should be positive, got {}. q1.data={:?}, q2.data={:?}",
dist,
q1.data,
q2.data
);
}
#[test]
fn test_int4_distance_symmetry() {
let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let v2 = vec![2.0, 3.0, 4.0, 5.0, 6.0];
let q1 = Int4Quantized::quantize(&v1);
let q2 = Int4Quantized::quantize(&v2);
let dist_ab = q1.distance(&q2);
let dist_ba = q2.distance(&q1);
assert!(
(dist_ab - dist_ba).abs() < 0.01,
"Int4 distance not symmetric: d(a,b)={}, d(b,a)={}",
dist_ab,
dist_ba
);
}
#[test]
fn test_int4_compression_ratio() {
assert_eq!(Int4Quantized::compression_ratio(), 8.0);
}
#[test]
fn test_binary_fast_hamming() {
let a = vec![0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xAA];
let b = vec![0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x55];
let distance = BinaryQuantized::hamming_distance_fast(&a, &b);
assert_eq!(distance, 72);
}
#[test]
fn test_binary_similarity() {
let v1 = vec![1.0; 8]; let v2 = vec![1.0; 8];
let q1 = BinaryQuantized::quantize(&v1);
let q2 = BinaryQuantized::quantize(&v2);
let sim = q1.similarity(&q2);
assert!(
(sim - 1.0).abs() < 0.001,
"Same vectors should have similarity 1.0"
);
}
#[test]
fn test_binary_compression_ratio() {
assert_eq!(BinaryQuantized::compression_ratio(), 32.0);
}
}