use std::io;
use super::QuantizationCodec;
/// A vector quantized to one bit per component.
///
/// Component `i` of the source vector is stored in bit `i % 8` (least-significant bit first)
/// of byte `i / 8`. A bit is set when the component compares `>= 0.0`.
#[derive(Debug, Clone)]
pub struct BinaryQuantizedVector {
    /// Packed sign bits. Bits of the final byte at positions `>= dimension % 8` are padding
    /// whenever `dimension` is not a multiple of 8.
    pub data: Vec<u8>,
    /// Number of meaningful bits stored in `data`.
    dimension: usize,
}

impl BinaryQuantizedVector {
    /// Quantizes `vector` to one bit per component.
    ///
    /// A component maps to `1` when it is `>= 0.0` (this includes `-0.0`) and to `0`
    /// otherwise; `NaN` compares false and therefore maps to `0`.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is empty.
    #[must_use]
    pub fn from_f32(vector: &[f32]) -> Self {
        assert!(!vector.is_empty(), "Cannot quantize empty vector");
        // Each group of up to eight components becomes one byte, LSB first.
        let data: Vec<u8> = vector
            .chunks(8)
            .map(|chunk| {
                chunk
                    .iter()
                    .enumerate()
                    .filter(|&(_, &value)| value >= 0.0)
                    .fold(0u8, |byte, (bit, _)| byte | (1u8 << bit))
            })
            .collect();
        Self {
            data,
            dimension: vector.len(),
        }
    }

    /// Returns the number of components (bits) in the vector.
    #[must_use]
    pub fn dimension(&self) -> usize {
        self.dimension
    }

    /// Returns the number of bytes used by the packed bit storage.
    #[must_use]
    pub fn memory_size(&self) -> usize {
        self.data.len()
    }

    /// Unpacks the stored bits into one `bool` per component.
    ///
    /// # Panics
    ///
    /// Panics if `data` holds fewer bytes than `dimension` bits require.
    #[must_use]
    pub fn get_bits(&self) -> Vec<bool> {
        (0..self.dimension)
            .map(|i| {
                let byte_idx = i / 8;
                let bit_idx = i % 8;
                (self.data[byte_idx] >> bit_idx) & 1 == 1
            })
            .collect()
    }

    /// Counts the bit positions at which `self` and `other` differ.
    ///
    /// Only the first `dimension` bits take part in the comparison: padding bits in the
    /// final byte are masked out, so the result never exceeds `dimension` even if `data`
    /// was modified or deserialized with stray padding bits set.
    ///
    /// A dimension mismatch triggers a debug assertion. In release builds only the bits
    /// common to both vectors are compared.
    #[must_use]
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        debug_assert_eq!(
            self.dimension, other.dimension,
            "Dimension mismatch in hamming_distance"
        );
        let compared_bits = self.dimension.min(other.dimension);
        let full_bytes = compared_bits / 8;
        let tail_bits = compared_bits % 8;

        let mut distance: u32 = self
            .data
            .iter()
            .zip(other.data.iter())
            .take(full_bytes)
            .map(|(&a, &b)| (a ^ b).count_ones())
            .sum();

        if tail_bits != 0 {
            if let (Some(&a), Some(&b)) = (self.data.get(full_bytes), other.data.get(full_bytes)) {
                // Keep only the low `tail_bits` bits; the rest of the byte is padding.
                let mask = (1u8 << tail_bits) - 1;
                distance += ((a ^ b) & mask).count_ones();
            }
        }
        distance
    }

    /// Returns the fraction of matching bits: `1.0 - hamming_distance / dimension`.
    ///
    /// Two zero-dimensional vectors have no differing bits and yield `1.0` rather than the
    /// `NaN` that `0 / 0` would produce.
    #[must_use]
    // Integer-to-f32 conversion is lossy above 2^24; the result is only a ratio.
    #[allow(clippy::cast_precision_loss)]
    pub fn hamming_similarity(&self, other: &Self) -> f32 {
        let distance = self.hamming_distance(other);
        if self.dimension == 0 {
            return 1.0;
        }
        1.0 - (distance as f32 / self.dimension as f32)
    }
}
impl QuantizationCodec for BinaryQuantizedVector {
    /// Serializes the vector as a 4-byte little-endian `u32` dimension followed by the
    /// packed bit bytes.
    ///
    /// # Panics
    ///
    /// Panics if the dimension does not fit in a `u32`.
    fn to_bytes(&self) -> Vec<u8> {
        // Checked conversion: the header is a fixed-width u32, so a larger dimension
        // cannot be represented and is treated as an invariant violation.
        let dimension = u32::try_from(self.dimension).unwrap_or_else(|_| {
            panic!(
                "BinaryQuantizedVector dimension {} exceeds u32::MAX for serialization",
                self.dimension
            )
        });
        let mut bytes = Vec::with_capacity(4 + self.data.len());
        bytes.extend_from_slice(&dimension.to_le_bytes());
        bytes.extend_from_slice(&self.data);
        bytes
    }

    /// Deserializes a vector from the layout written by `to_bytes`.
    ///
    /// Bytes beyond the encoded payload are ignored. Padding bits in the final data byte
    /// (positions `>= dimension % 8`) are cleared so that they cannot affect later bit
    /// comparisons.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidData` when the input is shorter than the 4-byte
    /// header, shorter than the header plus the data length implied by the dimension, or
    /// when the encoded dimension does not fit in `usize`.
    fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
        if bytes.len() < 4 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Not enough bytes for BinaryQuantizedVector header",
            ));
        }
        let encoded_dimension = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        let dimension = usize::try_from(encoded_dimension).map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "BinaryQuantizedVector dimension does not fit in usize",
            )
        })?;
        let expected_data_len = dimension.div_ceil(8);
        if bytes.len() < 4 + expected_data_len {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "Not enough bytes for BinaryQuantizedVector data: expected {}, got {}",
                    4 + expected_data_len,
                    bytes.len()
                ),
            ));
        }
        let mut data = bytes[4..4 + expected_data_len].to_vec();
        // Clear padding bits of the last byte so untrusted input cannot smuggle in bits
        // beyond `dimension`.
        let tail_bits = dimension % 8;
        if tail_bits != 0 {
            if let Some(last) = data.last_mut() {
                *last &= (1u8 << tail_bits) - 1;
            }
        }
        Ok(Self { data, dimension })
    }
}