pub struct ScalarQuantizer { /* private fields */ }Expand description
A central parameter collection for a scalar quantization schema.
§Example
A self-contained end-to-end example containing training, compression, and distance computations is shown below.
use diskann_quantization::{
AsFunctor, CompressInto,
distances,
num::Positive, bits::MutBitSlice,
scalar::{
self,
ScalarQuantizer,
train::ScalarQuantizationParameters,
CompensatedVector, MutCompensatedVectorRef,
CompensatedIP, CompensatedSquaredL2,
}
};
use diskann_utils::{views::Matrix, Reborrow, ReborrowMut};
use diskann_vector::DistanceFunction;
// A small training set consisting of two 5-dimensional vectors.
let mut data = Matrix::<f32>::new(0.0, 2, 5);
data.row_mut(0).copy_from_slice(&[-1.0, -1.0, -1.0, -1.0, -1.0]);
data.row_mut(1).copy_from_slice(&[1.0, 1.0, 1.0, 1.0, 1.0]);
let trainer = ScalarQuantizationParameters::new(Positive::new(1.0).unwrap());
let quantizer: ScalarQuantizer = trainer.train(data.as_view());
// The dimension of the quantizer is based on the dimension of the training data.
assert_eq!(quantizer.dim(), data.ncols());
// Compress the two input vectors.
// For one vector, we will use the "boxed" API. The other we will construct "manually".
// Boxed API
let mut c0 = CompensatedVector::<8>::new_boxed(data.ncols());
// Manual construction.
let mut buffer: Vec<u8> = vec![0; c0.vector().bytes()];
let mut compensation = scalar::Compensation(0.0);
let mut c1 = MutCompensatedVectorRef::new(
MutBitSlice::new(buffer.as_mut_slice(), data.ncols()).unwrap(),
&mut compensation
);
quantizer.compress_into(data.row(0), c0.reborrow_mut()).unwrap();
quantizer.compress_into(data.row(1), c1.reborrow_mut()).unwrap();
// Compute inner product.
let ip: CompensatedIP = quantizer.as_functor();
let distance: distances::Result<f32> = ip.evaluate_similarity(c0.reborrow(), c1.reborrow());
// The inner product returned as `f32` is the same as a SimilarityScore and is
// therefore the negative of the mathematical value.
assert!((distance.unwrap() - 5.0).abs() < 0.00001);
// Compute squared Euclidean distance.
let l2: CompensatedSquaredL2 = quantizer.as_functor();
let distance: distances::Result<f32> = l2.evaluate_similarity(c0.reborrow(), c1.reborrow());
assert!((distance.unwrap() - 20.0).abs() < 0.00001);Implementations§
Source§impl ScalarQuantizer
impl ScalarQuantizer
Sourcepub fn new(scale: f32, shift: Vec<f32>, mean_norm: Option<f32>) -> Self
pub fn new(scale: f32, shift: Vec<f32>, mean_norm: Option<f32>) -> Self
Construct a new scalar quantizer.
Sourcepub fn dim(&self) -> usize
pub fn dim(&self) -> usize
Return the number of dimensions this ScalarQuantizer has been trained for.
Sourcepub fn shift_square_norm(&self) -> f32
pub fn shift_square_norm(&self) -> f32
Return the square norm of the dataset shift.
Sourcepub fn shift(&self) -> &[f32]
pub fn shift(&self) -> &[f32]
Return the per-dimension shift vector.
This vector is meant to accomplish two goals:
- Centers the data around the training dataset mean.
- Offsets each dimension into a range that can be encoded in unsigned values.
Sourcepub fn rescale(&self, x: &mut [f32]) -> Result<(), MeanNormMissing>
pub fn rescale(&self, x: &mut [f32]) -> Result<(), MeanNormMissing>
Rescale the argument so it has the average norm of the training set.
This can be used to help with compressing queries that come from a different distribution when the norm of the query may be safely discarded for purposes of distance computations.
This operation can fail if the mean norm was not computed during training.
Trait Implementations§
Source§impl AsFunctor<CompensatedCosineNormalized> for ScalarQuantizer
impl AsFunctor<CompensatedCosineNormalized> for ScalarQuantizer
fn as_functor(&self) -> CompensatedCosineNormalized
Source§impl AsFunctor<CompensatedIP> for ScalarQuantizer
impl AsFunctor<CompensatedIP> for ScalarQuantizer
fn as_functor(&self) -> CompensatedIP
Source§impl AsFunctor<CompensatedSquaredL2> for ScalarQuantizer
impl AsFunctor<CompensatedSquaredL2> for ScalarQuantizer
fn as_functor(&self) -> CompensatedSquaredL2
Source§impl Clone for ScalarQuantizer
impl Clone for ScalarQuantizer
Source§fn clone(&self) -> ScalarQuantizer
fn clone(&self) -> ScalarQuantizer
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source. Read moreSource§impl<const NBITS: usize, T, Perm> CompressInto<&[T], BitSliceBase<NBITS, Unsigned, MutSlicePtr<'_, u8>, Perm>> for ScalarQuantizer
impl<const NBITS: usize, T, Perm> CompressInto<&[T], BitSliceBase<NBITS, Unsigned, MutSlicePtr<'_, u8>, Perm>> for ScalarQuantizer
Source§fn compress_into(
&self,
from: &[T],
into: MutBitSlice<'_, NBITS, Unsigned, Perm>,
) -> Result<(), Self::Error>
fn compress_into( &self, from: &[T], into: MutBitSlice<'_, NBITS, Unsigned, Perm>, ) -> Result<(), Self::Error>
Compress the input vector `from` into the bitslice `into`.
This method does not compute compensation coefficients required for fast inner product computations. If only L2 distances are desired, this method can be slightly faster.
§Error
Returns an error if the input contains NaN.
§Panics
Panics if:
- `from.len() != self.dim()`: Vector to be compressed must have the same dimensionality as the quantizer.
- `into.len() != self.dim()`: Compressed vector must have the same dimensionality as the quantizer.
Source§type Error = InputContainsNaN
type Error = InputContainsNaN
Source§impl<const NBITS: usize, T, Perm> CompressInto<&[T], VectorBase<NBITS, Unsigned, MutSlicePtr<'_, u8>, Mut<'_, Compensation>, Perm>> for ScalarQuantizer
impl<const NBITS: usize, T, Perm> CompressInto<&[T], VectorBase<NBITS, Unsigned, MutSlicePtr<'_, u8>, Mut<'_, Compensation>, Perm>> for ScalarQuantizer
Source§fn compress_into(
&self,
from: &[T],
into: MutCompensatedVectorRef<'_, NBITS, Perm>,
) -> Result<(), Self::Error>
fn compress_into( &self, from: &[T], into: MutCompensatedVectorRef<'_, NBITS, Perm>, ) -> Result<(), Self::Error>
Compress the input vector `from` into the bitslice `into`.
This method computes and stores the compensation coefficient required for fast inner product computations.
§Error
Returns an error if the input contains NaN.
§Panics
Panics if:
- `from.len() != self.dim()`: Vector to be compressed must have the same dimensionality as the quantizer.
- `into.len() != self.dim()`: Compressed vector must have the same dimensionality as the quantizer.
Source§type Error = InputContainsNaN
type Error = InputContainsNaN
Auto Trait Implementations§
impl Freeze for ScalarQuantizer
impl RefUnwindSafe for ScalarQuantizer
impl Send for ScalarQuantizer
impl Sync for ScalarQuantizer
impl Unpin for ScalarQuantizer
impl UnsafeUnpin for ScalarQuantizer
impl UnwindSafe for ScalarQuantizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more