pub struct ProductQuantizer<S: EmbeddingSpace, const M: usize, const NBITS: usize> { /* private fields */ }
Product Quantizer for vector compression and fast distance computation.
Product Quantization splits a D-dimensional vector into M subvectors, learns a codebook for each subspace via k-means, and encodes each subvector as the index of its nearest centroid.
Const generics:
- M: number of subquantizers
- NBITS: bits per centroid index (determines storage and centroid count)
The embedding dimension must be divisible by M.
After training, ProductQuantizer implements EmbeddingSpace with
EmbeddingData = PQCode<M, NBITS>, allowing direct use with FlatIndex
and other structures.
This implementation supports:
- Training via k-means++ on each subspace
- Encoding: vector -> PQCode<M, NBITS>
- Decoding: PQCode<M, NBITS> -> reconstructed vector
- ADC: Asymmetric Distance Computation via precomputed distance tables
- SDC: Symmetric Distance Computation via precomputed centroid-to-centroid distances
Note: PQ internally uses L2 distance for subspace quantization.
Implementations§
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
pub fn dsub(&self) -> usize
Source pub fn encode_embedding(
&mut self,
embedding: &S::EmbeddingData,
) -> PQCode<M, NBITS>
pub fn encode_embedding( &mut self, embedding: &S::EmbeddingData, ) -> PQCode<M, NBITS>
Encode a single embedding to a PQ code.
Source pub fn decode_code(&self, code: &PQCode<M, NBITS>) -> S::EmbeddingData
pub fn decode_code(&self, code: &PQCode<M, NBITS>) -> S::EmbeddingData
Decode a PQ code to a reconstructed embedding.
Source pub fn train_on(&mut self, data: &[S::EmbeddingData])
pub fn train_on(&mut self, data: &[S::EmbeddingData])
Train the quantizer on a dataset using k-means++.
Source pub fn build_distance_table(
&mut self,
query: &S::EmbeddingData,
) -> PQDistanceTable<S, M, NBITS>
pub fn build_distance_table( &mut self, query: &S::EmbeddingData, ) -> PQDistanceTable<S, M, NBITS>
Build a distance table for ADC (Asymmetric Distance Computation).
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
Trait Implementations§
Source§impl<S: Clone + EmbeddingSpace, const M: usize, const NBITS: usize> Clone for ProductQuantizer<S, M, NBITS>
impl<S: Clone + EmbeddingSpace, const M: usize, const NBITS: usize> Clone for ProductQuantizer<S, M, NBITS>
Source§fn clone(&self) -> ProductQuantizer<S, M, NBITS>
fn clone(&self) -> ProductQuantizer<S, M, NBITS>
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Codec<S> for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Codec<S> for ProductQuantizer<S, M, NBITS>
Source§type EncodeRef<'b> = EagerOpRef<PQCode<M, NBITS>, PQError>
where
Self: 'b
type EncodeRef<'b> = EagerOpRef<PQCode<M, NBITS>, PQError> where Self: 'b
Source§type DecodeRef<'b> = EagerOpRef<<S as EmbeddingSpace>::EmbeddingData, PQError>
where
Self: 'b
type DecodeRef<'b> = EagerOpRef<<S as EmbeddingSpace>::EmbeddingData, PQError> where Self: 'b
Source§type TrainRef<'b> = EagerOpRef<(), PQError>
where
Self: 'b
type TrainRef<'b> = EagerOpRef<(), PQError> where Self: 'b
Source§type ObserveRef<'b> = EagerOpRef<(), PQError>
where
Self: 'b
type ObserveRef<'b> = EagerOpRef<(), PQError> where Self: 'b
Source§fn encode(&mut self, embedding: &S::EmbeddingData) -> Self::EncodeRef<'_>
fn encode(&mut self, embedding: &S::EmbeddingData) -> Self::EncodeRef<'_>
Source§fn encode_batch(
&mut self,
embeddings: &[S::EmbeddingData],
) -> Vec<Self::EncodeRef<'_>>
fn encode_batch( &mut self, embeddings: &[S::EmbeddingData], ) -> Vec<Self::EncodeRef<'_>>
Source§fn decode(&self, encoded: &Self::Encoded) -> Self::DecodeRef<'_>
fn decode(&self, encoded: &Self::Encoded) -> Self::DecodeRef<'_>
Source§fn decode_batch(&self, encoded: &[Self::Encoded]) -> Vec<Self::DecodeRef<'_>>
fn decode_batch(&self, encoded: &[Self::Encoded]) -> Vec<Self::DecodeRef<'_>>
Source§fn code_size(&self) -> Option<usize>
fn code_size(&self) -> Option<usize>
None.
Source§fn train(&mut self, embeddings: &[S::EmbeddingData]) -> Self::TrainRef<'_>
fn train(&mut self, embeddings: &[S::EmbeddingData]) -> Self::TrainRef<'_>
Source§fn observe(&mut self, _embedding: &S::EmbeddingData) -> Self::ObserveRef<'_>
fn observe(&mut self, _embedding: &S::EmbeddingData) -> Self::ObserveRef<'_>
Source§fn observe_batch(
&mut self,
embeddings: &[S::EmbeddingData],
) -> Vec<Self::ObserveRef<'_>>
fn observe_batch( &mut self, embeddings: &[S::EmbeddingData], ) -> Vec<Self::ObserveRef<'_>>
Source§fn is_trained(&self) -> bool
fn is_trained(&self) -> bool
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Debug for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Debug for ProductQuantizer<S, M, NBITS>
Source§impl<S: EmbeddingSpace + Default, const M: usize, const NBITS: usize> Default for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace + Default, const M: usize, const NBITS: usize> Default for ProductQuantizer<S, M, NBITS>
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> EmbeddingSpace for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> EmbeddingSpace for ProductQuantizer<S, M, NBITS>
type EmbeddingData = PQCode<M, NBITS>
type DistanceValue = <S as EmbeddingSpace>::DistanceValue
fn space_id(&self) -> &'static str
Source§fn distance(
&self,
lhs: &Self::EmbeddingData,
rhs: &Self::EmbeddingData,
) -> Self::DistanceValue
fn distance( &self, lhs: &Self::EmbeddingData, rhs: &Self::EmbeddingData, ) -> Self::DistanceValue
Source§fn prepare(&self, embedding: &Self::EmbeddingData) -> Self::Prepared
fn prepare(&self, embedding: &Self::EmbeddingData) -> Self::Prepared
Source§fn distance_prepared(
&self,
prepared: &Self::Prepared,
target: &Self::EmbeddingData,
) -> Self::DistanceValue
fn distance_prepared( &self, prepared: &Self::Prepared, target: &Self::EmbeddingData, ) -> Self::DistanceValue
fn length() -> usize
Source§fn slice_distance(a: &[f32], b: &[f32]) -> f32
fn slice_distance(a: &[f32], b: &[f32]) -> f32
Source§fn infinite_mapping(native_distance: &Self::DistanceValue) -> f32
fn infinite_mapping(native_distance: &Self::DistanceValue) -> f32
fn create_embedding( data: Vec<<Self::EmbeddingData as Embedding>::Scalar>, ) -> Self::EmbeddingData
fn create_distance(dist: f32) -> Self::DistanceValue
fn zero_vector() -> Self::EmbeddingData
fn zero_distance() -> Self::DistanceValue
Source§impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> PartialEq for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> PartialEq for ProductQuantizer<S, M, NBITS>
impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Eq for ProductQuantizer<S, M, NBITS>
Auto Trait Implementations§
impl<S, const M: usize, const NBITS: usize> Freeze for ProductQuantizer<S, M, NBITS> where
S: Freeze,
impl<S, const M: usize, const NBITS: usize> RefUnwindSafe for ProductQuantizer<S, M, NBITS> where
S: RefUnwindSafe,
impl<S, const M: usize, const NBITS: usize> Send for ProductQuantizer<S, M, NBITS>
impl<S, const M: usize, const NBITS: usize> Sync for ProductQuantizer<S, M, NBITS>
impl<S, const M: usize, const NBITS: usize> Unpin for ProductQuantizer<S, M, NBITS> where
S: Unpin,
impl<S, const M: usize, const NBITS: usize> UnsafeUnpin for ProductQuantizer<S, M, NBITS> where
S: UnsafeUnpin,
impl<S, const M: usize, const NBITS: usize> UnwindSafe for ProductQuantizer<S, M, NBITS> where
S: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.