Skip to main content

ProductQuantizer

Struct ProductQuantizer 

Source
pub struct ProductQuantizer<S: EmbeddingSpace, const M: usize, const NBITS: usize>
where [(); { _ }]:,
{ /* private fields */ }
Expand description

Product Quantizer for vector compression and fast distance computation.

Product Quantization splits a D-dimensional vector into M subvectors, learns a codebook for each subspace via k-means, and encodes each subvector as the index of its nearest centroid.

Const generics:

  • M: number of subquantizers
  • NBITS: bits per centroid index (determines storage and centroid count)

The embedding dimension must be divisible by M.

ProductQuantizer itself implements EmbeddingSpace with EmbeddingData = PQCode<M, NBITS>, so once it has been trained it can be used directly with FlatIndex and other structures.

This implementation supports:

  • Training via k-means++ on each subspace
  • Encoding: vector -> PQCode<M, NBITS>
  • Decoding: PQCode<M, NBITS> -> reconstructed vector
  • ADC: Asymmetric Distance Computation via precomputed distance tables
  • SDC: Symmetric Distance Computation via precomputed centroid-to-centroid distances

Note: PQ internally uses L2 distance for subspace quantization.

Implementations§

Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
where [(); { _ }]:, <S::EmbeddingData as Embedding>::Scalar: Into<f32> + From<f32>,

Source

pub const KSUB: usize

The number of centroids per subspace (2^NBITS).

Source

pub fn new(space: S) -> Self

Create a new Product Quantizer.

§Arguments
  • space - The embedding space
§Panics

Panics if the embedding dimension is not divisible by M.

Source

pub fn space(&self) -> &S

Get a reference to the underlying embedding space.

Source

pub fn m(&self) -> usize

Number of subquantizers.

Source

pub fn ksub(&self) -> usize

Number of centroids per subspace.

Source

pub fn dsub(&self) -> usize

Source

pub fn encode_embedding( &mut self, embedding: &S::EmbeddingData, ) -> PQCode<M, NBITS>

Encode a single embedding to a PQ code.

Source

pub fn decode_code(&self, code: &PQCode<M, NBITS>) -> S::EmbeddingData

Decode a PQ code to a reconstructed embedding.

Source

pub fn train_on(&mut self, data: &[S::EmbeddingData])

Train the quantizer on a dataset using k-means++.

Source

pub fn build_distance_table( &mut self, query: &S::EmbeddingData, ) -> PQDistanceTable<S, M, NBITS>

Build a distance table for ADC (Asymmetric Distance Computation).

Source

pub fn sdc_table(&self) -> Option<&SDCTable<M, NBITS>>

Get the SDC table for symmetric distance computation.

Returns None if the quantizer has not been trained.

Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> ProductQuantizer<S, M, NBITS>
where [(); { _ }]:, <S::EmbeddingData as Embedding>::Scalar: Into<f32> + From<f32>,

Source

pub fn save_codebook(&self, path: &Path) -> Result<(), PQError>

Save the trained codebook to a file.

Source

pub fn load_codebook(path: &Path, space: S) -> Result<Self, PQError>

Load a trained codebook from a file and reconstruct the ProductQuantizer.

Trait Implementations§

Source§

impl<S: Clone + EmbeddingSpace, const M: usize, const NBITS: usize> Clone for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:,

Source§

fn clone(&self) -> ProductQuantizer<S, M, NBITS>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Codec<S> for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:, <S::EmbeddingData as Embedding>::Scalar: Into<f32> + From<f32>,

Source§

type Encoded = PQCode<M, NBITS>

The compressed representation produced by encode.
Source§

type EncodeRef<'b> = EagerOpRef<PQCode<M, NBITS>, PQError> where Self: 'b

Handle to an in-flight encode operation.
Source§

type DecodeRef<'b> = EagerOpRef<<S as EmbeddingSpace>::EmbeddingData, PQError> where Self: 'b

Handle to an in-flight decode operation.
Source§

type TrainRef<'b> = EagerOpRef<(), PQError> where Self: 'b

Handle to an in-flight train operation.
Source§

type ObserveRef<'b> = EagerOpRef<(), PQError> where Self: 'b

Handle to an in-flight observe operation.
Source§

fn encode(&mut self, embedding: &S::EmbeddingData) -> Self::EncodeRef<'_>

Compress a single embedding.
Source§

fn encode_batch( &mut self, embeddings: &[S::EmbeddingData], ) -> Vec<Self::EncodeRef<'_>>

Compress a batch of embeddings.
Source§

fn decode(&self, encoded: &Self::Encoded) -> Self::DecodeRef<'_>

Reconstruct an embedding from its compressed form.
Source§

fn decode_batch(&self, encoded: &[Self::Encoded]) -> Vec<Self::DecodeRef<'_>>

Reconstruct a batch of embeddings from their compressed forms.
Source§

fn code_size(&self) -> Option<usize>

Returns the fixed byte-size of an encoded vector, if the encoding is fixed-size. Variable-length encodings return None.
Source§

fn train(&mut self, embeddings: &[S::EmbeddingData]) -> Self::TrainRef<'_>

Batch training from a set of embeddings. Read more
Source§

fn observe(&mut self, _embedding: &S::EmbeddingData) -> Self::ObserveRef<'_>

Online/incremental update from a single observation.
Source§

fn observe_batch( &mut self, embeddings: &[S::EmbeddingData], ) -> Vec<Self::ObserveRef<'_>>

Online/incremental update from a batch of observations.
Source§

fn is_trained(&self) -> bool

Whether the codec has been trained and is ready to encode/decode.
Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Debug for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:,

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<S: EmbeddingSpace + Default, const M: usize, const NBITS: usize> Default for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:, <S::EmbeddingData as Embedding>::Scalar: Into<f32> + From<f32>,

Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> EmbeddingSpace for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:, <S::EmbeddingData as Embedding>::Scalar: Into<f32> + From<f32>,

Source§

type EmbeddingData = PQCode<M, NBITS>

Source§

type DistanceValue = <S as EmbeddingSpace>::DistanceValue

Source§

type Prepared = PQCode<M, NBITS>

Precomputed state for efficient distance queries.
Source§

fn space_id(&self) -> &'static str

Source§

fn distance( &self, lhs: &Self::EmbeddingData, rhs: &Self::EmbeddingData, ) -> Self::DistanceValue

Compute distance between two embeddings.
Source§

fn prepare(&self, embedding: &Self::EmbeddingData) -> Self::Prepared

Prepare a query embedding for efficient repeated distance computations.
Source§

fn distance_prepared( &self, prepared: &Self::Prepared, target: &Self::EmbeddingData, ) -> Self::DistanceValue

Compute distance using prepared query state.
Source§

fn length() -> usize

Source§

fn slice_distance(a: &[f32], b: &[f32]) -> f32

Compute the space’s distance metric on raw scalar slices of arbitrary length. Read more
Source§

fn infinite_mapping(native_distance: &Self::DistanceValue) -> f32

Maps a distance from a finite range onto an infinite range (e.g., tan(pi * x / 4)).
Source§

fn create_embedding( data: Vec<<Self::EmbeddingData as Embedding>::Scalar>, ) -> Self::EmbeddingData

Source§

fn create_distance(dist: f32) -> Self::DistanceValue

Source§

fn zero_vector() -> Self::EmbeddingData

Source§

fn zero_distance() -> Self::DistanceValue

Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> PartialEq for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:,

Source§

fn eq(&self, other: &Self) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<S: EmbeddingSpace, const M: usize, const NBITS: usize> Eq for ProductQuantizer<S, M, NBITS>
where [(); { _ }]:,

Auto Trait Implementations§

§

impl<S, const M: usize, const NBITS: usize> Freeze for ProductQuantizer<S, M, NBITS>
where S: Freeze,

§

impl<S, const M: usize, const NBITS: usize> RefUnwindSafe for ProductQuantizer<S, M, NBITS>
where S: RefUnwindSafe,

§

impl<S, const M: usize, const NBITS: usize> Send for ProductQuantizer<S, M, NBITS>

§

impl<S, const M: usize, const NBITS: usize> Sync for ProductQuantizer<S, M, NBITS>

§

impl<S, const M: usize, const NBITS: usize> Unpin for ProductQuantizer<S, M, NBITS>
where S: Unpin,

§

impl<S, const M: usize, const NBITS: usize> UnsafeUnpin for ProductQuantizer<S, M, NBITS>
where S: UnsafeUnpin,

§

impl<S, const M: usize, const NBITS: usize> UnwindSafe for ProductQuantizer<S, M, NBITS>
where S: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

Source§

fn equivalent(&self, key: &K) -> bool

Compare self to key and return true if they are equal.
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V