// diskann_quantization/scalar/mod.rs
/*
 * Copyright (c) Microsoft Corporation.
 * Licensed under the MIT license.
 */

//! Scalar quantization training, compression data, and distance comparisons.
//!
//! Scalar quantization works by converting floating point values to small n-bit integers
//! using the formula
//! ```math
//! X' = round((X - B) / a).clamp(0, 2^n - 1)
//! ```
//! where `B` is a shift vector and `a` is a scaling parameter. The training algorithm is this:
//!
//! 1. Find the mean vector M of the training data set.
//! 2. Compute the standard deviation of each dimension and find `stdmax`: the maximum
//!    standard deviation.
//! 3. The parameter `a` is then `(2 * S * stdmax) / (2^n - 1)` where `S` is configurable
//!    in [`train::ScalarQuantizationParameters`].
//! 4. `B` is finally computed as `M - S * stdmax`.
//!
//! Compression then via [`ScalarQuantizer`] simply consists of applying the above formula
//! to each vector.
//!
//! # Training
//!
//! Training is provided by the [`train::ScalarQuantizationParameters`] struct.
//! See the struct level documentation for more details.
//!
//! # Compensated Vectors
//!
//! Scalar compression shifts the data both by the dataset mean and to move the
//! representable range into a domain representable by an unsigned integer.
//!
//! This does not preserve inner-products at all and can result in a scaled value for
//! L2 computations.
//!
//! To deal with this, we can use [`CompensatedVector`]s, which consist of both the scalar
//! quantization integer codes and the scaled inner product of the compressed value with the
//! data set mean. This enables inner-products to be computed efficiently and correctly, at
//! the cost of some extra storage.
//!
//! See the struct-level documentation for more information as well as the documentation
//! for distance function objects:
//!
//! * [`CompensatedSquaredL2`]
//! * [`CompensatedIP`]
//!
//! # Example
//!
//! In this example, we will do the following:
//!
//! 1. Create a sample data set with normally distributed data using random offsets.
//! 2. Train a 4-bit scalar quantizer on this data.
//! 3. Compress elements of the dataset using the quantizer.
//! 4. Perform distance computations using the compressed vectors.
//!
//! ```
//! use diskann_quantization::{
//!     AsFunctor, CompressInto,
//!     distances,
//!     scalar::{self, train, CompensatedVector, CompensatedIP, CompensatedSquaredL2},
//!     num::Positive,
//! };
//! use diskann_utils::{Reborrow, ReborrowMut, views::Matrix};
//! use rand::{rngs::StdRng, SeedableRng, distr::Distribution};
//! use rand_distr::StandardNormal;
//! use diskann_vector::{PureDistanceFunction, DistanceFunction, distance};
//!
//! let dim = 20;
//! let nvectors = 100;
//! let distribution = StandardNormal;
//! let mut rng = StdRng::seed_from_u64(0xc674c06f4f8013f7);
//! // Construct a set of offsets for each dimension.
//! let offset: Vec<f32> = (0..dim).map(|_| distribution.sample(&mut rng)).collect();
//! // The output dataset.
//! let mut data = Matrix::<f32>::new(0.0, nvectors, dim);
//! for row in data.row_iter_mut() {
//!     std::iter::zip(row.iter_mut(), offset.iter()).for_each(|(r, i)| {
//!         let v: f32 = distribution.sample(&mut rng);
//!         *r = i + v;
//!     });
//! }
//!
//! // Create parameters for 4-bit compression.
//! // Here, we use 2.0 standard deviations as our cutoff.
//! let p = train::ScalarQuantizationParameters::new(Positive::new(2.0).unwrap());
//!
//! // Train a quantizer.
//! let quantizer: scalar::ScalarQuantizer = p.train(data.as_view());
//!
//! // Compress vectors 0 and 1 into their scalar quantized representation.
//! let mut v0 = CompensatedVector::<4>::new_boxed(quantizer.dim());
//! let mut v1 = CompensatedVector::<4>::new_boxed(quantizer.dim());
//! quantizer.compress_into(data.row(0), v0.reborrow_mut()).unwrap();
//! quantizer.compress_into(data.row(1), v1.reborrow_mut()).unwrap();
//!
//! // Compute inner product distances.
//! let f: CompensatedIP = quantizer.as_functor();
//! let distance_quantized: distances::Result<f32> = f.evaluate_similarity(
//!     v0.reborrow(),
//!     v1.reborrow()
//! );
//! let distance_full: f32 = distance::InnerProduct::evaluate(data.row(0), data.row(1));
//! let relative_error = (distance_quantized.unwrap() - distance_full).abs() / distance_full.abs();
//! assert!(relative_error < 0.02);
//!
//! // Compute squared l2 distances.
//! let f: CompensatedSquaredL2 = quantizer.as_functor();
//! let distance_quantized: distances::Result<f32> = f.evaluate_similarity(
//!     v0.reborrow(),
//!     v1.reborrow()
//! );
//! let distance_full: f32 = distance::SquaredL2::evaluate(data.row(0), data.row(1));
//! let relative_error = (distance_quantized.unwrap() - distance_full).abs() / distance_full.abs();
//! assert!(relative_error < 0.03);
//! ```

pub mod train;

mod quantizer;
mod vectors;

/////////////
// Exports //
/////////////

/// Return the number of quantization steps for an `NBITS`-bit code: `2^NBITS - 1`.
///
/// This is the largest value representable by an unsigned `NBITS`-bit integer code,
/// i.e. the denominator in the scale computation `a = (2 * S * stdmax) / (2^NBITS - 1)`
/// described in the module-level documentation. Multiplying a value normalized to
/// `[0, 1]` by this factor maps it onto the integer code range `[0, 2^NBITS - 1]`.
pub const fn bit_scale<const NBITS: usize>() -> f32 {
    // Computed exactly in integer arithmetic, then converted to `f32`.
    // Const evaluation panics on overflow, so an out-of-range `NBITS`
    // (>= usize::BITS) is rejected at compile time rather than wrapping.
    (2_usize.pow(NBITS as u32) - 1) as f32
}
132
133pub const fn inverse_bit_scale<const NBITS: usize>() -> f32 {
134    1.0 / bit_scale::<NBITS>()
135}

// The central scalar quantization schema and its error types.
pub use quantizer::{InputContainsNaN, MeanNormMissing, ScalarQuantizer};
// Distance Functions.
pub use vectors::CompensatedIP;
// Scalar-quantized specific vector types.
pub use vectors::CompensatedVector;
pub use vectors::{
    CompensatedCosineNormalized, CompensatedSquaredL2, CompensatedVectorRef, Compensation,
    MutCompensatedVectorRef,
};