Skip to main content

nodedb_codec/vector_quant/
codec.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Dual-phase `VectorCodec` trait — the seam that makes future quantization
4//! algorithms drop-in additions rather than engine rewrites.
5//!
6//! Upper-layer routing (HNSW navigation, beam expansion) calls
7//! `fast_symmetric_distance` on bitwise/heuristic kernels.
8//! Base-layer rerank calls `exact_asymmetric_distance` with full ADC +
9//! scalar correction + sparse outlier resolution.
10
11use crate::vector_quant::layout::UnifiedQuantizedVector;
12
/// Asymmetric Distance Computation lookup table — used by codecs that
/// pre-decompose the query into per-subspace distance tables (PQ, IVF-PQ,
/// TurboQuant). Consumed by base-layer rerank kernels via AVX2 `pshufb`
/// or AVX-512 `vpermb`.
///
/// Layout:
/// - `subspace_count`: number of independent subspaces (PQ M parameter)
/// - `centroids_per_subspace`: typically 256 (one byte per code)
/// - `table`: row-major `[subspace][centroid] -> f32 distance`
#[derive(Debug, Clone, PartialEq)]
pub struct AdcLut {
    pub subspace_count: u16,
    pub centroids_per_subspace: u16,
    pub table: Vec<f32>,
}

impl AdcLut {
    /// Allocate a zero-filled LUT with `subspace_count * centroids_per_subspace`
    /// entries. The product of two `u16` values cannot overflow `usize` on any
    /// supported target, so the widening casts are safe.
    #[inline]
    pub fn new(subspace_count: u16, centroids_per_subspace: u16) -> Self {
        let n = subspace_count as usize * centroids_per_subspace as usize;
        Self {
            subspace_count,
            centroids_per_subspace,
            table: vec![0.0; n],
        }
    }

    /// Return the precomputed distance for the given `subspace` and `centroid`.
    ///
    /// Bounds discipline: this is a hot-path accessor, so argument validity is
    /// only checked by `debug_assert!` in debug builds. In release builds an
    /// out-of-range `subspace` panics only if the computed index falls outside
    /// `table`; an out-of-range `centroid` (possible when
    /// `centroids_per_subspace < 256`) does NOT panic — it silently reads a
    /// neighbouring subspace's row. Callers must validate their inputs.
    #[inline]
    pub fn lookup(&self, subspace: u16, centroid: u8) -> f32 {
        debug_assert!(subspace < self.subspace_count, "subspace out of range");
        debug_assert!(
            (centroid as usize) < self.centroids_per_subspace as usize,
            "centroid out of range"
        );
        let idx = subspace as usize * self.centroids_per_subspace as usize + centroid as usize;
        self.table[idx]
    }
}
52
53/// The dual-phase quantization codec trait.
54///
55/// `Quantized` is the on-disk / in-memory packed form (one per vector).
56/// `Query` is the prepared query — may be raw FP32, may be rotated, may
57/// hold a precomputed ADC LUT, depending on the codec.
58///
59/// # Extensibility audit — future algorithms
60///
61/// The following six algorithms can each be added as a new `impl VectorCodec`
62/// without changing the trait surface:
63///
64/// **TurboQuant** — learned rotation matrix applied before scalar quantization.
65/// The rotation is held in the codec struct (same pattern as `OpqCodec.rotation`)
66/// and applied inside `encode` and `prepare_query`. No trait change needed.
67/// `QuantMode::TurboQuant4b` already exists as a layout discriminant.
68///
69/// **PolarQuant** — encodes magnitude and direction as separate components.
70/// Both components are packed into a single `packed_bits` region whose layout
71/// the codec controls. The `encode` return type is `Vec<u8>` of arbitrary
72/// length via `UnifiedQuantizedVector::packed_bits`, so multi-component payloads
73/// fit without a trait change. `QuantMode::PolarQuant` is already reserved.
74///
75/// **ITQ3_S** — Iterative Quantization with 3-bit codes, learned via SVD.
76/// Requires a calibration/fitting step before encoding begins. Existing codecs
77/// expose this as a concrete static `calibrate` method (e.g. `Sq8Codec::calibrate`,
78/// `RaBitQCodec::calibrate`). The `train` method added below makes this fitting
79/// phase reachable through the trait for generic dispatchers that hold a
80/// `C: VectorCodec` without knowing the concrete type. The default impl is a
81/// no-op so all existing impls compile without change.
82///
83/// **OSAQ** — Optimized Symmetric/Asymmetric Quantization. Uses symmetric
84/// encoding for indexed vectors and asymmetric encoding for queries.
85/// The existing trait already separates these paths: `encode` is the index path,
86/// `prepare_query` is the query path, and `type Query` is a distinct associated
87/// type. No trait change needed.
88///
89/// **RaBiT** — Rotation + Binary quantization with full-precision query and
90/// 1-bit index. Exactly the pattern implemented by `RaBitQCodec`: `type Query`
91/// holds a full-precision `Vec<f32>`, `type Quantized` holds 1-bit sign codes,
92/// and `exact_asymmetric_distance` uses the full query against binary candidates.
93/// No trait change needed.
94///
95/// **LVQ** — Locally-adaptive Vector Quantization with per-vector scale/offset.
96/// The `QuantHeader` carries `global_scale` and `residual_norm` — two f32
97/// scalars sufficient for per-vector min/max parameters. If a codec needs
98/// additional per-vector state it can prepend a small structured header inside
99/// `packed_bits` (the codec controls that region's layout entirely). No trait
100/// change needed.
101pub trait VectorCodec: Send + Sync {
102    /// The packed quantized form. Must be convertible to a `UnifiedQuantizedVector`
103    /// reference via `AsRef`.
104    type Quantized: AsRef<UnifiedQuantizedVector>;
105
106    /// The prepared query form (codec-specific).
107    type Query;
108
109    /// Encode a single FP32 vector into the codec's packed form.
110    fn encode(&self, v: &[f32]) -> Self::Quantized;
111
112    /// Prepare a query for distance computations against this codec.
113    /// May rotate, normalize, build a LUT, etc.
114    fn prepare_query(&self, q: &[f32]) -> Self::Query;
115
116    /// Optional: precompute ADC lookup table for codecs that use one
117    /// (PQ, IVF-PQ, TurboQuant). Returns `None` for codecs that don't
118    /// (RaBitQ, BBQ, ternary, binary).
119    fn adc_lut(&self, _q: &Self::Query) -> Option<AdcLut> {
120        None
121    }
122
123    /// Optional: fit the codec's learned parameters on a set of training vectors.
124    ///
125    /// Called once before encoding begins, typically at index-build time. Codecs
126    /// with no learnable parameters (binary sign-bit, ternary) can use the
127    /// default no-op. Codecs with an SVD- or k-means-based fitting phase
128    /// (ITQ3_S, OPQ, PQ, TurboQuant) override this to update their internal
129    /// rotation matrices or codebooks.
130    ///
131    /// Returns `Ok(())` on success. The error type is `CodecError` so callers
132    /// that drive training through a generic bound can propagate failures without
133    /// knowing the concrete codec.
134    ///
135    /// The default implementation is a no-op and always returns `Ok(())`.
136    #[allow(unused_variables)]
137    fn train(&mut self, samples: &[&[f32]]) -> Result<(), crate::error::CodecError> {
138        Ok(())
139    }
140
141    /// Fast symmetric distance — bitwise / heuristic. Used during HNSW
142    /// upper-layer routing. Both arguments are quantized; no scalar
143    /// correction; no outlier resolution. Hot path; must be inline-friendly.
144    fn fast_symmetric_distance(&self, q: &Self::Quantized, v: &Self::Quantized) -> f32;
145
146    /// Exact asymmetric distance — full ADC with scalar correction and
147    /// sparse outlier resolution. Used at base-layer rerank only. Slower
148    /// but high-fidelity.
149    fn exact_asymmetric_distance(&self, q: &Self::Query, v: &Self::Quantized) -> f32;
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vector_quant::layout::{QuantHeader, QuantMode, UnifiedQuantizedVector};

    #[test]
    fn adc_lut_new_produces_zeroed_table() {
        let lut = AdcLut::new(4, 256);
        assert_eq!(lut.table.len(), 1024);
        for &entry in &lut.table {
            assert_eq!(entry, 0.0);
        }
    }

    #[test]
    fn adc_lut_lookup_returns_written_value() {
        let mut lut = AdcLut::new(4, 256);
        // Plant a sentinel at subspace=2, centroid=17 and read it back.
        lut.table[2usize * 256 + 17] = 2.5;
        assert_eq!(lut.lookup(2, 17), 2.5);
        // Untouched slots stay zero.
        assert_eq!(lut.lookup(0, 0), 0.0);
        assert_eq!(lut.lookup(3, 255), 0.0);
    }

    // --- Minimal codec used purely as a compile-time probe for the trait ---

    /// Thin newtype so the stub satisfies the `AsRef<UnifiedQuantizedVector>` bound.
    struct StubQuantized(UnifiedQuantizedVector);

    impl AsRef<UnifiedQuantizedVector> for StubQuantized {
        fn as_ref(&self) -> &UnifiedQuantizedVector {
            &self.0
        }
    }

    struct StubCodec;

    impl VectorCodec for StubCodec {
        type Quantized = StubQuantized;
        type Query = Vec<f32>;

        fn encode(&self, v: &[f32]) -> Self::Quantized {
            let dim = v.len();
            // One sign bit per dimension, rounded up to whole bytes.
            let packed = vec![0u8; dim.div_ceil(8)];
            let header = QuantHeader {
                quant_mode: QuantMode::Binary as u16,
                dim: dim as u16,
                global_scale: 1.0,
                residual_norm: 0.0,
                dot_quantized: 0.0,
                outlier_bitmask: 0,
                reserved: [0; 8],
            };
            let uqv = UnifiedQuantizedVector::new(header, &packed, &[])
                .expect("stub encode: layout construction must succeed");
            StubQuantized(uqv)
        }

        fn prepare_query(&self, q: &[f32]) -> Self::Query {
            q.to_vec()
        }

        fn fast_symmetric_distance(&self, _q: &Self::Quantized, _v: &Self::Quantized) -> f32 {
            0.0
        }

        fn exact_asymmetric_distance(&self, _q: &Self::Query, _v: &Self::Quantized) -> f32 {
            0.0
        }
    }

    /// Compile-time check: the trait is usable behind a generic `C: VectorCodec` bound.
    fn use_codec<C: VectorCodec>(c: &C, q: &[f32], v: &[f32]) -> f32 {
        let encoded = c.encode(v);
        let prepared = c.prepare_query(q);
        c.fast_symmetric_distance(&encoded, &encoded)
            + c.exact_asymmetric_distance(&prepared, &encoded)
    }

    #[test]
    fn generic_use_codec_compiles_and_runs() {
        let total = use_codec(&StubCodec, &[1.0, 0.0], &[0.0, 1.0]);
        assert_eq!(total, 0.0);
    }
}