nodedb_codec/vector_quant/codec.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Dual-phase `VectorCodec` trait — the seam that makes future quantization
4//! algorithms drop-in additions rather than engine rewrites.
5//!
6//! Upper-layer routing (HNSW navigation, beam expansion) calls
7//! `fast_symmetric_distance` on bitwise/heuristic kernels.
8//! Base-layer rerank calls `exact_asymmetric_distance` with full ADC +
9//! scalar correction + sparse outlier resolution.
10
11use crate::vector_quant::layout::UnifiedQuantizedVector;
12
/// Asymmetric Distance Computation lookup table — used by codecs that
/// pre-decompose the query into per-subspace distance tables (PQ, IVF-PQ,
/// TurboQuant). Consumed by base-layer rerank kernels via AVX2 `pshufb`
/// or AVX-512 `vpermb`.
///
/// Layout:
/// - `subspace_count`: number of independent subspaces (PQ M parameter)
/// - `centroids_per_subspace`: typically 256 (one byte per code)
/// - `table`: row-major `[subspace][centroid] -> f32 distance`
#[derive(Debug, Clone, PartialEq)]
pub struct AdcLut {
    pub subspace_count: u16,
    pub centroids_per_subspace: u16,
    pub table: Vec<f32>,
}

impl AdcLut {
    /// Allocate a zero-filled LUT with `subspace_count * centroids_per_subspace`
    /// entries. The codec fills the table before it is consumed by rerank kernels.
    #[inline]
    pub fn new(subspace_count: u16, centroids_per_subspace: u16) -> Self {
        // u16 * u16 cannot overflow usize on any supported target
        // (product is at most ~4.29e9, within 64-bit range).
        let n = usize::from(subspace_count) * usize::from(centroids_per_subspace);
        Self {
            subspace_count,
            centroids_per_subspace,
            table: vec![0.0; n],
        }
    }

    /// Return the precomputed distance for the given `subspace` and `centroid`.
    ///
    /// # Panics
    ///
    /// In release builds, panics only when the computed flat index falls outside
    /// `table` (e.g. `subspace >= self.subspace_count` with full-width rows).
    /// Note that when `centroids_per_subspace < 256`, an out-of-range `centroid`
    /// can produce an in-bounds index and silently read a neighboring row; the
    /// `debug_assert!`s below catch both misuses in debug builds while keeping
    /// the release hot path branch-minimal. Bounds checking is otherwise the
    /// caller's responsibility on this hot-path accessor.
    #[inline]
    pub fn lookup(&self, subspace: u16, centroid: u8) -> f32 {
        debug_assert!(subspace < self.subspace_count, "subspace out of range");
        debug_assert!(
            usize::from(centroid) < usize::from(self.centroids_per_subspace),
            "centroid out of range"
        );
        let idx = usize::from(subspace) * usize::from(self.centroids_per_subspace)
            + usize::from(centroid);
        self.table[idx]
    }
}
52
/// The dual-phase quantization codec trait.
///
/// `Quantized` is the on-disk / in-memory packed form (one per vector).
/// `Query` is the prepared query — may be raw FP32, may be rotated, may
/// hold a precomputed ADC LUT, depending on the codec.
///
/// # Extensibility audit — future algorithms
///
/// The following six algorithms can each be added as a new `impl VectorCodec`
/// without changing the trait surface:
///
/// **TurboQuant** — learned rotation matrix applied before scalar quantization.
/// The rotation is held in the codec struct (same pattern as `OpqCodec.rotation`)
/// and applied inside `encode` and `prepare_query`. No trait change needed.
/// `QuantMode::TurboQuant4b` already exists as a layout discriminant.
///
/// **PolarQuant** — encodes magnitude and direction as separate components.
/// Both components are packed into a single `packed_bits` region whose layout
/// the codec controls. The `encode` return type is `Vec<u8>` of arbitrary
/// length via `UnifiedQuantizedVector::packed_bits`, so multi-component payloads
/// fit without a trait change. `QuantMode::PolarQuant` is already reserved.
///
/// **ITQ3_S** — Iterative Quantization with 3-bit codes, learned via SVD.
/// Requires a calibration/fitting step before encoding begins. Existing codecs
/// expose this as a concrete static `calibrate` method (e.g. `Sq8Codec::calibrate`,
/// `RaBitQCodec::calibrate`). The `train` method added below makes this fitting
/// phase reachable through the trait for generic dispatchers that hold a
/// `C: VectorCodec` without knowing the concrete type. The default impl is a
/// no-op so all existing impls compile without change.
///
/// **OSAQ** — Optimized Symmetric/Asymmetric Quantization. Uses symmetric
/// encoding for indexed vectors and asymmetric encoding for queries.
/// The existing trait already separates these paths: `encode` is the index path,
/// `prepare_query` is the query path, and `type Query` is a distinct associated
/// type. No trait change needed.
///
/// **RaBiT** — Rotation + Binary quantization with full-precision query and
/// 1-bit index. Exactly the pattern implemented by `RaBitQCodec`: `type Query`
/// holds a full-precision `Vec<f32>`, `type Quantized` holds 1-bit sign codes,
/// and `exact_asymmetric_distance` uses the full query against binary candidates.
/// No trait change needed.
///
/// **LVQ** — Locally-adaptive Vector Quantization with per-vector scale/offset.
/// The `QuantHeader` carries `global_scale` and `residual_norm` — two f32
/// scalars sufficient for per-vector min/max parameters. If a codec needs
/// additional per-vector state it can prepend a small structured header inside
/// `packed_bits` (the codec controls that region's layout entirely). No trait
/// change needed.
pub trait VectorCodec: Send + Sync {
    /// The packed quantized form. Must be convertible to a `UnifiedQuantizedVector`
    /// reference via `AsRef`, so layout-generic code (storage, mmap writers) can
    /// reach the unified header/payload without knowing the concrete codec.
    type Quantized: AsRef<UnifiedQuantizedVector>;

    /// The prepared query form (codec-specific).
    type Query;

    /// Encode a single FP32 vector into the codec's packed form.
    fn encode(&self, v: &[f32]) -> Self::Quantized;

    /// Prepare a query for distance computations against this codec.
    /// May rotate, normalize, build a LUT, etc.
    fn prepare_query(&self, q: &[f32]) -> Self::Query;

    /// Optional: precompute ADC lookup table for codecs that use one
    /// (PQ, IVF-PQ, TurboQuant). Returns `None` for codecs that don't
    /// (RaBitQ, BBQ, ternary, binary). The default returns `None`, so
    /// table-free codecs need not override it.
    fn adc_lut(&self, _q: &Self::Query) -> Option<AdcLut> {
        None
    }

    /// Optional: fit the codec's learned parameters on a set of training vectors.
    ///
    /// Called once before encoding begins, typically at index-build time. Codecs
    /// with no learnable parameters (binary sign-bit, ternary) can use the
    /// default no-op. Codecs with an SVD- or k-means-based fitting phase
    /// (ITQ3_S, OPQ, PQ, TurboQuant) override this to update their internal
    /// rotation matrices or codebooks.
    ///
    /// # Errors
    ///
    /// Returns `Ok(())` on success. The error type is `CodecError` so callers
    /// that drive training through a generic bound can propagate failures without
    /// knowing the concrete codec.
    ///
    /// The default implementation is a no-op and always returns `Ok(())`.
    #[allow(unused_variables)]
    fn train(&mut self, samples: &[&[f32]]) -> Result<(), crate::error::CodecError> {
        Ok(())
    }

    /// Fast symmetric distance — bitwise / heuristic. Used during HNSW
    /// upper-layer routing. Both arguments are quantized; no scalar
    /// correction; no outlier resolution. Hot path; must be inline-friendly.
    fn fast_symmetric_distance(&self, q: &Self::Quantized, v: &Self::Quantized) -> f32;

    /// Exact asymmetric distance — full ADC with scalar correction and
    /// sparse outlier resolution. Used at base-layer rerank only. Slower
    /// but high-fidelity.
    fn exact_asymmetric_distance(&self, q: &Self::Query, v: &Self::Quantized) -> f32;
}
151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vector_quant::layout::{QuantHeader, QuantMode, UnifiedQuantizedVector};

    #[test]
    fn adc_lut_new_produces_zeroed_table() {
        let lut = AdcLut::new(4, 256);
        assert_eq!(lut.table.len(), 4 * 256);
        assert!(lut.table.iter().all(|&cell| cell == 0.0));
    }

    #[test]
    fn adc_lut_lookup_returns_written_value() {
        let mut lut = AdcLut::new(4, 256);
        // Plant a sentinel at subspace=2, centroid=17 and read it back.
        lut.table[2 * 256 + 17] = 2.5;
        assert_eq!(lut.lookup(2, 17), 2.5);
        // Untouched slots still read as zero.
        assert_eq!(lut.lookup(0, 0), 0.0);
        assert_eq!(lut.lookup(3, 255), 0.0);
    }

    // --- Stub codec used only to verify that the trait compiles ---

    /// Minimal quantized wrapper for test purposes.
    struct StubQuantized(UnifiedQuantizedVector);

    impl AsRef<UnifiedQuantizedVector> for StubQuantized {
        fn as_ref(&self) -> &UnifiedQuantizedVector {
            &self.0
        }
    }

    struct StubCodec;

    impl VectorCodec for StubCodec {
        type Quantized = StubQuantized;
        type Query = Vec<f32>;

        fn encode(&self, v: &[f32]) -> Self::Quantized {
            let dim = v.len();
            let header = QuantHeader {
                quant_mode: QuantMode::Binary as u16,
                dim: dim as u16,
                global_scale: 1.0,
                residual_norm: 0.0,
                dot_quantized: 0.0,
                outlier_bitmask: 0,
                reserved: [0; 8],
            };
            // One sign bit per dimension, rounded up to whole bytes.
            let packed = vec![0u8; dim.div_ceil(8)];
            StubQuantized(
                UnifiedQuantizedVector::new(header, &packed, &[])
                    .expect("stub encode: layout construction must succeed"),
            )
        }

        fn prepare_query(&self, q: &[f32]) -> Self::Query {
            q.to_vec()
        }

        fn fast_symmetric_distance(&self, _q: &Self::Quantized, _v: &Self::Quantized) -> f32 {
            0.0
        }

        fn exact_asymmetric_distance(&self, _q: &Self::Query, _v: &Self::Quantized) -> f32 {
            0.0
        }
    }

    /// Verify a generic function parameterised on `VectorCodec` compiles.
    fn use_codec<C: VectorCodec>(c: &C, q: &[f32], v: &[f32]) -> f32 {
        let encoded = c.encode(v);
        let prepared = c.prepare_query(q);
        let symmetric = c.fast_symmetric_distance(&encoded, &encoded);
        let asymmetric = c.exact_asymmetric_distance(&prepared, &encoded);
        symmetric + asymmetric
    }

    #[test]
    fn generic_use_codec_compiles_and_runs() {
        let total = use_codec(&StubCodec, &[1.0, 0.0], &[0.0, 1.0]);
        assert_eq!(total, 0.0);
    }
}
237}