Skip to main content

nodedb_vector/quantize/
sq8_codec.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! `VectorCodec` implementation for `Sq8Codec`.
4//!
5//! Wraps the existing concrete SQ8 quantizer as a dual-phase codec. The
6//! `Quantized` newtype holds a `UnifiedQuantizedVector` with `QuantMode::Sq8`
7//! and M = dim u8 codes in the packed-bits region. The `Query` type is a raw
8//! FP32 slice (asymmetric: query stays full-precision, candidates are INT8).
9
10use nodedb_codec::vector_quant::{
11    codec::{AdcLut, VectorCodec},
12    layout::{QuantHeader, QuantMode, UnifiedQuantizedVector},
13};
14
15use crate::quantize::sq8::Sq8Codec;
16
17// ── Newtype ──────────────────────────────────────────────────────────────────
18
19/// Thin newtype wrapping `UnifiedQuantizedVector` for SQ8-encoded vectors.
20pub struct Sq8Quantized(pub UnifiedQuantizedVector);
21
22impl AsRef<UnifiedQuantizedVector> for Sq8Quantized {
23    #[inline]
24    fn as_ref(&self) -> &UnifiedQuantizedVector {
25        &self.0
26    }
27}
28
29// ── Helper ───────────────────────────────────────────────────────────────────
30
31#[inline]
32fn packed_bits_of(q: &Sq8Quantized) -> &[u8] {
33    q.0.packed_bits()
34}
35
36// ── VectorCodec impl ─────────────────────────────────────────────────────────
37
38impl VectorCodec for Sq8Codec {
39    type Quantized = Sq8Quantized;
40    /// Raw FP32 query — asymmetric distance; query is never quantized.
41    type Query = Vec<f32>;
42
43    /// Encode a single FP32 vector into an SQ8 `UnifiedQuantizedVector`.
44    ///
45    /// # Panics
46    ///
47    /// The `UnifiedQuantizedVector::new` call will only fail if the outlier
48    /// count mismatches the bitmask, or an outlier dim_index ≥ 64. Neither
49    /// condition can arise here: `outlier_bitmask` is 0 and `outliers` is
50    /// empty. The `expect` is therefore unreachable in practice.
51    fn encode(&self, v: &[f32]) -> Self::Quantized {
52        let codes = self.quantize(v);
53        let header = QuantHeader {
54            quant_mode: QuantMode::Sq8 as u16,
55            dim: self.dim as u16,
56            global_scale: 0.0,
57            residual_norm: 0.0,
58            dot_quantized: 0.0,
59            outlier_bitmask: 0,
60            reserved: [0; 8],
61        };
62        let uqv = UnifiedQuantizedVector::new(header, &codes, &[])
63            .expect("Sq8Codec::encode: layout construction is infallible (no outliers)");
64        Sq8Quantized(uqv)
65    }
66
67    /// Prepare the FP32 query for asymmetric distance computations.
68    ///
69    /// For SQ8 the query is used directly without rotation or normalization.
70    fn prepare_query(&self, q: &[f32]) -> Self::Query {
71        q.to_vec()
72    }
73
74    /// SQ8 has no precomputed ADC table — returns `None`.
75    fn adc_lut(&self, _q: &Self::Query) -> Option<AdcLut> {
76        None
77    }
78
79    /// Symmetric L2 squared distance between two SQ8-quantized vectors.
80    ///
81    /// Both codes are dequantized to FP32 via `self.mins` + `self.scales`, then
82    /// the squared difference is accumulated. This is slower than an exact
83    /// INT8-INT8 Hamming estimate but SQ8 has no faster bitwise symmetric form.
84    #[inline]
85    fn fast_symmetric_distance(&self, q: &Self::Quantized, v: &Self::Quantized) -> f32 {
86        let qa = packed_bits_of(q);
87        let qb = packed_bits_of(v);
88        let dq_a = self.dequantize(qa);
89        let dq_b = self.dequantize(qb);
90        dq_a.iter()
91            .zip(dq_b.iter())
92            .map(|(&a, &b)| {
93                let d = a - b;
94                d * d
95            })
96            .sum()
97    }
98
99    /// Asymmetric L2 squared distance: FP32 query vs INT8 candidate.
100    ///
101    /// Delegates directly to `Sq8Codec::asymmetric_l2`.
102    /// Cosine / IP variants require a separate codec wrapper that normalizes
103    /// or negates at encode time.
104    #[inline]
105    fn exact_asymmetric_distance(&self, q: &Self::Query, v: &Self::Quantized) -> f32 {
106        self.asymmetric_l2(q, packed_bits_of(v))
107    }
108}
109
110// ── Tests ────────────────────────────────────────────────────────────────────
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a codec calibrated from 50 synthetic 3-element vectors.
    fn make_codec() -> Sq8Codec {
        let samples: Vec<Vec<f32>> = (0..50)
            .map(|i| {
                let t = i as f32;
                vec![t * 0.1, -t * 0.05, 1.0 + t * 0.02]
            })
            .collect();
        let views: Vec<&[f32]> = samples.iter().map(|s| s.as_slice()).collect();
        Sq8Codec::calibrate(&views, 3)
    }

    /// Shared check: a distance must be finite and non-negative.
    fn assert_valid_distance(d: f32) {
        assert!(d.is_finite(), "expected finite distance, got {d}");
        assert!(d >= 0.0, "expected non-negative distance, got {d}");
    }

    /// The packed bits stored by `encode` must equal the raw `quantize`
    /// output of the underlying codec for the same input.
    #[test]
    fn encode_packed_bits_matches_raw_quantize() {
        let codec = make_codec();
        let input = vec![1.5f32, -0.3, 1.1];
        let expected = codec.quantize(&input);
        let encoded = <Sq8Codec as VectorCodec>::encode(&codec, &input);
        assert_eq!(encoded.as_ref().packed_bits(), expected.as_slice());
    }

    /// `fast_symmetric_distance` yields a finite, non-negative value.
    #[test]
    fn fast_symmetric_distance_is_non_negative_finite() {
        let codec = make_codec();
        let lhs = <Sq8Codec as VectorCodec>::encode(&codec, &[0.5, -0.1, 1.0]);
        let rhs = <Sq8Codec as VectorCodec>::encode(&codec, &[2.0, -0.5, 1.5]);
        assert_valid_distance(codec.fast_symmetric_distance(&lhs, &rhs));
    }

    /// `exact_asymmetric_distance` yields a finite, non-negative value.
    #[test]
    fn exact_asymmetric_distance_is_non_negative_finite() {
        let codec = make_codec();
        let query = codec.prepare_query(&[0.5, -0.1, 1.0]);
        let candidate = <Sq8Codec as VectorCodec>::encode(&codec, &[2.0, -0.5, 1.5]);
        assert_valid_distance(codec.exact_asymmetric_distance(&query, &candidate));
    }

    /// Exercises the codec purely through the `VectorCodec` trait bound so
    /// the test below fails to compile if the impl stops satisfying it.
    fn use_vector_codec<C: VectorCodec>(c: &C, q: &[f32], v: &[f32]) -> f32 {
        let encoded = c.encode(v);
        let prepared = c.prepare_query(q);
        let symmetric = c.fast_symmetric_distance(&encoded, &encoded);
        let asymmetric = c.exact_asymmetric_distance(&prepared, &encoded);
        symmetric + asymmetric
    }

    #[test]
    fn trait_bounds_compile() {
        let codec = make_codec();
        let total = use_vector_codec(&codec, &[0.5, -0.1, 1.0], &[1.0, 0.0, 1.2]);
        assert!(total.is_finite());
    }
}