// nodedb_codec/vector_quant/layout.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Unified Quantized Vector layout — the cache-aligned superset format that
4//! absorbs binary / ternary (BitNet 1.58) / 4-bit scalar / residual codecs
5//! without polymorphic indirection in the hot path.
6//!
7//! 128-byte alignment matches AVX-512 cache-line pair and avoids false
8//! sharing under thread-per-core execution.
9//!
10//! ## Outlier bitmask limit
11//!
12//! The `outlier_bitmask` in [`QuantHeader`] supports up to 64 outlier
13//! dimensions per vector. Callers that need to mark more than 64 outliers
14//! must bucket by 64-dim windows; multi-window support is out of scope for
15//! this module.
16
17use crate::error::CodecError;
18
19// ── QuantMode ──────────────────────────────────────────────────────────────
20
/// Quantization mode discriminator stored in the header.
///
/// Values are **stable on disk** — never reorder, only append.
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum QuantMode {
    Binary = 0,
    RaBitQ = 1,
    Bbq = 2,
    /// 5 trits/byte cold layout, 1.6 bpw.
    TernaryPacked = 3,
    /// 2 bpw expanded for SIMD hot path.
    TernarySimd = 4,
    TurboQuant4b = 5,
    Sq8 = 6,
    Pq = 7,
    /// Reserved discriminant for ITQ3_S (Interleaved Ternary Quantization).
    Itq3S = 8,
    /// Reserved discriminant for PolarQuant (Cartesian→polar 0.5 bpw).
    PolarQuant = 9,
}

impl QuantMode {
    /// Decode a raw on-disk discriminant (as stored in
    /// [`QuantHeader::quant_mode`]) back into a `QuantMode`.
    ///
    /// Returns `None` for unknown discriminants so readers of files written
    /// by a newer version can degrade gracefully instead of panicking.
    pub const fn from_u16(raw: u16) -> Option<Self> {
        Some(match raw {
            0 => QuantMode::Binary,
            1 => QuantMode::RaBitQ,
            2 => QuantMode::Bbq,
            3 => QuantMode::TernaryPacked,
            4 => QuantMode::TernarySimd,
            5 => QuantMode::TurboQuant4b,
            6 => QuantMode::Sq8,
            7 => QuantMode::Pq,
            8 => QuantMode::Itq3S,
            9 => QuantMode::PolarQuant,
            _ => return None,
        })
    }

    /// Bits per weight for each mode — used by [`target_size`] to compute the
    /// packed-bits byte count.
    ///
    /// Arms are listed in discriminant order so gaps and additions are obvious.
    fn bits_per_weight(self) -> u32 {
        match self {
            QuantMode::Binary => 1,
            QuantMode::RaBitQ => 1,
            QuantMode::Bbq => 1,
            QuantMode::TernaryPacked => 2, // 5 trits/byte ≈ 1.6 bpw; ceil to 2 for alignment
            QuantMode::TernarySimd => 2,
            QuantMode::TurboQuant4b => 4,
            QuantMode::Sq8 => 8,
            QuantMode::Pq => 8,
            QuantMode::Itq3S => 2,
            // NOTE(review): the variant doc says 0.5 bpw but 4 bits are
            // reserved here — presumably a conservative placeholder until
            // PolarQuant lands; confirm before shipping that mode.
            QuantMode::PolarQuant => 4,
        }
    }
}
62
63// ── QuantHeader ────────────────────────────────────────────────────────────
64
/// 32-byte interleaved header preceding the packed bit array.
///
/// `#[repr(C)]` for stable layout; serializable via raw bytes.
///
/// With `repr(C)` and these field types the layout has no interior padding:
/// `quant_mode` @0, `dim` @2, `global_scale` @4, `residual_norm` @8,
/// `dot_quantized` @12, `outlier_bitmask` @16, `reserved` @24 — 32 bytes
/// total, alignment 8 (from the `u64` field).
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct QuantHeader {
    /// [`QuantMode`] discriminant.
    pub quant_mode: u16,
    /// Vector dimensionality — number of quantized weights in `packed_bits`.
    pub dim: u16,
    /// BitNet absmean / SQ8 scale / RaBitQ rotation magnitude.
    pub global_scale: f32,
    /// RaBitQ ‖v−c‖ / TurboQuant ‖r‖ / BBQ centroid distance.
    pub residual_norm: f32,
    /// RaBitQ ⟨v, q(v)⟩ / BBQ corrective / TurboQuant QJL bias.
    pub dot_quantized: f32,
    /// Sparse outlier index — branchless gather via popcnt.
    /// Supports up to 64 outlier dimensions per vector.
    pub outlier_bitmask: u64,
    /// QJL projection seed / OSAQ Hessian coeff / future use.
    pub reserved: [u8; 8],
}

// Compile-time assertion: header is exactly 32 bytes.
// Guards the raw-byte (de)serialization paths below, which assume this size
// and the absence of interior padding.
const _: () = assert!(core::mem::size_of::<QuantHeader>() == 32);
89
90// ── Layout constants ────────────────────────────────────────────────────────
91
/// Byte size of one outlier entry: (dim_index: u32, value: f32).
/// Matches the little-endian `u32 || f32` pairs written by
/// `UnifiedQuantizedVector::new` and read back by `outlier_at`.
const OUTLIER_ENTRY_BYTES: usize = 8;

/// Cache-line pair alignment — also the minimum allocation unit.
/// Every buffer length produced by this module is a multiple of this size.
const ALIGN: usize = 128;
97
98// ── target_size ─────────────────────────────────────────────────────────────
99
100/// Compute the buffer size (rounded up to a 128-byte multiple) required to
101/// hold a [`UnifiedQuantizedVector`] with the given parameters.
102///
103/// This is the pre-sizing helper for callers that want to allocate before
104/// constructing.
105pub fn target_size(quant_mode: QuantMode, dim: u16, outlier_count: u32) -> usize {
106    let packed_bits_bytes = packed_bits_len(quant_mode, dim);
107    let outlier_bytes = outlier_count as usize * OUTLIER_ENTRY_BYTES;
108    let raw = core::mem::size_of::<QuantHeader>() + packed_bits_bytes + outlier_bytes;
109    round_up_128(raw)
110}
111
112// ── Internal helpers ────────────────────────────────────────────────────────
113
114#[inline]
115fn packed_bits_len(quant_mode: QuantMode, dim: u16) -> usize {
116    let bpw = quant_mode.bits_per_weight() as usize;
117    let total_bits = dim as usize * bpw;
118    total_bits.div_ceil(8)
119}
120
121#[inline]
122fn round_up_128(n: usize) -> usize {
123    (n + ALIGN - 1) & !(ALIGN - 1)
124}
125
126// ── UnifiedQuantizedVector ──────────────────────────────────────────────────
127
/// Owned, 128-byte-aligned unified quantized vector buffer.
///
/// Layout (contiguous bytes):
/// ```text
/// [ QuantHeader (32 B) | packed_bits (variable) | outlier_payload (8 B × n) | tail_pad ]
/// ```
///
/// The total allocation is always a multiple of 128 bytes (one AVX-512
/// cache-line pair).
///
/// NOTE(review): "128-byte-aligned" provably refers to the buffer *length*;
/// `Vec<u8>` does not guarantee the starting *address* is 128-aligned —
/// confirm whether consumers also require address alignment.
pub struct UnifiedQuantizedVector {
    /// Backing storage.  Always a multiple of 128 bytes.
    buf: Vec<u8>,
    /// Byte length of the packed-bits region (excludes header and outliers).
    packed_bits_len: usize,
}
143
144impl UnifiedQuantizedVector {
145    /// Construct from an explicit header, packed-bit slice, and sparse
146    /// outlier list.
147    ///
148    /// `outliers` is a slice of `(dim_index, value)` pairs.  The
149    /// `outlier_bitmask` in `header` must have exactly one bit set for each
150    /// entry in `outliers`, and bits must correspond to entries in ascending
151    /// `dim_index` order (i.e. popcnt-dense order).
152    ///
153    /// # Errors
154    ///
155    /// Returns [`CodecError::LayoutError`] if:
156    /// - `outliers.len()` does not match `popcnt(header.outlier_bitmask)`.
157    /// - Any `dim_index` in `outliers` is ≥ 64 (bitmask only covers 64 dims).
158    pub fn new(
159        header: QuantHeader,
160        packed_bits: &[u8],
161        outliers: &[(u32, f32)],
162    ) -> Result<Self, CodecError> {
163        let expected_outlier_count = header.outlier_bitmask.count_ones() as usize;
164        if outliers.len() != expected_outlier_count {
165            return Err(CodecError::LayoutError {
166                detail: format!(
167                    "outlier count mismatch: bitmask has {} bits set but {} outliers provided",
168                    expected_outlier_count,
169                    outliers.len()
170                ),
171            });
172        }
173        for &(dim_idx, _) in outliers {
174            if dim_idx >= 64 {
175                return Err(CodecError::LayoutError {
176                    detail: format!("outlier dim_index {dim_idx} exceeds bitmask capacity of 64"),
177                });
178            }
179        }
180
181        let header_bytes = core::mem::size_of::<QuantHeader>();
182        let outlier_bytes = outliers.len() * OUTLIER_ENTRY_BYTES;
183        let raw = header_bytes + packed_bits.len() + outlier_bytes;
184        let total = round_up_128(raw);
185
186        let mut buf = vec![0u8; total];
187
188        // Write header via raw copy (QuantHeader is repr(C), no padding issues).
189        let header_src = unsafe {
190            core::slice::from_raw_parts(&header as *const QuantHeader as *const u8, header_bytes)
191        };
192        buf[..header_bytes].copy_from_slice(header_src);
193
194        // Write packed bits.
195        let pb_start = header_bytes;
196        let pb_end = pb_start + packed_bits.len();
197        buf[pb_start..pb_end].copy_from_slice(packed_bits);
198
199        // Write outlier payload: each entry is u32 dim_index || f32 value (LE).
200        let mut off = pb_end;
201        for &(dim_idx, value) in outliers {
202            buf[off..off + 4].copy_from_slice(&dim_idx.to_le_bytes());
203            buf[off + 4..off + 8].copy_from_slice(&value.to_le_bytes());
204            off += OUTLIER_ENTRY_BYTES;
205        }
206
207        Ok(Self {
208            buf,
209            packed_bits_len: packed_bits.len(),
210        })
211    }
212
213    // ── Accessors ────────────────────────────────────────────────────────────
214
215    /// Zero-copy reference to the header (first 32 bytes).
216    #[inline]
217    pub fn header(&self) -> &QuantHeader {
218        let ptr = self.buf.as_ptr() as *const QuantHeader;
219        // SAFETY: buf is always at least 32 bytes and QuantHeader is repr(C).
220        unsafe { &*ptr }
221    }
222
223    /// Slice of the packed-bit region.
224    #[inline]
225    pub fn packed_bits(&self) -> &[u8] {
226        let start = core::mem::size_of::<QuantHeader>();
227        &self.buf[start..start + self.packed_bits_len]
228    }
229
230    /// Number of outlier entries, computed via popcnt of `outlier_bitmask`.
231    #[inline]
232    pub fn outlier_count(&self) -> u32 {
233        self.header().outlier_bitmask.count_ones()
234    }
235
236    /// Return the outlier `(dim_index, value)` for the dimension at position
237    /// `slot` in the bitmask.
238    ///
239    /// `slot` is the dimension index (0–63).  Returns `None` if the bit for
240    /// `slot` is not set in `outlier_bitmask`, or if `slot ≥ 64`.
241    ///
242    /// Uses a branchless popcnt to find the dense offset into the outlier
243    /// payload.
244    pub fn outlier_at(&self, slot: u32) -> Option<(u32, f32)> {
245        if slot >= 64 {
246            return None;
247        }
248        let bitmask = self.header().outlier_bitmask;
249        if bitmask & (1u64 << slot) == 0 {
250            return None;
251        }
252        // Number of set bits below `slot` gives the dense array index.
253        let mask = bitmask & ((1u64 << slot).wrapping_sub(1));
254        let offset = mask.count_ones() as usize;
255
256        let header_bytes = core::mem::size_of::<QuantHeader>();
257        let base = header_bytes + self.packed_bits_len + offset * OUTLIER_ENTRY_BYTES;
258
259        let dim_idx = u32::from_le_bytes(self.buf[base..base + 4].try_into().ok()?);
260        let value = f32::from_le_bytes(self.buf[base + 4..base + 8].try_into().ok()?);
261        Some((dim_idx, value))
262    }
263
264    /// Full backing buffer suitable for direct I/O.
265    #[inline]
266    pub fn as_bytes(&self) -> &[u8] {
267        &self.buf
268    }
269}
270
271// ── UnifiedQuantizedVectorRef ───────────────────────────────────────────────
272
/// Zero-copy borrowed view into a `UnifiedQuantizedVector` buffer.
///
/// Suitable for reads from the io_uring page cache — no allocation required.
pub struct UnifiedQuantizedVectorRef<'a> {
    /// Borrowed bytes: header + packed bits + outlier payload (+ padding).
    buf: &'a [u8],
    /// Byte length of the packed-bits region; supplied by the caller of
    /// `from_bytes`, not derived from the header.
    packed_bits_len: usize,
}
280
281impl<'a> UnifiedQuantizedVectorRef<'a> {
282    /// Borrow a raw byte slice as a quantized vector view.
283    ///
284    /// # Errors
285    ///
286    /// Returns [`CodecError::LayoutError`] if the slice is shorter than 32 bytes
287    /// (minimum header size).
288    pub fn from_bytes(buf: &'a [u8], packed_bits_len: usize) -> Result<Self, CodecError> {
289        let header_bytes = core::mem::size_of::<QuantHeader>();
290        if buf.len() < header_bytes + packed_bits_len {
291            return Err(CodecError::LayoutError {
292                detail: format!(
293                    "buffer too short: need at least {} bytes, got {}",
294                    header_bytes + packed_bits_len,
295                    buf.len()
296                ),
297            });
298        }
299        Ok(Self {
300            buf,
301            packed_bits_len,
302        })
303    }
304
305    /// Zero-copy header reference.
306    #[inline]
307    pub fn header(&self) -> &QuantHeader {
308        let ptr = self.buf.as_ptr() as *const QuantHeader;
309        // SAFETY: validated in from_bytes that buf.len() >= 32.
310        unsafe { &*ptr }
311    }
312
313    /// Packed-bits slice.
314    #[inline]
315    pub fn packed_bits(&self) -> &[u8] {
316        let start = core::mem::size_of::<QuantHeader>();
317        &self.buf[start..start + self.packed_bits_len]
318    }
319
320    /// Number of outlier entries via popcnt.
321    #[inline]
322    pub fn outlier_count(&self) -> u32 {
323        self.header().outlier_bitmask.count_ones()
324    }
325
326    /// Outlier lookup — same semantics as [`UnifiedQuantizedVector::outlier_at`].
327    pub fn outlier_at(&self, slot: u32) -> Option<(u32, f32)> {
328        if slot >= 64 {
329            return None;
330        }
331        let bitmask = self.header().outlier_bitmask;
332        if bitmask & (1u64 << slot) == 0 {
333            return None;
334        }
335        let mask = bitmask & ((1u64 << slot).wrapping_sub(1));
336        let offset = mask.count_ones() as usize;
337
338        let header_bytes = core::mem::size_of::<QuantHeader>();
339        let base = header_bytes + self.packed_bits_len + offset * OUTLIER_ENTRY_BYTES;
340
341        let dim_idx = u32::from_le_bytes(self.buf[base..base + 4].try_into().ok()?);
342        let value = f32::from_le_bytes(self.buf[base + 4..base + 8].try_into().ok()?);
343        Some((dim_idx, value))
344    }
345}
346
347// ── Tests ───────────────────────────────────────────────────────────────────
348
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a header with fixed, recognizable float fields so tests can
    /// assert on them after a round trip.
    fn make_header(mode: QuantMode, dim: u16, bitmask: u64) -> QuantHeader {
        QuantHeader {
            quant_mode: mode as u16,
            dim,
            global_scale: 1.5,
            residual_norm: 0.25,
            dot_quantized: 2.5,
            outlier_bitmask: bitmask,
            reserved: [0xAB; 8],
        }
    }

    #[test]
    fn header_is_32_bytes() {
        // Also enforced by the const assert above; belt-and-suspenders.
        assert_eq!(core::mem::size_of::<QuantHeader>(), 32);
    }

    #[test]
    fn target_size_is_128_multiple() {
        // Sweep modes × dims × outlier counts; every size must be a non-zero
        // multiple of the 128-byte allocation unit.
        for mode in [
            QuantMode::Binary,
            QuantMode::RaBitQ,
            QuantMode::TernarySimd,
            QuantMode::TurboQuant4b,
            QuantMode::Sq8,
        ] {
            for dim in [64u16, 128, 256, 512, 1536] {
                for outliers in [0u32, 1, 8, 64] {
                    let sz = target_size(mode, dim, outliers);
                    assert_eq!(
                        sz % 128,
                        0,
                        "target_size not 128-aligned for {mode:?}/{dim}/{outliers}"
                    );
                    assert!(
                        sz >= 128,
                        "target_size below minimum for {mode:?}/{dim}/{outliers}"
                    );
                }
            }
        }
    }

    #[test]
    fn no_outliers_roundtrip() {
        let header = make_header(QuantMode::Binary, 128, 0);
        let packed = vec![0xFFu8; 16]; // 128 dims / 8 = 16 bytes
        let vec = UnifiedQuantizedVector::new(header, &packed, &[]).unwrap();

        assert_eq!(vec.outlier_count(), 0);
        assert_eq!(vec.packed_bits(), packed.as_slice());
        assert_eq!(vec.as_bytes().len() % 128, 0);
    }

    #[test]
    fn one_outlier_roundtrip() {
        // Bit 5 set → dim_index 5 is an outlier.
        let bitmask: u64 = 1 << 5;
        let header = make_header(QuantMode::Sq8, 64, bitmask);
        let packed = vec![0u8; 64]; // 64 dims × 8bpw = 64 bytes
        let outliers = [(5u32, 42.0f32)];
        let vec = UnifiedQuantizedVector::new(header, &packed, &outliers).unwrap();

        assert_eq!(vec.outlier_count(), 1);
        let (dim, val) = vec.outlier_at(5).expect("bit 5 should be set");
        assert_eq!(dim, 5);
        assert!((val - 42.0).abs() < f32::EPSILON);
        // Unset bits (below and above the outlier) must miss.
        assert!(vec.outlier_at(0).is_none());
        assert!(vec.outlier_at(6).is_none());
    }

    #[test]
    fn eight_outliers_roundtrip() {
        // Bits 0,3,7,12,20,33,50,63 set — spans both u32 halves of the mask.
        let bits: &[u32] = &[0, 3, 7, 12, 20, 33, 50, 63];
        let mut bitmask: u64 = 0;
        for &b in bits {
            bitmask |= 1 << b;
        }
        let header = make_header(QuantMode::TurboQuant4b, 128, bitmask);
        let packed = vec![0xAAu8; 64]; // 128 dims × 4bpw = 64 bytes
        // Values are i·1.1 so each slot's value identifies its dense index.
        let outlier_list: Vec<(u32, f32)> = bits
            .iter()
            .enumerate()
            .map(|(i, &b)| (b, i as f32 * 1.1))
            .collect();
        let vec = UnifiedQuantizedVector::new(header, &packed, &outlier_list).unwrap();

        assert_eq!(vec.outlier_count(), 8);
        for (i, &b) in bits.iter().enumerate() {
            let (dim, val) = vec
                .outlier_at(b)
                .unwrap_or_else(|| panic!("outlier at {b} missing"));
            assert_eq!(dim, b);
            assert!(
                (val - i as f32 * 1.1f32).abs() < 1e-5,
                "value mismatch at dim {b}"
            );
        }
    }

    #[test]
    fn as_bytes_reborrow_via_ref() {
        // Serialize via the owned type, then re-open the same bytes through
        // the zero-copy ref view and check the outlier survives.
        let bitmask: u64 = 1 << 10;
        let header = make_header(QuantMode::RaBitQ, 64, bitmask);
        let packed = vec![0u8; 8]; // 64 dims / 8 = 8 bytes (1 bpw)
        let outliers = [(10u32, 7.77f32)];
        let vec = UnifiedQuantizedVector::new(header, &packed, &outliers).unwrap();

        let bytes = vec.as_bytes();
        let packed_bits_len = vec.packed_bits_len;
        let vref = UnifiedQuantizedVectorRef::from_bytes(bytes, packed_bits_len).unwrap();

        assert_eq!(vref.outlier_count(), 1);
        let (dim, val) = vref.outlier_at(10).unwrap();
        assert_eq!(dim, 10);
        assert!((val - 7.77).abs() < 1e-5);
    }

    #[test]
    fn header_field_roundtrip() {
        let header = QuantHeader {
            quant_mode: QuantMode::Bbq as u16,
            dim: 512,
            global_scale: 4.5,
            residual_norm: 0.99,
            dot_quantized: -1.23,
            outlier_bitmask: 0xDEAD_BEEF_0000_0001,
            reserved: [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08],
        };
        let packed = vec![0u8; packed_bits_len(QuantMode::Bbq, 512)];

        // Build bitmask consistent with header.
        let bitmask = header.outlier_bitmask;
        let count = bitmask.count_ones() as usize;
        // Generate dummy outlier entries for each set bit (lowest bits first).
        let mut outliers: Vec<(u32, f32)> = Vec::with_capacity(count);
        for bit in 0u32..64 {
            if bitmask & (1u64 << bit) != 0 {
                outliers.push((bit, bit as f32));
            }
        }

        let vec = UnifiedQuantizedVector::new(header, &packed, &outliers).unwrap();
        let h = vec.header();

        // Every header field must survive the raw-byte write/read round trip.
        assert_eq!(h.quant_mode, QuantMode::Bbq as u16);
        assert_eq!(h.dim, 512);
        assert!((h.global_scale - 4.5).abs() < 1e-5);
        assert!((h.residual_norm - 0.99).abs() < 1e-5);
        assert!((h.dot_quantized - (-1.23)).abs() < 1e-5);
        assert_eq!(h.outlier_bitmask, 0xDEAD_BEEF_0000_0001);
        assert_eq!(h.reserved, [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]);
    }

    #[test]
    fn outlier_ordering_popcnt() {
        // Outliers at dims [3, 17, 40] — bit 17 should be the second entry.
        let bitmask: u64 = (1 << 3) | (1 << 17) | (1 << 40);
        let header = make_header(QuantMode::Sq8, 64, bitmask);
        let packed = vec![0u8; 64];
        let outliers = [(3u32, 100.0f32), (17u32, 200.0f32), (40u32, 300.0f32)];
        let vec = UnifiedQuantizedVector::new(header, &packed, &outliers).unwrap();

        // outlier_at(17) should return the second entry (value 200.0).
        let (dim, val) = vec.outlier_at(17).expect("dim 17 should be an outlier");
        assert_eq!(dim, 17);
        assert!((val - 200.0).abs() < f32::EPSILON);

        let (dim0, val0) = vec.outlier_at(3).expect("dim 3 should be an outlier");
        assert_eq!(dim0, 3);
        assert!((val0 - 100.0).abs() < f32::EPSILON);

        let (dim2, val2) = vec.outlier_at(40).expect("dim 40 should be an outlier");
        assert_eq!(dim2, 40);
        assert!((val2 - 300.0).abs() < f32::EPSILON);
    }

    #[test]
    fn out_of_range_slot_returns_none() {
        let header = make_header(QuantMode::Binary, 64, 0);
        let packed = vec![0u8; 8];
        let vec = UnifiedQuantizedVector::new(header, &packed, &[]).unwrap();

        // Slots ≥ 64 are outside the bitmask and must return None, not panic.
        assert!(vec.outlier_at(64).is_none(), "slot 64 is out of range");
        assert!(vec.outlier_at(80).is_none(), "slot 80 is out of range");
        assert!(
            vec.outlier_at(u32::MAX).is_none(),
            "slot u32::MAX is out of range"
        );
    }

    #[test]
    fn outlier_count_mismatch_is_error() {
        // Bitmask says 1 outlier but we provide 0.
        let bitmask: u64 = 1 << 2;
        let header = make_header(QuantMode::Binary, 64, bitmask);
        let packed = vec![0u8; 8];
        let err = UnifiedQuantizedVector::new(header, &packed, &[]);
        assert!(
            err.is_err(),
            "should fail when outlier count mismatches bitmask"
        );
    }
}