Skip to main content

tinyquant_io/compressed_vector/
to_bytes.rs

1//! Encode a `CompressedVector` to its binary wire format.
2
3use crate::compressed_vector::{header::encode_header, pack::pack_indices};
4use tinyquant_core::codec::CompressedVector;
5
6/// Serialize `cv` to a `Vec<u8>` in the Level-1 wire format.
7///
8/// Layout: 70-byte header | packed indices | `residual_flag` | [`residual_length` u32 LE | residual bytes]
9pub fn to_bytes(cv: &CompressedVector) -> Vec<u8> {
10    let dim = cv.dimension() as usize;
11    let bw = cv.bit_width();
12    let packed_len = (dim * bw as usize + 7) / 8;
13    // 1 flag + optional (4 length bytes + residual data)
14    let residual_overhead = cv.residual().map_or(0, |r| 5 + r.len());
15    let total = 70 + packed_len + 1 + residual_overhead;
16
17    let mut out = Vec::with_capacity(total);
18    encode_header(&mut out, cv.config_hash(), cv.dimension(), bw);
19
20    // Pack indices
21    let mut packed = vec![0u8; packed_len];
22    pack_indices(cv.indices(), bw, &mut packed);
23    out.extend_from_slice(&packed);
24
25    // Residual section
26    match cv.residual() {
27        None => out.push(0x00),
28        Some(r) => {
29            out.push(0x01);
30            // dim is u32; residual len = 2*dim fits in u32 only when dim <= 2^31.
31            // Embedding dimensions in practice are <4096, so truncation cannot occur.
32            #[allow(clippy::cast_possible_truncation)]
33            let rlen = r.len() as u32;
34            out.extend_from_slice(&rlen.to_le_bytes());
35            out.extend_from_slice(r);
36        }
37    }
38    out
39}