Skip to main content

tinyquant_io/compressed_vector/
header.rs

1//! Header encoding/decoding for the `CompressedVector` binary format.
2//!
3//! Header layout (70 bytes, all fields little-endian):
4//!
5//! | offset | length | field       | encoding          |
6//! |-------:|-------:|-------------|-------------------|
7//! |      0 |      1 | version     | u8 = 0x01         |
8//! |      1 |     64 | config_hash | UTF-8, NUL-padded |
9//! |     65 |      4 | dimension   | u32 LE            |
10//! |     69 |      1 | bit_width   | u8                |
11
12use crate::errors::IoError;
13
/// Version byte stored at offset 0 of the header.
pub const FORMAT_VERSION: u8 = 0x01;
/// Width in bytes of the NUL-padded `config_hash` field.
pub const HASH_BYTES: usize = 64;
/// Total header size in bytes.
///
/// Derived from the field widths so it cannot drift from [`HASH_BYTES`]:
/// version (1) + config_hash ([`HASH_BYTES`]) + dimension (4) + bit_width (1).
pub const HEADER_SIZE: usize = 1 + HASH_BYTES + 4 + 1;
18
19/// Append a 70-byte header to `out`.
20pub fn encode_header(out: &mut Vec<u8>, config_hash: &str, dimension: u32, bit_width: u8) {
21    out.push(FORMAT_VERSION);
22    let mut buf = [0u8; HASH_BYTES];
23    let src = config_hash.as_bytes();
24    let n = src.len().min(HASH_BYTES);
25    #[allow(clippy::indexing_slicing)]
26    // n <= HASH_BYTES by construction; buf is exactly HASH_BYTES
27    buf[..n].copy_from_slice(&src[..n]);
28    out.extend_from_slice(&buf);
29    out.extend_from_slice(&dimension.to_le_bytes());
30    out.push(bit_width);
31}
32
33/// Decode the header from the start of `data`.
34///
35/// Returns `(version, config_hash, dimension, bit_width)`.
36pub fn decode_header(data: &[u8]) -> Result<(u8, &str, u32, u8), IoError> {
37    if data.len() < HEADER_SIZE {
38        return Err(IoError::Truncated {
39            needed: HEADER_SIZE,
40            got: data.len(),
41        });
42    }
43    // Safety: bounds-checked above; index 0 is always valid
44    #[allow(clippy::indexing_slicing)]
45    let version = data[0];
46
47    let hash_raw = data.get(1..65).ok_or(IoError::Truncated {
48        needed: HEADER_SIZE,
49        got: data.len(),
50    })?;
51    let trim = hash_raw.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
52    let config_hash = core::str::from_utf8(hash_raw.get(..trim).unwrap_or(hash_raw))
53        .map_err(|_| IoError::InvalidUtf8)?;
54
55    let dim_bytes: [u8; 4] = data
56        .get(65..69)
57        .ok_or(IoError::Truncated {
58            needed: HEADER_SIZE,
59            got: data.len(),
60        })?
61        .try_into()
62        .map_err(|_| IoError::Truncated {
63            needed: HEADER_SIZE,
64            got: data.len(),
65        })?;
66    let dimension = u32::from_le_bytes(dim_bytes);
67
68    let bit_width = data.get(69).copied().ok_or(IoError::Truncated {
69        needed: HEADER_SIZE,
70        got: data.len(),
71    })?;
72
73    Ok((version, config_hash, dimension, bit_width))
74}