tinyquant_core/codec/compressed_vector.rs
1//! In-memory `CompressedVector` value object (Phase 15).
2//!
3//! Serialization (`to_bytes` / `from_bytes`) arrives in Phase 16 inside
4//! `tinyquant-io`. Field layout is intentionally stable for that transition.
5//!
6//! One byte per dimension in `indices` (upper bits zero for `bit_width < 8`);
7//! when present, `residual` is exactly `2 * dimension` bytes of f16 LE.
8
9use crate::codec::codec_config::SUPPORTED_BIT_WIDTHS;
10use crate::errors::CodecError;
11use crate::types::ConfigHash;
12use alloc::boxed::Box;
13use alloc::sync::Arc;
14
15/// Immutable per-vector compressed payload. Mirrors Python `CompressedVector`.
16///
17/// # Invariants
18///
19/// - `indices.len() == dimension as usize`
20/// - `bit_width ∈ {2, 4, 8}`
21/// - If `residual.is_some()`, `residual.len() == dimension as usize * 2`
22#[derive(Clone, Debug)]
23pub struct CompressedVector {
24 indices: Arc<[u8]>,
25 residual: Option<Arc<[u8]>>,
26 config_hash: ConfigHash,
27 dimension: u32,
28 bit_width: u8,
29}
30
31impl CompressedVector {
32 /// Validated constructor. Mirrors Python `CompressedVector.__post_init__`.
33 ///
34 /// # Errors
35 ///
36 /// - [`CodecError::UnsupportedBitWidth`] if `bit_width ∉ {2, 4, 8}`
37 /// - [`CodecError::DimensionMismatch`] if `indices.len() != dimension`
38 /// - [`CodecError::LengthMismatch`] if `residual.len() != dimension * 2`
39 pub fn new(
40 indices: Box<[u8]>,
41 residual: Option<Box<[u8]>>,
42 config_hash: ConfigHash,
43 dimension: u32,
44 bit_width: u8,
45 ) -> Result<Self, CodecError> {
46 if !SUPPORTED_BIT_WIDTHS.contains(&bit_width) {
47 return Err(CodecError::UnsupportedBitWidth { got: bit_width });
48 }
49 // Dimensions are bounded by u32 in CodecConfig; casting is safe in practice
50 // and checked by the mismatch error path below.
51 #[allow(clippy::cast_possible_truncation)]
52 let got_dim = indices.len() as u32;
53 if got_dim != dimension {
54 return Err(CodecError::DimensionMismatch {
55 expected: dimension,
56 got: got_dim,
57 });
58 }
59 if let Some(r) = residual.as_ref() {
60 let expected = indices.len() * 2;
61 if r.len() != expected {
62 return Err(CodecError::LengthMismatch {
63 left: r.len(),
64 right: expected,
65 });
66 }
67 }
68 Ok(Self {
69 indices: Arc::from(indices),
70 residual: residual.map(Arc::from),
71 config_hash,
72 dimension,
73 bit_width,
74 })
75 }
76
77 /// Raw index bytes — one byte per dimension (upper bits zero for `bit_width < 8`).
78 #[inline]
79 #[must_use]
80 pub fn indices(&self) -> &[u8] {
81 &self.indices
82 }
83
84 /// Optional f16 LE residual bytes — `2 * dimension` bytes when present.
85 #[inline]
86 #[must_use]
87 pub fn residual(&self) -> Option<&[u8]> {
88 self.residual.as_deref()
89 }
90
91 /// The `config_hash` of the [`CodecConfig`](crate::codec::CodecConfig) used to compress this vector.
92 #[inline]
93 #[must_use]
94 pub const fn config_hash(&self) -> &ConfigHash {
95 &self.config_hash
96 }
97
98 /// Number of dimensions (= `indices.len()`).
99 #[inline]
100 #[must_use]
101 pub const fn dimension(&self) -> u32 {
102 self.dimension
103 }
104
105 /// Bit width used during compression.
106 #[inline]
107 #[must_use]
108 pub const fn bit_width(&self) -> u8 {
109 self.bit_width
110 }
111
112 /// `true` when a residual buffer is present.
113 #[inline]
114 #[must_use]
115 // Option::is_some() is not const-stable in MSRV 1.81.
116 #[allow(clippy::missing_const_for_fn)]
117 pub fn has_residual(&self) -> bool {
118 self.residual.is_some()
119 }
120
121 /// Python-parity size estimate: `ceil(dim * bit_width / 8) + residual.len()`.
122 ///
123 /// This is the *packed* on-disk footprint, not the in-memory footprint
124 /// (indices are stored unpacked, 1 byte/dim, until Phase 16).
125 #[must_use]
126 pub fn size_bytes(&self) -> usize {
127 let dim = self.dimension as usize;
128 let bw = self.bit_width as usize;
129 let packed = (dim * bw + 7) / 8;
130 packed + self.residual.as_ref().map_or(0, |r| r.len())
131 }
132}