Skip to main content

tinyquant_core/codec/
compressed_vector.rs

1//! In-memory `CompressedVector` value object (Phase 15).
2//!
3//! Serialization (`to_bytes` / `from_bytes`) arrives in Phase 16 inside
4//! `tinyquant-io`. Field layout is intentionally stable for that transition.
5//!
6//! One byte per dimension in `indices` (upper bits zero for `bit_width < 8`);
7//! when present, `residual` is exactly `2 * dimension` bytes of f16 LE.
8
9use crate::codec::codec_config::SUPPORTED_BIT_WIDTHS;
10use crate::errors::CodecError;
11use crate::types::ConfigHash;
12use alloc::boxed::Box;
13use alloc::sync::Arc;
14
15/// Immutable per-vector compressed payload. Mirrors Python `CompressedVector`.
16///
17/// # Invariants
18///
19/// - `indices.len() == dimension as usize`
20/// - `bit_width ∈ {2, 4, 8}`
21/// - If `residual.is_some()`, `residual.len() == dimension as usize * 2`
22#[derive(Clone, Debug)]
23pub struct CompressedVector {
24    indices: Arc<[u8]>,
25    residual: Option<Arc<[u8]>>,
26    config_hash: ConfigHash,
27    dimension: u32,
28    bit_width: u8,
29}
30
31impl CompressedVector {
32    /// Validated constructor. Mirrors Python `CompressedVector.__post_init__`.
33    ///
34    /// # Errors
35    ///
36    /// - [`CodecError::UnsupportedBitWidth`] if `bit_width ∉ {2, 4, 8}`
37    /// - [`CodecError::DimensionMismatch`] if `indices.len() != dimension`
38    /// - [`CodecError::LengthMismatch`] if `residual.len() != dimension * 2`
39    pub fn new(
40        indices: Box<[u8]>,
41        residual: Option<Box<[u8]>>,
42        config_hash: ConfigHash,
43        dimension: u32,
44        bit_width: u8,
45    ) -> Result<Self, CodecError> {
46        if !SUPPORTED_BIT_WIDTHS.contains(&bit_width) {
47            return Err(CodecError::UnsupportedBitWidth { got: bit_width });
48        }
49        // Dimensions are bounded by u32 in CodecConfig; casting is safe in practice
50        // and checked by the mismatch error path below.
51        #[allow(clippy::cast_possible_truncation)]
52        let got_dim = indices.len() as u32;
53        if got_dim != dimension {
54            return Err(CodecError::DimensionMismatch {
55                expected: dimension,
56                got: got_dim,
57            });
58        }
59        if let Some(r) = residual.as_ref() {
60            let expected = indices.len() * 2;
61            if r.len() != expected {
62                return Err(CodecError::LengthMismatch {
63                    left: r.len(),
64                    right: expected,
65                });
66            }
67        }
68        Ok(Self {
69            indices: Arc::from(indices),
70            residual: residual.map(Arc::from),
71            config_hash,
72            dimension,
73            bit_width,
74        })
75    }
76
77    /// Raw index bytes — one byte per dimension (upper bits zero for `bit_width < 8`).
78    #[inline]
79    #[must_use]
80    pub fn indices(&self) -> &[u8] {
81        &self.indices
82    }
83
84    /// Optional f16 LE residual bytes — `2 * dimension` bytes when present.
85    #[inline]
86    #[must_use]
87    pub fn residual(&self) -> Option<&[u8]> {
88        self.residual.as_deref()
89    }
90
91    /// The `config_hash` of the [`CodecConfig`](crate::codec::CodecConfig) used to compress this vector.
92    #[inline]
93    #[must_use]
94    pub const fn config_hash(&self) -> &ConfigHash {
95        &self.config_hash
96    }
97
98    /// Number of dimensions (= `indices.len()`).
99    #[inline]
100    #[must_use]
101    pub const fn dimension(&self) -> u32 {
102        self.dimension
103    }
104
105    /// Bit width used during compression.
106    #[inline]
107    #[must_use]
108    pub const fn bit_width(&self) -> u8 {
109        self.bit_width
110    }
111
112    /// `true` when a residual buffer is present.
113    #[inline]
114    #[must_use]
115    // Option::is_some() is not const-stable in MSRV 1.81.
116    #[allow(clippy::missing_const_for_fn)]
117    pub fn has_residual(&self) -> bool {
118        self.residual.is_some()
119    }
120
121    /// Python-parity size estimate: `ceil(dim * bit_width / 8) + residual.len()`.
122    ///
123    /// This is the *packed* on-disk footprint, not the in-memory footprint
124    /// (indices are stored unpacked, 1 byte/dim, until Phase 16).
125    #[must_use]
126    pub fn size_bytes(&self) -> usize {
127        let dim = self.dimension as usize;
128        let bw = self.bit_width as usize;
129        let packed = (dim * bw + 7) / 8;
130        packed + self.residual.as_ref().map_or(0, |r| r.len())
131    }
132}