Skip to main content

tinyquant_core/codec/
codec_config.rs

1//! `CodecConfig`: immutable value object describing codec parameters.
2//!
3//! Mirrors `tinyquant_cpu.codec.codec_config.CodecConfig` field-for-field.
4//! The canonical `config_hash` format is a SHA-256 digest of a fixed
5//! canonical string and matches the Python reference byte-for-byte so
6//! artifacts produced by either implementation are interchangeable.
7//!
8//! The canonical string format is:
9//!
10//! ```text
11//! CodecConfig(bit_width={b},seed={s},dimension={d},residual_enabled={r})
12//! ```
13//!
14//! where `{r}` stringifies the bool as Python's `str(bool)` spelling —
15//! `"True"` or `"False"` (capitalized). Deviating from this in any way
16//! (spaces, case, field order) breaks parity.
17
18use alloc::format;
19use alloc::sync::Arc;
20
21use sha2::{Digest, Sha256};
22
23use crate::errors::CodecError;
24use crate::types::ConfigHash;
25
/// Every quantization bit width that `TinyQuant` accepts.
///
/// Kept in step with `tinyquant_cpu.codec.codec_config.SUPPORTED_BIT_WIDTHS`
/// in the Python reference. [`CodecConfig::new`] rejects any width that is
/// not listed here.
pub const SUPPORTED_BIT_WIDTHS: &[u8] = &[2_u8, 4_u8, 8_u8];
30
31/// Immutable configuration snapshot that fully determines codec behavior.
32///
33/// Two configs with identical primary fields are interchangeable. The
34/// cached `config_hash` is computed eagerly in [`CodecConfig::new`] and
35/// ignored by [`PartialEq`] / [`Hash`] so semantically equal configs
36/// compare equal regardless of which instance owns the `Arc<str>`.
37#[derive(Clone, Debug)]
38pub struct CodecConfig {
39    bit_width: u8,
40    seed: u64,
41    dimension: u32,
42    residual_enabled: bool,
43    config_hash: ConfigHash,
44}
45
46impl CodecConfig {
47    /// Validate the field invariants and return a new `CodecConfig`.
48    ///
49    /// # Errors
50    ///
51    /// * [`CodecError::UnsupportedBitWidth`] — `bit_width` is not in
52    ///   [`SUPPORTED_BIT_WIDTHS`].
53    /// * [`CodecError::InvalidDimension`] — `dimension == 0`.
54    pub fn new(
55        bit_width: u8,
56        seed: u64,
57        dimension: u32,
58        residual_enabled: bool,
59    ) -> Result<Self, CodecError> {
60        if !SUPPORTED_BIT_WIDTHS.contains(&bit_width) {
61            return Err(CodecError::UnsupportedBitWidth { got: bit_width });
62        }
63        if dimension == 0 {
64            return Err(CodecError::InvalidDimension { got: 0 });
65        }
66        let config_hash = compute_config_hash(bit_width, seed, dimension, residual_enabled);
67        Ok(Self {
68            bit_width,
69            seed,
70            dimension,
71            residual_enabled,
72            config_hash,
73        })
74    }
75
76    /// The bit width of the quantized indices.
77    #[inline]
78    pub const fn bit_width(&self) -> u8 {
79        self.bit_width
80    }
81
82    /// The seed used for deterministic rotation and codebook generation.
83    #[inline]
84    pub const fn seed(&self) -> u64 {
85        self.seed
86    }
87
88    /// The expected input vector dimensionality.
89    #[inline]
90    pub const fn dimension(&self) -> u32 {
91        self.dimension
92    }
93
94    /// Whether stage-2 residual correction is enabled.
95    #[inline]
96    pub const fn residual_enabled(&self) -> bool {
97        self.residual_enabled
98    }
99
100    /// `2^bit_width` — the number of quantization levels in the codebook.
101    #[inline]
102    pub const fn num_codebook_entries(&self) -> u32 {
103        1u32 << self.bit_width
104    }
105
106    /// Cached SHA-256 hex digest of the canonical string representation.
107    ///
108    /// Returned as an `Arc<str>` borrow; clone with `.clone()` for owned use.
109    #[inline]
110    pub const fn config_hash(&self) -> &ConfigHash {
111        &self.config_hash
112    }
113}
114
115impl PartialEq for CodecConfig {
116    fn eq(&self, other: &Self) -> bool {
117        self.bit_width == other.bit_width
118            && self.seed == other.seed
119            && self.dimension == other.dimension
120            && self.residual_enabled == other.residual_enabled
121    }
122}
123
124impl Eq for CodecConfig {}
125
126impl core::hash::Hash for CodecConfig {
127    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
128        self.bit_width.hash(state);
129        self.seed.hash(state);
130        self.dimension.hash(state);
131        self.residual_enabled.hash(state);
132    }
133}
134
135/// Compute the canonical SHA-256 `config_hash` for the given field tuple.
136///
137/// Exposed at module scope (rather than as an associated function) so that
138/// tests can verify hash computation without constructing a full config.
139/// Kept `pub(crate)` because nothing outside the crate should need to
140/// compute a hash without going through [`CodecConfig::new`].
141pub(crate) fn compute_config_hash(
142    bit_width: u8,
143    seed: u64,
144    dimension: u32,
145    residual_enabled: bool,
146) -> ConfigHash {
147    // CRITICAL: Python bool stringifies as "True" / "False" (capitalized).
148    // See scripts/generate_rust_fixtures.py and the Python reference in
149    // src/tinyquant_cpu/codec/codec_config.py.
150    let canonical = format!(
151        "CodecConfig(bit_width={b},seed={s},dimension={d},residual_enabled={r})",
152        b = bit_width,
153        s = seed,
154        d = dimension,
155        r = if residual_enabled { "True" } else { "False" },
156    );
157    let digest = Sha256::digest(canonical.as_bytes());
158    Arc::from(hex::encode(digest).as_str())
159}