tinyquant_core/codec/codec_config.rs
1//! `CodecConfig`: immutable value object describing codec parameters.
2//!
3//! Mirrors `tinyquant_cpu.codec.codec_config.CodecConfig` field-for-field.
4//! The canonical `config_hash` format is a SHA-256 digest of a fixed
5//! canonical string and matches the Python reference byte-for-byte so
6//! artifacts produced by either implementation are interchangeable.
7//!
8//! The canonical string format is:
9//!
10//! ```text
11//! CodecConfig(bit_width={b},seed={s},dimension={d},residual_enabled={r})
12//! ```
13//!
14//! where `{r}` stringifies the bool as Python's `str(bool)` spelling —
15//! `"True"` or `"False"` (capitalized). Deviating from this in any way
16//! (spaces, case, field order) breaks parity.
17
18use alloc::format;
19use alloc::sync::Arc;
20
21use sha2::{Digest, Sha256};
22
23use crate::errors::CodecError;
24use crate::types::ConfigHash;
25
/// The complete set of quantization bit widths supported by `TinyQuant`.
///
/// Mirrors `tinyquant_cpu.codec.codec_config.SUPPORTED_BIT_WIDTHS`.
///
/// [`CodecConfig::new`] checks its `bit_width` argument against this slice
/// and returns [`CodecError::UnsupportedBitWidth`] for any value that is
/// not listed here.
pub const SUPPORTED_BIT_WIDTHS: &[u8] = &[2, 4, 8];
30
/// Immutable configuration snapshot that fully determines codec behavior.
///
/// Two configs with identical primary fields are interchangeable. The
/// cached `config_hash` is computed eagerly in [`CodecConfig::new`] and
/// ignored by [`PartialEq`] / [`Hash`] so semantically equal configs
/// compare equal regardless of which instance owns the `Arc<str>`.
///
/// Every field is private: values are created through the validating
/// constructor [`CodecConfig::new`] and read through the accessor methods.
#[derive(Clone, Debug)]
pub struct CodecConfig {
    /// Bit width of the quantized indices. [`CodecConfig::new`] only
    /// accepts values listed in [`SUPPORTED_BIT_WIDTHS`].
    bit_width: u8,
    /// Seed used for deterministic rotation and codebook generation.
    seed: u64,
    /// Expected input vector dimensionality. [`CodecConfig::new`] rejects
    /// zero.
    dimension: u32,
    /// Whether stage-2 residual correction is enabled.
    residual_enabled: bool,
    /// Cached SHA-256 hex digest of the canonical string built from the
    /// four fields above. Not part of equality or hashing.
    config_hash: ConfigHash,
}
45
46impl CodecConfig {
47 /// Validate the field invariants and return a new `CodecConfig`.
48 ///
49 /// # Errors
50 ///
51 /// * [`CodecError::UnsupportedBitWidth`] — `bit_width` is not in
52 /// [`SUPPORTED_BIT_WIDTHS`].
53 /// * [`CodecError::InvalidDimension`] — `dimension == 0`.
54 pub fn new(
55 bit_width: u8,
56 seed: u64,
57 dimension: u32,
58 residual_enabled: bool,
59 ) -> Result<Self, CodecError> {
60 if !SUPPORTED_BIT_WIDTHS.contains(&bit_width) {
61 return Err(CodecError::UnsupportedBitWidth { got: bit_width });
62 }
63 if dimension == 0 {
64 return Err(CodecError::InvalidDimension { got: 0 });
65 }
66 let config_hash = compute_config_hash(bit_width, seed, dimension, residual_enabled);
67 Ok(Self {
68 bit_width,
69 seed,
70 dimension,
71 residual_enabled,
72 config_hash,
73 })
74 }
75
76 /// The bit width of the quantized indices.
77 #[inline]
78 pub const fn bit_width(&self) -> u8 {
79 self.bit_width
80 }
81
82 /// The seed used for deterministic rotation and codebook generation.
83 #[inline]
84 pub const fn seed(&self) -> u64 {
85 self.seed
86 }
87
88 /// The expected input vector dimensionality.
89 #[inline]
90 pub const fn dimension(&self) -> u32 {
91 self.dimension
92 }
93
94 /// Whether stage-2 residual correction is enabled.
95 #[inline]
96 pub const fn residual_enabled(&self) -> bool {
97 self.residual_enabled
98 }
99
100 /// `2^bit_width` — the number of quantization levels in the codebook.
101 #[inline]
102 pub const fn num_codebook_entries(&self) -> u32 {
103 1u32 << self.bit_width
104 }
105
106 /// Cached SHA-256 hex digest of the canonical string representation.
107 ///
108 /// Returned as an `Arc<str>` borrow; clone with `.clone()` for owned use.
109 #[inline]
110 pub const fn config_hash(&self) -> &ConfigHash {
111 &self.config_hash
112 }
113}
114
115impl PartialEq for CodecConfig {
116 fn eq(&self, other: &Self) -> bool {
117 self.bit_width == other.bit_width
118 && self.seed == other.seed
119 && self.dimension == other.dimension
120 && self.residual_enabled == other.residual_enabled
121 }
122}
123
124impl Eq for CodecConfig {}
125
126impl core::hash::Hash for CodecConfig {
127 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
128 self.bit_width.hash(state);
129 self.seed.hash(state);
130 self.dimension.hash(state);
131 self.residual_enabled.hash(state);
132 }
133}
134
135/// Compute the canonical SHA-256 `config_hash` for the given field tuple.
136///
137/// Exposed at module scope (rather than as an associated function) so that
138/// tests can verify hash computation without constructing a full config.
139/// Kept `pub(crate)` because nothing outside the crate should need to
140/// compute a hash without going through [`CodecConfig::new`].
141pub(crate) fn compute_config_hash(
142 bit_width: u8,
143 seed: u64,
144 dimension: u32,
145 residual_enabled: bool,
146) -> ConfigHash {
147 // CRITICAL: Python bool stringifies as "True" / "False" (capitalized).
148 // See scripts/generate_rust_fixtures.py and the Python reference in
149 // src/tinyquant_cpu/codec/codec_config.py.
150 let canonical = format!(
151 "CodecConfig(bit_width={b},seed={s},dimension={d},residual_enabled={r})",
152 b = bit_width,
153 s = seed,
154 d = dimension,
155 r = if residual_enabled { "True" } else { "False" },
156 );
157 let digest = Sha256::digest(canonical.as_bytes());
158 Arc::from(hex::encode(digest).as_str())
159}