Skip to main content

semantic_memory/
vector_codec.rs

//! Vector codec profile and artifact boundary.
//!
//! Codecs are derived acceleration/transport helpers only. SQLite memory rows
//! and raw f32 embeddings remain authoritative, and every artifact must carry a
//! profile digest that can be checked before decoding.
6
7use crate::{db, quantize, MemoryError};
8use serde::{Deserialize, Serialize};
9use stack_ids::{ContentDigest, DigestBuilder};
10
/// Schema marker written into `VectorCodecProfileV1::schema_version` by the
/// profile constructors; also the domain string used when hashing a profile.
const PROFILE_SCHEMA_V1: &str = "vector_codec_profile_v1";
/// Schema marker written into `VectorArtifactV1::schema_version` by
/// `VectorArtifactV1::new`.
const ARTIFACT_SCHEMA_V1: &str = "vector_artifact_v1";
13
14fn b3_digest(bytes: &[u8]) -> String {
15    format!("blake3:{}", ContentDigest::compute(bytes).hex())
16}
17
18fn b3_json_digest<T: Serialize>(domain: &str, value: &T) -> String {
19    let mut builder = DigestBuilder::new();
20    builder.update_str(domain).separator();
21    match builder.update_json(value) {
22        Ok(_) => format!("blake3:{}", builder.finalize().hex()),
23        Err(_) => b3_digest(format!("{domain}:digest-fallback").as_bytes()),
24    }
25}
26
27fn dim_u32(dim: usize) -> Result<u32, MemoryError> {
28    u32::try_from(dim).map_err(|_| MemoryError::InvalidConfig {
29        field: "embedding.dimensions",
30        reason: format!("dimension {dim} does not fit vector codec profile u32"),
31    })
32}
33
/// Convert a persisted `u32` dimension back to `usize` for slice-length checks.
#[inline]
fn dim_usize(dim: u32) -> usize {
    // Plain cast: lossless wherever `usize` is at least 32 bits wide.
    dim as usize
}
37
38/// Stable identity for a vector codec configuration.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub struct VectorCodecProfileV1 {
41    /// Stable schema marker for this profile format.
42    pub schema_version: String,
43    /// Codec implementation identifier.
44    pub codec: String,
45    /// Vector dimensionality. Persisted as fixed-width `u32`, never `usize`.
46    pub dim: u32,
47    /// Effective encoded bits per scalar, when applicable.
48    pub bits: u8,
49    /// Number of projection vectors, reserved for future derived codecs.
50    pub projections: Option<u32>,
51    /// Codec seed, reserved for randomized derived codecs.
52    pub seed: Option<u64>,
53    /// Codec wire/algorithm version.
54    pub codec_version: String,
55    /// Declared scoring semantics for decoded/reference comparison.
56    pub scoring_semantics: String,
57    /// Declared vector normalization contract.
58    pub normalization: String,
59}
60
61impl VectorCodecProfileV1 {
62    /// Build the reference raw f32 profile.
63    pub fn raw_f32(dim: usize) -> Result<Self, MemoryError> {
64        Ok(Self {
65            schema_version: PROFILE_SCHEMA_V1.into(),
66            codec: "raw_f32".into(),
67            dim: dim_u32(dim)?,
68            bits: 32,
69            projections: None,
70            seed: None,
71            codec_version: "1".into(),
72            scoring_semantics: "cosine_on_decoded_f32".into(),
73            normalization: "caller_supplied".into(),
74        })
75    }
76
77    /// Build the existing per-vector scalar quantization profile.
78    pub fn sq8(dim: usize) -> Result<Self, MemoryError> {
79        Ok(Self {
80            schema_version: PROFILE_SCHEMA_V1.into(),
81            codec: "sq8".into(),
82            dim: dim_u32(dim)?,
83            bits: 8,
84            projections: None,
85            seed: None,
86            codec_version: "1".into(),
87            scoring_semantics: "cosine_on_dequantized_f32".into(),
88            normalization: "caller_supplied".into(),
89        })
90    }
91
92    /// Build a TurboQuant profile.
93    #[cfg(feature = "turbo-quant-codec")]
94    pub fn turbo_quant(
95        dim: usize,
96        bits: u8,
97        projections: usize,
98        seed: u64,
99    ) -> Result<Self, MemoryError> {
100        Ok(Self {
101            schema_version: PROFILE_SCHEMA_V1.into(),
102            codec: "turbo_quant".into(),
103            dim: dim_u32(dim)?,
104            bits,
105            projections: Some(dim_u32(projections)?),
106            seed: Some(seed),
107            codec_version: "turbo-quant:0.2.0-alpha.1".into(),
108            scoring_semantics: "inner_product_estimate".into(),
109            normalization: "caller_supplied".into(),
110        })
111    }
112
113    /// Stable digest over the explicit profile fields.
114    pub fn digest(&self) -> String {
115        b3_json_digest(PROFILE_SCHEMA_V1, self)
116    }
117}
118
119/// Persistable encoded vector plus the profile identity required to decode it.
120#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
121pub struct VectorArtifactV1 {
122    /// Stable schema marker for this artifact format.
123    pub schema_version: String,
124    /// Full codec profile used to produce `encoded`.
125    pub profile: VectorCodecProfileV1,
126    /// Digest of `profile`; checked before decode.
127    pub profile_digest: String,
128    /// Digest of `encoded`; checked before decode when present.
129    #[serde(default)]
130    pub artifact_digest: String,
131    /// Codec-owned encoded bytes.
132    pub encoded: Vec<u8>,
133}
134
135impl VectorArtifactV1 {
136    /// Construct a new artifact and stamp the profile digest.
137    pub fn new(profile: VectorCodecProfileV1, encoded: Vec<u8>) -> Self {
138        let profile_digest = profile.digest();
139        let artifact_digest = b3_digest(&encoded);
140        Self {
141            schema_version: ARTIFACT_SCHEMA_V1.into(),
142            profile,
143            profile_digest,
144            artifact_digest,
145            encoded,
146        }
147    }
148
149    /// Stable digest over encoded artifact bytes.
150    pub fn encoded_digest(&self) -> String {
151        b3_digest(&self.encoded)
152    }
153}
154
155/// Object-safe vector codec boundary for derived vector artifacts.
156pub trait VectorCodec: Send + Sync {
157    /// Codec profile identity.
158    fn profile(&self) -> &VectorCodecProfileV1;
159
160    /// Encode a raw f32 vector into a byte artifact.
161    fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError>;
162
163    /// Decode an artifact back to f32 for reference scoring or differential tests.
164    fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError>;
165}
166
167fn validate_artifact_profile(
168    expected: &VectorCodecProfileV1,
169    artifact: &VectorArtifactV1,
170) -> Result<(), MemoryError> {
171    let artifact_profile_digest = artifact.profile.digest();
172    if artifact.profile_digest != artifact_profile_digest {
173        return Err(MemoryError::VectorCodecProfileMismatch {
174            expected_digest: artifact_profile_digest,
175            actual_digest: artifact.profile_digest.clone(),
176        });
177    }
178
179    let expected_digest = expected.digest();
180    if artifact.profile_digest != expected_digest {
181        return Err(MemoryError::VectorCodecProfileMismatch {
182            expected_digest,
183            actual_digest: artifact.profile_digest.clone(),
184        });
185    }
186
187    let encoded_digest = artifact.encoded_digest();
188    if !artifact.artifact_digest.is_empty() && artifact.artifact_digest != encoded_digest {
189        return Err(MemoryError::CorruptData {
190            table: "vector_artifacts",
191            row_id: artifact.profile_digest.clone(),
192            detail: format!(
193                "encoded artifact digest mismatch: expected {}, got {}",
194                artifact.artifact_digest, encoded_digest
195            ),
196        });
197    }
198
199    Ok(())
200}
201
202/// Reference codec that stores raw little-endian f32 bytes.
203#[derive(Debug, Clone)]
204pub struct RawF32Codec {
205    profile: VectorCodecProfileV1,
206}
207
208impl RawF32Codec {
209    /// Create a raw f32 codec for `dim` dimensions.
210    pub fn new(dim: usize) -> Result<Self, MemoryError> {
211        Ok(Self {
212            profile: VectorCodecProfileV1::raw_f32(dim)?,
213        })
214    }
215}
216
217impl VectorCodec for RawF32Codec {
218    fn profile(&self) -> &VectorCodecProfileV1 {
219        &self.profile
220    }
221
222    fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
223        db::validate_embedding(vector, dim_usize(self.profile.dim))?;
224        Ok(VectorArtifactV1::new(
225            self.profile.clone(),
226            db::encode_f32_le(vector),
227        ))
228    }
229
230    fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
231        validate_artifact_profile(&self.profile, artifact)?;
232        db::decode_f32_le(&artifact.encoded, dim_usize(self.profile.dim))
233    }
234}
235
236/// Codec wrapper around the existing per-vector SQ8 quantization path.
237#[derive(Debug, Clone)]
238pub struct Sq8Codec {
239    profile: VectorCodecProfileV1,
240}
241
242impl Sq8Codec {
243    /// Create an SQ8 codec for `dim` dimensions.
244    pub fn new(dim: usize) -> Result<Self, MemoryError> {
245        Ok(Self {
246            profile: VectorCodecProfileV1::sq8(dim)?,
247        })
248    }
249}
250
251impl VectorCodec for Sq8Codec {
252    fn profile(&self) -> &VectorCodecProfileV1 {
253        &self.profile
254    }
255
256    fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
257        db::validate_embedding(vector, dim_usize(self.profile.dim))?;
258        let quantized = quantize::Quantizer::new(dim_usize(self.profile.dim)).quantize(vector)?;
259        Ok(VectorArtifactV1::new(
260            self.profile.clone(),
261            quantize::pack_quantized(&quantized),
262        ))
263    }
264
265    fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
266        validate_artifact_profile(&self.profile, artifact)?;
267        let quantized = quantize::unpack_quantized(&artifact.encoded, dim_usize(self.profile.dim))?;
268        let decoded = quantize::Quantizer::new(dim_usize(self.profile.dim)).dequantize(&quantized);
269        db::validate_embedding(&decoded, dim_usize(self.profile.dim))?;
270        Ok(decoded)
271    }
272}
273
/// Convert a TurboQuant error into `MemoryError::QuantizationError`, keeping
/// its display text behind a `turbo-quant: ` prefix.
#[cfg(feature = "turbo-quant-codec")]
fn map_turbo_error(err: turbo_quant::TurboQuantError) -> MemoryError {
    let message = format!("turbo-quant: {err}");
    MemoryError::QuantizationError(message)
}
278
/// TurboQuant-backed codec, compiled only with the `turbo-quant-codec` feature.
#[cfg(feature = "turbo-quant-codec")]
#[derive(Debug, Clone)]
pub struct TurboQuantCodec {
    /// Profile built from the same parameters as `quantizer`.
    profile: VectorCodecProfileV1,
    /// Backend quantizer that performs encoding, decoding, and scoring.
    quantizer: turbo_quant::TurboQuantizer,
}
286
#[cfg(feature = "turbo-quant-codec")]
impl TurboQuantCodec {
    /// Create a TurboQuant codec and its matching profile.
    ///
    /// The backend quantizer is built first, then the profile, both from the
    /// same `dim`, `bits`, `projections`, and `seed`.
    ///
    /// # Errors
    ///
    /// - `MemoryError::QuantizationError` when `TurboQuantizer::new` fails.
    /// - Any error returned by `VectorCodecProfileV1::turbo_quant`.
    pub fn new(dim: usize, bits: u8, projections: usize, seed: u64) -> Result<Self, MemoryError> {
        let quantizer = turbo_quant::TurboQuantizer::new(dim, bits, projections, seed)
            .map_err(map_turbo_error)?;
        let profile = VectorCodecProfileV1::turbo_quant(dim, bits, projections, seed)?;
        Ok(Self { profile, quantizer })
    }

    /// Validate `artifact` against this codec's profile and parse its bytes
    /// into a `TurboCode`.
    ///
    /// Callers that go through this helper must not validate the artifact
    /// again themselves; validation recomputes every digest.
    fn decode_code(
        &self,
        artifact: &VectorArtifactV1,
    ) -> Result<turbo_quant::TurboCode, MemoryError> {
        validate_artifact_profile(&self.profile, artifact)?;
        self.quantizer
            .decode_code_from_bytes(&artifact.encoded)
            .map_err(map_turbo_error)
    }

    /// Estimate inner product between a raw query vector and a TurboQuant artifact.
    ///
    /// # Errors
    ///
    /// Fails if `query` does not pass `db::validate_embedding`, if the artifact
    /// does not validate against this codec's profile, or if the backend
    /// reports an error.
    pub fn score_inner_product(
        &self,
        artifact: &VectorArtifactV1,
        query: &[f32],
    ) -> Result<f32, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        validate_artifact_profile(&self.profile, artifact)?;
        self.quantizer
            .score_inner_product_from_bytes(&artifact.encoded, query)
            .map_err(map_turbo_error)
    }

    /// Prepare a query once for scoring many TurboQuant artifacts.
    ///
    /// # Errors
    ///
    /// Fails if `query` does not pass `db::validate_embedding` or if the
    /// backend reports an error.
    pub fn prepare_query(
        &self,
        query: &[f32],
    ) -> Result<turbo_quant::TurboProjectedQuery, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        self.quantizer.prepare_query(query).map_err(map_turbo_error)
    }

    /// Estimate inner product using a pre-projected query.
    ///
    /// # Errors
    ///
    /// Fails if the artifact does not validate against this codec's profile or
    /// if the backend reports an error.
    pub fn score_inner_product_prepared(
        &self,
        artifact: &VectorArtifactV1,
        prepared: &turbo_quant::TurboProjectedQuery,
    ) -> Result<f32, MemoryError> {
        // `decode_code` validates the artifact before parsing it. Validating
        // here as well would recompute the profile and payload digests a
        // second time for every scored artifact.
        let code = self.decode_code(artifact)?;
        self.quantizer
            .inner_product_estimate_prepared(&code, prepared)
            .map_err(map_turbo_error)
    }

    /// Estimate squared L2 distance between a raw query vector and a TurboQuant artifact.
    ///
    /// # Errors
    ///
    /// Fails if `query` does not pass `db::validate_embedding`, if the artifact
    /// does not validate against this codec's profile, or if the backend
    /// reports an error. The query is checked before the artifact.
    pub fn score_l2(&self, artifact: &VectorArtifactV1, query: &[f32]) -> Result<f32, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        // Artifact validation happens inside `decode_code`; no separate call
        // is needed and the error order (query first, artifact second) is kept.
        let code = self.decode_code(artifact)?;
        self.quantizer
            .l2_distance_estimate(&code, query)
            .map_err(map_turbo_error)
    }
}
354
#[cfg(feature = "turbo-quant-codec")]
impl VectorCodec for TurboQuantCodec {
    /// The TurboQuant profile this codec was built with.
    fn profile(&self) -> &VectorCodecProfileV1 {
        &self.profile
    }

    /// Validate `vector`, encode it with the backend quantizer, and wrap the
    /// bytes in an artifact.
    fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
        let dim = dim_usize(self.profile.dim);
        db::validate_embedding(vector, dim)?;

        match self.quantizer.encode_to_bytes(vector) {
            Ok(payload) => Ok(VectorArtifactV1::new(self.profile.clone(), payload)),
            Err(err) => Err(map_turbo_error(err)),
        }
    }

    /// Validate and parse the artifact, reconstruct an approximate vector with
    /// the backend, and validate that vector before returning it.
    fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
        let code = self.decode_code(artifact)?;
        let approx = self
            .quantizer
            .decode_approximate(&code)
            .map_err(map_turbo_error)?;

        let dim = dim_usize(self.profile.dim);
        db::validate_embedding(&approx, dim)?;
        Ok(approx)
    }
}
379}