1use crate::{db, quantize, MemoryError};
8use serde::{Deserialize, Serialize};
9use stack_ids::{ContentDigest, DigestBuilder};
10
11const PROFILE_SCHEMA_V1: &str = "vector_codec_profile_v1";
12const ARTIFACT_SCHEMA_V1: &str = "vector_artifact_v1";
13
14fn b3_digest(bytes: &[u8]) -> String {
15 format!("blake3:{}", ContentDigest::compute(bytes).hex())
16}
17
18fn b3_json_digest<T: Serialize>(domain: &str, value: &T) -> String {
19 let mut builder = DigestBuilder::new();
20 builder.update_str(domain).separator();
21 match builder.update_json(value) {
22 Ok(_) => format!("blake3:{}", builder.finalize().hex()),
23 Err(_) => b3_digest(format!("{domain}:digest-fallback").as_bytes()),
24 }
25}
26
27fn dim_u32(dim: usize) -> Result<u32, MemoryError> {
28 u32::try_from(dim).map_err(|_| MemoryError::InvalidConfig {
29 field: "embedding.dimensions",
30 reason: format!("dimension {dim} does not fit vector codec profile u32"),
31 })
32}
33
/// Widens a profile's `u32` dimension back to `usize` for slice-length APIs.
fn dim_usize(dim: u32) -> usize {
    // Plain `as` cast: lossless wherever `usize` is at least 32 bits wide.
    let widened: usize = dim as usize;
    widened
}
37
38#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub struct VectorCodecProfileV1 {
41 pub schema_version: String,
43 pub codec: String,
45 pub dim: u32,
47 pub bits: u8,
49 pub projections: Option<u32>,
51 pub seed: Option<u64>,
53 pub codec_version: String,
55 pub scoring_semantics: String,
57 pub normalization: String,
59}
60
61impl VectorCodecProfileV1 {
62 pub fn raw_f32(dim: usize) -> Result<Self, MemoryError> {
64 Ok(Self {
65 schema_version: PROFILE_SCHEMA_V1.into(),
66 codec: "raw_f32".into(),
67 dim: dim_u32(dim)?,
68 bits: 32,
69 projections: None,
70 seed: None,
71 codec_version: "1".into(),
72 scoring_semantics: "cosine_on_decoded_f32".into(),
73 normalization: "caller_supplied".into(),
74 })
75 }
76
77 pub fn sq8(dim: usize) -> Result<Self, MemoryError> {
79 Ok(Self {
80 schema_version: PROFILE_SCHEMA_V1.into(),
81 codec: "sq8".into(),
82 dim: dim_u32(dim)?,
83 bits: 8,
84 projections: None,
85 seed: None,
86 codec_version: "1".into(),
87 scoring_semantics: "cosine_on_dequantized_f32".into(),
88 normalization: "caller_supplied".into(),
89 })
90 }
91
92 #[cfg(feature = "turbo-quant-codec")]
94 pub fn turbo_quant(
95 dim: usize,
96 bits: u8,
97 projections: usize,
98 seed: u64,
99 ) -> Result<Self, MemoryError> {
100 Ok(Self {
101 schema_version: PROFILE_SCHEMA_V1.into(),
102 codec: "turbo_quant".into(),
103 dim: dim_u32(dim)?,
104 bits,
105 projections: Some(dim_u32(projections)?),
106 seed: Some(seed),
107 codec_version: "turbo-quant:0.2.0-alpha.1".into(),
108 scoring_semantics: "inner_product_estimate".into(),
109 normalization: "caller_supplied".into(),
110 })
111 }
112
113 pub fn digest(&self) -> String {
115 b3_json_digest(PROFILE_SCHEMA_V1, self)
116 }
117}
118
119#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
121pub struct VectorArtifactV1 {
122 pub schema_version: String,
124 pub profile: VectorCodecProfileV1,
126 pub profile_digest: String,
128 #[serde(default)]
130 pub artifact_digest: String,
131 pub encoded: Vec<u8>,
133}
134
135impl VectorArtifactV1 {
136 pub fn new(profile: VectorCodecProfileV1, encoded: Vec<u8>) -> Self {
138 let profile_digest = profile.digest();
139 let artifact_digest = b3_digest(&encoded);
140 Self {
141 schema_version: ARTIFACT_SCHEMA_V1.into(),
142 profile,
143 profile_digest,
144 artifact_digest,
145 encoded,
146 }
147 }
148
149 pub fn encoded_digest(&self) -> String {
151 b3_digest(&self.encoded)
152 }
153}
154
155pub trait VectorCodec: Send + Sync {
157 fn profile(&self) -> &VectorCodecProfileV1;
159
160 fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError>;
162
163 fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError>;
165}
166
167fn validate_artifact_profile(
168 expected: &VectorCodecProfileV1,
169 artifact: &VectorArtifactV1,
170) -> Result<(), MemoryError> {
171 let artifact_profile_digest = artifact.profile.digest();
172 if artifact.profile_digest != artifact_profile_digest {
173 return Err(MemoryError::VectorCodecProfileMismatch {
174 expected_digest: artifact_profile_digest,
175 actual_digest: artifact.profile_digest.clone(),
176 });
177 }
178
179 let expected_digest = expected.digest();
180 if artifact.profile_digest != expected_digest {
181 return Err(MemoryError::VectorCodecProfileMismatch {
182 expected_digest,
183 actual_digest: artifact.profile_digest.clone(),
184 });
185 }
186
187 let encoded_digest = artifact.encoded_digest();
188 if !artifact.artifact_digest.is_empty() && artifact.artifact_digest != encoded_digest {
189 return Err(MemoryError::CorruptData {
190 table: "vector_artifacts",
191 row_id: artifact.profile_digest.clone(),
192 detail: format!(
193 "encoded artifact digest mismatch: expected {}, got {}",
194 artifact.artifact_digest, encoded_digest
195 ),
196 });
197 }
198
199 Ok(())
200}
201
202#[derive(Debug, Clone)]
204pub struct RawF32Codec {
205 profile: VectorCodecProfileV1,
206}
207
208impl RawF32Codec {
209 pub fn new(dim: usize) -> Result<Self, MemoryError> {
211 Ok(Self {
212 profile: VectorCodecProfileV1::raw_f32(dim)?,
213 })
214 }
215}
216
217impl VectorCodec for RawF32Codec {
218 fn profile(&self) -> &VectorCodecProfileV1 {
219 &self.profile
220 }
221
222 fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
223 db::validate_embedding(vector, dim_usize(self.profile.dim))?;
224 Ok(VectorArtifactV1::new(
225 self.profile.clone(),
226 db::encode_f32_le(vector),
227 ))
228 }
229
230 fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
231 validate_artifact_profile(&self.profile, artifact)?;
232 db::decode_f32_le(&artifact.encoded, dim_usize(self.profile.dim))
233 }
234}
235
236#[derive(Debug, Clone)]
238pub struct Sq8Codec {
239 profile: VectorCodecProfileV1,
240}
241
242impl Sq8Codec {
243 pub fn new(dim: usize) -> Result<Self, MemoryError> {
245 Ok(Self {
246 profile: VectorCodecProfileV1::sq8(dim)?,
247 })
248 }
249}
250
251impl VectorCodec for Sq8Codec {
252 fn profile(&self) -> &VectorCodecProfileV1 {
253 &self.profile
254 }
255
256 fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
257 db::validate_embedding(vector, dim_usize(self.profile.dim))?;
258 let quantized = quantize::Quantizer::new(dim_usize(self.profile.dim)).quantize(vector)?;
259 Ok(VectorArtifactV1::new(
260 self.profile.clone(),
261 quantize::pack_quantized(&quantized),
262 ))
263 }
264
265 fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
266 validate_artifact_profile(&self.profile, artifact)?;
267 let quantized = quantize::unpack_quantized(&artifact.encoded, dim_usize(self.profile.dim))?;
268 let decoded = quantize::Quantizer::new(dim_usize(self.profile.dim)).dequantize(&quantized);
269 db::validate_embedding(&decoded, dim_usize(self.profile.dim))?;
270 Ok(decoded)
271 }
272}
273
/// Converts a `turbo_quant` error into [`MemoryError::QuantizationError`],
/// prefixing the message with `turbo-quant: `.
#[cfg(feature = "turbo-quant-codec")]
fn map_turbo_error(err: turbo_quant::TurboQuantError) -> MemoryError {
    let message = format!("turbo-quant: {err}");
    MemoryError::QuantizationError(message)
}
278
/// Codec backed by `turbo_quant::TurboQuantizer`.
///
/// Only compiled when the `turbo-quant-codec` feature is enabled.
#[cfg(feature = "turbo-quant-codec")]
#[derive(Clone, Debug)]
pub struct TurboQuantCodec {
    /// Profile stamped onto every artifact this codec produces.
    profile: VectorCodecProfileV1,
    /// Quantizer built from the same parameters recorded in `profile`.
    quantizer: turbo_quant::TurboQuantizer,
}
286
#[cfg(feature = "turbo-quant-codec")]
impl TurboQuantCodec {
    /// Creates a codec from the quantizer parameters, recording the same
    /// parameters in the codec profile.
    ///
    /// # Errors
    /// * [`MemoryError::QuantizationError`] when the quantizer rejects the
    ///   parameters.
    /// * The error from [`VectorCodecProfileV1::turbo_quant`] when a parameter
    ///   does not fit the profile.
    pub fn new(dim: usize, bits: u8, projections: usize, seed: u64) -> Result<Self, MemoryError> {
        let quantizer = turbo_quant::TurboQuantizer::new(dim, bits, projections, seed)
            .map_err(map_turbo_error)?;
        let profile = VectorCodecProfileV1::turbo_quant(dim, bits, projections, seed)?;
        Ok(Self { profile, quantizer })
    }

    /// Validates `artifact` against this codec's profile and parses its
    /// payload into a `TurboCode`.
    ///
    /// Every caller gets artifact validation from this helper, so callers do
    /// not need to validate the artifact themselves first.
    fn decode_code(
        &self,
        artifact: &VectorArtifactV1,
    ) -> Result<turbo_quant::TurboCode, MemoryError> {
        validate_artifact_profile(&self.profile, artifact)?;
        self.quantizer
            .decode_code_from_bytes(&artifact.encoded)
            .map_err(map_turbo_error)
    }

    /// Scores `query` against `artifact` using the quantizer's
    /// inner-product scorer over the raw payload bytes.
    ///
    /// # Errors
    /// Fails when `query` is not a valid embedding for this profile, when the
    /// artifact fails validation, or when the quantizer reports an error.
    pub fn score_inner_product(
        &self,
        artifact: &VectorArtifactV1,
        query: &[f32],
    ) -> Result<f32, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        validate_artifact_profile(&self.profile, artifact)?;
        self.quantizer
            .score_inner_product_from_bytes(&artifact.encoded, query)
            .map_err(map_turbo_error)
    }

    /// Validates `query` and hands it to the quantizer's `prepare_query`, so
    /// the result can be reused with
    /// [`TurboQuantCodec::score_inner_product_prepared`].
    ///
    /// # Errors
    /// Fails when `query` is not a valid embedding for this profile or when
    /// the quantizer reports an error.
    pub fn prepare_query(
        &self,
        query: &[f32],
    ) -> Result<turbo_quant::TurboProjectedQuery, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        self.quantizer.prepare_query(query).map_err(map_turbo_error)
    }

    /// Scores a query prepared by [`TurboQuantCodec::prepare_query`] against
    /// `artifact`.
    ///
    /// # Errors
    /// Fails when the artifact fails validation or when the quantizer reports
    /// an error.
    pub fn score_inner_product_prepared(
        &self,
        artifact: &VectorArtifactV1,
        prepared: &turbo_quant::TurboProjectedQuery,
    ) -> Result<f32, MemoryError> {
        // `decode_code` validates the artifact; validating here as well would
        // hash the profile and payload twice on every scoring call.
        let code = self.decode_code(artifact)?;
        self.quantizer
            .inner_product_estimate_prepared(&code, prepared)
            .map_err(map_turbo_error)
    }

    /// Scores `query` against `artifact` using the quantizer's L2 distance
    /// estimate.
    ///
    /// # Errors
    /// Fails when `query` is not a valid embedding for this profile, when the
    /// artifact fails validation, or when the quantizer reports an error.
    pub fn score_l2(&self, artifact: &VectorArtifactV1, query: &[f32]) -> Result<f32, MemoryError> {
        db::validate_embedding(query, dim_usize(self.profile.dim))?;
        // Artifact validation happens inside `decode_code`; the query check
        // above still runs first, so error precedence is unchanged.
        let code = self.decode_code(artifact)?;
        self.quantizer
            .l2_distance_estimate(&code, query)
            .map_err(map_turbo_error)
    }
}
354
#[cfg(feature = "turbo-quant-codec")]
impl VectorCodec for TurboQuantCodec {
    /// Returns the `turbo_quant` profile this codec was built with.
    fn profile(&self) -> &VectorCodecProfileV1 {
        &self.profile
    }

    /// Validates `vector` and encodes it with the quantizer.
    ///
    /// # Errors
    /// Propagates the error from `db::validate_embedding`, or returns
    /// [`MemoryError::QuantizationError`] when the quantizer fails.
    fn encode(&self, vector: &[f32]) -> Result<VectorArtifactV1, MemoryError> {
        let dim = dim_usize(self.profile.dim);
        db::validate_embedding(vector, dim)?;

        let payload = match self.quantizer.encode_to_bytes(vector) {
            Ok(bytes) => bytes,
            Err(err) => return Err(map_turbo_error(err)),
        };
        Ok(VectorArtifactV1::new(self.profile.clone(), payload))
    }

    /// Validates and parses `artifact`, reconstructs a vector with the
    /// quantizer's `decode_approximate`, and validates the result.
    ///
    /// # Errors
    /// Propagates errors from artifact validation and
    /// `db::validate_embedding`, or returns
    /// [`MemoryError::QuantizationError`] when the quantizer fails.
    fn decode(&self, artifact: &VectorArtifactV1) -> Result<Vec<f32>, MemoryError> {
        let code = self.decode_code(artifact)?;
        let approx = match self.quantizer.decode_approximate(&code) {
            Ok(values) => values,
            Err(err) => return Err(map_turbo_error(err)),
        };
        db::validate_embedding(&approx, dim_usize(self.profile.dim))?;
        Ok(approx)
    }
}