Skip to main content

turbo_quant/
profile.rs

1//! Stable codec profiles, compression policies, and receipts.
2
3use schemars::JsonSchema;
4use serde::{Deserialize, Serialize};
5
6/// Stable codec profile schema shared across TurboQuant-family receipts.
7#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
8pub struct CodecProfileV1 {
9    pub schema: String,
10    pub crate_name: String,
11    pub crate_version: String,
12    pub codec_kind: String,
13    pub dim: usize,
14    pub bits: u8,
15    pub projections: Option<usize>,
16    pub seed: u64,
17    pub rotation_kind: String,
18    pub storage_layout: String,
19    pub codebook_kind: Option<String>,
20    pub qjl_enabled: bool,
21    pub score_semantics: String,
22    pub profile_digest: Option<String>,
23    pub limitations: Vec<String>,
24}
25
26impl CodecProfileV1 {
27    pub fn turbo(
28        dim: usize,
29        bits: u8,
30        projections: usize,
31        seed: u64,
32        qjl_enabled: bool,
33        rotation_kind: impl Into<String>,
34    ) -> Self {
35        let mut profile = Self {
36            schema: "CodecProfileV1".into(),
37            crate_name: "turbo-quant".into(),
38            crate_version: env!("CARGO_PKG_VERSION").into(),
39            codec_kind: "TurboQuant".into(),
40            dim,
41            bits,
42            projections: if qjl_enabled { Some(projections) } else { None },
43            seed,
44            rotation_kind: rotation_kind.into(),
45            storage_layout: if qjl_enabled {
46                "polar_radii_f32_angles_bitpacked_qjl_signs_bitpacked".into()
47            } else {
48                "polar_radii_f32_angles_bitpacked".into()
49            },
50            codebook_kind: Some("uniform_angle_grid".into()),
51            qjl_enabled,
52            score_semantics: if qjl_enabled {
53                "approximate_inner_product_polar_plus_qjl_residual".into()
54            } else {
55                "approximate_inner_product_polar_only".into()
56            },
57            profile_digest: None,
58            limitations: vec![
59                "compressed codes are derived sidecars, not canonical vectors".into(),
60                "quality is workload-dependent and must be benchmarked".into(),
61                "KV-cache use is experimental shadow-mode only".into(),
62            ],
63        };
64        profile.profile_digest = Some(profile.compute_digest());
65        profile
66    }
67
68    /// Deterministic non-cryptographic digest for profile identity.
69    pub fn compute_digest(&self) -> String {
70        let projections = self
71            .projections
72            .map_or_else(|| "none".to_string(), |value| value.to_string());
73        let body = format!(
74            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}",
75            self.schema,
76            self.crate_name,
77            self.crate_version,
78            self.codec_kind,
79            self.dim,
80            self.bits,
81            projections,
82            self.seed,
83            self.rotation_kind,
84            self.storage_layout,
85            self.qjl_enabled
86        );
87        format!("fnv1a64:{:016x}", fnv1a64(body.as_bytes()))
88    }
89}
90
91/// Stable compression policy record for sidecar integrations.
92#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
93pub struct CompressionPolicyV1 {
94    pub schema: String,
95    pub profile: CodecProfileV1,
96    pub canonical_vectors_required: bool,
97    pub lossy_default_allowed: bool,
98    pub exact_fallback_required: bool,
99    pub benchmark_gate_required: bool,
100}
101
102impl CompressionPolicyV1 {
103    pub fn sidecar_shadow(profile: CodecProfileV1) -> Self {
104        Self {
105            schema: "CompressionPolicyV1".into(),
106            profile,
107            canonical_vectors_required: true,
108            lossy_default_allowed: false,
109            exact_fallback_required: true,
110            benchmark_gate_required: true,
111        }
112    }
113}
114
115/// Validation state bound into a compression receipt.
116#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
117pub enum ValidationState {
118    Validated,
119    Rejected,
120}
121
122/// Receipt emitted for a single derived compressed code.
123#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
124pub struct CompressionReceiptV1 {
125    pub schema: String,
126    pub profile: CodecProfileV1,
127    pub source_digest: Option<String>,
128    pub input_dim: usize,
129    pub validation_state: ValidationState,
130    pub encoded_bytes: usize,
131    pub fp16_baseline_bytes: usize,
132    pub fp32_baseline_bytes: usize,
133    pub compression_ratio_vs_fp16: Option<f32>,
134    pub compression_ratio_vs_fp32: f32,
135    pub warnings: Vec<String>,
136}
137
138impl CompressionReceiptV1 {
139    pub fn new(
140        profile: CodecProfileV1,
141        source_digest: Option<String>,
142        input_dim: usize,
143        encoded_bytes: usize,
144        validation_state: ValidationState,
145    ) -> Self {
146        let fp16_baseline_bytes = input_dim * 2;
147        let fp32_baseline_bytes = input_dim * 4;
148        Self {
149            schema: "CompressionReceiptV1".into(),
150            profile,
151            source_digest,
152            input_dim,
153            validation_state,
154            encoded_bytes,
155            fp16_baseline_bytes,
156            fp32_baseline_bytes,
157            compression_ratio_vs_fp16: if encoded_bytes > 0 {
158                Some(fp16_baseline_bytes as f32 / encoded_bytes as f32)
159            } else {
160                None
161            },
162            compression_ratio_vs_fp32: if encoded_bytes > 0 {
163                fp32_baseline_bytes as f32 / encoded_bytes as f32
164            } else {
165                0.0
166            },
167            warnings: vec![
168                "receipt describes a derived sidecar code, not a replacement for source vectors"
169                    .into(),
170            ],
171        }
172    }
173}
174
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// Starting from the offset basis, each input byte is XORed into the state,
/// which is then multiplied by the FNV prime using wrapping arithmetic. An
/// empty slice yields the offset basis itself.
fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes.iter().fold(OFFSET_BASIS, |state, &octet| {
        (state ^ u64::from(octet)).wrapping_mul(PRIME)
    })
}