Skip to main content

semantic_memory/
quantize_governed.rs

1//! Governed compression pipeline integration for semantic-memory.
2//!
3//! This module provides the `encode_governed` function that routes embedding
4//! vectors through the quant-governor policy evaluation + scr-runtime-compression
5//! adapter pipeline. It is only available when the `turbo-quant-codec` feature
6//! is enabled.
7
#[cfg(feature = "turbo-quant-codec")]
pub mod governed {
    use quant_governor::{
        AdmissibilityClass, CodecProfile, ContentType, GovernancePolicy, GovernanceRequest,
    };

    /// Output of a governed encode: the encoded payload together with the
    /// policy metadata that is meant to be stored next to it.
    #[derive(Debug, Clone)]
    pub struct GovernedEncodeResult {
        /// Bytes returned by the codec the policy decision was dispatched to.
        pub compressed_bytes: Vec<u8>,
        /// Codec profile reported by the governance policy decision.
        pub codec_profile: CodecProfile,
        /// Audit-trail identifier of the form `gr-<uuid v4>`, freshly generated
        /// on every successful call.
        pub governance_receipt_id: String,
        /// Degradation budget copied from the policy decision.
        pub degradation_budget: f64,
    }

    /// Run an embedding through policy evaluation and the matching codec.
    ///
    /// Steps:
    /// 1. Build a governance request for the embedding. Only `size_bytes`
    ///    depends on the input; content type (`Structured`), accuracy
    ///    requirement (`0.99`), latency tolerance (`500` ms) and admissibility
    ///    (`Standard`) are fixed.
    /// 2. Ask `policy` to evaluate the request and translate the resulting
    ///    codec profile into a `scr_runtime_compression::CodecId`.
    /// 3. Encode the embedding with that codec and return the bytes together
    ///    with the decision metadata.
    ///
    /// # Errors
    ///
    /// Returns the policy error rendered as a string if evaluation fails, or
    /// `"encode failed: <cause>"` if the codec rejects the input.
    pub fn encode_governed(
        embedding: &[f32],
        policy: &GovernancePolicy,
    ) -> Result<GovernedEncodeResult, String> {
        use scr_runtime_compression::CodecId;

        // Fixed codebook seed for v1 (per the original author: k=4, N=32
        // fib_quant codebook via paper_default). Encoding and decoding with
        // the same seed yields the same codebook, which is what makes a
        // round-trip possible. Future work: carry the seed in the governance
        // decision so callers can pin a codebook per policy.
        const SEED: u64 = 42;

        let raw_len_bytes = std::mem::size_of_val(embedding) as u64;
        let decision = policy
            .evaluate(GovernanceRequest {
                content_type: ContentType::Structured,
                size_bytes: raw_len_bytes,
                accuracy_requirement: 0.99,
                latency_tolerance_ms: 500,
                admissibility: AdmissibilityClass::Standard,
            })
            .map_err(|err| err.to_string())?;

        // Translate the policy's profile into a runtime codec. Raw, Q8 and Q4
        // are all dispatched to the uncompressed codec; the match stays
        // exhaustive so a new profile variant is a compile error here.
        let codec_id = match decision.codec {
            CodecProfile::Raw | CodecProfile::Q8 | CodecProfile::Q4 => CodecId::Uncompressed,
            CodecProfile::Turbo => CodecId::TurboQuant,
            CodecProfile::Fib => CodecId::FibQuant,
            CodecProfile::Polar => CodecId::Polar,
            CodecProfile::Qjl => CodecId::Qjl,
        };

        // Go through the real encoder. (An earlier revision called
        // adapter.decode_exact here, a pass-through that returned the raw
        // bytes no matter which codec had been selected.)
        let compressed_bytes = scr_runtime_compression::encode(codec_id, embedding, SEED)
            .map_err(|err| format!("encode failed: {err}"))?;
        let governance_receipt_id = format!("gr-{}", uuid::Uuid::new_v4());

        Ok(GovernedEncodeResult {
            compressed_bytes,
            codec_profile: decision.codec,
            governance_receipt_id,
            degradation_budget: decision.degradation_budget,
        })
    }

    /// Governed encode using `GovernancePolicy::default()`.
    ///
    /// Shorthand for [`encode_governed`] when no custom policy tuning is
    /// needed.
    ///
    /// # Errors
    ///
    /// Same failure modes as [`encode_governed`].
    pub fn encode_governed_default(embedding: &[f32]) -> Result<GovernedEncodeResult, String> {
        let default_policy = GovernancePolicy::default();
        encode_governed(embedding, &default_policy)
    }
}
96
#[cfg(not(feature = "turbo-quant-codec"))]
pub mod governed {
    //! Stub module used when `turbo-quant-codec` is not enabled.
    //!
    //! Every function in this module returns the same "feature disabled"
    //! error. No `quant_governor` types are referenced, so the stub compiles
    //! even when that optional dependency is not available.

    /// Error text returned by every stub entry point.
    const FEATURE_DISABLED: &str = "turbo-quant-codec feature is not enabled";

    /// Stub result type. It has the same field names as the feature-enabled
    /// struct, except that `codec_profile` is a plain string because the
    /// `quant_governor` profile enum is not referenced here. The stub
    /// functions never construct a value of this type.
    #[derive(Debug, Clone)]
    pub struct GovernedEncodeResult {
        /// Encoded bytes of the embedding.
        pub compressed_bytes: Vec<u8>,
        /// Name of the selected codec profile.
        pub codec_profile: String,
        /// Governance receipt identifier for the audit trail.
        pub governance_receipt_id: String,
        /// Degradation budget associated with the policy decision.
        pub degradation_budget: f64,
    }

    /// Always fails: the `turbo-quant-codec` feature is not enabled.
    ///
    /// The policy argument is generic and ignored, so both the historical
    /// `()` placeholder and a reference to a real policy value are accepted.
    /// This keeps call sites written against the feature-enabled signature
    /// compiling against the stub.
    ///
    /// # Errors
    ///
    /// Always returns `Err("turbo-quant-codec feature is not enabled")`.
    pub fn encode_governed<P>(
        _embedding: &[f32],
        _policy: P,
    ) -> Result<GovernedEncodeResult, String> {
        Err(FEATURE_DISABLED.to_string())
    }

    /// Always fails: the `turbo-quant-codec` feature is not enabled.
    ///
    /// # Errors
    ///
    /// Always returns `Err("turbo-quant-codec feature is not enabled")`.
    pub fn encode_governed_default(_embedding: &[f32]) -> Result<GovernedEncodeResult, String> {
        Err(FEATURE_DISABLED.to_string())
    }
}
127
128// Re-export for convenience
129pub use governed::{encode_governed, encode_governed_default, GovernedEncodeResult};
130
#[cfg(all(test, feature = "turbo-quant-codec"))]
mod tests {
    use super::governed::{encode_governed, encode_governed_default};
    use quant_governor::GovernancePolicy;

    /// Policy-to-codec dispatch check for the Fib profile.
    ///
    /// `encode_governed` always builds a Structured / 0.99-accuracy request,
    /// so this test does NOT call it. Instead it evaluates the default policy
    /// with a Model request and feeds the chosen codec to
    /// `scr_runtime_compression::encode` directly. It asserts:
    /// 1. The policy decision lands on `CodecProfile::Fib`.
    /// 2. The encoded output is strictly smaller than the raw embedding.
    ///
    /// No decode step is performed here.
    #[test]
    fn encode_governed_produces_compressed_bytes() {
        use quant_governor::{CodecProfile, ContentType, GovernanceRequest};

        // 128-dim embedding (multiple of 4, fits fib_quant k=4)
        let embedding: Vec<f32> = (0..128)
            .map(|i| (i as f32 / 128.0) - 0.5)
            .collect();

        // Build a request that lands on Fib (model content, accuracy 0.98 → Fib).
        let policy = GovernancePolicy::default();
        let request = GovernanceRequest {
            content_type: ContentType::Model,
            size_bytes: (embedding.len() * std::mem::size_of::<f32>()) as u64,
            accuracy_requirement: 0.98, // < 0.99, lands on Fib for Model
            latency_tolerance_ms: 500,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = policy.evaluate(request).expect("policy evaluate");
        assert_eq!(decision.codec, CodecProfile::Fib, "test should land on Fib");

        // Mirror the dispatch table of encode_governed for the lossy codecs
        // and call the encoder with the same seed (42) it uses.
        let codec_id = match decision.codec {
            CodecProfile::Fib => scr_runtime_compression::CodecId::FibQuant,
            CodecProfile::Polar => scr_runtime_compression::CodecId::Polar,
            CodecProfile::Qjl => scr_runtime_compression::CodecId::Qjl,
            _ => panic!("expected Fib / Polar / Qjl"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("fib_quant encode failed");

        // The compressed bytes should be smaller than raw (128*4=512).
        let raw_size = embedding.len() * std::mem::size_of::<f32>();
        assert!(
            compressed.len() < raw_size,
            "compressed ({} bytes) should be smaller than raw ({} bytes)",
            compressed.len(),
            raw_size
        );
    }

    /// Verifies that the default policy routes low-latency, small text
    /// requests to Polar, and that Polar decode hands back the encoded bytes
    /// unchanged.
    #[test]
    fn encode_governed_routes_to_polar_for_low_latency_text() {
        use quant_governor::{ContentType, GovernanceRequest};
        // 128 dims * 4 bytes = 512 bytes; above the 256-byte
        // small_content_threshold so the bypass-to-Raw doesn't trigger.
        let embedding: Vec<f32> = (0..128).map(|i| (i as f32 * 0.01) - 0.5).collect();
        let request = GovernanceRequest {
            content_type: ContentType::Text,
            size_bytes: (embedding.len() * std::mem::size_of::<f32>()) as u64,
            accuracy_requirement: 0.95, // < 0.98 to avoid Raw
            latency_tolerance_ms: 10,    // < 50ms
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let policy = GovernancePolicy::default();
        let decision = policy.evaluate(request).expect("policy evaluate");
        assert_eq!(decision.codec, quant_governor::CodecProfile::Polar);

        // Verify the dispatch path lands on Polar.
        let codec_id = match decision.codec {
            quant_governor::CodecProfile::Polar => scr_runtime_compression::CodecId::Polar,
            other => panic!("expected Polar, got {other:?}"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("polar encode failed");
        // Polar is asymmetric — decode is identity pass-through.
        let decoded = scr_runtime_compression::decode(codec_id, &compressed)
            .expect("polar decode failed");
        assert_eq!(compressed, decoded);
    }

    /// Verifies that the default policy routes low-latency, large text
    /// requests to QJL and that the resulting sketch stays under 200 bytes.
    #[test]
    fn encode_governed_routes_to_qjl_for_large_low_latency_text() {
        use quant_governor::{ContentType, GovernanceRequest};
        // 16384 dims * 4 bytes = 65536 bytes: above 50_000 to trigger QJL
        // (and above the 256-byte small_content_threshold so bypass-to-Raw
        // doesn't fire).
        let embedding: Vec<f32> = (0..16384).map(|i| (i as f32 * 0.0001) - 0.5).collect();
        let request = GovernanceRequest {
            content_type: ContentType::Text,
            size_bytes: (embedding.len() * std::mem::size_of::<f32>()) as u64,
            accuracy_requirement: 0.95,
            latency_tolerance_ms: 10,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let policy = GovernancePolicy::default();
        let decision = policy.evaluate(request).expect("policy evaluate");
        assert_eq!(decision.codec, quant_governor::CodecProfile::Qjl);

        let codec_id = match decision.codec {
            quant_governor::CodecProfile::Qjl => scr_runtime_compression::CodecId::Qjl,
            other => panic!("expected Qjl, got {other:?}"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("qjl encode failed");
        // QJL is dim-independent (~120 bytes for any dim).
        assert!(
            compressed.len() < 200,
            "qjl sketch ({} bytes) should be ~120 bytes regardless of dim",
            compressed.len()
        );
    }

    /// Smoke test for `encode_governed` with an explicitly supplied policy
    /// (currently the default one): the receipt id carries the `gr-` prefix
    /// and the degradation budget lies in `[0, 1]`.
    #[test]
    fn encode_governed_with_custom_policy() {
        let embedding: Vec<f32> = (0..256).map(|i| (i as f32) * 0.01 - 0.5).collect();
        let policy = GovernancePolicy::default();

        let result = encode_governed(&embedding, &policy).expect("encode failed");

        // Governance receipt ID should be set
        assert!(result.governance_receipt_id.starts_with("gr-"));
        // Degradation budget is a f64 in [0, 1]
        assert!(result.degradation_budget >= 0.0);
        assert!(result.degradation_budget <= 1.0);
    }

    /// Round-trip through the "no compression" branch of the pipeline.
    ///
    /// The default policy maps Structured content with 0.99 accuracy (the
    /// request `encode_governed` builds internally) to Raw, which is
    /// dispatched to the uncompressed codec. The test checks that:
    /// 1. The policy decision for that request is `Raw`.
    /// 2. `encode_governed_default` reports `Raw` and returns exactly the
    ///    bytes a direct uncompressed encode produces.
    /// 3. Decoding those bytes yields the original embedding, with no
    ///    trailing bytes.
    ///
    /// For a lossy codec, see `encode_governed_produces_compressed_bytes`,
    /// which exercises the Fib encode path (without decoding).
    #[test]
    fn encode_governed_default_round_trip_through_uncompressed() {
        use quant_governor::{ContentType, GovernanceRequest};

        let embedding: Vec<f32> = (0..128)
            .map(|i| ((i as f32 * 0.13).sin()) - 0.5)
            .collect();

        // Same request encode_governed builds internally; confirm it lands on Raw.
        let policy = GovernancePolicy::default();
        let request = GovernanceRequest {
            content_type: ContentType::Structured,
            size_bytes: (embedding.len() * std::mem::size_of::<f32>()) as u64,
            accuracy_requirement: 0.99, // >= 0.99 → Raw
            latency_tolerance_ms: 500,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = policy.evaluate(request).expect("policy evaluate");
        // Structured + 0.99 accuracy should land on Raw per the policy.
        assert_eq!(decision.codec, quant_governor::CodecProfile::Raw);

        // Exercise the governed entry point itself, so a regression in its
        // dispatch (e.g. returning pass-through bytes for the wrong codec)
        // is caught by this test rather than only by the direct encode below.
        let governed = encode_governed_default(&embedding).expect("governed encode failed");
        assert_eq!(governed.codec_profile, quant_governor::CodecProfile::Raw);

        let codec_id = scr_runtime_compression::CodecId::Uncompressed;
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("uncompressed encode failed");
        assert_eq!(
            governed.compressed_bytes, compressed,
            "governed path should emit the same bytes as a direct uncompressed encode"
        );

        let decoded_bytes = scr_runtime_compression::decode(codec_id, &governed.compressed_bytes)
            .expect("decode failed");
        // chunks_exact silently drops a trailing partial chunk, so pin the
        // byte length explicitly before reinterpreting as f32.
        assert_eq!(
            decoded_bytes.len(),
            embedding.len() * std::mem::size_of::<f32>()
        );

        let decoded: Vec<f32> = decoded_bytes
            .chunks_exact(4)
            .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
            .collect();
        assert_eq!(decoded, embedding);
    }
}