1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
//! Governed compression pipeline integration for semantic-memory.
//!
//! This module provides the `encode_governed` function, which routes embedding
//! vectors through the quant-governor policy evaluation + scr-runtime-compression
//! adapter pipeline. The real implementation is compiled only when the
//! `turbo-quant-codec` feature is enabled; without that feature, a stub with the
//! same function names is compiled instead and always returns an error.
#[cfg(feature = "turbo-quant-codec")]
pub mod governed {
    use quant_governor::{
        AdmissibilityClass, CodecProfile, ContentType, GovernancePolicy, GovernanceRequest,
    };
    use scr_runtime_compression::CodecId;

    /// Fixed codec seed used for v1.
    ///
    /// The seed determines the codebook (k=4, N=32 fib_quant codebook via
    /// paper_default). Using the same seed at encode and decode gives the same
    /// codebook, so round-trips work. Future work: pass the seed through the
    /// governance decision so callers can pin their codebook per-policy.
    const CODEBOOK_SEED: u64 = 42;

    /// Result of governed encoding — compressed bytes plus pipeline metadata.
    #[derive(Debug, Clone)]
    pub struct GovernedEncodeResult {
        /// Compressed byte representation of the embedding.
        pub compressed_bytes: Vec<u8>,
        /// Which codec profile was selected by the governance policy.
        ///
        /// This is the policy's choice, not necessarily the codec that produced
        /// `compressed_bytes`: `Q8` and `Q4` are currently encoded uncompressed.
        pub codec_profile: CodecProfile,
        /// Governance receipt ID for audit trail (`gr-` followed by a fresh UUID v4).
        pub governance_receipt_id: String,
        /// Allowed degradation budget from the policy decision.
        pub degradation_budget: f64,
    }

    /// Maps the codec profile chosen by the policy to the runtime codec id.
    ///
    /// `Raw`, `Q8` and `Q4` all map to `CodecId::Uncompressed`.
    /// NOTE(review): presumably `Q8`/`Q4` fall back because the runtime has no
    /// dedicated codec for them — confirm, since callers still see the `Q8`/`Q4`
    /// profile reported in `GovernedEncodeResult::codec_profile`.
    ///
    /// The match is deliberately exhaustive (no `_` arm) so that adding a
    /// `CodecProfile` variant forces a decision here at compile time.
    fn codec_id_for(profile: &CodecProfile) -> CodecId {
        match profile {
            CodecProfile::Raw | CodecProfile::Q8 | CodecProfile::Q4 => CodecId::Uncompressed,
            CodecProfile::Turbo => CodecId::TurboQuant,
            CodecProfile::Fib => CodecId::FibQuant,
            CodecProfile::Polar => CodecId::Polar,
            CodecProfile::Qjl => CodecId::Qjl,
        }
    }

    /// Encode an embedding vector through the governed compression pipeline.
    ///
    /// The pipeline:
    /// 1. Builds a fixed governance request (`Structured` content, accuracy
    ///    requirement 0.99, latency tolerance 500 ms, `Standard` admissibility)
    ///    whose size is the embedding's raw byte length, and evaluates `policy`
    ///    against it to select a codec profile.
    /// 2. Encodes the embedding with the runtime codec mapped from that profile,
    ///    using a fixed seed.
    /// 3. Returns the compressed bytes plus governance metadata for storage
    ///    alongside the artifact. The receipt ID is generated here as a new
    ///    random UUID; it is not taken from the policy decision.
    ///
    /// # Errors
    ///
    /// Returns a string error if policy evaluation fails (the policy error's
    /// display text) or if the codec returns an error during encoding
    /// (prefixed with `encode failed: `).
    pub fn encode_governed(
        embedding: &[f32],
        policy: &GovernancePolicy,
    ) -> Result<GovernedEncodeResult, String> {
        let request = GovernanceRequest {
            content_type: ContentType::Structured,
            // Raw payload size in bytes; `size_of_val` on the slice covers all elements.
            size_bytes: std::mem::size_of_val(embedding) as u64,
            accuracy_requirement: 0.99,
            latency_tolerance_ms: 500,
            admissibility: AdmissibilityClass::Standard,
        };
        let decision = policy.evaluate(request).map_err(|e| e.to_string())?;

        // This must go through the real codec encode path: a pass-through here
        // would return the raw bytes regardless of the selected codec.
        let codec_id = codec_id_for(&decision.codec);
        let compressed_bytes = scr_runtime_compression::encode(codec_id, embedding, CODEBOOK_SEED)
            .map_err(|e| format!("encode failed: {e}"))?;

        Ok(GovernedEncodeResult {
            compressed_bytes,
            codec_profile: decision.codec,
            governance_receipt_id: format!("gr-{}", uuid::Uuid::new_v4()),
            degradation_budget: decision.degradation_budget,
        })
    }

    /// Encode with governance using a default policy.
    ///
    /// Convenience wrapper that uses `GovernancePolicy::default()` for cases
    /// where custom policy tuning is not required.
    ///
    /// # Errors
    ///
    /// Same as [`encode_governed`].
    pub fn encode_governed_default(embedding: &[f32]) -> Result<GovernedEncodeResult, String> {
        encode_governed(embedding, &GovernancePolicy::default())
    }
}
#[cfg(not(feature = "turbo-quant-codec"))]
pub mod governed {
    //! Fallback module compiled when `turbo-quant-codec` is not enabled.
    //!
    //! Every function here fails with a message stating that the feature is
    //! disabled. No `quant_governor` types are referenced on purpose, so this
    //! stub builds even when that optional dependency is absent.

    /// Error text returned by every stub entry point.
    const FEATURE_DISABLED: &str = "turbo-quant-codec feature is not enabled";

    /// Stub result type — `codec_profile` is a plain string when the feature is
    /// disabled. The stub functions in this module never construct one.
    #[derive(Debug, Clone)]
    pub struct GovernedEncodeResult {
        /// Compressed byte representation of the embedding.
        pub compressed_bytes: Vec<u8>,
        /// Codec profile name, as a string in this configuration.
        pub codec_profile: String,
        /// Governance receipt ID.
        pub governance_receipt_id: String,
        /// Allowed degradation budget.
        pub degradation_budget: f64,
    }

    /// Always fails: the `turbo-quant-codec` feature is not enabled.
    ///
    /// Both arguments are ignored; the policy parameter is `()` in this
    /// configuration.
    ///
    /// # Errors
    ///
    /// Always returns `Err` with a message naming the missing feature.
    pub fn encode_governed(
        _embedding: &[f32],
        _policy: (),
    ) -> Result<GovernedEncodeResult, String> {
        Err(String::from(FEATURE_DISABLED))
    }

    /// Always fails: the `turbo-quant-codec` feature is not enabled.
    ///
    /// # Errors
    ///
    /// Always returns `Err` with a message naming the missing feature.
    pub fn encode_governed_default(_embedding: &[f32]) -> Result<GovernedEncodeResult, String> {
        Err(String::from(FEATURE_DISABLED))
    }
}
// Re-export for convenience
pub use governed::{encode_governed, encode_governed_default, GovernedEncodeResult};
#[cfg(all(test, feature = "turbo-quant-codec"))]
mod tests {
    use super::governed::encode_governed;
    use quant_governor::GovernancePolicy;

    /// Size in bytes of the raw `f32` data held in `values`.
    fn raw_byte_len(values: &[f32]) -> usize {
        std::mem::size_of_val(values)
    }

    /// Fib routing plus compression check.
    ///
    /// Evaluates the default policy for a `Model` request with accuracy 0.98,
    /// asserts the decision is `Fib`, then encodes the embedding with the
    /// matching runtime codec and asserts the output is smaller than the raw
    /// `f32` bytes.
    ///
    /// The codec is called directly rather than through `encode_governed`,
    /// because `encode_governed` builds its own `Structured` / 0.99-accuracy
    /// request. No decode step is performed in this test.
    #[test]
    fn encode_governed_produces_compressed_bytes() {
        use quant_governor::{CodecProfile, ContentType, GovernanceRequest};

        // 128-dim embedding (multiple of 4, fits fib_quant k=4)
        let embedding: Vec<f32> = (0..128).map(|i| (i as f32 / 128.0) - 0.5).collect();
        // Raw size is 128 * 4 = 512 bytes.
        let raw_size = raw_byte_len(&embedding);

        // Build a request that lands on Fib (model content, accuracy 0.98 → Fib).
        let policy = GovernancePolicy::default();
        let request = GovernanceRequest {
            content_type: ContentType::Model,
            size_bytes: raw_size as u64,
            accuracy_requirement: 0.98, // < 0.99, lands on Fib for Model
            latency_tolerance_ms: 500,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = policy.evaluate(request).expect("policy evaluate");
        assert_eq!(decision.codec, CodecProfile::Fib, "test should land on Fib");

        // Encode with the codec the policy chose.
        let codec_id = match decision.codec {
            CodecProfile::Fib => scr_runtime_compression::CodecId::FibQuant,
            CodecProfile::Polar => scr_runtime_compression::CodecId::Polar,
            CodecProfile::Qjl => scr_runtime_compression::CodecId::Qjl,
            _ => panic!("expected Fib / Polar / Qjl"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("fib_quant encode failed");

        // The compressed bytes should be smaller than raw.
        assert!(
            compressed.len() < raw_size,
            "compressed ({} bytes) should be smaller than raw ({} bytes)",
            compressed.len(),
            raw_size
        );
    }

    /// Verifies that the policy routes low-latency text with small size to
    /// Polar (asymmetric inner-product codec, smaller codes than Turbo for
    /// short vectors), and that Polar decode returns the encoded bytes unchanged.
    #[test]
    fn encode_governed_routes_to_polar_for_low_latency_text() {
        use quant_governor::{ContentType, GovernanceRequest};

        // 128 dims * 4 bytes = 512 bytes; above the 256-byte
        // small_content_threshold so the bypass-to-Raw doesn't trigger.
        let embedding: Vec<f32> = (0..128).map(|i| (i as f32 * 0.01) - 0.5).collect();
        let request = GovernanceRequest {
            content_type: ContentType::Text,
            size_bytes: raw_byte_len(&embedding) as u64,
            accuracy_requirement: 0.95, // < 0.98 to avoid Raw
            latency_tolerance_ms: 10,   // < 50ms
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = GovernancePolicy::default()
            .evaluate(request)
            .expect("policy evaluate");
        assert_eq!(decision.codec, quant_governor::CodecProfile::Polar);

        // Verify the dispatch path lands on Polar.
        let codec_id = match decision.codec {
            quant_governor::CodecProfile::Polar => scr_runtime_compression::CodecId::Polar,
            other => panic!("expected Polar, got {other:?}"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("polar encode failed");

        // Polar is asymmetric — decode is identity pass-through.
        let decoded =
            scr_runtime_compression::decode(codec_id, &compressed).expect("polar decode failed");
        assert_eq!(compressed, decoded);
    }

    /// Verifies that the policy routes low-latency text with large size to
    /// QJL (constant-size sketch for memory-efficient ANN), and that the
    /// resulting sketch stays under 200 bytes.
    #[test]
    fn encode_governed_routes_to_qjl_for_large_low_latency_text() {
        use quant_governor::{ContentType, GovernanceRequest};

        // Use a size above 50_000 to trigger QJL (and above the
        // 256-byte small_content_threshold so bypass-to-Raw doesn't fire).
        // 16384 dims * 4 bytes = 65536 bytes.
        let embedding: Vec<f32> = (0..16384).map(|i| (i as f32 * 0.0001) - 0.5).collect();
        let request = GovernanceRequest {
            content_type: ContentType::Text,
            size_bytes: raw_byte_len(&embedding) as u64,
            accuracy_requirement: 0.95,
            latency_tolerance_ms: 10,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = GovernancePolicy::default()
            .evaluate(request)
            .expect("policy evaluate");
        assert_eq!(decision.codec, quant_governor::CodecProfile::Qjl);

        let codec_id = match decision.codec {
            quant_governor::CodecProfile::Qjl => scr_runtime_compression::CodecId::Qjl,
            other => panic!("expected Qjl, got {other:?}"),
        };
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("qjl encode failed");

        // QJL is dim-independent (~120 bytes for any dim).
        assert!(
            compressed.len() < 200,
            "qjl sketch ({} bytes) should be ~120 bytes regardless of dim",
            compressed.len()
        );
    }

    /// Smoke test for `encode_governed` with an explicitly supplied policy.
    ///
    /// The policy passed in is `GovernancePolicy::default()`. Only the result
    /// metadata is checked: the receipt ID prefix and the budget range.
    #[test]
    fn encode_governed_with_custom_policy() {
        let embedding: Vec<f32> = (0..256).map(|i| (i as f32) * 0.01 - 0.5).collect();
        let policy = GovernancePolicy::default();

        let result = encode_governed(&embedding, &policy).expect("encode failed");

        // Governance receipt ID should be set
        assert!(result.governance_receipt_id.starts_with("gr-"));
        // Degradation budget is a f64 in [0, 1]
        assert!(result.degradation_budget >= 0.0);
        assert!(result.degradation_budget <= 1.0);
    }

    /// Parity test for the "no compression" branch of the pipeline.
    ///
    /// Asserts that the default policy lands on Raw for the same request shape
    /// that `encode_governed` builds (Structured + 0.99 accuracy), then encodes
    /// and decodes with the Uncompressed codec directly and asserts the decoded
    /// little-endian `f32` values equal the input exactly.
    ///
    /// `encode_governed_default` itself is not called here. The Fib path is
    /// covered by `encode_governed_produces_compressed_bytes`, which checks
    /// the encoded size only.
    #[test]
    fn encode_governed_default_round_trip_through_uncompressed() {
        use quant_governor::{ContentType, GovernanceRequest};

        let embedding: Vec<f32> = (0..128).map(|i| ((i as f32 * 0.13).sin()) - 0.5).collect();

        // The default policy for Structured content with 0.99 accuracy
        // returns Raw (Uncompressed), so the encode is identity and the
        // round-trip is exact.
        let request = GovernanceRequest {
            content_type: ContentType::Structured,
            size_bytes: raw_byte_len(&embedding) as u64,
            accuracy_requirement: 0.99, // >= 0.99 → Raw
            latency_tolerance_ms: 500,
            admissibility: quant_governor::AdmissibilityClass::Standard,
        };
        let decision = GovernancePolicy::default()
            .evaluate(request)
            .expect("policy evaluate");
        // Structured + 0.99 accuracy should land on Raw per the policy.
        assert_eq!(decision.codec, quant_governor::CodecProfile::Raw);

        let codec_id = scr_runtime_compression::CodecId::Uncompressed;
        let compressed = scr_runtime_compression::encode(codec_id, &embedding, 42)
            .expect("uncompressed encode failed");
        let decoded_bytes =
            scr_runtime_compression::decode(codec_id, &compressed).expect("decode failed");

        // Reassemble f32 values from consecutive 4-byte little-endian groups.
        let decoded: Vec<f32> = decoded_bytes
            .chunks_exact(4)
            .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
            .collect();
        assert_eq!(decoded, embedding);
    }
}