mnem_extract/trust.rs
1//! Trust-boundary gate for opt-in typed-relation inference.
2//!
3//! This module implements the adversarial trust model described in
4//! `research/gap-catalog/03-typed-relation-inference/solution.md`
5//! (Round 3). It is the mandatory admission check between a
6//! candidate inferred edge and any downstream consumer (PPR, multihop
7//! traversal, retrieve) that might amplify the edge's weight in a
8//! ranking signal.
9//!
10//! # Design
11//!
12//! The gate is intentionally tiny: a single `admit` function over a
13//! [`TrustBoundary`] policy and a [`Candidate`] descriptor. No policy
14//! defaults are magic: every floor must be constructed explicitly
15//! from `InferenceBudget`-derived or spec-pinned values (floor-c). The
16//! caller is expected to plumb the same floor through gauges so the
17//! runtime view matches the code.
18//!
19//! # Determinism
20//!
21//! `admit` is a pure function of its inputs. No clocks, no randomness,
22//! no global state - safe to call inside property tests and inside
23//! the deterministic ingest pipeline.
24//!
//! # Rate-limit fingerprint
//!
//! The per-author rate limiter ([`AuthorRateLimiter`]) is keyed by an
//! [`AuthorFingerprint`] rather than by the raw author identifier: the
//! identifier is hashed with SHA-256 and truncated to the first 8
//! bytes. This is documented in solution.md R3 §4: *no PII,
//! forensics-ready*. The truncation boundary is 8 bytes (64 bits) so
//! that collisions are vanishingly unlikely for real-world
//! author-population sizes but the fingerprint is still too short to
//! recover the original id.
34
35use serde::{Deserialize, Serialize};
36use sha2::{Digest, Sha256};
37
/// Default confidence floor applied before PPR may amplify an inferred
/// edge (solution.md R3 §3).
///
/// Reference: Tong 2008 §4 PPR-skip-risk envelope. This is a floor-c
/// tunable: it is mirrored by the gauge `mnem_ppr_amplification_floor`
/// and exercised by the proptest
/// `proptests::admit_rejects_below_confidence_floor`.
pub const PPR_AMPLIFICATION_FLOOR: f32 = 0.75_f32;
44
/// Number of leading SHA-256 digest bytes kept in an author
/// fingerprint.
///
/// 8 bytes = 64 bits; the module-level docs give the rationale.
pub const AUTHOR_FINGERPRINT_BYTES: usize = 8;
49
/// Length of one rate-limit window, in seconds.
///
/// Pinned to 60 s by solution.md R3 §4 ("rolling 1-min window").
pub const AUTHOR_RATE_LIMIT_WINDOW_SECS: u64 = 60;
54
55/// Opt-in policy for admitting an inferred typed relation into a
56/// downstream ranking signal.
57///
58/// All fields are explicit: there is no `Default` because every caller
59/// MUST set a floor that matches its own gauge and proptest. The
60/// library-provided constant [`PPR_AMPLIFICATION_FLOOR`] is the
61/// solution.md-pinned value when the caller is PPR.
62#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
63pub struct TrustBoundary {
64 /// Minimum confidence for admission. Candidates strictly below
65 /// this value are rejected.
66 pub confidence_floor: f32,
67
68 /// Whether the downstream caller has explicitly opted into
69 /// inferred relations. When `false`, every candidate is rejected
70 /// regardless of confidence. Mirrors the `opt_in` flag on the
71 /// `ProvenanceTag::InferredRelation` variant (solution.md R3 §2).
72 pub consumer_opt_in: bool,
73}
74
75impl TrustBoundary {
76 /// Construct a new trust boundary.
77 ///
78 /// # Errors
79 ///
80 /// Returns `None` when `confidence_floor` is NaN or outside
81 /// `[0.0, 1.0]`. Callers that want the spec floor can use
82 /// [`TrustBoundary::ppr_default`] instead.
83 #[must_use]
84 pub fn new(confidence_floor: f32, consumer_opt_in: bool) -> Option<Self> {
85 if !confidence_floor.is_finite() || !(0.0..=1.0).contains(&confidence_floor) {
86 return None;
87 }
88 Some(Self {
89 confidence_floor,
90 consumer_opt_in,
91 })
92 }
93
94 /// Construct the spec-pinned PPR trust boundary
95 /// ([`PPR_AMPLIFICATION_FLOOR`]) with the caller's opt-in flag.
96 #[must_use]
97 pub fn ppr_default(consumer_opt_in: bool) -> Self {
98 Self {
99 confidence_floor: PPR_AMPLIFICATION_FLOOR,
100 consumer_opt_in,
101 }
102 }
103
104 /// Decide whether `candidate` is admissible under this policy.
105 ///
106 /// The function is a total, side-effect-free predicate over
107 /// `(self, candidate)`. It rejects on any of:
108 ///
109 /// 1. `consumer_opt_in == false`.
110 /// 2. `candidate.confidence` is NaN / non-finite.
111 /// 3. `candidate.confidence < self.confidence_floor`.
112 /// 4. `candidate.opt_in == false`.
113 #[must_use]
114 pub fn admit(&self, candidate: &Candidate) -> bool {
115 if !self.consumer_opt_in {
116 return false;
117 }
118 if !candidate.opt_in {
119 return false;
120 }
121 if !candidate.confidence.is_finite() {
122 return false;
123 }
124 candidate.confidence >= self.confidence_floor
125 }
126}
127
128/// A candidate typed relation awaiting admission.
129///
130/// Contains only the fields the trust gate needs; the full
131/// `TypedRelation` payload is kept in the inference module so that
132/// this file stays free of clustering-shape types.
133#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
134pub struct Candidate {
135 /// Clustering-assigned confidence in `[0.0, 1.0]`.
136 pub confidence: f32,
137 /// Whether the producer marked this edge opt-in (solution.md R3
138 /// §2 `ProvenanceTag::InferredRelation { opt_in: true }`).
139 pub opt_in: bool,
140}
141
142/// Truncated SHA-256 fingerprint of an author identifier.
143///
144/// Produced by [`AuthorFingerprint::from_author_id`]. The raw bytes
145/// are exposed as a hex string via [`AuthorFingerprint::as_hex`] for
146/// the metric `mnem_infer_author_ratelimit_hits_total{author_fingerprint}`.
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
148pub struct AuthorFingerprint([u8; AUTHOR_FINGERPRINT_BYTES]);
149
150impl AuthorFingerprint {
151 /// Hash `author_id` with SHA-256 and truncate to
152 /// [`AUTHOR_FINGERPRINT_BYTES`] bytes.
153 #[must_use]
154 pub fn from_author_id(author_id: &str) -> Self {
155 let mut hasher = Sha256::new();
156 hasher.update(author_id.as_bytes());
157 let digest = hasher.finalize();
158 let mut out = [0u8; AUTHOR_FINGERPRINT_BYTES];
159 out.copy_from_slice(&digest[..AUTHOR_FINGERPRINT_BYTES]);
160 Self(out)
161 }
162
163 /// Raw fingerprint bytes.
164 #[must_use]
165 pub fn as_bytes(&self) -> &[u8; AUTHOR_FINGERPRINT_BYTES] {
166 &self.0
167 }
168
169 /// Lowercase hex encoding, suitable for metric labels.
170 #[must_use]
171 pub fn as_hex(&self) -> String {
172 let mut s = String::with_capacity(AUTHOR_FINGERPRINT_BYTES * 2);
173 for b in &self.0 {
174 use std::fmt::Write as _;
175 let _ = write!(s, "{b:02x}");
176 }
177 s
178 }
179}
180
181/// Rolling per-author token-bucket rate limiter over a 1-minute
182/// window (solution.md R3 §4).
183///
184/// The bucket tracks `(fingerprint, window_start_secs, count)`
185/// tuples. `window_start_secs` is a monotonic clock value supplied by
186/// the caller - the limiter has no global clock access, keeping it
187/// deterministic under test. When the caller advances the window
188/// ([`AuthorRateLimiter::tick`]) beyond
189/// [`AUTHOR_RATE_LIMIT_WINDOW_SECS`], counts reset to zero.
190#[derive(Debug, Clone)]
191pub struct AuthorRateLimiter {
192 per_commit_cap: u32,
193 window_start_secs: u64,
194 buckets: std::collections::HashMap<AuthorFingerprint, u32>,
195}
196
197impl AuthorRateLimiter {
198 /// Create a new limiter with `per_commit_cap` phrases per
199 /// `(author, 1-minute window)` pair.
200 ///
201 /// `per_commit_cap` comes from
202 /// `InferenceBudget::author_rate_limit_per_commit` (corpus-
203 /// derived, not magic).
204 #[must_use]
205 pub fn new(per_commit_cap: u32, now_secs: u64) -> Self {
206 Self {
207 per_commit_cap,
208 window_start_secs: now_secs,
209 buckets: std::collections::HashMap::new(),
210 }
211 }
212
213 /// Advance the monotonic window clock. When `now_secs` is more
214 /// than [`AUTHOR_RATE_LIMIT_WINDOW_SECS`] past the current window
215 /// start, all buckets reset and the window rolls forward.
216 pub fn tick(&mut self, now_secs: u64) {
217 if now_secs >= self.window_start_secs
218 && now_secs - self.window_start_secs >= AUTHOR_RATE_LIMIT_WINDOW_SECS
219 {
220 self.buckets.clear();
221 self.window_start_secs = now_secs;
222 }
223 }
224
225 /// Try to admit one phrase for `author`.
226 ///
227 /// Returns `true` when the bucket is under the cap (and increments
228 /// the counter), `false` when the cap is reached. Callers should
229 /// drop the phrase from Leiden input on `false` and emit the
230 /// `mnem_infer_author_ratelimit_hits_total` counter.
231 pub fn admit(&mut self, author: &AuthorFingerprint) -> bool {
232 let counter = self.buckets.entry(*author).or_insert(0);
233 if *counter >= self.per_commit_cap {
234 return false;
235 }
236 *counter += 1;
237 true
238 }
239
240 /// Read-only count for `author` in the current window.
241 #[must_use]
242 pub fn count(&self, author: &AuthorFingerprint) -> u32 {
243 self.buckets.get(author).copied().unwrap_or(0)
244 }
245
246 /// Current window start (monotonic seconds, caller-supplied).
247 #[must_use]
248 pub fn window_start_secs(&self) -> u64 {
249 self.window_start_secs
250 }
251}
252
#[cfg(test)]
mod tests {
    //! Unit tests for the trust gate, the author fingerprint and the
    //! per-author rate limiter.

    use super::*;

    /// Shorthand for building a [`Candidate`].
    fn candidate(confidence: f32, opt_in: bool) -> Candidate {
        Candidate { confidence, opt_in }
    }

    #[test]
    fn admit_rejects_when_consumer_not_opted_in() {
        let tb = TrustBoundary::new(0.5, false).unwrap();
        assert!(!tb.admit(&candidate(0.99, true)));
    }

    #[test]
    fn admit_rejects_when_producer_did_not_opt_in() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        assert!(!tb.admit(&candidate(0.99, false)));
    }

    #[test]
    fn admit_rejects_below_confidence_floor() {
        let tb = TrustBoundary::new(0.75, true).unwrap();
        assert!(!tb.admit(&candidate(0.7499, true)));
    }

    #[test]
    fn admit_accepts_at_and_above_floor() {
        let tb = TrustBoundary::new(0.75, true).unwrap();
        assert!(tb.admit(&candidate(0.75, true)));
        assert!(tb.admit(&candidate(0.99, true)));
    }

    #[test]
    fn admit_rejects_nan_confidence() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        assert!(!tb.admit(&candidate(f32::NAN, true)));
    }

    /// Infinite confidences are non-finite and must be rejected even
    /// though `+inf >= floor` would otherwise hold.
    #[test]
    fn admit_rejects_infinite_confidence() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        assert!(!tb.admit(&candidate(f32::INFINITY, true)));
        assert!(!tb.admit(&candidate(f32::NEG_INFINITY, true)));
    }

    #[test]
    fn new_rejects_out_of_range_floor() {
        assert!(TrustBoundary::new(-0.1, true).is_none());
        assert!(TrustBoundary::new(1.1, true).is_none());
        assert!(TrustBoundary::new(f32::NAN, true).is_none());
    }

    #[test]
    fn new_rejects_infinite_floor() {
        assert!(TrustBoundary::new(f32::INFINITY, true).is_none());
        assert!(TrustBoundary::new(f32::NEG_INFINITY, true).is_none());
    }

    #[test]
    fn new_accepts_boundary_floors() {
        assert!(TrustBoundary::new(0.0, true).is_some());
        assert!(TrustBoundary::new(1.0, true).is_some());
    }

    #[test]
    fn ppr_default_uses_spec_pinned_floor() {
        let tb = TrustBoundary::ppr_default(true);
        assert!((tb.confidence_floor - PPR_AMPLIFICATION_FLOOR).abs() < f32::EPSILON);
    }

    #[test]
    fn fingerprint_is_deterministic_and_truncated() {
        let a = AuthorFingerprint::from_author_id("alice@example.com");
        let b = AuthorFingerprint::from_author_id("alice@example.com");
        assert_eq!(a, b);
        assert_eq!(a.as_bytes().len(), AUTHOR_FINGERPRINT_BYTES);
        assert_eq!(a.as_hex().len(), AUTHOR_FINGERPRINT_BYTES * 2);
    }

    #[test]
    fn fingerprint_distinguishes_distinct_authors() {
        let a = AuthorFingerprint::from_author_id("alice");
        let b = AuthorFingerprint::from_author_id("bob");
        assert_ne!(a, b);
    }

    #[test]
    fn fingerprint_hex_is_lowercase_hex_digits() {
        let hex = AuthorFingerprint::from_author_id("alice").as_hex();
        assert!(hex.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')));
    }

    #[test]
    fn rate_limiter_admits_up_to_cap_then_rejects() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(3, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(!rl.admit(&author));
        assert_eq!(rl.count(&author), 3);
    }

    #[test]
    fn rate_limiter_resets_after_window_elapses() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(2, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(!rl.admit(&author));
        rl.tick(AUTHOR_RATE_LIMIT_WINDOW_SECS);
        assert!(rl.admit(&author));
        assert_eq!(rl.window_start_secs(), AUTHOR_RATE_LIMIT_WINDOW_SECS);
    }

    #[test]
    fn rate_limiter_does_not_reset_within_window() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(2, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        rl.tick(AUTHOR_RATE_LIMIT_WINDOW_SECS - 1);
        assert!(!rl.admit(&author));
    }

    /// A timestamp earlier than the window start must neither reset
    /// the buckets nor move the window.
    #[test]
    fn rate_limiter_ignores_clock_regression() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(1, 100);
        assert!(rl.admit(&author));
        rl.tick(10);
        assert!(!rl.admit(&author));
        assert_eq!(rl.window_start_secs(), 100);
    }

    #[test]
    fn rate_limiter_tracks_authors_independently() {
        let alice = AuthorFingerprint::from_author_id("alice");
        let bob = AuthorFingerprint::from_author_id("bob");
        let mut rl = AuthorRateLimiter::new(1, 0);
        assert!(rl.admit(&alice));
        assert!(rl.admit(&bob));
        assert!(!rl.admit(&alice));
        assert_eq!(rl.count(&alice), 1);
        assert_eq!(rl.count(&bob), 1);
    }

    #[test]
    fn rate_limiter_with_zero_cap_rejects_everything() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(0, 0);
        assert!(!rl.admit(&author));
        assert_eq!(rl.count(&author), 0);
    }
}
371
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Floor-c property backing gauge `mnem_ppr_amplification_floor`:
        /// a confidence strictly under the floor is never admitted,
        /// whatever the producer's opt-in flag says.
        #[test]
        fn admit_rejects_below_confidence_floor(
            floor in 0.0f32..=1.0,
            delta in 0.0001f32..0.5,
            opt_in in any::<bool>(),
        ) {
            let boundary = TrustBoundary::new(floor, true).unwrap();
            // Clamp at zero; the clamped value can then equal the floor,
            // in which case there is nothing to check.
            let lowered = (floor - delta).max(0.0);
            if lowered < floor {
                let below = Candidate { confidence: lowered, opt_in };
                prop_assert!(!boundary.admit(&below));
            }
        }

        /// Floor-c property: with both opt-ins set, any confidence at
        /// or above the floor is admitted.
        #[test]
        fn admit_accepts_above_floor_with_opt_in(
            floor in 0.0f32..=1.0,
            above in 0.0f32..=0.5,
        ) {
            let boundary = TrustBoundary::new(floor, true).unwrap();
            // Cap at 1.0 so the confidence stays in the documented range.
            let raised = (floor + above).min(1.0);
            let at_or_above = Candidate { confidence: raised, opt_in: true };
            prop_assert!(boundary.admit(&at_or_above));
        }

        /// Determinism property: fingerprinting the same identifier
        /// twice yields equal fingerprints.
        #[test]
        fn fingerprint_stable_across_calls(s in "[a-zA-Z0-9@._-]{1,64}") {
            let first = AuthorFingerprint::from_author_id(&s);
            let second = AuthorFingerprint::from_author_id(&s);
            prop_assert_eq!(first, second);
        }
    }
}
416}