Skip to main content

mnem_extract/
trust.rs

1//! Trust-boundary gate for opt-in typed-relation inference.
2//!
3//! This module implements the adversarial trust model described in
4//! `research/gap-catalog/03-typed-relation-inference/solution.md`
5//! (Round 3). It is the mandatory admission check between a
6//! candidate inferred edge and any downstream consumer (PPR, multihop
7//! traversal, retrieve) that might amplify the edge's weight in a
8//! ranking signal.
9//!
10//! # Design
11//!
12//! The gate is intentionally tiny: a single `admit` function over a
13//! [`TrustBoundary`] policy and a [`Candidate`] descriptor. No policy
14//! defaults are magic: every floor must be constructed explicitly
15//! from `InferenceBudget`-derived or spec-pinned values (floor-c). The
16//! caller is expected to plumb the same floor through gauges so the
17//! runtime view matches the code.
18//!
19//! # Determinism
20//!
21//! `admit` is a pure function of its inputs. No clocks, no randomness,
22//! no global state - safe to call inside property tests and inside
23//! the deterministic ingest pipeline.
24//!
25//! # Rate-limit fingerprint
26//!
//! The per-author rate limiter ([`AuthorRateLimiter`]) is keyed by an
//! [`AuthorFingerprint`]: the author identifier is hashed with
//! SHA-256 and truncated to the first 8 bytes. This is documented in
//! solution.md R3 §4: *no PII, forensics-ready*. The truncation
//! boundary is 8 bytes (64 bits) so that collisions are vanishingly
//! unlikely for real-world author-population sizes, while the stored
//! value is only a fragment of a one-way digest rather than the id
//! itself. The hash is unsalted and deterministic, so a guessable id
//! can still be confirmed by re-hashing candidate values.
34
35use serde::{Deserialize, Serialize};
36use sha2::{Digest, Sha256};
37
/// Default PPR-amplification confidence floor - solution.md R3 §3.
///
/// Reference: Tong 2008 §4 PPR-skip-risk envelope. Floor-c tunable
/// backed by gauge `mnem_ppr_amplification_floor` and the proptest
/// `proptests::admit_rejects_below_confidence_floor`. That proptest
/// lives in a private, test-only module, so it is named in plain code
/// formatting rather than as an intra-doc link that a regular
/// documentation build cannot resolve.
pub const PPR_AMPLIFICATION_FLOOR: f32 = 0.75;

/// Truncation length for the SHA-256 author fingerprint, in bytes.
///
/// 8 bytes = 64 bits. See module docs for rationale.
pub const AUTHOR_FINGERPRINT_BYTES: usize = 8;

/// Width of the rate-limit window, in seconds.
///
/// Fixed at 60s per solution.md R3 §4 ("rolling 1-min window").
pub const AUTHOR_RATE_LIMIT_WINDOW_SECS: u64 = 60;
54
55/// Opt-in policy for admitting an inferred typed relation into a
56/// downstream ranking signal.
57///
58/// All fields are explicit: there is no `Default` because every caller
59/// MUST set a floor that matches its own gauge and proptest. The
60/// library-provided constant [`PPR_AMPLIFICATION_FLOOR`] is the
61/// solution.md-pinned value when the caller is PPR.
62#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
63pub struct TrustBoundary {
64    /// Minimum confidence for admission. Candidates strictly below
65    /// this value are rejected.
66    pub confidence_floor: f32,
67
68    /// Whether the downstream caller has explicitly opted into
69    /// inferred relations. When `false`, every candidate is rejected
70    /// regardless of confidence. Mirrors the `opt_in` flag on the
71    /// `ProvenanceTag::InferredRelation` variant (solution.md R3 §2).
72    pub consumer_opt_in: bool,
73}
74
75impl TrustBoundary {
76    /// Construct a new trust boundary.
77    ///
78    /// # Errors
79    ///
80    /// Returns `None` when `confidence_floor` is NaN or outside
81    /// `[0.0, 1.0]`. Callers that want the spec floor can use
82    /// [`TrustBoundary::ppr_default`] instead.
83    #[must_use]
84    pub fn new(confidence_floor: f32, consumer_opt_in: bool) -> Option<Self> {
85        if !confidence_floor.is_finite() || !(0.0..=1.0).contains(&confidence_floor) {
86            return None;
87        }
88        Some(Self {
89            confidence_floor,
90            consumer_opt_in,
91        })
92    }
93
94    /// Construct the spec-pinned PPR trust boundary
95    /// ([`PPR_AMPLIFICATION_FLOOR`]) with the caller's opt-in flag.
96    #[must_use]
97    pub fn ppr_default(consumer_opt_in: bool) -> Self {
98        Self {
99            confidence_floor: PPR_AMPLIFICATION_FLOOR,
100            consumer_opt_in,
101        }
102    }
103
104    /// Decide whether `candidate` is admissible under this policy.
105    ///
106    /// The function is a total, side-effect-free predicate over
107    /// `(self, candidate)`. It rejects on any of:
108    ///
109    /// 1. `consumer_opt_in == false`.
110    /// 2. `candidate.confidence` is NaN / non-finite.
111    /// 3. `candidate.confidence < self.confidence_floor`.
112    /// 4. `candidate.opt_in == false`.
113    #[must_use]
114    pub fn admit(&self, candidate: &Candidate) -> bool {
115        if !self.consumer_opt_in {
116            return false;
117        }
118        if !candidate.opt_in {
119            return false;
120        }
121        if !candidate.confidence.is_finite() {
122            return false;
123        }
124        candidate.confidence >= self.confidence_floor
125    }
126}
127
128/// A candidate typed relation awaiting admission.
129///
130/// Contains only the fields the trust gate needs; the full
131/// `TypedRelation` payload is kept in the inference module so that
132/// this file stays free of clustering-shape types.
133#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
134pub struct Candidate {
135    /// Clustering-assigned confidence in `[0.0, 1.0]`.
136    pub confidence: f32,
137    /// Whether the producer marked this edge opt-in (solution.md R3
138    /// §2 `ProvenanceTag::InferredRelation { opt_in: true }`).
139    pub opt_in: bool,
140}
141
142/// Truncated SHA-256 fingerprint of an author identifier.
143///
144/// Produced by [`AuthorFingerprint::from_author_id`]. The raw bytes
145/// are exposed as a hex string via [`AuthorFingerprint::as_hex`] for
146/// the metric `mnem_infer_author_ratelimit_hits_total{author_fingerprint}`.
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
148pub struct AuthorFingerprint([u8; AUTHOR_FINGERPRINT_BYTES]);
149
150impl AuthorFingerprint {
151    /// Hash `author_id` with SHA-256 and truncate to
152    /// [`AUTHOR_FINGERPRINT_BYTES`] bytes.
153    #[must_use]
154    pub fn from_author_id(author_id: &str) -> Self {
155        let mut hasher = Sha256::new();
156        hasher.update(author_id.as_bytes());
157        let digest = hasher.finalize();
158        let mut out = [0u8; AUTHOR_FINGERPRINT_BYTES];
159        out.copy_from_slice(&digest[..AUTHOR_FINGERPRINT_BYTES]);
160        Self(out)
161    }
162
163    /// Raw fingerprint bytes.
164    #[must_use]
165    pub fn as_bytes(&self) -> &[u8; AUTHOR_FINGERPRINT_BYTES] {
166        &self.0
167    }
168
169    /// Lowercase hex encoding, suitable for metric labels.
170    #[must_use]
171    pub fn as_hex(&self) -> String {
172        let mut s = String::with_capacity(AUTHOR_FINGERPRINT_BYTES * 2);
173        for b in &self.0 {
174            use std::fmt::Write as _;
175            let _ = write!(s, "{b:02x}");
176        }
177        s
178    }
179}
180
181/// Rolling per-author token-bucket rate limiter over a 1-minute
182/// window (solution.md R3 §4).
183///
184/// The bucket tracks `(fingerprint, window_start_secs, count)`
185/// tuples. `window_start_secs` is a monotonic clock value supplied by
186/// the caller - the limiter has no global clock access, keeping it
187/// deterministic under test. When the caller advances the window
188/// ([`AuthorRateLimiter::tick`]) beyond
189/// [`AUTHOR_RATE_LIMIT_WINDOW_SECS`], counts reset to zero.
190#[derive(Debug, Clone)]
191pub struct AuthorRateLimiter {
192    per_commit_cap: u32,
193    window_start_secs: u64,
194    buckets: std::collections::HashMap<AuthorFingerprint, u32>,
195}
196
197impl AuthorRateLimiter {
198    /// Create a new limiter with `per_commit_cap` phrases per
199    /// `(author, 1-minute window)` pair.
200    ///
201    /// `per_commit_cap` comes from
202    /// `InferenceBudget::author_rate_limit_per_commit` (corpus-
203    /// derived, not magic).
204    #[must_use]
205    pub fn new(per_commit_cap: u32, now_secs: u64) -> Self {
206        Self {
207            per_commit_cap,
208            window_start_secs: now_secs,
209            buckets: std::collections::HashMap::new(),
210        }
211    }
212
213    /// Advance the monotonic window clock. When `now_secs` is more
214    /// than [`AUTHOR_RATE_LIMIT_WINDOW_SECS`] past the current window
215    /// start, all buckets reset and the window rolls forward.
216    pub fn tick(&mut self, now_secs: u64) {
217        if now_secs >= self.window_start_secs
218            && now_secs - self.window_start_secs >= AUTHOR_RATE_LIMIT_WINDOW_SECS
219        {
220            self.buckets.clear();
221            self.window_start_secs = now_secs;
222        }
223    }
224
225    /// Try to admit one phrase for `author`.
226    ///
227    /// Returns `true` when the bucket is under the cap (and increments
228    /// the counter), `false` when the cap is reached. Callers should
229    /// drop the phrase from Leiden input on `false` and emit the
230    /// `mnem_infer_author_ratelimit_hits_total` counter.
231    pub fn admit(&mut self, author: &AuthorFingerprint) -> bool {
232        let counter = self.buckets.entry(*author).or_insert(0);
233        if *counter >= self.per_commit_cap {
234            return false;
235        }
236        *counter += 1;
237        true
238    }
239
240    /// Read-only count for `author` in the current window.
241    #[must_use]
242    pub fn count(&self, author: &AuthorFingerprint) -> u32 {
243        self.buckets.get(author).copied().unwrap_or(0)
244    }
245
246    /// Current window start (monotonic seconds, caller-supplied).
247    #[must_use]
248    pub fn window_start_secs(&self) -> u64 {
249        self.window_start_secs
250    }
251}
252
/// Example-based unit tests for the trust gate, the author
/// fingerprint, and the rate limiter.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn admit_rejects_when_consumer_not_opted_in() {
        let tb = TrustBoundary::new(0.5, false).unwrap();
        let c = Candidate {
            confidence: 0.99,
            opt_in: true,
        };
        assert!(!tb.admit(&c));
    }

    #[test]
    fn admit_rejects_when_producer_did_not_opt_in() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        let c = Candidate {
            confidence: 0.99,
            opt_in: false,
        };
        assert!(!tb.admit(&c));
    }

    #[test]
    fn admit_rejects_below_confidence_floor() {
        let tb = TrustBoundary::new(0.75, true).unwrap();
        let c = Candidate {
            confidence: 0.7499,
            opt_in: true,
        };
        assert!(!tb.admit(&c));
    }

    #[test]
    fn admit_accepts_at_and_above_floor() {
        let tb = TrustBoundary::new(0.75, true).unwrap();
        assert!(tb.admit(&Candidate {
            confidence: 0.75,
            opt_in: true
        }));
        assert!(tb.admit(&Candidate {
            confidence: 0.99,
            opt_in: true
        }));
    }

    #[test]
    fn admit_rejects_nan_confidence() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        assert!(!tb.admit(&Candidate {
            confidence: f32::NAN,
            opt_in: true
        }));
    }

    /// `+inf` is the non-finite value that would pass a bare
    /// `confidence >= floor` comparison, so it is pinned explicitly.
    #[test]
    fn admit_rejects_infinite_confidence() {
        let tb = TrustBoundary::new(0.5, true).unwrap();
        assert!(!tb.admit(&Candidate {
            confidence: f32::INFINITY,
            opt_in: true
        }));
        assert!(!tb.admit(&Candidate {
            confidence: f32::NEG_INFINITY,
            opt_in: true
        }));
    }

    #[test]
    fn new_rejects_out_of_range_floor() {
        assert!(TrustBoundary::new(-0.1, true).is_none());
        assert!(TrustBoundary::new(1.1, true).is_none());
        assert!(TrustBoundary::new(f32::NAN, true).is_none());
        assert!(TrustBoundary::new(f32::INFINITY, true).is_none());
        assert!(TrustBoundary::new(f32::NEG_INFINITY, true).is_none());
    }

    #[test]
    fn new_accepts_boundary_floors() {
        assert!(TrustBoundary::new(0.0, true).is_some());
        assert!(TrustBoundary::new(1.0, true).is_some());
    }

    #[test]
    fn ppr_default_uses_spec_pinned_floor() {
        let tb = TrustBoundary::ppr_default(true);
        assert!((tb.confidence_floor - PPR_AMPLIFICATION_FLOOR).abs() < f32::EPSILON);
    }

    #[test]
    fn fingerprint_is_deterministic_and_truncated() {
        let a = AuthorFingerprint::from_author_id("alice@example.com");
        let b = AuthorFingerprint::from_author_id("alice@example.com");
        assert_eq!(a, b);
        assert_eq!(a.as_bytes().len(), AUTHOR_FINGERPRINT_BYTES);
        assert_eq!(a.as_hex().len(), AUTHOR_FINGERPRINT_BYTES * 2);
    }

    #[test]
    fn fingerprint_hex_is_lowercase_hex() {
        let hex = AuthorFingerprint::from_author_id("alice@example.com").as_hex();
        assert!(hex
            .chars()
            .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
    }

    #[test]
    fn fingerprint_distinguishes_distinct_authors() {
        let a = AuthorFingerprint::from_author_id("alice");
        let b = AuthorFingerprint::from_author_id("bob");
        assert_ne!(a, b);
    }

    #[test]
    fn rate_limiter_admits_up_to_cap_then_rejects() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(3, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(!rl.admit(&author));
        assert_eq!(rl.count(&author), 3);
    }

    #[test]
    fn rate_limiter_tracks_authors_independently() {
        let alice = AuthorFingerprint::from_author_id("alice");
        let bob = AuthorFingerprint::from_author_id("bob");
        let mut rl = AuthorRateLimiter::new(1, 0);
        assert!(rl.admit(&alice));
        assert!(!rl.admit(&alice));
        assert!(rl.admit(&bob));
        assert_eq!(rl.count(&alice), 1);
        assert_eq!(rl.count(&bob), 1);
    }

    #[test]
    fn rate_limiter_zero_cap_rejects_everything() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(0, 0);
        assert!(!rl.admit(&author));
        assert_eq!(rl.count(&author), 0);
    }

    #[test]
    fn rate_limiter_resets_after_window_elapses() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(2, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        assert!(!rl.admit(&author));
        rl.tick(AUTHOR_RATE_LIMIT_WINDOW_SECS);
        assert!(rl.admit(&author));
        assert_eq!(rl.window_start_secs(), AUTHOR_RATE_LIMIT_WINDOW_SECS);
    }

    #[test]
    fn rate_limiter_does_not_reset_within_window() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(2, 0);
        assert!(rl.admit(&author));
        assert!(rl.admit(&author));
        rl.tick(AUTHOR_RATE_LIMIT_WINDOW_SECS - 1);
        assert!(!rl.admit(&author));
    }

    /// A clock value earlier than the window start must neither
    /// underflow nor roll the window.
    #[test]
    fn rate_limiter_ignores_backward_clock() {
        let author = AuthorFingerprint::from_author_id("author-x");
        let mut rl = AuthorRateLimiter::new(1, 1_000);
        assert!(rl.admit(&author));
        rl.tick(0);
        assert_eq!(rl.window_start_secs(), 1_000);
        assert!(!rl.admit(&author));
    }
}
371
/// Property tests backing the floor-c tunables of the trust gate.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Floor-c proptest: no candidate strictly below the floor is
        /// ever admitted. Backs gauge `mnem_ppr_amplification_floor`.
        #[test]
        fn admit_rejects_below_confidence_floor(
            floor in 0.0f32..=1.0,
            delta in 0.0001f32..0.5,
            opt_in in any::<bool>(),
        ) {
            let tb = TrustBoundary::new(floor, true).unwrap();
            // No clamp to zero: the gate has to reject negative
            // confidences too, and without the clamp every generated
            // case is strictly below the floor, so none is skipped.
            let conf = floor - delta;
            prop_assert!(conf < floor);
            let c = Candidate { confidence: conf, opt_in };
            prop_assert!(!tb.admit(&c));
        }

        /// Floor-c proptest: every candidate at or above floor with
        /// both opt-ins set is admitted.
        #[test]
        fn admit_accepts_above_floor_with_opt_in(
            floor in 0.0f32..=1.0,
            above in 0.0f32..=0.5,
        ) {
            let tb = TrustBoundary::new(floor, true).unwrap();
            let conf = (floor + above).min(1.0);
            let c = Candidate { confidence: conf, opt_in: true };
            prop_assert!(tb.admit(&c));
        }

        /// Without consumer opt-in nothing is admitted, whatever the
        /// floor, the confidence, or the producer flag.
        #[test]
        fn admit_rejects_without_consumer_opt_in(
            floor in 0.0f32..=1.0,
            conf in 0.0f32..=1.0,
            opt_in in any::<bool>(),
        ) {
            let tb = TrustBoundary::new(floor, false).unwrap();
            let c = Candidate { confidence: conf, opt_in };
            prop_assert!(!tb.admit(&c));
        }

        /// The SHA-256 fingerprint of an id is the same on every call
        /// (determinism proptest).
        #[test]
        fn fingerprint_stable_across_calls(s in "[a-zA-Z0-9@._-]{1,64}") {
            let a = AuthorFingerprint::from_author_id(&s);
            let b = AuthorFingerprint::from_author_id(&s);
            prop_assert_eq!(a, b);
        }
    }
}
416}