mnem_extract/inference.rs
1//! Optional typed-relation inference for mnem-extract.
2//!
3//! Gated behind the `typed-relations` Cargo feature. Default OFF.
4//!
5//! # What this module defines
6//!
7//! - [`InferenceBudget`] - shared primitive, wall-clock + volume caps
8//! for the inference stage of one commit.
9//! - [`TypedRelation`] - edge payload emitted by
10//! [`crate::traits::Extractor::infer_typed_relations`].
11//! - [`InferenceMethod`] - provenance tag attached to every inferred
12//! edge (solution.md R3 §2).
13//!
14//! # What this module does NOT define
15//!
16//! - Clustering (Leiden). Lives in a downstream crate.
17//! - Canary suite. See `gap-catalog/shared/canary-suite.md`.
18//! - CLI wiring. That's `mnem-ingest` + `mnem-cli`.
19//!
20//! # Floor-c tunable
21//!
//! [`InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT`] = 250ms, i.e. half
//! of `max_cooccurrence_ms = 500` (commit-envelope reserve, gap 10).
//! The effective value is exposed as gauge
//! `mnem_inference_budget_effective_ms` via
//! [`InferenceBudget::effective_ms_gauge`] and checked by the
//! `proptests::budget_respected` property test (a `#[cfg(test)]`
//! module, so it is named here as plain code rather than linked).
27
28use serde::{Deserialize, Serialize};
29
30use crate::traits::ExtractionSource;
31
/// Name under which the runtime-effective inference budget is reported.
///
/// This is the first element of the pair returned by
/// [`InferenceBudget::effective_ms_gauge`]. Together with the named
/// constant and the property test it makes up the three-condition
/// floor-c apparatus exercised by this module's tests.
pub const BUDGET_GAUGE_NAME: &str = "mnem_inference_budget_effective_ms";
38
39/// Shared primitive: wall-clock and volume caps for the inference
40/// stage of a single commit.
41///
42/// All fields are floor-c tunables (solution.md R6 §Constant
43/// classification table) or corpus-derived. No magic numbers.
44#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
45pub struct InferenceBudget {
46 /// Rolling wall-clock budget for the extract-and-embed phase of
47 /// inference, derived from `3 * rolling.p50_ingest_phrase_embed_ms`
48 /// (fallback 500ms). Class `a` (rolling telemetry).
49 pub extract_latency_budget_ms: u32,
50
51 /// Hard wall-clock ceiling per commit. Class `c` (reference
52 /// standard): 250ms = 50% of `max_cooccurrence_ms = 500`.
53 pub max_inference_ms_per_commit: u32,
54
55 /// Hard ceiling on bridging-phrase embeddings per commit. Class
56 /// `a` (corpus-derived): `min(50_000, sqrt(N_phrases) * 100)`.
57 pub max_phrases_embedded: u32,
58
59 /// Max inferred relation types emitted per commit. Class `a`
60 /// (corpus-derived): `ceil(log2(corpus_size))`.
61 pub max_types: u32,
62
63 /// Per-author bridging-phrase cap per commit. Class `a`
64 /// (corpus-derived): `max(200, 0.01 * mean_phrases_per_author)`.
65 pub author_rate_limit_per_commit: u32,
66}
67
68impl InferenceBudget {
69 /// Floor-c reference standard for the per-commit hard wall.
70 ///
71 /// 250ms. See `solution.md` R6 §Floor-c apparatus.
72 pub const MAX_INFERENCE_MS_PER_COMMIT: u32 = 250;
73
74 /// Fallback for `extract_latency_budget_ms` when rolling p50
75 /// telemetry is unavailable. See
76 /// `shared/inference-budget.md` §API sketch.
77 pub const FALLBACK_EXTRACT_LATENCY_MS: u32 = 500;
78
79 /// Conservative defaults for CI / proptest / initial runs.
80 ///
81 /// Real deployments derive via a stats-aware constructor in the
82 /// ingest crate; this keeps `mnem-extract` free of clock or
83 /// telemetry dependencies.
84 #[must_use]
85 pub const fn conservative() -> Self {
86 Self {
87 extract_latency_budget_ms: Self::MAX_INFERENCE_MS_PER_COMMIT,
88 max_inference_ms_per_commit: Self::MAX_INFERENCE_MS_PER_COMMIT,
89 max_phrases_embedded: 10_000,
90 max_types: 8,
91 author_rate_limit_per_commit: 200,
92 }
93 }
94
95 /// Effective runtime budget = `min(extract_latency_budget_ms,
96 /// max_inference_ms_per_commit)`.
97 ///
98 /// The hard wall is always the ceiling, even when rolling
99 /// telemetry computes a higher extract budget.
100 #[must_use]
101 pub const fn effective_ms(&self) -> u32 {
102 let extract = self.extract_latency_budget_ms;
103 let hard = self.max_inference_ms_per_commit;
104 if extract < hard { extract } else { hard }
105 }
106
107 /// Sample for gauge `mnem_inference_budget_effective_ms`.
108 ///
109 /// Caller is responsible for emission; this module does not link
110 /// a metrics backend. Returning `(name, value)` lets the caller
111 /// use either `metrics::gauge!` or a custom registry.
112 #[must_use]
113 pub fn effective_ms_gauge(&self) -> (&'static str, f64) {
114 (BUDGET_GAUGE_NAME, f64::from(self.effective_ms()))
115 }
116
117 /// Validate that the budget is internally consistent.
118 ///
119 /// Returns `Err` with a static reason when:
120 ///
121 /// 1. `max_inference_ms_per_commit` is zero.
122 /// 2. `extract_latency_budget_ms` is zero.
123 /// 3. `max_phrases_embedded` is zero.
124 ///
125 /// Zero-valued caps are always a programming error: they would
126 /// make the entire inference pass a no-op and silently hide bugs.
127 pub fn validate(&self) -> Result<(), &'static str> {
128 if self.max_inference_ms_per_commit == 0 {
129 return Err("max_inference_ms_per_commit must be > 0");
130 }
131 if self.extract_latency_budget_ms == 0 {
132 return Err("extract_latency_budget_ms must be > 0");
133 }
134 if self.max_phrases_embedded == 0 {
135 return Err("max_phrases_embedded must be > 0");
136 }
137 Ok(())
138 }
139}
140
141impl Default for InferenceBudget {
142 fn default() -> Self {
143 Self::conservative()
144 }
145}
146
147/// Provenance tag describing *how* a typed relation was inferred.
148///
149/// Serialised into `TypedRelation::source_label` as
150/// `"inferred:<method>"` per solution.md R3: every edge carries its
151/// inference method so rollback and audit can filter by origin.
152#[non_exhaustive]
153#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
154#[serde(rename_all = "snake_case")]
155pub enum InferenceMethod {
156 /// KeyBERT-style pattern-embedding clustering (Leiden).
157 PatternEmbedding,
158 /// Co-occurrence PMI promoted to a named type via clustering.
159 CooccurrencePmi,
160 /// Caller-supplied custom method. String is forensics-tagged in
161 /// the provenance label verbatim; keep it short and snake_case.
162 Custom(String),
163}
164
165impl InferenceMethod {
166 /// Render as the `inferred:<method>` tag used in provenance
167 /// labels and the `mnem commit` audit stream.
168 #[must_use]
169 pub fn provenance_label(&self) -> String {
170 match self {
171 Self::PatternEmbedding => "inferred:pattern_embedding".to_string(),
172 Self::CooccurrencePmi => "inferred:cooccurrence_pmi".to_string(),
173 Self::Custom(s) => format!("inferred:{s}"),
174 }
175 }
176}
177
178/// An inferred typed edge between two previously-extracted entities.
179///
180/// Distinct from [`crate::traits::Relation`]:
181///
182/// - [`Relation`](crate::traits::Relation) is a raw `(src, dst,
183/// weight)` triple with no named predicate.
184/// - `TypedRelation` has a clustering-assigned predicate and a
185/// confidence in `[0.0, 1.0]` so the trust gate can filter it.
186#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
187pub struct TypedRelation {
188 /// Subject entity mention.
189 pub src: String,
190 /// Object entity mention.
191 pub dst: String,
192 /// Clustering-assigned predicate label (e.g. `"causes"`).
193 pub predicate: String,
194 /// Confidence in `[0.0, 1.0]`. Fed to
195 /// [`crate::trust::TrustBoundary::admit`] downstream.
196 pub confidence: f32,
197 /// Provenance in the full taxonomy ([`ExtractionSource`]).
198 pub source: ExtractionSource,
199 /// Short human-readable provenance label, always of the shape
200 /// `"inferred:<method>"`. Derived from [`InferenceMethod`] at
201 /// emission time to keep the struct `serde`-flat.
202 pub source_label: String,
203}
204
205impl TypedRelation {
206 /// Build a new typed relation with the provenance label
207 /// auto-derived from `method`.
208 #[must_use]
209 pub fn new(
210 src: impl Into<String>,
211 dst: impl Into<String>,
212 predicate: impl Into<String>,
213 confidence: f32,
214 method: &InferenceMethod,
215 ) -> Self {
216 Self {
217 src: src.into(),
218 dst: dst.into(),
219 predicate: predicate.into(),
220 confidence,
221 source: ExtractionSource::Statistical,
222 source_label: method.provenance_label(),
223 }
224 }
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in conservative budget must satisfy its own validator.
    #[test]
    fn conservative_budget_passes_validation() {
        assert!(InferenceBudget::conservative().validate().is_ok());
    }

    /// A smaller extract budget wins; a larger one is capped by the
    /// hard wall.
    #[test]
    fn effective_ms_is_minimum_of_extract_and_hard_wall() {
        let below_wall = InferenceBudget {
            extract_latency_budget_ms: 100,
            ..InferenceBudget::conservative()
        };
        assert_eq!(below_wall.effective_ms(), 100);

        let above_wall = InferenceBudget {
            extract_latency_budget_ms: 1_000,
            ..InferenceBudget::conservative()
        };
        assert_eq!(
            above_wall.effective_ms(),
            InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT
        );
    }

    /// Pins the hard wall to the value in solution.md R6 §Floor-c
    /// apparatus: 250ms.
    #[test]
    fn hard_wall_matches_spec_pinned_value() {
        assert_eq!(InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT, 250);
    }

    /// The gauge sample carries the fixed metric name and the same
    /// number `effective_ms` reports.
    #[test]
    fn gauge_emits_stable_name_and_effective_value() {
        let budget = InferenceBudget::conservative();
        let (gauge_name, sample) = budget.effective_ms_gauge();
        assert_eq!(gauge_name, "mnem_inference_budget_effective_ms");
        let expected = f64::from(budget.effective_ms());
        assert!((sample - expected).abs() < f64::EPSILON);
    }

    /// Zeroing any one of the three validated caps makes `validate`
    /// fail.
    #[test]
    fn validate_rejects_zero_caps() {
        let base = InferenceBudget::conservative();
        let zeroed = [
            InferenceBudget {
                max_inference_ms_per_commit: 0,
                ..base
            },
            InferenceBudget {
                extract_latency_budget_ms: 0,
                ..base
            },
            InferenceBudget {
                max_phrases_embedded: 0,
                ..base
            },
        ];
        for budget in &zeroed {
            assert!(budget.validate().is_err());
        }
    }

    /// Each variant renders as `inferred:<method>`.
    #[test]
    fn inference_method_renders_provenance_label() {
        let cases = [
            (
                InferenceMethod::PatternEmbedding,
                "inferred:pattern_embedding",
            ),
            (
                InferenceMethod::CooccurrencePmi,
                "inferred:cooccurrence_pmi",
            ),
            (
                InferenceMethod::Custom("my_method".into()),
                "inferred:my_method",
            ),
        ];
        for (method, expected) in &cases {
            assert_eq!(method.provenance_label(), *expected);
        }
    }

    /// `TypedRelation::new` fills in the label from the method and
    /// marks the edge as statistical.
    #[test]
    fn typed_relation_auto_tags_provenance_label() {
        let method = InferenceMethod::PatternEmbedding;
        let edge = TypedRelation::new("alice", "bob", "knows", 0.9, &method);
        assert_eq!(edge.source_label, "inferred:pattern_embedding");
        assert_eq!(edge.source, ExtractionSource::Statistical);
    }
}
305
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Floor-c property test for
        /// `mnem_inference_budget_effective_ms`.
        ///
        /// For any non-zero `(extract_ms, hard_ms)` in the sampled
        /// ranges, the effective budget equals
        /// `min(extract_ms, hard_ms)`, so it never exceeds the hard
        /// wall, and the gauge sample reports the same number.
        #[test]
        fn budget_respected(
            extract_ms in 1u32..10_000,
            hard_ms in 1u32..10_000,
            max_phrases in 1u32..100_000,
            max_types in 1u32..64,
            author_cap in 1u32..10_000,
        ) {
            let budget = InferenceBudget {
                extract_latency_budget_ms: extract_ms,
                max_inference_ms_per_commit: hard_ms,
                max_phrases_embedded: max_phrases,
                max_types,
                author_rate_limit_per_commit: author_cap,
            };
            // Every strategy starts at 1, so validation must succeed.
            prop_assert!(budget.validate().is_ok());

            let effective = budget.effective_ms();
            prop_assert!(effective <= extract_ms);
            prop_assert!(effective <= hard_ms);
            prop_assert_eq!(effective, extract_ms.min(hard_ms));

            let (_, sample) = budget.effective_ms_gauge();
            prop_assert!((sample - f64::from(effective)).abs() < f64::EPSILON);
        }

        /// With the conservative default, the effective budget is the
        /// spec-pinned 250ms hard wall.
        ///
        /// The generated `_n` is ignored, so every case performs the
        /// same check.
        #[test]
        fn conservative_default_matches_hard_wall(_n in 0u32..8) {
            let effective = InferenceBudget::conservative().effective_ms();
            prop_assert_eq!(
                effective,
                InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT,
            );
        }
    }
}