Skip to main content

mnem_extract/
inference.rs

1//! Optional typed-relation inference for mnem-extract.
2//!
3//! Gated behind the `typed-relations` Cargo feature. Default OFF.
4//!
5//! # What this module defines
6//!
7//! - [`InferenceBudget`] - shared primitive, wall-clock + volume caps
8//!   for the inference stage of one commit.
9//! - [`TypedRelation`] - edge payload emitted by
10//!   [`crate::traits::Extractor::infer_typed_relations`].
11//! - [`InferenceMethod`] - provenance tag attached to every inferred
12//!   edge (solution.md R3 §2).
13//!
14//! # What this module does NOT define
15//!
16//! - Clustering (Leiden). Lives in a downstream crate.
17//! - Canary suite. See `gap-catalog/shared/canary-suite.md`.
18//! - CLI wiring. That's `mnem-ingest` + `mnem-cli`.
19//!
20//! # Floor-c tunable
21//!
22//! [`InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT`] = 250ms. Half of
23//! `max_cooccurrence_ms = 500` (commit-envelope reserve, gap 10).
24//! Exposed as gauge `mnem_inference_budget_effective_ms` via
25//! [`InferenceBudget::effective_ms_gauge`] and enforced by proptest
26//! [`proptests::budget_respected`].
27
28use serde::{Deserialize, Serialize};
29
30use crate::traits::ExtractionSource;
31
/// Name of the gauge that reports the runtime-effective inference
/// budget, in milliseconds.
///
/// This is the `name` half of the `(name, value)` sample returned by
/// [`InferenceBudget::effective_ms_gauge`]; this module only builds
/// the sample and leaves emission to the caller. The gauge is one
/// part of the three-condition floor-c apparatus (named constant +
/// gauge + proptest); the proptest part lives in this module's tests.
pub const BUDGET_GAUGE_NAME: &str = "mnem_inference_budget_effective_ms";
38
/// Shared primitive: wall-clock and volume caps for the inference
/// stage of a single commit.
///
/// All fields are floor-c tunables (solution.md R6 §Constant
/// classification table) or corpus-derived. No magic numbers.
///
/// The struct is plain data with public fields: nothing in this
/// module computes the formulas quoted on each field, so callers
/// that build a budget by hand are responsible for the values. Use
/// [`InferenceBudget::validate`] to reject zero-valued time and
/// phrase caps, and [`InferenceBudget::effective_ms`] to obtain the
/// wall-clock budget that actually applies.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct InferenceBudget {
    /// Rolling wall-clock budget (milliseconds) for the
    /// extract-and-embed phase of inference, derived from
    /// `3 * rolling.p50_ingest_phrase_embed_ms` (fallback 500ms).
    /// Class `a` (rolling telemetry).
    ///
    /// Note that [`InferenceBudget::conservative`] sets this to the
    /// 250ms hard wall rather than to the 500ms fallback. Must be
    /// non-zero to pass [`InferenceBudget::validate`].
    pub extract_latency_budget_ms: u32,

    /// Hard wall-clock ceiling per commit, in milliseconds. Class `c`
    /// (reference standard): 250ms = 50% of
    /// `max_cooccurrence_ms = 500`.
    ///
    /// [`InferenceBudget::effective_ms`] never returns more than
    /// this. Must be non-zero to pass [`InferenceBudget::validate`].
    pub max_inference_ms_per_commit: u32,

    /// Hard ceiling on bridging-phrase embeddings per commit. Class
    /// `a` (corpus-derived): `min(50_000, sqrt(N_phrases) * 100)`.
    ///
    /// Must be non-zero to pass [`InferenceBudget::validate`].
    pub max_phrases_embedded: u32,

    /// Max inferred relation types emitted per commit. Class `a`
    /// (corpus-derived): `ceil(log2(corpus_size))`.
    ///
    /// Not checked by [`InferenceBudget::validate`].
    pub max_types: u32,

    /// Per-author bridging-phrase cap per commit. Class `a`
    /// (corpus-derived): `max(200, 0.01 * mean_phrases_per_author)`.
    ///
    /// Not checked by [`InferenceBudget::validate`].
    pub author_rate_limit_per_commit: u32,
}
67
68impl InferenceBudget {
69    /// Floor-c reference standard for the per-commit hard wall.
70    ///
71    /// 250ms. See `solution.md` R6 §Floor-c apparatus.
72    pub const MAX_INFERENCE_MS_PER_COMMIT: u32 = 250;
73
74    /// Fallback for `extract_latency_budget_ms` when rolling p50
75    /// telemetry is unavailable. See
76    /// `shared/inference-budget.md` §API sketch.
77    pub const FALLBACK_EXTRACT_LATENCY_MS: u32 = 500;
78
79    /// Conservative defaults for CI / proptest / initial runs.
80    ///
81    /// Real deployments derive via a stats-aware constructor in the
82    /// ingest crate; this keeps `mnem-extract` free of clock or
83    /// telemetry dependencies.
84    #[must_use]
85    pub const fn conservative() -> Self {
86        Self {
87            extract_latency_budget_ms: Self::MAX_INFERENCE_MS_PER_COMMIT,
88            max_inference_ms_per_commit: Self::MAX_INFERENCE_MS_PER_COMMIT,
89            max_phrases_embedded: 10_000,
90            max_types: 8,
91            author_rate_limit_per_commit: 200,
92        }
93    }
94
95    /// Effective runtime budget = `min(extract_latency_budget_ms,
96    /// max_inference_ms_per_commit)`.
97    ///
98    /// The hard wall is always the ceiling, even when rolling
99    /// telemetry computes a higher extract budget.
100    #[must_use]
101    pub const fn effective_ms(&self) -> u32 {
102        let extract = self.extract_latency_budget_ms;
103        let hard = self.max_inference_ms_per_commit;
104        if extract < hard { extract } else { hard }
105    }
106
107    /// Sample for gauge `mnem_inference_budget_effective_ms`.
108    ///
109    /// Caller is responsible for emission; this module does not link
110    /// a metrics backend. Returning `(name, value)` lets the caller
111    /// use either `metrics::gauge!` or a custom registry.
112    #[must_use]
113    pub fn effective_ms_gauge(&self) -> (&'static str, f64) {
114        (BUDGET_GAUGE_NAME, f64::from(self.effective_ms()))
115    }
116
117    /// Validate that the budget is internally consistent.
118    ///
119    /// Returns `Err` with a static reason when:
120    ///
121    /// 1. `max_inference_ms_per_commit` is zero.
122    /// 2. `extract_latency_budget_ms` is zero.
123    /// 3. `max_phrases_embedded` is zero.
124    ///
125    /// Zero-valued caps are always a programming error: they would
126    /// make the entire inference pass a no-op and silently hide bugs.
127    pub fn validate(&self) -> Result<(), &'static str> {
128        if self.max_inference_ms_per_commit == 0 {
129            return Err("max_inference_ms_per_commit must be > 0");
130        }
131        if self.extract_latency_budget_ms == 0 {
132            return Err("extract_latency_budget_ms must be > 0");
133        }
134        if self.max_phrases_embedded == 0 {
135            return Err("max_phrases_embedded must be > 0");
136        }
137        Ok(())
138    }
139}
140
/// `Default` is the conservative budget, so
/// `InferenceBudget::default()` and [`InferenceBudget::conservative`]
/// produce the same value.
impl Default for InferenceBudget {
    /// Delegates to [`InferenceBudget::conservative`].
    fn default() -> Self {
        Self::conservative()
    }
}
146
/// Provenance tag describing *how* a typed relation was inferred.
///
/// Serialised into `TypedRelation::source_label` as
/// `"inferred:<method>"` per solution.md R3: every edge carries its
/// inference method so rollback and audit can filter by origin.
///
/// The enum is `#[non_exhaustive]`, so matches written outside this
/// crate need a wildcard arm. With serde, variant names are written
/// in `snake_case` (see the `rename_all` attribute), which matches
/// the spelling [`InferenceMethod::provenance_label`] uses for the
/// two built-in variants.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InferenceMethod {
    /// KeyBERT-style pattern-embedding clustering (Leiden).
    /// Rendered as `inferred:pattern_embedding`.
    PatternEmbedding,
    /// Co-occurrence PMI promoted to a named type via clustering.
    /// Rendered as `inferred:cooccurrence_pmi`.
    CooccurrencePmi,
    /// Caller-supplied custom method. String is forensics-tagged in
    /// the provenance label verbatim; keep it short and snake_case.
    ///
    /// [`InferenceMethod::provenance_label`] interpolates the string
    /// as-is with no validation or escaping, so an empty string
    /// yields the bare label `inferred:`.
    Custom(String),
}
164
165impl InferenceMethod {
166    /// Render as the `inferred:<method>` tag used in provenance
167    /// labels and the `mnem commit` audit stream.
168    #[must_use]
169    pub fn provenance_label(&self) -> String {
170        match self {
171            Self::PatternEmbedding => "inferred:pattern_embedding".to_string(),
172            Self::CooccurrencePmi => "inferred:cooccurrence_pmi".to_string(),
173            Self::Custom(s) => format!("inferred:{s}"),
174        }
175    }
176}
177
/// An inferred typed edge between two previously-extracted entities.
///
/// Distinct from [`crate::traits::Relation`]:
///
/// - [`Relation`](crate::traits::Relation) is a raw `(src, dst,
///   weight)` triple with no named predicate.
/// - `TypedRelation` has a clustering-assigned predicate and a
///   confidence in `[0.0, 1.0]` so the trust gate can filter it.
///
/// All fields are public and [`TypedRelation::new`] stores its
/// arguments as given, so the `[0.0, 1.0]` confidence range and the
/// `"inferred:<method>"` label shape are conventions this type does
/// not itself enforce on hand-built values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TypedRelation {
    /// Subject entity mention.
    pub src: String,
    /// Object entity mention.
    pub dst: String,
    /// Clustering-assigned predicate label (e.g. `"causes"`).
    pub predicate: String,
    /// Confidence in `[0.0, 1.0]`. Fed to
    /// [`crate::trust::TrustBoundary::admit`] downstream.
    ///
    /// [`TypedRelation::new`] neither clamps nor validates this
    /// value; keeping it in range is the caller's job.
    pub confidence: f32,
    /// Provenance in the full taxonomy ([`ExtractionSource`]).
    /// [`TypedRelation::new`] always sets
    /// `ExtractionSource::Statistical`.
    pub source: ExtractionSource,
    /// Short human-readable provenance label, always of the shape
    /// `"inferred:<method>"`. Derived from [`InferenceMethod`] at
    /// emission time to keep the struct `serde`-flat.
    pub source_label: String,
}
204
205impl TypedRelation {
206    /// Build a new typed relation with the provenance label
207    /// auto-derived from `method`.
208    #[must_use]
209    pub fn new(
210        src: impl Into<String>,
211        dst: impl Into<String>,
212        predicate: impl Into<String>,
213        confidence: f32,
214        method: &InferenceMethod,
215    ) -> Self {
216        Self {
217            src: src.into(),
218            dst: dst.into(),
219            predicate: predicate.into(),
220            confidence,
221            source: ExtractionSource::Statistical,
222            source_label: method.provenance_label(),
223        }
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// The out-of-the-box budget must satisfy its own validator.
    #[test]
    fn conservative_budget_passes_validation() {
        assert_eq!(InferenceBudget::conservative().validate(), Ok(()));
    }

    /// The effective budget follows the extract budget while it is
    /// below the hard wall and is capped by the wall above it.
    #[test]
    fn effective_ms_is_minimum_of_extract_and_hard_wall() {
        let below_wall = InferenceBudget {
            extract_latency_budget_ms: 100,
            ..InferenceBudget::conservative()
        };
        assert_eq!(below_wall.effective_ms(), 100);

        let above_wall = InferenceBudget {
            extract_latency_budget_ms: 1_000,
            ..InferenceBudget::conservative()
        };
        assert_eq!(
            above_wall.effective_ms(),
            InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT
        );
    }

    /// Pins the hard wall to the value in solution.md R6 §Floor-c
    /// apparatus: 250ms.
    #[test]
    fn hard_wall_matches_spec_pinned_value() {
        assert_eq!(InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT, 250);
    }

    /// The gauge sample carries the stable gauge name and the same
    /// number `effective_ms` reports.
    #[test]
    fn gauge_emits_stable_name_and_effective_value() {
        let budget = InferenceBudget::conservative();
        let (name, value) = budget.effective_ms_gauge();
        assert_eq!(name, "mnem_inference_budget_effective_ms");
        let drift = (value - f64::from(budget.effective_ms())).abs();
        assert!(drift < f64::EPSILON);
    }

    /// Each of the three validated caps is rejected when zeroed on
    /// its own.
    #[test]
    fn validate_rejects_zero_caps() {
        let base = InferenceBudget::conservative();
        let broken = [
            InferenceBudget {
                max_inference_ms_per_commit: 0,
                ..base
            },
            InferenceBudget {
                extract_latency_budget_ms: 0,
                ..base
            },
            InferenceBudget {
                max_phrases_embedded: 0,
                ..base
            },
        ];
        for budget in &broken {
            assert!(budget.validate().is_err());
        }
    }

    /// Every variant renders as `inferred:<method>`.
    #[test]
    fn inference_method_renders_provenance_label() {
        let cases = [
            (
                InferenceMethod::PatternEmbedding,
                "inferred:pattern_embedding",
            ),
            (
                InferenceMethod::CooccurrencePmi,
                "inferred:cooccurrence_pmi",
            ),
            (
                InferenceMethod::Custom("my_method".into()),
                "inferred:my_method",
            ),
        ];
        for (method, expected) in &cases {
            assert_eq!(method.provenance_label(), *expected);
        }
    }

    /// `TypedRelation::new` derives the label from the method and
    /// tags the edge as statistically extracted.
    #[test]
    fn typed_relation_auto_tags_provenance_label() {
        let method = InferenceMethod::PatternEmbedding;
        let relation = TypedRelation::new("alice", "bob", "knows", 0.9, &method);
        assert_eq!(relation.source_label, "inferred:pattern_embedding");
        assert_eq!(relation.source, ExtractionSource::Statistical);
    }
}
305
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Floor-c proptest for `mnem_inference_budget_effective_ms`.
        ///
        /// For any non-zero `(extract_ms, hard_ms)` pair the budget
        /// validates, the effective budget equals
        /// `min(extract_ms, hard_ms)` (so it never exceeds the hard
        /// wall), and the gauge sample carries that same number.
        #[test]
        fn budget_respected(
            extract_ms in 1u32..10_000,
            hard_ms in 1u32..10_000,
            max_phrases in 1u32..100_000,
            max_types in 1u32..64,
            author_cap in 1u32..10_000,
        ) {
            let budget = InferenceBudget {
                extract_latency_budget_ms: extract_ms,
                max_inference_ms_per_commit: hard_ms,
                max_phrases_embedded: max_phrases,
                max_types,
                author_rate_limit_per_commit: author_cap,
            };
            prop_assert!(budget.validate().is_ok());

            let effective = budget.effective_ms();
            let smaller_cap = extract_ms.min(hard_ms);
            prop_assert!(effective <= extract_ms);
            prop_assert!(effective <= hard_ms);
            prop_assert_eq!(effective, smaller_cap);

            let (_, sample) = budget.effective_ms_gauge();
            let drift = (sample - f64::from(effective)).abs();
            prop_assert!(drift < f64::EPSILON);
        }

        /// With the conservative default both time caps sit on the
        /// spec-pinned 250ms wall, so the effective budget does too.
        /// The generated `_n` is unused; it only lets this check run
        /// through the proptest harness.
        #[test]
        fn conservative_default_matches_hard_wall(_n in 0u32..8) {
            let hard_wall = InferenceBudget::MAX_INFERENCE_MS_PER_COMMIT;
            let effective = InferenceBudget::conservative().effective_ms();
            prop_assert_eq!(effective, hard_wall);
        }
    }
}
352}