Skip to main content

mnem_core/resolve/
mod.rs

1//! Entity canonicalization - `resolve_or_create` (gap-catalog gap 04).
2//!
3//! This module implements the collapse-or-create decision for a string
4//! `query` against an HNSW-indexed population of already-known nodes.
5//! The design follows `research/gap-catalog/04-entity-canonicalization/`
6//! R1-R6, in particular:
7//!
8//! - **Distribution-derived collapse threshold** `tau_n` computed from
9//!   a k=2 Gaussian Mixture over the HNSW-local cosine sample. No global
10//!   magic cosine constant; the threshold tracks the corpus geometry.
11//!   `tau_n = max(mu_same - 2*sigma_same, mu_diff + sigma_diff)`.
12//! - **Two-of-three consensus collapse gate**: at least two of
13//!   (cosine, normalized_levenshtein, namespace/trust) must agree for
14//!   two nodes to be merged. Single-signal collapses are refused.
15//! - **Commit-id-derived HNSW seed**: the HNSW walk seed is
16//!   `BLAKE3(commit_cid || domain_sep)[..8]` - two runs against the
17//!   same commit get the same seed; different commits get independent
//!   seeds. Bootstrap fallback [`HNSW_SEED_FALLBACK`]
//!   (`0xCA_00_00_00_01_00_00_00_u64`) when the commit CID is the zero CID.
20//! - **`CommitBudgetGuard` wiring**: caller passes
21//!   `latency_budget_ms: Option<u32>` and the module opens a guard at
22//!   [`RESOLVE_OR_CREATE_P99_MS`] hard wall; exhaustion returns
23//!   [`ResolveResult::BudgetExhausted`] carrying the best-effort
24//!   candidate.
25//!
26//! # p99 floor-c apparatus (R6)
27//!
28//! [`RESOLVE_OR_CREATE_P99_MS`] is a tunable floor-c constant:
29//!
30//! - Reference standard: `p95_hnsw_walk_ms + consensus_overhead_ms
31//!   + p99_headroom = 50` on the reference repo (`|V|=1M`,
32//!   `avg_degree=12`).
33//! - Gauge: `mnem_resolve_or_create_p99_breach_total`.
34//! - Proptest: [`tests::resolve_or_create_hits_50ms_hard_wall`].
35//! - Unit test:
36//!   [`tests::resolve_creates_below_threshold`],
37//!   [`tests::resolve_merges_above_threshold`],
38//!   [`tests::threshold_derived_from_local_samples`],
39//!   [`tests::commit_budget_guard_cuts_off`].
40//!
41//! # Rollback template (see `scripts/rollback-gap-04.sql`)
42//!
43//! Rolling canonicalization back uses the following idempotent SQL
44//! template, kept here as a comment so readers don't have to chase the
45//! script file:
46//!
47//! ```sql
48//! -- scripts/rollback-gap-04.sql
49//! -- Rollback entity canonicalization emitted after <ROLLBACK_CID>.
50//! -- Invocation: mnem admin rollback --feature=canonicalization --after=<CID>
51//! -- Idempotent: re-running is safe (second run is a no-op).
52//!
53//! BEGIN TRANSACTION;
54//!
55//! -- 1. Drop canonical_cid props from nodes committed after the point.
56//! UPDATE nodes
57//!    SET props = json_remove(props, '$.canonical_cid')
58//!  WHERE commit_cid > :ROLLBACK_CID
59//!    AND json_extract(props, '$.canonical_cid') IS NOT NULL;
60//!
61//! -- 2. Drop the canonical cluster manifest rows.
62//! DROP TABLE IF EXISTS canonical_manifest_staging;
63//! DELETE FROM canonical_manifest
64//!  WHERE commit_cid > :ROLLBACK_CID;
65//!
66//! -- 3. Cache-flush NOTIFY handled post-SQL by mnem admin rollback:
67//! --    posts INTERNAL ResetCanonicalCache event to runtime, which
68//! --    drains AppState::canonical_cache + rebuilds lazily.
69//! NOTIFY canonical_cache_flush, :ROLLBACK_CID;
70//!
71//! -- 4. Reset rolling-telemetry derived counters so SLO alerting
72//! --    does not attribute post-rollback baselines to rolled commits.
73//! UPDATE rolling_stats
74//!    SET p50_canonicalize_ms = NULL,
75//!        p99_canonicalize_ms = NULL
76//!  WHERE last_updated_commit_cid > :ROLLBACK_CID;
77//!
78//! COMMIT;
79//! ```
80
81use crate::guard::CommitBudgetGuard;
82use crate::id::{Cid, NodeId};
83
/// R5 numeric p99 SLO for `mnem_resolve_or_create`, in milliseconds.
///
/// Derivation (floor-c, R6): `p95_hnsw_walk_ms (~35ms) +
/// consensus_overhead_ms (~5ms) + p99_headroom (~10ms) = 50`.
/// Labelled tunable. Exposed via `mnem_resolve_or_create_p99_breach_total`.
///
/// [`resolve_or_create`] uses this value both as the default budget
/// (when `latency_budget_ms` is `None`) and as the hard-wall argument
/// it hands to `CommitBudgetGuard::start`.
#[doc = "#[tunable]"]
pub const RESOLVE_OR_CREATE_P99_MS: u32 = 50;

/// R4 pinned ef_search for canonicalization HNSW handle. Separate
/// from retrieve ef_search to avoid cross-path drift. Reference
/// standard: Malkov-Yashunin 2016 §4 recall-vs-latency envelope
/// (ef=128 yields recall >= 0.95 at p95 latency < 20ms for 768-dim).
///
/// NOTE(review): not referenced by any function in this file; it is
/// presumably consumed by the caller that performs the HNSW walk.
pub const EF_SEARCH_CANONICAL: u32 = 128;

/// R5 bootstrap-only HNSW seed fallback for when `commit_cid` is the
/// zero CID (e.g. the first commit in an empty repo).
///
/// Returned by [`resolve_hnsw_seed`] together with
/// [`HnswSeedSource::Fallback`].
pub const HNSW_SEED_FALLBACK: u64 = 0xCA_00_00_00_01_00_00_00_u64;

/// R3 same-class sigma multiplier for collapse threshold. Derivation:
/// DBSCAN-/HDBSCAN-style inlier boundary `mean - 2*sigma`. Clamped to
/// `[1.5, 3.0]` at manifest-load time.
///
/// NOTE(review): the clamping is not performed anywhere in this file;
/// [`resolve_or_create`] passes this constant to
/// [`derive_local_threshold`] unchanged.
pub const SIGMA_MULTIPLIER_FOR_COLLAPSE: f32 = 2.0;

/// R3 same-class edit-distance tau (embedder-calibrated).
/// Max 25% normalized Levenshtein distance qualifies as an edit-dist
/// collapse signal.
///
/// The comparison in [`two_of_three_consensus`] is inclusive (`<=`).
pub const EDIT_DISTANCE_TAU: f32 = 0.25;

/// R4 minimum HNSW neighbourhood size below which threshold
/// derivation refuses to emit `canonical_cid`.
///
/// [`derive_local_threshold`] returns `None` for samples smaller than
/// this value.
pub const MIN_SAMPLE_SIZE: usize = 128;
115
/// Origin of the HNSW build seed used for this run.
///
/// Produced by [`resolve_hnsw_seed`] alongside the seed value so the
/// choice can be surfaced in [`ResolveOutcome::seed_source`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HnswSeedSource {
    /// Seed was derived via BLAKE3(commit_cid || domain_sep): the
    /// first eight digest bytes read as a little-endian `u64`.
    CommitDerived,
    /// Seed came from `MNEM_CANONICAL_HNSW_SEED` env var (decimal or
    /// `0x`-prefixed hex). Takes precedence over every other source
    /// when the variable is set and parses.
    EnvOverride,
    /// `commit_cid.is_zero()` path: bootstrap constant
    /// [`HNSW_SEED_FALLBACK`].
    Fallback,
}
126
/// Reasons a resolve call was refused (not merged, not created).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefusalReason {
    /// HNSW-local sample below [`MIN_SAMPLE_SIZE`]; threshold cannot
    /// be derived with statistical significance.
    ///
    /// Emitted by [`resolve_or_create`] when
    /// [`derive_local_threshold`] returns `None`.
    SampleTooSmall,
    /// Best candidate passed only one of the three consensus signals.
    ///
    /// Only [`resolve_or_create_simple`] reports this; the production
    /// [`resolve_or_create`] path maps the same situation to
    /// [`ResolveResult::Created`].
    SingleSignalOnly,
}
136
/// Outcome of [`resolve_or_create`] (and of
/// [`resolve_or_create_simple`]).
///
/// Only `PartialEq` is derived because [`ResolveResult::Created`]
/// carries an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub enum ResolveResult {
    /// Query collapsed onto an existing node.
    Resolved {
        /// The existing canonical node.
        node_id: NodeId,
        /// Number of consensus signals that agreed (2 or 3).
        signals_passed: u8,
    },
    /// Query did not match any existing node; caller should create.
    ///
    /// In [`resolve_or_create`] this also covers the case where the
    /// best candidate passed fewer than two signals.
    Created {
        /// Threshold the candidates were checked against: the derived
        /// `tau_n` in [`resolve_or_create`], or the caller-supplied
        /// threshold in [`resolve_or_create_simple`].
        tau_n: f32,
    },
    /// Guard ran the wall-clock budget out. Caller may retry with a
    /// larger budget.
    BudgetExhausted {
        /// Best candidate observed before the budget ran out: `None`
        /// when the budget was exhausted during threshold derivation,
        /// otherwise the candidate with the most passed signals (the
        /// earliest one on ties), if any candidates were supplied.
        best_effort: Option<NodeId>,
    },
    /// Refused to emit a decision (see [`RefusalReason`]).
    Refused(RefusalReason),
}
162
/// A sampled (candidate_id, cosine_to_query, name_for_edit_dist,
/// namespace, trust) tuple. Lifetime-free for testability: a real
/// caller pulls these from the HNSW walk.
///
/// Each field other than `node_id` feeds one of the signals evaluated
/// by [`two_of_three_consensus`].
#[derive(Debug, Clone)]
pub struct Candidate {
    /// Stable id of the candidate node.
    pub node_id: NodeId,
    /// Cosine similarity of candidate's embedding to the query.
    /// Compared (`>=`) against the collapse threshold.
    pub cosine: f32,
    /// Surface-form name of the candidate (for edit-distance signal).
    pub name: String,
    /// Candidate's namespace (e.g. "person", "company"). Must equal
    /// the query namespace exactly for the namespace/trust signal.
    pub namespace: String,
    /// Candidate's trust label. Must equal the query trust label
    /// exactly for the namespace/trust signal.
    pub trust: String,
}
179
/// Per-node distribution-derived threshold and its component stats,
/// as produced by [`derive_local_threshold`].
///
/// For a constant sample (all observations equal) the derivation
/// returns a surrogate where `tau_n`, `mu_same` and `mu_diff` all
/// equal that value and both sigmas are `0.0`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocalThreshold {
    /// The derived `tau_n = max(mu_same - k*sigma_same, mu_diff + sigma_diff)`,
    /// where `k` is the `sigma_multiplier` passed to the derivation.
    pub tau_n: f32,
    /// Mean of the same-class (higher-mean) GMM component.
    pub mu_same: f32,
    /// Std-dev of the same-class GMM component.
    pub sigma_same: f32,
    /// Mean of the different-class (lower-mean) GMM component.
    pub mu_diff: f32,
    /// Std-dev of the different-class GMM component.
    pub sigma_diff: f32,
    /// Size of the HNSW-local sample used.
    pub sample_size: usize,
}
196
197/// Run k=2 Gaussian Mixture on a pre-computed HNSW-local cosine
198/// sample and return the distribution-derived collapse threshold.
199///
200/// The sample must contain at least [`MIN_SAMPLE_SIZE`] observations;
201/// smaller samples return `None` (caller must then refuse to emit a
202/// `canonical_cid` and log
203/// `mnem_canonical_threshold_sample_insufficient_total`).
204///
205/// The implementation is a 1-D two-component GMM via a compact EM
206/// loop - no external crate. The loop is bounded at 32 iterations
207/// with a tolerance of `1e-4` on component-mean movement, both
208/// sufficient for 1-D bi-modal separation of embedding cosines.
209#[must_use]
210pub fn derive_local_threshold(cosines: &[f32], sigma_multiplier: f32) -> Option<LocalThreshold> {
211    if cosines.len() < MIN_SAMPLE_SIZE {
212        return None;
213    }
214    // k=2 EM with deterministic init from sample min/max.
215    let mut lo = f32::INFINITY;
216    let mut hi = f32::NEG_INFINITY;
217    for &c in cosines {
218        if c < lo {
219            lo = c;
220        }
221        if c > hi {
222            hi = c;
223        }
224    }
225    if hi <= lo {
226        // degenerate constant sample - no bimodality; return a
227        // single-gaussian surrogate so the caller still gets a tau.
228        let mu = f32::midpoint(hi, lo);
229        return Some(LocalThreshold {
230            tau_n: mu,
231            mu_same: mu,
232            sigma_same: 0.0,
233            mu_diff: mu,
234            sigma_diff: 0.0,
235            sample_size: cosines.len(),
236        });
237    }
238    // Deterministic init: low-quartile mean vs high-quartile mean.
239    let mut mu0 = lo + (hi - lo) * 0.25;
240    let mut mu1 = lo + (hi - lo) * 0.75;
241    let mut s0 = (hi - lo) / 4.0;
242    let mut s1 = (hi - lo) / 4.0;
243    let mut w0 = 0.5_f32;
244    let mut w1 = 0.5_f32;
245    for _ in 0..32 {
246        // E-step: soft responsibilities.
247        let mut n0 = 0.0f32;
248        let mut n1 = 0.0f32;
249        let mut sum0 = 0.0f32;
250        let mut sum1 = 0.0f32;
251        let mut sq0 = 0.0f32;
252        let mut sq1 = 0.0f32;
253        for &x in cosines {
254            let p0 = w0 * gaussian_pdf(x, mu0, s0.max(1e-6));
255            let p1 = w1 * gaussian_pdf(x, mu1, s1.max(1e-6));
256            let z = p0 + p1;
257            let (r0, r1) = if z > 0.0 {
258                (p0 / z, p1 / z)
259            } else {
260                (0.5, 0.5)
261            };
262            n0 += r0;
263            n1 += r1;
264            sum0 += r0 * x;
265            sum1 += r1 * x;
266            sq0 += r0 * x * x;
267            sq1 += r1 * x * x;
268        }
269        // M-step.
270        let new_mu0 = if n0 > 0.0 { sum0 / n0 } else { mu0 };
271        let new_mu1 = if n1 > 0.0 { sum1 / n1 } else { mu1 };
272        // clippy::suspicious_operation_groupings flags `a*a` adjacent to
273        // `b/c - d*d`; the expression is the standard
274        // variance = E[X^2] - (E[X])^2 form so we silence the lint.
275        #[allow(clippy::suspicious_operation_groupings)]
276        let new_s0 = if n0 > 0.0 {
277            ((sq0 / n0) - new_mu0 * new_mu0).max(1e-8).sqrt()
278        } else {
279            s0
280        };
281        #[allow(clippy::suspicious_operation_groupings)]
282        let new_s1 = if n1 > 0.0 {
283            ((sq1 / n1) - new_mu1 * new_mu1).max(1e-8).sqrt()
284        } else {
285            s1
286        };
287        let n_total = n0 + n1;
288        let new_w0 = if n_total > 0.0 { n0 / n_total } else { 0.5 };
289        let new_w1 = 1.0 - new_w0;
290        let moved = (new_mu0 - mu0).abs() + (new_mu1 - mu1).abs();
291        mu0 = new_mu0;
292        mu1 = new_mu1;
293        s0 = new_s0;
294        s1 = new_s1;
295        w0 = new_w0;
296        w1 = new_w1;
297        if moved < 1e-4 {
298            break;
299        }
300    }
301    // Same-class = higher-mean component.
302    let (mu_same, sigma_same, mu_diff, sigma_diff) = if mu1 >= mu0 {
303        (mu1, s1, mu0, s0)
304    } else {
305        (mu0, s0, mu1, s1)
306    };
307    let low = mu_same - sigma_multiplier * sigma_same;
308    let high = mu_diff + sigma_diff;
309    let tau_n = if low >= high { low } else { high };
310    Some(LocalThreshold {
311        tau_n,
312        mu_same,
313        sigma_same,
314        mu_diff,
315        sigma_diff,
316        sample_size: cosines.len(),
317    })
318}
319
/// Density of the univariate normal distribution `N(mu, sigma^2)`
/// evaluated at `x`. Hand-rolled to avoid an external `statrs` dep.
///
/// `sigma` is used as a divisor; callers are expected to pass a
/// strictly positive value.
#[inline]
fn gaussian_pdf(x: f32, mu: f32, sigma: f32) -> f32 {
    // Distance from the mean in units of sigma.
    let standardized = (x - mu) / sigma;
    // Normalising constant 1 / (sigma * sqrt(2 * pi)).
    let norm = 1.0 / (sigma * (2.0 * core::f32::consts::PI).sqrt());
    norm * (-0.5 * standardized * standardized).exp()
}
327
/// Levenshtein edit distance between `a` and `b`, divided by the
/// length of the longer string, giving a value in `[0, 1]`:
/// `0` = identical, `1` = maximally different.
///
/// Lengths and edits are counted in `char`s (Unicode scalar values),
/// not bytes. Two empty strings are defined to be identical (`0`).
/// Feeds the edit-distance consensus signal.
///
/// Classic O(m*n) dynamic programme kept to two rolling rows, pure
/// Rust with no extra crate.
#[must_use]
pub fn normalized_levenshtein(a: &str, b: &str) -> f32 {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();
    let longest = source.len().max(target.len());
    if longest == 0 {
        return 0.0;
    }

    // `above` is the previous DP row, `row` the one being filled.
    // Row 0 is the cost of building each prefix of `target` from "".
    let mut above: Vec<usize> = (0..=target.len()).collect();
    let mut row: Vec<usize> = vec![0; target.len() + 1];
    for (i, &sc) in source.iter().enumerate() {
        // Column 0: cost of deleting the first `i + 1` source chars.
        row[0] = i + 1;
        for (j, &tc) in target.iter().enumerate() {
            let deletion = above[j + 1] + 1;
            let insertion = row[j] + 1;
            let substitution = above[j] + usize::from(sc != tc);
            row[j + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut above, &mut row);
    }

    // After the final swap the last filled row lives in `above`.
    #[allow(clippy::cast_precision_loss)]
    let ratio = above[target.len()] as f32 / longest as f32;
    ratio.clamp(0.0, 1.0)
}
357
358/// Two-of-three consensus: returns `(signals_passed, per_signal)` where
359/// `per_signal = [cosine_ok, edit_ok, namespace_ok]`.
360///
361/// - `cosine_ok`: `cand.cosine >= tau_n` AND `cand.cosine >= tau_query`
362///   (symmetric collapse). We pass the same `tau_n` twice for the
363///   standalone resolve path (query has no pre-existing neighbourhood).
364/// - `edit_ok`: `normalized_levenshtein(query, cand.name) <= EDIT_DISTANCE_TAU`.
365/// - `namespace_ok`: `cand.namespace == query_namespace
366///   AND cand.trust == query_trust`.
367#[must_use]
368pub fn two_of_three_consensus(
369    query: &str,
370    query_namespace: &str,
371    query_trust: &str,
372    cand: &Candidate,
373    tau_n: f32,
374) -> (u8, [bool; 3]) {
375    let cosine_ok = cand.cosine >= tau_n;
376    let edit_ok = normalized_levenshtein(query, &cand.name) <= EDIT_DISTANCE_TAU;
377    let namespace_ok = cand.namespace == query_namespace && cand.trust == query_trust;
378    let passed = u8::from(cosine_ok) + u8::from(edit_ok) + u8::from(namespace_ok);
379    (passed, [cosine_ok, edit_ok, namespace_ok])
380}
381
382/// Resolve the commit-derived HNSW build seed.
383///
384/// - `MNEM_CANONICAL_HNSW_SEED` env var wins (decimal or `0x...` hex).
385/// - Else `BLAKE3(commit_cid.to_bytes() || domain_sep)[..8]` little-endian.
386/// - Else (commit_cid is zero): [`HNSW_SEED_FALLBACK`].
387///
388/// Note: `commit_cid.is_zero()` is approximated by comparing the CID's
389/// binary form to the zero-digest for the configured codec / hash.
390/// For testing we check whether every byte of the multihash digest is
391/// zero.
392#[must_use]
393pub fn resolve_hnsw_seed(commit_cid: &Cid) -> (u64, HnswSeedSource) {
394    if let Ok(s) = std::env::var("MNEM_CANONICAL_HNSW_SEED") {
395        if let Some(val) = parse_u64_dec_or_hex(&s) {
396            return (val, HnswSeedSource::EnvOverride);
397        }
398    }
399    if cid_has_zero_digest(commit_cid) {
400        return (HNSW_SEED_FALLBACK, HnswSeedSource::Fallback);
401    }
402    let mut h = blake3::Hasher::new();
403    let bytes = commit_cid.to_bytes();
404    h.update(&bytes);
405    h.update(b"mnem-gap-04-canonical-hnsw-v1");
406    let digest = h.finalize();
407    let d = digest.as_bytes();
408    let seed = u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]);
409    (seed, HnswSeedSource::CommitDerived)
410}
411
/// Parse a `u64` written either in decimal or as `0x` / `0X`-prefixed
/// hexadecimal. Surrounding whitespace is trimmed first.
///
/// Returns `None` for anything that is not a well-formed number in
/// range, including a bare `0x` and a sign after the hex prefix
/// (`0x+ff`).
fn parse_u64_dec_or_hex(s: &str) -> Option<u64> {
    let t = s.trim();
    match t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) {
        // `from_str_radix` tolerates a leading `+`, which would make
        // the malformed `0x+ff` parse as 255; reject it explicitly.
        Some(hex) if hex.starts_with('+') => None,
        Some(hex) => u64::from_str_radix(hex, 16).ok(),
        None => t.parse::<u64>().ok(),
    }
}
420
421fn cid_has_zero_digest(cid: &Cid) -> bool {
422    let bytes = cid.to_bytes();
423    // A CID in wire form is (version || codec || multihash). The
424    // multihash is (hash_code || len || digest). The trailing
425    // `len` bytes of `bytes` are the digest. We approximate
426    // "zero CID" as "all digest bytes are zero" which matches the
427    // `zero_cid()` helper used in tests.
428    bytes.iter().rev().take(32).all(|&b| b == 0)
429}
430
/// Request payload for [`resolve_or_create`].
#[derive(Debug, Clone)]
pub struct ResolveRequest {
    /// The surface-form string to resolve. Compared against each
    /// candidate's `name` for the edit-distance signal.
    pub query: String,
    /// Namespace (e.g. "person", "company").
    pub namespace: String,
    /// Trust label (e.g. "verified").
    pub trust: String,
    /// Candidates pre-sampled from the HNSW walk. In a real pipeline
    /// this is populated by the caller from the canonicalization
    /// HNSW handle keyed by the commit-derived seed.
    ///
    /// Order matters: when several candidates pass the same number of
    /// consensus signals, the earliest one in this list is selected.
    pub candidates: Vec<Candidate>,
    /// HNSW-local cosine sample for threshold derivation. Must be at
    /// least [`MIN_SAMPLE_SIZE`] long; shorter samples return
    /// [`ResolveResult::Refused`].
    pub local_sample: Vec<f32>,
    /// Caller-supplied latency budget override. `None` means use
    /// [`RESOLVE_OR_CREATE_P99_MS`].
    pub latency_budget_ms: Option<u32>,
    /// The commit the resolve is happening under (for HNSW seed +
    /// guard envelope).
    pub commit_cid: Cid,
}
455
/// Full outcome of a resolve call, including the guard's report for
/// embedding in the commit envelope and the (seed, source) pair used
/// for the HNSW walk.
///
/// Every return path of [`resolve_or_create`] fills all fields, so the
/// report and seed are available even when the decision is
/// `BudgetExhausted` or `Refused`.
#[derive(Debug)]
pub struct ResolveOutcome {
    /// Primary resolution decision.
    pub result: ResolveResult,
    /// Budget-guard report - host embeds in the commit envelope and
    /// feeds to the metric sink.
    pub report: crate::guard::CommitBudgetReport,
    /// Seed used for the HNSW walk (for audit / replay determinism).
    pub seed: u64,
    /// Source of the HNSW seed for this run.
    pub seed_source: HnswSeedSource,
    /// The per-node distribution-derived threshold. `None` when the
    /// local sample was too small.
    pub threshold: Option<LocalThreshold>,
}
474
475/// Resolve a query string onto an existing canonical node, or
476/// decide that a new node should be created.
477///
478/// The function is pure except for (a) wall-clock reads via the
479/// `CommitBudgetGuard` and (b) the optional
480/// `MNEM_CANONICAL_HNSW_SEED` env-var. Both are documented.
481///
482/// For a concrete single-shot API `(query, threshold) -> ResolveResult`
483/// see [`resolve_or_create_simple`]; this function is the full
484/// production shape carrying candidates + local sample.
485///
486/// # Panics
487///
488/// Does not panic. All fallible paths map to [`ResolveResult`] variants.
489pub fn resolve_or_create(req: &ResolveRequest) -> ResolveOutcome {
490    let budget_ms = req.latency_budget_ms.unwrap_or(RESOLVE_OR_CREATE_P99_MS);
491    let mut guard = CommitBudgetGuard::start(
492        "gap-04-resolve-or-create",
493        budget_ms,
494        RESOLVE_OR_CREATE_P99_MS,
495        req.commit_cid.clone(),
496    );
497    let (seed, seed_source) = resolve_hnsw_seed(&req.commit_cid);
498
499    // Stage 1: derive distribution threshold.
500    let threshold = derive_local_threshold(&req.local_sample, SIGMA_MULTIPLIER_FOR_COLLAPSE);
501    let charge1 = guard.charge("derive_threshold");
502    if charge1.is_err() {
503        let report = guard.into_report();
504        return ResolveOutcome {
505            result: ResolveResult::BudgetExhausted { best_effort: None },
506            report,
507            seed,
508            seed_source,
509            threshold,
510        };
511    }
512    let Some(thr) = threshold else {
513        let report = guard.into_report();
514        return ResolveOutcome {
515            result: ResolveResult::Refused(RefusalReason::SampleTooSmall),
516            report,
517            seed,
518            seed_source,
519            threshold,
520        };
521    };
522
523    // Stage 2: pick best candidate by cosine and run two-of-three.
524    let mut best: Option<(&Candidate, u8)> = None;
525    for cand in &req.candidates {
526        let (passed, _) =
527            two_of_three_consensus(&req.query, &req.namespace, &req.trust, cand, thr.tau_n);
528        match best {
529            Some((_, p)) if p >= passed => {}
530            _ => best = Some((cand, passed)),
531        }
532    }
533    let charge2 = guard.charge("consensus");
534    if let Err(_e) = charge2 {
535        let best_effort = best.map(|(c, _)| c.node_id);
536        let report = guard.into_report();
537        return ResolveOutcome {
538            result: ResolveResult::BudgetExhausted { best_effort },
539            report,
540            seed,
541            seed_source,
542            threshold,
543        };
544    }
545    // Per R3: single-signal collapses are refused, meaning the node
546    // stays un-collapsed; i.e. the caller gets a Created decision. We
547    // preserve the explicit `Refused(SingleSignalOnly)` variant for
548    // callers (and gauges) that need to observe the distinction, but
549    // only when the caller explicitly opts in via
550    // [`resolve_or_create_simple`]. For the production path,
551    // un-collapsed == Created so the host creates the node.
552    let result = match best {
553        Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
554            node_id: cand.node_id,
555            signals_passed: signals,
556        },
557        _ => ResolveResult::Created { tau_n: thr.tau_n },
558    };
559    let report = guard.into_report();
560    ResolveOutcome {
561        result,
562        report,
563        seed,
564        seed_source,
565        threshold,
566    }
567}
568
569/// Tight `(query, threshold) -> ResolveResult` shape from the gap brief.
570///
571/// This is a thin convenience wrapper for a caller that has already
572/// done its own HNSW walk and just wants the decision: passing a
573/// `threshold` here bypasses the GMM derivation and checks the best
574/// candidate against the given fixed threshold using the same
575/// two-of-three consensus gate. Present primarily to keep the public
576/// API surface matching the gap brief; production callers should use
577/// [`resolve_or_create`] with a `local_sample`.
578#[must_use]
579pub fn resolve_or_create_simple(
580    query: &str,
581    threshold: f32,
582    candidates: &[Candidate],
583    namespace: &str,
584    trust: &str,
585) -> ResolveResult {
586    let mut best: Option<(&Candidate, u8)> = None;
587    for cand in candidates {
588        let (passed, _) = two_of_three_consensus(query, namespace, trust, cand, threshold);
589        match best {
590            Some((_, p)) if p >= passed => {}
591            _ => best = Some((cand, passed)),
592        }
593    }
594    match best {
595        Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
596            node_id: cand.node_id,
597            signals_passed: signals,
598        },
599        Some((_, 1)) => ResolveResult::Refused(RefusalReason::SingleSignalOnly),
600        _ => ResolveResult::Created { tau_n: threshold },
601    }
602}
603
// Test suite for this module; declared out-of-line, so the bodies
// live in a separate `tests` source file and are compiled only for
// test builds.
#[cfg(test)]
mod tests;