mnem_core/resolve/mod.rs
1//! Entity canonicalization - `resolve_or_create` (gap-catalog gap 04).
2//!
3//! This module implements the collapse-or-create decision for a string
4//! `query` against an HNSW-indexed population of already-known nodes.
5//! The design follows `research/gap-catalog/04-entity-canonicalization/`
6//! R1-R6, in particular:
7//!
8//! - **Distribution-derived collapse threshold** `tau_n` computed from
9//! a k=2 Gaussian Mixture over the HNSW-local cosine sample. No global
10//! magic cosine constant; the threshold tracks the corpus geometry.
11//! `tau_n = max(mu_same - 2*sigma_same, mu_diff + sigma_diff)`.
12//! - **Two-of-three consensus collapse gate**: at least two of
13//! (cosine, normalized_levenshtein, namespace/trust) must agree for
14//! two nodes to be merged. Single-signal collapses are refused.
15//! - **Commit-id-derived HNSW seed**: the HNSW walk seed is
16//! `BLAKE3(commit_cid || domain_sep)[..8]` - two runs against the
17//! same commit get the same seed; different commits get independent
//! seeds. Bootstrap fallback [`HNSW_SEED_FALLBACK`] when the commit CID
//! is the zero CID.
20//! - **`CommitBudgetGuard` wiring**: caller passes
21//! `latency_budget_ms: Option<u32>` and the module opens a guard at
22//! [`RESOLVE_OR_CREATE_P99_MS`] hard wall; exhaustion returns
23//! [`ResolveResult::BudgetExhausted`] carrying the best-effort
24//! candidate.
25//!
26//! # p99 floor-c apparatus (R6)
27//!
28//! [`RESOLVE_OR_CREATE_P99_MS`] is a tunable floor-c constant:
29//!
30//! - Reference standard: `p95_hnsw_walk_ms + consensus_overhead_ms
31//! + p99_headroom = 50` on the reference repo (`|V|=1M`,
32//! `avg_degree=12`).
33//! - Gauge: `mnem_resolve_or_create_p99_breach_total`.
34//! - Proptest: [`tests::resolve_or_create_hits_50ms_hard_wall`].
35//! - Unit test:
36//! [`tests::resolve_creates_below_threshold`],
37//! [`tests::resolve_merges_above_threshold`],
38//! [`tests::threshold_derived_from_local_samples`],
39//! [`tests::commit_budget_guard_cuts_off`].
40//!
41//! # Rollback template (see `scripts/rollback-gap-04.sql`)
42//!
43//! Rolling canonicalization back uses the following idempotent SQL
44//! template, kept here as a comment so readers don't have to chase the
45//! script file:
46//!
47//! ```sql
48//! -- scripts/rollback-gap-04.sql
49//! -- Rollback entity canonicalization emitted after <ROLLBACK_CID>.
50//! -- Invocation: mnem admin rollback --feature=canonicalization --after=<CID>
51//! -- Idempotent: re-running is safe (second run is a no-op).
52//!
53//! BEGIN TRANSACTION;
54//!
55//! -- 1. Drop canonical_cid props from nodes committed after the point.
56//! UPDATE nodes
57//! SET props = json_remove(props, '$.canonical_cid')
58//! WHERE commit_cid > :ROLLBACK_CID
59//! AND json_extract(props, '$.canonical_cid') IS NOT NULL;
60//!
61//! -- 2. Drop the canonical cluster manifest rows.
62//! DROP TABLE IF EXISTS canonical_manifest_staging;
63//! DELETE FROM canonical_manifest
64//! WHERE commit_cid > :ROLLBACK_CID;
65//!
66//! -- 3. Cache-flush NOTIFY handled post-SQL by mnem admin rollback:
67//! -- posts INTERNAL ResetCanonicalCache event to runtime, which
68//! -- drains AppState::canonical_cache + rebuilds lazily.
69//! NOTIFY canonical_cache_flush, :ROLLBACK_CID;
70//!
71//! -- 4. Reset rolling-telemetry derived counters so SLO alerting
72//! -- does not attribute post-rollback baselines to rolled commits.
73//! UPDATE rolling_stats
74//! SET p50_canonicalize_ms = NULL,
75//! p99_canonicalize_ms = NULL
76//! WHERE last_updated_commit_cid > :ROLLBACK_CID;
77//!
78//! COMMIT;
79//! ```
80
81use crate::guard::CommitBudgetGuard;
82use crate::id::{Cid, NodeId};
83
/// R5 numeric p99 SLO for `mnem_resolve_or_create`, in milliseconds.
///
/// [`resolve_or_create`] falls back to this value when the request
/// carries no `latency_budget_ms`, and always passes it to
/// `CommitBudgetGuard::start` alongside the effective budget.
///
/// Derivation (floor-c, R6): `p95_hnsw_walk_ms (~35ms) +
/// consensus_overhead_ms (~5ms) + p99_headroom (~10ms) = 50`.
/// Labelled tunable. Exposed via `mnem_resolve_or_create_p99_breach_total`.
#[doc = "#[tunable]"]
pub const RESOLVE_OR_CREATE_P99_MS: u32 = 50;

/// R4 pinned ef_search for canonicalization HNSW handle. Separate
/// from retrieve ef_search to avoid cross-path drift. Reference
/// standard: Malkov-Yashunin 2016 §4 recall-vs-latency envelope
/// (ef=128 yields recall >= 0.95 at p95 latency < 20ms for 768-dim).
///
/// Not read by any function in this module; presumably consumed by
/// the caller that performs the HNSW walk - TODO confirm.
pub const EF_SEARCH_CANONICAL: u32 = 128;

/// R5 bootstrap-only HNSW seed fallback for when `commit_cid` is the
/// zero CID (e.g. the first commit in an empty repo).
///
/// Returned by [`resolve_hnsw_seed`] together with
/// [`HnswSeedSource::Fallback`].
pub const HNSW_SEED_FALLBACK: u64 = 0xCA_00_00_00_01_00_00_00_u64;

/// R3 same-class sigma multiplier for collapse threshold. Derivation:
/// DBSCAN-/HDBSCAN-style inlier boundary `mean - 2*sigma`. Clamped to
/// `[1.5, 3.0]` at manifest-load time.
///
/// NOTE(review): the clamp is not performed in this module;
/// [`resolve_or_create`] passes this constant to
/// [`derive_local_threshold`] unchanged.
pub const SIGMA_MULTIPLIER_FOR_COLLAPSE: f32 = 2.0;

/// R3 same-class edit-distance tau (embedder-calibrated).
/// Max 25% normalized Levenshtein distance qualifies as an edit-dist
/// collapse signal (the comparison in [`two_of_three_consensus`] is
/// inclusive: `<=`).
pub const EDIT_DISTANCE_TAU: f32 = 0.25;

/// R4 minimum HNSW neighbourhood size below which threshold
/// derivation refuses to emit `canonical_cid`.
///
/// [`derive_local_threshold`] returns `None` for samples shorter than
/// this.
pub const MIN_SAMPLE_SIZE: usize = 128;
115
/// Origin of the HNSW build seed used for this run.
///
/// Returned next to the seed by [`resolve_hnsw_seed`] so the host can
/// audit which path produced it. Derives `Hash` (in addition to the
/// comparison traits) so the value can key per-source counters in a
/// `HashMap` / `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HnswSeedSource {
    /// Seed was derived via BLAKE3(commit_cid || domain_sep).
    CommitDerived,
    /// Seed came from `MNEM_CANONICAL_HNSW_SEED` env var.
    EnvOverride,
    /// Zero-CID path: the bootstrap constant [`HNSW_SEED_FALLBACK`]
    /// was used instead of a commit-derived seed.
    Fallback,
}
126
/// Reasons a resolve call was refused (not merged, not created).
///
/// Derives `Hash` (in addition to the comparison traits) so refusals
/// can be tallied per reason in a `HashMap`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RefusalReason {
    /// HNSW-local sample below [`MIN_SAMPLE_SIZE`]; threshold cannot
    /// be derived with statistical significance.
    SampleTooSmall,
    /// Best candidate passed only one of the three consensus signals.
    /// Only [`resolve_or_create_simple`] reports this; the production
    /// path [`resolve_or_create`] maps the same situation to
    /// [`ResolveResult::Created`].
    SingleSignalOnly,
}
136
/// Outcome of [`resolve_or_create`] (also returned directly by
/// [`resolve_or_create_simple`]).
#[derive(Debug, Clone, PartialEq)]
pub enum ResolveResult {
    /// Query collapsed onto an existing node.
    Resolved {
        /// The existing canonical node.
        node_id: NodeId,
        /// Number of consensus signals that agreed (2 or 3).
        signals_passed: u8,
    },
    /// Query did not match any existing node; caller should create.
    Created {
        /// Threshold the candidates were checked against: the derived
        /// `tau_n` on the production path, or the caller-supplied
        /// threshold on the simple path.
        tau_n: f32,
    },
    /// Guard ran the wall-clock budget out. `best_effort` is the
    /// candidate that had passed the most consensus signals when the
    /// budget ran out, if any; caller may retry with a larger budget.
    BudgetExhausted {
        /// Best candidate observed before the budget ran out. `None`
        /// when the budget was exhausted before any candidate was
        /// evaluated, or when there were no candidates.
        best_effort: Option<NodeId>,
    },
    /// Refused to emit a decision (see [`RefusalReason`]).
    Refused(RefusalReason),
}
162
/// One sampled neighbour of the query: candidate id, cosine to the
/// query, surface-form name (for the edit-distance signal), namespace
/// and trust label. The struct owns its strings (no lifetime
/// parameter) for testability: a real caller pulls these from the
/// HNSW walk.
#[derive(Debug, Clone)]
pub struct Candidate {
    /// Stable id of the candidate node.
    pub node_id: NodeId,
    /// Cosine similarity of candidate's embedding to the query.
    pub cosine: f32,
    /// Surface-form name of the candidate (for edit-distance signal).
    pub name: String,
    /// Candidate's namespace (e.g. "person", "company").
    pub namespace: String,
    /// Candidate's trust label.
    pub trust: String,
}
179
/// Per-node distribution-derived threshold and its component stats,
/// as produced by [`derive_local_threshold`].
///
/// For a constant sample the derivation returns a single-gaussian
/// surrogate: both means and `tau_n` equal the sample value and both
/// sigmas are `0.0`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocalThreshold {
    /// The derived `tau_n = max(mu_same - k*sigma_same, mu_diff + sigma_diff)`,
    /// where `k` is the `sigma_multiplier` passed to the derivation.
    pub tau_n: f32,
    /// Mean of the same-class (higher-mean) GMM component.
    pub mu_same: f32,
    /// Std-dev of the same-class GMM component.
    pub sigma_same: f32,
    /// Mean of the different-class (lower-mean) GMM component.
    pub mu_diff: f32,
    /// Std-dev of the different-class GMM component.
    pub sigma_diff: f32,
    /// Size of the HNSW-local sample used.
    pub sample_size: usize,
}
196
197/// Run k=2 Gaussian Mixture on a pre-computed HNSW-local cosine
198/// sample and return the distribution-derived collapse threshold.
199///
200/// The sample must contain at least [`MIN_SAMPLE_SIZE`] observations;
201/// smaller samples return `None` (caller must then refuse to emit a
202/// `canonical_cid` and log
203/// `mnem_canonical_threshold_sample_insufficient_total`).
204///
205/// The implementation is a 1-D two-component GMM via a compact EM
206/// loop - no external crate. The loop is bounded at 32 iterations
207/// with a tolerance of `1e-4` on component-mean movement, both
208/// sufficient for 1-D bi-modal separation of embedding cosines.
209#[must_use]
210pub fn derive_local_threshold(cosines: &[f32], sigma_multiplier: f32) -> Option<LocalThreshold> {
211 if cosines.len() < MIN_SAMPLE_SIZE {
212 return None;
213 }
214 // k=2 EM with deterministic init from sample min/max.
215 let mut lo = f32::INFINITY;
216 let mut hi = f32::NEG_INFINITY;
217 for &c in cosines {
218 if c < lo {
219 lo = c;
220 }
221 if c > hi {
222 hi = c;
223 }
224 }
225 if hi <= lo {
226 // degenerate constant sample - no bimodality; return a
227 // single-gaussian surrogate so the caller still gets a tau.
228 let mu = f32::midpoint(hi, lo);
229 return Some(LocalThreshold {
230 tau_n: mu,
231 mu_same: mu,
232 sigma_same: 0.0,
233 mu_diff: mu,
234 sigma_diff: 0.0,
235 sample_size: cosines.len(),
236 });
237 }
238 // Deterministic init: low-quartile mean vs high-quartile mean.
239 let mut mu0 = lo + (hi - lo) * 0.25;
240 let mut mu1 = lo + (hi - lo) * 0.75;
241 let mut s0 = (hi - lo) / 4.0;
242 let mut s1 = (hi - lo) / 4.0;
243 let mut w0 = 0.5_f32;
244 let mut w1 = 0.5_f32;
245 for _ in 0..32 {
246 // E-step: soft responsibilities.
247 let mut n0 = 0.0f32;
248 let mut n1 = 0.0f32;
249 let mut sum0 = 0.0f32;
250 let mut sum1 = 0.0f32;
251 let mut sq0 = 0.0f32;
252 let mut sq1 = 0.0f32;
253 for &x in cosines {
254 let p0 = w0 * gaussian_pdf(x, mu0, s0.max(1e-6));
255 let p1 = w1 * gaussian_pdf(x, mu1, s1.max(1e-6));
256 let z = p0 + p1;
257 let (r0, r1) = if z > 0.0 {
258 (p0 / z, p1 / z)
259 } else {
260 (0.5, 0.5)
261 };
262 n0 += r0;
263 n1 += r1;
264 sum0 += r0 * x;
265 sum1 += r1 * x;
266 sq0 += r0 * x * x;
267 sq1 += r1 * x * x;
268 }
269 // M-step.
270 let new_mu0 = if n0 > 0.0 { sum0 / n0 } else { mu0 };
271 let new_mu1 = if n1 > 0.0 { sum1 / n1 } else { mu1 };
272 // clippy::suspicious_operation_groupings flags `a*a` adjacent to
273 // `b/c - d*d`; the expression is the standard
274 // variance = E[X^2] - (E[X])^2 form so we silence the lint.
275 #[allow(clippy::suspicious_operation_groupings)]
276 let new_s0 = if n0 > 0.0 {
277 ((sq0 / n0) - new_mu0 * new_mu0).max(1e-8).sqrt()
278 } else {
279 s0
280 };
281 #[allow(clippy::suspicious_operation_groupings)]
282 let new_s1 = if n1 > 0.0 {
283 ((sq1 / n1) - new_mu1 * new_mu1).max(1e-8).sqrt()
284 } else {
285 s1
286 };
287 let n_total = n0 + n1;
288 let new_w0 = if n_total > 0.0 { n0 / n_total } else { 0.5 };
289 let new_w1 = 1.0 - new_w0;
290 let moved = (new_mu0 - mu0).abs() + (new_mu1 - mu1).abs();
291 mu0 = new_mu0;
292 mu1 = new_mu1;
293 s0 = new_s0;
294 s1 = new_s1;
295 w0 = new_w0;
296 w1 = new_w1;
297 if moved < 1e-4 {
298 break;
299 }
300 }
301 // Same-class = higher-mean component.
302 let (mu_same, sigma_same, mu_diff, sigma_diff) = if mu1 >= mu0 {
303 (mu1, s1, mu0, s0)
304 } else {
305 (mu0, s0, mu1, s1)
306 };
307 let low = mu_same - sigma_multiplier * sigma_same;
308 let high = mu_diff + sigma_diff;
309 let tau_n = if low >= high { low } else { high };
310 Some(LocalThreshold {
311 tau_n,
312 mu_same,
313 sigma_same,
314 mu_diff,
315 sigma_diff,
316 sample_size: cosines.len(),
317 })
318}
319
/// Density of the 1-D normal distribution `N(mu, sigma^2)` at `x`.
///
/// Hand-rolled so the module needs no external `statrs` dependency.
/// `sigma` is used as a divisor and is not validated here; the EM loop
/// in this module clamps it to a small positive floor before calling.
#[inline]
fn gaussian_pdf(x: f32, mu: f32, sigma: f32) -> f32 {
    let standardized = (x - mu) / sigma;
    let exponent = -0.5 * standardized * standardized;
    let normalizer = 1.0 / (sigma * (2.0 * core::f32::consts::PI).sqrt());
    normalizer * exponent.exp()
}
327
/// Levenshtein edit distance between `a` and `b`, divided by the
/// length (in `char`s) of the longer string, giving a value in
/// `[0, 1]`: `0` for identical inputs, `1` for maximally different
/// ones. Two empty strings are defined to be at distance `0`.
///
/// Feeds the edit-distance consensus signal. Classic O(m*n) dynamic
/// programme kept to two rolling rows, pure Rust, no extra crate;
/// names are short so the cost is negligible.
#[must_use]
pub fn normalized_levenshtein(a: &str, b: &str) -> f32 {
    let left: Vec<char> = a.chars().collect();
    let right: Vec<char> = b.chars().collect();
    let longest = left.len().max(right.len());
    if longest == 0 {
        return 0.0;
    }

    // `above[j]` holds the distance between the already-processed
    // prefix of `left` and the first `j` chars of `right`; `row` is
    // the line currently being filled in.
    let mut above: Vec<usize> = (0..=right.len()).collect();
    let mut row: Vec<usize> = vec![0; right.len() + 1];
    for (i, &left_ch) in left.iter().enumerate() {
        row[0] = i + 1;
        for (j, &right_ch) in right.iter().enumerate() {
            let deletion = above[j + 1] + 1;
            let insertion = row[j] + 1;
            let substitution = above[j] + usize::from(left_ch != right_ch);
            row[j + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut above, &mut row);
    }

    // After the final swap the finished line lives in `above`.
    #[allow(clippy::cast_precision_loss)]
    let ratio = above[right.len()] as f32 / longest as f32;
    ratio.clamp(0.0, 1.0)
}
357
358/// Two-of-three consensus: returns `(signals_passed, per_signal)` where
359/// `per_signal = [cosine_ok, edit_ok, namespace_ok]`.
360///
361/// - `cosine_ok`: `cand.cosine >= tau_n` AND `cand.cosine >= tau_query`
362/// (symmetric collapse). We pass the same `tau_n` twice for the
363/// standalone resolve path (query has no pre-existing neighbourhood).
364/// - `edit_ok`: `normalized_levenshtein(query, cand.name) <= EDIT_DISTANCE_TAU`.
365/// - `namespace_ok`: `cand.namespace == query_namespace
366/// AND cand.trust == query_trust`.
367#[must_use]
368pub fn two_of_three_consensus(
369 query: &str,
370 query_namespace: &str,
371 query_trust: &str,
372 cand: &Candidate,
373 tau_n: f32,
374) -> (u8, [bool; 3]) {
375 let cosine_ok = cand.cosine >= tau_n;
376 let edit_ok = normalized_levenshtein(query, &cand.name) <= EDIT_DISTANCE_TAU;
377 let namespace_ok = cand.namespace == query_namespace && cand.trust == query_trust;
378 let passed = u8::from(cosine_ok) + u8::from(edit_ok) + u8::from(namespace_ok);
379 (passed, [cosine_ok, edit_ok, namespace_ok])
380}
381
382/// Resolve the commit-derived HNSW build seed.
383///
384/// - `MNEM_CANONICAL_HNSW_SEED` env var wins (decimal or `0x...` hex).
385/// - Else `BLAKE3(commit_cid.to_bytes() || domain_sep)[..8]` little-endian.
386/// - Else (commit_cid is zero): [`HNSW_SEED_FALLBACK`].
387///
388/// Note: `commit_cid.is_zero()` is approximated by comparing the CID's
389/// binary form to the zero-digest for the configured codec / hash.
390/// For testing we check whether every byte of the multihash digest is
391/// zero.
392#[must_use]
393pub fn resolve_hnsw_seed(commit_cid: &Cid) -> (u64, HnswSeedSource) {
394 if let Ok(s) = std::env::var("MNEM_CANONICAL_HNSW_SEED") {
395 if let Some(val) = parse_u64_dec_or_hex(&s) {
396 return (val, HnswSeedSource::EnvOverride);
397 }
398 }
399 if cid_has_zero_digest(commit_cid) {
400 return (HNSW_SEED_FALLBACK, HnswSeedSource::Fallback);
401 }
402 let mut h = blake3::Hasher::new();
403 let bytes = commit_cid.to_bytes();
404 h.update(&bytes);
405 h.update(b"mnem-gap-04-canonical-hnsw-v1");
406 let digest = h.finalize();
407 let d = digest.as_bytes();
408 let seed = u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]);
409 (seed, HnswSeedSource::CommitDerived)
410}
411
/// Parse a `u64` written either in decimal or as `0x`/`0X`-prefixed
/// hexadecimal. Surrounding whitespace is ignored; anything that does
/// not parse (including a bare `0x`) yields `None`.
fn parse_u64_dec_or_hex(s: &str) -> Option<u64> {
    let trimmed = s.trim();
    let hex_digits = trimmed
        .strip_prefix("0x")
        .or_else(|| trimmed.strip_prefix("0X"));
    match hex_digits {
        Some(digits) => u64::from_str_radix(digits, 16).ok(),
        None => trimmed.parse::<u64>().ok(),
    }
}
420
421fn cid_has_zero_digest(cid: &Cid) -> bool {
422 let bytes = cid.to_bytes();
423 // A CID in wire form is (version || codec || multihash). The
424 // multihash is (hash_code || len || digest). The trailing
425 // `len` bytes of `bytes` are the digest. We approximate
426 // "zero CID" as "all digest bytes are zero" which matches the
427 // `zero_cid()` helper used in tests.
428 bytes.iter().rev().take(32).all(|&b| b == 0)
429}
430
/// Request payload for [`resolve_or_create`].
///
/// The request is self-contained: the caller has already walked the
/// HNSW index and supplies both the candidates and the cosine sample,
/// so the resolve call itself performs no index access.
#[derive(Debug, Clone)]
pub struct ResolveRequest {
    /// The surface-form string to resolve.
    pub query: String,
    /// Namespace (e.g. "person", "company").
    pub namespace: String,
    /// Trust label (e.g. "verified").
    pub trust: String,
    /// Candidates pre-sampled from the HNSW walk. In a real pipeline
    /// this is populated by the caller from the canonicalization
    /// HNSW handle keyed by the commit-derived seed.
    pub candidates: Vec<Candidate>,
    /// HNSW-local cosine sample for threshold derivation. Must be at
    /// least [`MIN_SAMPLE_SIZE`] long; shorter samples return
    /// [`ResolveResult::Refused`] with
    /// [`RefusalReason::SampleTooSmall`].
    pub local_sample: Vec<f32>,
    /// Caller-supplied latency budget override. `None` means use
    /// [`RESOLVE_OR_CREATE_P99_MS`].
    pub latency_budget_ms: Option<u32>,
    /// The commit the resolve is happening under (for HNSW seed +
    /// guard envelope).
    pub commit_cid: Cid,
}
455
/// Full outcome of a resolve call, including the guard's report for
/// embedding in the commit envelope and the (seed, source) pair used
/// for the HNSW walk.
///
/// Every return path of [`resolve_or_create`] fills in all fields, so
/// the report and seed are available even for refused or
/// budget-exhausted calls.
#[derive(Debug)]
pub struct ResolveOutcome {
    /// Primary resolution decision.
    pub result: ResolveResult,
    /// Budget-guard report - host embeds in the commit envelope and
    /// feeds to the metric sink.
    pub report: crate::guard::CommitBudgetReport,
    /// Seed used for the HNSW walk (for audit / replay determinism).
    pub seed: u64,
    /// Source of the HNSW seed for this run.
    pub seed_source: HnswSeedSource,
    /// The per-node distribution-derived threshold. `None` when the
    /// local sample was too small.
    pub threshold: Option<LocalThreshold>,
}
474
475/// Resolve a query string onto an existing canonical node, or
476/// decide that a new node should be created.
477///
478/// The function is pure except for (a) wall-clock reads via the
479/// `CommitBudgetGuard` and (b) the optional
480/// `MNEM_CANONICAL_HNSW_SEED` env-var. Both are documented.
481///
482/// For a concrete single-shot API `(query, threshold) -> ResolveResult`
483/// see [`resolve_or_create_simple`]; this function is the full
484/// production shape carrying candidates + local sample.
485///
486/// # Panics
487///
488/// Does not panic. All fallible paths map to [`ResolveResult`] variants.
489pub fn resolve_or_create(req: &ResolveRequest) -> ResolveOutcome {
490 let budget_ms = req.latency_budget_ms.unwrap_or(RESOLVE_OR_CREATE_P99_MS);
491 let mut guard = CommitBudgetGuard::start(
492 "gap-04-resolve-or-create",
493 budget_ms,
494 RESOLVE_OR_CREATE_P99_MS,
495 req.commit_cid.clone(),
496 );
497 let (seed, seed_source) = resolve_hnsw_seed(&req.commit_cid);
498
499 // Stage 1: derive distribution threshold.
500 let threshold = derive_local_threshold(&req.local_sample, SIGMA_MULTIPLIER_FOR_COLLAPSE);
501 let charge1 = guard.charge("derive_threshold");
502 if charge1.is_err() {
503 let report = guard.into_report();
504 return ResolveOutcome {
505 result: ResolveResult::BudgetExhausted { best_effort: None },
506 report,
507 seed,
508 seed_source,
509 threshold,
510 };
511 }
512 let Some(thr) = threshold else {
513 let report = guard.into_report();
514 return ResolveOutcome {
515 result: ResolveResult::Refused(RefusalReason::SampleTooSmall),
516 report,
517 seed,
518 seed_source,
519 threshold,
520 };
521 };
522
523 // Stage 2: pick best candidate by cosine and run two-of-three.
524 let mut best: Option<(&Candidate, u8)> = None;
525 for cand in &req.candidates {
526 let (passed, _) =
527 two_of_three_consensus(&req.query, &req.namespace, &req.trust, cand, thr.tau_n);
528 match best {
529 Some((_, p)) if p >= passed => {}
530 _ => best = Some((cand, passed)),
531 }
532 }
533 let charge2 = guard.charge("consensus");
534 if let Err(_e) = charge2 {
535 let best_effort = best.map(|(c, _)| c.node_id);
536 let report = guard.into_report();
537 return ResolveOutcome {
538 result: ResolveResult::BudgetExhausted { best_effort },
539 report,
540 seed,
541 seed_source,
542 threshold,
543 };
544 }
545 // Per R3: single-signal collapses are refused, meaning the node
546 // stays un-collapsed; i.e. the caller gets a Created decision. We
547 // preserve the explicit `Refused(SingleSignalOnly)` variant for
548 // callers (and gauges) that need to observe the distinction, but
549 // only when the caller explicitly opts in via
550 // [`resolve_or_create_simple`]. For the production path,
551 // un-collapsed == Created so the host creates the node.
552 let result = match best {
553 Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
554 node_id: cand.node_id,
555 signals_passed: signals,
556 },
557 _ => ResolveResult::Created { tau_n: thr.tau_n },
558 };
559 let report = guard.into_report();
560 ResolveOutcome {
561 result,
562 report,
563 seed,
564 seed_source,
565 threshold,
566 }
567}
568
569/// Tight `(query, threshold) -> ResolveResult` shape from the gap brief.
570///
571/// This is a thin convenience wrapper for a caller that has already
572/// done its own HNSW walk and just wants the decision: passing a
573/// `threshold` here bypasses the GMM derivation and checks the best
574/// candidate against the given fixed threshold using the same
575/// two-of-three consensus gate. Present primarily to keep the public
576/// API surface matching the gap brief; production callers should use
577/// [`resolve_or_create`] with a `local_sample`.
578#[must_use]
579pub fn resolve_or_create_simple(
580 query: &str,
581 threshold: f32,
582 candidates: &[Candidate],
583 namespace: &str,
584 trust: &str,
585) -> ResolveResult {
586 let mut best: Option<(&Candidate, u8)> = None;
587 for cand in candidates {
588 let (passed, _) = two_of_three_consensus(query, namespace, trust, cand, threshold);
589 match best {
590 Some((_, p)) if p >= passed => {}
591 _ => best = Some((cand, passed)),
592 }
593 }
594 match best {
595 Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
596 node_id: cand.node_id,
597 signals_passed: signals,
598 },
599 Some((_, 1)) => ResolveResult::Refused(RefusalReason::SingleSignalOnly),
600 _ => ResolveResult::Created { tau_n: threshold },
601 }
602}
603
604#[cfg(test)]
605mod tests;