use crate::guard::CommitBudgetGuard;
use crate::id::{Cid, NodeId};
#[doc = "#[tunable]"]
pub const RESOLVE_OR_CREATE_P99_MS: u32 = 50;
pub const EF_SEARCH_CANONICAL: u32 = 128;
pub const HNSW_SEED_FALLBACK: u64 = 0xCA_00_00_00_01_00_00_00_u64;
pub const SIGMA_MULTIPLIER_FOR_COLLAPSE: f32 = 2.0;
pub const EDIT_DISTANCE_TAU: f32 = 0.25;
pub const MIN_SAMPLE_SIZE: usize = 128;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HnswSeedSource {
CommitDerived,
EnvOverride,
Fallback,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefusalReason {
SampleTooSmall,
SingleSignalOnly,
}
#[derive(Debug, Clone, PartialEq)]
pub enum ResolveResult {
Resolved {
node_id: NodeId,
signals_passed: u8,
},
Created {
tau_n: f32,
},
BudgetExhausted {
best_effort: Option<NodeId>,
},
Refused(RefusalReason),
}
#[derive(Debug, Clone)]
pub struct Candidate {
pub node_id: NodeId,
pub cosine: f32,
pub name: String,
pub namespace: String,
pub trust: String,
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocalThreshold {
pub tau_n: f32,
pub mu_same: f32,
pub sigma_same: f32,
pub mu_diff: f32,
pub sigma_diff: f32,
pub sample_size: usize,
}
#[must_use]
pub fn derive_local_threshold(cosines: &[f32], sigma_multiplier: f32) -> Option<LocalThreshold> {
if cosines.len() < MIN_SAMPLE_SIZE {
return None;
}
let mut lo = f32::INFINITY;
let mut hi = f32::NEG_INFINITY;
for &c in cosines {
if c < lo {
lo = c;
}
if c > hi {
hi = c;
}
}
if hi <= lo {
let mu = f32::midpoint(hi, lo);
return Some(LocalThreshold {
tau_n: mu,
mu_same: mu,
sigma_same: 0.0,
mu_diff: mu,
sigma_diff: 0.0,
sample_size: cosines.len(),
});
}
let mut mu0 = lo + (hi - lo) * 0.25;
let mut mu1 = lo + (hi - lo) * 0.75;
let mut s0 = (hi - lo) / 4.0;
let mut s1 = (hi - lo) / 4.0;
let mut w0 = 0.5_f32;
let mut w1 = 0.5_f32;
for _ in 0..32 {
let mut n0 = 0.0f32;
let mut n1 = 0.0f32;
let mut sum0 = 0.0f32;
let mut sum1 = 0.0f32;
let mut sq0 = 0.0f32;
let mut sq1 = 0.0f32;
for &x in cosines {
let p0 = w0 * gaussian_pdf(x, mu0, s0.max(1e-6));
let p1 = w1 * gaussian_pdf(x, mu1, s1.max(1e-6));
let z = p0 + p1;
let (r0, r1) = if z > 0.0 {
(p0 / z, p1 / z)
} else {
(0.5, 0.5)
};
n0 += r0;
n1 += r1;
sum0 += r0 * x;
sum1 += r1 * x;
sq0 += r0 * x * x;
sq1 += r1 * x * x;
}
let new_mu0 = if n0 > 0.0 { sum0 / n0 } else { mu0 };
let new_mu1 = if n1 > 0.0 { sum1 / n1 } else { mu1 };
#[allow(clippy::suspicious_operation_groupings)]
let new_s0 = if n0 > 0.0 {
((sq0 / n0) - new_mu0 * new_mu0).max(1e-8).sqrt()
} else {
s0
};
#[allow(clippy::suspicious_operation_groupings)]
let new_s1 = if n1 > 0.0 {
((sq1 / n1) - new_mu1 * new_mu1).max(1e-8).sqrt()
} else {
s1
};
let n_total = n0 + n1;
let new_w0 = if n_total > 0.0 { n0 / n_total } else { 0.5 };
let new_w1 = 1.0 - new_w0;
let moved = (new_mu0 - mu0).abs() + (new_mu1 - mu1).abs();
mu0 = new_mu0;
mu1 = new_mu1;
s0 = new_s0;
s1 = new_s1;
w0 = new_w0;
w1 = new_w1;
if moved < 1e-4 {
break;
}
}
let (mu_same, sigma_same, mu_diff, sigma_diff) = if mu1 >= mu0 {
(mu1, s1, mu0, s0)
} else {
(mu0, s0, mu1, s1)
};
let low = mu_same - sigma_multiplier * sigma_same;
let high = mu_diff + sigma_diff;
let tau_n = if low >= high { low } else { high };
Some(LocalThreshold {
tau_n,
mu_same,
sigma_same,
mu_diff,
sigma_diff,
sample_size: cosines.len(),
})
}
#[inline]
fn gaussian_pdf(x: f32, mu: f32, sigma: f32) -> f32 {
let inv = 1.0 / (sigma * (2.0 * core::f32::consts::PI).sqrt());
let z = (x - mu) / sigma;
inv * (-0.5 * z * z).exp()
}
#[must_use]
#[allow(clippy::many_single_char_names)]
pub fn normalized_levenshtein(a: &str, b: &str) -> f32 {
let av: Vec<char> = a.chars().collect();
let bv: Vec<char> = b.chars().collect();
if av.is_empty() && bv.is_empty() {
return 0.0;
}
let m = av.len();
let n = bv.len();
let mut prev: Vec<usize> = (0..=n).collect();
let mut cur: Vec<usize> = vec![0; n + 1];
for i in 1..=m {
cur[0] = i;
for j in 1..=n {
let cost = usize::from(av[i - 1] != bv[j - 1]);
cur[j] = (prev[j] + 1).min(cur[j - 1] + 1).min(prev[j - 1] + cost);
}
std::mem::swap(&mut prev, &mut cur);
}
let max_len = m.max(n);
#[allow(clippy::cast_precision_loss)]
let d = prev[n] as f32 / max_len as f32;
d.clamp(0.0, 1.0)
}
#[must_use]
pub fn two_of_three_consensus(
query: &str,
query_namespace: &str,
query_trust: &str,
cand: &Candidate,
tau_n: f32,
) -> (u8, [bool; 3]) {
let cosine_ok = cand.cosine >= tau_n;
let edit_ok = normalized_levenshtein(query, &cand.name) <= EDIT_DISTANCE_TAU;
let namespace_ok = cand.namespace == query_namespace && cand.trust == query_trust;
let passed = u8::from(cosine_ok) + u8::from(edit_ok) + u8::from(namespace_ok);
(passed, [cosine_ok, edit_ok, namespace_ok])
}
#[must_use]
pub fn resolve_hnsw_seed(commit_cid: &Cid) -> (u64, HnswSeedSource) {
if let Ok(s) = std::env::var("MNEM_CANONICAL_HNSW_SEED") {
if let Some(val) = parse_u64_dec_or_hex(&s) {
return (val, HnswSeedSource::EnvOverride);
}
}
if cid_has_zero_digest(commit_cid) {
return (HNSW_SEED_FALLBACK, HnswSeedSource::Fallback);
}
let mut h = blake3::Hasher::new();
let bytes = commit_cid.to_bytes();
h.update(&bytes);
h.update(b"mnem-gap-04-canonical-hnsw-v1");
let digest = h.finalize();
let d = digest.as_bytes();
let seed = u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]);
(seed, HnswSeedSource::CommitDerived)
}
fn parse_u64_dec_or_hex(s: &str) -> Option<u64> {
let t = s.trim();
if let Some(rest) = t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) {
u64::from_str_radix(rest, 16).ok()
} else {
t.parse::<u64>().ok()
}
}
fn cid_has_zero_digest(cid: &Cid) -> bool {
let bytes = cid.to_bytes();
bytes.iter().rev().take(32).all(|&b| b == 0)
}
#[derive(Debug, Clone)]
pub struct ResolveRequest {
pub query: String,
pub namespace: String,
pub trust: String,
pub candidates: Vec<Candidate>,
pub local_sample: Vec<f32>,
pub latency_budget_ms: Option<u32>,
pub commit_cid: Cid,
}
#[derive(Debug)]
pub struct ResolveOutcome {
pub result: ResolveResult,
pub report: crate::guard::CommitBudgetReport,
pub seed: u64,
pub seed_source: HnswSeedSource,
pub threshold: Option<LocalThreshold>,
}
pub fn resolve_or_create(req: &ResolveRequest) -> ResolveOutcome {
let budget_ms = req.latency_budget_ms.unwrap_or(RESOLVE_OR_CREATE_P99_MS);
let mut guard = CommitBudgetGuard::start(
"gap-04-resolve-or-create",
budget_ms,
RESOLVE_OR_CREATE_P99_MS,
req.commit_cid.clone(),
);
let (seed, seed_source) = resolve_hnsw_seed(&req.commit_cid);
let threshold = derive_local_threshold(&req.local_sample, SIGMA_MULTIPLIER_FOR_COLLAPSE);
let charge1 = guard.charge("derive_threshold");
if charge1.is_err() {
let report = guard.into_report();
return ResolveOutcome {
result: ResolveResult::BudgetExhausted { best_effort: None },
report,
seed,
seed_source,
threshold,
};
}
let Some(thr) = threshold else {
let report = guard.into_report();
return ResolveOutcome {
result: ResolveResult::Refused(RefusalReason::SampleTooSmall),
report,
seed,
seed_source,
threshold,
};
};
let mut best: Option<(&Candidate, u8)> = None;
for cand in &req.candidates {
let (passed, _) =
two_of_three_consensus(&req.query, &req.namespace, &req.trust, cand, thr.tau_n);
match best {
Some((_, p)) if p >= passed => {}
_ => best = Some((cand, passed)),
}
}
let charge2 = guard.charge("consensus");
if let Err(_e) = charge2 {
let best_effort = best.map(|(c, _)| c.node_id);
let report = guard.into_report();
return ResolveOutcome {
result: ResolveResult::BudgetExhausted { best_effort },
report,
seed,
seed_source,
threshold,
};
}
let result = match best {
Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
node_id: cand.node_id,
signals_passed: signals,
},
_ => ResolveResult::Created { tau_n: thr.tau_n },
};
let report = guard.into_report();
ResolveOutcome {
result,
report,
seed,
seed_source,
threshold,
}
}
#[must_use]
pub fn resolve_or_create_simple(
query: &str,
threshold: f32,
candidates: &[Candidate],
namespace: &str,
trust: &str,
) -> ResolveResult {
let mut best: Option<(&Candidate, u8)> = None;
for cand in candidates {
let (passed, _) = two_of_three_consensus(query, namespace, trust, cand, threshold);
match best {
Some((_, p)) if p >= passed => {}
_ => best = Some((cand, passed)),
}
}
match best {
Some((cand, signals)) if signals >= 2 => ResolveResult::Resolved {
node_id: cand.node_id,
signals_passed: signals,
},
Some((_, 1)) => ResolveResult::Refused(RefusalReason::SingleSignalOnly),
_ => ResolveResult::Created { tau_n: threshold },
}
}
#[cfg(test)]
mod tests;