use anyhow::{Result, bail};
use crate::cli::EntryFilter;
use crate::index::IndexConfig;
use crate::knowledge;
use crate::store;
use crate::surreal_db::SurrealDatabase;
/// Narrows `entries` to those that satisfy every criterion enabled in `filter`,
/// then caps the result at `filter.limit` when a limit is set.
///
/// A boolean flag on `filter` only constrains the result when it is `true`.
/// When `filter.tags` is set, an entry is kept if it carries at least one of
/// the listed tags. The relative order of surviving entries is preserved.
pub(crate) fn apply_entry_filters(
    entries: Vec<knowledge::KnowledgeEntry>,
    filter: &EntryFilter,
) -> Vec<knowledge::KnowledgeEntry> {
    let passes = |entry: &knowledge::KnowledgeEntry| -> bool {
        if filter.has_wake_phrase && !entry.has_any_wake_phrase() {
            return false;
        }
        if filter.missing_wake_phrase && entry.has_any_wake_phrase() {
            return false;
        }
        if filter.has_anchors && entry.anchors.is_empty() {
            return false;
        }
        if filter.missing_anchors && !entry.anchors.is_empty() {
            return false;
        }

        // A resonance type counts as "missing" when it is absent or empty.
        let resonance_blank = match &entry.resonance_type {
            Some(kind) => kind.is_empty(),
            None => true,
        };
        if filter.has_resonance_type && resonance_blank {
            return false;
        }
        if filter.missing_resonance_type && !resonance_blank {
            return false;
        }

        match &filter.tags {
            Some(wanted) => wanted.iter().any(|tag| entry.tags.contains(tag)),
            None => true,
        }
    };

    let mut kept: Vec<knowledge::KnowledgeEntry> = Vec::new();
    for entry in entries {
        if passes(&entry) {
            kept.push(entry);
        }
    }
    if let Some(limit) = filter.limit {
        kept.truncate(limit);
    }
    kept
}
/// Returns `id` with the `kn-` prefix, adding it only when it is not already
/// present.
pub(crate) fn normalize_id(id: &str) -> String {
    const PREFIX: &str = "kn-";
    match id.strip_prefix(PREFIX) {
        Some(_) => id.to_owned(),
        None => [PREFIX, id].concat(),
    }
}
/// Where a fact of a given type is stored: the knowledge category it is
/// filed under and the tags attached to it (see [`route_fact_type`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FactRouting {
    /// Knowledge category the fact is filed under.
    pub(crate) category: &'static str,
    /// Tags attached to the fact; may be empty.
    pub(crate) tags: Vec<&'static str>,
}
/// Looks up the id of the first open thread whose body matches `content`.
///
/// Threads are listed from the `thread` category in the context of
/// `agent_id`. A thread counts as open when its summary state is absent or
/// equal to `"open"`. Bodies and `content` are compared after
/// `KnowledgeEntry::normalize_content`.
///
/// # Errors
///
/// Fails if the store lookup fails or if no open thread matches.
pub(crate) fn find_open_thread_by_content(
    db: &dyn store::KnowledgeStore,
    content: &str,
    agent_id: &str,
) -> Result<String> {
    use crate::knowledge::KnowledgeEntry;

    let ctx = store::AgentContext::for_agent(agent_id);
    let filter = store::KnowledgeFilter {
        categories: Some(vec!["thread".to_string()]),
        ..Default::default()
    };
    let wanted = KnowledgeEntry::normalize_content(content);

    let hit = db
        .list_by_category("thread", &ctx, &filter)?
        .into_iter()
        .find(|thread| {
            let open = matches!(
                thread.get_summary_state().as_deref(),
                None | Some("open")
            );
            open && thread
                .body
                .as_ref()
                .is_some_and(|body| KnowledgeEntry::normalize_content(body) == wanted)
        });

    match hit {
        Some(thread) => Ok(thread.id),
        None => bail!("No open thread found matching content: '{}'", content),
    }
}
/// Maps a fact type name to the category and tags it is stored under.
///
/// # Errors
///
/// Fails with a message listing the valid fact types when `fact_type` is not
/// one of them.
pub(crate) fn route_fact_type(fact_type: &str) -> Result<FactRouting> {
    // (fact type, category, tags) — one table drives both the routing and the
    // list of valid types shown in the error message.
    const ROUTES: &[(&str, &str, &[&str])] = &[
        ("decision", "decision", &[]),
        ("insight", "insight", &[]),
        ("person", "reference", &["person"]),
        ("quote", "reference", &["quote"]),
        ("thread_opened", "thread", &["question"]),
        ("commitment", "thread", &["commitment"]),
        ("thread_closed", "thread", &[]),
    ];

    if let Some(&(_, category, tags)) = ROUTES.iter().find(|(name, _, _)| *name == fact_type) {
        return Ok(FactRouting {
            category,
            tags: tags.to_vec(),
        });
    }

    let valid: Vec<&str> = ROUTES.iter().map(|&(name, _, _)| name).collect();
    bail!(
        "Invalid fact type '{}'. Valid types: {}",
        fact_type,
        valid.join(", ")
    )
}
/// Builds the agent context from the `MX_CURRENT_AGENT` environment variable.
///
/// * Variable unset, unreadable or empty: `AgentContext::public_only()`.
/// * Variable set and either `mine` or `include_private` is `true`:
///   `AgentContext::for_agent(agent)`.
/// * Otherwise: `AgentContext::public_for_agent(agent)`.
pub(crate) fn resolve_agent_context(mine: bool, include_private: bool) -> store::AgentContext {
    match std::env::var("MX_CURRENT_AGENT") {
        // `mine` and `include_private` select the same context, so they
        // share one branch.
        Ok(agent) if !agent.is_empty() => {
            if mine || include_private {
                store::AgentContext::for_agent(agent)
            } else {
                store::AgentContext::public_for_agent(agent)
            }
        }
        _ => store::AgentContext::public_only(),
    }
}
/// Upper similarity bound used by `auto_anchor`: candidates and existing
/// anchors scoring above this value are not kept as anchors.
pub(crate) const NEAR_DUPLICATE_CEILING: f32 = 0.95;
/// Lower similarity bound used by `auto_anchor`: candidates and existing
/// anchors scoring below this value are not kept as anchors.
pub(crate) const DEFAULT_ANCHOR_THRESHOLD: f32 = 0.75;
/// Multiplier applied to the anchor cap in `auto_anchor` to size the initial
/// candidate fetch.
pub(crate) const ANCHOR_CANDIDATE_OVERFETCH: usize = 5;
/// Candidate fetch size `auto_anchor` escalates to when the initial fetch
/// comes back full and its last score is still at or above the threshold.
pub(crate) const MAX_ANCHOR_CANDIDATES: usize = 500;
/// Cosine similarity of `a` and `b`, clamped to `[-1.0, 1.0]`.
///
/// Returns `0.0` when the slices differ in length, when either is empty, or
/// when either has zero magnitude.
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    // One pass accumulates the dot product and both squared norms.
    let (dot_product, norm_a_sq, norm_b_sq) = a.iter().zip(b).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, na, nb), (x, y)| (dot + x * y, na + x * x, nb + y * y),
    );

    let magnitude_a = norm_a_sq.sqrt();
    let magnitude_b = norm_b_sq.sqrt();
    if magnitude_a == 0.0 || magnitude_b == 0.0 {
        return 0.0;
    }

    // Rounding can push the quotient marginally outside the mathematical
    // range; clamp so callers always see a valid cosine.
    (dot_product / (magnitude_a * magnitude_b)).clamp(-1.0, 1.0)
}
/// Computes and stores the embedding for the entry `entry_id`.
///
/// The entry is loaded in the context of the agent named by
/// `MX_CURRENT_AGENT` (public-only when unset or empty). Its embedding text is
/// split into chunks:
///
/// * one chunk: the chunk embedding becomes the entry embedding, any stored
///   embedding chunks are deleted and `chunk_count` is set to `0`;
/// * several chunks: each chunk embedding is stored, and the entry embedding
///   is the L2-normalised mean of the chunk embeddings.
///
/// Does nothing when the entry cannot be loaded or its text yields no chunks.
///
/// # Errors
///
/// Propagates failures from the store, the embedding provider and the
/// tokenizer loader.
pub(crate) fn auto_embed(entry_id: &str, db: &dyn store::KnowledgeStore) -> Result<()> {
    use crate::chunking::{ChunkConfig, chunk_text};
    use crate::embeddings::{EmbeddingProvider, TractProvider};

    let ctx = match std::env::var("MX_CURRENT_AGENT") {
        Ok(agent) if !agent.is_empty() => store::AgentContext::for_agent(agent),
        _ => store::AgentContext::public_only(),
    };
    let Some(mut entry) = db.get(entry_id, &ctx)? else {
        return Ok(());
    };

    let provider = TractProvider::new()?;
    let embedding_text = entry.embedding_text();
    let config = ChunkConfig::default();
    let chunking_tokenizer = crate::embeddings::load_tokenizer()?;
    let chunks = chunk_text(&embedding_text, &chunking_tokenizer, &config);

    // Nothing to embed. Without this guard the mean below would divide by
    // zero and an all-NaN embedding would be written to the store.
    if chunks.is_empty() {
        return Ok(());
    }

    if chunks.len() == 1 {
        let embedding = provider.embed(&chunks[0].text)?;
        // One timestamp so `embedded_at` and `updated_at` agree.
        let now = chrono::Utc::now().to_rfc3339();
        entry.embedding = Some(embedding);
        entry.embedding_model = Some(provider.model_id().to_string());
        entry.embedded_at = Some(now.clone());
        entry.chunk_count = 0;
        entry.updated_at = Some(now);
        db.upsert_knowledge(&entry)?;
        db.delete_embedding_chunks(entry_id)?;
    } else {
        // Embed every chunk before touching the store so a provider failure
        // leaves the previously stored chunks intact.
        let mut chunk_embeddings = Vec::with_capacity(chunks.len());
        for chunk in &chunks {
            chunk_embeddings.push(provider.embed(&chunk.text)?);
        }

        db.delete_embedding_chunks(entry_id)?;
        for (chunk, embedding) in chunks.iter().zip(chunk_embeddings.iter()) {
            db.insert_embedding_chunk(
                entry_id,
                chunk.chunk_index,
                &chunk.text,
                chunk.token_offset,
                chunk.token_count,
                embedding,
                provider.model_id(),
            )?;
        }

        let pooled = mean_pooled_unit_vector(&chunk_embeddings, provider.dimensions());
        let now = chrono::Utc::now().to_rfc3339();
        entry.embedding = Some(pooled);
        entry.embedding_model = Some(provider.model_id().to_string());
        entry.embedded_at = Some(now.clone());
        // Saturate rather than silently wrap on an absurdly large count.
        entry.chunk_count = i32::try_from(chunks.len()).unwrap_or(i32::MAX);
        entry.updated_at = Some(now);
        db.upsert_knowledge(&entry)?;
    }
    Ok(())
}

/// Averages `embeddings` component-wise into a `dims`-long vector and scales
/// the result to unit L2 length (left unscaled when its length is zero).
///
/// Components beyond `dims`, or missing from a shorter embedding, are ignored
/// instead of panicking. An empty input yields the zero vector.
fn mean_pooled_unit_vector(embeddings: &[Vec<f32>], dims: usize) -> Vec<f32> {
    let mut mean = vec![0.0f32; dims];
    if embeddings.is_empty() {
        return mean;
    }

    for embedding in embeddings {
        for (acc, value) in mean.iter_mut().zip(embedding) {
            *acc += value;
        }
    }

    let count = embeddings.len() as f32;
    for value in &mut mean {
        *value /= count;
    }

    let l2 = mean.iter().map(|x| x * x).sum::<f32>().sqrt();
    if l2 > 0.0 {
        for value in &mut mean {
            *value /= l2;
        }
    }
    mean
}
/// Reports whether write-path anchoring should run.
///
/// Anchoring is disabled when `no_auto_anchor` is `true`, or when the
/// `MX_SKIP_WRITE_ANCHOR` environment variable is `1` or `true` (the latter
/// compared case-insensitively). Any other value, or an unset variable,
/// leaves anchoring enabled.
pub(crate) fn write_anchor_enabled(no_auto_anchor: bool) -> bool {
    // `eq_ignore_ascii_case` compares in place; no lowercase copy is needed.
    let skip_via_env = std::env::var("MX_SKIP_WRITE_ANCHOR")
        .is_ok_and(|value| value == "1" || value.eq_ignore_ascii_case("true"));
    !no_auto_anchor && !skip_via_env
}
/// Recomputes the anchors of entry `entry_id` from embedding similarity.
///
/// The entry is loaded in the context of the agent named by
/// `MX_CURRENT_AGENT` (public-only when unset or empty). Nothing happens when
/// the entry cannot be loaded or has no embedding.
///
/// * New anchors: up to five candidates from the store's scored semantic
///   search whose similarity lies in
///   `[DEFAULT_ANCHOR_THRESHOLD, NEAR_DUPLICATE_CEILING]`. The entry itself,
///   ids already anchored, ids in `explicitly_removed` and candidates the
///   entry may not link to by visibility are skipped. A private entry may
///   anchor to public entries and to private entries with the same owner; any
///   other entry may anchor only to public entries.
/// * Existing anchors are dropped when their recomputed similarity falls
///   outside that band, or when they are listed in `explicitly_removed`.
///   Anchors that cannot be loaded or have no embedding are kept.
///
/// The stored anchor list is sorted and de-duplicated. The entry is written
/// back only when something changed.
///
/// # Errors
///
/// Propagates store failures.
pub(crate) fn auto_anchor(
    entry_id: &str,
    db: &dyn store::KnowledgeStore,
    explicitly_removed: Option<&[String]>,
) -> Result<()> {
    let ctx = match std::env::var("MX_CURRENT_AGENT") {
        Ok(agent) if !agent.is_empty() => store::AgentContext::for_agent(agent),
        _ => store::AgentContext::public_only(),
    };
    let Some(entry) = db.get(entry_id, &ctx)? else {
        return Ok(());
    };
    let Some(entry_embedding) = entry.embedding.as_ref() else {
        return Ok(());
    };

    let is_removed =
        |id: &String| explicitly_removed.is_some_and(|removed| removed.contains(id));

    let threshold = DEFAULT_ANCHOR_THRESHOLD;
    let max_anchors = 5;
    let candidate_fetch_k = max_anchors * ANCHOR_CANDIDATE_OVERFETCH;

    let mut scored_candidates =
        db.semantic_search_entries_scored(entry_embedding, &ctx, candidate_fetch_k)?;
    // A full page whose weakest score is still at or above the threshold means
    // in-band candidates may lie beyond it, so refetch with the larger cap.
    let saturated = scored_candidates.len() == candidate_fetch_k
        && scored_candidates
            .last()
            .is_some_and(|(_, score)| *score >= threshold);
    if saturated {
        scored_candidates =
            db.semantic_search_entries_scored(entry_embedding, &ctx, MAX_ANCHOR_CANDIDATES)?;
    }

    // Collect in-band candidates the entry is allowed to anchor to.
    let mut similarities: Vec<(String, f32)> = Vec::new();
    for (candidate, similarity) in &scored_candidates {
        if candidate.id == entry.id
            || entry.anchors.contains(&candidate.id)
            || is_removed(&candidate.id)
        {
            continue;
        }
        let can_anchor = if entry.visibility == "private" {
            candidate.visibility == "public"
                || (candidate.visibility == "private" && candidate.owner == entry.owner)
        } else {
            candidate.visibility == "public"
        };
        if !can_anchor {
            continue;
        }
        if *similarity >= threshold && *similarity <= NEAR_DUPLICATE_CEILING {
            similarities.push((candidate.id.clone(), *similarity));
        }
    }

    // Re-score existing anchors by id; they need not appear in the search page.
    let mut stale_anchors: Vec<String> = Vec::new();
    for anchor_id in &entry.anchors {
        if *anchor_id == entry.id {
            continue;
        }
        let Some(anchor_entry) = db.get(anchor_id, &ctx)? else {
            continue;
        };
        let Some(anchor_embedding) = anchor_entry.embedding.as_ref() else {
            continue;
        };
        let similarity = cosine_similarity(entry_embedding, anchor_embedding);
        if similarity < threshold || similarity > NEAR_DUPLICATE_CEILING {
            stale_anchors.push(anchor_id.clone());
        }
    }

    // An explicitly removed anchor that is still stored is a change in its own
    // right; do not skip the write just because nothing else changed.
    let removes_existing = entry.anchors.iter().any(|anchor| is_removed(anchor));
    if stale_anchors.is_empty() && similarities.is_empty() && !removes_existing {
        return Ok(());
    }

    // Only in-band (hence non-NaN) scores are present, so a total order is safe.
    similarities.sort_by(|a, b| b.1.total_cmp(&a.1));

    let mut updated_anchors: Vec<String> = entry
        .anchors
        .iter()
        .filter(|anchor| !stale_anchors.contains(*anchor) && !is_removed(*anchor))
        .cloned()
        .collect();
    updated_anchors.extend(
        similarities
            .into_iter()
            .take(max_anchors)
            .map(|(id, _)| id),
    );
    updated_anchors.sort();
    updated_anchors.dedup();

    let mut updated_entry = entry;
    updated_entry.anchors = updated_anchors;
    updated_entry.updated_at = Some(chrono::Utc::now().to_rfc3339());
    db.upsert_knowledge(&updated_entry)?;
    Ok(())
}
/// Opens the SurrealDB store located at `config.db_path` with its extension
/// replaced by `surreal`, forwarding `verbose` to the database.
pub(crate) fn open_surreal(config: &IndexConfig, verbose: bool) -> Result<SurrealDatabase> {
    SurrealDatabase::open_with_verbose(config.db_path.with_extension("surreal"), verbose)
}
#[cfg(test)]
mod auto_anchor_tests {
use super::*;
use crate::knowledge::KnowledgeEntry;
use crate::store::{AgentContext, KnowledgeStore};
use serial_test::serial;
/// Builds a 4-d vector `[cos, sin, 0, 0]` with `sin = sqrt(max(1 - cos², 0))`,
/// so that for `cos` in `[-1, 1]` its cosine against `[1, 0, 0, 0]` is `cos`.
fn unit_vec(cos: f32) -> Vec<f32> {
    let sin_squared = f32::max(1.0 - cos * cos, 0.0);
    let mut v = vec![0.0; 4];
    v[0] = cos;
    v[1] = f32::sqrt(sin_squared);
    v
}
/// The reference direction `[1, 0, 0, 0]` used as the target embedding.
fn unit_query() -> Vec<f32> {
    let mut query = vec![0.0; 4];
    query[0] = 1.0;
    query
}
/// Builds a test `KnowledgeEntry` with the given id, embedding, visibility,
/// owner and anchors; every other field gets a fixed placeholder value.
fn entry_with_embedding(
    id: &str,
    embedding: Vec<f32>,
    visibility: &str,
    owner: Option<&str>,
    anchors: Vec<String>,
) -> KnowledgeEntry {
    let timestamp = chrono::Utc::now().to_rfc3339();
    let text = |s: &str| Some(s.to_owned());
    KnowledgeEntry {
        id: id.to_owned(),
        category_id: String::from("test"),
        title: format!("Entry {id}"),
        body: text("body"),
        summary: None,
        applicability: Vec::new(),
        source_project_id: None,
        source_agent_id: None,
        file_path: None,
        tags: Vec::new(),
        created_at: Some(timestamp.clone()),
        updated_at: Some(timestamp.clone()),
        content_hash: Some(format!("hash-{id}")),
        source_type_id: text("manual"),
        entry_type_id: text("primary"),
        session_id: None,
        ephemeral: false,
        content_type_id: text("text"),
        owner: owner.map(str::to_owned),
        visibility: visibility.to_owned(),
        resonance: 5,
        resonance_type: text("ephemeral"),
        last_activated: Some(timestamp),
        activation_count: 0,
        decay_rate: 0.0,
        anchors,
        wake_phrases: Vec::new(),
        triggers: Vec::new(),
        wake_order: None,
        wake_phrase: None,
        embedding: Some(embedding),
        embedding_model: text("test-model"),
        embedded_at: Some(chrono::Utc::now().to_rfc3339()),
        chunk_count: 0,
        format: String::from("markdown"),
        effective_resonance: None,
    }
}
/// Removes `MX_CURRENT_AGENT` from the process environment so the code under
/// test falls back to the public-only context.
fn clear_agent_env() {
    const AGENT_VAR: &str = "MX_CURRENT_AGENT";
    // SAFETY: every test in this module that calls this helper is marked
    // `#[serial]`; this assumes no other thread touches the environment
    // while a serial test runs.
    unsafe { std::env::remove_var(AGENT_VAR) }
}
/// Reference implementation used as a test oracle: scans every entry in
/// `all`, scores it against `target` with `cosine_similarity`, and returns
/// the sorted ids of the `max_anchors` best in-band, visibility-compatible
/// candidates that are neither `target` itself nor already anchored.
fn reference_old_anchors(
    target: &KnowledgeEntry,
    all: &[KnowledgeEntry],
    max_anchors: usize,
) -> Vec<String> {
    let floor = DEFAULT_ANCHOR_THRESHOLD;
    let target_emb = target.embedding.as_ref().unwrap();

    // Same visibility rule as `auto_anchor`.
    let may_anchor = |cand: &KnowledgeEntry| {
        match (target.visibility.as_str(), cand.visibility.as_str()) {
            (_, "public") => true,
            ("private", "private") => cand.owner == target.owner,
            _ => false,
        }
    };

    let mut scored: Vec<(String, f32)> = all
        .iter()
        .filter(|&cand| {
            cand.id != target.id && !target.anchors.contains(&cand.id) && may_anchor(cand)
        })
        .filter_map(|cand| {
            let score = cosine_similarity(target_emb, cand.embedding.as_ref()?);
            (score >= floor && score <= NEAR_DUPLICATE_CEILING).then(|| (cand.id.clone(), score))
        })
        .collect();

    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    scored.truncate(max_anchors);

    let mut ids: Vec<String> = scored.into_iter().map(|(id, _)| id).collect();
    ids.sort();
    ids
}
/// Reads entry `id` through the public-only context and returns its anchors
/// in sorted order. Panics if the lookup fails or the entry is absent.
fn anchors_of(db: &dyn KnowledgeStore, id: &str) -> Vec<String> {
    let entry = db.get(id, &AgentContext::public_only()).unwrap().unwrap();
    let mut anchors = entry.anchors;
    anchors.sort();
    anchors
}
/// `auto_anchor` must pick exactly the anchors the full-scan reference picks.
#[test]
#[serial]
fn auto_anchor_picks_same_anchors_as_old_full_scan() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let target = entry_with_embedding("kn-target", unit_query(), "public", None, vec![]);

    // Five in-band neighbours, one above the ceiling, one below the floor.
    let neighbours = [
        ("kn-a", 0.90f32),
        ("kn-b", 0.85),
        ("kn-c", 0.80),
        ("kn-d", 0.78),
        ("kn-e", 0.76),
        ("kn-dup", 0.97),
        ("kn-far", 0.60),
    ];
    let mut graph = vec![target.clone()];
    graph.extend(
        neighbours
            .iter()
            .map(|&(id, cos)| entry_with_embedding(id, unit_vec(cos), "public", None, vec![])),
    );
    for entry in &graph {
        db.upsert_knowledge(entry).unwrap();
    }

    auto_anchor("kn-target", &db, None).unwrap();

    let got = anchors_of(&db, "kn-target");
    let expected = reference_old_anchors(&target, &graph, 5);
    assert_eq!(
        got, expected,
        "rewrite must select the same anchors as the old full scan"
    );
    let literal: Vec<String> = ["kn-a", "kn-b", "kn-c", "kn-d", "kn-e"]
        .iter()
        .map(|id| id.to_string())
        .collect();
    assert_eq!(got, literal);
}
/// Only candidates inside the similarity band are anchored.
#[test]
#[serial]
fn band_filter_excludes_near_duplicates_and_below_threshold() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let target = entry_with_embedding("kn-t", unit_query(), "public", None, vec![]);
    db.upsert_knowledge(&target).unwrap();

    // One above the ceiling, one below the floor, one inside the band.
    for (id, cos) in [("kn-dup", 0.99f32), ("kn-low", 0.50), ("kn-mid", 0.85)] {
        let neighbour = entry_with_embedding(id, unit_vec(cos), "public", None, vec![]);
        db.upsert_knowledge(&neighbour).unwrap();
    }

    auto_anchor("kn-t", &db, None).unwrap();

    assert_eq!(
        anchors_of(&db, "kn-t"),
        vec!["kn-mid".to_string()],
        "only the in-band entry is anchored; near-dup (>0.95) and below-threshold (<0.75) excluded"
    );
}
/// With seven in-band candidates only the five best are anchored.
#[test]
#[serial]
fn max_anchors_cap_respected() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let target = entry_with_embedding("kn-t", unit_query(), "public", None, vec![]);
    db.upsert_knowledge(&target).unwrap();

    // Descending scores: index 0 is the best match, index 6 the worst.
    let scores = [0.94f32, 0.92, 0.90, 0.88, 0.86, 0.84, 0.82];
    for (i, &score) in scores.iter().enumerate() {
        let id = format!("kn-c{i}");
        let candidate = entry_with_embedding(&id, unit_vec(score), "public", None, vec![]);
        db.upsert_knowledge(&candidate).unwrap();
    }

    auto_anchor("kn-t", &db, None).unwrap();

    let got = anchors_of(&db, "kn-t");
    assert_eq!(got.len(), 5, "cap at max_anchors = 5");
    let best_five: Vec<String> = (0..5).map(|i| format!("kn-c{i}")).collect();
    assert_eq!(got, best_five);
}
/// An existing anchor whose similarity fell below the floor is removed even
/// though it does not need to appear in the candidate search results.
#[test]
#[serial]
fn stale_anchor_pruned_via_by_id_recompute() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let target = entry_with_embedding(
        "kn-t",
        unit_query(),
        "public",
        None,
        vec!["kn-stale".to_string(), "kn-keep".to_string()],
    );
    db.upsert_knowledge(&target).unwrap();

    for (id, cos) in [("kn-keep", 0.88f32), ("kn-stale", 0.40)] {
        let anchor = entry_with_embedding(id, unit_vec(cos), "public", None, vec![]);
        db.upsert_knowledge(&anchor).unwrap();
    }
    // Additional in-band neighbours surrounding the target.
    for i in 0..10 {
        let id = format!("kn-n{i}");
        let cos = 0.80 + i as f32 * 0.001;
        let neighbour = entry_with_embedding(&id, unit_vec(cos), "public", None, vec![]);
        db.upsert_knowledge(&neighbour).unwrap();
    }

    auto_anchor("kn-t", &db, None).unwrap();

    let got = anchors_of(&db, "kn-t");
    let has = |id: &str| got.iter().any(|anchor| anchor == id);
    assert!(
        !has("kn-stale"),
        "stale anchor below threshold must be pruned (by-ID recompute)"
    );
    assert!(has("kn-keep"), "in-band existing anchor must be preserved");
}
/// An existing anchor scoring above the near-duplicate ceiling is removed.
#[test]
#[serial]
fn near_duplicate_existing_anchor_is_pruned() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let target = entry_with_embedding(
        "kn-t",
        unit_query(),
        "public",
        None,
        vec!["kn-toodup".to_string()],
    );
    db.upsert_knowledge(&target).unwrap();
    let too_close = entry_with_embedding("kn-toodup", unit_vec(0.98), "public", None, vec![]);
    db.upsert_knowledge(&too_close).unwrap();

    auto_anchor("kn-t", &db, None).unwrap();

    let got = anchors_of(&db, "kn-t");
    let still_anchored = got.iter().any(|anchor| anchor == "kn-toodup");
    assert!(
        !still_anchored,
        "existing anchor above the near-duplicate ceiling must be pruned"
    );
}
/// The entry being anchored never ends up in its own anchor list.
#[test]
#[serial]
fn self_is_never_anchored() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let entries = [
        entry_with_embedding("kn-solo", unit_query(), "public", None, vec![]),
        entry_with_embedding("kn-near", unit_vec(0.85), "public", None, vec![]),
    ];
    for entry in &entries {
        db.upsert_knowledge(entry).unwrap();
    }

    auto_anchor("kn-solo", &db, None).unwrap();

    let got = anchors_of(&db, "kn-solo");
    let anchors_itself = got.iter().any(|anchor| anchor == "kn-solo");
    assert!(
        !anchors_itself,
        "an entry must never anchor to itself (self-similarity ~1.0 excluded)"
    );
    assert_eq!(got, vec!["kn-near".to_string()]);
}
/// A public entry only anchors to public entries, even when a private one
/// scores higher.
#[test]
#[serial]
fn public_entry_does_not_anchor_to_private() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let entries = [
        entry_with_embedding("kn-pub", unit_query(), "public", None, vec![]),
        entry_with_embedding("kn-priv", unit_vec(0.90), "private", Some("agent-x"), vec![]),
        entry_with_embedding("kn-pub2", unit_vec(0.85), "public", None, vec![]),
    ];
    for entry in &entries {
        db.upsert_knowledge(entry).unwrap();
    }

    auto_anchor("kn-pub", &db, None).unwrap();

    let got = anchors_of(&db, "kn-pub");
    let linked_private = got.iter().any(|anchor| anchor == "kn-priv");
    assert!(
        !linked_private,
        "public entry must not anchor to a private entry"
    );
    assert_eq!(got, vec!["kn-pub2".to_string()]);
}
/// Ids passed as `explicitly_removed` are not re-added as anchors.
#[test]
#[serial]
fn explicitly_removed_anchor_not_readded() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let entries = [
        entry_with_embedding("kn-t", unit_query(), "public", None, vec![]),
        entry_with_embedding("kn-removed", unit_vec(0.90), "public", None, vec![]),
        entry_with_embedding("kn-other", unit_vec(0.85), "public", None, vec![]),
    ];
    for entry in &entries {
        db.upsert_knowledge(entry).unwrap();
    }

    let removed = vec!["kn-removed".to_string()];
    auto_anchor("kn-t", &db, Some(&removed)).unwrap();

    let got = anchors_of(&db, "kn-t");
    let readded = got.iter().any(|anchor| anchor == "kn-removed");
    assert!(
        !readded,
        "auto_anchor must not re-add an explicitly removed anchor"
    );
    assert_eq!(got, vec!["kn-other".to_string()]);
}
/// An entry without an embedding is left without anchors.
#[test]
#[serial]
fn no_embedding_skips_anchoring() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();

    let mut bare = entry_with_embedding("kn-noemb", unit_query(), "public", None, vec![]);
    bare.embedding = None;
    db.upsert_knowledge(&bare).unwrap();

    let neighbour = entry_with_embedding("kn-x", unit_vec(0.90), "public", None, vec![]);
    db.upsert_knowledge(&neighbour).unwrap();

    auto_anchor("kn-noemb", &db, None).unwrap();

    let got = anchors_of(&db, "kn-noemb");
    assert!(got.is_empty(), "no embedding -> no anchoring");
}
/// When the initial top-K is filled with entries scoring at or above the
/// floor, `auto_anchor` must look further and still find the genuine in-band
/// anchors.
#[test]
#[serial]
fn escalates_when_saturated_by_near_duplicate_flood() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let max_anchors = 5usize;
    let k = max_anchors * ANCHOR_CANDIDATE_OVERFETCH;
    let target = entry_with_embedding("kn-target", unit_query(), "public", None, vec![]);

    // k - 1 entries that all score above the near-duplicate ceiling.
    let near_duplicates = (0..k - 1).map(|i| {
        let cos = 0.999 - (i as f32) * 0.0005;
        entry_with_embedding(
            &format!("kn-dup{i:02}"),
            unit_vec(cos),
            "public",
            None,
            vec![],
        )
    });
    // The five anchors that should be selected.
    let genuine = [
        ("kn-real-a", 0.90f32),
        ("kn-real-b", 0.87),
        ("kn-real-c", 0.84),
        ("kn-real-d", 0.81),
        ("kn-real-e", 0.78),
    ]
    .into_iter()
    .map(|(id, cos)| entry_with_embedding(id, unit_vec(cos), "public", None, vec![]));

    let graph: Vec<KnowledgeEntry> = std::iter::once(target.clone())
        .chain(near_duplicates)
        .chain(genuine)
        .collect();
    for entry in &graph {
        db.upsert_knowledge(entry).unwrap();
    }

    // Precondition: the first fetch is full and its weakest hit is in range.
    let ctx = AgentContext::public_only();
    let initial = db
        .semantic_search_entries_scored(target.embedding.as_ref().unwrap(), &ctx, k)
        .unwrap();
    assert_eq!(initial.len(), k, "initial fetch must be K-saturated");
    let weakest = initial.last().unwrap().1;
    assert!(
        weakest >= DEFAULT_ANCHOR_THRESHOLD,
        "lowest returned score must still be >= floor (saturation signal fires)"
    );

    auto_anchor("kn-target", &db, None).unwrap();

    let got = anchors_of(&db, "kn-target");
    let expected = reference_old_anchors(&target, &graph, max_anchors);
    assert_eq!(
        got, expected,
        "escalation must select the same in-band anchors as the old full scan"
    );
    let literal: Vec<String> = ["kn-real-a", "kn-real-b", "kn-real-c", "kn-real-d", "kn-real-e"]
        .iter()
        .map(|id| id.to_string())
        .collect();
    assert_eq!(
        got, literal,
        "all five genuine in-band anchors recovered despite the near-duplicate flood"
    );
}
/// A full top-K whose weakest hit is below the floor does not signal
/// saturation; the in-band anchors come from the initial fetch.
#[test]
#[serial]
fn does_not_escalate_when_lowest_score_below_floor() {
    clear_agent_env();
    let db = SurrealDatabase::open_in_memory().unwrap();
    let max_anchors = 5usize;
    let k = max_anchors * ANCHOR_CANDIDATE_OVERFETCH;
    let target = entry_with_embedding("kn-target", unit_query(), "public", None, vec![]);

    let in_band = [("kn-a", 0.90f32), ("kn-b", 0.85), ("kn-c", 0.80)]
        .into_iter()
        .map(|(id, cos)| entry_with_embedding(id, unit_vec(cos), "public", None, vec![]));
    // k + 10 entries that all score below the floor.
    let below_floor = (0..(k + 10)).map(|i| {
        let cos = 0.70 - (i as f32) * 0.001;
        entry_with_embedding(
            &format!("kn-lo{i:02}"),
            unit_vec(cos),
            "public",
            None,
            vec![],
        )
    });

    let graph: Vec<KnowledgeEntry> = std::iter::once(target.clone())
        .chain(in_band)
        .chain(below_floor)
        .collect();
    for entry in &graph {
        db.upsert_knowledge(entry).unwrap();
    }

    // Precondition: the first fetch is full but its weakest hit is too low.
    let ctx = AgentContext::public_only();
    let initial = db
        .semantic_search_entries_scored(target.embedding.as_ref().unwrap(), &ctx, k)
        .unwrap();
    assert_eq!(initial.len(), k, "fetch is K-saturated");
    let weakest = initial.last().unwrap().1;
    assert!(
        weakest < DEFAULT_ANCHOR_THRESHOLD,
        "lowest returned score is below floor -> escalation must NOT fire"
    );

    auto_anchor("kn-target", &db, None).unwrap();

    let literal: Vec<String> = ["kn-a", "kn-b", "kn-c"]
        .iter()
        .map(|id| id.to_string())
        .collect();
    assert_eq!(
        anchors_of(&db, "kn-target"),
        literal,
        "the three in-band anchors are selected from the initial top-K (no escalation needed)"
    );
}
/// Evaluates `write_anchor_enabled(no_auto_anchor)` with
/// `MX_SKIP_WRITE_ANCHOR` temporarily set to `value` (or removed for `None`),
/// then restores the variable to what it was before the call.
fn gate_with_env(value: Option<&str>, no_auto_anchor: bool) -> bool {
    const FLAG: &str = "MX_SKIP_WRITE_ANCHOR";

    let assign = |wanted: Option<&str>| {
        // SAFETY: callers are `#[serial]` tests; this assumes no other thread
        // touches the environment while one of them runs.
        unsafe {
            if let Some(v) = wanted {
                std::env::set_var(FLAG, v)
            } else {
                std::env::remove_var(FLAG)
            }
        }
    };

    let saved = std::env::var(FLAG).ok();
    assign(value);
    let enabled = write_anchor_enabled(no_auto_anchor);
    assign(saved.as_deref());
    enabled
}
/// With the variable unset and no CLI flag, anchoring stays enabled.
#[test]
#[serial]
fn write_anchor_enabled_unset_flag_runs_anchoring() {
    let enabled = gate_with_env(None, false);
    assert!(
        enabled,
        "unset MX_SKIP_WRITE_ANCHOR must leave anchoring ON (default behavior preserved)"
    );
}
/// `MX_SKIP_WRITE_ANCHOR=1` disables anchoring.
#[test]
#[serial]
fn write_anchor_enabled_flag_1_skips_anchoring() {
    let enabled = gate_with_env(Some("1"), false);
    assert!(
        !enabled,
        "MX_SKIP_WRITE_ANCHOR=1 must turn write-path anchoring OFF"
    );
}
/// `true` disables anchoring regardless of letter case.
#[test]
#[serial]
fn write_anchor_enabled_flag_true_skips_anchoring() {
    let cases = [
        (
            "true",
            "MX_SKIP_WRITE_ANCHOR=true must turn write-path anchoring OFF",
        ),
        (
            "TRUE",
            "MX_SKIP_WRITE_ANCHOR is case-insensitive for 'true'",
        ),
    ];
    for (value, why) in cases {
        assert!(!gate_with_env(Some(value), false), "{}", why);
    }
}
/// Values other than `1` / `true` leave anchoring enabled.
#[test]
#[serial]
fn write_anchor_enabled_other_values_run_anchoring() {
    let cases = [
        ("0", "'0' must not opt out"),
        ("false", "'false' must not opt out"),
        ("", "empty must not opt out"),
    ];
    for (value, why) in cases {
        assert!(gate_with_env(Some(value), false), "{}", why);
    }
}
/// The CLI flag disables anchoring whatever the environment says.
#[test]
#[serial]
fn write_anchor_enabled_cli_flag_always_skips() {
    let cases = [
        (
            None,
            "--no-auto-anchor must skip anchoring even with the env flag unset",
        ),
        (
            Some("0"),
            "--no-auto-anchor must skip anchoring even when env flag would allow it",
        ),
    ];
    for (value, why) in cases {
        assert!(!gate_with_env(value, true), "{}", why);
    }
}
/// An entry written while anchoring is switched off via the environment must
/// still be readable after the database is dropped and reopened.
#[test]
#[serial]
fn skipped_anchor_write_persists_across_reopen() {
    const FLAG: &str = "MX_SKIP_WRITE_ANCHOR";
    const ENTRY_ID: &str = "kn-skip-durable";

    clear_agent_env();
    let saved_flag = std::env::var(FLAG).ok();
    // SAFETY: this test is `#[serial]`; assumes no other thread touches the
    // environment while it runs.
    unsafe { std::env::set_var(FLAG, "1") };

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("durability.surreal");
    assert!(
        !write_anchor_enabled(false),
        "precondition: flag=1 must skip anchoring"
    );

    // Write inside a scope so the first handle is dropped before reopening.
    {
        let db = SurrealDatabase::open_file_backed_for_test(&db_path).unwrap();
        let entry = entry_with_embedding(ENTRY_ID, unit_query(), "public", None, vec![]);
        db.upsert_knowledge(&entry).unwrap();
    }

    let reopened = SurrealDatabase::open_file_backed_for_test(&db_path).unwrap();
    let got = reopened
        .get(ENTRY_ID, &AgentContext::public_only())
        .unwrap();

    // Restore the flag before the final assertion can panic.
    // SAFETY: as above.
    unsafe {
        if let Some(previous) = saved_flag {
            std::env::set_var(FLAG, previous)
        } else {
            std::env::remove_var(FLAG)
        }
    }

    assert!(
        got.is_some(),
        "a write with anchoring skipped must persist across a drop+reopen (no commit_entry needed)"
    );
}
}