use khive_pack_kg::KgPack;
use khive_pack_knowledge::KnowledgePack;
use khive_runtime::{KhiveRuntime, RuntimeError, VerbRegistry, VerbRegistryBuilder};
use khive_storage::{SqlStatement, SqlValue};
use serde_json::{json, Value};
/// Creates the runtime used by a single test via `KhiveRuntime::memory()`.
///
/// # Panics
/// Panics with "memory runtime" when the runtime cannot be constructed.
fn rt() -> KhiveRuntime {
    let runtime = KhiveRuntime::memory();
    runtime.expect("memory runtime")
}
/// Test harness pairing a built verb registry with the runtime it was built from.
struct Fixture {
/// Registry through which `dispatch` / `dispatch_ns` invoke verbs.
registry: VerbRegistry,
/// Runtime handle used by the raw SQL helpers (`sql_exec`, `sql_query_one`).
rt: KhiveRuntime,
}
impl Fixture {
    /// Invokes `verb` with `args` through the registry and hands back its result untouched.
    async fn dispatch(&self, verb: &str, args: Value) -> Result<Value, RuntimeError> {
        self.registry.dispatch(verb, args).await
    }

    /// Same as [`Fixture::dispatch`], but first sets `args["namespace"]` to `ns`.
    async fn dispatch_ns(
        &self,
        verb: &str,
        ns: &str,
        mut args: Value,
    ) -> Result<Value, RuntimeError> {
        args["namespace"] = json!(ns);
        self.dispatch(verb, args).await
    }

    /// Runs one SQL statement through the runtime's SQL writer.
    ///
    /// # Panics
    /// Panics with "writer" if no writer can be obtained and with "sql_exec" if the
    /// statement fails.
    async fn sql_exec(&self, sql: &str, params: Vec<SqlValue>) {
        let statement = SqlStatement {
            sql: sql.into(),
            params,
            label: None,
        };
        let access = self.rt.sql();
        let mut writer = access.writer().await.expect("writer");
        writer.execute(statement).await.expect("sql_exec");
    }

    /// Runs a query through the runtime's SQL reader and returns what `query_row` yields.
    ///
    /// # Panics
    /// Panics with "reader" if no reader can be obtained and with "sql_query_one" if the
    /// query fails.
    async fn sql_query_one(
        &self,
        sql: &str,
        params: Vec<SqlValue>,
    ) -> Option<khive_storage::types::SqlRow> {
        let statement = SqlStatement {
            sql: sql.into(),
            params,
            label: None,
        };
        let access = self.rt.sql();
        let mut reader = access.reader().await.expect("reader");
        reader
            .query_row(statement)
            .await
            .expect("sql_query_one")
    }
}
/// Builds a [`Fixture`] with the KG and knowledge packs registered on `rt`.
///
/// After the registry is built, its edge rules are installed on the runtime.
///
/// # Panics
/// Panics with "registry builds" if the registry builder fails.
fn pack(rt: KhiveRuntime) -> Fixture {
    // Keep a handle for the fixture; the remaining clones are handed to the packs.
    let retained = rt.clone();

    let mut builder = VerbRegistryBuilder::new();
    builder.register(KgPack::new(rt.clone()));
    builder.register(KnowledgePack::new(rt.clone()));
    let registry = builder.build().expect("registry builds");

    let edge_rules = registry.all_edge_rules();
    rt.install_edge_rules(edge_rules);

    Fixture {
        registry,
        rt: retained,
    }
}
/// Returns a copy of column `col` when it holds a `SqlValue::Text`, otherwise `None`.
fn row_text(row: &khive_storage::types::SqlRow, col: &str) -> Option<String> {
    if let Some(SqlValue::Text(text)) = row.get(col) {
        Some(text.clone())
    } else {
        None
    }
}
/// Returns column `col` when it holds a `SqlValue::Integer`, otherwise `None`.
fn row_i64(row: &khive_storage::types::SqlRow, col: &str) -> Option<i64> {
    if let Some(SqlValue::Integer(number)) = row.get(col) {
        Some(*number)
    } else {
        None
    }
}
/// A deprecated atom must not show up in `knowledge.search` when no status is requested.
///
/// The atom is upserted, its `status` column is set to `deprecated` with raw SQL, and the
/// search (sent with `"rerank": false`) is checked for the atom's name.
#[tokio::test]
async fn w5_search_excludes_deprecated_by_default() {
    let f = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "dep-atom",
            "name": "Deprecated Atom",
            "content": "retrieval unique xyzqwerty deprecated content dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
        }]
    });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    f.sql_exec(
        "UPDATE knowledge_atoms SET status='deprecated' WHERE slug=?1",
        vec![SqlValue::Text("dep-atom".into())],
    )
    .await;

    let query = json!({ "query": "retrieval unique xyzqwerty", "rerank": false });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");

    let names: Vec<&str> = resp["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();
    let leaked = names.iter().any(|name| *name == "Deprecated Atom");
    assert!(
        !leaked,
        "deprecated atom must not appear in default search: {names:?}"
    );
}
/// A deprecated atom must be returned when the search passes `"status": "deprecated"`.
///
/// Mirrors the default-search test: upsert, mark deprecated with raw SQL, then search with
/// the explicit status and `"rerank": false`.
#[tokio::test]
async fn w5_search_includes_deprecated_when_explicitly_requested() {
    let f = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "dep-atom",
            "name": "Deprecated Atom",
            "content": "retrieval unique qwertyzyx deprecated content dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
        }]
    });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    f.sql_exec(
        "UPDATE knowledge_atoms SET status='deprecated' WHERE slug=?1",
        vec![SqlValue::Text("dep-atom".into())],
    )
    .await;

    let query =
        json!({ "query": "retrieval unique qwertyzyx", "status": "deprecated", "rerank": false });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");

    let names: Vec<&str> = resp["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();
    let found = names.iter().any(|name| *name == "Deprecated Atom");
    assert!(
        found,
        "deprecated atom must appear when status='deprecated' requested: {names:?}"
    );
}
/// With near-identical content, an atom in status `reviewed` must score above one in
/// status `draft`.
///
/// Both statuses are forced with raw SQL after the upsert; the search is sent with
/// `"rerank": false` and `"include_drafts": true`.
#[tokio::test]
async fn w5_status_multiplier_reviewed_beats_draft() {
    let f = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "reviewed-atom",
                "name": "Reviewed Atom",
                "content": "neural network gradient descent unique zzzxxx learning dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
            },
            {
                "slug": "draft-atom",
                "name": "Draft Atom",
                "content": "neural network gradient unique zzzxxx learning dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
            },
        ]
    });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    f.sql_exec(
        "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
        vec![SqlValue::Text("reviewed-atom".into())],
    )
    .await;
    f.sql_exec(
        "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
        vec![SqlValue::Text("draft-atom".into())],
    )
    .await;

    let query = json!({ "query": "neural network gradient learning zzzxxx", "rerank": false, "include_drafts": true });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");
    let results = resp["results"].as_array().expect("results");

    // Score of the hit carrying the given name, if such a hit exists and has a numeric score.
    let score_of = |wanted: &str| {
        results
            .iter()
            .find(|hit| hit["name"].as_str() == Some(wanted))
            .and_then(|hit| hit["score"].as_f64())
    };

    match (score_of("Reviewed Atom"), score_of("Draft Atom")) {
        (None, _) => panic!("reviewed atom missing from results: {results:?}"),
        // NOTE(review): a missing draft hit is tolerated here, so the comparison is skipped;
        // confirm this is intended given `include_drafts: true`.
        (Some(_), None) => {}
        (Some(r), Some(d)) => assert!(
            r > d,
            "reviewed score {r:.4} must exceed draft score {d:.4} (1.0× vs 0.8× multiplier)"
        ),
    }
}
/// An atom whose status is not one of the known values must still be searchable and
/// carry a score inside `[0, 1]`.
///
/// The status is forced to `custom` with raw SQL. The atom under test is located by name
/// rather than by position, so the assertions cannot accidentally pass on an unrelated
/// first hit.
///
/// NOTE(review): only the score range is checked; the test does not compare against a
/// baseline score to prove the multiplier is exactly neutral.
#[tokio::test]
async fn w5_status_multiplier_unknown_status_is_neutral() {
    let f = pack(rt());
    f.dispatch(
        "knowledge.upsert_atoms",
        json!({
            "atoms": [{
                "slug": "unknown-status-atom",
                "name": "Unknown Status Atom",
                "content": "unknown status neutral multiplier dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique unk78x"
            }]
        }),
    )
    .await
    .expect("upsert");
    f.sql_exec(
        "UPDATE knowledge_atoms SET status='custom' WHERE slug=?1",
        vec![SqlValue::Text("unknown-status-atom".into())],
    )
    .await;
    let resp = f
        .dispatch(
            "knowledge.search",
            json!({ "query": "unknown status neutral unique unk78x", "rerank": false, "include_drafts": true }),
        )
        .await
        .expect("search ok");
    let results = resp["results"].as_array().expect("results");
    // Look the atom up by name: checking `results[0]` would validate whichever hit ranks
    // first, not necessarily the atom this test is about.
    let hit = results
        .iter()
        .find(|r| r["name"].as_str() == Some("Unknown Status Atom"))
        .unwrap_or_else(|| {
            panic!("atom with unknown status must still appear in results: {results:?}")
        });
    let score = hit["score"].as_f64().expect("score");
    assert!(
        (0.0..=1.0).contains(&score),
        "score {score} for unknown-status atom must be in [0,1]"
    );
}
/// `knowledge.list` with `"type": "atom"` must return the live atom and omit the one whose
/// status was set to `deprecated`.
#[tokio::test]
async fn w5_list_excludes_deprecated_by_default() {
    let f = pack(rt());

    let seed = json!({
        "atoms": [
            { "slug": "vis-atom", "name": "Visible Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" },
            { "slug": "dep-atom", "name": "Hidden Deprecated Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" },
        ]
    });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    f.sql_exec(
        "UPDATE knowledge_atoms SET status='deprecated' WHERE slug=?1",
        vec![SqlValue::Text("dep-atom".into())],
    )
    .await;

    let listing = f
        .dispatch("knowledge.list", json!({ "type": "atom" }))
        .await
        .expect("list ok");
    let names: Vec<&str> = listing["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|entry| entry["name"].as_str())
        .collect();
    let listed = |wanted: &str| names.iter().any(|name| *name == wanted);

    assert!(
        listed("Visible Atom"),
        "visible atom should appear in list: {names:?}"
    );
    assert!(
        !listed("Hidden Deprecated Atom"),
        "deprecated atom must not appear in default list: {names:?}"
    );
}
/// An atom upserted with the `type:domain` tag must be reported with `kind == "domain"` by
/// `knowledge.search`.
#[tokio::test]
async fn w1_atom_with_type_domain_tag_returns_kind_domain_in_search() {
    let f = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "retrieval-domain",
            "name": "Retrieval Domain",
            "tags": ["type:domain", "retrieval"],
            "content": "retrieval domain techniques xyzabc organization dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity",
            "finalized": true
        }]
    });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let query = json!({ "query": "retrieval domain techniques xyzabc", "rerank": false });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");

    let hit = resp["results"]
        .as_array()
        .expect("results")
        .iter()
        .find(|candidate| candidate["name"].as_str() == Some("Retrieval Domain"))
        .expect("Retrieval Domain should appear in results");
    // A missing or non-string `kind` is compared as "" and therefore fails the assertion.
    let kind = hit["kind"].as_str().unwrap_or("");
    assert_eq!(
        kind, "domain",
        "atom with type:domain tag must have kind=domain in search results"
    );
}
/// A domain created through `knowledge.upsert_domains` must come back from a
/// `"type": "domain"` search, and every hit of that search must have `kind == "domain"`.
#[tokio::test]
async fn d1_upserted_domain_returns_kind_domain_in_domain_search() {
    let f = pack(rt());

    let seed = json!({
        "domains": [{
            "slug": "ml-techniques",
            "name": "ML Techniques",
            "description": "machine learning techniques domain organization — covering concepts techniques algorithms implementations applications use cases and design patterns in detail —"
        }]
    });
    f.dispatch("knowledge.upsert_domains", seed)
        .await
        .expect("upsert domain");

    let query =
        json!({ "query": "machine learning techniques domain", "type": "domain", "rerank": false });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");
    let results = resp["results"].as_array().expect("results");

    assert!(
        !results.is_empty(),
        "domain search should return the upserted domain"
    );
    results.iter().for_each(|r| {
        let kind = r["kind"].as_str().unwrap_or("");
        assert_eq!(
            kind, "domain",
            "all results in type=domain search must have kind=domain: {r}"
        );
    });
}
/// Editing an atom twice with byte-identical `overview` content must leave exactly one
/// `overview` row, and that row must keep the `verified` status set between the two edits.
#[tokio::test]
async fn w8_reimport_identical_section_content_is_idempotent() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "edit-atom", "name": "Edit Atom", "content": "original dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let content = "Overview content long enough to satisfy the 80-character minimum section length requirement. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index";
    // The same payload is sent before and after the status flip.
    let overview_edit = json!({ "id": "edit-atom", "sections": [{ "section_type": "overview", "content": content }] });

    f.dispatch("knowledge.edit", overview_edit.clone())
        .await
        .expect("edit ok");
    f.sql_exec(
        "UPDATE knowledge_sections SET status='verified' WHERE section_type='overview'",
        vec![],
    )
    .await;
    f.dispatch("knowledge.edit", overview_edit)
        .await
        .expect("edit ok");

    let count_row = f
        .sql_query_one(
            "SELECT COUNT(*) AS n FROM knowledge_sections WHERE section_type='overview'",
            vec![],
        )
        .await
        .expect("count row");
    let overview_rows = row_i64(&count_row, "n");
    assert_eq!(
        overview_rows,
        Some(1),
        "identical content must not create a sibling row"
    );

    let status_row = f
        .sql_query_one(
            "SELECT status FROM knowledge_sections WHERE section_type='overview'",
            vec![],
        )
        .await
        .expect("status row");
    let current_status = row_text(&status_row, "status");
    assert_eq!(
        current_status.as_deref(),
        Some("verified"),
        "re-importing identical content must not downgrade verification"
    );
}
/// Editing an atom with a second, different `overview` body must add a sibling row and
/// leave the previously verified row verified.
#[tokio::test]
async fn w8_edit_distinct_content_same_type_creates_sibling() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "edit-atom2", "name": "Edit Atom 2", "content": "original dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let first = "First overview block long enough to satisfy the 80-character minimum section length requirement. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector";
    let second = "Second overview block, distinct content, also long enough to satisfy the 80-character minimum. examples formalism boundary conditions operational guidance failure modes expert lens references other";

    let first_edit = json!({ "id": "edit-atom2", "sections": [{ "section_type": "overview", "content": first }] });
    f.dispatch("knowledge.edit", first_edit)
        .await
        .expect("edit ok");

    // Mark the first overview as verified before the second one is written.
    f.sql_exec(
        "UPDATE knowledge_sections SET status='verified' WHERE section_type='overview'",
        vec![],
    )
    .await;

    let second_edit = json!({ "id": "edit-atom2", "sections": [{ "section_type": "overview", "content": second }] });
    f.dispatch("knowledge.edit", second_edit)
        .await
        .expect("edit ok");

    let total_row = f
        .sql_query_one(
            "SELECT COUNT(*) AS n FROM knowledge_sections WHERE section_type='overview'",
            vec![],
        )
        .await
        .expect("count row");
    let overview_rows = row_i64(&total_row, "n");
    assert_eq!(
        overview_rows,
        Some(2),
        "distinct same-type content must coexist as sibling rows"
    );

    let verified_row = f
        .sql_query_one(
            "SELECT COUNT(*) AS n FROM knowledge_sections WHERE section_type='overview' AND status='verified'",
            vec![],
        )
        .await
        .expect("verified count row");
    let verified_rows = row_i64(&verified_row, "n");
    assert_eq!(
        verified_rows,
        Some(1),
        "inserting a sibling must not disturb an existing verified section"
    );
}
/// A successful `knowledge.challenge` on an atom's section must leave the atom with
/// `properties.dispute_count == 1`.
#[tokio::test]
async fn w9_challenge_increments_dispute_count() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "challenge-atom", "name": "Challengeable Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let section_edit = json!({
        "id": "challenge-atom",
        "sections": [{ "section_type": "overview", "content": "Section content for challenge test — this text is sufficiently long to satisfy the 80-character minimum. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }]
    });
    f.dispatch("knowledge.edit", section_edit)
        .await
        .expect("edit ok");

    let challenge =
        json!({ "atom_id": "challenge-atom", "section_type": "overview", "reason": "disputed claim" });
    f.dispatch("knowledge.challenge", challenge)
        .await
        .expect("challenge ok");

    let atom = f
        .dispatch("knowledge.get", json!({ "id": "challenge-atom" }))
        .await
        .expect("get ok");
    let dispute_count = atom["properties"]["dispute_count"]
        .as_i64()
        .expect("dispute_count should be integer");
    assert_eq!(
        dispute_count, 1,
        "challenge must increment dispute_count to 1"
    );
}
/// The first challenge on a freshly created atom (sent without a `reason`) must yield
/// `properties.dispute_count == 1`.
#[tokio::test]
async fn w9_challenge_on_atom_with_no_prior_dispute_count_starts_at_one() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "fresh-atom", "name": "Fresh Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let section_edit = json!({
        "id": "fresh-atom",
        "sections": [{ "section_type": "formalism", "content": "Formalism content for fresh-atom challenge test — this text satisfies the 80-character minimum length requirement. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }]
    });
    f.dispatch("knowledge.edit", section_edit)
        .await
        .expect("edit");

    let challenge = json!({ "atom_id": "fresh-atom", "section_type": "formalism" });
    f.dispatch("knowledge.challenge", challenge)
        .await
        .expect("challenge ok");

    let atom = f
        .dispatch("knowledge.get", json!({ "id": "fresh-atom" }))
        .await
        .expect("get ok");
    let disputes = atom["properties"]["dispute_count"]
        .as_i64()
        .expect("dispute_count");
    assert_eq!(
        disputes, 1,
        "first challenge on atom with no prior dispute_count must start at 1"
    );
}
/// After a challenge raises `dispute_count` to 1, `knowledge.adjudicate` with
/// `"resolution": "accept"` must bring it back to 0.
#[tokio::test]
async fn w9_adjudicate_decrements_dispute_count() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "adjud-atom", "name": "Adjudicate Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let section_edit = json!({
        "id": "adjud-atom",
        "sections": [{ "section_type": "core_model", "content": "Core model content for adjudication test — this text satisfies the 80-character minimum length requirement. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }]
    });
    f.dispatch("knowledge.edit", section_edit)
        .await
        .expect("edit");

    let challenge = json!({ "atom_id": "adjud-atom", "section_type": "core_model" });
    f.dispatch("knowledge.challenge", challenge)
        .await
        .expect("challenge");

    let before = f
        .dispatch("knowledge.get", json!({ "id": "adjud-atom" }))
        .await
        .expect("get");
    let count_before = before["properties"]["dispute_count"].as_i64();
    assert_eq!(count_before, Some(1));

    let verdict =
        json!({ "atom_id": "adjud-atom", "section_type": "core_model", "resolution": "accept" });
    f.dispatch("knowledge.adjudicate", verdict)
        .await
        .expect("adjudicate ok");

    let after = f
        .dispatch("knowledge.get", json!({ "id": "adjud-atom" }))
        .await
        .expect("get");
    // A missing or non-integer `dispute_count` is treated as 0 here.
    let after_count = after["properties"]["dispute_count"].as_i64().unwrap_or(0);
    assert_eq!(
        after_count, 0,
        "adjudicate must decrement dispute_count from 1 to 0"
    );
}
/// Challenging the same section twice must fail on the second attempt.
#[tokio::test]
async fn w9_double_challenge_is_rejected() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "dbl-chal", "name": "Double Challenge", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let section_edit = json!({ "id": "dbl-chal", "sections": [{ "section_type": "overview", "content": "Some content for double-challenge test — this text is sufficiently long to satisfy the 80-character minimum. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.edit", section_edit)
        .await
        .expect("edit");

    // The identical challenge payload is sent twice.
    let challenge = json!({ "atom_id": "dbl-chal", "section_type": "overview" });
    f.dispatch("knowledge.challenge", challenge.clone())
        .await
        .expect("first challenge ok");
    let second_attempt = f.dispatch("knowledge.challenge", challenge).await;

    assert!(second_attempt.is_err(), "double challenge must fail");
}
/// Challenging a section type that was never written for the atom must return an error.
#[tokio::test]
async fn w9_challenge_missing_section_is_rejected() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "no-sec", "name": "No Section", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let challenge = json!({ "atom_id": "no-sec", "section_type": "overview" });
    let outcome = f.dispatch("knowledge.challenge", challenge).await;

    assert!(
        outcome.is_err(),
        "challenge on nonexistent section must fail"
    );
}
/// Adjudicating a section that exists but was never challenged must return an error.
#[tokio::test]
async fn w9_adjudicate_non_disputed_section_is_rejected() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "adj-nodis", "name": "Not Disputed", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let section_edit = json!({ "id": "adj-nodis", "sections": [{ "section_type": "overview", "content": "Content for adjudicate-non-disputed test — this text is long enough to satisfy the 80-character minimum requirement. dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.edit", section_edit)
        .await
        .expect("edit");

    let verdict =
        json!({ "atom_id": "adj-nodis", "section_type": "overview", "resolution": "accept" });
    let outcome = f.dispatch("knowledge.adjudicate", verdict).await;

    assert!(
        outcome.is_err(),
        "adjudicate on non-disputed section must fail"
    );
}
/// Two `overview` siblings on one atom must be challengeable independently.
///
/// Sequence exercised:
/// 1. a challenge without `content_hash` is rejected while both siblings are undisputed;
/// 2. a challenge carrying the first sibling's `content_hash` disputes exactly one section;
/// 3. a second challenge without `content_hash` then succeeds and raises the atom's
///    `dispute_count` to 2.
#[tokio::test]
async fn w9_challenge_disambiguates_same_type_siblings() {
    let f = pack(rt());

    let seed = json!({ "atoms": [{ "slug": "sib-atom", "name": "Sibling Atom", "content": "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] });
    f.dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let two_overviews = json!({ "id": "sib-atom", "sections": [
        { "section_type": "overview", "content": "First overview variant — long enough to clear the 80-character minimum. dense sparse retrieval corpus benchmark search latency gradient transformer attention vector index" },
        { "section_type": "overview", "content": "Second overview variant — also long enough to clear the 80-character minimum. ranking fusion pipeline embedding rerank cosine similarity nearest neighbor corpus benchmark" }
    ] });
    let edit = f
        .dispatch("knowledge.edit", two_overviews)
        .await
        .expect("edit two siblings");
    let sections = edit["sections"].as_array().expect("sections array");
    assert_eq!(sections.len(), 2, "two distinct overviews must be siblings");
    let hash0 = sections[0]["content_hash"]
        .as_str()
        .expect("content_hash")
        .to_string();

    // Payload that names only the section type, not a specific sibling.
    let untargeted = json!({ "atom_id": "sib-atom", "section_type": "overview" });

    let ambiguous = f
        .dispatch("knowledge.challenge", untargeted.clone())
        .await;
    assert!(
        ambiguous.is_err(),
        "ambiguous same-type challenge without content_hash must be rejected"
    );

    let targeted =
        json!({ "atom_id": "sib-atom", "section_type": "overview", "content_hash": hash0 });
    let res = f
        .dispatch("knowledge.challenge", targeted)
        .await
        .expect("targeted challenge ok");
    assert_eq!(
        res["disputed"].as_i64(),
        Some(1),
        "exactly one section disputed"
    );

    let atom = f
        .dispatch("knowledge.get", json!({ "id": "sib-atom" }))
        .await
        .expect("get");
    assert_eq!(
        atom["properties"]["dispute_count"].as_i64(),
        Some(1),
        "dispute_count increments once, not once per sibling"
    );

    let res2 = f
        .dispatch("knowledge.challenge", untargeted)
        .await
        .expect("second sibling is independently challengeable");
    assert_eq!(res2["disputed"].as_i64(), Some(1));

    let atom2 = f
        .dispatch("knowledge.get", json!({ "id": "sib-atom" }))
        .await
        .expect("get2");
    assert_eq!(
        atom2["properties"]["dispute_count"].as_i64(),
        Some(2),
        "each sibling disputes independently"
    );
}
/// Importing a markdown file whose first line is `atlas_id: ATLAS-001` must produce an atom
/// whose `source_uri` is `atlas:ATLAS-001`.
///
/// The fixture file is written under the system temp directory. Directory creation is
/// checked explicitly so that a filesystem problem is reported at its source instead of
/// surfacing later as a misleading "write md" failure.
#[tokio::test]
async fn w10_import_with_atlas_id_sets_source_uri() {
    let f = pack(rt());
    let dir = std::env::temp_dir().join("khive_fixes_test_w10a");
    std::fs::create_dir_all(&dir).expect("create temp dir");
    let md_path = dir.join("atlas-doc.md");
    std::fs::write(
        &md_path,
        "atlas_id: ATLAS-001\n\n# Atlas Doc\n\nContent about retrieval covering dense sparse vector search ranking fusion embedding reranking latency gradient transformer attention nearest neighbor index corpus benchmark pipeline cosine.\n",
    )
    .expect("write md");
    let path_arg = md_path.to_str().expect("temp path is valid UTF-8");
    let resp = f
        .dispatch("knowledge.import", json!({ "path": path_arg }))
        .await
        .expect("import ok");
    assert!(
        resp["imported_atoms"].as_i64().unwrap_or(0) > 0,
        "expected at least 1 imported atom"
    );
    let atom = f
        .dispatch("knowledge.get", json!({ "id": "atlas-doc" }))
        .await
        .expect("get");
    // A missing `source_uri` is compared as "" and therefore fails the assertion.
    let source_uri = atom["source_uri"].as_str().unwrap_or("");
    assert_eq!(
        source_uri, "atlas:ATLAS-001",
        "import with atlas_id must set source_uri to 'atlas:{{id}}'"
    );
}
/// Importing a markdown file that contains a `## References` section must produce an atom
/// whose `source_type` is `paper`.
///
/// Directory creation is checked explicitly so that a filesystem problem is reported at its
/// source instead of surfacing later as a misleading "write md" failure.
#[tokio::test]
async fn w10_import_with_references_section_sets_source_type_paper() {
    let f = pack(rt());
    let dir = std::env::temp_dir().join("khive_fixes_test_w10b");
    std::fs::create_dir_all(&dir).expect("create temp dir");
    let md_path = dir.join("paper-doc.md");
    std::fs::write(
        &md_path,
        "# Paper Doc\n\nContent about machine learning covering dense sparse vector search ranking fusion embedding reranking latency gradient transformer attention nearest neighbor index corpus benchmark pipeline cosine.\n\n## References\n\n1. Smith et al. 2023\n2. Jones et al. 2022\n",
    )
    .expect("write md");
    let path_arg = md_path.to_str().expect("temp path is valid UTF-8");
    let resp = f
        .dispatch("knowledge.import", json!({ "path": path_arg }))
        .await
        .expect("import ok");
    assert!(
        resp["imported_atoms"].as_i64().unwrap_or(0) > 0,
        "expected at least 1 imported"
    );
    let atom = f
        .dispatch("knowledge.get", json!({ "id": "paper-doc" }))
        .await
        .expect("get");
    // A missing `source_type` is compared as "" and therefore fails the assertion.
    let source_type = atom["source_type"].as_str().unwrap_or("");
    assert_eq!(
        source_type, "paper",
        "import with references section (citation_count>0) must set source_type='paper'"
    );
}
/// Importing a markdown file with no references section must produce an atom whose
/// `source_type` is `imported`.
///
/// Directory creation is checked explicitly so that a filesystem problem is reported at its
/// source instead of surfacing later as a misleading "write md" failure.
#[tokio::test]
async fn w10_import_without_references_sets_source_type_imported() {
    let f = pack(rt());
    let dir = std::env::temp_dir().join("khive_fixes_test_w10c");
    std::fs::create_dir_all(&dir).expect("create temp dir");
    let md_path = dir.join("plain-doc.md");
    std::fs::write(
        &md_path,
        "# Plain Doc\n\nContent without any references section covering dense sparse vector search ranking fusion embedding reranking latency gradient transformer attention nearest neighbor index corpus benchmark pipeline cosine.\n",
    )
    .expect("write md");
    let path_arg = md_path.to_str().expect("temp path is valid UTF-8");
    let resp = f
        .dispatch("knowledge.import", json!({ "path": path_arg }))
        .await
        .expect("import ok");
    assert!(
        resp["imported_atoms"].as_i64().unwrap_or(0) > 0,
        "expected at least 1 imported"
    );
    let atom = f
        .dispatch("knowledge.get", json!({ "id": "plain-doc" }))
        .await
        .expect("get");
    // A missing `source_type` is compared as "" and therefore fails the assertion.
    let source_type = atom["source_type"].as_str().unwrap_or("");
    assert_eq!(
        source_type, "imported",
        "import without references must set source_type='imported'"
    );
}
/// Importing (with `"chunk_strategy": "section"`) a markdown file that has a title and
/// sections but no body text before the first section must still import one atom, at least
/// two sections, and give the atom at least 20 whitespace-separated words of content.
///
/// Directory creation is checked explicitly so that a filesystem problem is reported at its
/// source instead of surfacing later as a misleading "write md" failure.
#[tokio::test]
async fn w10_import_section_only_markdown_synthesizes_atom_content() {
    let f = pack(rt());
    let dir = std::env::temp_dir().join("khive_fixes_test_w10_section_only");
    std::fs::create_dir_all(&dir).expect("create temp dir");
    let md_path = dir.join("section-only.md");
    std::fs::write(
        &md_path,
        "# Section Only\n\n## Overview\n\nThis overview section is long enough to satisfy the eighty character minimum section length requirement, covering dense sparse retrieval corpus benchmark search latency.\n\n## Formalism\n\nThe formalism section also exceeds eighty characters with gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity.\n",
    )
    .expect("write md");
    let path_arg = md_path.to_str().expect("temp path is valid UTF-8");
    let resp = f
        .dispatch(
            "knowledge.import",
            json!({ "path": path_arg, "chunk_strategy": "section" }),
        )
        .await
        .expect("section-only import should succeed");
    assert_eq!(
        resp["imported_atoms"].as_i64().unwrap_or(0),
        1,
        "atom must be imported even though the pre-section body is empty"
    );
    assert!(
        resp["imported_sections"].as_i64().unwrap_or(0) >= 2,
        "section bodies must be imported"
    );
    let atom = f
        .dispatch("knowledge.get", json!({ "id": "section-only" }))
        .await
        .expect("get");
    // A missing `content` is treated as "" and therefore fails the word-count check.
    let content = atom["content"].as_str().unwrap_or("");
    assert!(
        content.split_whitespace().count() >= 20,
        "atom content should be synthesized from sections: {content:?}"
    );
}
/// Atoms sharing a slug in two namespaces must stay independent: re-upserting the slug in
/// `ns-alpha` with a new name must not change the `ns-beta` atom.
#[tokio::test]
async fn s4_upsert_atoms_update_does_not_affect_other_namespace() {
    let f = pack(rt());

    let body = "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity";
    // (namespace, atom name, panic message) for each upsert, applied in order.
    let writes = [
        ("ns-alpha", "Alpha Name", "upsert alpha"),
        ("ns-beta", "Beta Name", "upsert beta"),
        ("ns-alpha", "Alpha Name Updated", "update alpha"),
    ];
    for (ns, name, failure) in writes {
        let args = json!({ "atoms": [{ "slug": "shared-slug", "name": name, "content": body }] });
        f.dispatch_ns("knowledge.upsert_atoms", ns, args)
            .await
            .expect(failure);
    }

    let beta = f
        .dispatch_ns("knowledge.get", "ns-beta", json!({ "id": "shared-slug" }))
        .await
        .expect("get beta");
    let beta_name = beta["name"].as_str().unwrap_or("");
    assert_eq!(
        beta_name, "Beta Name",
        "update in ns-alpha must not affect ns-beta atom"
    );

    let alpha = f
        .dispatch_ns("knowledge.get", "ns-alpha", json!({ "id": "shared-slug" }))
        .await
        .expect("get alpha");
    let alpha_name = alpha["name"].as_str().unwrap_or("");
    assert_eq!(
        alpha_name, "Alpha Name Updated",
        "ns-alpha atom must reflect the update"
    );
}
/// Domains sharing a slug in two namespaces must stay independent: re-upserting the slug in
/// `ns-alpha` with a new name must not change the `ns-beta` domain.
#[tokio::test]
async fn s4_upsert_domains_update_does_not_affect_other_namespace() {
    let f = pack(rt());

    let description = "dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity";
    // (namespace, domain name, panic message) for each upsert, applied in order.
    let writes = [
        ("ns-alpha", "Alpha Domain", "upsert alpha domain"),
        ("ns-beta", "Beta Domain", "upsert beta domain"),
        ("ns-alpha", "Alpha Domain Updated", "update alpha domain"),
    ];
    for (ns, name, failure) in writes {
        let args = json!({ "domains": [{ "slug": "shared-domain", "name": name, "description": description }] });
        f.dispatch_ns("knowledge.upsert_domains", ns, args)
            .await
            .expect(failure);
    }

    let beta = f
        .dispatch_ns("knowledge.get", "ns-beta", json!({ "id": "shared-domain" }))
        .await
        .expect("get beta domain");
    let beta_name = beta["name"].as_str().unwrap_or("");
    assert_eq!(
        beta_name, "Beta Domain",
        "update in ns-alpha must not affect ns-beta domain"
    );
}
/// `knowledge.fold` must accept `diversity_bias` and `epistemic_weight`, select at least
/// one candidate, and report a `total_size` no larger than the budget of 350.
#[tokio::test]
async fn w6_fold_accepts_diversity_bias_and_epistemic_weight() {
    let f = pack(rt());

    let request = json!({
        "candidates": [
            { "id": "c1", "score": 0.9, "size": 100, "information_gain": 0.8 },
            { "id": "c2", "score": 0.7, "size": 150, "information_gain": 0.6 },
            { "id": "c3", "score": 0.5, "size": 80, "information_gain": 0.4 },
        ],
        "budget": 350,
        "diversity_bias": 0.5,
        "epistemic_weight": 0.3
    });
    let resp = f
        .dispatch("knowledge.fold", request)
        .await
        .expect("fold with diversity_bias and epistemic_weight must succeed");

    let selected = resp["selected"].as_array().expect("selected array");
    let total_size = resp["total_size"].as_u64().expect("total_size");

    let picked_any = !selected.is_empty();
    assert!(picked_any, "at least one candidate must be selected");
    assert!(
        total_size <= 350,
        "total_size {total_size} must not exceed budget 350"
    );
}
/// `knowledge.fold` given two equally scored candidates that differ only in
/// `information_gain` (with `epistemic_weight` 1.0) must select at least one of them.
///
/// NOTE(review): only non-emptiness of `selected` is asserted; the test does not check
/// which candidate wins.
#[tokio::test]
async fn w6_fold_information_gain_threads_to_selector() {
    let f = pack(rt());

    let request = json!({
        "candidates": [
            { "id": "high-ig", "score": 0.5, "size": 100, "information_gain": 0.9 },
            { "id": "low-ig", "score": 0.5, "size": 100, "information_gain": 0.0 },
        ],
        "budget": 10000,
        "epistemic_weight": 1.0
    });
    let resp = f
        .dispatch("knowledge.fold", request)
        .await
        .expect("fold ok");

    let selected = resp["selected"].as_array().expect("selected");
    let picked_any = !selected.is_empty();
    assert!(
        picked_any,
        "fold must select at least one candidate: {resp:?}"
    );
}
/// Searching a three-atom corpus with `"rerank": false` must return at least one hit, and
/// every hit must carry a numeric score that is positive, finite and at most 1.0.
#[tokio::test]
async fn f1_fuse_ann_hits_produces_valid_scores_via_search() {
    let f = pack(rt());

    let corpus = json!({
        "atoms": [
            { "slug": "rrf-a", "name": "RRF Alpha", "content": "rrf fusion scoring alpha dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true },
            { "slug": "rrf-b", "name": "RRF Beta", "content": "rrf fusion scoring beta dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true },
            { "slug": "rrf-c", "name": "RRF Gamma", "content": "rrf fusion scoring gamma dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true },
        ]
    });
    f.dispatch("knowledge.upsert_atoms", corpus)
        .await
        .expect("seed corpus");

    let query = json!({ "query": "reciprocal rank fusion scoring", "rerank": false });
    let resp = f
        .dispatch("knowledge.search", query)
        .await
        .expect("search ok");
    let results = resp["results"].as_array().expect("results");
    assert!(!results.is_empty(), "fusion pipeline must produce results");

    // The three checks run in this order for every hit: positive, finite, then <= 1.0.
    results.iter().for_each(|r| {
        let score = r["score"]
            .as_f64()
            .expect("each result must have a numeric score");
        assert!(
            score > 0.0,
            "fused score must be positive, got {score} for {r:?}"
        );
        assert!(
            score.is_finite(),
            "fused score must be finite, got {score} for {r:?}"
        );
        assert!(
            score <= 1.0,
            "fused score must be normalized to [0,1], got {score} for {r:?}"
        );
    });
}
/// Seeds a single atom, marks it `reviewed` through raw SQL, searches for its
/// sentinel text with `rerank: false`, and checks the first hit's score is
/// positive, finite, and at most 1.0.
#[tokio::test]
async fn f1_rrf_k_60_constant_produces_finite_scores() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "rrf-single",
            "name": "Single Result",
            "content": "unique sentinel zzzyyyxxx exact match content dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
        }]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    // The atom was upserted without `finalized`, so its status is set directly.
    let slug_param = vec![SqlValue::Text("rrf-single".into())];
    fixture
        .sql_exec(
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            slug_param,
        )
        .await;

    let search_args = json!({ "query": "unique sentinel zzzyyyxxx", "rerank": false });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");

    let results = resp["results"].as_array().expect("results");
    assert!(
        !results.is_empty(),
        "single-result search must return the atom"
    );

    let top_hit = &results[0];
    let score = top_hit["score"].as_f64().expect("score");
    assert!(
        score > 0.0 && score.is_finite(),
        "RRF_K=60 score must be positive and finite: {score}"
    );
    assert!(
        score <= 1.0,
        "RRF_K=60 score must be normalized to [0,1]: {score}"
    );
}
#[tokio::test]
async fn upsert_finalizing_existing_atom_promotes_draft_to_reviewed() {
let f = pack(rt());
f.dispatch(
"knowledge.upsert_atoms",
json!({ "atoms": [{ "slug": "lifecycle-atom", "name": "Lifecycle", "content": "body dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }] }),
)
.await
.expect("insert draft");
let row = f
.sql_query_one(
"SELECT status FROM knowledge_atoms WHERE slug=?1",
vec![SqlValue::Text("lifecycle-atom".into())],
)
.await
.expect("atom row");
assert_eq!(
row_text(&row, "status").as_deref(),
Some("draft"),
"fresh non-finalized atom is draft"
);
f.dispatch(
"knowledge.upsert_atoms",
json!({ "atoms": [{ "slug": "lifecycle-atom", "name": "Lifecycle", "content": "body dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true }] }),
)
.await
.expect("finalize upsert");
let row = f
.sql_query_one(
"SELECT status FROM knowledge_atoms WHERE slug=?1",
vec![SqlValue::Text("lifecycle-atom".into())],
)
.await
.expect("atom row");
assert_eq!(
row_text(&row, "status").as_deref(),
Some("reviewed"),
"finalizing via upsert must promote draft -> reviewed"
);
}
/// Inserts a finalized atom, forces its status to `deprecated` through raw
/// SQL, re-upserts the slug with `finalized: true`, and checks the stored
/// status is still `deprecated`.
#[tokio::test]
async fn upsert_finalizing_does_not_demote_non_draft_status() {
    let fixture = pack(rt());
    let slug = "non-draft-atom";

    let first_write = json!({ "atoms": [{ "slug": "non-draft-atom", "name": "V", "content": "b dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true }] });
    fixture
        .dispatch("knowledge.upsert_atoms", first_write)
        .await
        .expect("insert");

    // Put the row into a non-draft state the upsert path did not choose itself.
    fixture
        .sql_exec(
            "UPDATE knowledge_atoms SET status='deprecated' WHERE slug=?1",
            vec![SqlValue::Text(slug.into())],
        )
        .await;

    let second_write = json!({ "atoms": [{ "slug": "non-draft-atom", "name": "V2", "content": "b2 dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true }] });
    fixture
        .dispatch("knowledge.upsert_atoms", second_write)
        .await
        .expect("re-upsert");

    let row = fixture
        .sql_query_one(
            "SELECT status FROM knowledge_atoms WHERE slug=?1",
            vec![SqlValue::Text(slug.into())],
        )
        .await
        .expect("row");
    let status = row_text(&row, "status");
    assert_eq!(
        status.as_deref(),
        Some("deprecated"),
        "re-finalizing must not overwrite a non-draft status (deprecated in this case)"
    );
}
/// Seeds one atom and runs `knowledge.search` with a hyphenated query and an
/// apostrophe-containing query; each call must return `Ok`.
#[tokio::test]
async fn fts_query_special_characters_do_not_crash() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "tenant-isolation",
            "name": "Tenant Isolation",
            "content": "multi-tenant isolation and Bob's data separation dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
        }]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atom");

    let queries = ["multi-tenant isolation", "Bob's tenant"];
    for query in queries {
        // Only success is required; the response body is not inspected.
        fixture
            .dispatch(
                "knowledge.search",
                json!({ "query": query, "rerank": false }),
            )
            .await
            .expect("search should not crash on FTS5 special characters");
    }
}
/// Seeds one atom and runs `knowledge.search` over a table of labelled
/// queries containing quotes, boolean keywords, `NEAR`, wildcards, colons,
/// carets, parentheses, hyphens, and apostrophes.
///
/// Every query must return `Ok` with a `results` array.
#[tokio::test]
async fn fts_operator_matrix_does_not_crash() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [{
            "slug": "fts-matrix-anchor",
            "name": "FTS Matrix Anchor",
            "content": "tenant isolation operator regression matrix anchor dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
        }]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atom");

    // (label, query) pairs; the label only appears in failure messages.
    let cases: &[(&str, &str)] = &[
        ("double-quoted phrase", "\"tenant isolation\""),
        ("double-quoted embedded", "Bob \"quoted\" tenant"),
        ("boolean AND", "tenant AND isolation"),
        ("boolean OR", "tenant OR isolation"),
        ("boolean NOT", "tenant NOT isolation"),
        ("NEAR operator", "tenant NEAR(isolation, 5)"),
        ("wildcard word", "tenant*"),
        ("wildcard only", "***"),
        ("colon selector", "tenant:isolation"),
        ("caret", "tenant ^ isolation"),
        ("parentheses", "(tenant isolation)"),
        ("mixed special", "(\"+_~!\")"),
        ("mixed colon star caret", "tenant:foo^bar*"),
        ("hyphenated", "multi-tenant isolation"),
        ("apostrophe", "Bob's tenant"),
    ];

    for &(label, query) in cases {
        let outcome = fixture
            .dispatch(
                "knowledge.search",
                json!({ "query": query, "rerank": false }),
            )
            .await;
        let resp = match outcome {
            Ok(value) => value,
            Err(err) => panic!("#570 query {label} {query:?} must not crash FTS5: {err}"),
        };
        assert!(
            resp["results"].is_array(),
            "#570 query {label} {query:?} must return results array, got: {resp:?}"
        );
    }
}
fn rt_with_default_embedder() -> KhiveRuntime {
use khive_runtime::{AllowAllGate, BackendId, RuntimeConfig};
use khive_types::Namespace;
use lattice_embed::EmbeddingModel;
use std::sync::Arc;
KhiveRuntime::new(RuntimeConfig {
db_path: None,
default_namespace: Namespace::local(),
embedding_model: Some(EmbeddingModel::AllMiniLmL6V2),
additional_embedding_models: vec![],
gate: Arc::new(AllowAllGate),
packs: vec!["kg".to_string(), "knowledge".to_string()],
backend_id: BackendId::main(),
brain_profile: None,
})
.expect("runtime with default embedder")
}
#[tokio::test]
async fn stats_embedding_coverage_counts_atom_vectors() {
use khive_types::{Namespace, SubstrateKind};
use uuid::Uuid;
let f = pack(rt_with_default_embedder());
f.dispatch(
"knowledge.upsert_atoms",
json!({
"atoms": [
{ "slug": "covered", "name": "Covered", "content": "has vector dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" },
{ "slug": "uncovered", "name": "Uncovered", "content": "no vector dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" }
]
}),
)
.await
.expect("upsert atoms");
let row = f
.sql_query_one(
"SELECT id FROM knowledge_atoms WHERE namespace = ?1 AND slug = ?2",
vec![
SqlValue::Text("local".into()),
SqlValue::Text("covered".into()),
],
)
.await
.expect("covered atom row");
let atom_id = match row.get("id") {
Some(SqlValue::Text(id)) => Uuid::parse_str(id).expect("uuid id"),
other => panic!("expected id text, got {other:?}"),
};
let token =
f.rt.authorize(Namespace::local())
.expect("local namespace token");
let vectors = f.rt.vectors(&token).expect("vector store");
vectors
.insert(
atom_id,
SubstrateKind::Entity,
"local",
"knowledge.atom",
vec![vec![0.0f32; 384]],
)
.await
.expect("insert vector");
let stats = f
.dispatch("knowledge.stats", json!({}))
.await
.expect("stats ok");
let coverage = stats["embedding_coverage"]
.as_f64()
.expect("embedding_coverage f64");
assert!(
(coverage - 0.5).abs() < 1e-6,
"expected 0.5 coverage, got: {coverage}"
);
}
#[tokio::test]
async fn search_scores_are_normalized_without_rank_inversion() {
let f = pack(rt());
f.dispatch(
"knowledge.upsert_atoms",
json!({
"atoms": [
{
"slug": "norm-high",
"name": "Normalization High",
"content": "normalization unique qzxqzx scoring alpha gamma delta epsilon dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
},
{
"slug": "norm-mid",
"name": "Normalization Mid",
"content": "normalization unique qzxqzx beta scoring dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
},
{
"slug": "norm-low",
"name": "Normalization Low",
"content": "normalization qzxqzx dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity"
},
]
}),
)
.await
.expect("seed atoms");
f.sql_exec(
"UPDATE knowledge_atoms SET status='reviewed' WHERE slug IN ('norm-high', 'norm-mid', 'norm-low')",
vec![],
)
.await;
let resp = f
.dispatch(
"knowledge.search",
json!({ "query": "normalization unique qzxqzx", "rerank": false }),
)
.await
.expect("search ok");
let results = resp["results"].as_array().expect("results");
assert!(
results.len() >= 2,
"expected at least 2 results: {results:?}"
);
for r in results {
let score = r["score"].as_f64().expect("score");
assert!(
(0.0..=1.0).contains(&score),
"score {score} out of [0,1] range for result {r:?}"
);
}
let high = results
.iter()
.find(|r| r["slug"].as_str() == Some("norm-high"));
let mid = results
.iter()
.find(|r| r["slug"].as_str() == Some("norm-mid"));
if let (Some(h), Some(m)) = (high, mid) {
let hs = h["score"].as_f64().unwrap();
let ms = m["score"].as_f64().unwrap();
assert!(
hs >= ms,
"high-relevance atom score {hs:.4} must not be less than mid-relevance score {ms:.4}"
);
}
}
/// Seeds two finalized atoms on a runtime built by
/// `rt_with_default_embedder`, then runs the same query twice: once without a
/// `rerank` argument and once with `rerank: false`.
///
/// Asserts that both searches return results and that every score from the
/// first search lies in `[0, 1]`.
#[tokio::test]
async fn search_defaults_to_embedding_rerank_when_embedder_configured() {
    let fixture = pack(rt_with_default_embedder());

    let seed = json!({
        "atoms": [
            { "slug": "rerank-a", "name": "Cosine Alpha", "content": "cosine similarity embedding rerank vector dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true },
            { "slug": "rerank-b", "name": "Cosine Beta", "content": "cosine similarity embedding rerank dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity", "finalized": true },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atoms");

    // Search 1: `rerank` left unspecified.
    let default_args = json!({ "query": "cosine similarity embedding rerank unique uuuvvv" });
    let resp_default = fixture
        .dispatch("knowledge.search", default_args)
        .await
        .expect("default rerank search ok");
    let results_default = resp_default["results"].as_array().expect("results");
    assert!(
        !results_default.is_empty(),
        "expected results with default rerank"
    );
    results_default.iter().for_each(|r| {
        let score = r["score"].as_f64().expect("score");
        assert!(
            (0.0..=1.0).contains(&score),
            "default-rerank score {score} out of [0,1] for {r:?}"
        );
    });

    // Search 2: reranking explicitly disabled.
    let norerank_args =
        json!({ "query": "cosine similarity embedding rerank unique uuuvvv", "rerank": false });
    let resp_norerank = fixture
        .dispatch("knowledge.search", norerank_args)
        .await
        .expect("explicit rerank=false search ok");
    let results_norerank = resp_norerank["results"].as_array().expect("results");
    assert!(
        !results_norerank.is_empty(),
        "expected results with rerank=false"
    );

    // NOTE(review): the pairwise score comparison below is computed but bound
    // to an underscore-prefixed name and never asserted, so this test does not
    // fail when the two searches produce identical scores.
    let default_scores = results_default.iter().filter_map(|r| r["score"].as_f64());
    let norerank_scores = results_norerank.iter().filter_map(|r| r["score"].as_f64());
    let _scores_differ = default_scores
        .zip(norerank_scores)
        .any(|(a, b)| (a - b).abs() > 1e-6);
}
/// Seeds two atoms with identical content, sets one to `reviewed` and the
/// other to `draft` through raw SQL, and checks that a `knowledge.search`
/// without `include_drafts` returns the reviewed atom but not the draft one.
#[tokio::test]
async fn issue78_search_excludes_drafts_by_default() {
    let fixture = pack(rt());

    // Both atoms carry the same body so only their status distinguishes them.
    let shared_content = "transformer attention mechanism self-attention multi-head unique zz78 covering concepts techniques algorithms implementations applications use cases and design patterns in detail for production systems";
    let seed = json!({
        "atoms": [
            {
                "slug": "reviewed-atom-78",
                "name": "Reviewed Atom 78",
                "content": shared_content
            },
            {
                "slug": "draft-atom-78",
                "name": "Draft Atom 78",
                "content": shared_content
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let status_updates = [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "reviewed-atom-78",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "draft-atom-78",
        ),
    ];
    for (sql, slug) in status_updates {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args = json!({ "query": "transformer attention zz78", "rerank": false });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");

    let results = resp["results"].as_array().expect("results");
    let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();

    let has_reviewed = names.iter().any(|n| *n == "Reviewed Atom 78");
    let has_draft = names.iter().any(|n| *n == "Draft Atom 78");
    assert!(
        has_reviewed,
        "reviewed atom must appear by default: {names:?}"
    );
    assert!(
        !has_draft,
        "draft atom must be excluded by default: {names:?}"
    );
}
/// Seeds one `reviewed` and one `draft` atom with identical content and
/// checks that `knowledge.search` with `include_drafts: true` returns both.
#[tokio::test]
async fn issue78_include_drafts_true_returns_draft_atoms() {
    let fixture = pack(rt());

    let shared_content = "sparse retrieval bm25 inverted index ranking corpus search unique zz78b covering concepts techniques algorithms implementations applications use cases and design patterns for production systems";
    let seed = json!({
        "atoms": [
            {
                "slug": "rev-atom-78b",
                "name": "Reviewed Atom 78b",
                "content": shared_content
            },
            {
                "slug": "dft-atom-78b",
                "name": "Draft Atom 78b",
                "content": shared_content
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    // Statuses are forced through SQL, reviewed first and draft second.
    let status_updates = [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "rev-atom-78b",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "dft-atom-78b",
        ),
    ];
    for (sql, slug) in status_updates {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args =
        json!({ "query": "sparse retrieval bm25 zz78b", "rerank": false, "include_drafts": true });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");

    let results = resp["results"].as_array().expect("results");
    let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();

    let has_draft = names.iter().any(|n| *n == "Draft Atom 78b");
    let has_reviewed = names.iter().any(|n| *n == "Reviewed Atom 78b");
    assert!(
        has_draft,
        "draft atom must appear when include_drafts=true: {names:?}"
    );
    assert!(
        has_reviewed,
        "reviewed atom must also appear when include_drafts=true: {names:?}"
    );
}
/// Seeds one `reviewed` and one `deprecated` atom with identical content and
/// checks that `knowledge.search` with `include_drafts: true` returns the
/// reviewed atom but not the deprecated one.
#[tokio::test]
async fn issue78_include_drafts_does_not_surface_deprecated() {
    let fixture = pack(rt());

    let shared_content = "vector quantization product quantization compression retrieval unique zz78c covering concepts techniques algorithms implementations applications use cases and design patterns for production systems";
    let seed = json!({
        "atoms": [
            {
                "slug": "rev-atom-78c",
                "name": "Reviewed Atom 78c",
                "content": shared_content
            },
            {
                "slug": "dep-atom-78c",
                "name": "Deprecated Atom 78c",
                "content": shared_content
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let status_updates = [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "rev-atom-78c",
        ),
        (
            "UPDATE knowledge_atoms SET status='deprecated' WHERE slug=?1",
            "dep-atom-78c",
        ),
    ];
    for (sql, slug) in status_updates {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args =
        json!({ "query": "vector quantization zz78c", "rerank": false, "include_drafts": true });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");

    let results = resp["results"].as_array().expect("results");
    let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();

    let has_reviewed = names.iter().any(|n| *n == "Reviewed Atom 78c");
    let has_deprecated = names.iter().any(|n| *n == "Deprecated Atom 78c");
    assert!(has_reviewed, "reviewed atom must appear: {names:?}");
    assert!(
        !has_deprecated,
        "deprecated atom must not appear even with include_drafts=true: {names:?}"
    );
}
/// Seeds one `reviewed` and one `draft` atom with identical content and
/// checks that `knowledge.search` with `status: "draft"` returns the draft
/// atom and omits the reviewed one.
#[tokio::test]
async fn issue78_explicit_status_filter_overrides_include_drafts() {
    let fixture = pack(rt());

    let shared_content = "graph neural network node embedding link prediction unique zz78d covering concepts techniques algorithms implementations applications use cases and design patterns for production systems";
    let seed = json!({
        "atoms": [
            {
                "slug": "rev-atom-78d",
                "name": "Reviewed Atom 78d",
                "content": shared_content
            },
            {
                "slug": "dft-atom-78d",
                "name": "Draft Atom 78d",
                "content": shared_content
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("upsert");

    let status_updates = [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "rev-atom-78d",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "dft-atom-78d",
        ),
    ];
    for (sql, slug) in status_updates {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    // Note the request sets `status` only; `include_drafts` is not passed.
    let search_args =
        json!({ "query": "graph neural network zz78d", "rerank": false, "status": "draft" });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");

    let results = resp["results"].as_array().expect("results");
    let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();

    let has_draft = names.iter().any(|n| *n == "Draft Atom 78d");
    let has_reviewed = names.iter().any(|n| *n == "Reviewed Atom 78d");
    assert!(
        has_draft,
        "explicit status=draft must return draft atoms: {names:?}"
    );
    assert!(
        !has_reviewed,
        "explicit status=draft must not return reviewed atoms: {names:?}"
    );
}
/// Seeds two atoms tagged `type:domain`, sets one to `reviewed` and one to
/// `draft`, and checks that `knowledge.suggest` does not return the draft one.
///
/// Only the absence of the draft atom is asserted; the reviewed atom is not
/// required to appear.
#[tokio::test]
async fn issue78_suggest_excludes_draft_domain_atoms_by_default() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "suggest-domain-rev",
                "name": "Suggest Domain Reviewed",
                "content": "machine learning transformer architecture attention mechanism neural network deep learning optimization gradient descent backpropagation unique zz78s reviewed domain for suggest test",
                "tags": ["type:domain"]
            },
            {
                "slug": "suggest-domain-dft",
                "name": "Suggest Domain Draft",
                "content": "machine learning transformer architecture attention mechanism neural network deep learning optimization gradient descent backpropagation unique zz78s draft domain for suggest test",
                "tags": ["type:domain"]
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed domain atoms");

    let status_updates = [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "suggest-domain-rev",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "suggest-domain-dft",
        ),
    ];
    for (sql, slug) in status_updates {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let suggest_args = json!({
        "query": "machine learning transformer architecture attention mechanism gradient"
    });
    let resp = fixture
        .dispatch("knowledge.suggest", suggest_args)
        .await
        .expect("suggest ok");

    let results = resp["results"].as_array().expect("results");
    let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();

    let has_draft = names.iter().any(|n| *n == "Suggest Domain Draft");
    assert!(
        !has_draft,
        "suggest must exclude draft domain atoms by default: {names:?}"
    );
}
/// On a runtime built by `rt_with_default_embedder`, seeds one atom and
/// searches with `rerank: false`; the call must succeed and every returned
/// score must lie in `[0, 1]`. An empty result list also passes.
#[tokio::test]
async fn search_rerank_false_is_explicit_opt_out() {
    let fixture = pack(rt_with_default_embedder());

    let seed = json!({
        "atoms": [
            { "slug": "optout-a", "name": "Opt Out Alpha", "content": "opt out rerank test dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atom");

    let search_args = json!({ "query": "opt out rerank false unique wwwxxx", "rerank": false });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("rerank=false search ok");

    let results = resp["results"].as_array().expect("results");
    results.iter().for_each(|r| {
        let score = r["score"].as_f64().expect("score");
        assert!(
            (0.0..=1.0).contains(&score),
            "score {score} out of [0,1] with rerank=false"
        );
    });
}
/// On a runtime built by `rt_with_default_embedder`, seeds one atom and
/// searches with `decompose: true` and a query containing `tenant:isolation`,
/// leaving `rerank` unspecified. The call must succeed and return a `results`
/// array.
#[tokio::test]
async fn search_default_rerank_decompose_guard_avoids_fts_no_such_column() {
    let fixture = pack(rt_with_default_embedder());

    let seed = json!({
        "atoms": [
            { "slug": "decompose-guard", "name": "Decompose Guard", "content": "multi-concept tenant isolation decompose guard dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity" },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atom");

    let search_args = json!({
        "query": "multi-concept tenant:isolation decompose guard",
        "decompose": true,
    });
    let resp = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("default rerank + decompose must not crash");

    assert!(
        resp["results"].is_array(),
        "expected results array, got: {resp:?}"
    );
}
/// Tests for how `knowledge.index` and `reindex_knowledge` account for
/// embedding failures. Two fake embedders are registered on the runtime: one
/// that always returns a single vector regardless of how many texts it is
/// given, and one that always returns an error.
mod embed_failure_tests {
    use super::*;
    use async_trait::async_trait;
    use khive_runtime::{AllowAllGate, BackendId, EmbedderProvider, RuntimeConfig};
    use khive_types::Namespace;
    use lattice_embed::{EmbedError, EmbeddingModel, EmbeddingService};
    use std::sync::Arc;

    /// Name reported by both fake providers.
    const MODEL_KEY: &str = "all-minilm-l6-v2";

    /// Dimension count reported by both fake providers, and the length of the
    /// single vector `OneDimService` returns.
    const FAKE_DIMENSIONS: usize = 4;

    /// Embedding service that ignores its input and always returns exactly
    /// one vector, so any request for more than one text comes back with the
    /// wrong number of embeddings.
    struct OneDimService;

    #[async_trait]
    impl EmbeddingService for OneDimService {
        async fn embed(
            &self,
            _texts: &[String],
            _model: EmbeddingModel,
        ) -> std::result::Result<Vec<Vec<f32>>, EmbedError> {
            let single_vector = vec![1.0_f32; FAKE_DIMENSIONS];
            Ok(vec![single_vector])
        }

        fn supports_model(&self, _model: EmbeddingModel) -> bool {
            true
        }

        fn name(&self) -> &'static str {
            "one-dim"
        }
    }

    /// Provider that builds a [`OneDimService`].
    struct OneDimProvider;

    #[async_trait]
    impl EmbedderProvider for OneDimProvider {
        fn name(&self) -> &str {
            MODEL_KEY
        }

        fn dimensions(&self) -> usize {
            FAKE_DIMENSIONS
        }

        async fn build(
            &self,
        ) -> std::result::Result<Arc<dyn EmbeddingService>, khive_runtime::RuntimeError> {
            let service: Arc<dyn EmbeddingService> = Arc::new(OneDimService);
            Ok(service)
        }
    }

    /// Embedding service whose `embed` always returns
    /// `EmbedError::InferenceFailed`.
    struct AlwaysFailService;

    #[async_trait]
    impl EmbeddingService for AlwaysFailService {
        async fn embed(
            &self,
            _texts: &[String],
            _model: EmbeddingModel,
        ) -> std::result::Result<Vec<Vec<f32>>, EmbedError> {
            let failure = EmbedError::InferenceFailed("synthetic test failure".into());
            Err(failure)
        }

        fn supports_model(&self, _model: EmbeddingModel) -> bool {
            true
        }

        fn name(&self) -> &'static str {
            "always-fail"
        }
    }

    /// Provider that builds an [`AlwaysFailService`].
    struct AlwaysFailProvider;

    #[async_trait]
    impl EmbedderProvider for AlwaysFailProvider {
        fn name(&self) -> &str {
            MODEL_KEY
        }

        fn dimensions(&self) -> usize {
            FAKE_DIMENSIONS
        }

        async fn build(
            &self,
        ) -> std::result::Result<Arc<dyn EmbeddingService>, khive_runtime::RuntimeError> {
            let service: Arc<dyn EmbeddingService> = Arc::new(AlwaysFailService);
            Ok(service)
        }
    }

    /// Builds a runtime configured for `AllMiniLmL6V2` with the `kg` and
    /// `knowledge` packs, then registers `fake` as an embedder on it.
    fn rt_with_fake(fake: impl EmbedderProvider + 'static) -> KhiveRuntime {
        let config = RuntimeConfig {
            db_path: None,
            default_namespace: Namespace::local(),
            embedding_model: Some(EmbeddingModel::AllMiniLmL6V2),
            additional_embedding_models: Vec::new(),
            gate: Arc::new(AllowAllGate),
            packs: vec![String::from("kg"), String::from("knowledge")],
            backend_id: BackendId::main(),
            brain_profile: None,
        };
        let runtime = KhiveRuntime::new(config).expect("runtime");
        runtime.register_embedder(fake);
        runtime
    }

    /// Reindex options used by the section tests: sections only, no ANN
    /// rebuild, no explicit batch size, with `drop_existing` as given.
    fn sections_only_options(
        drop_existing: bool,
    ) -> khive_pack_knowledge::KnowledgeReindexOptions {
        khive_pack_knowledge::KnowledgeReindexOptions {
            atoms: false,
            sections: true,
            drop_existing,
            rebuild_ann: false,
            batch_size: None,
        }
    }

    /// Wraps `rt` in a fixture and upserts two atoms
    /// (`embed-fail-a`, `embed-fail-b`).
    async fn fixture_with_two_atoms(rt: KhiveRuntime) -> Fixture {
        let fixture = pack(rt);
        let seed = json!({
            "atoms": [
                {
                    "slug": "embed-fail-a",
                    "name": "Embed Fail A",
                    "content": "first atom content for embed failure regression test dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector"
                },
                {
                    "slug": "embed-fail-b",
                    "name": "Embed Fail B",
                    "content": "second atom content for embed failure regression test dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector"
                }
            ]
        });
        fixture
            .dispatch("knowledge.upsert_atoms", seed)
            .await
            .expect("upsert atoms");
        fixture
    }

    /// With the single-vector embedder, `knowledge.index` must report both
    /// atoms as `failed`, none as `indexed`, and none as `skipped`.
    #[tokio::test]
    async fn index_embed_count_mismatch_counts_as_failed() {
        let fixture = fixture_with_two_atoms(rt_with_fake(OneDimProvider)).await;
        let result = fixture
            .dispatch("knowledge.index", json!({}))
            .await
            .expect("index ok");

        // Missing counters fall back to values that make the assertion fail.
        let failed = result["failed"].as_u64().unwrap_or(0);
        let indexed = result["indexed"].as_u64().unwrap_or(u64::MAX);
        let skipped = result["skipped"].as_u64().unwrap_or(u64::MAX);

        assert_eq!(
            failed, 2,
            "count-mismatch must report both atoms as failed: {result:?}"
        );
        assert_eq!(
            indexed, 0,
            "no atoms must be indexed on count-mismatch: {result:?}"
        );
        assert_eq!(
            skipped, 0,
            "count-mismatch must not appear in skipped: {result:?}"
        );
    }

    /// With the always-failing embedder, `knowledge.index` must report both
    /// atoms as `failed`, none as `indexed`, and none as `skipped`.
    #[tokio::test]
    async fn index_embed_error_counts_as_failed() {
        let fixture = fixture_with_two_atoms(rt_with_fake(AlwaysFailProvider)).await;
        let result = fixture
            .dispatch("knowledge.index", json!({}))
            .await
            .expect("index ok");

        let failed = result["failed"].as_u64().unwrap_or(0);
        let indexed = result["indexed"].as_u64().unwrap_or(u64::MAX);
        let skipped = result["skipped"].as_u64().unwrap_or(u64::MAX);

        assert_eq!(
            failed, 2,
            "embed Err must report both atoms as failed: {result:?}"
        );
        assert_eq!(
            indexed, 0,
            "no atoms must be indexed on embed error: {result:?}"
        );
        assert_eq!(
            skipped, 0,
            "embed Err must not appear in skipped: {result:?}"
        );
    }

    /// The `knowledge.index` response must contain an `ann_failed` key, and
    /// it must be `false`, when run with the single-vector embedder.
    #[tokio::test]
    async fn index_result_carries_ann_failed_false_when_ann_block_skipped() {
        let fixture = fixture_with_two_atoms(rt_with_fake(OneDimProvider)).await;
        let result = fixture
            .dispatch("knowledge.index", json!({}))
            .await
            .expect("index ok");

        let has_key = result.get("ann_failed").is_some();
        assert!(
            has_key,
            "result JSON must carry ann_failed key: {result:?}"
        );

        // A non-boolean value is treated as `true` so the assertion fails.
        let ann_failed = result["ann_failed"].as_bool().unwrap_or(true);
        assert!(
            !ann_failed,
            "ann_failed must be false when ANN block did not run: {result:?}"
        );
    }

    /// Wraps `rt` in a fixture, upserts the atom `sec-embed-fail`, and gives
    /// it an `overview` and a `formalism` section through `knowledge.edit`.
    async fn fixture_with_two_sections(rt: KhiveRuntime) -> Fixture {
        let fixture = pack(rt);

        let atom_seed = json!({
            "atoms": [{
                "slug": "sec-embed-fail",
                "name": "Section Embed Fail",
                "content": "atom content for section embed failure regression test dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor"
            }]
        });
        fixture
            .dispatch("knowledge.upsert_atoms", atom_seed)
            .await
            .expect("upsert atom");

        let section_edit = json!({
            "id": "sec-embed-fail",
            "sections": [
                {
                    "section_type": "overview",
                    "content": "Overview content for section embed failure regression test. This text is long enough to satisfy the 80-character minimum section length requirement — dense sparse retrieval corpus benchmark search latency."
                },
                {
                    "section_type": "formalism",
                    "content": "Formalism content for section embed failure regression test. This text is long enough to satisfy the 80-character minimum section length requirement — gradient descent transformer attention vector index."
                }
            ]
        });
        fixture
            .dispatch("knowledge.edit", section_edit)
            .await
            .expect("edit sections");

        fixture
    }

    /// Section-only reindex with `drop_existing: true` and the single-vector
    /// embedder must report two `sections_failed` and zero
    /// `sections_indexed`.
    #[tokio::test]
    async fn section_embed_count_mismatch_counts_as_sections_failed() {
        let fixture = fixture_with_two_sections(rt_with_fake(OneDimProvider)).await;
        let runtime = fixture.rt.clone();
        let token = runtime.authorize(Namespace::local()).expect("authorize");

        let result = khive_pack_knowledge::reindex_knowledge(
            &runtime,
            &token,
            sections_only_options(true),
            None,
            None,
        )
        .await
        .expect("reindex_knowledge ok");

        let sections_failed = result["sections_failed"].as_u64().unwrap_or(0);
        let sections_indexed = result["sections_indexed"].as_u64().unwrap_or(u64::MAX);
        assert_eq!(
            sections_failed, 2,
            "count-mismatch must report both sections as sections_failed: {result:?}"
        );
        assert_eq!(
            sections_indexed, 0,
            "no sections must be indexed on count-mismatch: {result:?}"
        );
    }

    /// Section-only reindex with `drop_existing: true` and the always-failing
    /// embedder must report two `sections_failed` and zero
    /// `sections_indexed`.
    #[tokio::test]
    async fn section_embed_error_counts_as_sections_failed() {
        let fixture = fixture_with_two_sections(rt_with_fake(AlwaysFailProvider)).await;
        let runtime = fixture.rt.clone();
        let token = runtime.authorize(Namespace::local()).expect("authorize");

        let result = khive_pack_knowledge::reindex_knowledge(
            &runtime,
            &token,
            sections_only_options(true),
            None,
            None,
        )
        .await
        .expect("reindex_knowledge ok");

        let sections_failed = result["sections_failed"].as_u64().unwrap_or(0);
        let sections_indexed = result["sections_indexed"].as_u64().unwrap_or(u64::MAX);
        assert_eq!(
            sections_failed, 2,
            "embed Err must report both sections as sections_failed: {result:?}"
        );
        assert_eq!(
            sections_indexed, 0,
            "no sections must be indexed on embed error: {result:?}"
        );
    }

    /// Section-only reindex with `drop_existing: false`, a batch size of 1,
    /// and the always-failing embedder must return (rather than run forever)
    /// and report two `sections_failed` and zero `sections_indexed`.
    #[tokio::test]
    async fn section_keep_existing_failures_terminate_and_report() {
        let fixture = fixture_with_two_sections(rt_with_fake(AlwaysFailProvider)).await;
        let runtime = fixture.rt.clone();
        let token = runtime.authorize(Namespace::local()).expect("authorize");

        let options = khive_pack_knowledge::KnowledgeReindexOptions {
            batch_size: Some(1),
            ..sections_only_options(false)
        };
        let result =
            khive_pack_knowledge::reindex_knowledge(&runtime, &token, options, None, None)
                .await
                .expect("reindex_knowledge must terminate, not loop");

        let sections_failed = result["sections_failed"].as_u64().unwrap_or(0);
        let sections_indexed = result["sections_indexed"].as_u64().unwrap_or(u64::MAX);
        assert_eq!(
            sections_failed, 2,
            "keep-existing must attempt each section once and report both failed: {result:?}"
        );
        assert_eq!(
            sections_indexed, 0,
            "no sections indexed when every embed fails: {result:?}"
        );
    }
}
mod ann_bypass_regression {
use super::*;
use async_trait::async_trait;
use khive_runtime::{AllowAllGate, BackendId, EmbedderProvider, RuntimeConfig};
use khive_types::Namespace;
use lattice_embed::{EmbedError, EmbeddingModel, EmbeddingService};
use std::sync::Arc;
/// Name returned by `CorrectDimProvider::name`.
const MODEL_KEY: &str = "all-minilm-l6-v2";
/// Vector length produced by `CorrectDimService` and reported by
/// `CorrectDimProvider::dimensions`.
const DIM: usize = 384;
/// Fake embedding service that returns one `DIM`-length vector per input
/// text, without reading the text itself.
struct CorrectDimService;

#[async_trait]
impl EmbeddingService for CorrectDimService {
    /// Produces `texts.len()` vectors. The vector for the text at position
    /// `i` is filled with `v / sqrt(DIM * v * v)` where `v = i + 1`, which
    /// gives each vector unit L2 norm up to floating-point rounding.
    async fn embed(
        &self,
        texts: &[String],
        _model: EmbeddingModel,
    ) -> std::result::Result<Vec<Vec<f32>>, EmbedError> {
        let mut embeddings = Vec::with_capacity(texts.len());
        for ordinal in 1..=texts.len() {
            let v = ordinal as f32;
            let norm = (DIM as f32 * v * v).sqrt();
            embeddings.push(vec![v / norm; DIM]);
        }
        Ok(embeddings)
    }

    fn supports_model(&self, _model: EmbeddingModel) -> bool {
        true
    }

    fn name(&self) -> &'static str {
        "correct-dim"
    }
}
/// Provider that reports `MODEL_KEY` / `DIM` and builds a
/// `CorrectDimService`.
struct CorrectDimProvider;

#[async_trait]
impl EmbedderProvider for CorrectDimProvider {
    fn name(&self) -> &str {
        MODEL_KEY
    }

    fn dimensions(&self) -> usize {
        DIM
    }

    async fn build(
        &self,
    ) -> std::result::Result<Arc<dyn EmbeddingService>, khive_runtime::RuntimeError> {
        let service: Arc<dyn EmbeddingService> = Arc::new(CorrectDimService);
        Ok(service)
    }
}
/// Builds a runtime configured for `AllMiniLmL6V2` with the `kg` and
/// `knowledge` packs, then registers `CorrectDimProvider` as an embedder.
///
/// Panics if the runtime cannot be constructed.
fn rt_with_correct_embedder() -> KhiveRuntime {
    let config = RuntimeConfig {
        db_path: None,
        default_namespace: Namespace::local(),
        embedding_model: Some(EmbeddingModel::AllMiniLmL6V2),
        additional_embedding_models: Vec::new(),
        gate: Arc::new(AllowAllGate),
        packs: vec![String::from("kg"), String::from("knowledge")],
        backend_id: BackendId::main(),
        brain_profile: None,
    };
    let runtime = KhiveRuntime::new(config).expect("runtime");
    runtime.register_embedder(CorrectDimProvider);
    runtime
}
#[tokio::test]
async fn ann_warm_draft_atom_excluded_by_default_search() {
let f = pack(rt_with_correct_embedder());
f.dispatch(
"knowledge.upsert_atoms",
json!({
"atoms": [
{
"slug": "ann-rev-atom",
"name": "ANN Reviewed Atom",
"content": "neural network attention mechanism transformer dense sparse retrieval corpus benchmark search latency gradient descent vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique ann78rev"
},
{
"slug": "ann-dft-atom",
"name": "ANN Draft Atom",
"content": "neural network attention mechanism transformer dense sparse retrieval corpus benchmark search latency gradient descent vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique ann78dft"
},
]
}),
)
.await
.expect("seed atoms");
f.sql_exec(
"UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
vec![SqlValue::Text("ann-rev-atom".into())],
)
.await;
f.sql_exec(
"UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
vec![SqlValue::Text("ann-dft-atom".into())],
)
.await;
let idx = f
.dispatch("knowledge.index", json!({ "rebuild_ann": true }))
.await
.expect("index ok");
assert!(
idx["indexed"].as_u64().unwrap_or(0) >= 2,
"both atoms must be indexed for the ANN to hold them: {idx:?}"
);
let resp = f
.dispatch(
"knowledge.search",
json!({
"query": "neural network attention mechanism transformer unique ann78",
"rerank": false
}),
)
.await
.expect("default search ok");
let results = resp["results"].as_array().expect("results");
let names: Vec<&str> = results.iter().filter_map(|r| r["name"].as_str()).collect();
assert!(
!names.contains(&"ANN Draft Atom"),
"draft atom must be excluded by default even when warm ANN finds it: {names:?}"
);
let resp_incl = f
.dispatch(
"knowledge.search",
json!({
"query": "neural network attention mechanism transformer unique ann78",
"rerank": false,
"include_drafts": true
}),
)
.await
.expect("include_drafts search ok");
let results_incl = resp_incl["results"].as_array().expect("results");
let names_incl: Vec<&str> = results_incl
.iter()
.filter_map(|r| r["name"].as_str())
.collect();
assert!(
names_incl.contains(&"ANN Draft Atom"),
"draft atom must appear when include_drafts=true: {names_incl:?}"
);
}
}
/// Seeds one reviewed and one draft atom, then searches with
/// `exclude_status: "reviewed"` and `include_drafts: true` and no `status` key.
///
/// The reviewed atom must be absent from the results and the draft atom present.
#[tokio::test]
async fn exclude_status_without_status_param_excludes_target_status() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "prec-reviewed",
                "name": "Precedence Reviewed Atom",
                "content": "precedence exclude status regression reviewed dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique prec78a"
            },
            {
                "slug": "prec-draft",
                "name": "Precedence Draft Atom",
                "content": "precedence exclude status regression draft dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique prec78a"
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atoms");

    // Set each atom's status directly in the table.
    for (sql, slug) in [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "prec-reviewed",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "prec-draft",
        ),
    ] {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args = json!({
        "query": "precedence exclude status regression unique prec78a",
        "rerank": false,
        "exclude_status": "reviewed",
        "include_drafts": true
    });
    let response = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");
    let names: Vec<&str> = response["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();

    assert!(
        !names.contains(&"Precedence Reviewed Atom"),
        "exclude_status=reviewed must remove reviewed atoms (no status= set): {names:?}"
    );
    assert!(
        names.contains(&"Precedence Draft Atom"),
        "draft atom must appear when include_drafts=true and exclude_status=reviewed: {names:?}"
    );
}
/// Seeds one reviewed and one draft atom, then searches with both
/// `status: "reviewed"` and `exclude_status: "reviewed"`.
///
/// The reviewed atom must still be returned and the draft atom must not.
#[tokio::test]
async fn exclude_status_is_ignored_when_status_param_is_set() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "prec2-reviewed",
                "name": "Prec2 Reviewed Atom",
                "content": "precedence2 status override reviewed dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique prec78b"
            },
            {
                "slug": "prec2-draft",
                "name": "Prec2 Draft Atom",
                "content": "precedence2 status override draft dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique prec78b"
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atoms");

    // Set each atom's status directly in the table.
    for (sql, slug) in [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "prec2-reviewed",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "prec2-draft",
        ),
    ] {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args = json!({
        "query": "precedence2 status override unique prec78b",
        "rerank": false,
        "status": "reviewed",
        "exclude_status": "reviewed"
    });
    let response = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");
    let names: Vec<&str> = response["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();

    assert!(
        names.contains(&"Prec2 Reviewed Atom"),
        "status=reviewed overrides exclude_status=reviewed: reviewed atom must appear: {names:?}"
    );
    assert!(
        !names.contains(&"Prec2 Draft Atom"),
        "status=reviewed must not return draft atoms: {names:?}"
    );
}
/// Seeds one reviewed and one draft atom, then searches with an empty
/// `exclude_status` string and no `include_drafts` key.
///
/// The draft atom must be absent from the results and the reviewed atom present.
#[tokio::test]
async fn blank_exclude_status_falls_through_to_default_draft_exclusion() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "blank-ex-reviewed",
                "name": "Blank Ex Reviewed",
                "content": "blank exclude status normalization reviewed dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique blnk78a"
            },
            {
                "slug": "blank-ex-draft",
                "name": "Blank Ex Draft",
                "content": "blank exclude status normalization draft dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique blnk78a"
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atoms");

    // Set each atom's status directly in the table.
    for (sql, slug) in [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "blank-ex-reviewed",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "blank-ex-draft",
        ),
    ] {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args = json!({
        "query": "blank exclude status normalization unique blnk78a",
        "rerank": false,
        "exclude_status": ""
    });
    let response = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");
    let names: Vec<&str> = response["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();

    assert!(
        !names.contains(&"Blank Ex Draft"),
        "blank exclude_status must not bypass draft exclusion: {names:?}"
    );
    assert!(
        names.contains(&"Blank Ex Reviewed"),
        "reviewed atom must appear with blank exclude_status: {names:?}"
    );
}
/// Seeds one reviewed and one draft atom, then searches with
/// `exclude_status: " draft "` (space-padded) and `include_drafts: true`.
///
/// The draft atom must be absent from the results and the reviewed atom present.
#[tokio::test]
async fn whitespace_padded_exclude_status_normalizes_to_draft() {
    let fixture = pack(rt());

    let seed = json!({
        "atoms": [
            {
                "slug": "ws-ex-reviewed",
                "name": "Ws Ex Reviewed",
                "content": "whitespace padded exclude status reviewed dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique wspad78"
            },
            {
                "slug": "ws-ex-draft",
                "name": "Ws Ex Draft",
                "content": "whitespace padded exclude status draft dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique wspad78"
            },
        ]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", seed)
        .await
        .expect("seed atoms");

    // Set each atom's status directly in the table.
    for (sql, slug) in [
        (
            "UPDATE knowledge_atoms SET status='reviewed' WHERE slug=?1",
            "ws-ex-reviewed",
        ),
        (
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            "ws-ex-draft",
        ),
    ] {
        fixture
            .sql_exec(sql, vec![SqlValue::Text(slug.into())])
            .await;
    }

    let search_args = json!({
        "query": "whitespace padded exclude status unique wspad78",
        "rerank": false,
        "exclude_status": " draft ",
        "include_drafts": true
    });
    let response = fixture
        .dispatch("knowledge.search", search_args)
        .await
        .expect("search ok");
    let names: Vec<&str> = response["results"]
        .as_array()
        .expect("results")
        .iter()
        .filter_map(|hit| hit["name"].as_str())
        .collect();

    assert!(
        !names.contains(&"Ws Ex Draft"),
        "whitespace-padded \" draft \" must normalize to \"draft\" and exclude draft atoms: {names:?}"
    );
    assert!(
        names.contains(&"Ws Ex Reviewed"),
        "reviewed atom must appear when exclude_status=\" draft \": {names:?}"
    );
}
/// Seeds a single draft atom, makes it a member of a new domain, looks up the
/// domain id through a `type: "domain"` search, and then calls
/// `knowledge.compose` with that id in `domain_ids`.
///
/// The composed `data.atoms` list must contain the draft member's slug.
#[tokio::test]
async fn explicit_domain_ids_compose_includes_draft_member_atoms() {
    let fixture = pack(rt());

    let atom_seed = json!({
        "atoms": [{
            "slug": "compose-draft-member",
            "name": "Compose Draft Member",
            "content": "compose explicit domain draft member atom dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique cmp78d"
        }]
    });
    fixture
        .dispatch("knowledge.upsert_atoms", atom_seed)
        .await
        .expect("seed draft atom");

    // Set the atom's status to draft directly in the table.
    fixture
        .sql_exec(
            "UPDATE knowledge_atoms SET status='draft' WHERE slug=?1",
            vec![SqlValue::Text("compose-draft-member".into())],
        )
        .await;

    let domain_seed = json!({
        "domains": [{
            "slug": "compose-explicit-domain",
            "name": "Compose Explicit Domain",
            "description": "compose explicit domain ids draft member test dense sparse retrieval corpus benchmark search latency gradient descent transformer attention vector index nearest neighbor ranking fusion pipeline embedding rerank cosine similarity unique cmp78e",
            "members": ["compose-draft-member"]
        }]
    });
    fixture
        .dispatch("knowledge.upsert_domains", domain_seed)
        .await
        .expect("upsert domain");

    // The domain search and the compose call share the same query text.
    let query = "compose explicit domain ids draft member unique cmp78e";

    let domain_search = fixture
        .dispatch(
            "knowledge.search",
            json!({ "query": query, "type": "domain", "rerank": false }),
        )
        .await
        .expect("search domain");
    let domain_hit = domain_search["results"]
        .as_array()
        .expect("results")
        .iter()
        .find(|hit| hit["slug"].as_str() == Some("compose-explicit-domain"));
    let domain_id = domain_hit
        .and_then(|hit| hit["id"].as_str())
        .expect("domain id in results")
        .to_owned();

    let composed = fixture
        .dispatch(
            "knowledge.compose",
            json!({
                "query": query,
                "domain_ids": [&domain_id]
            }),
        )
        .await
        .expect("compose ok");

    // Collected values are the atoms' slugs, which is what the assertion checks.
    let atom_names: Vec<&str> = composed["data"]["atoms"]
        .as_array()
        .expect("atoms")
        .iter()
        .filter_map(|atom| atom["slug"].as_str())
        .collect();
    assert!(
        atom_names.contains(&"compose-draft-member"),
        "explicit domain_ids compose must include draft member atoms (caller opted in): {atom_names:?}"
    );
}