use std::sync::Arc;
use smooth_operator::access_control::{AccessContext, DocAcl};
use smooth_operator::curation::{
with_boost, with_document_set, CuratedKnowledgeStore, DocMeta, RetrievalFilter,
};
use smooth_operator::runtime::KnowledgeChatRuntime;
use smooth_operator::StorageAdapter;
use smooth_operator_adapter_memory::InMemoryStorageAdapter;
use smooth_operator_core::llm_provider::MockLlmClient;
use smooth_operator_core::{Document, DocumentType, InMemoryKnowledge, KnowledgeBase, LlmConfig};
/// Test fixture: creates a `DocumentType::Documentation` document from `content` and
/// `source`, then replaces its `id` with the caller-supplied one so assertions can
/// refer to documents by a readable, fixed identifier.
fn doc(id: &str, content: &str, source: &str) -> Document {
    let mut document = Document::new(content, source, DocumentType::Documentation);
    document.id = id.to_owned();
    document
}
/// Ingests `document` through the store's ingest handle.
///
/// Panics with the message `"ingest"` if the handle reports an error.
fn ingest(store: &CuratedKnowledgeStore, document: Document) {
    let handle = store.ingest_handle();
    handle.ingest(document).expect("ingest");
}
/// Runs `query` against a reader built from `filter` and `access` and returns the
/// `document_id` of every hit, in the order the reader produced them.
///
/// Panics with the message `"query"` if the underlying query fails.
fn retrieved_ids(
    store: &CuratedKnowledgeStore,
    filter: RetrievalFilter,
    access: AccessContext,
    query: &str,
    limit: usize,
) -> Vec<String> {
    let reader = store.reader(filter, access);
    let hits = reader.query(query, limit).expect("query");
    hits.into_iter()
        .map(|hit| hit.document_id)
        .collect::<Vec<String>>()
}
/// A set-scoped query must return documents tagged with that set (including documents
/// tagged with several sets) and must never return documents tagged only with another set.
#[test]
fn set_scope_returns_only_alpha_docs() {
    let store = CuratedKnowledgeStore::new(Arc::new(InMemoryKnowledge::new()));

    // Three fixtures: one per set, plus one that belongs to both.
    let alpha_only = with_document_set(doc("a-only", "clearance alpha fact", "alpha.md"), ["alpha"]);
    ingest(&store, alpha_only);
    let beta_only = with_document_set(doc("b-only", "clearance beta fact", "beta.md"), ["beta"]);
    ingest(&store, beta_only);
    let shared = with_document_set(
        doc("both", "clearance shared fact", "both.md"),
        ["alpha", "beta"],
    );
    ingest(&store, shared);

    // Anonymous query for "clearance", restricted to a single document set.
    let scoped_to = |set: &'static str| {
        retrieved_ids(
            &store,
            RetrievalFilter::in_sets([set]),
            AccessContext::anonymous(),
            "clearance",
            10,
        )
    };
    // Membership check that avoids allocating a `String` per lookup.
    let has = |ids: &[String], id: &str| ids.iter().any(|got| got == id);

    let alpha = scoped_to("alpha");
    assert!(
        has(&alpha, "a-only"),
        "alpha scope must include alpha-only; saw {alpha:?}"
    );
    assert!(
        has(&alpha, "both"),
        "alpha scope must include the multi-set doc; saw {alpha:?}"
    );
    assert!(
        !has(&alpha, "b-only"),
        "SET LEAK: alpha scope must NEVER include beta-only doc; saw {alpha:?}"
    );

    let beta = scoped_to("beta");
    assert!(
        has(&beta, "b-only"),
        "beta scope must include beta-only; saw {beta:?}"
    );
    assert!(
        has(&beta, "both"),
        "the multi-set doc must appear for EITHER scope; saw {beta:?}"
    );
    assert!(
        !has(&beta, "a-only"),
        "SET LEAK: beta scope must NEVER include alpha-only doc; saw {beta:?}"
    );
}
/// Without a boost the two documents must come back as `other`, `canonical`; attaching a
/// 3.0 boost to `canonical` must flip that order.
#[test]
fn boost_reorders_against_raw_similarity() {
    let other_content = "widget guide reference";
    let canonical_content = "widget guide reference manual extra";
    let query = "widget guide";

    // Runs one scenario against a fresh store: ingest `other`, then `canonical` after it
    // has been passed through `decorate`, and return the ids in retrieval order.
    let ranked = |decorate: fn(Document) -> Document| -> Vec<String> {
        let store = CuratedKnowledgeStore::new(Arc::new(InMemoryKnowledge::new()));
        ingest(&store, doc("other", other_content, "other.md"));
        ingest(
            &store,
            decorate(doc("canonical", canonical_content, "canon.md")),
        );
        retrieved_ids(
            &store,
            RetrievalFilter::none(),
            AccessContext::anonymous(),
            query,
            10,
        )
    };

    // Baseline: the canonical document is ingested untouched.
    let order = ranked(|plain| plain);
    assert_eq!(
        order,
        ["other", "canonical"],
        "baseline (no boost) must preserve raw-similarity order (higher density first)"
    );

    // Boosted: same corpus, but the canonical document carries a 3.0 boost.
    let order = ranked(|plain| with_boost(plain, 3.0));
    assert_eq!(
        order.first().map(|id| id.as_str()),
        Some("canonical"),
        "BOOST must promote the canonical doc above the raw-higher 'other'; saw {order:?}"
    );
    assert_eq!(
        order,
        ["canonical", "other"],
        "boost must REORDER the two results; saw {order:?}"
    );
}
/// A `kind == "prose"` metadata filter must keep both prose documents and drop the
/// document whose `kind` is `"code"`.
#[test]
fn metadata_eq_filter_returns_only_matching_docs() {
    let store = CuratedKnowledgeStore::new(Arc::new(InMemoryKnowledge::new()));

    // (id, content, source, kind) — ingested in this order.
    for (id, content, source, kind) in [
        ("prose-1", "widget overview prose", "p1.md", "prose"),
        ("code-1", "widget overview code", "c1.rs", "code"),
        ("prose-2", "widget overview narrative", "p2.md", "prose"),
    ] {
        ingest(&store, doc(id, content, source).with_metadata("kind", kind));
    }

    let ids = retrieved_ids(
        &store,
        RetrievalFilter::none().with_metadata_eq("kind", "prose"),
        AccessContext::anonymous(),
        "widget overview",
        10,
    );
    let has = |id: &str| ids.iter().any(|got| got == id);

    assert!(has("prose-1"), "prose doc must pass; saw {ids:?}");
    assert!(has("prose-2"), "prose doc must pass; saw {ids:?}");
    assert!(
        !has("code-1"),
        "metadata filter must drop the code doc; saw {ids:?}"
    );
}
/// The ACL and the set filter must both be enforced: inside set `alpha`, bob sees the
/// public document but not the one restricted to alice, while alice sees her own.
#[test]
fn acl_and_set_filter_both_apply() {
    let store = CuratedKnowledgeStore::new(Arc::new(InMemoryKnowledge::new()));

    // Alice-only document in set alpha.
    let alice_only = with_document_set(
        doc("alice-alpha", "clearance restricted alpha", "alice.md"),
        ["alpha"],
    );
    ingest(&store, DocAcl::for_users(["alice"]).attach_to(alice_only));

    // Public document in the same set.
    let open_doc = with_document_set(
        doc("public-alpha", "clearance public alpha", "pub.md"),
        ["alpha"],
    );
    ingest(&store, DocAcl::public().attach_to(open_doc));

    // Same alpha-scoped "clearance" query, issued on behalf of different callers.
    let alpha_ids_for = |caller: AccessContext| {
        retrieved_ids(
            &store,
            RetrievalFilter::in_sets(["alpha"]),
            caller,
            "clearance",
            10,
        )
    };
    let has = |ids: &[String], id: &str| ids.iter().any(|got| got == id);

    let bob = alpha_ids_for(AccessContext::for_user("bob"));
    assert!(
        has(&bob, "public-alpha"),
        "bob should see the public alpha doc; saw {bob:?}"
    );
    assert!(
        !has(&bob, "alice-alpha"),
        "ACL∧SET LEAK: bob must NOT see alice-only doc even within set alpha; saw {bob:?}"
    );

    let alice = alpha_ids_for(AccessContext::for_user("alice"));
    assert!(
        has(&alice, "alice-alpha"),
        "alice should see her own alpha doc; saw {alice:?}"
    );
}
/// With no filter and an anonymous caller, the curated reader must surface the same set
/// of document ids as querying the wrapped knowledge base directly.
#[test]
fn no_filter_path_matches_raw_query() {
    let inner = Arc::new(InMemoryKnowledge::new());
    let store = CuratedKnowledgeStore::new(Arc::clone(&inner) as Arc<dyn KnowledgeBase>);

    for (id, content) in [
        ("d1", "widget refund policy thirty days"),
        ("d2", "widget shipping five to seven days"),
        ("d3", "unrelated cooking recipe"),
    ] {
        let source = format!("{id}.md");
        ingest(&store, doc(id, content, &source));
    }

    // Compare as sorted, de-duplicated id lists so ordering and repeated hits for the
    // same document do not affect the comparison.
    let normalize = |mut ids: Vec<String>| -> Vec<String> {
        ids.sort();
        ids.dedup();
        ids
    };

    let raw_ids = normalize(
        inner
            .query("widget policy", 10)
            .unwrap()
            .into_iter()
            .map(|hit| hit.document_id)
            .collect(),
    );
    let curated_ids = normalize(retrieved_ids(
        &store,
        RetrievalFilter::none(),
        AccessContext::anonymous(),
        "widget policy",
        10,
    ));

    assert_eq!(
        curated_ids, raw_ids,
        "unconstrained curated reader must surface the same docs as the raw query"
    );
}
/// A document ingested without any set tag must not be returned by a set-scoped query,
/// even when its content is identical to a tagged document that is returned.
#[test]
fn untagged_doc_is_in_no_set() {
    let store = CuratedKnowledgeStore::new(Arc::new(InMemoryKnowledge::new()));

    ingest(&store, doc("untagged", "widget clearance fact", "u.md"));
    let tagged = with_document_set(doc("tagged", "widget clearance fact", "t.md"), ["alpha"]);
    ingest(&store, tagged);

    let scoped = retrieved_ids(
        &store,
        RetrievalFilter::in_sets(["alpha"]),
        AccessContext::anonymous(),
        "widget clearance",
        10,
    );
    let has = |id: &str| scoped.iter().any(|got| got == id);

    assert!(
        has("tagged"),
        "tagged doc in set alpha must appear; saw {scoped:?}"
    );
    assert!(
        !has("untagged"),
        "an untagged doc belongs to no set, so a set-scoped query must skip it; saw {scoped:?}"
    );
}
/// End-to-end check through `KnowledgeChatRuntime`: with curation scoped to set `alpha`,
/// the `knowledge_search` tool result for a scripted tool call must contain the alpha
/// document's text and must not contain the beta document's text.
#[tokio::test]
async fn runtime_curation_scopes_knowledge_search_to_set() {
    let storage = Arc::new(InMemoryStorageAdapter::new());
    let store = CuratedKnowledgeStore::new(storage.knowledge());
    let ingestor = store.ingest_handle();

    // The two fixtures differ only in set membership and slot count, so the slot count
    // in the tool result identifies which document was surfaced.
    let alpha_doc = with_document_set(
        doc(
            "alpha-doc",
            "The frobnicator alpha subsystem uses a 42-slot ring buffer.",
            "alpha/frob.md",
        ),
        ["alpha"],
    );
    ingestor.ingest(alpha_doc).expect("ingest alpha doc");

    let beta_doc = with_document_set(
        doc(
            "beta-doc",
            "The frobnicator beta subsystem uses an 88-slot ring buffer.",
            "beta/frob.md",
        ),
        ["beta"],
    );
    ingestor.ingest(beta_doc).expect("ingest beta doc");

    // Script the mock model: first a `knowledge_search` tool call, then a text reply.
    let mock = MockLlmClient::new();
    let search_args = serde_json::json!({ "query": "frobnicator ring buffer" });
    mock.push_tool_call("call_1", "knowledge_search", search_args)
        .push_text("Here is what I found.");

    let llm = LlmConfig::openrouter("not-a-real-key").with_model("openai/gpt-4o");
    let runtime = KnowledgeChatRuntime::new(storage.clone(), llm)
        .with_llm_provider(Arc::new(mock.clone()))
        .with_curation(
            store,
            AccessContext::anonymous(),
            RetrievalFilter::in_sets(["alpha"]),
        );

    let turn = runtime
        .run_turn("conv-curation", "Tell me about the frobnicator")
        .await
        .expect("run_turn");
    let tool_result = turn
        .tool_result("knowledge_search")
        .expect("knowledge_search ran");

    assert!(
        tool_result.contains("42-slot"),
        "alpha-set content should surface; tool result: {tool_result}"
    );
    assert!(
        !tool_result.contains("88-slot"),
        "SET LEAK: beta-set content reached the model under an alpha scope; tool result: {tool_result}"
    );
}
/// Set tags and a boost attached via `with_document_set` / `with_boost` must be readable
/// back through `DocMeta::from_document`.
#[test]
fn docmeta_round_trips_set_and_boost() {
    let tagged = with_document_set(doc("x", "content", "x.md"), ["alpha", "beta"]);
    let document = with_boost(tagged, 2.5);

    let meta = DocMeta::from_document(&document);

    let expected_sets = vec![String::from("alpha"), String::from("beta")];
    assert_eq!(meta.document_sets, expected_sets);
    // Float comparison with a tolerance rather than exact equality.
    assert!((meta.boost - 2.5).abs() < f32::EPSILON);
    assert!(meta.in_set("alpha") && meta.in_set("beta") && !meta.in_set("gamma"));
}