use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::search::expression::parse_search;
use serde_json::json;
/// Returns a per-process scratch directory path for the test called `name`.
///
/// The path is `<system temp dir>/luci_fusion_<pid>_<name>`. Anything already
/// present at that path is removed (best effort) so each test starts from a
/// clean slate; the directory itself is NOT created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let leaf = format!("luci_fusion_{}_{name}", std::process::id());
    let mut dir = std::env::temp_dir();
    dir.push(leaf);
    // Removal errors (typically "not found") are deliberately ignored.
    let _ = std::fs::remove_dir_all(&dir);
    dir
}
/// Recursively deletes the scratch directory at `path`.
///
/// Best effort: any I/O error (including the directory not existing) is
/// silently discarded so that cleanup never fails a test.
fn cleanup(path: &std::path::Path) {
    std::fs::remove_dir_all(path).ok();
}
/// Creates a fresh index under a scratch directory and loads the shared fixture.
///
/// The mapping has a text `title`, a keyword `tag` and a 4-dimensional dense
/// vector `embedding`. Five documents are bulk-indexed: three whose title
/// contains "search" (tag `tech`) and two tagged `animal`.
///
/// Returns the scratch directory (so the caller can pass it to `cleanup`)
/// together with the populated index. Panics if index creation or the bulk
/// insert fails.
fn build_index(name: &str) -> (std::path::PathBuf, Index) {
    let dir = test_dir(name);

    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("embedding", FieldType::dense_vector(4))
        .build();
    let index = Index::create_with_mapping(&dir, mapping).unwrap();

    let docs = vec![
        json!({"title": "search engine design", "tag": "tech", "embedding": [0.9, 0.1, 0.0, 0.0]}),
        json!({"title": "search algorithms", "tag": "tech", "embedding": [0.1, 0.9, 0.0, 0.0]}),
        json!({"title": "cute cats", "tag": "animal", "embedding": [0.8, 0.2, 0.0, 0.0]}),
        json!({"title": "search optimization", "tag": "tech", "embedding": [0.0, 0.0, 0.9, 0.1]}),
        json!({"title": "happy dog", "tag": "animal", "embedding": [0.0, 0.0, 0.0, 1.0]}),
    ];
    index.bulk(docs).unwrap();

    (dir, index)
}
/// Parses `query` into a search expression and runs it against `index`.
///
/// `size` is forwarded to `parse_search` as its second argument. Panics if
/// either parsing or execution returns an error, which is the desired
/// behaviour inside a test.
fn search(
    index: &Index,
    query: serde_json::Value,
    size: usize,
) -> luci::search::results::SearchResults {
    index
        .search(&parse_search(query, size).unwrap())
        .unwrap()
}
/// RRF over a text `match` source and a `knn` source.
///
/// Expects document 0 on top and a top score strictly between 0 and 1.
/// NOTE(review): assumes doc ids follow bulk insertion order, so id 0 is
/// "search engine design" — confirm against the index implementation.
#[test]
fn rrf_two_sources() {
    let (path, index) = build_index("rrf_two");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());

    let top_id = results.hit(0).unwrap().doc_id().as_u32();
    assert_eq!(top_id, 0);

    let top_score = results.hit(0).unwrap().score();
    assert!(
        0.0 < top_score && top_score < 1.0,
        "RRF scores are small: {top_score}"
    );

    cleanup(&path);
}
/// RRF over three sources (`match`, `term`, `knn`) with an explicit
/// `rank_constant` of 60.
///
/// Expects a non-empty result whose first hit is document 0.
#[test]
fn rrf_three_sources() {
    let (path, index) = build_index("rrf_three");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"term": {"tag": "tech"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf",
            "rank_constant": 60
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());
    let top_id = results.hit(0).unwrap().doc_id().as_u32();
    assert_eq!(top_id, 0);

    cleanup(&path);
}
/// RRF with `rank_constant` set to 1.
///
/// Only checks that the top score exceeds 0.3, i.e. that the small constant
/// is actually applied and yields noticeably larger scores.
#[test]
fn rrf_custom_rank_constant() {
    let (path, index) = build_index("rrf_k");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf",
            "rank_constant": 1
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());

    let top_score = results.hit(0).unwrap().score();
    assert!(
        top_score > 0.3,
        "small rank_constant → higher scores: {top_score}"
    );

    cleanup(&path);
}
/// RRF with `rank_window_size` of 2 across two sources.
///
/// Each source may contribute at most two documents, so the fused result can
/// hold at most four distinct hits.
#[test]
fn rrf_rank_window_size() {
    let (path, index) = build_index("rrf_window");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf",
            "rank_window_size": 2
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    let hit_count = results.len();
    assert!(hit_count <= 4, "window_size=2: at most 4 unique docs");

    cleanup(&path);
}
/// Fusion with `method: "sum"` over a `match` and a `knn` source.
///
/// Expects document 0 on top with a fused score above 0.5.
#[test]
fn sum_fusion() {
    let (path, index) = build_index("sum");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "sum"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());

    let top_id = results.hit(0).unwrap().doc_id().as_u32();
    assert_eq!(top_id, 0);

    let top_score = results.hit(0).unwrap().score();
    assert!(
        top_score > 0.5,
        "sum fusion should produce higher scores: {top_score}"
    );

    cleanup(&path);
}
/// Sum fusion with per-source `weights` of 0.3 and 0.7.
///
/// Verifies the result is non-empty and that hit scores never increase from
/// one position to the next.
#[test]
fn weighted_sum_fusion() {
    let (path, index) = build_index("weighted_sum");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "sum",
            "weights": [0.3, 0.7]
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());

    // Compare every hit with its immediate predecessor.
    for next in 1..results.len() {
        let prev = next - 1;
        assert!(
            results.hit(prev).unwrap().score() >= results.hit(next).unwrap().score(),
            "scores should be descending"
        );
    }

    cleanup(&path);
}
/// A fusion clause without an explicit `method` must still parse.
///
/// Parse-only test: no index is created; a parse error panics via `unwrap`.
#[test]
fn fusion_parse_default_method() {
    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "a"}},
                {"match": {"title": "b"}}
            ]
        }}
    });
    let _expr = parse_search(request, 10).unwrap();
}
/// A fusion clause with a single source must be rejected by the parser.
#[test]
fn fusion_requires_at_least_two_sources() {
    let request = json!({
        "query": {"fusion": {
            "sources": [{"match": {"title": "a"}}]
        }}
    });
    let result = parse_search(request, 10);
    assert!(result.is_err());
}
/// A fusion clause naming an unrecognised `method` must be rejected by the parser.
#[test]
fn fusion_unknown_method_rejected() {
    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "a"}},
                {"match": {"title": "b"}}
            ],
            "method": "unknown_method"
        }}
    });
    let result = parse_search(request, 10);
    assert!(result.is_err());
}
/// RRF results must be ordered by non-increasing score.
///
/// An empty or single-hit result passes trivially because there are no
/// adjacent pairs to compare.
#[test]
fn fusion_scores_descending() {
    let (path, index) = build_index("scores_desc");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    // Walk adjacent pairs: `a` is the earlier hit, `b` the one right after it.
    for later in 1..results.len() {
        let a = results.hit(later - 1).unwrap().score();
        let b = results.hit(later).unwrap().score();
        assert!(a >= b, "scores should be descending: {a} < {b}");
    }

    cleanup(&path);
}
/// `explain()` on a fusion hit must yield an explanation.
///
/// The explanation is required to be present, carry a positive `value`, and
/// mention "fusion" in its `description`.
#[test]
fn fusion_explain_returns_leaf() {
    let (path, index) = build_index("explain");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    let hit = results.hit(0).unwrap();
    let explanation = hit.explain().expect("explain should not error");
    assert!(explanation.is_some());

    let expl = explanation.unwrap();
    assert!(expl.value > 0.0);
    assert!(expl.description.contains("fusion"));

    cleanup(&path);
}
/// Highlighting the `title` field must work on the top hit of a fusion query.
///
/// Only checks that a highlight is returned, not its content.
#[test]
fn fusion_highlight_extracts_terms() {
    let (path, index) = build_index("highlight");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    let hit = results.hit(0).unwrap();
    let hl = hit.highlight("title");
    assert!(hl.is_some(), "highlight should work on fusion results");

    cleanup(&path);
}
/// A fusion query combined with a `terms` aggregation on `tag`.
///
/// Expects hits to be returned and the aggregation map to contain the
/// requested `tags` entry.
#[test]
fn fusion_with_aggregations() {
    let (path, index) = build_index("aggs");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "aggs": {
            "tags": {"terms": {"field": "tag"}}
        },
        "size": 5
    });
    // The shared helper performs the same parse + search (+ unwrap) sequence.
    let results = search(&index, request, 5);

    assert!(!results.is_empty());
    assert!(
        results.aggregations().contains_key("tags"),
        "fusion results should include aggregations"
    );

    cleanup(&path);
}
/// `from` must skip leading hits of a fusion result.
///
/// Runs the same fusion query twice — once unpaginated and once with
/// `from: 2` — and checks that the first hit of the paginated run is the
/// third hit of the full run.
///
/// The fixture holds three documents whose title contains "search", so the
/// full run is required to return more than two hits. Previously that was a
/// silent `if` guard, which let the test pass without checking anything.
#[test]
fn fusion_from_pagination() {
    let (path, index) = build_index("from_page");

    // Shared fusion clause so both requests are guaranteed to be identical
    // apart from `from`.
    let fusion = json!({"fusion": {
        "sources": [
            {"match": {"title": "search"}},
            {"knn": {
                "field": "embedding",
                "query_vector": [1.0, 0.0, 0.0, 0.0],
                "k": 5
            }}
        ],
        "method": "rrf"
    }});

    let all = search(
        &index,
        json!({
            "query": fusion.clone(),
            "size": 10
        }),
        10,
    );
    let page2 = search(
        &index,
        json!({
            "query": fusion,
            "from": 2,
            "size": 10
        }),
        10,
    );

    // Precondition: without a third hit the pagination check is meaningless.
    assert!(
        all.len() > 2,
        "expected more than 2 fused hits to exercise `from`, got {}",
        all.len()
    );
    assert_eq!(
        page2.hit(0).unwrap().doc_id().as_u32(),
        all.hit(2).unwrap().doc_id().as_u32(),
        "from=2 should start at the 3rd result"
    );

    cleanup(&path);
}
/// Two sources with a single-element `weights` array must fail to parse.
#[test]
fn fusion_weights_length_mismatch_rejected() {
    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "a"}},
                {"match": {"title": "b"}}
            ],
            "method": "sum",
            "weights": [0.5]
        }}
    });
    let result = parse_search(request, 10);
    assert!(
        result.is_err(),
        "mismatched weights length should be rejected"
    );
}
/// A fusion query with an explicit `sort` on `price` ascending.
///
/// Builds its own three-document index (the shared fixture has no `price`
/// field), then checks that hits carry sort values and that the first sort
/// value of each hit is non-decreasing down the result list.
#[test]
fn fusion_with_sort() {
    let path = test_dir("sort");

    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("price", FieldType::Float)
        .field("embedding", FieldType::dense_vector(4))
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let docs = vec![
        json!({"title": "search engine", "price": 30.0, "embedding": [0.9, 0.1, 0.0, 0.0]}),
        json!({"title": "search tools", "price": 10.0, "embedding": [0.1, 0.9, 0.0, 0.0]}),
        json!({"title": "search guide", "price": 20.0, "embedding": [0.5, 0.5, 0.0, 0.0]}),
    ];
    index.bulk(docs).unwrap();

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 3
                }}
            ],
            "method": "rrf"
        }},
        "sort": [{"price": "asc"}],
        "size": 10
    });
    // The shared helper performs the same parse + search (+ unwrap) sequence.
    let results = search(&index, request, 10);

    assert!(results.len() >= 2);
    assert!(
        results.hit(0).unwrap().sort_values().is_some(),
        "sort values should be present"
    );

    // Collect the primary sort value of every hit as an f64.
    let mut prices: Vec<f64> = Vec::with_capacity(results.len());
    for i in 0..results.len() {
        let hit = results.hit(i).unwrap();
        let sv = hit.sort_values().unwrap();
        prices.push(sv[0].to_json().as_f64().unwrap());
    }

    for pair in prices.windows(2) {
        assert!(
            pair[0] <= pair[1],
            "price should be ascending: {} > {}",
            pair[0],
            pair[1]
        );
    }

    cleanup(&path);
}
/// A fusion query collapsed on the `tag` field.
///
/// The fixture uses two distinct tags, so at most two hits may come back;
/// every hit must expose a collapse key and no key may repeat.
#[test]
fn fusion_with_collapse() {
    let (path, index) = build_index("collapse");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "collapse": {"field": "tag"},
        "size": 10
    });
    // The shared helper performs the same parse + search (+ unwrap) sequence.
    let results = search(&index, request, 10);

    assert!(
        results.len() <= 2,
        "collapse by tag should produce at most 2 results, got {}",
        results.len()
    );
    assert!(
        results.iter().all(|hit| hit.collapse_key().is_some()),
        "collapsed hits should have collapse_key"
    );

    // Deduplicate the keys; a shrinking count means two hits shared a key.
    let mut keys = std::collections::HashSet::new();
    for hit in results.iter() {
        keys.insert(hit.collapse_key().unwrap().to_string());
    }
    assert_eq!(keys.len(), results.len(), "collapse keys should be unique");

    cleanup(&path);
}
/// A fusion clause used as a source of another fusion clause.
///
/// The inner RRF fuses two `match` queries; the outer RRF fuses that with a
/// `knn` source. Expects a non-empty result with document 0 on top.
#[test]
fn nested_fusion() {
    let (path, index) = build_index("nested");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"fusion": {
                    "sources": [
                        {"match": {"title": "search"}},
                        {"match": {"title": "engine"}}
                    ],
                    "method": "rrf"
                }},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&index, request, 5);

    assert!(!results.is_empty());
    let top_id = results.hit(0).unwrap().doc_id().as_u32();
    assert_eq!(top_id, 0);

    cleanup(&path);
}