use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use serde_json::json;
/// Test helper: parses a JSON search request and executes it against `index`.
///
/// `query` and `size` are handed straight to `parse_search`. Both the parse
/// step and the search step are unwrapped, so a malformed request or a failed
/// search panics and fails the calling test immediately.
fn search(
    index: &mut Index,
    query: serde_json::Value,
    size: usize,
) -> luci::search::results::SearchResults {
    use luci::search::expression::parse_search;

    let parsed = parse_search(query, size).unwrap();
    let outcome = index.search(&parsed);
    outcome.unwrap()
}
/// Returns a per-process, per-test scratch directory path under the system
/// temp directory.
///
/// The path embeds the current process id and `name`, so tests that pass
/// different names do not share a directory. Anything already present at that
/// path is removed first (errors, such as the path not existing, are ignored).
/// The directory itself is NOT created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let pid = std::process::id();
    let mut dir = std::env::temp_dir();
    dir.push(format!("luci_m5_integration_{pid}_{name}"));
    // Best-effort removal of leftovers from an earlier run.
    std::fs::remove_dir_all(&dir).ok();
    dir
}
/// Recursively deletes the directory at `path`, ignoring any error.
///
/// Removal is best-effort on purpose: a missing directory or a failed delete
/// must not turn an otherwise passing test into a failure.
fn cleanup(path: &std::path::Path) {
    std::fs::remove_dir_all(path).ok();
}
/// Creates a fresh index under a scratch directory derived from `name` and
/// loads the five-document fixture shared by the tests in this file.
///
/// The mapping has a text `title`, a keyword `tag`, and a 4-dimensional dense
/// vector `embedding`. Returns the directory path (so the caller can remove it
/// afterwards) together with the populated index. Panics if index creation or
/// the bulk insert fails.
fn build_hybrid_index(name: &str) -> (std::path::PathBuf, Index) {
    let dir = test_dir(name);

    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("embedding", FieldType::dense_vector(4))
        .build();
    let index = Index::create_with_mapping(&dir, mapping).unwrap();

    // Three titles contain "search"; the embeddings of the first and third
    // documents lie closest to the [1, 0, 0, 0] axis the tests query with.
    let docs = vec![
        json!({"title": "search engine design", "tag": "tech", "embedding": [0.9, 0.1, 0.0, 0.0]}),
        json!({"title": "search algorithms", "tag": "tech", "embedding": [0.1, 0.9, 0.0, 0.0]}),
        json!({"title": "cute cats", "tag": "animal", "embedding": [0.8, 0.2, 0.0, 0.0]}),
        json!({"title": "search optimization", "tag": "tech", "embedding": [0.0, 0.0, 0.9, 0.1]}),
        json!({"title": "happy dog", "tag": "animal", "embedding": [0.0, 0.0, 0.0, 1.0]}),
    ];
    index.bulk(docs).unwrap();

    (dir, index)
}
/// Fuses a text match with a kNN query via RRF and checks that the document
/// favoured by both sources ranks first, while documents found by only one
/// source still appear in the results.
#[test]
fn hybrid_search_rrf() {
    let (dir, mut index) = build_hybrid_index("hybrid");

    let request = json!({
        "query": {"fusion": {
            "sources": [
                {"match": {"title": "search engine"}},
                {"knn": {
                    "field": "embedding",
                    "query_vector": [1.0, 0.0, 0.0, 0.0],
                    "k": 5,
                    "num_candidates": 10
                }}
            ],
            "method": "rrf"
        }},
        "size": 5
    });
    let results = search(&mut index, request, 5);

    assert!(!results.is_empty());

    // "search engine design" matches both title terms and has the embedding
    // nearest the query vector. NOTE(review): the literal ids below assume doc
    // ids follow fixture insertion order starting at 0 — confirm.
    let top = results.hit(0).unwrap();
    assert_eq!(top.doc_id().as_u32(), 0);

    // "search algorithms" (1) and "cute cats" (2) must also be present.
    let ids: Vec<u32> = results.iter().map(|hit| hit.doc_id().as_u32()).collect();
    for expected in [1, 2] {
        assert!(ids.contains(&expected));
    }

    cleanup(&dir);
}
/// A plain `match` query in the structured request format should report at
/// least the three fixture documents whose title contains "search".
#[test]
fn text_only_structured() {
    let (dir, mut index) = build_hybrid_index("text_only");

    let request = json!({
        "query": {"match": {"title": "search"}},
        "size": 10
    });
    let results = search(&mut index, request, 10);

    let total = results.total_hits().value;
    assert!(total >= 3);

    cleanup(&dir);
}
/// A standalone kNN query with `k = 3` should return exactly three hits,
/// led by the two documents whose embeddings lie nearest the query vector.
#[test]
fn knn_only_structured() {
    let (dir, mut index) = build_hybrid_index("knn_only");

    let request = json!({
        "query": {"knn": {
            "field": "embedding",
            "query_vector": [1.0, 0.0, 0.0, 0.0],
            "k": 3,
            "num_candidates": 10
        }},
        "size": 3
    });
    let results = search(&mut index, request, 3);

    // Check the count first so a short result set fails here rather than on
    // an `unwrap` below.
    assert_eq!(results.len(), 3);

    // Fixture embeddings [0.9, 0.1, 0, 0] (doc 0) and [0.8, 0.2, 0, 0] (doc 2)
    // are the two closest to [1, 0, 0, 0].
    let nearest = results.hit(0).unwrap().doc_id().as_u32();
    assert_eq!(nearest, 0);
    let runner_up = results.hit(1).unwrap().doc_id().as_u32();
    assert_eq!(runner_up, 2);

    cleanup(&dir);
}
/// Checks the score properties of an RRF-fused result list: hits are returned
/// in non-increasing score order and every hit carries a strictly positive
/// score.
///
/// The result set is asserted to be non-empty first. Without that guard both
/// loops below would run zero times on an empty result and the test would pass
/// without checking anything.
#[test]
fn rrf_rank_based_scores() {
    let (path, mut index) = build_hybrid_index("rrf_scores");
    let results = search(
        &mut index,
        json!({
            "query": {"fusion": {
                "sources": [
                    {"match": {"title": "search"}},
                    {"knn": {
                        "field": "embedding",
                        "query_vector": [1.0, 0.0, 0.0, 0.0],
                        "k": 5,
                        "num_candidates": 10
                    }}
                ],
                "method": "rrf"
            }},
            "size": 5
        }),
        5,
    );

    // Three fixture titles contain "search", so an empty result means the
    // fusion query itself is broken.
    assert!(
        !results.is_empty(),
        "fusion query returned no hits; score checks would be vacuous"
    );

    // Adjacent pairs: each hit must score at least as high as its successor.
    for i in 0..results.len().saturating_sub(1) {
        assert!(
            results.hit(i).unwrap().score() >= results.hit(i + 1).unwrap().score(),
            "hit {i} scored lower than hit {}",
            i + 1
        );
    }

    for hit in results.iter() {
        assert!(hit.score() > 0.0, "fused scores must be strictly positive");
    }

    cleanup(&path);
}