use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::search::expression::parse_search;
use serde_json::json;
use std::collections::HashSet;
/// Returns the scratch directory path used by the test called `name`.
///
/// The path sits under the system temp directory and embeds the current
/// process id alongside `name`. Anything already present at that path is
/// removed (errors are ignored); the directory itself is not created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let pid = std::process::id();
    let mut dir = std::env::temp_dir();
    dir.push(format!("luci_m4_integration_{}_{name}", pid));
    // Best-effort wipe of leftovers: a missing directory is the common case,
    // so the result is deliberately discarded.
    let _ = std::fs::remove_dir_all(&dir);
    dir
}
/// Best-effort removal of the directory tree at `path`.
///
/// Any error from the removal (for example, the directory not existing) is
/// ignored, so this never panics.
fn cleanup(path: &std::path::Path) {
    // Teardown only: the outcome of the removal is intentionally thrown away.
    drop(std::fs::remove_dir_all(path));
}
/// Indexes 500 pseudo-random 16-dimensional vectors, runs a kNN search for the
/// first of them, and requires that the returned ids overlap the brute-force
/// Euclidean top 10 by at least 70%.
///
/// NOTE(review): the recall comparison treats the position of a vector in the
/// bulk request as its doc id — confirm that `bulk` assigns ids in insertion
/// order starting from 0.
#[test]
fn knn_recall() {
    let path = test_dir("recall");
    let mapping = Mapping::builder()
        .field("embedding", FieldType::dense_vector(16))
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let n = 500;
    let dims = 16;

    // Shift/xor generator with a fixed seed, so every run sees the same data.
    let mut state: u64 = 99999;
    let mut next_component = move || {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        // Scale the raw 64-bit state into the range [-1.0, 1.0].
        (state as f64 / u64::MAX as f64) * 2.0 - 1.0
    };

    // Components are drawn row by row, one vector at a time.
    let vectors: Vec<Vec<f64>> = (0..n)
        .map(|_| (0..dims).map(|_| next_component()).collect())
        .collect();
    let docs: Vec<_> = vectors
        .iter()
        .map(|v| json!({"embedding": v}))
        .collect();
    index.bulk(docs).unwrap();

    // Query with the first indexed vector, narrowed to f32.
    let query_vec: Vec<f32> = vectors[0].iter().map(|&x| x as f32).collect();
    let request = json!({
        "query": {"knn": {
            "field": "embedding",
            "query_vector": query_vec,
            "k": 10,
            "num_candidates": 50
        }}
    });
    let expr = parse_search(request, 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert!(
        results.total_hits().value >= 5,
        "should find at least 5 results"
    );

    // Exact Euclidean distance from a stored vector to the query, used as the
    // ground truth ranking.
    let distance_to_query = |v: &[f64]| -> f64 {
        v.iter()
            .zip(&query_vec)
            .map(|(&a, &b)| (a - f64::from(b)).powi(2))
            .sum::<f64>()
            .sqrt()
    };
    let mut ranked: Vec<(usize, f64)> = vectors
        .iter()
        .enumerate()
        .map(|(i, v)| (i, distance_to_query(v.as_slice())))
        .collect();
    ranked.sort_by(|lhs, rhs| lhs.1.partial_cmp(&rhs.1).unwrap());

    let expected: HashSet<u32> = ranked.iter().take(10).map(|&(i, _)| i as u32).collect();
    let found: HashSet<u32> = results.iter().map(|hit| hit.doc_id().as_u32()).collect();
    let overlap = expected.intersection(&found).count();
    let recall = overlap as f64 / 10.0;
    assert!(recall >= 0.7, "recall@10 = {recall}, expected >= 0.7");

    cleanup(&path);
}
/// Checks that a single index holding text, keyword and vector fields answers
/// both a `match` query on `title` and a `knn` query on `embedding`.
#[test]
fn text_plus_vectors() {
    let path = test_dir("text_vec");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("embedding", FieldType::dense_vector(4))
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    // Four documents, each with a one-hot embedding on a different axis.
    let docs = vec![
        json!({"title": "search engine", "tag": "tech", "embedding": [1.0, 0.0, 0.0, 0.0]}),
        json!({"title": "database internals", "tag": "tech", "embedding": [0.0, 1.0, 0.0, 0.0]}),
        json!({"title": "cute cat", "tag": "animal", "embedding": [0.0, 0.0, 1.0, 0.0]}),
        json!({"title": "happy dog", "tag": "animal", "embedding": [0.0, 0.0, 0.0, 1.0]}),
    ];
    index.bulk(docs).unwrap();

    // Text side: only one title contains the term "search".
    let text_query = parse_search(json!({"match": {"title": "search"}}), 10).unwrap();
    let text_hits = index.search(&text_query).unwrap();
    assert_eq!(text_hits.total_hits().value, 1);

    // Vector side: the query leans heavily towards the first axis, so the
    // first document is expected to rank on top.
    let knn_body = json!({
        "knn": {
            "field": "embedding",
            "query_vector": [0.9, 0.1, 0.0, 0.0],
            "k": 2,
            "num_candidates": 10
        }
    });
    let knn_query = parse_search(knn_body, 10).unwrap();
    let knn_hits = index.search(&knn_query).unwrap();
    assert!(knn_hits.total_hits().value >= 1);
    let nearest = knn_hits.hit(0).unwrap();
    assert_eq!(nearest.doc_id().as_u32(), 0);

    cleanup(&path);
}
/// Runs a kNN query for the point (1, 0) over five tagged 2-d vectors and
/// checks that all five documents are reported and that the three best hits
/// are the "tech" documents (ids 0, 1 and 4).
///
/// NOTE(review): despite the test name, the request below carries no filter
/// clause; the `tag` field is indexed but never queried here.
#[test]
fn knn_with_filter() {
    let path = test_dir("knn_filter");
    let mapping = Mapping::builder()
        .field("tag", FieldType::Keyword)
        .field("embedding", FieldType::dense_vector(2))
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    // "tech" vectors cluster near (1, 0); "animal" vectors near (0, 1).
    let docs = vec![
        json!({"tag": "tech", "embedding": [1.0, 0.1]}),
        json!({"tag": "tech", "embedding": [0.9, 0.2]}),
        json!({"tag": "animal", "embedding": [0.1, 1.0]}),
        json!({"tag": "animal", "embedding": [0.2, 0.9]}),
        json!({"tag": "tech", "embedding": [0.8, 0.3]}),
    ];
    index.bulk(docs).unwrap();

    let body = json!({
        "knn": {
            "field": "embedding",
            "query_vector": [1.0, 0.0],
            "k": 5,
            "num_candidates": 10
        }
    });
    let expr = parse_search(body, 5).unwrap();
    let hits = index.search(&expr).unwrap();
    assert_eq!(hits.total_hits().value, 5);

    // Only the three leading hits are inspected; their relative order is not.
    for id in hits.iter().take(3).map(|hit| hit.doc_id().as_u32()) {
        assert!(
            matches!(id, 0 | 1 | 4),
            "top 3 nearest to (1,0) should be tech docs (0,1,4), got doc {id}"
        );
    }

    cleanup(&path);
}