use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::query::ast::ScoringExpression;
use luci::query::parser::parse_query;
use luci::search::expression::parse_search;
use serde_json::json;
/// Returns a scratch directory path for the test called `name`.
///
/// The path lives under the system temp directory and embeds the current
/// process id. Anything already present at that path is removed first
/// (best effort: removal errors, such as the path not existing, are ignored).
/// The directory itself is not created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let mut scratch = std::env::temp_dir();
    scratch.push(format!("luci_m2_integration_{}_{name}", std::process::id()));
    // Ignore the outcome: on a first run there is nothing to delete.
    std::fs::remove_dir_all(&scratch).ok();
    scratch
}
/// Recursively deletes `path`, ignoring any error (for example when the
/// directory does not exist).
fn cleanup(path: &std::path::Path) {
    std::fs::remove_dir_all(path).ok();
}
/// Creates an index in a per-test scratch directory and loads the shared
/// eight-document fixture into it.
///
/// The mapping declares `title` and `body` as text fields and `tag` and
/// `status` as keyword fields. The fixture holds four `animal` and four
/// `tech` documents; two documents are `draft`, the other six `published`.
///
/// Returns the index directory (so the caller can hand it to `cleanup`)
/// together with the populated index. Panics if index creation or the bulk
/// load fails.
fn build_test_index(name: &str) -> (std::path::PathBuf, Index) {
    // One row per document: (title, body, tag, status).
    const FIXTURE: [(&str, &str, &str, &str); 8] = [
        ("The Quick Brown Fox", "A quick brown fox jumps over the lazy dog", "animal", "published"),
        ("Search Engine Design", "Building a search engine requires inverted indexes and scoring", "tech", "published"),
        ("Quick Start Guide", "This guide helps you get started quickly with the search engine", "tech", "draft"),
        ("The Lazy Dog", "The lazy dog sleeps all day long", "animal", "published"),
        ("Advanced Search Techniques", "Advanced techniques for search engine optimization and scoring", "tech", "published"),
        ("Brown Bear", "The brown bear lives in the forest", "animal", "draft"),
        ("Fox and Friends", "The fox is a clever animal that lives in many habitats", "animal", "published"),
        ("Database Internals", "Understanding database storage engines and indexing strategies", "tech", "published"),
    ];

    let index_dir = test_dir(name);
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("body", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("status", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&index_dir, mapping).unwrap();

    let documents = FIXTURE
        .iter()
        .map(|&(title, body, tag, status)| {
            json!({"title": title, "body": body, "tag": tag, "status": status})
        })
        .collect::<Vec<_>>();
    index.bulk(documents).unwrap();

    (index_dir, index)
}
/// Exit criterion 1: JSON query bodies parse and execute end to end.
///
/// Runs one query of each kind (term, match, bool, exists, prefix,
/// constant_score) against the shared fixture from `build_test_index` and
/// checks hit counts, returned sources, or scores.
#[test]
fn exit_criterion_1_json_query_round_trip() {
    let (path, index) = build_test_index("ec1");

    // term: exact match on a keyword field.
    let expr = parse_search(json!({"term": {"tag": "animal"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4, "4 docs have tag=animal");

    // match: full-text query on a text field.
    let expr = parse_search(json!({"match": {"body": "search engine"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert!(
        results.total_hits().value >= 3,
        "multiple docs mention search or engine"
    );

    // bool: scoring `must` clause combined with a `filter` clause.
    let expr = parse_search(
        json!({
            "bool": {
                "must": [{"match": {"body": "search"}}],
                "filter": [{"term": {"status": "published"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    // Without this check the loop below would pass vacuously on an empty
    // result set; the fixture contains published docs whose body mentions
    // "search".
    assert!(
        results.total_hits().value >= 1,
        "bool must+filter should match at least one published doc mentioning 'search'"
    );
    for hit in results.iter() {
        let source = hit.source().unwrap();
        assert_eq!(source["status"], "published");
    }

    // exists: every fixture document carries a title.
    let expr = parse_search(json!({"exists": {"field": "title"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 8, "all 8 docs have title");

    // prefix: keyword values starting with the given string.
    let expr = parse_search(json!({"prefix": {"tag": "tech"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.total_hits().value,
        4,
        "4 docs have tag starting with 'tech'"
    );

    // constant_score: every hit gets the configured boost as its score.
    let expr = parse_search(
        json!({
            "constant_score": {
                "filter": {"term": {"tag": "animal"}},
                "boost": 2.5
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4);
    for hit in results.iter() {
        assert_eq!(hit.score(), 2.5, "constant score should be 2.5");
    }

    cleanup(&path);
}
/// Exit criterion 2: bool query clauses (`must`, `should`, `must_not`,
/// `filter`) select the expected documents from the shared fixture built by
/// `build_test_index`.
#[test]
fn exit_criterion_2_bool_query_correctness() {
    let (path, index) = build_test_index("ec2");

    // must + must: both term clauses have to match (intersection).
    let expr = parse_search(
        json!({
            "bool": {
                "must": [
                    {"term": {"tag": "tech"}},
                    {"term": {"status": "published"}}
                ]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    // Pin the count so the per-hit loop below cannot pass vacuously: of the
    // four tech docs in the fixture, one is a draft.
    assert_eq!(
        results.total_hits().value,
        3,
        "3 docs are both tech and published"
    );
    for hit in results.iter() {
        let s = hit.source().unwrap();
        assert_eq!(s["tag"], "tech");
        assert_eq!(s["status"], "published");
    }

    // should only: either clause is enough (union).
    let expr = parse_search(
        json!({
            "bool": {
                "should": [
                    {"term": {"tag": "animal"}},
                    {"term": {"tag": "tech"}}
                ]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.total_hits().value,
        8,
        "all docs are either animal or tech"
    );

    // must + must_not: animal docs excluding drafts.
    let expr = parse_search(
        json!({
            "bool": {
                "must": [{"term": {"tag": "animal"}}],
                "must_not": [{"term": {"status": "draft"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 3);
    for hit in results.iter() {
        let s = hit.source().unwrap();
        assert_ne!(s["status"], "draft");
    }

    // filter only: matches are expected to carry a constant score of 1.0.
    let expr = parse_search(
        json!({
            "bool": {
                "filter": [{"term": {"tag": "tech"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4);
    for hit in results.iter() {
        assert_eq!(
            hit.score(),
            1.0,
            "filter context should give constant score"
        );
    }

    cleanup(&path);
}
/// Exit criterion 3: `match_phrase` honours term order and adjacency on the
/// `body` field of the shared fixture.
#[test]
fn exit_criterion_3_phrase_query() {
    let (path, index) = build_test_index("ec3");

    // Runs a match_phrase query on `body` and reports only the total hit count.
    let phrase_total = |phrase: &str| {
        let expr = parse_search(json!({"match_phrase": {"body": phrase}}), 10).unwrap();
        let results = index.search(&expr).unwrap();
        // Copy the count out before `results` goes out of scope.
        let total = results.total_hits().value;
        total
    };

    assert!(
        phrase_total("search engine") >= 2,
        "at least 2 docs have 'search engine' phrase"
    );
    assert_eq!(
        phrase_total("engine search"),
        0,
        "reversed phrase should not match"
    );

    // Three-term phrase: also inspect the stored source of the first hit.
    let fox_expr =
        parse_search(json!({"match_phrase": {"body": "quick brown fox"}}), 10).unwrap();
    let fox_results = index.search(&fox_expr).unwrap();
    assert!(fox_results.total_hits().value >= 1);
    let first_hit = fox_results.hit(0).unwrap();
    let first_source = first_hit.source().unwrap();
    assert!(first_source["body"].as_str().unwrap().contains("quick brown fox"));

    assert!(phrase_total("lazy dog") >= 1);

    cleanup(&path);
}
/// Exit criterion 4: a bool query pairing a broad clause (`status=published`)
/// with a narrow one (`title` matches "database") returns exactly the one
/// document satisfying both.
#[test]
fn exit_criterion_4_cost_based_ordering() {
    let (path, index) = build_test_index("ec4");

    let query = json!({
        "bool": {
            "must": [
                {"term": {"status": "published"}},
                {"match": {"title": "database"}}
            ]
        }
    });
    let expr = parse_search(query, 10).unwrap();
    let results = index.search(&expr).unwrap();

    let total = results.total_hits().value;
    assert_eq!(total, 1, "only 1 doc matches both clauses");

    let only_hit = results.hit(0).unwrap();
    let source = only_hit.source().unwrap();
    let lowered_title = source["title"].as_str().unwrap().to_lowercase();
    assert!(lowered_title.contains("database"));
    assert_eq!(source["status"], "published");

    cleanup(&path);
}
/// Parses a `{"query": {"bool": ...}}` body with `parse_query` and checks the
/// shape of the resulting AST: one `Match` under `must` and one `Term` under
/// `filter`.
#[test]
fn json_parse_to_search_e2e() {
    let (path, _index) = build_test_index("e2e_json");

    let request = json!({
        "query": {
            "bool": {
                "must": [{"match": {"body": "search"}}],
                "filter": [{"term": {"status": "published"}}]
            }
        }
    });
    let ast = parse_query(&request).unwrap();

    match &ast {
        ScoringExpression::Bool { must, filter, .. } => {
            assert_eq!(must.len(), 1);
            assert_eq!(filter.len(), 1);
            assert!(matches!(&must[0], ScoringExpression::Match { .. }));
            assert!(matches!(&filter[0], ScoringExpression::Term { .. }));
        }
        _ => panic!("expected Bool query"),
    }

    cleanup(&path);
}
/// A bool query with only `should` and `must_not` clauses: the `should`
/// clause has to select documents (there is no `must`), and `must_not` has
/// to remove the deleted one.
///
/// Of the three documents indexed here, two contain "hello" and one of those
/// is `deleted`, so "hello world" is the only expected hit.
#[test]
fn must_not_with_pure_should() {
    let path = test_dir("must_not_should");
    let schema = Mapping::builder()
        .field("title", FieldType::Text)
        .field("status", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&path, schema).unwrap();
    index
        .bulk(vec![
            json!({"title": "hello world", "status": "active"}),
            json!({"title": "hello there", "status": "deleted"}),
            json!({"title": "goodbye world", "status": "active"}),
        ])
        .unwrap();

    let expr = parse_search(
        json!({
            "query": {"bool": {
                "should": [{"match": {"title": "hello"}}],
                "must_not": [{"term": {"status": "deleted"}}]
            }}
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.len(),
        1,
        "should match 'hello world' (active) only, got {}",
        results.len()
    );
    // The count alone would also accept a wrong single document (for example
    // "goodbye world"), so verify which document came back.
    for hit in results.iter() {
        let source = hit.source().unwrap();
        assert_eq!(
            source["title"], "hello world",
            "the only hit should be the active 'hello world' doc"
        );
    }

    cleanup(&path);
}
/// Checks `minimum_should_match` on a bool query made only of `should`
/// clauses, for thresholds of 2, unset, 3 (all clauses) and 5 (more than the
/// number of clauses).
#[test]
fn minimum_should_match() {
    let path = test_dir("min_should");
    let mapping = Mapping::builder().field("title", FieldType::Text).build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let titles = [
        "apple banana cherry",
        "apple banana",
        "apple",
        "banana cherry",
        "other stuff",
    ];
    let documents = titles
        .iter()
        .map(|title| json!({"title": title}))
        .collect::<Vec<_>>();
    index.bulk(documents).unwrap();

    // Builds a bool query with one `match` on `title` per term, sets
    // `minimum_should_match` only when a threshold is given, and returns the
    // number of results for a page size of 10.
    let hit_count = |terms: &[&str], min_should: Option<u64>| {
        let clauses = terms
            .iter()
            .map(|term| json!({"match": {"title": term}}))
            .collect::<Vec<_>>();
        let mut bool_body = json!({"should": clauses});
        if let Some(min) = min_should {
            bool_body["minimum_should_match"] = json!(min);
        }
        let expr = parse_search(json!({"query": {"bool": bool_body}}), 10).unwrap();
        let count = index.search(&expr).unwrap().len();
        count
    };

    let all_three: &[&str] = &["apple", "banana", "cherry"];
    let first_two: &[&str] = &["apple", "banana"];

    let at_least_two = hit_count(all_three, Some(2));
    assert_eq!(
        at_least_two,
        3,
        "expected 3 docs with >= 2 should matches, got {}",
        at_least_two
    );

    assert_eq!(
        hit_count(all_three, None),
        4,
        "without min_should_match, expected 4 docs"
    );

    assert_eq!(
        hit_count(all_three, Some(3)),
        1,
        "min=3 of 3 should only match doc with all 3 terms"
    );

    assert_eq!(
        hit_count(first_two, Some(5)),
        0,
        "min > num_should should match nothing"
    );

    cleanup(&path);
}