use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::search::expression::parse_search;
use luci::search::highlight::{Highlight, HighlightConfig, HighlightFieldConfig, HighlightOrder};
use serde_json::json;
/// Builds the scratch-directory path used by the test called `name`.
///
/// The path is `<system temp dir>/luci_highlight_<pid>_<name>`, so each test process and
/// each test name gets its own location. Anything already present at that path is removed
/// first; the directory itself is not created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let pid = std::process::id();
    let mut dir = std::env::temp_dir();
    dir.push(format!("luci_highlight_{pid}_{name}"));
    // Best-effort wipe: a failure (typically "directory does not exist") is deliberately ignored.
    std::fs::remove_dir_all(&dir).ok();
    dir
}
/// Deletes the directory tree at `path`.
///
/// Removal is best-effort: any error (for example the path not existing) is discarded.
fn cleanup(path: &std::path::Path) {
    std::fs::remove_dir_all(path).ok();
}
/// Creates a fresh index for the test called `name` and loads the shared fixture documents.
///
/// The index is created at `test_dir(name)` with two `FieldType::Text` fields, `title` and
/// `body`, and three documents are inserted through a single `bulk` call.
///
/// Returns the index together with its directory so the caller can pass it to `cleanup`.
///
/// # Panics
///
/// Panics if the index cannot be created or the bulk insert fails.
fn setup_index(name: &str) -> (Index, std::path::PathBuf) {
    // (title, body) pairs, in insertion order.
    const FIXTURES: [(&str, &str); 3] = [
        (
            "Search Engine Architecture",
            "A search engine indexes documents for fast retrieval.",
        ),
        (
            "Database Design",
            "Databases store data in tables. Search is done via SQL queries.",
        ),
        (
            "Information Retrieval",
            "Modern search engines use inverted indexes and BM25 scoring.",
        ),
    ];

    let path = test_dir(name);
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("body", FieldType::Text)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let docs: Vec<_> = FIXTURES
        .iter()
        .map(|&(title, body)| json!({"title": title, "body": body}))
        .collect();
    index.bulk(docs).unwrap();

    (index, path)
}
/// Runs `query` against `index` and returns the highlight spans of the first hit for `field`.
///
/// Returns `None` when the search produced no first hit, or when the hit's `highlight`
/// call itself returns `None` for `field`.
///
/// # Panics
///
/// Panics if the query fails to parse or the search fails.
fn spans_for(index: &Index, query: serde_json::Value, field: &str) -> Option<Vec<Highlight>> {
    // NOTE(review): the second argument to `parse_search` is presumably a result-size
    // limit — confirm against the `luci` API.
    let parsed = parse_search(query, 10).unwrap();
    let results = index.search(&parsed).unwrap();
    results.hit(0).and_then(|top_hit| top_hit.highlight(field))
}
/// A `match` query on `body` must produce at least one span on the first hit; every span
/// must cover the term "search" (ASCII case-insensitively) and have `end > start`.
#[test]
fn basic_match_returns_spans() {
    let (index, path) = setup_index("basic");
    let query = json!({"query": {"match": {"body": "search"}}});
    let highlights = spans_for(&index, query, "body").expect("expected spans");

    assert!(!highlights.is_empty());
    highlights.iter().for_each(|span| {
        assert!(
            span.text.eq_ignore_ascii_case("search"),
            "unexpected match text: {:?}",
            span.text
        );
        assert!(span.start < span.end);
    });

    cleanup(&path);
}
/// A two-term `match` query must yield a separate span for each term ("search" and
/// "engine"), compared ASCII case-insensitively.
#[test]
fn multi_term_highlights_each_term_independently() {
    let (index, path) = setup_index("multi_terms");
    let query = json!({"query": {"match": {"body": "search engine"}}});
    let highlights = spans_for(&index, query, "body").unwrap();

    let texts: Vec<&str> = highlights.iter().map(|span| span.text.as_str()).collect();
    let has_term = |term: &str| texts.iter().any(|text| text.eq_ignore_ascii_case(term));

    assert!(
        has_term("search"),
        "expected a span for 'search', got {texts:?}"
    );
    assert!(
        has_term("engine"),
        "expected a span for 'engine', got {texts:?}"
    );

    cleanup(&path);
}
/// The spans returned for a field must already be sorted by ascending `start` offset.
#[test]
fn spans_are_in_positional_order() {
    let (index, path) = setup_index("ordered");
    let query = json!({"query": {"match": {"body": "search engine"}}});
    let highlights = spans_for(&index, query, "body").unwrap();

    let starts: Vec<usize> = highlights.iter().map(|span| span.start).collect();
    // Sorting a copy and comparing keeps the full offset lists in the failure output.
    let ascending = {
        let mut copy = starts.clone();
        copy.sort_unstable();
        copy
    };
    assert_eq!(starts, ascending, "spans must be positionally ordered");

    cleanup(&path);
}
/// Each span's `start..end` range, applied to the original field text, must reproduce the
/// span's `text`.
///
/// The expected text is hard-coded here, so this relies on the first hit being the
/// document whose `body` is that string.
#[test]
fn span_offsets_locate_the_match_in_field_text() {
    let body = "A search engine indexes documents for fast retrieval.";

    let (index, path) = setup_index("offsets");
    let query = json!({"query": {"match": {"body": "search"}}});
    let highlights = spans_for(&index, query, "body").unwrap();

    for span in highlights.iter() {
        let covered = &body[span.start..span.end];
        assert_eq!(covered, span.text);
    }

    cleanup(&path);
}
/// A `match_phrase` query must highlight each word of the phrase: after lowercasing, some
/// span equals "search" and some span equals "engine".
#[test]
fn phrase_query_highlights_constituent_terms() {
    let (index, path) = setup_index("phrase");
    let query = json!({"query": {"match_phrase": {"body": "search engine"}}});
    let highlights = spans_for(&index, query, "body").unwrap();

    let highlighted = |term: &str| {
        highlights
            .iter()
            .any(|span| span.text.to_lowercase() == term)
    };
    assert!(highlighted("search"));
    assert!(highlighted("engine"));

    cleanup(&path);
}
/// A `term` query on `body` must highlight the queried value: at least one span is
/// returned, and every span's text equals "search" (ASCII case-insensitively).
#[test]
fn term_query_highlights_exact_value() {
    let (index, path) = setup_index("term");
    let spans = spans_for(
        &index,
        json!({"query": {"term": {"body": "search"}}}),
        "body",
    )
    .unwrap();
    // `Iterator::all` is vacuously true for an empty iterator, so without this check the
    // test would pass even if the term query produced no highlights at all.
    assert!(
        !spans.is_empty(),
        "term query produced no highlight spans"
    );
    assert!(spans.iter().all(|h| h.text.eq_ignore_ascii_case("search")));
    cleanup(&path);
}
/// Highlighting a field the query did not target (`title`, while the query is on `body`)
/// must return either `None` or an empty span list.
///
/// NOTE(review): this goes through the plain `highlight` call, so it presumably exercises
/// the default `require_field_match` setting — confirm that the default is `true`.
#[test]
fn require_field_match_restricts_to_query_fields() {
    let (index, path) = setup_index("rfm_true");
    let query = json!({"query": {"match": {"body": "search"}}});

    // `None` and `Some(vec![])` are both acceptable; collapse them into one empty list.
    let title_spans = spans_for(&index, query, "title").unwrap_or_default();
    assert!(title_spans.is_empty());

    cleanup(&path);
}
/// With `require_field_match: false`, a query on `body` must still produce highlights in
/// `title`: the first hit's `title` spans must include one whose lowercased text is "search".
#[test]
fn require_field_match_false_highlights_any_field() {
    let (index, path) = setup_index("rfm_false");
    let query = json!({"query": {"match": {"body": "search"}}});
    let expr = parse_search(query, 10).unwrap();
    let results = index.search(&expr).unwrap();
    let top_hit = results.hit(0).unwrap();

    let title_field = HighlightFieldConfig {
        field: String::from("title"),
        fragment_size: 0,
        number_of_fragments: 0,
    };
    let config = HighlightConfig {
        fields: vec![title_field],
        require_field_match: false,
        order: HighlightOrder::None,
    };

    let highlights_by_field = top_hit.highlight_with_config(&config).unwrap();
    let title_spans = highlights_by_field
        .get("title")
        .expect("title should have spans");
    let found_search = title_spans
        .iter()
        .any(|span| span.text.to_lowercase() == "search");
    assert!(found_search);

    cleanup(&path);
}
/// For a `bool` query with two `must` clauses on different fields, each field must be
/// highlighted with its own clause's term: "search" in `body`, "architecture" in `title`
/// (compared after lowercasing).
#[test]
fn bool_query_highlights_each_clauses_field() {
    let (index, path) = setup_index("bool");
    let query = json!({
        "query": {
            "bool": {
                "must": [
                    {"match": {"body": "search"}},
                    {"match": {"title": "architecture"}}
                ]
            }
        }
    });
    let expr = parse_search(query, 10).unwrap();
    let results = index.search(&expr).unwrap();
    let top_hit = results.hit(0).unwrap();

    // A missing highlight entry is treated as "no spans" so the assertions below report it.
    let body_spans = top_hit.highlight("body").unwrap_or_default();
    let title_spans = top_hit.highlight("title").unwrap_or_default();

    let body_has_search = body_spans
        .iter()
        .any(|span| span.text.to_lowercase() == "search");
    let title_has_architecture = title_spans
        .iter()
        .any(|span| span.text.to_lowercase() == "architecture");
    assert!(body_has_search);
    assert!(title_has_architecture);

    cleanup(&path);
}
/// Highlight text must keep the casing of the stored field value rather than the casing
/// of the query term.
///
/// The query term is lowercase ("search") while the only fixture title containing it is
/// "Search Engine Architecture", so a span must read exactly "Search", and every span's
/// offsets must slice that same text out of the title.
#[test]
fn preserves_case_of_matched_text() {
    let (index, path) = setup_index("case");
    let spans = spans_for(
        &index,
        json!({"query": {"match": {"title": "search"}}}),
        "title",
    )
    .unwrap();
    let title = "Search Engine Architecture";
    let texts: Vec<&str> = spans.iter().map(|h| h.text.as_str()).collect();
    // Compare against the exact original casing: accepting either 'S' or 's' would also
    // pass for a case-folded highlight and so would not test case preservation.
    assert!(
        texts.iter().any(|t| *t == "Search"),
        "expected a span with the original casing 'Search', got {texts:?}"
    );
    for hl in &spans {
        // Checked slicing turns out-of-range or non-boundary offsets into a readable
        // assertion failure instead of a slice-index panic.
        assert_eq!(
            title.get(hl.start..hl.end),
            Some(hl.text.as_str()),
            "offsets {}..{} do not locate {:?} in the title",
            hl.start,
            hl.end,
            hl.text
        );
    }
    cleanup(&path);
}
/// Requesting highlights for a field that is not in the mapping must return `None`.
#[test]
fn missing_field_returns_none() {
    let (index, path) = setup_index("missing");
    let query = json!({"query": {"match": {"body": "search"}}});

    let outcome = spans_for(&index, query, "nonexistent_field");
    assert!(outcome.is_none());

    cleanup(&path);
}
/// When the query targets `title` but highlights are requested for `body`, the result
/// must be either `None` or an empty span list.
#[test]
fn no_matches_returns_none_or_empty() {
    let (index, path) = setup_index("no_matches");
    let query = json!({"query": {"match": {"title": "search"}}});

    let outcome = spans_for(&index, query, "body");
    // `None` is acceptable as-is; only a present list needs to be checked for emptiness.
    if let Some(found) = outcome.as_ref() {
        assert!(found.is_empty());
    }

    cleanup(&path);
}
/// With `number_of_fragments: 1`, highlighting `body` must still return something, but
/// no more than one span.
///
/// NOTE(review): the upper bound assumes `number_of_fragments` limits the number of
/// returned spans, as the test name states — confirm against the `luci` highlighter.
#[test]
fn number_of_fragments_caps_span_count() {
    let (index, path) = setup_index("cap");
    let expr = parse_search(json!({"query": {"match": {"body": "search"}}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    let hit = results.hit(0).unwrap();
    let config = HighlightConfig {
        fields: vec![HighlightFieldConfig {
            field: "body".to_string(),
            fragment_size: 50,
            number_of_fragments: 1,
        }],
        require_field_match: true,
        order: HighlightOrder::None,
    };
    // A missing map or a missing `body` entry collapses to an empty list, which the
    // non-empty assertion below then reports.
    let spans = hit
        .highlight_with_config(&config)
        .and_then(|m| m.get("body").cloned())
        .unwrap_or_default();
    assert!(!spans.is_empty());
    // The cap itself: previously only non-emptiness was checked, so the limit named in
    // the test was never verified.
    assert!(
        spans.len() <= 1,
        "number_of_fragments = 1 should cap the span count, got {}",
        spans.len()
    );
    cleanup(&path);
}