use luci::analysis::config::AnalysisConfig;
use luci::index::Index;
use luci::mapping::Mapping;
use serde_json::json;
/// Test helper: parses `query` into a search expression (limited by `size`)
/// and executes it against `index`.
///
/// # Panics
///
/// Panics if the query cannot be parsed or if the search itself fails.
fn search(
    index: &mut Index,
    query: serde_json::Value,
    size: usize,
) -> luci::search::results::SearchResults {
    let parsed = luci::search::expression::parse_search(query, size);
    let expression = parsed.unwrap();
    let outcome = index.search(&expression);
    outcome.unwrap()
}
/// Returns the path of a file called `name` inside the
/// `luci_analysis_tests` directory under the system temp directory.
///
/// The directory is created on a best-effort basis: a failure to create it
/// is ignored here.
fn temp_path(name: &str) -> std::path::PathBuf {
    let mut target = std::env::temp_dir();
    target.push("luci_analysis_tests");
    // Creation errors are deliberately discarded.
    let _ = std::fs::create_dir_all(&target);
    target.push(name);
    target
}
/// Deletes the file at `path`, ignoring any error (for example when the
/// file does not exist).
fn cleanup(path: &std::path::Path) {
    std::fs::remove_file(path).ok();
}
/// Indexes an accented title through a custom analyzer built from the
/// `standard` tokenizer plus `lowercase` and `asciifolding` filters, then
/// checks that the unaccented queries `cafe` and `resume` each return one hit.
#[test]
fn custom_analyzer_basic() {
    let index_path = temp_path("custom_analyzer_basic.luci");
    // Remove any file left at this path before creating the index.
    cleanup(&index_path);

    let analysis_json = json!({
        "analyzer": {
            "my_analyzer": {
                "type": "custom",
                "tokenizer": "standard",
                "filter": ["lowercase", "asciifolding"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "title": {"type": "text", "analyzer": "my_analyzer"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    index.add(json!({"title": "Café résumé"})).unwrap();

    // Each unaccented term must find the single accented document.
    let expectations = [
        ("cafe", "asciifolding should match 'cafe' to 'café'"),
        ("resume", "asciifolding should match 'resume' to 'résumé'"),
    ];
    for (term, why) in expectations {
        let request = json!({"query": {"match": {"title": term}}});
        let hits = search(&mut index, request, 10);
        assert_eq!(hits.len(), 1, "{}", why);
    }

    cleanup(&index_path);
}
/// Creates an index with a custom folding analyzer, adds one document, drops
/// the index, reopens it with `Index::open`, and checks that folded queries
/// (`uber`, `strasse`) still match the stored document.
#[test]
fn custom_analyzer_persisted_on_reopen() {
    let index_path = temp_path("custom_analyzer_persist.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "analyzer": {
            "folding": {
                "tokenizer": "standard",
                "filter": ["lowercase", "asciifolding"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "title": {"type": "text", "analyzer": "folding"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    // First session: the scope ends (dropping the index) before the reopen.
    {
        let index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
        index.add(json!({"title": "Über straße"})).unwrap();
    }

    // Second session: only the path is supplied; no analysis settings are passed.
    {
        let mut reopened = Index::open(&index_path).unwrap();

        let uber_query = json!({"query": {"match": {"title": "uber"}}});
        let uber_hits = search(&mut reopened, uber_query, 10);
        assert_eq!(uber_hits.len(), 1, "custom analyzer should survive reopen");

        let strasse_query = json!({"query": {"match": {"title": "strasse"}}});
        let strasse_hits = search(&mut reopened, strasse_query, 10);
        assert_eq!(
            strasse_hits.len(),
            1,
            "asciifolding should fold ß → ss on reopen"
        );
    }

    cleanup(&index_path);
}
/// Configures an `edge_ngram` tokenizer (2..=10 grams over letters and
/// digits) as the index-time analyzer and a `standard` tokenizer as the
/// search-time analyzer for the `name` field, then checks prefix queries
/// against three documents.
#[test]
fn edge_ngram_autocomplete() {
    let index_path = temp_path("edge_ngram_autocomplete.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "tokenizer": {
            "autocomplete_tok": {
                "type": "edge_ngram",
                "min_gram": 2,
                "max_gram": 10,
                "token_chars": ["letter", "digit"]
            }
        },
        "analyzer": {
            "autocomplete": {
                "tokenizer": "autocomplete_tok",
                "filter": ["lowercase"]
            },
            "autocomplete_search": {
                "tokenizer": "standard",
                "filter": ["lowercase"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "name": {
                "type": "text",
                "analyzer": "autocomplete",
                "search_analyzer": "autocomplete_search"
            }
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    for product in ["Elasticsearch", "Elastic Cloud", "Python"] {
        index.add(json!({"name": product})).unwrap();
    }

    // Runs a `match` query on `name` and reports how many hits came back.
    let mut hit_count = |prefix: &str| {
        let request = json!({"query": {"match": {"name": prefix}}});
        search(&mut index, request, 10).len()
    };

    assert_eq!(
        hit_count("elast"),
        2,
        "prefix 'elast' should match both Elastic* docs"
    );
    assert_eq!(hit_count("py"), 1, "prefix 'py' should match Python");
    assert_eq!(hit_count("xyz"), 0, "'xyz' should match nothing");

    cleanup(&index_path);
}
/// Registers a `synonym` filter with one equivalence group
/// (`quick, fast, speedy`) and one explicit mapping (`big => large`), then
/// checks that searching for `fast` and `large` each finds one document.
#[test]
fn synonym_expansion() {
    let index_path = temp_path("synonym_expansion.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "filter": {
            "my_synonyms": {
                "type": "synonym",
                "synonyms": [
                    "quick, fast, speedy",
                    "big => large"
                ]
            }
        },
        "analyzer": {
            "syn_analyzer": {
                "tokenizer": "standard",
                "filter": ["lowercase", "my_synonyms"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "description": {"type": "text", "analyzer": "syn_analyzer"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    let first_doc = json!({"description": "The quick brown fox"});
    index.add(first_doc).unwrap();
    let second_doc = json!({"description": "A big red car"});
    index.add(second_doc).unwrap();

    // Each query term appears only on the synonym side of a rule.
    let expectations = [
        ("fast", "'fast' should match via synonym of 'quick'"),
        (
            "large",
            "'large' should match via synonym mapping from 'big'",
        ),
    ];
    for (term, why) in expectations {
        let request = json!({"query": {"match": {"description": term}}});
        let hits = search(&mut index, request, 10);
        assert_eq!(hits.len(), 1, "{}", why);
    }

    cleanup(&index_path);
}
/// Indexes an HTML fragment through an analyzer that applies the
/// `html_strip` char filter and checks that a plain-text query for
/// `hello world` finds the document.
#[test]
fn html_strip_char_filter() {
    let index_path = temp_path("html_strip.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "analyzer": {
            "html_analyzer": {
                "char_filter": ["html_strip"],
                "tokenizer": "standard",
                "filter": ["lowercase"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "body": {"type": "text", "analyzer": "html_analyzer"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    let html_doc = json!({"body": "<p>Hello <b>World</b> & friends</p>"});
    index.add(html_doc).unwrap();

    let request = json!({"query": {"match": {"body": "hello world"}}});
    let hits = search(&mut index, request, 10);
    assert_eq!(
        hits.len(),
        1,
        "HTML tags should be stripped before indexing"
    );

    cleanup(&index_path);
}
/// Maps `title` with separately named index-time and search-time analyzers
/// and checks that a simple `match` query still finds the document.
///
/// Both analyzers are configured identically here (`standard` tokenizer plus
/// `lowercase`), so this exercises the wiring of `search_analyzer` rather
/// than a difference in analysis output.
#[test]
fn search_analyzer_divergence() {
    let index_path = temp_path("search_analyzer_div.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "analyzer": {
            "index_analyzer": {
                "tokenizer": "standard",
                "filter": ["lowercase"]
            },
            "search_analyzer": {
                "tokenizer": "standard",
                "filter": ["lowercase"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "title": {
                "type": "text",
                "analyzer": "index_analyzer",
                "search_analyzer": "search_analyzer"
            }
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    index.add(json!({"title": "hello world"})).unwrap();

    let request = json!({"query": {"match": {"title": "hello"}}});
    let hits = search(&mut index, request, 10);
    assert_eq!(hits.len(), 1);

    cleanup(&index_path);
}
/// Indexes an accented title with a folding analyzer and checks that a
/// `match_bool_prefix` query given the accented text `résumé` returns the
/// document.
#[test]
fn match_bool_prefix_uses_field_analyzer() {
    let index_path = temp_path("match_bool_prefix_analyzer.luci");
    cleanup(&index_path);

    let analysis_json = json!({
        "analyzer": {
            "folding": {
                "tokenizer": "standard",
                "filter": ["lowercase", "asciifolding"]
            }
        }
    });
    let settings = AnalysisConfig::from_json(&analysis_json).unwrap();

    let mapping_json = json!({
        "properties": {
            "title": {"type": "text", "analyzer": "folding"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_settings(&index_path, schema, Some(settings)).unwrap();
    index.add(json!({"title": "Résumé Writer"})).unwrap();

    let request = json!({"query": {"match_bool_prefix": {"title": "résumé"}}});
    let hits = search(&mut index, request, 10);
    assert_eq!(
        hits.len(),
        1,
        "match_bool_prefix must use field's folding analyzer to match accented input against folded index"
    );

    cleanup(&index_path);
}
/// Creates an index through `Index::create_with_mapping` (no analysis
/// settings supplied) and checks that a lowercase `match` query finds a
/// mixed-case document in a plain `text` field.
#[test]
fn no_settings_backward_compatible() {
    let index_path = temp_path("no_settings.luci");
    cleanup(&index_path);

    let mapping_json = json!({
        "properties": {
            "title": {"type": "text"}
        }
    });
    let schema = Mapping::from_json(&mapping_json).unwrap();

    let mut index = Index::create_with_mapping(&index_path, schema).unwrap();
    index.add(json!({"title": "Hello World"})).unwrap();

    let request = json!({"query": {"match": {"title": "hello"}}});
    let hits = search(&mut index, request, 10);
    assert_eq!(hits.len(), 1);

    cleanup(&index_path);
}