use std::path::Path;
use crate::analysis::AnalyzerRegistry;
use crate::analysis::config::AnalysisConfig;
use crate::core::{LuciError, Result};
use crate::mapping::Mapping;
use crate::query::ast::ScoringExpression;
use crate::query::parser::opt_str;
use crate::search::{MissingValue, SortField, SortFieldType, SortOrder, SortValue};
use crate::storage::SingleFileDirectory;
use crate::search::expression::SearchExpression;
use crate::query::Query as _;
use crate::reader::IndexReader;
use crate::search::searcher::Searcher;
use crate::writer::IndexWriter;
/// Decoded form of the opaque user-metadata blob kept by the storage layer.
struct ParsedUserMetadata {
/// Field mapping decoded from the blob; `parse_user_metadata` substitutes
/// an empty builder-produced mapping when the blob carries none.
mapping: Mapping,
/// Deletion map decoded from the blob; a fresh empty map when the blob
/// carries no deletion section.
deletions: crate::deletion::DeletionMap,
}
fn parse_user_metadata(meta: &[u8]) -> Result<ParsedUserMetadata> {
if meta.is_empty() {
return Ok(ParsedUserMetadata {
mapping: Mapping::builder().build(),
deletions: crate::deletion::DeletionMap::new(),
});
}
if meta.first() == Some(&b'{') {
let json: serde_json::Value = serde_json::from_slice(meta)
.map_err(|e| LuciError::IndexCorrupted(format!("invalid mapping metadata: {e}")))?;
return Ok(ParsedUserMetadata {
mapping: Mapping::from_json(&json)?,
deletions: crate::deletion::DeletionMap::new(),
});
}
if meta.len() < 4 {
return Ok(ParsedUserMetadata {
mapping: Mapping::builder().build(),
deletions: crate::deletion::DeletionMap::new(),
});
}
let mapping_len = u32::from_le_bytes(meta[0..4].try_into().unwrap()) as usize;
let mapping_bytes = &meta[4..4 + mapping_len];
let json: serde_json::Value = serde_json::from_slice(mapping_bytes)
.map_err(|e| LuciError::IndexCorrupted(format!("invalid mapping metadata: {e}")))?;
let mapping = Mapping::from_json(&json)?;
let after_mapping = 4 + mapping_len;
let deletions = if after_mapping >= meta.len() {
crate::deletion::DeletionMap::new()
} else if meta.len() >= after_mapping + 4 {
let del_len =
u32::from_le_bytes(meta[after_mapping..after_mapping + 4].try_into().unwrap()) as usize;
let start = after_mapping + 4;
if start + del_len <= meta.len() {
crate::deletion::DeletionMap::from_bytes(&meta[start..start + del_len])?
} else {
crate::deletion::DeletionMap::from_bytes(&meta[after_mapping..])?
}
} else {
crate::deletion::DeletionMap::from_bytes(&meta[after_mapping..])?
};
Ok(ParsedUserMetadata { mapping, deletions })
}
pub fn parse_sort(
value: Option<&serde_json::Value>,
) -> crate::core::Result<Option<Vec<crate::search::SortField>>> {
let arr = match value {
None => return Ok(None),
Some(serde_json::Value::Array(a)) => a,
Some(serde_json::Value::String(s)) => {
return Ok(Some(vec![parse_sort_item(&serde_json::Value::String(
s.clone(),
))?]));
}
Some(obj @ serde_json::Value::Object(_)) => {
return Ok(Some(vec![parse_sort_item(obj)?]));
}
Some(other) => {
return Err(LuciError::InvalidQuery(format!(
"sort: must be a string, object, or array of those; got {other}"
)));
}
};
let items: std::result::Result<Vec<_>, _> = arr.iter().map(parse_sort_item).collect();
items.map(Some)
}
fn parse_sort_item(value: &serde_json::Value) -> crate::core::Result<crate::search::SortField> {
match value {
serde_json::Value::String(s) => {
let (field, default_order) = match s.as_str() {
"_score" => (SortFieldType::Score, SortOrder::Desc),
"_doc" => (SortFieldType::Doc, SortOrder::Asc),
name => (SortFieldType::Field(name.to_string()), SortOrder::Asc),
};
Ok(SortField {
field,
order: default_order,
missing: MissingValue::Last,
})
}
serde_json::Value::Object(obj) => {
let (name, spec) = obj.iter().next().ok_or_else(|| {
crate::core::LuciError::InvalidQuery("sort: entry must have a field name".into())
})?;
let (field, default_order) = match name.as_str() {
"_score" => (SortFieldType::Score, SortOrder::Desc),
"_doc" => (SortFieldType::Doc, SortOrder::Asc),
name => (SortFieldType::Field(name.to_string()), SortOrder::Asc),
};
let (order, missing) = match spec {
serde_json::Value::String(o) => {
let ord = match o.as_str() {
"asc" => SortOrder::Asc,
"desc" => SortOrder::Desc,
other => {
return Err(LuciError::InvalidQuery(format!(
"sort[{name}]: unknown order '{other}', expected 'asc' or 'desc'"
)));
}
};
(ord, MissingValue::Last)
}
serde_json::Value::Object(_) => {
let ctx = format!("sort[{name}]");
let opts = crate::search::expression::validate_obj_keys(
spec,
&["order", "missing"],
&ctx,
)?;
let ord = match opt_str(opts, "order", &ctx)? {
Some("asc") => SortOrder::Asc,
Some("desc") => SortOrder::Desc,
Some(other) => {
return Err(LuciError::InvalidQuery(format!(
"{ctx}: unknown order '{other}', expected 'asc' or 'desc'"
)));
}
None => default_order,
};
let miss = match opt_str(opts, "missing", &ctx)? {
Some("_first") => MissingValue::First,
Some("_last") => MissingValue::Last,
Some(other) => {
return Err(LuciError::InvalidQuery(format!(
"{ctx}: unknown missing '{other}', expected '_first' or '_last'"
)));
}
None => MissingValue::Last,
};
(ord, miss)
}
_ => {
return Err(LuciError::InvalidQuery(format!(
"sort[{name}]: spec must be \"asc\"/\"desc\" or an object, got {spec}"
)));
}
};
Ok(SortField {
field,
order,
missing,
})
}
_ => Err(LuciError::InvalidQuery(format!(
"sort: entry must be a field-name string or a {{field: spec}} object, got {value}"
))),
}
}
/// Gathers every inner-hit request reachable from `ast` into a fresh list.
///
/// # Errors
/// Propagates any failure reported by `collect_inner_hit_specs`.
pub(crate) fn extract_inner_hit_specs(
    ast: &ScoringExpression,
    searcher: &crate::search::searcher::Searcher,
) -> crate::core::Result<Vec<crate::query::nested::InnerHitSpec>> {
    let mut collected = Vec::new();
    collect_inner_hit_specs(ast, searcher, &mut collected).map(|()| collected)
}
/// Walks `ast` and appends one `InnerHitSpec` per nested clause that asks for
/// inner hits.
///
/// `Bool` clauses are searched through all four clause lists. A `Nested`
/// clause with inner hits contributes a spec (named after its config, falling
/// back to the nested path) and is not descended into; a `Nested` clause
/// without inner hits is descended into. Every other node is ignored.
///
/// # Errors
/// Propagates a failure to bind the nested query against `searcher`.
fn collect_inner_hit_specs(
    ast: &ScoringExpression,
    searcher: &crate::search::searcher::Searcher,
    specs: &mut Vec<crate::query::nested::InnerHitSpec>,
) -> crate::core::Result<()> {
    match ast {
        ScoringExpression::Bool {
            must,
            should,
            must_not,
            filter,
            ..
        } => must
            .iter()
            .chain(should)
            .chain(must_not)
            .chain(filter)
            .try_for_each(|clause| collect_inner_hit_specs(clause, searcher, specs)),
        ScoringExpression::Nested {
            path,
            query,
            inner_hits,
        } => match inner_hits {
            Some(config) => {
                let weight = query.bind(searcher, crate::core::ScoreMode::Complete)?;
                let name = match &config.name {
                    Some(explicit) => explicit.clone(),
                    None => path.clone(),
                };
                specs.push(crate::query::nested::InnerHitSpec {
                    name,
                    path: path.clone(),
                    config: config.clone(),
                    weight,
                });
                Ok(())
            }
            None => collect_inner_hit_specs(query, searcher, specs),
        },
        _ => Ok(()),
    }
}
pub fn parse_search_after(
value: Option<&serde_json::Value>,
) -> crate::core::Result<Option<Vec<crate::search::SortValue>>> {
let arr = match value {
None | Some(serde_json::Value::Null) => return Ok(None),
Some(serde_json::Value::Array(a)) => a,
Some(other) => {
return Err(LuciError::InvalidQuery(format!(
"search_after: must be an array of cursor values, got {other}"
)));
}
};
let values = arr
.iter()
.map(|v| match v {
serde_json::Value::Number(n) => {
if let Some(i) = n.as_i64() {
Ok(SortValue::I64(i))
} else if let Some(f) = n.as_f64() {
Ok(SortValue::F64(f))
} else {
Err(LuciError::InvalidQuery(format!(
"search_after: numeric cursor value out of range: {n}"
)))
}
}
serde_json::Value::String(s) => Ok(SortValue::Str(s.clone())),
serde_json::Value::Bool(b) => Ok(SortValue::Bool(*b)),
serde_json::Value::Null => Ok(SortValue::Null),
other => Err(LuciError::InvalidQuery(format!(
"search_after: cursor values must be a number, string, boolean, or null; \
got {other}"
))),
})
.collect::<crate::core::Result<Vec<_>>>()?;
Ok(Some(values))
}
pub fn parse_source_filter(value: Option<&serde_json::Value>) -> crate::search::SourceFilter {
use crate::search::SourceFilter;
match value {
None | Some(serde_json::Value::Bool(true)) => SourceFilter::Enabled,
Some(serde_json::Value::Bool(false)) => SourceFilter::Disabled,
Some(serde_json::Value::String(s)) => SourceFilter::Fields(vec![s.clone()]),
Some(serde_json::Value::Array(arr)) => {
let fields: Vec<String> = arr
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
SourceFilter::Fields(fields)
}
Some(serde_json::Value::Object(obj)) => {
let includes = obj
.get("includes")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
})
.unwrap_or_default();
let excludes = obj
.get("excludes")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
})
.unwrap_or_default();
SourceFilter::IncludeExclude { includes, excludes }
}
_ => SourceFilter::Enabled,
}
}
/// Handle to one index: owns the writer, a lazily refreshed read view, and
/// the gate that serialises transactions against the other mutating calls.
pub struct Index {
/// Mapping the index was created or opened with, after `ensure_id_field`.
schema: Mapping,
/// Analysis settings used to build analyzer registries; `None` selects a
/// default `AnalyzerRegistry`.
analysis_config: Option<AnalysisConfig>,
/// Writer together with the generation counter incremented by
/// `commit_inner` and `force_merge`.
writer: std::sync::Mutex<WriterState>,
/// Cached segment store and the commit generation it was built from.
reader: std::sync::RwLock<ReaderState>,
/// Shared handle to the backing file; `refresh_reader` reopens storage
/// from it.
file_handle: std::sync::Arc<std::fs::File>,
/// `true` while a transaction started by `begin_transaction` is active.
txn_mutex: std::sync::Mutex<bool>,
/// Notified by `end_transaction` to wake callers waiting on `txn_mutex`.
txn_condvar: std::sync::Condvar,
}
/// Everything guarded by `Index::writer`.
struct WriterState {
/// The underlying index writer.
writer: IndexWriter,
/// Counter compared against `ReaderState::store_generation` to decide
/// whether the cached segment store is stale.
commit_generation: u64,
}
/// Everything guarded by `Index::reader`.
struct ReaderState {
/// Segment store built by `refresh_reader`; `None` until the first refresh.
segment_store: Option<std::sync::Arc<crate::search::segment_store::SegmentStore>>,
/// Value of `WriterState::commit_generation` observed when `segment_store`
/// was last rebuilt.
store_generation: u64,
}
impl Index {
/// Creates a new index at `path` with an empty builder-produced mapping.
///
/// Shorthand for `create_with_mapping(path, Mapping::builder().build())`.
pub fn create(path: impl AsRef<Path>) -> Result<Self> {
Self::create_with_mapping(path, Mapping::builder().build())
}
/// Creates a new index at `path` with the given mapping and no analysis
/// settings.
///
/// Shorthand for `create_with_settings(path, mapping, None)`.
pub fn create_with_mapping(path: impl AsRef<Path>, mapping: Mapping) -> Result<Self> {
Self::create_with_settings(path, mapping, None)
}
/// Creates a new index at `path` with an explicit mapping and optional
/// analysis settings.
///
/// The mapping is validated and given an id field before storage is
/// created. When analysis settings are supplied, the analyzer registry is
/// built from them and their JSON form is handed to the writer.
///
/// # Errors
/// Fails when the mapping is invalid, when storage cannot be created, or
/// (as `LuciError::InvalidQuery`) when the analyzer registry cannot be built.
pub fn create_with_settings(
    path: impl AsRef<Path>,
    mut mapping: Mapping,
    analysis_config: Option<AnalysisConfig>,
) -> Result<Self> {
    mapping.validate()?;
    mapping.ensure_id_field();

    let index_path = path.as_ref().to_path_buf();
    let storage = SingleFileDirectory::create(&index_path)?;
    let file_handle = storage.file_handle();

    let registry = if let Some(config) = analysis_config.as_ref() {
        config.build_registry().map_err(LuciError::InvalidQuery)?
    } else {
        AnalyzerRegistry::new()
    };

    let mut writer = IndexWriter::new(storage, mapping.clone(), registry);
    if let Some(config) = analysis_config.as_ref() {
        writer.set_analysis_json(Some(config.to_json()));
    }

    let writer_state = WriterState {
        writer,
        commit_generation: 0,
    };
    let reader_state = ReaderState {
        segment_store: None,
        store_generation: 0,
    };
    Ok(Self {
        schema: mapping,
        analysis_config,
        writer: std::sync::Mutex::new(writer_state),
        reader: std::sync::RwLock::new(reader_state),
        file_handle,
        txn_mutex: std::sync::Mutex::new(false),
        txn_condvar: std::sync::Condvar::new(),
    })
}
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().to_path_buf();
let storage = SingleFileDirectory::open(&path)?;
let generation = storage.generation();
let meta = storage.user_metadata();
let parsed = parse_user_metadata(meta)?;
let mut mapping = parsed.mapping;
mapping.ensure_id_field();
let global_hnsw = crate::vector::global::GlobalHnsw::new(&mapping);
for field_id in storage.vector_index_fields() {
let Some(bytes) = storage.read_vector_index(field_id)? else {
continue;
};
global_hnsw.load_field(field_id, &bytes)?;
}
let analysis_config = Self::load_analysis_config_from_storage(&storage);
let writer_analyzers = match &analysis_config {
Some(config) => config
.build_registry()
.unwrap_or_else(|_| AnalyzerRegistry::new()),
None => AnalyzerRegistry::new(),
};
let file_handle = storage.file_handle();
let mut writer = IndexWriter::new(storage, mapping.clone(), writer_analyzers);
if let Some(ref config) = analysis_config {
writer.set_analysis_json(Some(config.to_json()));
}
writer.load_deletions(parsed.deletions);
writer.load_global_hnsw(global_hnsw);
Ok(Self {
schema: mapping,
analysis_config,
writer: std::sync::Mutex::new(WriterState {
writer,
commit_generation: generation,
}),
reader: std::sync::RwLock::new(ReaderState {
segment_store: None,
store_generation: 0,
}),
file_handle,
txn_mutex: std::sync::Mutex::new(false),
txn_condvar: std::sync::Condvar::new(),
})
}
/// Best-effort recovery of the analysis settings stored with the mapping.
///
/// Looks up `settings.analysis` in the JSON form of the stored mapping.
/// Returns `None` when the metadata is empty, cannot be parsed, lacks that
/// path, or holds settings `AnalysisConfig::from_json` rejects.
fn load_analysis_config_from_storage(storage: &SingleFileDirectory) -> Option<AnalysisConfig> {
    let meta = storage.user_metadata();
    if meta.is_empty() {
        return None;
    }
    let parsed = parse_user_metadata(meta).ok()?;
    let mapping_json = parsed.mapping.to_json();
    mapping_json
        .get("settings")
        .and_then(|settings| settings.get("analysis"))
        .and_then(|analysis| AnalysisConfig::from_json(analysis).ok())
}
/// Adds one document and commits immediately.
///
/// Blocks while a transaction is active before touching the writer.
/// NOTE(review): the transaction gate is released before the writer lock is
/// taken, so a transaction could presumably begin in between — confirm
/// whether callers rely on stricter exclusion.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn add(&self, doc: serde_json::Value) -> Result<()> {
self.wait_for_transaction();
let mut w = self.writer.lock().unwrap();
w.writer.add(doc)?;
self.commit_inner(&mut w)
}
/// Adds a batch of documents under one writer lock and commits once.
///
/// Blocks while a transaction is active. On success returns
/// `{"took": <elapsed ms>, "count": <number of documents>}`.
///
/// # Errors
/// The first document the writer rejects aborts the batch with
/// `LuciError::InvalidQuery("bulk item <index>: ..")`. Documents already
/// handed to the writer before the failure are not rolled back here.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn bulk(&self, docs: Vec<serde_json::Value>) -> Result<serde_json::Value> {
    self.wait_for_transaction();
    let started = std::time::Instant::now();
    let submitted = docs.len();

    let mut state = self.writer.lock().unwrap();
    for (position, doc) in docs.into_iter().enumerate() {
        if let Err(e) = state.writer.add(doc) {
            return Err(LuciError::InvalidQuery(format!(
                "bulk item {position}: {e}"
            )));
        }
    }
    self.commit_inner(&mut state)?;

    let took_ms = started.elapsed().as_millis() as u64;
    Ok(serde_json::json!({
        "took": took_ms,
        "count": submitted
    }))
}
/// Fetches the source of the document whose `_id` equals `id`.
///
/// Implemented as a `term` query on `_id`; the first hit's source is
/// returned, or `None` when there is no hit or the hit has no source.
pub fn get(&self, id: &str) -> Result<Option<serde_json::Value>> {
// The second argument is presumably the default page size — TODO confirm.
let expr = SearchExpression::from_json(serde_json::json!({"term": {"_id": id}}), 10)?;
let results = self.search(&expr)?;
Ok(results.hit(0).and_then(|h| h.source()))
}
/// Deletes the document whose `_id` equals `id` and commits.
///
/// Blocks while a transaction is active. Returns `Ok(true)` when a matching
/// document was found and marked deleted, `Ok(false)` when none matched.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn delete(&self, id: &str) -> Result<bool> {
    self.wait_for_transaction();
    let lookup = SearchExpression::from_json(serde_json::json!({"term": {"_id": id}}), 1)?;
    let found = self.search(&lookup)?;
    let Some(hit) = found.hit(0) else {
        return Ok(false);
    };

    let mut state = self.writer.lock().unwrap();
    state.writer.mark_deleted(hit.segment_id(), hit.doc_id());
    self.commit_inner(&mut state)?;
    Ok(true)
}
/// Applies a shallow partial update to the document whose `_id` equals `id`.
///
/// The stored source is fetched, top-level keys of `partial_doc` overwrite
/// it (when both are JSON objects), `_id` is forced back to `id`, and the
/// old document is marked deleted before the merged one is added and
/// committed. Blocks while a transaction is active.
///
/// Returns `Ok(false)` when no document matches or the match has no source.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn update(&self, id: &str, partial_doc: serde_json::Value) -> Result<bool> {
    self.wait_for_transaction();
    let lookup = SearchExpression::from_json(serde_json::json!({"term": {"_id": id}}), 10)?;
    let found = self.search(&lookup)?;

    let Some(hit) = found.hit(0) else {
        return Ok(false);
    };
    let Some(mut merged) = hit.source() else {
        return Ok(false);
    };
    let (seg_id, doc_id) = (hit.segment_id(), hit.doc_id());

    if let Some(target) = merged.as_object_mut() {
        if let Some(patch) = partial_doc.as_object() {
            for (key, value) in patch {
                target.insert(key.clone(), value.clone());
            }
        }
        // The patch must not be able to change the document's identity.
        target.insert("_id".to_string(), serde_json::Value::String(id.to_string()));
    }

    let mut state = self.writer.lock().unwrap();
    state.writer.mark_deleted(seg_id, doc_id);
    state.writer.add(merged)?;
    self.commit_inner(&mut state)?;
    Ok(true)
}
/// Marks every hit of `query` as deleted and returns how many were marked.
///
/// The search is issued with `size` 10000 and `_source` disabled. A commit
/// happens only when at least one hit was marked. Blocks while a transaction
/// is active.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn delete_by_query(&self, query: serde_json::Value) -> Result<u64> {
    self.wait_for_transaction();
    let request = serde_json::json!({"query": query, "size": 10000, "_source": false});
    let expr = SearchExpression::from_json(request, 10000)?;
    let matches = self.search(&expr)?;
    let marked = matches.len() as u64;

    let mut state = self.writer.lock().unwrap();
    for hit in matches.iter() {
        state.writer.mark_deleted(hit.segment_id(), hit.doc_id());
    }
    if marked != 0 {
        self.commit_inner(&mut state)?;
    }
    Ok(marked)
}
/// Counts the documents matching `query`.
///
/// Runs a search with `_source` disabled and `size` 100000 and returns the
/// number of hits that came back.
/// NOTE(review): because the result is the number of returned hits, it
/// presumably saturates at 100000 — confirm whether larger counts matter.
pub fn count(&self, query: serde_json::Value) -> Result<u64> {
let expr = SearchExpression::from_json(
serde_json::json!({"query": query, "_source": false, "size": 100000}),
100000,
)?;
let results = self.search(&expr)?;
Ok(results.len() as u64)
}
/// Commits the writer and bumps the commit generation.
///
/// The caller passes the already locked writer state. The generation is
/// incremented only when the commit succeeds; `refresh_reader` compares it
/// against the cached store's generation.
fn commit_inner(&self, w: &mut WriterState) -> Result<()> {
w.writer.commit()?;
w.commit_generation += 1;
Ok(())
}
/// Rebuilds the cached segment store when it is missing or older than the
/// writer's commit generation.
///
/// Storage is reopened from the shared file handle, segments are read,
/// analyzers are rebuilt (falling back to a default registry when the
/// stored settings fail to build) and every persisted vector index is
/// reloaded. The new store is published together with the commit generation
/// sampled at the start of the call.
///
/// # Errors
/// Fails when storage or segments cannot be opened, or a vector index cannot
/// be read or loaded.
///
/// # Panics
/// Panics if the writer mutex or the reader lock is poisoned.
fn refresh_reader(&self) -> Result<()> {
    let commit_gen = self.writer.lock().unwrap().commit_generation;
    let up_to_date = {
        let cached = self.reader.read().unwrap();
        cached.store_generation == commit_gen && cached.segment_store.is_some()
    };
    if up_to_date {
        return Ok(());
    }

    let storage = SingleFileDirectory::open_from_handle(self.file_handle.clone())?;
    let reader = IndexReader::open(&storage)?;
    let analyzers = self
        .analysis_config
        .as_ref()
        .and_then(|config| config.build_registry().ok())
        .unwrap_or_else(AnalyzerRegistry::new);

    let hnsw = crate::vector::global::GlobalHnsw::new(&self.schema);
    for field_id in storage.vector_index_fields() {
        let Some(bytes) = storage.read_vector_index(field_id)? else {
            continue;
        };
        hnsw.load_field(field_id, &bytes)?;
    }

    let store = crate::search::segment_store::SegmentStore::new(
        reader.into_segments(),
        analyzers,
        Some(self.schema.clone()),
        Some(std::sync::Arc::new(hnsw)),
    );

    let mut cached = self.reader.write().unwrap();
    cached.segment_store = Some(std::sync::Arc::new(store));
    cached.store_generation = commit_gen;
    Ok(())
}
/// Asks the writer to merge down to at most `max_segments` segments.
///
/// On success the commit generation is bumped so the next `refresh_reader`
/// rebuilds the cached segment store. Unlike `add` or `delete`, this does
/// not wait for an active transaction.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn force_merge(&self, max_segments: usize) -> Result<()> {
let mut w = self.writer.lock().unwrap();
w.writer.force_merge(max_segments)?;
w.commit_generation += 1;
Ok(())
}
/// Executes `expr` against the current committed state and returns the hits.
///
/// Steps, in order: refresh the cached segment store, run the query (an
/// absent query means match-all), apply the optional rescore, drop hits the
/// writer's deletion map marks as deleted, then adjust and resolve the total
/// hit count.
///
/// # Errors
/// Propagates failures from `refresh_reader`, `execute_query` and
/// `apply_rescore`.
///
/// # Panics
/// Panics if the reader lock or the writer mutex is poisoned.
pub fn search(
&self,
expr: &crate::search::expression::SearchExpression,
) -> Result<crate::search::results::SearchResults> {
use crate::query::ast::QueryExpression;
self.refresh_reader()?;
let r = self.reader.read().unwrap();
// `refresh_reader` succeeded, so a segment store has been published.
let store = r.segment_store.as_ref().unwrap();
let searcher = Searcher::new(store);
// Fallback used when the request carries no query.
let match_all = QueryExpression::Scoring(ScoringExpression::MatchAll);
let query = expr.query.as_ref().unwrap_or(&match_all);
let mut results = searcher.execute_query(query, expr)?;
if let Some(ref rescore) = expr.rescore {
searcher.apply_rescore(
&mut results,
rescore.query.as_ref(),
rescore.window_size,
rescore.query_weight,
rescore.rescore_query_weight,
rescore.score_mode,
)?;
}
// Keep the store alive for the returned results, then release the read
// lock before taking the writer lock.
let store = r.segment_store.as_ref().unwrap().clone();
drop(r);
let w = self.writer.lock().unwrap();
let deletions = w.writer.deletions();
let pre_len = results.hits.len() as u64;
// Filter out hits that the writer's deletion map reports as deleted.
results
.hits
.retain(|hit| !deletions.is_deleted(hit.segment_id, hit.doc_id));
let removed = pre_len - results.hits.len() as u64;
drop(w);
// Subtract the filtered hits from the reported total (never below zero).
if removed > 0 || results.total_hits.value > results.hits.len() as u64 {
results.total_hits =
crate::search::TotalHits::exact(results.total_hits.value.saturating_sub(removed));
}
// Apply the request's `track_total_hits` setting to the final total.
results.total_hits =
crate::search::TotalHits::resolve(results.total_hits.value, expr.track_total_hits);
Ok(crate::search::results::SearchResults::new(
results.hits,
results.total_hits,
results.aggregations,
store,
expr.query.clone(),
))
}
/// Forwards `budget` to the writer's `set_memory_budget`.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn set_memory_budget(&self, budget: usize) {
self.writer.lock().unwrap().writer.set_memory_budget(budget);
}
/// Forwards `timeout` to the writer's `set_write_timeout`.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn set_write_timeout(&self, timeout: std::time::Duration) {
self.writer
.lock()
.unwrap()
.writer
.set_write_timeout(timeout);
}
/// Returns the mapping this index was created or opened with.
pub fn schema(&self) -> &Mapping {
&self.schema
}
/// Returns the writer's `buffered_doc_count`.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn buffered_doc_count(&self) -> u32 {
self.writer.lock().unwrap().writer.buffered_doc_count()
}
/// Blocks the caller until no transaction is active.
///
/// The transaction flag is released again as soon as this returns.
///
/// # Panics
/// Panics if the transaction mutex is poisoned.
fn wait_for_transaction(&self) {
    let flag = self.txn_mutex.lock().unwrap();
    let idle = self.txn_condvar.wait_while(flag, |busy| *busy).unwrap();
    drop(idle);
}
/// Starts a transaction, first waiting for any active one to end.
///
/// Always returns `Ok(())` once the transaction flag has been claimed.
///
/// # Panics
/// Panics if the transaction mutex is poisoned.
pub fn begin_transaction(&self) -> Result<()> {
    let flag = self.txn_mutex.lock().unwrap();
    let mut claimed = self.txn_condvar.wait_while(flag, |busy| *busy).unwrap();
    *claimed = true;
    Ok(())
}
/// Clears the transaction flag and wakes every thread waiting on it.
///
/// Does not commit or roll back; see `txn_commit` and `txn_rollback`.
///
/// # Panics
/// Panics if the transaction mutex is poisoned.
pub fn end_transaction(&self) {
let mut active = self.txn_mutex.lock().unwrap();
*active = false;
self.txn_condvar.notify_all();
}
/// Reports whether the transaction flag is currently set.
///
/// # Panics
/// Panics if the transaction mutex is poisoned.
pub fn is_transaction_active(&self) -> bool {
*self.txn_mutex.lock().unwrap()
}
/// Hands `doc` to the writer without committing.
///
/// Unlike `add`, this neither waits on the transaction gate nor commits;
/// pair it with `txn_commit` or `txn_rollback`.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn txn_add(&self, doc: serde_json::Value) -> Result<()> {
let mut w = self.writer.lock().unwrap();
w.writer.add(doc)?;
Ok(())
}
/// Commits the writer and bumps the commit generation.
///
/// Does not clear the transaction flag; call `end_transaction` for that.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn txn_commit(&self) -> Result<()> {
let mut w = self.writer.lock().unwrap();
self.commit_inner(&mut w)
}
/// Calls the writer's `discard_buffer`.
///
/// Does not clear the transaction flag; call `end_transaction` for that.
///
/// # Panics
/// Panics if the writer mutex is poisoned.
pub fn txn_rollback(&self) {
let mut w = self.writer.lock().unwrap();
w.writer.discard_buffer();
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::mapping::FieldType;
use serde_json::json;
/// Returns a per-process scratch path for the test called `name`, removing
/// whatever a previous run left there.
///
/// The leftover may be a regular file or a directory tree.
/// `remove_dir_all` alone refuses to delete a regular file, so the entry
/// type is checked first. Removal is best effort; failures are ignored.
fn test_dir(name: &str) -> std::path::PathBuf {
    let dir =
        std::env::temp_dir().join(format!("luci_index_facade_{}_{name}", std::process::id()));
    match std::fs::symlink_metadata(&dir) {
        Ok(meta) if meta.is_dir() => {
            let _ = std::fs::remove_dir_all(&dir);
        }
        Ok(_) => {
            let _ = std::fs::remove_file(&dir);
        }
        // Nothing there (or not inspectable): nothing to clean.
        Err(_) => {}
    }
    dir
}
/// Removes the scratch entry at `path`, whether it is a regular file or a
/// directory tree.
///
/// `remove_dir_all` alone refuses to delete a regular file, so the entry
/// type is checked first. Removal is best effort; failures are ignored.
fn cleanup(path: &Path) {
    match std::fs::symlink_metadata(path) {
        Ok(meta) if meta.is_dir() => {
            let _ = std::fs::remove_dir_all(path);
        }
        Ok(_) => {
            let _ = std::fs::remove_file(path);
        }
        // Nothing there (or not inspectable): nothing to clean.
        Err(_) => {}
    }
}
/// Mapping shared by most tests: `title` and `body` as text fields, `status`
/// as a keyword field.
fn test_schema() -> Mapping {
Mapping::builder()
.field("title", FieldType::Text)
.field("body", FieldType::Text)
.field("status", FieldType::Keyword)
.build()
}
/// Two documents are indexed; a `match` on `title` must return only the one
/// containing the term, with its original source.
#[test]
fn create_add_commit_search() {
    let path = test_dir("basic");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "Hello World", "body": "A greeting", "status": "published"}),
        json!({"title": "Search Engine", "body": "Building search", "status": "draft"}),
    ] {
        index.add(doc).unwrap();
    }

    let expr = SearchExpression::from_json(json!({"match": {"title": "hello"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 1);
    let top_source = results.hit(0).unwrap().source().unwrap();
    assert_eq!(top_source["title"], "Hello World");
    cleanup(&path);
}
/// The request body may wrap the query in a top-level `query` key.
#[test]
fn search_with_query_wrapper() {
    let path = test_dir("wrapper");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "Rust Programming", "status": "published"}),
        json!({"title": "Go Programming", "status": "published"}),
    ] {
        index.add(doc).unwrap();
    }

    let request = json!({"query": {"match": {"title": "rust"}}});
    let expr = SearchExpression::from_json(request, 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 1);
    cleanup(&path);
}
/// A `term` query on the keyword field matches exactly the documents that
/// carry that status.
#[test]
fn term_query_on_keyword() {
    let path = test_dir("keyword");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "A", "status": "published"}),
        json!({"title": "B", "status": "draft"}),
        json!({"title": "C", "status": "published"}),
    ] {
        index.add(doc).unwrap();
    }

    let expr =
        SearchExpression::from_json(json!({"term": {"status": "published"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 2);
    cleanup(&path);
}
/// A `bool` query combining a `must` match with a `filter` term keeps only
/// the document satisfying both.
#[test]
fn bool_query_end_to_end() {
    let path = test_dir("bool");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "Search Engine Design", "status": "published"}),
        json!({"title": "Search Tips", "status": "draft"}),
        json!({"title": "Database Design", "status": "published"}),
    ] {
        index.add(doc).unwrap();
    }

    let request = json!({
        "bool": {
            "must": [{"match": {"title": "search"}}],
            "filter": [{"term": {"status": "published"}}]
        }
    });
    let expr = SearchExpression::from_json(request, 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 1);
    let source = results.hit(0).unwrap().source().unwrap();
    let title = source["title"].as_str().unwrap();
    assert!(title.contains("Search Engine"));
    cleanup(&path);
}
/// `match_phrase` requires the terms in order, so only the first document
/// matches.
#[test]
fn phrase_query_end_to_end() {
    let path = test_dir("phrase");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"body": "the quick brown fox jumps"}),
        json!({"body": "brown quick fox"}),
    ] {
        index.add(doc).unwrap();
    }

    let request = json!({"match_phrase": {"body": "quick brown fox"}});
    let expr = SearchExpression::from_json(request, 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 1);
    cleanup(&path);
}
/// `match_all` returns every indexed document.
#[test]
fn match_all_query() {
    let path = test_dir("match_all");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "A"}),
        json!({"title": "B"}),
        json!({"title": "C"}),
    ] {
        index.add(doc).unwrap();
    }

    let expr = SearchExpression::from_json(json!({"match_all": {}}), 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 3);
    cleanup(&path);
}
/// A document added through one `Index` handle is searchable after the
/// handle is dropped and the index is reopened from disk.
#[test]
fn open_existing_index() {
    let path = test_dir("reopen");
    {
        let created = Index::create_with_mapping(&path, test_schema()).unwrap();
        let doc = json!({"title": "persistent doc", "status": "published"});
        created.add(doc).unwrap();
    }
    {
        let reopened = Index::open(&path).unwrap();
        let request = json!({"match": {"title": "persistent"}});
        let expr = SearchExpression::from_json(request, 10).unwrap();
        let results = reopened.search(&expr).unwrap();
        assert_eq!(results.total_hits().value, 1);
    }
    cleanup(&path);
}
/// Documents added by separate `add` calls are all visible to one search.
#[test]
fn multiple_commits_searchable() {
    let path = test_dir("multi_commit");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "first batch"}),
        json!({"title": "second batch"}),
    ] {
        index.add(doc).unwrap();
    }

    let expr = SearchExpression::from_json(json!({"match": {"title": "batch"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 2);
    cleanup(&path);
}
/// Searching a freshly created, empty index succeeds with zero hits.
#[test]
fn empty_index_search() {
    let path = test_dir("empty");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();

    let expr = SearchExpression::from_json(json!({"match_all": {}}), 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 0);
    cleanup(&path);
}
/// A `term` query for a value no document carries yields zero hits.
#[test]
fn search_no_results() {
    let path = test_dir("no_results");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    index.add(json!({"title": "hello"})).unwrap();

    let request = json!({"term": {"status": "nonexistent"}});
    let expr = SearchExpression::from_json(request, 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 0);
    cleanup(&path);
}
/// `constant_score` assigns the configured boost as the score of each hit.
#[test]
fn constant_score_end_to_end() {
    let path = test_dir("const_score");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    index
        .add(json!({"title": "test", "status": "published"}))
        .unwrap();

    let request = json!({
        "constant_score": {
            "filter": {"term": {"status": "published"}},
            "boost": 3.14
        }
    });
    let expr = SearchExpression::from_json(request, 10).unwrap();
    let results = index.search(&expr).unwrap();

    assert_eq!(results.total_hits().value, 1);
    assert!((results.hit(0).unwrap().score() - 3.14).abs() < 0.01);
    cleanup(&path);
}
/// `from` / `size` paging: consecutive pages do not overlap, an offset past
/// the end is empty, the total stays at 20 throughout, and an explicit
/// `from: 0, size: 5` returns as many hits as a bare query with a default
/// size of 5.
#[test]
fn from_offset_pagination() {
    let path = test_dir("from_offset");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    let docs: Vec<_> = (0..20)
        .map(|i| json!({"title": format!("document {i}"), "status": "published"}))
        .collect();
    index.bulk(docs).unwrap();

    // Fetches a five-hit page starting at `from`.
    let page_at = |from: u64| {
        let request = json!({"query": {"match_all": {}}, "from": from, "size": 5});
        let expr = SearchExpression::from_json(request, 10).unwrap();
        index.search(&expr).unwrap()
    };

    let page1 = page_at(0);
    assert_eq!(page1.len(), 5);
    assert_eq!(page1.total_hits().value, 20);

    let page2 = page_at(5);
    assert_eq!(page2.len(), 5);
    assert_eq!(page2.total_hits().value, 20);

    let first_ids: Vec<_> = page1.iter().map(|h| h.doc_id()).collect();
    assert!(
        page2.iter().all(|h| !first_ids.contains(&h.doc_id())),
        "page 2 should not overlap page 1"
    );

    let beyond = page_at(100);
    assert_eq!(beyond.len(), 0);
    assert_eq!(beyond.total_hits().value, 20);

    let default_expr = SearchExpression::from_json(json!({"match_all": {}}), 5).unwrap();
    let default = index.search(&default_expr).unwrap();
    let explicit = page_at(0);
    assert_eq!(default.len(), explicit.len());
    cleanup(&path);
}
/// Exercises each `_source` form: `false`, a field list, `excludes` only,
/// `includes` plus `excludes`, and `true`.
#[test]
fn source_filtering() {
    use crate::search::SourceFilter;

    let path = test_dir("source_filter");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "Hello World", "body": "Full text", "status": "published"}),
        json!({"title": "Search Engine", "body": "More text", "status": "draft"}),
    ] {
        index.add(doc).unwrap();
    }

    let search = |request: serde_json::Value| {
        let expr = SearchExpression::from_json(request, 10).unwrap();
        index.search(&expr).unwrap()
    };

    // `_source: false` disables the source entirely.
    let disabled = SourceFilter::Disabled;
    let results = search(json!({"query": {"match_all": {}}, "_source": false}));
    assert_eq!(results.len(), 2);
    for position in 0..2 {
        let hit = results.hit(position).unwrap();
        assert!(hit.source_filtered(&disabled).is_none());
    }

    // A field list keeps only the named fields.
    let only_title = SourceFilter::Fields(vec!["title".to_string()]);
    let results = search(json!({"query": {"match_all": {}}, "_source": ["title"]}));
    for hit in results.iter() {
        let src = hit.source_filtered(&only_title).unwrap();
        assert!(src.get("title").is_some());
        assert!(src.get("body").is_none());
        assert!(src.get("status").is_none());
    }

    // `excludes` alone drops the named fields and keeps the rest.
    let without_body = SourceFilter::IncludeExclude {
        includes: vec![],
        excludes: vec!["body".to_string()],
    };
    let results = search(json!({
        "query": {"match_all": {}},
        "_source": {"excludes": ["body"]}
    }));
    for hit in results.iter() {
        let src = hit.source_filtered(&without_body).unwrap();
        assert!(src.get("title").is_some());
        assert!(src.get("status").is_some());
        assert!(src.get("body").is_none());
    }

    // A field that is both included and excluded ends up excluded.
    let include_then_exclude = SourceFilter::IncludeExclude {
        includes: vec!["title".to_string(), "body".to_string()],
        excludes: vec!["body".to_string()],
    };
    let results = search(json!({
        "query": {"match_all": {}},
        "_source": {"includes": ["title", "body"], "excludes": ["body"]}
    }));
    for hit in results.iter() {
        let src = hit.source_filtered(&include_then_exclude).unwrap();
        assert!(src.get("title").is_some());
        assert!(src.get("body").is_none());
    }

    // `_source: true` returns the full document.
    let results = search(json!({"query": {"match_all": {}}, "_source": true}));
    for hit in results.iter() {
        let src = hit.source().unwrap();
        assert!(src.get("title").is_some());
        assert!(src.get("body").is_some());
        assert!(src.get("status").is_some());
    }
    cleanup(&path);
}
/// `fields` retrieval: keyword and float fields come back as arrays, text
/// fields are omitted, and `fields` can be combined with a `_source` list.
#[test]
fn fields_retrieval() {
    let path = test_dir("fields_retrieval");
    let schema = Mapping::builder()
        .field("title", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("price", FieldType::Float)
        .build();
    let index = Index::create_with_mapping(&path, schema).unwrap();
    for doc in [
        json!({"title": "Hello World", "tag": "tech", "price": 9.99}),
        json!({"title": "Search Engine", "tag": "science", "price": 19.99}),
    ] {
        index.add(doc).unwrap();
    }

    let search = |request: serde_json::Value| {
        let expr = SearchExpression::from_json(request, 10).unwrap();
        index.search(&expr).unwrap()
    };

    // Keyword and float fields are returned; the source stays disabled.
    let tag_and_price: Vec<String> = vec!["tag".to_string(), "price".to_string()];
    let results = search(json!({
        "query": {"match_all": {}},
        "fields": ["tag", "price"],
        "_source": false
    }));
    assert_eq!(results.len(), 2);
    let disabled = crate::search::SourceFilter::Disabled;
    for hit in results.iter() {
        assert!(hit.source_filtered(&disabled).is_none());
        let fields = hit.fields(&tag_and_price);
        assert!(fields.get("tag").is_some(), "tag field should be present");
        assert!(
            fields.get("price").is_some(),
            "price field should be present"
        );
        let tag = fields.get("tag").unwrap();
        assert!(tag.is_array(), "tag should be array");
    }

    // Text fields are not retrievable through `fields`.
    let only_title: Vec<String> = vec!["title".to_string()];
    let results = search(json!({
        "query": {"match_all": {}},
        "fields": ["title"],
        "_source": false
    }));
    for hit in results.iter() {
        let fields = hit.fields(&only_title);
        assert!(
            fields.get("title").is_none(),
            "text field should be omitted"
        );
    }

    // `fields` and a `_source` list can be requested together.
    let only_tag: Vec<String> = vec!["tag".to_string()];
    let results = search(json!({
        "query": {"match_all": {}},
        "fields": ["tag"],
        "_source": ["title"]
    }));
    for hit in results.iter() {
        assert!(hit.source().is_some());
        assert!(!hit.fields(&only_tag).is_empty());
    }
    cleanup(&path);
}
/// Sorting: ascending by default on a float field, explicit descending,
/// ascending on a keyword field, sort values present on every hit, and
/// `_score` accepted as a sort key.
#[test]
fn sort_by_field() {
    let path = test_dir("sort_by_field");
    let schema = Mapping::builder()
        .field("title", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("price", FieldType::Float)
        .build();
    let index = Index::create_with_mapping(&path, schema).unwrap();
    for doc in [
        json!({"title": "Expensive", "tag": "b", "price": 99.99}),
        json!({"title": "Cheap", "tag": "a", "price": 1.99}),
        json!({"title": "Mid", "tag": "c", "price": 49.99}),
    ] {
        index.add(doc).unwrap();
    }

    // Runs a match-all search with the given `sort` clause.
    let search_sorted = |sort: serde_json::Value| {
        let request = json!({
            "query": {"match_all": {}},
            "sort": sort
        });
        let expr = SearchExpression::from_json(request, 10).unwrap();
        index.search(&expr).unwrap()
    };
    // Reads the first sort value of every hit as a float.
    let first_sort_as_f64 = |results: &crate::search::results::SearchResults| -> Vec<f64> {
        results
            .iter()
            .map(|h| h.sort_values().unwrap()[0].to_json().as_f64().unwrap())
            .collect()
    };

    let results = search_sorted(json!(["price"]));
    assert_eq!(results.len(), 3);
    let prices = first_sort_as_f64(&results);
    assert!(
        prices[0] <= prices[1] && prices[1] <= prices[2],
        "prices should be ascending: {:?}",
        prices
    );

    let results = search_sorted(json!([{"price": "desc"}]));
    let prices = first_sort_as_f64(&results);
    assert!(
        prices[0] >= prices[1] && prices[1] >= prices[2],
        "prices should be descending: {:?}",
        prices
    );

    let results = search_sorted(json!([{"tag": "asc"}]));
    let tags: Vec<String> = results
        .iter()
        .map(|h| {
            let first = h.sort_values().unwrap()[0].to_json();
            first.as_str().unwrap().to_string()
        })
        .collect();
    assert_eq!(tags, vec!["a", "b", "c"]);

    let results = search_sorted(json!(["price"]));
    for hit in results.iter() {
        assert!(hit.sort_values().is_some(), "sort values should be present");
    }

    let results = search_sorted(json!(["_score"]));
    assert_eq!(results.len(), 3);
    cleanup(&path);
}
/// Pages through 20 price-sorted documents with `search_after`.
///
/// Three things are checked: a second page of 5 starts strictly after the
/// first, a cursor loop with page size 7 visits all 20 documents in strictly
/// ascending order, and a cursor of 999.0 produces an empty page.
#[test]
fn search_after_pagination() {
    let path = test_dir("search_after");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("price", FieldType::Float)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let mut docs = Vec::new();
    for i in 0..20 {
        docs.push(json!({"title": format!("item {i}"), "price": i as f64}));
    }
    index.bulk(docs).unwrap();

    // Shared parse-then-search step for every request in this test.
    let run = |request: serde_json::Value| {
        index
            .search(&SearchExpression::from_json(request, 10).unwrap())
            .unwrap()
    };

    // First page of five.
    let page1 = run(json!({
        "query": {"match_all": {}},
        "sort": ["price"],
        "size": 5
    }));
    assert_eq!(page1.len(), 5);

    // Second page, resuming from the sort key of the last hit on page one.
    let tail_hit = page1.hit(page1.len() - 1).unwrap();
    let tail_sort = tail_hit.sort_values().unwrap();
    let page2 = run(json!({
        "query": {"match_all": {}},
        "sort": ["price"],
        "size": 5,
        "search_after": [tail_sort[0].to_json()]
    }));
    assert_eq!(page2.len(), 5);

    let p1_prices: Vec<f64> = page1
        .iter()
        .map(|hit| {
            let key = hit.sort_values().unwrap()[0].to_json();
            key.as_f64().unwrap()
        })
        .collect();
    let p2_prices: Vec<f64> = page2
        .iter()
        .map(|hit| {
            let key = hit.sort_values().unwrap()[0].to_json();
            key.as_f64().unwrap()
        })
        .collect();
    assert!(
        p1_prices.last().unwrap() < p2_prices.first().unwrap(),
        "page 2 should start after page 1: {:?} vs {:?}",
        p1_prices,
        p2_prices
    );

    // Walk the whole index with a cursor until an empty page comes back.
    let mut collected = Vec::new();
    let mut cursor: Option<Vec<serde_json::Value>> = None;
    loop {
        let mut request = json!({
            "query": {"match_all": {}},
            "sort": ["price"],
            "size": 7
        });
        if let Some(after) = &cursor {
            request["search_after"] = serde_json::json!(after);
        }
        let page = run(request);
        if page.is_empty() {
            break;
        }
        collected.extend(page.iter().map(|hit| {
            let key = hit.sort_values().unwrap()[0].to_json();
            key.as_f64().unwrap()
        }));
        // The next request resumes after the full sort tuple of the last hit.
        let tail = page.hit(page.len() - 1).unwrap();
        let next_cursor = tail
            .sort_values()
            .unwrap()
            .iter()
            .map(|sv| sv.to_json())
            .collect();
        cursor = Some(next_cursor);
    }
    assert_eq!(collected.len(), 20, "should iterate all 20 docs");
    for pair in collected.windows(2) {
        assert!(
            pair[1] > pair[0],
            "should be strictly ascending: {} vs {}",
            pair[0],
            pair[1]
        );
    }

    // A cursor beyond the largest price must yield nothing.
    let past_end = run(json!({
        "query": {"match_all": {}},
        "sort": ["price"],
        "search_after": [999.0]
    }));
    assert!(past_end.is_empty());

    cleanup(&path);
}
/// Verifies that each hit of a `match` query exposes an explanation with a
/// positive value, a non-empty description and at least one detail entry,
/// and that the explained values of the two hits are in non-increasing order.
#[test]
fn explain_score() {
    let path = test_dir("explain");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    let docs = vec![
        json!({"title": "hello world", "status": "published"}),
        json!({"title": "hello hello hello", "status": "draft"}),
    ];
    index.bulk(docs).unwrap();

    let expression =
        SearchExpression::from_json(json!({"query": {"match": {"title": "hello"}}}), 10).unwrap();
    let results = index.search(&expression).unwrap();
    assert_eq!(results.len(), 2);

    for hit in results.iter() {
        let explanation = hit
            .explain()
            .expect("explain should not error")
            .expect("explain should be present");
        assert!(explanation.value > 0.0, "score should be positive");
        assert!(
            !explanation.description.is_empty(),
            "description should be non-empty"
        );
        assert!(
            !explanation.details.is_empty(),
            "should have BM25 sub-details"
        );
    }

    // Hits are expected to arrive best-first.
    let explained: Vec<f32> = results
        .iter()
        .map(|hit| {
            let explanation = hit.explain().unwrap().unwrap();
            explanation.value
        })
        .collect();
    assert!(explained[0] >= explained[1]);

    cleanup(&path);
}
/// Indexes five posts by three authors and checks that collapsing on the
/// `author` keyword field returns one hit per author while the reported
/// total hit count stays at five.
#[test]
fn collapse_by_field() {
    let path = test_dir("collapse");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("author", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for doc in [
        json!({"title": "post one by alice", "author": "alice"}),
        json!({"title": "post two by alice", "author": "alice"}),
        json!({"title": "post by bob", "author": "bob"}),
        json!({"title": "another by bob", "author": "bob"}),
        json!({"title": "post by carol", "author": "carol"}),
    ] {
        index.add(doc).unwrap();
    }

    // Baseline without collapsing: every document is returned.
    let plain = SearchExpression::from_json(json!({"query": {"match_all": {}}}), 10).unwrap();
    let uncollapsed = index.search(&plain).unwrap();
    assert_eq!(uncollapsed.len(), 5);

    let collapsing = SearchExpression::from_json(
        json!({
            "query": {"match_all": {}},
            "collapse": {"field": "author"}
        }),
        10,
    )
    .unwrap();
    let results = index.search(&collapsing).unwrap();
    assert_eq!(results.len(), 3, "should have 3 unique authors");

    // Pull the first `author` value out of each hit; hits where any step of
    // the lookup is missing are silently skipped.
    let author_fields: Vec<String> = vec!["author".to_string()];
    let mut authors: Vec<String> = results
        .iter()
        .filter_map(|hit| {
            let fields = hit.fields(&author_fields);
            let first = fields.get("author")?.as_array()?.first()?;
            first.as_str().map(String::from)
        })
        .collect();
    authors.sort();
    assert_eq!(authors, vec!["alice", "bob", "carol"]);

    // The total still counts every matching document, not just the groups.
    assert_eq!(results.total_hits().value, 5);

    cleanup(&path);
}
/// Runs `match_all` over 100 documents with each supported form of
/// `track_total_hits` (absent, `true`, `false`, a threshold below the match
/// count, a threshold above it) and checks the reported total and relation.
#[test]
fn track_total_hits() {
    use crate::search::TotalHitsRelation;

    let path = test_dir("track_total_hits");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for i in 0..100 {
        let doc = json!({"title": format!("document {i}"), "status": "published"});
        index.add(doc).unwrap();
    }

    // (request, expected total value, expected relation), executed in order.
    let cases = [
        (
            json!({"query": {"match_all": {}}}),
            100,
            TotalHitsRelation::EqualTo,
        ),
        (
            json!({
                "query": {"match_all": {}},
                "track_total_hits": true
            }),
            100,
            TotalHitsRelation::EqualTo,
        ),
        (
            json!({
                "query": {"match_all": {}},
                "track_total_hits": false
            }),
            0,
            TotalHitsRelation::GreaterThanOrEqualTo,
        ),
        (
            json!({
                "query": {"match_all": {}},
                "track_total_hits": 50
            }),
            50,
            TotalHitsRelation::GreaterThanOrEqualTo,
        ),
        (
            json!({
                "query": {"match_all": {}},
                "track_total_hits": 200
            }),
            100,
            TotalHitsRelation::EqualTo,
        ),
    ];
    for (request, expected_value, expected_relation) in cases {
        let expression = SearchExpression::from_json(request, 10).unwrap();
        let results = index.search(&expression).unwrap();
        assert_eq!(results.total_hits().value, expected_value);
        assert_eq!(results.total_hits().relation, expected_relation);
    }

    cleanup(&path);
}
/// Indexes one product with two nested offers, runs a `nested` query with
/// `inner_hits` that matches only the seller "Alice", and checks that the
/// `offers` inner-hits group reports exactly that one offer with its source.
#[test]
fn inner_hits_nested() {
    let path = test_dir("inner_hits");
    let mapping = Mapping::builder()
        .field("product", FieldType::Text)
        .field("offers", FieldType::Nested)
        .field("offers.seller", FieldType::Keyword)
        .field("offers.price", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    let laptop = json!({
        "product": "laptop",
        "offers": [
            {"seller": "Alice", "price": "999"},
            {"seller": "Bob", "price": "1299"}
        ]
    });
    index.add(laptop).unwrap();

    let expression = SearchExpression::from_json(
        json!({
            "nested": {
                "path": "offers",
                "query": {
                    "term": {"offers.seller": "Alice"}
                },
                "inner_hits": {}
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expression).unwrap();
    assert_eq!(results.total_hits().value, 1);
    assert_eq!(results.len(), 1);

    // Drill into hit -> inner_hits -> "offers" -> "hits".
    let parent = results.hit(0).unwrap();
    let inner = parent
        .inner_hits()
        .expect("inner_hits should not error")
        .expect("inner_hits should be present");
    let offers_group = inner
        .get("offers")
        .expect("should have 'offers' inner_hits group");
    let hits_section = offers_group.get("hits").unwrap();

    let inner_total = hits_section
        .get("total")
        .and_then(|total| total.get("value"))
        .and_then(|value| value.as_u64())
        .unwrap();
    assert_eq!(inner_total, 1, "should have 1 matching inner hit");

    let inner_docs = hits_section
        .get("hits")
        .and_then(|list| list.as_array())
        .unwrap();
    assert_eq!(inner_docs.len(), 1);

    // Only Alice's offer may be reported, with both of its fields intact.
    let matched_offer = inner_docs[0].get("_source").unwrap();
    assert_eq!(matched_offer.get("seller").unwrap(), "Alice");
    assert_eq!(matched_offer.get("price").unwrap(), "999");

    cleanup(&path);
}
/// Compares a plain `match` query against the same query with a
/// `match_phrase` rescore: the phrase-matching document must end up first
/// and the top score must differ from the un-rescored top score.
#[test]
fn rescore_reranks() {
    let path = test_dir("rescore");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "hello", "status": "published"}),
        json!({"title": "hello world", "status": "published"}),
        json!({"title": "hello there world", "status": "published"}),
    ] {
        index.add(doc).unwrap();
    }

    // Baseline ranking.
    let baseline_expr = SearchExpression::from_json(
        json!({
            "query": {"match": {"title": "hello"}}
        }),
        10,
    )
    .unwrap();
    let without_rescore = index.search(&baseline_expr).unwrap();
    assert_eq!(without_rescore.len(), 3);

    // Same query, re-ranked by a phrase query over a window of 10.
    let rescore_expr = SearchExpression::from_json(
        json!({
            "query": {"match": {"title": "hello"}},
            "rescore": {
                "window_size": 10,
                "query": {
                    "rescore_query": {"match_phrase": {"title": "hello world"}},
                    "query_weight": 0.5,
                    "rescore_query_weight": 1.5
                }
            }
        }),
        10,
    )
    .unwrap();
    let with_rescore = index.search(&rescore_expr).unwrap();
    assert_eq!(with_rescore.len(), 3);

    let rescored_top = with_rescore.hit(0).unwrap();
    let top_source = rescored_top.source().unwrap();
    assert_eq!(
        top_source["title"], "hello world",
        "phrase match should be boosted to top"
    );

    let baseline_top_score = without_rescore.hit(0).unwrap().score();
    let rescored_top_score = with_rescore.hit(0).unwrap().score();
    assert_ne!(
        baseline_top_score, rescored_top_score,
        "rescore should modify scores"
    );

    cleanup(&path);
}
/// Exercises a text field with a keyword sub-field (`title.raw`): analyzed
/// matching on the parent, exact and case-sensitive term matching on the
/// sub-field, sorting by the sub-field, and a terms aggregation on it.
#[test]
fn multi_fields() {
    let path = test_dir("multi_fields");
    let mapping = Mapping::from_json(&json!({
        "properties": {
            "title": {
                "type": "text",
                "fields": {
                    "raw": {"type": "keyword"}
                }
            }
        }
    }))
    .unwrap();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for title in ["Hello World", "Hello Luci", "Goodbye World"] {
        index.add(json!({"title": title})).unwrap();
    }

    // Shared parse-then-search step for every request in this test.
    let run = |request: serde_json::Value| {
        index
            .search(&SearchExpression::from_json(request, 10).unwrap())
            .unwrap()
    };

    let analyzed = run(json!({"match": {"title": "hello"}}));
    assert_eq!(
        analyzed.total_hits().value,
        2,
        "text search should match 2 docs"
    );

    let exact = run(json!({"term": {"title.raw": "Hello World"}}));
    assert_eq!(
        exact.total_hits().value,
        1,
        "exact keyword match on sub-field"
    );

    let lowercased = run(json!({"term": {"title.raw": "hello"}}));
    assert_eq!(
        lowercased.total_hits().value,
        0,
        "keyword sub-field is not analyzed"
    );

    // Sorting on the sub-field orders by the raw, un-analyzed title.
    let sorted = run(json!({
        "query": {"match_all": {}},
        "sort": [{"title.raw": "asc"}]
    }));
    assert_eq!(sorted.len(), 3);
    let titles: Vec<String> = sorted
        .iter()
        .map(|hit| {
            let key = hit.sort_values().unwrap()[0].to_json();
            key.as_str().unwrap().to_string()
        })
        .collect();
    for (position, expected) in ["Goodbye World", "Hello Luci", "Hello World"]
        .iter()
        .enumerate()
    {
        assert_eq!(titles[position], *expected);
    }

    // The sub-field must also be usable as an aggregation source.
    let aggregated = run(json!({
        "query": {"match_all": {}},
        "aggs": {"titles": {"terms": {"field": "title.raw"}}},
        "size": 0
    }));
    assert!(aggregated.aggregations().contains_key("titles"));

    cleanup(&path);
}
/// Checks `copy_to`: text from `title` and `body` must be searchable through
/// the `all_text` target field, while `all_text` itself must not appear in
/// the returned `_source`.
#[test]
fn copy_to() {
    let path = test_dir("copy_to");
    let mapping = Mapping::from_json(&json!({
        "properties": {
            "title": {"type": "text", "copy_to": "all_text"},
            "body": {"type": "text", "copy_to": "all_text"},
            "tag": {"type": "keyword"},
            "all_text": {"type": "text"}
        }
    }))
    .unwrap();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for doc in [
        json!({"title": "search engine", "body": "fast and embedded", "tag": "tech"}),
        json!({"title": "database", "body": "columnar storage", "tag": "tech"}),
    ] {
        index.add(doc).unwrap();
    }

    // Shared parse-then-search step for every request in this test.
    let run = |request: serde_json::Value| {
        index
            .search(&SearchExpression::from_json(request, 10).unwrap())
            .unwrap()
    };

    // Sanity check on the source field itself.
    let on_title = run(json!({"match": {"title": "search"}}));
    assert_eq!(on_title.total_hits().value, 1);

    let title_via_target = run(json!({"match": {"all_text": "search"}}));
    assert_eq!(
        title_via_target.total_hits().value,
        1,
        "copy_to target should contain title content"
    );

    let body_via_target = run(json!({"match": {"all_text": "embedded"}}));
    assert_eq!(
        body_via_target.total_hits().value,
        1,
        "copy_to target should contain body content"
    );

    let second_doc_via_target = run(json!({"match": {"all_text": "columnar"}}));
    assert_eq!(
        second_doc_via_target.total_hits().value,
        1,
        "copy_to target should contain body of second doc"
    );

    // The copy target is index-only: it must not leak into `_source`.
    let for_source = run(json!({"match": {"all_text": "search"}}));
    let first_hit = for_source.hit(0).unwrap();
    let src = first_hit.source().unwrap();
    assert!(
        src.get("all_text").is_none(),
        "copy_to target should not be in _source"
    );

    cleanup(&path);
}
/// Walks through the document lifecycle on one index: add with explicit ids,
/// get, count, delete by id, partial update, delete by query, and finally an
/// add without an `_id` that must still be searchable.
#[test]
fn document_crud() {
    let path = test_dir("crud");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();

    // Create.
    for doc in [
        json!({"_id": "doc1", "title": "Hello", "status": "published"}),
        json!({"_id": "doc2", "title": "World", "status": "draft"}),
    ] {
        index.add(doc).unwrap();
    }

    // Read.
    let fetched = index.get("doc1").unwrap().expect("doc1 should exist");
    assert_eq!(fetched["title"], "Hello");
    let unknown = index.get("nonexistent").unwrap();
    assert!(unknown.is_none());
    let count_before_delete = index.count(json!({"match_all": {}})).unwrap();
    assert_eq!(count_before_delete, 2);

    // Delete by id: true for an existing document, false for an unknown id.
    let removed = index.delete("doc1").unwrap();
    assert!(removed);
    let after_delete = index.get("doc1").unwrap();
    assert!(after_delete.is_none());
    let count_after_delete = index.count(json!({"match_all": {}})).unwrap();
    assert_eq!(count_after_delete, 1);
    let removed_unknown = index.delete("nonexistent").unwrap();
    assert!(!removed_unknown);

    // Partial update: the patched field changes, the other one is kept.
    index
        .add(json!({"_id": "doc3", "title": "Original", "status": "published"}))
        .unwrap();
    let updated = index.update("doc3", json!({"title": "Updated"})).unwrap();
    assert!(updated);
    let patched = index
        .get("doc3")
        .unwrap()
        .expect("doc3 should exist after update");
    assert_eq!(patched["title"], "Updated");
    assert_eq!(patched["status"], "published");

    // Delete by query: at least the two drafts added here must go.
    for doc in [
        json!({"_id": "d1", "title": "draft one", "status": "draft"}),
        json!({"_id": "d2", "title": "draft two", "status": "draft"}),
    ] {
        index.add(doc).unwrap();
    }
    let deleted = index
        .delete_by_query(json!({"term": {"status": "draft"}}))
        .unwrap();
    assert!(deleted >= 2);

    // A document added without `_id` must be indexed and findable.
    index.add(json!({"title": "Auto ID doc"})).unwrap();
    let auto_expr = SearchExpression::from_json(json!({"match": {"title": "auto"}}), 1).unwrap();
    let auto_results = index.search(&auto_expr).unwrap();
    assert_eq!(auto_results.total_hits().value, 1);

    cleanup(&path);
}
/// With three `active` and two `draft` documents, a `filter` aggregation on
/// `status = active` under a `match_all` query must report a doc count of 3
/// while the query itself still reports all 5 hits.
#[test]
fn filter_agg_counts_only_matching_docs() {
    let path = test_dir("filter_agg");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("status", FieldType::Keyword)
        .field("price", FieldType::Float)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for (title, status, price) in [
        ("doc1", "active", 10.0),
        ("doc2", "active", 20.0),
        ("doc3", "active", 30.0),
        ("doc4", "draft", 40.0),
        ("doc5", "draft", 50.0),
    ] {
        index
            .add(json!({"title": title, "status": status, "price": price}))
            .unwrap();
    }

    let expression = SearchExpression::from_json(
        json!({
            "query": {"match_all": {}},
            "aggs": {
                "active_only": {
                    "filter": {"term": {"status": "active"}}
                }
            },
            "size": 0
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expression).unwrap();
    assert_eq!(results.total_hits().value, 5);

    // The filter aggregation is read back as a single bucket in its JSON form.
    let aggregations = results.aggregations();
    let rendered = aggregations
        .get("active_only")
        .expect("active_only agg missing")
        .to_json();
    let doc_count = rendered["buckets"][0]["doc_count"]
        .as_u64()
        .expect("filter agg should have doc_count in buckets[0]");
    assert_eq!(
        doc_count, 3,
        "filter agg should count 3 active docs, got {doc_count}"
    );

    cleanup(&path);
}
/// Indexes five documents with a `price` value each and checks that a
/// `value_count` aggregation on `price` reports 5.
///
/// NOTE(review): the test name refers to a stats fast path; nothing in this
/// test observes which code path is taken, only the resulting count.
#[test]
fn value_count_uses_stats_fast_path() {
    let path = test_dir("value_count_fast");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("price", FieldType::Float)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for i in 0..5 {
        let doc = json!({"title": format!("doc {i}"), "price": (i as f64) * 10.0});
        index.add(doc).unwrap();
    }

    let expression = SearchExpression::from_json(
        json!({
            "query": {"match_all": {}},
            "aggs": {
                "count_price": {"value_count": {"field": "price"}}
            },
            "size": 0
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expression).unwrap();

    let aggregations = results.aggregations();
    let rendered = aggregations
        .get("count_price")
        .expect("count_price missing")
        .to_json();
    let value = rendered["value"].as_f64().unwrap();
    assert_eq!(value, 5.0, "value_count should be 5");

    cleanup(&path);
}
/// For a document matching in both `title` and `body`, the `multi_match`
/// score must equal the larger of the two single-field `match` scores
/// (within 1e-5) rather than their sum.
#[test]
fn multi_match_best_fields_scoring() {
    let path = test_dir("multi_match_bf");
    let index = Index::create_with_mapping(&path, test_schema()).unwrap();
    for doc in [
        json!({"title": "search search", "body": "web application framework"}),
        json!({"title": "database search", "body": "search tools"}),
    ] {
        index.add(doc).unwrap();
    }

    let multi_expr = SearchExpression::from_json(
        json!({"multi_match": {"query": "search", "fields": ["title", "body"]}}),
        10,
    )
    .unwrap();
    let results = index.search(&multi_expr).unwrap();
    assert_eq!(results.total_hits().value, 2);

    // Per-field reference scores from plain `match` queries.
    let title_expr =
        SearchExpression::from_json(json!({"match": {"title": "search"}}), 10).unwrap();
    let title_results = index.search(&title_expr).unwrap();
    let body_expr = SearchExpression::from_json(json!({"match": {"body": "search"}}), 10).unwrap();
    let body_results = index.search(&body_expr).unwrap();

    // The document under test is identified by its body text in each result set.
    let score_in_title = title_results
        .iter()
        .filter(|hit| hit.source().unwrap()["body"] == "search tools")
        .map(|hit| hit.score())
        .next()
        .unwrap();
    let score_in_body = body_results
        .iter()
        .filter(|hit| hit.source().unwrap()["body"] == "search tools")
        .map(|hit| hit.score())
        .next()
        .unwrap();
    let doc1_max = score_in_title.max(score_in_body);
    let doc1_sum = score_in_title + score_in_body;
    let doc1_mm_score = results
        .iter()
        .filter(|hit| hit.source().unwrap()["body"] == "search tools")
        .map(|hit| hit.score())
        .next()
        .unwrap();

    let deviation = (doc1_mm_score - doc1_max).abs();
    assert!(
        deviation < 1e-5,
        "multi_match score ({doc1_mm_score}) should equal max of field scores ({doc1_max}), \
         not sum ({doc1_sum})"
    );

    cleanup(&path);
}
/// Two documents match the prefix `sear` with a different number of matching
/// terms; both must receive the same score (within 1e-5).
#[test]
fn prefix_query_constant_score() {
    let path = test_dir("prefix_const_score");
    let mapping = Mapping::builder().field("body", FieldType::Text).build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for body in ["search searchable engines", "searching for tools"] {
        index.add(json!({"body": body})).unwrap();
    }

    let expression = SearchExpression::from_json(json!({"prefix": {"body": "sear"}}), 10).unwrap();
    let results = index.search(&expression).unwrap();
    assert_eq!(results.total_hits().value, 2);

    // Look each document up by its body text rather than relying on hit order.
    let doc0_score = results
        .iter()
        .filter(|hit| hit.source().unwrap()["body"] == "search searchable engines")
        .map(|hit| hit.score())
        .next()
        .unwrap();
    let doc1_score = results
        .iter()
        .filter(|hit| hit.source().unwrap()["body"] == "searching for tools")
        .map(|hit| hit.score())
        .next()
        .unwrap();

    let difference = (doc0_score - doc1_score).abs();
    assert!(
        difference < 1e-5,
        "prefix scores must be constant: doc0 ({doc0_score}) vs doc1 ({doc1_score}). \
         ES uses CONSTANT_SCORE_BLENDED_REWRITE — every matching doc gets the same boost \
         regardless of how many prefix-terms match."
    );

    cleanup(&path);
}
/// Adds seven keyword documents one at a time, five of which start with
/// `tech` (including a repeated `tech1`), and expects the wildcard `tech*`
/// to report five total hits.
///
/// NOTE(review): the name implies the individual adds end up in separate
/// segments; that is not observable from this test — confirm against the
/// writer's flush behaviour.
#[test]
fn wildcard_multi_segment_returns_all_matches() {
    let path = test_dir("wildcard_multi_seg");
    let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();
    for tag in [
        "tech1",
        "tech2",
        "other",
        "tech3",
        "tech1",
        "tech4",
        "unrelated",
    ] {
        index.add(json!({"tag": tag})).unwrap();
    }

    let expression =
        SearchExpression::from_json(json!({"wildcard": {"tag": "tech*"}}), 20).unwrap();
    let results = index.search(&expression).unwrap();
    assert_eq!(
        results.total_hits().value,
        5,
        "multi-segment wildcard missed matches"
    );

    cleanup(&path);
}
/// `parse_sort` must reject the order string "ascending" with an error whose
/// message mentions `order`.
#[test]
fn parse_sort_unknown_order_rejected() {
    let request = json!([{"price": {"order": "ascending"}}]);
    let err = parse_sort(Some(&request)).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("order"), "{err}");
}
/// `parse_sort` must reject a numeric `order` value with an error whose
/// message mentions `order`.
#[test]
fn parse_sort_non_string_order_rejected() {
    let request = json!([{"price": {"order": 1}}]);
    let err = parse_sort(Some(&request)).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("order"), "{err}");
}
/// A sort given as a single object (instead of an array) must parse into
/// exactly one sort entry.
#[test]
fn parse_sort_object_form_accepted() {
    let request = json!({"price": "desc"});
    let parsed = parse_sort(Some(&request)).unwrap();
    let sort = parsed.unwrap();
    assert_eq!(sort.len(), 1);
}
/// A sort array containing a bare number must be rejected with an error
/// whose message mentions `sort`.
#[test]
fn parse_sort_malformed_entry_rejected() {
    let request = json!([5]);
    let err = parse_sort(Some(&request)).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("sort"), "{err}");
}
/// A `search_after` cursor containing an object element must be rejected
/// with an error whose message mentions `search_after`.
#[test]
fn parse_search_after_object_element_rejected() {
    let cursor = json!([1, {}]);
    let err = parse_search_after(Some(&cursor)).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("search_after"), "{err}");
}
/// A `search_after` cursor made of a number and a string must parse into a
/// two-element cursor.
#[test]
fn parse_search_after_valid_cursor_accepted() {
    let request = json!([9.99, "doc-42"]);
    let parsed = parse_search_after(Some(&request)).unwrap();
    let cursor = parsed.unwrap();
    assert_eq!(cursor.len(), 2);
}
}