// lucisearch 0.8.1
//
// Embeddable, in-process search engine — the SQLite/DuckDB of search
// Documentation
//! Regression tests for [[fix-silent-scorer-errors]].
//!
//! Before the fix, every `Err` from `query.bind()`,
//! `weight.scorer_supplier(segment)`, and `supplier.scorer()` was
//! silently swallowed and converted into "0 hits". The public
//! `Index::search()` returned `Ok(empty_results)` even when the
//! engine had encountered a real error. These tests pin the new
//! behavior: errors propagate to the caller.
//!
//! Per the design doc, full coverage of the per-segment error path
//! requires injecting a mock `BoundQuery` — but the `BoundQuery`
//! trait is `pub(crate)`. We test what the public API can express
//! here (bind-time errors flowing through every search shape) and
//! rely on Rust's type system + the `?`-propagation refactor to
//! cover the post-bind paths (no remaining `.ok()` / `.unwrap_or(0)`
//! sites in the searcher).

use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::search::expression::parse_search;
use serde_json::json;

/// Computes a scratch directory path for the test called `name`.
///
/// The path lives under the system temp dir and embeds the current
/// process id plus `name`. Anything already present at that path is
/// removed on a best-effort basis (a removal failure is ignored). The
/// directory itself is *not* created here; only the path is returned.
fn test_dir(name: &str) -> std::path::PathBuf {
    let pid = std::process::id();
    let leaf = format!("luci_error_propagation_{pid}_{name}");

    let mut dir = std::env::temp_dir();
    dir.push(leaf);

    // Clear leftovers from an earlier run; "nothing to remove" is fine.
    let _ = std::fs::remove_dir_all(&dir);
    dir
}

/// Recursively removes the directory at `path`, ignoring any failure
/// (for example, the directory not existing).
fn cleanup(path: &std::path::Path) {
    // Best-effort teardown: the outcome is deliberately discarded.
    drop(std::fs::remove_dir_all(path));
}

/// Creates an index in a scratch directory and loads three documents.
///
/// The mapping declares a `title` text field, a `tag` keyword field and
/// an `embedding` dense-vector field declared with `dense_vector(4)`.
/// Each document carries all three fields.
///
/// Returns the scratch path (so the caller can pass it to `cleanup`)
/// together with the index. Panics if index creation or the bulk load
/// returns an error.
fn build_vector_index(name: &str) -> (std::path::PathBuf, Index) {
    let path = test_dir(name);

    let index = Index::create_with_mapping(
        &path,
        Mapping::builder()
            .field("title", FieldType::Text)
            .field("tag", FieldType::Keyword)
            .field("embedding", FieldType::dense_vector(4))
            .build(),
    )
    .unwrap();

    let docs = vec![
        json!({"title": "alpha", "tag": "a", "embedding": [1.0, 0.0, 0.0, 0.0]}),
        json!({"title": "beta", "tag": "b", "embedding": [0.0, 1.0, 0.0, 0.0]}),
        json!({"title": "gamma", "tag": "a", "embedding": [0.0, 0.0, 1.0, 0.0]}),
    ];
    index.bulk(docs).unwrap();

    (path, index)
}

/// Creates an index in a scratch directory and loads it in three
/// separate bulk batches of two documents each.
///
/// The mapping declares a `title` text field and an `embedding`
/// dense-vector field declared with `dense_vector(4)`.
///
/// Returns the scratch path together with the index. Panics if index
/// creation or any bulk load returns an error.
fn build_multi_segment_vector_index(name: &str) -> (std::path::PathBuf, Index) {
    let path = test_dir(name);
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("embedding", FieldType::dense_vector(4))
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    // Separate bulk calls are intended to land in separate segments so
    // that a search would fan out over several of them. The segment
    // count cannot be checked through the public API; the bind-time
    // error the caller provokes fires before the per-segment fan-out
    // anyway, so the outcome does not depend on it.
    (0..3).for_each(|batch_id| {
        let first = json!({"title": format!("doc_{batch_id}_a"), "embedding": [1.0, 0.0, 0.0, 0.0]});
        let second = json!({"title": format!("doc_{batch_id}_b"), "embedding": [0.0, 1.0, 0.0, 0.0]});
        index.bulk(vec![first, second]).unwrap();
    });

    (path, index)
}

/// A failure inside `bind()` has to surface from `Index::search` as an
/// `Err`; it must not be flattened into an `Ok` with no hits. The
/// trigger is a KNN query whose `query_vector` has the wrong number of
/// dimensions, which is rejected at bind time (`KnnQuery::bind`).
#[test]
fn bind_error_propagates_to_search_caller() {
    let (path, index) = build_vector_index("bind_error");

    // Two dimensions supplied; the mapping declares four.
    let request = json!({"query": {"knn": {
        "field": "embedding",
        "query_vector": [1.0, 0.0],
        "k": 3,
    }}});
    let expr = parse_search(request, 10).unwrap();

    let msg = index
        .search(&expr)
        .err()
        .unwrap_or_else(|| panic!("expected bind error, got Ok"))
        .to_string();

    // The message must identify both the offending dimension count and
    // the field it was checked against.
    let names_dims = msg.contains("2 dimensions");
    let names_field = msg.contains("embedding");
    assert!(
        names_dims && names_field,
        "expected dim-mismatch message naming dims and field, got: {msg}"
    );

    cleanup(&path);
}

/// Multi-batch index: a bind-time error must abort the whole search
/// rather than producing a partial `Ok` from segments that *would*
/// have succeeded if scoring were allowed to run.
///
/// `bind()` happens once before the par_iter, so the error provoked
/// here (a 3-dim `query_vector` against a 4-dim mapping) short-circuits
/// the entire pipeline before any segment is scored.
///
/// NOTE(review): because the failure is raised before the par_iter,
/// this test pins "bind error on a multi-batch index yields `Err`"; it
/// does not by itself appear to reach the par_iter
/// `collect::<Result<Vec<_>, _>>()?` path. As the module docs explain,
/// injecting a per-segment failure needs the `pub(crate)` `BoundQuery`
/// trait — confirm that path is covered by an in-crate test.
#[test]
fn per_segment_error_aborts_whole_query() {
    let (path, index) = build_multi_segment_vector_index("per_segment");
    let expr = parse_search(
        json!({"query": {"knn": {
            "field": "embedding",
            "query_vector": [1.0, 0.0, 0.0],  // 3 dims, mapping requires 4
            "k": 3,
        }}}),
        10,
    )
    .unwrap();

    let err = match index.search(&expr) {
        Ok(r) => panic!(
            "expected error from multi-segment search; got Ok with {} hits",
            r.total_hits().value
        ),
        Err(e) => e,
    };

    // Include the actual error text so a wording regression is
    // diagnosable straight from the test output.
    let msg = err.to_string();
    assert!(
        msg.contains("3 dimensions"),
        "expected dim-mismatch message naming 3 dimensions, got: {msg}"
    );

    cleanup(&path);
}

/// The one honest-empty case: a *valid* `dense_vector` field for which
/// no vectors have been indexed. `bind()` returns `Ok` and the search
/// comes back with zero hits (the field's global graph is empty /
/// absent).
///
/// This differs from querying an *unknown* field, which now errors at
/// bind time (see `knn_query::knn_query_unknown_field_errors`). The
/// earlier version of this test queried an unknown field and asserted
/// `Ok`/empty; that input now errors, so the test was repointed. See
/// [[feature-knn-query-type]] §4, test 17.
#[test]
fn knn_empty_vector_field_returns_ok_empty() {
    let path = test_dir("empty_vec_field");
    let index = Index::create_with_mapping(
        &path,
        Mapping::builder()
            .field("title", FieldType::Text)
            .field("embedding", FieldType::dense_vector(4))
            .build(),
    )
    .unwrap();

    // Index a single document that has NO embedding value. The mapping,
    // searcher and index all exist, yet the vector field's graph stays
    // empty. With zero documents it would be ambiguous whether a global
    // graph object exists at all, so one vector-less document is used
    // to pin the honest-empty branch.
    let vectorless_docs = vec![json!({"title": "no vector here"})];
    index.bulk(vectorless_docs).unwrap();

    // A 4-dim query vector passes the dimension check, so the search
    // reaches the no-vectors branch instead of failing earlier.
    let request = json!({"query": {"knn": {
        "field": "embedding",
        "query_vector": [1.0, 0.0, 0.0, 0.0],
        "k": 3,
    }}});
    let expr = parse_search(request, 10).unwrap();

    let outcome = index.search(&expr);
    let results =
        outcome.expect("valid dense_vector field with no vectors is honest-empty, not an error");
    let hit_count = results.len();
    assert_eq!(hit_count, 0);

    cleanup(&path);
}

/// Rescore stage must propagate bind-time errors. Before the fix,
/// `apply_rescore` quietly skipped a failed rescore. The first-pass
/// query is a trivial match_all so the rescore-stage `bind()` is
/// what fires the error (a 2-dim `query_vector` against the 4-dim
/// `embedding` mapping).
#[test]
fn rescore_bind_error_propagates() {
    let (path, index) = build_vector_index("rescore_bind");

    let expr = parse_search(
        json!({
            "query": {"match_all": {}},
            "rescore": {
                "window_size": 10,
                "query": {
                    "rescore_query": {"knn": {
                        "field": "embedding",
                        "query_vector": [1.0, 2.0],  // wrong dim — bind errs
                        "k": 5,
                    }}
                }
            }
        }),
        10,
    )
    .unwrap();

    let err = match index.search(&expr) {
        Ok(_) => panic!("expected rescore bind error to propagate"),
        Err(e) => e,
    };

    // Include the actual error text so a wording regression is
    // diagnosable straight from the test output.
    let msg = err.to_string();
    assert!(
        msg.contains("2 dimensions"),
        "expected rescore dim-mismatch message naming 2 dimensions, got: {msg}"
    );

    cleanup(&path);
}

/// Collapsed search must propagate bind errors: a KNN query with a
/// 1-dim `query_vector` (mapping requires 4), combined with a
/// `collapse` on the `tag` field, has to return `Err` rather than `Ok`.
#[test]
fn collapse_bind_error_propagates() {
    let (path, index) = build_vector_index("collapse_bind");
    let expr = parse_search(
        json!({
            "query": {"knn": {
                "field": "embedding",
                "query_vector": [1.0],  // 1 dim, mapping requires 4
                "k": 3,
            }},
            "collapse": {"field": "tag"},
        }),
        10,
    )
    .unwrap();

    let err = match index.search(&expr) {
        Ok(_) => panic!("expected collapsed bind error to propagate"),
        Err(e) => e,
    };

    // Include the actual error text so a wording regression is
    // diagnosable straight from the test output.
    let msg = err.to_string();
    assert!(
        msg.contains("1 dimensions"),
        "expected collapsed dim-mismatch message naming 1 dimensions, got: {msg}"
    );

    cleanup(&path);
}