lucisearch 0.8.1

Embeddable, in-process search engine — the SQLite/DuckDB of search
Documentation
//! Integration tests for the bulk API.

use luci::index::Index;
use serde_json::json;

/// Test helper: parses `query` (passing `size` through to the parser) and
/// executes the resulting expression against `index`.
///
/// Panics if either parsing or the search itself returns an error, which
/// fails the calling test.
fn search(
    index: &mut luci::index::Index,
    query: serde_json::Value,
    size: usize,
) -> luci::search::results::SearchResults {
    let parsed = luci::search::expression::parse_search(query, size);
    let expression = parsed.unwrap();
    let outcome = index.search(&expression);
    outcome.unwrap()
}

/// Returns the path of `name` inside the `luci_bulk_tests` directory under
/// the operating system's temp directory.
///
/// The directory is created if needed; a failure to create it is ignored
/// here, so any problem shows up later when the path is actually used.
fn temp_path(name: &str) -> std::path::PathBuf {
    let mut target = std::env::temp_dir();
    target.push("luci_bulk_tests");
    // Best-effort: the result of directory creation is intentionally discarded.
    let _ = std::fs::create_dir_all(&target);
    target.push(name);
    target
}

/// Deletes the file at `path`, ignoring any error (for example when the
/// file does not exist).
fn cleanup(path: &std::path::Path) {
    // Best-effort removal: the outcome is deliberately thrown away.
    std::fs::remove_file(path).ok();
}

/// Indexes 100 generated documents through `bulk` and verifies both the
/// response fields and that a `match_all` search returns every document.
#[test]
fn bulk_basic() {
    let path = temp_path("bulk_basic.luci");
    // Clear any file left behind by an earlier run before creating the index.
    cleanup(&path);

    let mut index = Index::create(&path).unwrap();

    let mut docs: Vec<serde_json::Value> = Vec::with_capacity(100);
    for i in 0..100 {
        docs.push(json!({"title": format!("Doc {i}"), "value": i}));
    }

    let response = index.bulk(docs).unwrap();
    assert_eq!(response["count"], 100);
    assert!(response["took"].is_u64());

    // The requested size (200) exceeds the number of documents indexed.
    let hits = search(&mut index, json!({"query": {"match_all": {}}}), 200);
    assert_eq!(hits.len(), 100);

    cleanup(&path);
}

/// Checks the shape of the `bulk` response: a `count` equal to the number of
/// submitted documents and an unsigned-integer `took` field.
#[test]
fn bulk_response_format() {
    let path = temp_path("bulk_response.luci");
    cleanup(&path);

    let index = Index::create(&path).unwrap();

    // Three minimal documents that differ only in their title.
    let docs: Vec<serde_json::Value> = ["A", "B", "C"]
        .iter()
        .map(|title| json!({"title": title}))
        .collect();

    let response = index.bulk(docs).unwrap();
    assert_eq!(response["count"], 3);
    assert!(response["took"].is_u64());

    cleanup(&path);
}

/// Submitting an empty batch must succeed and report a `count` of zero.
#[test]
fn bulk_empty_list() {
    let path = temp_path("bulk_empty.luci");
    cleanup(&path);

    let index = Index::create(&path).unwrap();

    let response = index.bulk(Vec::new()).unwrap();
    assert_eq!(response["count"], 0);

    cleanup(&path);
}

/// Bulk-indexes 1000 documents under an explicit mapping (text `title`,
/// keyword `category`) and checks `match_all`, `term` and `match` queries
/// against the expected hit counts.
#[test]
fn bulk_with_search() {
    let path = temp_path("bulk_search.luci");
    cleanup(&path);

    let schema = json!({
        "properties": {
            "title": {"type": "text"},
            "category": {"type": "keyword"}
        }
    });
    let mapping = luci::mapping::Mapping::from_json(&schema).unwrap();

    let mut index = Index::create_with_mapping(&path, mapping).unwrap();

    // Even-numbered documents are tagged "even", the rest "odd", giving an
    // exact 500/500 split for the term query below.
    let mut docs: Vec<serde_json::Value> = Vec::with_capacity(1000);
    for i in 0..1000 {
        let parity = if i % 2 == 0 { "even" } else { "odd" };
        docs.push(json!({
            "title": format!("Document number {i}"),
            "category": parity
        }));
    }

    index.bulk(docs).unwrap();

    // match_all: every indexed document is returned.
    let all_hits = search(
        &mut index,
        json!({"query": {"match_all": {}}, "size": 2000}),
        2000,
    );
    assert_eq!(all_hits.len(), 1000);

    // term query on the keyword field: only the "even" half.
    let even_hits = search(
        &mut index,
        json!({"query": {"term": {"category": "even"}}}),
        1000,
    );
    assert_eq!(even_hits.len(), 500);

    // match query on the text field: every title contains "Document".
    let title_hits = search(
        &mut index,
        json!({"query": {"match": {"title": "document"}}}),
        1000,
    );
    assert_eq!(title_hits.len(), 1000);

    cleanup(&path);
}

/// Bulk-indexes 10,000 documents with a small memory budget and verifies
/// that all of them are counted and searchable afterwards.
#[test]
fn bulk_large_batch() {
    let path = temp_path("bulk_large.luci");
    cleanup(&path);

    let mut index = Index::create(&path).unwrap();
    // 1 MiB budget, chosen to trigger auto-flush while the batch is ingested.
    let one_mib = 1024 * 1024;
    index.set_memory_budget(one_mib);

    let mut docs: Vec<serde_json::Value> = Vec::with_capacity(10_000);
    for i in 0..10_000 {
        docs.push(json!({"title": format!("Doc {i}"), "value": i}));
    }

    let response = index.bulk(docs).unwrap();
    assert_eq!(response["count"], 10_000);

    // The requested size (20000) exceeds the number of documents indexed.
    let hits = search(
        &mut index,
        json!({"query": {"match_all": {}}, "size": 20000}),
        20000,
    );
    assert_eq!(hits.len(), 10_000);

    cleanup(&path);
}

/// Verifies that `bulk` rejects a batch containing a non-object item, that
/// the error identifies the offending item, and that nothing from the failed
/// batch becomes searchable.
#[test]
fn bulk_fail_fast() {
    let path = temp_path("bulk_fail.luci");
    cleanup(&path);

    let mut index = Index::create(&path).unwrap();

    // Item 0 is a valid object; item 1 is a bare JSON string rather than an
    // object, so the batch is expected to fail there without reaching item 2.
    let docs = vec![
        json!({"title": "Valid"}),
        json!("not an object"),
        json!({"title": "Never reached"}),
    ];

    let err = index.bulk(docs).unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("bulk item 1"),
        "error should mention item index, got: {msg}"
    );
    assert!(
        msg.contains("JSON object"),
        "error should mention object requirement, got: {msg}"
    );

    // Failed bulk does not commit — the valid first item must not be visible.
    let results = search(&mut index, json!({"query": {"match_all": {}}}), 10);
    assert_eq!(
        results.len(),
        0,
        "failed bulk should not leave partial results"
    );

    cleanup(&path);
}

/// Submits two documents sharing the same `_id` in one batch and checks that
/// both are counted and that a lookup by that id still yields a document.
#[test]
fn bulk_duplicate_ids() {
    let path = temp_path("bulk_dup_ids.luci");
    cleanup(&path);

    let index = Index::create(&path).unwrap();

    let mut docs = Vec::with_capacity(2);
    docs.push(json!({"_id": "same", "title": "First"}));
    docs.push(json!({"_id": "same", "title": "Second"}));

    let response = index.bulk(docs).unwrap();
    assert_eq!(response["count"], 2);

    // bulk does not deduplicate by _id (that needs delete-before-add, which
    // is the update() path), so both documents are indexed and get() hands
    // back one of them. Only the shape of the result is asserted.
    let lookup = index.get("same").unwrap();
    let source = lookup.unwrap();
    assert!(source.is_object());

    cleanup(&path);
}