// lucisearch 0.8.0
//
// Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
// Documentation
//! Nested document profiling: indexing overhead and query latency.

use super::harness::*;
use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use serde_json::json;

use std::time::Instant;

/// Warm-up count handed to every `measure_query` call in this file
/// (presumably untimed runs executed before measurement — confirm in the harness).
const WARMUP: usize = 3;
/// Iteration count handed to every `measure_query` call in this file
/// (presumably the number of timed runs — confirm in the harness).
const ITERS: usize = 20;

/// Builds the mapping used by every corpus created in this file.
///
/// Top-level fields are `product` (text) and `category` (keyword). `offers`
/// is declared as a nested field with two keyword sub-fields,
/// `offers.seller` and `offers.price`.
fn nested_schema() -> Mapping {
    Mapping::builder()
        .field("product", FieldType::Text)
        .field("category", FieldType::Keyword)
        .field("offers", FieldType::Nested)
        .field("offers.seller", FieldType::Keyword)
        // Price is mapped as a keyword rather than a numeric type: the corpus
        // stores it as a string and the queries match it with `term` lookups.
        .field("offers.price", FieldType::Keyword)
        .build()
}

/// Creates an index under `profile_dir(name)` with the nested schema and
/// bulk-loads `doc_count` synthetic parent documents, each carrying
/// `offers_per_doc` entries in its `offers` array.
///
/// The generated data is fully deterministic: sellers and categories cycle
/// through fixed five-element tables, and each offer's price is the decimal
/// string of `(doc * 10 + offer * 100) % 1000`.
///
/// Returns the directory path together with the index so the caller can pass
/// the path to `cleanup` when done.
///
/// # Panics
///
/// Panics if index creation or the bulk insert returns an error.
fn build_nested_corpus(
    name: &str,
    doc_count: usize,
    offers_per_doc: usize,
) -> (std::path::PathBuf, Index) {
    const SELLERS: [&str; 5] = ["alice", "bob", "charlie", "diana", "eve"];
    const CATEGORIES: [&str; 5] = ["electronics", "books", "clothing", "food", "toys"];

    let path = profile_dir(name);
    let index = Index::create_with_mapping(&path, nested_schema()).unwrap();

    let mut docs: Vec<serde_json::Value> = Vec::with_capacity(doc_count);
    for doc_no in 0..doc_count {
        let mut offers: Vec<serde_json::Value> = Vec::with_capacity(offers_per_doc);
        for offer_no in 0..offers_per_doc {
            // Offsetting the seller by the parent number rotates the seller
            // set from one parent to the next.
            let seller = SELLERS[(doc_no + offer_no) % SELLERS.len()];
            let price = (doc_no * 10 + offer_no * 100) % 1000;
            offers.push(json!({
                "seller": seller,
                "price": price.to_string(),
            }));
        }

        let category = CATEGORIES[doc_no % CATEGORIES.len()];
        docs.push(json!({
            "product": format!("product {doc_no}"),
            "category": category,
            "offers": offers,
        }));
    }

    index.bulk(docs).unwrap();
    (path, index)
}

/// Times corpus construction for a fixed number of parent documents while
/// varying how many nested offers each parent carries, printing one line per
/// configuration.
///
/// The timed span covers the whole `build_nested_corpus` call, so it includes
/// generating the JSON documents as well as indexing them. The reported rate
/// divides the parent count (not the parent + nested total) by the elapsed
/// time.
#[test]
fn nested_indexing_overhead() {
    const PARENT_COUNT: usize = 5_000;

    println!("\n=== Nested: Indexing Overhead ===\n");

    for offers_per in [1, 3, 5, 10] {
        let timer = Instant::now();
        let corpus_name = format!("nested_idx_{offers_per}");
        // Bound to `_index` rather than `_` so the index stays alive until
        // the end of the iteration, i.e. until after `cleanup` has run.
        let (path, _index) = build_nested_corpus(&corpus_name, PARENT_COUNT, offers_per);
        let elapsed = timer.elapsed();

        // Each parent contributes itself plus one entry per nested offer.
        let total_docs = PARENT_COUNT * (1 + offers_per);
        let millis = elapsed.as_millis();
        let parents_per_sec = PARENT_COUNT as f64 / elapsed.as_secs_f64();
        println!(
            "5K parents x {offers_per} offers ({total_docs} total docs): {:.0}ms ({:.0} docs/s)",
            millis, parents_per_sec
        );

        cleanup(&path);
    }
}

/// Measures three query shapes against a single corpus of 5,000 parents with
/// three offers each, printing one measurement row per query:
///
/// 1. a nested `term` query on `offers.seller`,
/// 2. a nested `bool`/`must` combining seller and price inside one offer,
/// 3. a top-level `match` on `product` with a nested `term` as a filter.
#[test]
fn nested_query_latency() {
    println!("\n=== Nested: Query Latency (5K docs, 3 offers each) ===\n");

    let (path, mut index) = build_nested_corpus("nested_query", 5_000, 3);

    print_measurement_header();

    // One driver for all queries so each is measured with identical
    // arguments and reported as soon as it finishes. The literal `10` is
    // forwarded unchanged (presumably the requested hit count — confirm
    // against the harness).
    let mut run = |label: &str, query: serde_json::Value| {
        let measurement = measure_query(label, &mut index, query, 10, WARMUP, ITERS);
        print_measurement(&measurement);
    };

    // Nested term query.
    run(
        "nested_term",
        json!({
            "nested": {"path": "offers", "query": {"term": {"offers.seller": "alice"}}}
        }),
    );

    // Nested bool: both clauses sit inside the same nested query (the
    // cross-object isolation case).
    run(
        "nested_bool_must",
        json!({
            "nested": {"path": "offers", "query": {"bool": {"must": [
                {"term": {"offers.seller": "alice"}},
                {"term": {"offers.price": "100"}}
            ]}}}
        }),
    );

    // Top-level text match with a nested query used as a filter.
    run(
        "text+nested_filter",
        json!({
            "bool": {
                "must": [{"match": {"product": "product"}}],
                "filter": [{"nested": {"path": "offers", "query": {"term": {"offers.seller": "alice"}}}}]
            }
        }),
    );

    cleanup(&path);
}

/// Measures the same nested `term` query against corpora that differ only in
/// the number of offers per parent, printing one measurement row per corpus.
///
/// Despite the name, what varies here is the count of nested offers per
/// document, not the nesting depth. Each corpus is built, queried and cleaned
/// up before the next one is created.
#[test]
fn nested_scaling_by_depth() {
    println!("\n=== Nested: Query Scaling by Offers per Doc ===\n");

    print_measurement_header();

    for offers_per in [1, 3, 5, 10, 20] {
        let corpus_name = format!("nested_depth_{offers_per}");
        let (path, mut index) = build_nested_corpus(&corpus_name, 5_000, offers_per);

        let label = format!("{offers_per} offers/doc");
        let query =
            json!({"nested": {"path": "offers", "query": {"term": {"offers.seller": "alice"}}}});
        let measurement = measure_query(&label, &mut index, query, 10, WARMUP, ITERS);
        print_measurement(&measurement);

        cleanup(&path);
    }
}