// lucisearch 0.8.1

//! Exit criteria integration tests for Milestone 2.
//!
//! Test 1: JSON query parse + execute round-trip
//! Test 2: Bool query correctness with all clause types
//! Test 3: Phrase query correctness
//! Test 4: Cost-based clause ordering

use luci::index::Index;
use luci::mapping::{FieldType, Mapping};
use luci::query::ast::ScoringExpression;
use luci::query::parser::parse_query;
use luci::search::expression::parse_search;
use serde_json::json;

/// Builds the scratch directory path for the test identified by `name`.
///
/// The path sits under the system temp directory and embeds the current
/// process id. Whatever already exists at that path is removed (errors are
/// ignored); the directory itself is not created here.
fn test_dir(name: &str) -> std::path::PathBuf {
    let pid = std::process::id();
    let mut scratch = std::env::temp_dir();
    scratch.push(format!("luci_m2_integration_{pid}_{name}"));
    // Best-effort wipe: a failed removal (e.g. nothing there) is not reported.
    let _ = std::fs::remove_dir_all(&scratch);
    scratch
}

/// Deletes the directory tree rooted at `path`, discarding any error.
fn cleanup(path: &std::path::Path) {
    // Teardown is best-effort: the removal result is intentionally dropped.
    drop(std::fs::remove_dir_all(path));
}

/// Creates an index in the scratch directory for `name` and loads the shared
/// eight-document corpus into it.
///
/// Each document has two `Text` fields (`title`, `body`) and two `Keyword`
/// fields (`tag`, `status`). Four documents are tagged `animal` and four
/// `tech`; two of the eight have status `draft`, the rest `published`.
///
/// Returns the scratch path together with the index so the caller can hand
/// the path to `cleanup` afterwards. Panics if index creation or the bulk
/// load fails.
fn build_test_index(name: &str) -> (std::path::PathBuf, Index) {
    // One row per document, in insertion order: (title, body, tag, status).
    const CORPUS: [(&str, &str, &str, &str); 8] = [
        (
            "The Quick Brown Fox",
            "A quick brown fox jumps over the lazy dog",
            "animal",
            "published",
        ),
        (
            "Search Engine Design",
            "Building a search engine requires inverted indexes and scoring",
            "tech",
            "published",
        ),
        (
            "Quick Start Guide",
            "This guide helps you get started quickly with the search engine",
            "tech",
            "draft",
        ),
        (
            "The Lazy Dog",
            "The lazy dog sleeps all day long",
            "animal",
            "published",
        ),
        (
            "Advanced Search Techniques",
            "Advanced techniques for search engine optimization and scoring",
            "tech",
            "published",
        ),
        (
            "Brown Bear",
            "The brown bear lives in the forest",
            "animal",
            "draft",
        ),
        (
            "Fox and Friends",
            "The fox is a clever animal that lives in many habitats",
            "animal",
            "published",
        ),
        (
            "Database Internals",
            "Understanding database storage engines and indexing strategies",
            "tech",
            "published",
        ),
    ];

    let path = test_dir(name);
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("body", FieldType::Text)
        .field("tag", FieldType::Keyword)
        .field("status", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let docs: Vec<_> = CORPUS
        .iter()
        .map(|&(title, body, tag, status)| {
            json!({"title": title, "body": body, "tag": tag, "status": status})
        })
        .collect();
    index.bulk(docs).unwrap();

    (path, index)
}

/// Exit Criterion 1: JSON query parse + execute round-trip.
///
/// Parses one JSON body per query kind (term, match, bool, exists, prefix,
/// constant_score) with `parse_search`, runs it against the shared
/// eight-document corpus, and checks the reported hits.
#[test]
fn exit_criterion_1_json_query_round_trip() {
    let (path, index) = build_test_index("ec1");

    // Term query
    let expr = parse_search(json!({"term": {"tag": "animal"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4, "4 docs have tag=animal");

    // Match query (multi-term, analyzed)
    let expr = parse_search(json!({"match": {"body": "search engine"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert!(
        results.total_hits().value >= 3,
        "multiple docs mention search or engine"
    );

    // Bool query: must + filter
    let expr = parse_search(
        json!({
            "bool": {
                "must": [{"match": {"body": "search"}}],
                "filter": [{"term": {"status": "published"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    // Guard against a vacuous pass: the per-hit check below proves nothing
    // when the query returns no hits at all.
    assert!(
        results.total_hits().value >= 1,
        "bool must+filter should match at least one doc"
    );
    // "search" in body AND status=published
    for hit in results.iter() {
        let source = hit.source().unwrap();
        assert_eq!(source["status"], "published");
    }

    // Exists query
    let expr = parse_search(json!({"exists": {"field": "title"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 8, "all 8 docs have title");

    // Prefix query
    let expr = parse_search(json!({"prefix": {"tag": "tech"}}), 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.total_hits().value,
        4,
        "4 docs have tag starting with 'tech'"
    );

    // Constant score query: every hit must carry exactly the requested boost.
    let expr = parse_search(
        json!({
            "constant_score": {
                "filter": {"term": {"tag": "animal"}},
                "boost": 2.5
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4);
    for hit in results.iter() {
        assert_eq!(hit.score(), 2.5, "constant score should be 2.5");
    }

    cleanup(&path);
}

/// Exit Criterion 2: Bool query with all clause types.
///
/// Exercises `must`, `should`, `must_not` and `filter` one at a time against
/// the shared eight-document corpus, using keyword `term` clauses so the
/// expected hit counts are exact.
#[test]
fn exit_criterion_2_bool_query_correctness() {
    let (path, index) = build_test_index("ec2");

    // must: intersection
    let expr = parse_search(
        json!({
            "bool": {
                "must": [
                    {"term": {"tag": "tech"}},
                    {"term": {"status": "published"}}
                ]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    // 4 tech docs minus 1 draft = 3. Asserting the count also keeps the
    // per-hit loop below from passing vacuously on an empty result.
    assert_eq!(
        results.total_hits().value,
        3,
        "3 docs are tech AND published"
    );
    for hit in results.iter() {
        let s = hit.source().unwrap();
        assert_eq!(s["tag"], "tech");
        assert_eq!(s["status"], "published");
    }

    // should: union (only the hit count is asserted here, not the scores)
    let expr = parse_search(
        json!({
            "bool": {
                "should": [
                    {"term": {"tag": "animal"}},
                    {"term": {"tag": "tech"}}
                ]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.total_hits().value,
        8,
        "all docs are either animal or tech"
    );

    // must_not: exclusion
    let expr = parse_search(
        json!({
            "bool": {
                "must": [{"term": {"tag": "animal"}}],
                "must_not": [{"term": {"status": "draft"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    // 4 animals minus 1 draft = 3
    assert_eq!(results.total_hits().value, 3);
    for hit in results.iter() {
        let s = hit.source().unwrap();
        assert_ne!(s["status"], "draft");
    }

    // filter: no score contribution
    let expr = parse_search(
        json!({
            "bool": {
                "filter": [{"term": {"tag": "tech"}}]
            }
        }),
        10,
    )
    .unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(results.total_hits().value, 4);
    // Filter scores should be constant (1.0)
    for hit in results.iter() {
        assert_eq!(
            hit.score(),
            1.0,
            "filter context should give constant score"
        );
    }

    cleanup(&path);
}

/// Exit Criterion 3: Phrase query correctness.
///
/// Runs `match_phrase` queries on `body` against the shared corpus: phrases
/// that occur in order must match, and a reversed phrase must yield no hits.
#[test]
fn exit_criterion_3_phrase_query() {
    let (path, index) = build_test_index("ec3");

    // Builds the search expression for a `match_phrase` query on `body`.
    let phrase_on_body =
        |text: &str| parse_search(json!({"match_phrase": {"body": text}}), 10).unwrap();

    // The in-order phrase is present in several bodies.
    let in_order = phrase_on_body("search engine");
    let in_order_hits = index.search(&in_order).unwrap();
    assert!(
        in_order_hits.total_hits().value >= 2,
        "at least 2 docs have 'search engine' phrase"
    );

    // Same terms, wrong order: nothing may match.
    let reversed = phrase_on_body("engine search");
    let reversed_hits = index.search(&reversed).unwrap();
    assert_eq!(
        reversed_hits.total_hits().value,
        0,
        "reversed phrase should not match"
    );

    // Three-term phrase: the top hit's body must literally contain it.
    let fox = phrase_on_body("quick brown fox");
    let fox_hits = index.search(&fox).unwrap();
    assert!(fox_hits.total_hits().value >= 1);
    let top = fox_hits.hit(0).unwrap();
    let source = top.source().unwrap();
    let body = source["body"].as_str().unwrap();
    assert!(body.contains("quick brown fox"));

    // Two-term phrase that appears in more than one document.
    let dog = phrase_on_body("lazy dog");
    let dog_hits = index.search(&dog).unwrap();
    assert!(dog_hits.total_hits().value >= 1);

    cleanup(&path);
}

/// Exit Criterion 4: Cost-based clause ordering.
///
/// Combines a broad `must` clause (status=published) with a selective one
/// (title matches "database") and checks that exactly the one document
/// satisfying both is returned, whichever clause the engine leads with.
#[test]
fn exit_criterion_4_cost_based_ordering() {
    let (path, index) = build_test_index("ec4");

    // Broad clause first, selective clause second; the outcome must not
    // depend on that order.
    let query = json!({
        "bool": {
            "must": [
                {"term": {"status": "published"}},
                {"match": {"title": "database"}}
            ]
        }
    });
    let expr = parse_search(query, 10).unwrap();
    let results = index.search(&expr).unwrap();
    assert_eq!(
        results.total_hits().value,
        1,
        "only 1 doc matches both clauses"
    );

    let top = results.hit(0).unwrap();
    let source = top.source().unwrap();
    // Compare case-insensitively: the stored title is capitalised.
    let title = source["title"].as_str().unwrap().to_lowercase();
    assert!(title.contains("database"));
    assert_eq!(source["status"], "published");

    cleanup(&path);
}

/// Supplementary: end-to-end JSON parse → query → search.
///
/// Only the parse step is verified: the JSON body must come back from
/// `parse_query` as a `Bool` expression with one `Match` clause under `must`
/// and one `Term` clause under `filter`.
#[test]
fn json_parse_to_search_e2e() {
    let (path, _index) = build_test_index("e2e_json");

    let body = json!({
        "query": {
            "bool": {
                "must": [{"match": {"body": "search"}}],
                "filter": [{"term": {"status": "published"}}]
            }
        }
    });
    let ast = parse_query(&body).unwrap();

    // The AST cannot be executed directly (it would first have to be turned
    // into Query objects), so the check stops at the parsed structure.
    match &ast {
        ScoringExpression::Bool { must, filter, .. } => {
            assert_eq!(must.len(), 1);
            assert_eq!(filter.len(), 1);
            assert!(matches!(&must[0], ScoringExpression::Match { .. }));
            assert!(matches!(&filter[0], ScoringExpression::Term { .. }));
        }
        _ => panic!("expected Bool query"),
    }

    cleanup(&path);
}

/// A bool query made only of `should` clauses must still honour `must_not`.
///
/// Regression test: the original scorer had an early return that skipped
/// ExclusionScorer wrapping for pure-should queries, so excluded documents
/// leaked into the results.
#[test]
fn must_not_with_pure_should() {
    let path = test_dir("must_not_should");
    let mapping = Mapping::builder()
        .field("title", FieldType::Text)
        .field("status", FieldType::Keyword)
        .build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let docs = vec![
        json!({"title": "hello world", "status": "active"}),
        json!({"title": "hello there", "status": "deleted"}),
        json!({"title": "goodbye world", "status": "active"}),
    ];
    index.bulk(docs).unwrap();

    // Two titles contain "hello", but one of them is "deleted" and has to be
    // dropped by the must_not clause.
    let query = json!({
        "query": {"bool": {
            "should": [{"match": {"title": "hello"}}],
            "must_not": [{"term": {"status": "deleted"}}]
        }}
    });
    let expr = parse_search(query, 10).unwrap();
    let results = index.search(&expr).unwrap();
    let hit_count = results.len();
    assert_eq!(
        hit_count, 1,
        "should match 'hello world' (active) only, got {}",
        hit_count
    );

    cleanup(&path);
}

/// minimum_should_match: requires N of the should clauses to match.
///
/// Covers four cases on a five-document corpus: a threshold below the clause
/// count, no threshold, a threshold equal to the clause count, and a
/// threshold above it.
#[test]
fn minimum_should_match() {
    let path = test_dir("min_should");
    let mapping = Mapping::builder().field("title", FieldType::Text).build();
    let index = Index::create_with_mapping(&path, mapping).unwrap();

    let corpus = vec![
        json!({"title": "apple banana cherry"}), // 3 matches
        json!({"title": "apple banana"}),        // 2 matches
        json!({"title": "apple"}),               // 1 match
        json!({"title": "banana cherry"}),       // 2 matches
        json!({"title": "other stuff"}),         // 0 matches
    ];
    index.bulk(corpus).unwrap();

    // Runs a pure-should bool query with one `match` clause on `title` per
    // term, adding `minimum_should_match` only when `min` is given, and
    // returns the number of hits.
    let hit_count = |terms: &[&str], min: Option<u64>| {
        let clauses: Vec<_> = terms
            .iter()
            .map(|&term| json!({"match": {"title": term}}))
            .collect();
        let mut bool_body = json!({"should": clauses});
        if let Some(required) = min {
            bool_body["minimum_should_match"] = json!(required);
        }
        let expr = parse_search(json!({"query": {"bool": bool_body}}), 10).unwrap();
        let results = index.search(&expr).unwrap();
        results.len()
    };

    let all_three = ["apple", "banana", "cherry"];

    // min=2: docs with >= 2 matching should clauses
    let with_min_two = hit_count(&all_three, Some(2));
    assert_eq!(
        with_min_two, 3,
        "expected 3 docs with >= 2 should matches, got {}",
        with_min_two
    );

    // Without minimum_should_match, all 4 docs with any match qualify
    assert_eq!(
        hit_count(&all_three, None),
        4,
        "without min_should_match, expected 4 docs"
    );

    // min == num_should: promotes to conjunction
    assert_eq!(
        hit_count(&all_three, Some(3)),
        1,
        "min=3 of 3 should only match doc with all 3 terms"
    );

    // min > num_should: matches nothing
    assert_eq!(
        hit_count(&["apple", "banana"], Some(5)),
        0,
        "min > num_should should match nothing"
    );

    cleanup(&path);
}