lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
//! MatchQuery: analyze query text, build bool of term queries.
//!
//! Analyzes the query string with the search analyzer resolved for the field, then:
//! - 0 tokens → match nothing
//! - 1 token → TermQuery
//! - N tokens → BoolQuery { should: [TermQuery * N] }
//!
//! See [[query-dsl#Full-Text Queries]] and [[architecture-query-execution#Step 8]].

use crate::core::{Result, ScoreMode};

use crate::query::boolean::BoolQuery;
use crate::query::prefix::PrefixQuery;
use crate::query::term::TermQuery;
use crate::query::{BoundQuery, Query};
use crate::search::searcher::Searcher;

/// Full-text match query: analyze and search.
///
/// The query text is kept raw here; it is analyzed when the query is bound
/// against a searcher, and the resulting tokens decide which concrete query
/// is executed.
pub struct MatchQuery {
    /// Name of the field to search. Also passed to the searcher when
    /// resolving which search analyzer to use.
    pub field: String,
    /// Raw, un-analyzed query text.
    pub query_text: String,
    /// Optional analyzer name handed to `Searcher::resolve_search_analyzer`
    /// together with `field` (presumably an explicit override of the
    /// field-configured analyzer — confirm against the resolver).
    pub analyzer: Option<String>,
}

impl Query for MatchQuery {
    /// Analyzes `query_text` and binds the query it rewrites to.
    ///
    /// The rewrite depends on how many tokens the analyzer produces:
    /// - none: a query that matches nothing,
    /// - exactly one: a single `TermQuery` on `field`,
    /// - several: a `BoolQuery` whose `should` clauses are one `TermQuery`
    ///   per token, with no `minimum_should_match`.
    ///
    /// Whatever the delegated `bind` call returns (including its error) is
    /// returned unchanged.
    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        // The search-time analyzer is picked by the searcher from the field and
        // the optional per-query analyzer name.
        // See [[feature-analysis-pipeline#Analyzer Resolution Order]].
        let resolved_name =
            searcher.resolve_search_analyzer(&self.field, self.analyzer.as_deref());
        let registry = searcher.analyzers();
        let search_analyzer = registry.get(resolved_name);

        let tokens = search_analyzer.analyze(&self.query_text);

        match tokens.len() {
            // Analysis produced nothing to search for.
            0 => MatchNoneQuery.bind(searcher, score_mode),

            // A lone token needs no boolean wrapper.
            1 => TermQuery {
                field: self.field.clone(),
                value: tokens[0].text.clone(),
            }
            .bind(searcher, score_mode),

            // Several tokens: each one becomes an optional (should) term clause.
            _ => {
                let mut should: Vec<Box<dyn Query>> = Vec::with_capacity(tokens.len());
                for token in tokens.iter() {
                    should.push(Box::new(TermQuery {
                        field: self.field.clone(),
                        value: token.text.clone(),
                    }));
                }

                BoolQuery {
                    must: vec![],
                    should,
                    must_not: vec![],
                    filter: vec![],
                    minimum_should_match: None,
                }
                .bind(searcher, score_mode)
            }
        }
    }
}

/// match_bool_prefix: analyze text, all tokens except last become term
/// queries, last becomes prefix query, wrapped in bool/should.
///
/// Defers analysis to `bind()` so the searcher's analyzer registry
/// (including custom and field-configured analyzers) is used.
/// See [[investigation-20260405-06-match-bool-prefix-analyzer]].
pub struct MatchBoolPrefixQuery {
    /// Name of the field to search. Also passed to the searcher when
    /// resolving which search analyzer to use.
    pub field: String,
    /// Raw, un-analyzed query text; the last token it yields is treated as a
    /// prefix rather than a complete term.
    pub query_text: String,
    /// Optional analyzer name handed to `Searcher::resolve_search_analyzer`
    /// together with `field` (presumably an explicit override of the
    /// field-configured analyzer — confirm against the resolver).
    pub analyzer: Option<String>,
}

impl Query for MatchBoolPrefixQuery {
    /// Analyzes `query_text` and binds a bool/should query over its tokens.
    ///
    /// Every token except the final one becomes a `TermQuery`; the final token
    /// becomes a `PrefixQuery`. A single-token input therefore yields a bool
    /// query holding just one prefix clause. If analysis yields no tokens, a
    /// query that matches nothing is bound instead.
    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        let resolved_name =
            searcher.resolve_search_analyzer(&self.field, self.analyzer.as_deref());
        let registry = searcher.analyzers();
        let search_analyzer = registry.get(resolved_name);
        let tokens = search_analyzer.analyze(&self.query_text);

        // Guard first: the subtraction below requires at least one token.
        if tokens.is_empty() {
            return MatchNoneQuery.bind(searcher, score_mode);
        }

        let final_pos = tokens.len() - 1;
        let should: Vec<Box<dyn Query>> = tokens
            .iter()
            .enumerate()
            .map(|(pos, token)| -> Box<dyn Query> {
                let field = self.field.clone();
                let value = token.text.clone();
                if pos == final_pos {
                    // Trailing token: match anything that starts with it.
                    Box::new(PrefixQuery { field, value })
                } else {
                    // Earlier tokens must match as complete terms.
                    Box::new(TermQuery { field, value })
                }
            })
            .collect();

        let rewritten = BoolQuery {
            must: vec![],
            should,
            must_not: vec![],
            filter: vec![],
            minimum_should_match: None,
        };
        rewritten.bind(searcher, score_mode)
    }
}

/// Match-nothing query.
///
/// Private to this module; the match queries here bind it when analysis of
/// the query text yields no tokens.
struct MatchNoneQuery;

impl Query for MatchNoneQuery {
    /// Always succeeds with a `BoundMatchNoneQuery`; the searcher and the
    /// score mode are ignored.
    fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        Ok(Box::new(BoundMatchNoneQuery))
    }
}

/// Bound form of `MatchNoneQuery`: it never supplies a scorer.
struct BoundMatchNoneQuery;

impl BoundQuery for BoundMatchNoneQuery {
    /// Returns `Ok(None)` for every segment without inspecting the reader.
    /// The tests in this file treat a `None` supplier as "no matches".
    fn scorer_supplier(
        &self,
        _reader: &crate::segment::reader::SegmentReader,
    ) -> Result<Option<Box<dyn crate::query::ScorerSupplier>>> {
        Ok(None)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::AnalyzerRegistry;
    use crate::analysis::Token;
    use crate::core::{FieldId, NO_MORE_DOCS, SegmentId};
    use crate::mapping::{FieldType, Mapping};
    use crate::segment::builder::SegmentBuilder;

    /// Turns each term into a `Token`, passing `0`, the term's byte length and
    /// the term's index in `terms` as the remaining `Token::new` arguments.
    fn make_tokens(terms: &[&str]) -> Vec<Token> {
        let mut tokens = Vec::with_capacity(terms.len());
        for (index, term) in terms.iter().enumerate() {
            tokens.push(Token::new(*term, 0, term.len(), index as u32));
        }
        tokens
    }

    /// Builds a single-segment store with a text field `body` and three docs:
    /// 0 = "the quick brown fox", 1 = "the lazy dog", 2 = "quick search engine".
    fn build_test_store() -> crate::search::segment_store::SegmentStore {
        let schema = Mapping::builder().field("body", FieldType::Text).build();
        let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);

        // Pre-tokenized bodies, added in doc-id order.
        let bodies: [&[&str]; 3] = [
            &["the", "quick", "brown", "fox"],
            &["the", "lazy", "dog"],
            &["quick", "search", "engine"],
        ];
        for terms in bodies.iter() {
            builder.add_document(&[(FieldId::new(0), make_tokens(terms))], b"{}");
        }

        let segment = builder.build();
        let reader = crate::segment::reader::SegmentReader::open(segment).unwrap();
        crate::search::segment_store::SegmentStore::new(
            vec![reader],
            AnalyzerRegistry::new(),
            None,
            None,
        )
    }

    /// Drains `scorer` from its current doc until `NO_MORE_DOCS`, returning the
    /// doc ids it visited in order.
    fn collect_doc_ids(scorer: &mut dyn crate::core::Scorer) -> Vec<u32> {
        let mut seen = Vec::new();
        loop {
            if scorer.doc_id() == NO_MORE_DOCS {
                break seen;
            }
            seen.push(scorer.doc_id().as_u32());
            scorer.next();
        }
    }

    /// Runs a `MatchQuery` (no explicit analyzer) for `query_text` against
    /// `body` on a fresh test store and returns the doc ids matched in the
    /// only segment. Panics if the query yields no scorer supplier.
    fn matching_doc_ids(query_text: &str) -> Vec<u32> {
        let store = build_test_store();
        let searcher = Searcher::new(&store);
        let query = MatchQuery {
            field: "body".into(),
            query_text: query_text.into(),
            analyzer: None,
        };

        let bound = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = bound
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut scorer = supplier.scorer().unwrap();

        let ids = collect_doc_ids(scorer.as_mut());
        ids
    }

    #[test]
    fn match_single_term() {
        // The standard analyzer lowercases "Fox"; "fox" occurs only in doc 0.
        assert_eq!(matching_doc_ids("Fox"), vec![0]);
    }

    #[test]
    fn match_multi_term() {
        // "quick" is in docs 0 and 2, "lazy" in doc 1 → should-semantics hit all three.
        assert_eq!(matching_doc_ids("quick lazy"), vec![0, 1, 2]);
    }

    #[test]
    fn match_empty_query() {
        let store = build_test_store();
        let searcher = Searcher::new(&store);
        let empty = MatchQuery {
            field: "body".into(),
            query_text: "".into(),
            analyzer: None,
        };

        let bound = empty.bind(&searcher, ScoreMode::Complete).unwrap();
        let maybe_supplier = bound.scorer_supplier(&searcher.segments()[0]).unwrap();
        // No terms after analysis → no supplier, hence no matches.
        assert!(maybe_supplier.is_none());
    }

    #[test]
    fn match_case_normalization() {
        // Upper-case input still finds "the", which is in docs 0 and 1.
        assert_eq!(matching_doc_ids("THE"), vec![0, 1]);
    }
}