lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
use crate::mapping::field_type::FieldType;

/// A single field's mapping: its name, type, and indexing flags.
///
/// Build one with [`FieldMapping::new`], which picks defaults from the field
/// type, then adjust individual flags with the builder-style setters.
///
/// Flags control which index structures are built for this field:
///
/// - `stored`: include in the document store (retrievable via `_source`)
/// - `indexed`: add to the inverted index (searchable)
/// - `doc_values`: create columnar storage (sortable, aggregatable)
/// - `norms`: store field norms for [[best-matching-25|BM25]] scoring
///
/// See [[architecture-api-surface#Schema Definition]] and [[architecture-indexing-pipeline]].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FieldMapping {
    /// Field name as it appears in documents.
    pub name: String,
    /// Data type of the field; [`FieldMapping::new`] derives the default
    /// flags below from it.
    pub field_type: FieldType,
    /// Whether to include this field's value in the document store.
    /// Defaults to `false` for dense-vector and geo types, `true` otherwise.
    pub stored: bool,
    /// Whether to add this field to the inverted index for search.
    /// Defaults to `true` for every type.
    pub indexed: bool,
    /// Whether to create columnar (doc_values) storage for sorting and
    /// aggregations. Defaults to `false` for `Text`, dense-vector and geo
    /// types, `true` otherwise.
    pub doc_values: bool,
    /// Whether to store field length norms for BM25 scoring.
    /// Only meaningful for `Text` fields, and only enabled by default for them.
    pub norms: bool,
    /// Analyzer name for `Text` fields. `None` (the default) uses the default
    /// analyzer.
    pub analyzer: Option<String>,
    /// Search-time analyzer name. If set, queries against this field use this
    /// analyzer instead of `analyzer`. `None` by default.
    /// See [[feature-analysis-pipeline]].
    pub search_analyzer: Option<String>,
    /// If this is a multi-field sub-field, the name of the parent field.
    /// The parent's source value is routed to this sub-field during indexing.
    /// `None` for top-level fields (the default).
    /// See [[feature-mapping-multi-fields]].
    pub parent_field: Option<String>,
    /// Names of target fields that receive a copy of this field's value at
    /// index time. Empty by default. See [[feature-mapping-copy-to]].
    pub copy_to: Vec<String>,
}

impl FieldMapping {
    /// Create a mapping with sensible defaults for the given field type.
    ///
    /// Default flags per type:
    ///
    /// | Type | stored | indexed | doc_values | norms |
    /// |------|--------|---------|------------|-------|
    /// | Text | true | true | false | true |
    /// | Keyword | true | true | true | false |
    /// | Numeric/Boolean/Date | true | true | true | false |
    /// | Dense vector (`is_dense_vector()`) | false | true | false | false |
    /// | GeoPoint/GeoShape | false | true | false | false |
    ///
    /// `analyzer`, `search_analyzer` and `parent_field` start as `None`, and
    /// `copy_to` starts empty. Use the builder-style setters on the returned
    /// value to override any flag.
    #[must_use]
    pub fn new(name: impl Into<String>, field_type: FieldType) -> Self {
        let is_text = matches!(field_type, FieldType::Text);
        let is_vector = field_type.is_dense_vector();
        let is_geo = matches!(field_type, FieldType::GeoPoint | FieldType::GeoShape);
        // Vectors and geo values are excluded from the source (document
        // store) and from columnar storage by default.
        let vector_or_geo = is_vector || is_geo;

        Self {
            name: name.into(),
            field_type,
            stored: !vector_or_geo,
            indexed: true,
            // Text gets norms instead of doc_values; everything else that is
            // not a vector or geo type gets doc_values instead of norms.
            doc_values: !is_text && !vector_or_geo,
            norms: is_text,
            analyzer: None,
            search_analyzer: None,
            parent_field: None,
            copy_to: Vec::new(),
        }
    }

    /// Set the `stored` flag and return the updated mapping.
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn stored(mut self, stored: bool) -> Self {
        self.stored = stored;
        self
    }

    /// Set the `indexed` flag and return the updated mapping.
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn indexed(mut self, indexed: bool) -> Self {
        self.indexed = indexed;
        self
    }

    /// Set the `doc_values` flag and return the updated mapping.
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn doc_values(mut self, doc_values: bool) -> Self {
        self.doc_values = doc_values;
        self
    }

    /// Set the `norms` flag and return the updated mapping.
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn norms(mut self, norms: bool) -> Self {
        self.norms = norms;
        self
    }

    /// Set the analyzer name (only meaningful for `Text` fields) and return
    /// the updated mapping.
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn analyzer(mut self, analyzer: impl Into<String>) -> Self {
        self.analyzer = Some(analyzer.into());
        self
    }

    /// Set the search-time analyzer name (only meaningful for `Text` fields)
    /// and return the updated mapping.
    /// See [[feature-analysis-pipeline]].
    #[must_use = "builder methods return the updated mapping; the original is consumed"]
    pub fn search_analyzer(mut self, analyzer: impl Into<String>) -> Self {
        self.search_analyzer = Some(analyzer.into());
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Check all four index-structure flags of `m` at once, naming the field
    /// in the failure message so loop-driven tests point at the culprit.
    fn assert_flags(m: &FieldMapping, stored: bool, indexed: bool, doc_values: bool, norms: bool) {
        assert_eq!(m.stored, stored, "stored flag of `{}`", m.name);
        assert_eq!(m.indexed, indexed, "indexed flag of `{}`", m.name);
        assert_eq!(m.doc_values, doc_values, "doc_values flag of `{}`", m.name);
        assert_eq!(m.norms, norms, "norms flag of `{}`", m.name);
    }

    #[test]
    fn text_defaults() {
        let m = FieldMapping::new("title", FieldType::Text);
        assert!(m.stored);
        assert!(m.indexed);
        assert!(!m.doc_values);
        assert!(m.norms);
        assert!(m.analyzer.is_none());
        // The remaining optional settings also start out unset.
        assert!(m.search_analyzer.is_none());
        assert!(m.parent_field.is_none());
        assert!(m.copy_to.is_empty());
        assert_eq!(m.name, "title");
    }

    #[test]
    fn keyword_defaults() {
        let m = FieldMapping::new("status", FieldType::Keyword);
        assert_flags(&m, true, true, true, false);
    }

    #[test]
    fn numeric_defaults() {
        for ft in [
            FieldType::Integer,
            FieldType::Long,
            FieldType::Float,
            FieldType::Double,
        ] {
            let m = FieldMapping::new("val", ft);
            assert_flags(&m, true, true, true, false);
        }
    }

    #[test]
    fn boolean_defaults() {
        let m = FieldMapping::new("active", FieldType::Boolean);
        assert_flags(&m, true, true, true, false);
    }

    #[test]
    fn date_defaults() {
        let m = FieldMapping::new("created", FieldType::Date);
        assert_flags(&m, true, true, true, false);
    }

    #[test]
    fn geo_defaults() {
        // Geo types are searchable but kept out of the document store and
        // columnar storage by default.
        for ft in [FieldType::GeoPoint, FieldType::GeoShape] {
            let m = FieldMapping::new("location", ft);
            assert_flags(&m, false, true, false, false);
        }
    }

    #[test]
    fn builder_chaining() {
        let m = FieldMapping::new("body", FieldType::Text)
            .stored(false)
            .norms(false)
            .analyzer("whitespace");
        assert!(!m.stored);
        assert!(!m.norms);
        assert_eq!(m.analyzer.as_deref(), Some("whitespace"));
    }

    #[test]
    fn builder_overrides_remaining_settings() {
        let m = FieldMapping::new("body", FieldType::Text)
            .indexed(false)
            .doc_values(true)
            .search_analyzer("standard");
        assert!(!m.indexed);
        assert!(m.doc_values);
        assert_eq!(m.search_analyzer.as_deref(), Some("standard"));
        // Settings that were not overridden keep their type defaults.
        assert!(m.stored);
        assert!(m.norms);
        assert!(m.analyzer.is_none());
        assert_eq!(m.name, "body");
        assert_eq!(m.field_type, FieldType::Text);
    }
}