lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
use crate::core::{LuciError, Result};

/// How the vectors of a `dense_vector` field are quantized.
///
/// Selected with the `quantization` key of the field mapping, e.g.:
/// ```json
/// {"embedding": {"type": "dense_vector", "dims": 768, "quantization": "int8"}}
/// ```
///
/// Two variants, [`Self::Int4`] and [`Self::Bbq`], are named here but
/// have no implementation yet. Parsing a mapping that asks for either
/// of them fails with [`LuciError::InvalidQuery`]: the mapping API
/// refuses any value the engine cannot honor (project note:
/// `[[code-must-not-lie]]`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QuantizationType {
    /// Quantization disabled: vectors are stored and scored as full
    /// float32.
    None,
    /// Scalar quantization to 8 bits per dimension. Cuts memory 4× with
    /// minimal recall loss in typical embedding workloads.
    Int8,
    /// Scalar quantization to 4 bits per dimension.
    ///
    /// The name is recognized but the scheme is not implemented yet; a
    /// mapping that requests it is rejected with
    /// [`LuciError::InvalidQuery`].
    Int4,
    /// Better Binary Quantization: 1 bit per dimension, combined with a
    /// correction term and oversampling-rerank.
    ///
    /// The name is recognized but the scheme is not implemented yet; a
    /// mapping that requests it is rejected with
    /// [`LuciError::InvalidQuery`].
    Bbq,
}

impl QuantizationType {
    /// The default quantization for `dense_vector` fields when the user
    /// does not specify one.
    pub const DEFAULT: Self = Self::Int8;

    /// Parse a quantization name from the mapping JSON `quantization` field.
    ///
    /// Returns [`LuciError::InvalidQuery`] for both unknown names and
    /// recognized-but-unimplemented values (`int4`, `bbq`). The error
    /// message names the rejected value and the reason — the system
    /// will never silently substitute a different quantization for the
    /// one the user asked for.
    pub fn from_es_name(name: &str) -> Result<Self> {
        match name {
            "none" => Ok(Self::None),
            "int8" => Ok(Self::Int8),
            "int4" => Err(LuciError::InvalidQuery(
                "quantization \"int4\" is recognized but not yet implemented; \
                 supported values: \"none\", \"int8\""
                    .into(),
            )),
            "bbq" => Err(LuciError::InvalidQuery(
                "quantization \"bbq\" is recognized but not yet implemented; \
                 supported values: \"none\", \"int8\""
                    .into(),
            )),
            other => Err(LuciError::InvalidQuery(format!(
                "unknown quantization type: \"{other}\" \
                 (supported: \"none\", \"int8\")"
            ))),
        }
    }

    /// The ES-compatible name used in JSON mappings.
    pub fn es_name(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::Int8 => "int8",
            Self::Int4 => "int4",
            Self::Bbq => "bbq",
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `name`, require that it is rejected, and return the error
    /// rendered through its `Display` impl.
    fn rejection_text(name: &str) -> String {
        QuantizationType::from_es_name(name)
            .unwrap_err()
            .to_string()
    }

    /// Each supported variant must survive `es_name` -> `from_es_name`.
    #[test]
    fn supported_values_round_trip() {
        let supported = [QuantizationType::None, QuantizationType::Int8];
        for variant in supported {
            let reparsed = QuantizationType::from_es_name(variant.es_name()).unwrap();
            assert_eq!(reparsed, variant);
        }
    }

    /// `int4` is a known name, but must fail with an explanatory message.
    #[test]
    fn int4_is_rejected_as_unimplemented() {
        let msg = rejection_text("int4");
        assert!(msg.contains("int4"), "error must name the value: {msg}");
        assert!(
            msg.contains("not yet implemented"),
            "error must explain why: {msg}"
        );
    }

    /// `bbq` is a known name, but must fail with an explanatory message.
    #[test]
    fn bbq_is_rejected_as_unimplemented() {
        let msg = rejection_text("bbq");
        assert!(msg.contains("bbq"), "error must name the value: {msg}");
        assert!(
            msg.contains("not yet implemented"),
            "error must explain why: {msg}"
        );
    }

    /// Names outside the known set are echoed back alongside the
    /// supported list.
    #[test]
    fn unknown_value_is_rejected() {
        let msg = rejection_text("magic");
        assert!(msg.contains("magic"), "error must name the value: {msg}");
        assert!(
            msg.contains("supported"),
            "error must list supported values: {msg}"
        );
    }

    /// The empty string is not a quantization name.
    #[test]
    fn empty_string_is_rejected() {
        let outcome = QuantizationType::from_es_name("");
        assert!(outcome.is_err());
    }

    /// Pins the documented default.
    #[test]
    fn default_is_int8() {
        assert_eq!(QuantizationType::DEFAULT, QuantizationType::Int8);
    }
}