// luci/mapping/field_type.rs

1use std::fmt;
2
3use crate::core::LuciError;
4
5use crate::mapping::quantization::QuantizationType;
6
7/// The data type of a field in a Luci index.
8///
9/// M0 types cover text, keyword, numeric, boolean, and date. Vector,
10/// geospatial, and nested types are added in later milestones.
11///
12/// See [[architecture-api-surface#Schema Definition]].
13#[derive(Clone, Debug, PartialEq, Eq, Hash)]
14pub enum FieldType {
15    /// Full-text field, analyzed into tokens for search. Supports `match`,
16    /// `match_phrase`, and `query_string` queries.
17    Text,
18    /// Exact-value string field. Not analyzed. Supports `term`, `terms`,
19    /// `prefix`, `wildcard`, `regexp`, and `exists` queries. Stored as
20    /// dictionary-encoded columnar data for fast aggregations and sorting.
21    Keyword,
22    /// 32-bit signed integer.
23    Integer,
24    /// 64-bit signed integer.
25    Long,
26    /// 32-bit IEEE 754 float.
27    Float,
28    /// 64-bit IEEE 754 double.
29    Double,
30    /// Boolean value (`true` / `false`).
31    Boolean,
32    /// Date/time value. Stored internally as epoch milliseconds (i64).
33    /// Accepts ISO 8601 strings and epoch millis on input.
34    Date,
35    /// Dense vector for kNN search. Fixed dimensionality and quantization
36    /// scheme. Both must be set explicitly when constructing the variant
37    /// directly; [`Self::dense_vector`] builds one with [`QuantizationType::DEFAULT`].
38    DenseVector {
39        dims: usize,
40        quantization: QuantizationType,
41    },
42    /// Geographic point (latitude, longitude).
43    GeoPoint,
44    /// Nested object array — each object indexed as a hidden document.
45    Nested,
46    /// Geographic shape (Polygon, LineString, etc.) for spatial relation queries.
47    /// Indexed in a packed R-tree for efficient candidate selection.
48    ///
49    /// See [[feature-geo-shape]] and [[geospatial]].
50    GeoShape,
51    /// Token count: accepts a string, analyzes it, stores the number of tokens
52    /// as an integer. Supports range queries and numeric aggregations.
53    /// In ES this is typically a multi-field; in Luci it's a standalone field.
54    TokenCount,
55    /// IP address (IPv4 and IPv6). Stored as keyword for term queries and
56    /// as numeric for range queries. Supports CIDR notation in term queries.
57    Ip,
58}
59
60impl FieldType {
61    /// Parse a field type name from an ES-compatible mapping string.
62    ///
63    /// # Errors
64    ///
65    /// Returns `LuciError::InvalidQuery` for unrecognized type names.
66    pub fn from_es_name(name: &str) -> crate::core::Result<Self> {
67        match name {
68            "text" => Ok(Self::Text),
69            "keyword" => Ok(Self::Keyword),
70            "integer" => Ok(Self::Integer),
71            "long" => Ok(Self::Long),
72            "float" => Ok(Self::Float),
73            "double" => Ok(Self::Double),
74            "boolean" => Ok(Self::Boolean),
75            "date" => Ok(Self::Date),
76            // dims and quantization are set by the JSON parser from
77            // sibling fields after this returns; the placeholder here
78            // is overwritten before the value reaches user code.
79            "dense_vector" => Ok(Self::DenseVector {
80                dims: 0,
81                quantization: QuantizationType::DEFAULT,
82            }),
83            "geo_point" => Ok(Self::GeoPoint),
84            "nested" => Ok(Self::Nested),
85            "geo_shape" => Ok(Self::GeoShape),
86            "token_count" => Ok(Self::TokenCount),
87            "ip" => Ok(Self::Ip),
88            _ => Err(LuciError::InvalidQuery(format!(
89                "unsupported field type: {name}"
90            ))),
91        }
92    }
93
94    /// The ES-compatible type name used in JSON mappings.
95    pub fn es_name(&self) -> &'static str {
96        match self {
97            Self::Text => "text",
98            Self::Keyword => "keyword",
99            Self::Integer => "integer",
100            Self::Long => "long",
101            Self::Float => "float",
102            Self::Double => "double",
103            Self::Boolean => "boolean",
104            Self::Date => "date",
105            Self::DenseVector { .. } => "dense_vector",
106            Self::GeoPoint => "geo_point",
107            Self::Nested => "nested",
108            Self::GeoShape => "geo_shape",
109            Self::TokenCount => "token_count",
110            Self::Ip => "ip",
111        }
112    }
113
114    /// Whether this type is numeric (supports range queries, numeric sort,
115    /// and metric aggregations).
116    pub fn is_numeric(&self) -> bool {
117        matches!(
118            self,
119            Self::Integer | Self::Long | Self::Float | Self::Double | Self::TokenCount
120        )
121    }
122
123    /// Whether this type is a dense vector.
124    pub fn is_dense_vector(&self) -> bool {
125        matches!(self, Self::DenseVector { .. })
126    }
127
128    /// Get vector dimensions, or None if not a vector type.
129    pub fn vector_dims(&self) -> Option<usize> {
130        match self {
131            Self::DenseVector { dims, .. } => Some(*dims),
132            _ => None,
133        }
134    }
135
136    /// Get the configured quantization scheme, or None if not a vector type.
137    pub fn vector_quantization(&self) -> Option<QuantizationType> {
138        match self {
139            Self::DenseVector { quantization, .. } => Some(*quantization),
140            _ => None,
141        }
142    }
143
144    /// Construct a `DenseVector` field type with the given dimensionality
145    /// and the default quantization. Convenience for tests and code that
146    /// doesn't need to override the default.
147    pub fn dense_vector(dims: usize) -> Self {
148        Self::DenseVector {
149            dims,
150            quantization: QuantizationType::DEFAULT,
151        }
152    }
153}
154
155impl fmt::Display for FieldType {
156    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157        f.write_str(self.es_name())
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant that carries no payload. `DenseVector` is excluded
    /// because its fields are not recoverable from the type name alone and
    /// it is exercised by dedicated tests below.
    fn unit_variants() -> [FieldType; 13] {
        [
            FieldType::Text,
            FieldType::Keyword,
            FieldType::Integer,
            FieldType::Long,
            FieldType::Float,
            FieldType::Double,
            FieldType::Boolean,
            FieldType::Date,
            FieldType::GeoPoint,
            FieldType::Nested,
            FieldType::GeoShape,
            FieldType::TokenCount,
            FieldType::Ip,
        ]
    }

    /// `es_name` followed by `from_es_name` must return the same variant
    /// for every payload-free type, not just the M0 subset.
    #[test]
    fn round_trip_es_names() {
        for ft in &unit_variants() {
            let name = ft.es_name();
            let parsed = FieldType::from_es_name(name).unwrap();
            assert_eq!(&parsed, ft);
        }
    }

    /// The `dense_vector` name must parse to a dense vector and map back
    /// to the same name.
    #[test]
    fn dense_vector_name_round_trips() {
        let parsed = FieldType::from_es_name("dense_vector").unwrap();
        assert!(parsed.is_dense_vector());
        assert_eq!(parsed.es_name(), "dense_vector");
    }

    /// Unrecognized names are rejected; matching is exact, so a name that
    /// differs only in case is also rejected.
    #[test]
    fn unknown_type_is_error() {
        assert!(FieldType::from_es_name("percolator").is_err());
        assert!(FieldType::from_es_name("").is_err());
        assert!(FieldType::from_es_name("Text").is_err());
    }

    /// Numeric types are the integer/float variants plus `TokenCount`;
    /// everything else is non-numeric.
    #[test]
    fn is_numeric() {
        assert!(FieldType::Integer.is_numeric());
        assert!(FieldType::Long.is_numeric());
        assert!(FieldType::Float.is_numeric());
        assert!(FieldType::Double.is_numeric());
        assert!(FieldType::TokenCount.is_numeric());

        assert!(!FieldType::Text.is_numeric());
        assert!(!FieldType::Keyword.is_numeric());
        assert!(!FieldType::Boolean.is_numeric());
        assert!(!FieldType::Date.is_numeric());
        assert!(!FieldType::GeoPoint.is_numeric());
        assert!(!FieldType::Nested.is_numeric());
        assert!(!FieldType::GeoShape.is_numeric());
        assert!(!FieldType::Ip.is_numeric());
        assert!(!FieldType::dense_vector(3).is_numeric());
    }

    /// The `dense_vector` constructor stores the requested dimensionality
    /// together with the default quantization, and the vector accessors
    /// report `None` / `false` for every non-vector type.
    #[test]
    fn dense_vector_accessors() {
        let vector = FieldType::dense_vector(128);
        assert!(vector.is_dense_vector());
        assert_eq!(vector.vector_dims(), Some(128));
        assert_eq!(
            vector.vector_quantization(),
            Some(QuantizationType::DEFAULT)
        );

        for ft in &unit_variants() {
            assert!(!ft.is_dense_vector());
            assert!(ft.vector_dims().is_none());
            assert!(ft.vector_quantization().is_none());
        }
    }

    /// `Display` prints the ES-compatible type name.
    #[test]
    fn display() {
        assert_eq!(format!("{}", FieldType::Text), "text");
        assert_eq!(format!("{}", FieldType::Long), "long");
        assert_eq!(format!("{}", FieldType::GeoPoint), "geo_point");
        assert_eq!(format!("{}", FieldType::dense_vector(4)), "dense_vector");
    }
}