luci/mapping/field_type.rs
1use std::fmt;
2
3use crate::core::LuciError;
4
5use crate::mapping::quantization::QuantizationType;
6
7/// The data type of a field in a Luci index.
8///
9/// M0 types cover text, keyword, numeric, boolean, and date. Vector,
10/// geospatial, and nested types are added in later milestones.
11///
12/// See [[architecture-api-surface#Schema Definition]].
13#[derive(Clone, Debug, PartialEq, Eq, Hash)]
14pub enum FieldType {
15 /// Full-text field, analyzed into tokens for search. Supports `match`,
16 /// `match_phrase`, and `query_string` queries.
17 Text,
18 /// Exact-value string field. Not analyzed. Supports `term`, `terms`,
19 /// `prefix`, `wildcard`, `regexp`, and `exists` queries. Stored as
20 /// dictionary-encoded columnar data for fast aggregations and sorting.
21 Keyword,
22 /// 32-bit signed integer.
23 Integer,
24 /// 64-bit signed integer.
25 Long,
26 /// 32-bit IEEE 754 float.
27 Float,
28 /// 64-bit IEEE 754 double.
29 Double,
30 /// Boolean value (`true` / `false`).
31 Boolean,
32 /// Date/time value. Stored internally as epoch milliseconds (i64).
33 /// Accepts ISO 8601 strings and epoch millis on input.
34 Date,
35 /// Dense vector for kNN search. Fixed dimensionality and quantization
36 /// scheme. Both must be set explicitly when constructing the variant
37 /// directly; [`Self::dense_vector`] builds one with [`QuantizationType::DEFAULT`].
38 DenseVector {
39 dims: usize,
40 quantization: QuantizationType,
41 },
42 /// Geographic point (latitude, longitude).
43 GeoPoint,
44 /// Nested object array — each object indexed as a hidden document.
45 Nested,
46 /// Geographic shape (Polygon, LineString, etc.) for spatial relation queries.
47 /// Indexed in a packed R-tree for efficient candidate selection.
48 ///
49 /// See [[feature-geo-shape]] and [[geospatial]].
50 GeoShape,
51 /// Token count: accepts a string, analyzes it, stores the number of tokens
52 /// as an integer. Supports range queries and numeric aggregations.
53 /// In ES this is typically a multi-field; in Luci it's a standalone field.
54 TokenCount,
55 /// IP address (IPv4 and IPv6). Stored as keyword for term queries and
56 /// as numeric for range queries. Supports CIDR notation in term queries.
57 Ip,
58}
59
60impl FieldType {
61 /// Parse a field type name from an ES-compatible mapping string.
62 ///
63 /// # Errors
64 ///
65 /// Returns `LuciError::InvalidQuery` for unrecognized type names.
66 pub fn from_es_name(name: &str) -> crate::core::Result<Self> {
67 match name {
68 "text" => Ok(Self::Text),
69 "keyword" => Ok(Self::Keyword),
70 "integer" => Ok(Self::Integer),
71 "long" => Ok(Self::Long),
72 "float" => Ok(Self::Float),
73 "double" => Ok(Self::Double),
74 "boolean" => Ok(Self::Boolean),
75 "date" => Ok(Self::Date),
76 // dims and quantization are set by the JSON parser from
77 // sibling fields after this returns; the placeholder here
78 // is overwritten before the value reaches user code.
79 "dense_vector" => Ok(Self::DenseVector {
80 dims: 0,
81 quantization: QuantizationType::DEFAULT,
82 }),
83 "geo_point" => Ok(Self::GeoPoint),
84 "nested" => Ok(Self::Nested),
85 "geo_shape" => Ok(Self::GeoShape),
86 "token_count" => Ok(Self::TokenCount),
87 "ip" => Ok(Self::Ip),
88 _ => Err(LuciError::InvalidQuery(format!(
89 "unsupported field type: {name}"
90 ))),
91 }
92 }
93
94 /// The ES-compatible type name used in JSON mappings.
95 pub fn es_name(&self) -> &'static str {
96 match self {
97 Self::Text => "text",
98 Self::Keyword => "keyword",
99 Self::Integer => "integer",
100 Self::Long => "long",
101 Self::Float => "float",
102 Self::Double => "double",
103 Self::Boolean => "boolean",
104 Self::Date => "date",
105 Self::DenseVector { .. } => "dense_vector",
106 Self::GeoPoint => "geo_point",
107 Self::Nested => "nested",
108 Self::GeoShape => "geo_shape",
109 Self::TokenCount => "token_count",
110 Self::Ip => "ip",
111 }
112 }
113
114 /// Whether this type is numeric (supports range queries, numeric sort,
115 /// and metric aggregations).
116 pub fn is_numeric(&self) -> bool {
117 matches!(
118 self,
119 Self::Integer | Self::Long | Self::Float | Self::Double | Self::TokenCount
120 )
121 }
122
123 /// Whether this type is a dense vector.
124 pub fn is_dense_vector(&self) -> bool {
125 matches!(self, Self::DenseVector { .. })
126 }
127
128 /// Get vector dimensions, or None if not a vector type.
129 pub fn vector_dims(&self) -> Option<usize> {
130 match self {
131 Self::DenseVector { dims, .. } => Some(*dims),
132 _ => None,
133 }
134 }
135
136 /// Get the configured quantization scheme, or None if not a vector type.
137 pub fn vector_quantization(&self) -> Option<QuantizationType> {
138 match self {
139 Self::DenseVector { quantization, .. } => Some(*quantization),
140 _ => None,
141 }
142 }
143
144 /// Construct a `DenseVector` field type with the given dimensionality
145 /// and the default quantization. Convenience for tests and code that
146 /// doesn't need to override the default.
147 pub fn dense_vector(dims: usize) -> Self {
148 Self::DenseVector {
149 dims,
150 quantization: QuantizationType::DEFAULT,
151 }
152 }
153}
154
155impl fmt::Display for FieldType {
156 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157 f.write_str(self.es_name())
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// One value per variant. `DenseVector` uses the placeholder shape
    /// (`dims: 0`, default quantization) that `from_es_name` produces, so the
    /// list can be compared directly against parsed values.
    fn all_types() -> Vec<FieldType> {
        vec![
            FieldType::Text,
            FieldType::Keyword,
            FieldType::Integer,
            FieldType::Long,
            FieldType::Float,
            FieldType::Double,
            FieldType::Boolean,
            FieldType::Date,
            FieldType::dense_vector(0),
            FieldType::GeoPoint,
            FieldType::Nested,
            FieldType::GeoShape,
            FieldType::TokenCount,
            FieldType::Ip,
        ]
    }

    #[test]
    fn round_trip_es_names() {
        // Covers every variant, not just the M0 ones, so a name added to
        // only one of `es_name` / `from_es_name` is caught here.
        for ft in all_types() {
            let name = ft.es_name();
            let parsed = FieldType::from_es_name(name).unwrap();
            assert_eq!(parsed, ft, "round trip failed for {name}");
        }
    }

    #[test]
    fn unknown_type_is_error() {
        assert!(FieldType::from_es_name("percolator").is_err());
        assert!(FieldType::from_es_name("").is_err());
        // Names are matched exactly, so a different case is not accepted.
        assert!(FieldType::from_es_name("Text").is_err());
    }

    #[test]
    fn is_numeric() {
        let numeric = [
            FieldType::Integer,
            FieldType::Long,
            FieldType::Float,
            FieldType::Double,
            FieldType::TokenCount,
        ];
        for ft in &numeric {
            assert!(ft.is_numeric(), "{ft} should be numeric");
        }

        let non_numeric = [
            FieldType::Text,
            FieldType::Keyword,
            FieldType::Boolean,
            FieldType::Date,
            FieldType::dense_vector(3),
            FieldType::GeoPoint,
            FieldType::Nested,
            FieldType::GeoShape,
            FieldType::Ip,
        ];
        for ft in &non_numeric {
            assert!(!ft.is_numeric(), "{ft} should not be numeric");
        }
    }

    #[test]
    fn dense_vector_accessors() {
        let vector = FieldType::dense_vector(128);
        assert!(vector.is_dense_vector());
        assert_eq!(vector.vector_dims(), Some(128));
        assert_eq!(
            vector.vector_quantization(),
            Some(QuantizationType::DEFAULT)
        );

        // Parsing the bare type name yields the documented placeholder.
        let parsed = FieldType::from_es_name("dense_vector").unwrap();
        assert!(parsed.is_dense_vector());
        assert_eq!(parsed.vector_dims(), Some(0));

        // Non-vector types report no vector parameters.
        assert!(!FieldType::Keyword.is_dense_vector());
        assert_eq!(FieldType::Keyword.vector_dims(), None);
        assert_eq!(FieldType::Keyword.vector_quantization(), None);
    }

    #[test]
    fn display() {
        assert_eq!(format!("{}", FieldType::Text), "text");
        assert_eq!(format!("{}", FieldType::Long), "long");
        // Display mirrors `es_name` for every variant.
        for ft in all_types() {
            assert_eq!(format!("{}", ft), ft.es_name());
        }
    }
}
207}