1use std::collections::HashMap;
2
3use crate::core::{FieldId, LuciError, Result};
4use serde_json::Value;
5
6use crate::mapping::field_type::FieldType;
7use crate::mapping::mapping::FieldMapping;
8use crate::mapping::quantization::QuantizationType;
9
/// Value of the mapping-level `"dynamic"` setting.
///
/// Converted to and from its textual form by [`DynamicMode::from_es_value`]
/// and [`DynamicMode::es_value`].
///
/// NOTE(review): presumably this decides whether unmapped fields are added
/// automatically, as in Elasticsearch; that behaviour is implemented outside
/// this file, so confirm against the indexing code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DynamicMode {
    /// Written as `"true"`; the value a fresh builder starts with.
    True,
    /// Written as `"false"`.
    False,
}
24
25impl DynamicMode {
26 pub fn from_es_value(s: &str) -> Result<Self> {
28 match s {
29 "true" => Ok(Self::True),
30 "false" => Ok(Self::False),
31 _ => Err(LuciError::InvalidQuery(format!(
32 "invalid dynamic mode: {s} (expected \"true\" or \"false\")"
33 ))),
34 }
35 }
36
37 pub fn es_value(self) -> &'static str {
39 match self {
40 Self::True => "true",
41 Self::False => "false",
42 }
43 }
44}
45
/// A set of field definitions with lookup by name and by `FieldId`.
#[derive(Clone, Debug)]
pub struct Mapping {
    /// Field definitions; a field's position in this vector is the index its
    /// `FieldId` is created from.
    fields: Vec<FieldMapping>,
    /// Maps each field name to the id of its entry in `fields`.
    name_to_id: HashMap<String, FieldId>,
    /// Value of the mapping-level `dynamic` setting.
    dynamic: DynamicMode,
}
60
61impl Mapping {
62 pub fn builder() -> MappingBuilder {
64 MappingBuilder {
65 fields: Vec::new(),
66 dynamic: DynamicMode::True,
67 }
68 }
69
70 pub fn ensure_id_field(&mut self) {
73 if self.name_to_id.contains_key("_id") {
74 return;
75 }
76 let mut id_mapping = FieldMapping::new("_id", FieldType::Keyword);
77 id_mapping.stored = false;
78 let id = FieldId::new(self.fields.len() as u16);
79 self.name_to_id.insert("_id".to_string(), id);
80 self.fields.push(id_mapping);
81 }
82
83 pub fn field_id(&self, name: &str) -> Option<FieldId> {
85 self.name_to_id.get(name).copied()
86 }
87
88 pub fn field(&self, id: FieldId) -> &FieldMapping {
94 &self.fields[id.as_u16() as usize]
95 }
96
97 pub fn fields(&self) -> &[FieldMapping] {
99 &self.fields
100 }
101
102 pub fn len(&self) -> usize {
104 self.fields.len()
105 }
106
107 pub fn is_empty(&self) -> bool {
109 self.fields.is_empty()
110 }
111
112 pub fn dynamic_mode(&self) -> DynamicMode {
114 self.dynamic
115 }
116
117 pub fn validate(&self) -> Result<()> {
129 for field in &self.fields {
130 if matches!(field.field_type.vector_dims(), Some(0)) {
137 return Err(LuciError::InvalidQuery(format!(
138 "dense_vector field \"{}\" must have dims >= 1",
139 field.name,
140 )));
141 }
142 if field.analyzer.is_some()
148 && !matches!(field.field_type, FieldType::Text | FieldType::TokenCount)
149 {
150 return Err(LuciError::InvalidQuery(format!(
151 "field \"{}\": \"analyzer\" is not supported for field type \"{}\"",
152 field.name,
153 field.field_type.es_name()
154 )));
155 }
156 if field.search_analyzer.is_some() && !matches!(field.field_type, FieldType::Text) {
157 return Err(LuciError::InvalidQuery(format!(
158 "field \"{}\": \"search_analyzer\" is not supported for field type \"{}\"",
159 field.name,
160 field.field_type.es_name()
161 )));
162 }
163 for target in &field.copy_to {
164 if !self.name_to_id.contains_key(target) {
165 return Err(LuciError::InvalidQuery(format!(
166 "field \"{src}\" has copy_to target \"{target}\" \
167 that is not defined in the schema",
168 src = field.name,
169 )));
170 }
171 }
172 }
173 Ok(())
174 }
175
176 pub fn to_json(&self) -> Value {
180 let mut properties = serde_json::Map::new();
181
182 for mapping in &self.fields {
183 if mapping.parent_field.is_some() {
185 continue;
186 }
187
188 let mut field_obj = serde_json::Map::new();
189 field_obj.insert(
190 "type".into(),
191 Value::String(mapping.field_type.es_name().into()),
192 );
193
194 if let FieldType::DenseVector { dims, quantization } = mapping.field_type {
199 field_obj.insert("dims".into(), Value::Number(dims.into()));
200 if quantization != QuantizationType::DEFAULT {
201 field_obj.insert(
202 "quantization".into(),
203 Value::String(quantization.es_name().into()),
204 );
205 }
206 }
207
208 if let Some(ref analyzer) = mapping.analyzer {
209 field_obj.insert("analyzer".into(), Value::String(analyzer.clone()));
210 }
211 if let Some(ref search_analyzer) = mapping.search_analyzer {
212 field_obj.insert(
213 "search_analyzer".into(),
214 Value::String(search_analyzer.clone()),
215 );
216 }
217
218 let defaults = FieldMapping::new("", mapping.field_type.clone());
220 if mapping.stored != defaults.stored {
221 field_obj.insert("store".into(), Value::Bool(mapping.stored));
222 }
223 if mapping.indexed != defaults.indexed {
224 field_obj.insert("index".into(), Value::Bool(mapping.indexed));
225 }
226 if mapping.doc_values != defaults.doc_values {
227 field_obj.insert("doc_values".into(), Value::Bool(mapping.doc_values));
228 }
229 if mapping.norms != defaults.norms {
230 field_obj.insert("norms".into(), Value::Bool(mapping.norms));
231 }
232 if !mapping.copy_to.is_empty() {
233 if mapping.copy_to.len() == 1 {
234 field_obj.insert("copy_to".into(), Value::String(mapping.copy_to[0].clone()));
235 } else {
236 field_obj.insert(
237 "copy_to".into(),
238 Value::Array(
239 mapping
240 .copy_to
241 .iter()
242 .map(|s| Value::String(s.clone()))
243 .collect(),
244 ),
245 );
246 }
247 }
248
249 let prefix = format!("{}.", mapping.name);
251 let sub_fields: Vec<&FieldMapping> = self
252 .fields
253 .iter()
254 .filter(|f| f.parent_field.as_deref() == Some(&mapping.name))
255 .collect();
256 if !sub_fields.is_empty() {
257 let mut fields_obj = serde_json::Map::new();
258 for sub in sub_fields {
259 let sub_name = sub.name.strip_prefix(&prefix).unwrap_or(&sub.name);
260 let mut sub_obj = serde_json::Map::new();
261 sub_obj.insert(
262 "type".into(),
263 Value::String(sub.field_type.es_name().into()),
264 );
265 if let Some(ref a) = sub.analyzer {
266 sub_obj.insert("analyzer".into(), Value::String(a.clone()));
267 }
268 if let Some(ref sa) = sub.search_analyzer {
269 sub_obj.insert("search_analyzer".into(), Value::String(sa.clone()));
270 }
271 fields_obj.insert(sub_name.to_string(), Value::Object(sub_obj));
272 }
273 field_obj.insert("fields".into(), Value::Object(fields_obj));
274 }
275
276 properties.insert(mapping.name.clone(), Value::Object(field_obj));
277 }
278
279 let mut mappings = serde_json::Map::new();
280 if self.dynamic != DynamicMode::True {
281 mappings.insert(
282 "dynamic".into(),
283 Value::String(self.dynamic.es_value().into()),
284 );
285 }
286 mappings.insert("properties".into(), Value::Object(properties));
287
288 let mut root = serde_json::Map::new();
289 root.insert("mappings".into(), Value::Object(mappings));
290 Value::Object(root)
291 }
292
293 pub fn from_json(json: &Value) -> Result<Self> {
303 let mappings_obj = if let Some(m) = json.get("mappings") {
305 m
306 } else {
307 json
308 };
309
310 let mut builder = MappingBuilder {
311 fields: Vec::new(),
312 dynamic: DynamicMode::True,
313 };
314
315 if let Some(dyn_val) = mappings_obj.get("dynamic") {
317 let mode_str = match dyn_val {
318 Value::String(s) => s.as_str(),
319 Value::Bool(true) => "true",
320 Value::Bool(false) => "false",
321 _ => {
322 return Err(LuciError::InvalidQuery(
323 "\"dynamic\" must be a string or boolean".into(),
324 ));
325 }
326 };
327 builder.dynamic = DynamicMode::from_es_value(mode_str)?;
328 }
329
330 let properties = mappings_obj
332 .get("properties")
333 .and_then(|p| p.as_object())
334 .ok_or_else(|| {
335 LuciError::InvalidQuery("missing or invalid \"properties\" object".into())
336 })?;
337
338 for (name, field_def) in properties {
339 let field_obj = field_def.as_object().ok_or_else(|| {
340 LuciError::InvalidQuery(format!(
341 "field \"{name}\": expected object, got {field_def}"
342 ))
343 })?;
344
345 let type_name = field_obj
346 .get("type")
347 .and_then(|t| t.as_str())
348 .ok_or_else(|| {
349 LuciError::InvalidQuery(format!("field \"{name}\": missing \"type\" property"))
350 })?;
351
352 let mut field_type = FieldType::from_es_name(type_name)?;
353 if let FieldType::DenseVector {
356 ref mut dims,
357 ref mut quantization,
358 } = field_type
359 {
360 parse_dense_vector_config(name, field_obj, dims, quantization)?;
361 }
362 let is_nested = matches!(field_type, FieldType::Nested);
363 let is_dense = field_type.is_dense_vector();
364 let mut mapping = FieldMapping::new(name.clone(), field_type);
365
366 if !is_dense {
369 parse_field_options(name, field_obj, &mut mapping, FieldRole::Field)?;
370 }
371
372 builder.fields.push(mapping);
373
374 if is_nested {
378 if let Some(nested_props) = field_obj.get("properties").and_then(|v| v.as_object())
379 {
380 parse_nested_properties(&mut builder, name, nested_props)?;
381 }
382 }
383
384 if let Some(sub_fields) = field_obj.get("fields").and_then(|v| v.as_object()) {
388 for (sub_name, sub_def) in sub_fields {
389 let sub_label = format!("{name}.{sub_name}");
390 let sub_obj = sub_def.as_object().ok_or_else(|| {
391 LuciError::InvalidQuery(format!("field \"{sub_label}\": expected object"))
392 })?;
393 let sub_type_name =
394 sub_obj
395 .get("type")
396 .and_then(|t| t.as_str())
397 .ok_or_else(|| {
398 LuciError::InvalidQuery(format!(
399 "field \"{sub_label}\": missing \"type\""
400 ))
401 })?;
402 let sub_type = FieldType::from_es_name(sub_type_name)?;
403 let mut sub_mapping = FieldMapping::new(sub_label.clone(), sub_type);
404 sub_mapping.stored = false; sub_mapping.parent_field = Some(name.clone());
406 parse_field_options(
407 &sub_label,
408 sub_obj,
409 &mut sub_mapping,
410 FieldRole::SubField,
411 )?;
412 builder.fields.push(sub_mapping);
413 }
414 }
415 }
416
417 let mapping = builder.build();
418 mapping.validate()?;
419 Ok(mapping)
420 }
421}
422
/// Position of a field definition within the mapping JSON, which decides
/// which options `validate_field_options` and `parse_field_options` allow.
#[derive(Clone, Copy, PartialEq, Eq)]
enum FieldRole {
    /// A regular property (top-level or a child of a nested field).
    Field,
    /// An entry of another field's `fields` object; `store` and `copy_to`
    /// are rejected for this role.
    SubField,
}
442
/// Option names that are supported on at least one field type.
///
/// `validate_field_options` uses this list to report an option that exists
/// but does not apply to the field's type as "not supported for field type"
/// rather than as an unknown option.
const LUCI_OPTIONS: &[&str] = &[
    "index",
    "store",
    "doc_values",
    "norms",
    "analyzer",
    "search_analyzer",
    "copy_to",
];
456
/// Option names that `validate_field_options` rejects with a
/// "recognized but not yet supported" error instead of "unknown option".
///
/// NOTE(review): going by the constant's name these are Elasticsearch mapping
/// parameters that have no implementation here yet; confirm before adding or
/// removing entries.
const UNIMPLEMENTED_ES_PARAMS: &[&str] = &[
    "ignore_above",
    "null_value",
    "coerce",
    "enabled",
    "ignore_malformed",
    "format",
    "locale",
    "normalizer",
    "similarity",
    "term_vector",
    "index_options",
    "index_prefixes",
    "index_phrases",
    "position_increment_gap",
    "fielddata",
    "fielddata_frequency_filter",
    "ignore_z_value",
    "orientation",
    "eager_global_ordinals",
    "meta",
    "scaling_factor",
    "split_queries_on_whitespace",
    "search_quote_analyzer",
    "time_series_dimension",
    "time_series_metric",
    "boost",
];
490
491fn supported_options(field_type: &FieldType) -> &'static [&'static str] {
495 match field_type {
496 FieldType::Text => &[
497 "index",
498 "store",
499 "analyzer",
500 "search_analyzer",
501 "norms",
502 "copy_to",
503 ],
504 FieldType::TokenCount => &["index", "store", "doc_values", "analyzer", "copy_to"],
505 FieldType::Keyword
506 | FieldType::Ip
507 | FieldType::Integer
508 | FieldType::Long
509 | FieldType::Float
510 | FieldType::Double
511 | FieldType::Boolean
512 | FieldType::Date
513 | FieldType::GeoPoint
514 | FieldType::GeoShape => &["index", "store", "doc_values", "copy_to"],
515 FieldType::DenseVector { .. } | FieldType::Nested => &[],
518 }
519}
520
521fn validate_field_options(
526 field_label: &str,
527 field_type: &FieldType,
528 field_obj: &serde_json::Map<String, Value>,
529 role: FieldRole,
530) -> Result<()> {
531 for key in field_obj.keys() {
532 let k = key.as_str();
533 if matches!(k, "type" | "fields" | "properties") {
534 continue; }
536 if supported_options(field_type).contains(&k) {
537 if role == FieldRole::SubField && matches!(k, "store" | "copy_to") {
538 return Err(LuciError::InvalidQuery(format!(
539 "field \"{field_label}\": option \"{k}\" is not supported on a \
540 multi-field sub-field"
541 )));
542 }
543 continue;
544 }
545 if LUCI_OPTIONS.contains(&k) {
546 return Err(LuciError::InvalidQuery(format!(
547 "field \"{field_label}\": option \"{k}\" is not supported for field type \"{ft}\"",
548 ft = field_type.es_name()
549 )));
550 }
551 if UNIMPLEMENTED_ES_PARAMS.contains(&k) {
552 return Err(LuciError::InvalidQuery(format!(
553 "field \"{field_label}\": option \"{k}\" is recognized but not yet supported"
554 )));
555 }
556 return Err(LuciError::InvalidQuery(format!(
557 "field \"{field_label}\": unknown option \"{k}\""
558 )));
559 }
560 Ok(())
561}
562
563fn opt_bool(
567 obj: &serde_json::Map<String, Value>,
568 key: &str,
569 field_label: &str,
570) -> Result<Option<bool>> {
571 match obj.get(key) {
572 Some(v) if !v.is_null() => v.as_bool().map(Some).ok_or_else(|| {
573 LuciError::InvalidQuery(format!(
574 "field \"{field_label}\": \"{key}\" must be a boolean, got {v}"
575 ))
576 }),
577 _ => Ok(None),
578 }
579}
580
581fn opt_str<'a>(
584 obj: &'a serde_json::Map<String, Value>,
585 key: &str,
586 field_label: &str,
587) -> Result<Option<&'a str>> {
588 match obj.get(key) {
589 Some(v) if !v.is_null() => v.as_str().map(Some).ok_or_else(|| {
590 LuciError::InvalidQuery(format!(
591 "field \"{field_label}\": \"{key}\" must be a string, got {v}"
592 ))
593 }),
594 _ => Ok(None),
595 }
596}
597
598fn parse_copy_to(
603 obj: &serde_json::Map<String, Value>,
604 field_label: &str,
605 mapping: &mut FieldMapping,
606) -> Result<()> {
607 match obj.get("copy_to") {
608 None | Some(Value::Null) => {}
609 Some(Value::String(s)) => mapping.copy_to = vec![s.clone()],
610 Some(Value::Array(arr)) => {
611 mapping.copy_to = arr
612 .iter()
613 .map(|v| {
614 v.as_str().map(String::from).ok_or_else(|| {
615 LuciError::InvalidQuery(format!(
616 "field \"{field_label}\": copy_to entries must be strings, got {v}"
617 ))
618 })
619 })
620 .collect::<Result<Vec<_>>>()?;
621 }
622 Some(other) => {
623 return Err(LuciError::InvalidQuery(format!(
624 "field \"{field_label}\": copy_to must be a string or array of strings, got {other}"
625 )));
626 }
627 }
628 Ok(())
629}
630
631fn parse_field_options(
637 field_label: &str,
638 field_obj: &serde_json::Map<String, Value>,
639 mapping: &mut FieldMapping,
640 role: FieldRole,
641) -> Result<()> {
642 validate_field_options(field_label, &mapping.field_type, field_obj, role)?;
643 if let Some(v) = opt_bool(field_obj, "index", field_label)? {
644 mapping.indexed = v;
645 }
646 if let Some(v) = opt_bool(field_obj, "doc_values", field_label)? {
647 mapping.doc_values = v;
648 }
649 if let Some(v) = opt_bool(field_obj, "norms", field_label)? {
650 mapping.norms = v;
651 }
652 if let Some(v) = opt_str(field_obj, "analyzer", field_label)? {
653 mapping.analyzer = Some(v.to_string());
654 }
655 if let Some(v) = opt_str(field_obj, "search_analyzer", field_label)? {
656 mapping.search_analyzer = Some(v.to_string());
657 }
658 if role == FieldRole::Field {
661 if let Some(v) = opt_bool(field_obj, "store", field_label)? {
662 mapping.stored = v;
663 }
664 parse_copy_to(field_obj, field_label, mapping)?;
665 }
666 Ok(())
667}
668
669fn parse_dense_vector_config(
683 field_label: &str,
684 field_obj: &serde_json::Map<String, Value>,
685 dims: &mut usize,
686 quantization: &mut QuantizationType,
687) -> Result<()> {
688 for key in field_obj.keys() {
691 match key.as_str() {
692 "type" | "dims" | "quantization" => {}
693 "similarity" | "index" | "index_options" | "element_type" => {
697 return Err(LuciError::InvalidQuery(format!(
698 "field \"{field_label}\": dense_vector option \"{key}\" is recognized but not yet implemented"
699 )));
700 }
701 other => {
702 return Err(LuciError::InvalidQuery(format!(
703 "field \"{field_label}\": unknown dense_vector option \"{other}\""
704 )));
705 }
706 }
707 }
708
709 let dims_val = field_obj.get("dims").ok_or_else(|| {
712 LuciError::InvalidQuery(format!(
713 "field \"{field_label}\": dense_vector requires \"dims\""
714 ))
715 })?;
716 let d = dims_val.as_u64().ok_or_else(|| {
717 LuciError::InvalidQuery(format!(
718 "field \"{field_label}\": \"dims\" must be a positive integer, got {dims_val}"
719 ))
720 })?;
721 if d == 0 {
722 return Err(LuciError::InvalidQuery(format!(
723 "field \"{field_label}\": \"dims\" must be >= 1"
724 )));
725 }
726 *dims = d as usize;
727
728 if let Some(q_val) = field_obj.get("quantization") {
731 let q_str = q_val.as_str().ok_or_else(|| {
732 LuciError::InvalidQuery(format!(
733 "field \"{field_label}\": \"quantization\" must be a string, got {q_val}"
734 ))
735 })?;
736 *quantization = QuantizationType::from_es_name(q_str)?;
737 }
738
739 Ok(())
740}
741
742fn parse_nested_properties(
748 builder: &mut MappingBuilder,
749 prefix: &str,
750 properties: &serde_json::Map<String, Value>,
751) -> Result<()> {
752 for (child_name, child_def) in properties {
753 let child_obj = child_def.as_object().ok_or_else(|| {
754 LuciError::InvalidQuery(format!("field \"{prefix}.{child_name}\": expected object"))
755 })?;
756 let type_name = child_obj
757 .get("type")
758 .and_then(|t| t.as_str())
759 .ok_or_else(|| {
760 LuciError::InvalidQuery(format!(
761 "field \"{prefix}.{child_name}\": missing \"type\""
762 ))
763 })?;
764 let mut field_type = FieldType::from_es_name(type_name)?;
765 let full_name = format!("{prefix}.{child_name}");
766 if let FieldType::DenseVector {
769 ref mut dims,
770 ref mut quantization,
771 } = field_type
772 {
773 parse_dense_vector_config(&full_name, child_obj, dims, quantization)?;
774 }
775 let is_nested = matches!(field_type, FieldType::Nested);
776 let is_dense = field_type.is_dense_vector();
777 let mut mapping = FieldMapping::new(full_name.clone(), field_type);
778 if !is_dense {
781 parse_field_options(&full_name, child_obj, &mut mapping, FieldRole::Field)?;
782 }
783 builder.fields.push(mapping);
784
785 if is_nested {
786 if let Some(sub_props) = child_obj.get("properties").and_then(|v| v.as_object()) {
787 parse_nested_properties(builder, &full_name, sub_props)?;
788 }
789 }
790 }
791 Ok(())
792}
793
/// Collects field definitions and the `dynamic` setting before they are
/// turned into a `Mapping` by `build`.
pub struct MappingBuilder {
    /// Fields in the order they were added; `build` derives each field's id
    /// from its position here.
    fields: Vec<FieldMapping>,
    /// The `dynamic` setting the built mapping will carry.
    dynamic: DynamicMode,
}
807
808impl MappingBuilder {
809 pub fn field(mut self, name: impl Into<String>, field_type: FieldType) -> Self {
811 self.fields.push(FieldMapping::new(name, field_type));
812 self
813 }
814
815 pub fn field_with_mapping(mut self, mapping: FieldMapping) -> Self {
817 self.fields.push(mapping);
818 self
819 }
820
821 pub fn dynamic(mut self, mode: DynamicMode) -> Self {
823 self.dynamic = mode;
824 self
825 }
826
827 pub fn build(self) -> Mapping {
829 let mut name_to_id = HashMap::with_capacity(self.fields.len());
830 for (i, mapping) in self.fields.iter().enumerate() {
831 name_to_id.insert(mapping.name.clone(), FieldId::new(i as u16));
832 }
833
834 Mapping {
835 fields: self.fields,
836 name_to_id,
837 dynamic: self.dynamic,
838 }
839 }
840}
841
842#[cfg(test)]
843mod tests {
844 use super::*;
845
846 #[test]
847 fn builder_basic() {
848 let mapping = Mapping::builder()
849 .field("title", FieldType::Text)
850 .field("status", FieldType::Keyword)
851 .field("price", FieldType::Float)
852 .build();
853
854 assert_eq!(mapping.len(), 3);
855 assert_eq!(mapping.field_id("title"), Some(FieldId::new(0)));
856 assert_eq!(mapping.field_id("status"), Some(FieldId::new(1)));
857 assert_eq!(mapping.field_id("price"), Some(FieldId::new(2)));
858 assert_eq!(mapping.field_id("nonexistent"), None);
859
860 assert_eq!(mapping.field(FieldId::new(0)).field_type, FieldType::Text);
861 assert_eq!(
862 mapping.field(FieldId::new(1)).field_type,
863 FieldType::Keyword
864 );
865 assert_eq!(mapping.field(FieldId::new(2)).field_type, FieldType::Float);
866 }
867
868 #[test]
869 fn builder_with_mapping() {
870 let mapping = Mapping::builder()
871 .field_with_mapping(
872 FieldMapping::new("body", FieldType::Text)
873 .analyzer("whitespace")
874 .norms(false),
875 )
876 .build();
877
878 let m = mapping.field(FieldId::new(0));
879 assert_eq!(m.analyzer.as_deref(), Some("whitespace"));
880 assert!(!m.norms);
881 }
882
883 #[test]
884 fn dynamic_mode_default_is_true() {
885 let mapping = Mapping::builder().build();
886 assert_eq!(mapping.dynamic_mode(), DynamicMode::True);
887 }
888
889 #[test]
890 fn dynamic_mode_false() {
891 let mapping = Mapping::builder().dynamic(DynamicMode::False).build();
892 assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
893 }
894
895 #[test]
896 fn json_round_trip() {
897 let mapping = Mapping::builder()
898 .field("title", FieldType::Text)
899 .field("status", FieldType::Keyword)
900 .field("price", FieldType::Float)
901 .field("count", FieldType::Long)
902 .field("active", FieldType::Boolean)
903 .field("created", FieldType::Date)
904 .dynamic(DynamicMode::False)
905 .build();
906
907 let json = mapping.to_json();
908 let parsed = Mapping::from_json(&json).unwrap();
909
910 assert_eq!(parsed.len(), mapping.len());
911 assert_eq!(parsed.dynamic_mode(), DynamicMode::False);
912
913 for mapping in mapping.fields() {
914 let id = parsed.field_id(&mapping.name).unwrap();
915 let parsed_mapping = parsed.field(id);
916 assert_eq!(parsed_mapping.field_type, mapping.field_type);
917 assert_eq!(parsed_mapping.stored, mapping.stored);
918 assert_eq!(parsed_mapping.indexed, mapping.indexed);
919 assert_eq!(parsed_mapping.doc_values, mapping.doc_values);
920 assert_eq!(parsed_mapping.norms, mapping.norms);
921 }
922 }
923
924 #[test]
925 fn json_round_trip_with_analyzer() {
926 let mapping = Mapping::builder()
927 .field_with_mapping(FieldMapping::new("body", FieldType::Text).analyzer("standard"))
928 .build();
929
930 let json = mapping.to_json();
931 let parsed = Mapping::from_json(&json).unwrap();
932
933 assert_eq!(
934 parsed.field(FieldId::new(0)).analyzer.as_deref(),
935 Some("standard")
936 );
937 }
938
939 #[test]
940 fn json_round_trip_with_custom_flags() {
941 let mapping = Mapping::builder()
942 .field_with_mapping(
943 FieldMapping::new("body", FieldType::Text)
944 .stored(false)
945 .norms(false),
946 )
947 .build();
948
949 let json = mapping.to_json();
950 let parsed = Mapping::from_json(&json).unwrap();
951
952 let m = parsed.field(FieldId::new(0));
953 assert!(!m.stored);
954 assert!(!m.norms);
955 }
956
957 #[test]
958 fn parse_es_mapping_json() {
959 let json: Value = serde_json::from_str(
960 r#"{
961 "mappings": {
962 "dynamic": "false",
963 "properties": {
964 "title": {"type": "text", "analyzer": "standard"},
965 "status": {"type": "keyword"},
966 "price": {"type": "float"}
967 }
968 }
969 }"#,
970 )
971 .unwrap();
972
973 let mapping = Mapping::from_json(&json).unwrap();
974 assert_eq!(mapping.len(), 3);
975 assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
976 assert!(mapping.field_id("title").is_some());
977 assert!(mapping.field_id("status").is_some());
978 assert!(mapping.field_id("price").is_some());
979 }
980
981 #[test]
982 fn parse_shorthand_json() {
983 let json: Value = serde_json::from_str(
984 r#"{
985 "properties": {
986 "name": {"type": "keyword"}
987 }
988 }"#,
989 )
990 .unwrap();
991
992 let mapping = Mapping::from_json(&json).unwrap();
993 assert_eq!(mapping.len(), 1);
994 }
995
996 #[test]
997 fn parse_dynamic_as_boolean() {
998 let json: Value = serde_json::from_str(
999 r#"{
1000 "properties": {"x": {"type": "keyword"}},
1001 "dynamic": false
1002 }"#,
1003 )
1004 .unwrap();
1005
1006 let mapping = Mapping::from_json(&json).unwrap();
1007 assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
1008 }
1009
1010 #[test]
1011 fn parse_missing_type_is_error() {
1012 let json: Value = serde_json::from_str(r#"{"properties": {"x": {}}}"#).unwrap();
1013 assert!(Mapping::from_json(&json).is_err());
1014 }
1015
1016 #[test]
1017 fn parse_unknown_type_is_error() {
1018 let json: Value =
1019 serde_json::from_str(r#"{"properties": {"x": {"type": "percolator"}}}"#).unwrap();
1020 assert!(Mapping::from_json(&json).is_err());
1021 }
1022
1023 #[test]
1024 fn parse_missing_properties_is_error() {
1025 let json: Value = serde_json::from_str(r#"{"mappings": {}}"#).unwrap();
1026 assert!(Mapping::from_json(&json).is_err());
1027 }
1028
1029 #[test]
1030 fn dynamic_mode_round_trip() {
1031 for mode in [DynamicMode::True, DynamicMode::False] {
1032 let parsed = DynamicMode::from_es_value(mode.es_value()).unwrap();
1033 assert_eq!(parsed, mode);
1034 }
1035 }
1036
1037 #[test]
1038 fn empty_schema() {
1039 let mapping = Mapping::builder().build();
1040 assert!(mapping.is_empty());
1041 assert_eq!(mapping.len(), 0);
1042 }
1043
1044 #[test]
1045 fn parse_nested_properties_flattened() {
1046 let json: Value = serde_json::from_str(
1047 r#"{
1048 "properties": {
1049 "title": {"type": "text"},
1050 "offers": {
1051 "type": "nested",
1052 "properties": {
1053 "seller": {"type": "keyword"},
1054 "price": {"type": "float"}
1055 }
1056 }
1057 }
1058 }"#,
1059 )
1060 .unwrap();
1061
1062 let mapping = Mapping::from_json(&json).unwrap();
1063 assert_eq!(mapping.len(), 4);
1064 assert!(mapping.field_id("offers").is_some());
1065 assert!(mapping.field_id("offers.seller").is_some());
1066 assert!(mapping.field_id("offers.price").is_some());
1067 }
1068
1069 #[test]
1070 fn parse_deeply_nested_properties() {
1071 let json: Value = serde_json::from_str(
1072 r#"{
1073 "properties": {
1074 "offers": {
1075 "type": "nested",
1076 "properties": {
1077 "variants": {
1078 "type": "nested",
1079 "properties": {
1080 "color": {"type": "keyword"}
1081 }
1082 }
1083 }
1084 }
1085 }
1086 }"#,
1087 )
1088 .unwrap();
1089
1090 let mapping = Mapping::from_json(&json).unwrap();
1091 assert_eq!(mapping.len(), 3);
1092 assert!(mapping.field_id("offers.variants.color").is_some());
1093 }
1094
1095 #[test]
1096 fn default_dynamic_mode_omitted_in_json() {
1097 let mapping = Mapping::builder().field("x", FieldType::Keyword).build();
1098 let json = mapping.to_json();
1099 assert!(json["mappings"].get("dynamic").is_none());
1101 }
1102
1103 #[test]
1112 fn field_ids_survive_json_roundtrip() {
1113 let schema = Mapping::builder()
1114 .field("title", FieldType::Text)
1115 .field("tag", FieldType::Keyword)
1116 .field("embedding", FieldType::dense_vector(64))
1117 .build();
1118 let json = schema.to_json();
1119 let parsed = Mapping::from_json(&json).unwrap();
1120 assert_eq!(schema.field_id("title"), parsed.field_id("title"));
1121 assert_eq!(schema.field_id("tag"), parsed.field_id("tag"));
1122 assert_eq!(schema.field_id("embedding"), parsed.field_id("embedding"));
1123 }
1124
1125 #[test]
1126 fn parse_dense_vector_default_quantization_is_int8() {
1127 let json: Value =
1128 serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4}}}"#)
1129 .unwrap();
1130 let mapping = Mapping::from_json(&json).unwrap();
1131 let f = mapping.field(mapping.field_id("emb").unwrap());
1132 assert_eq!(f.field_type.vector_dims(), Some(4));
1133 assert_eq!(
1134 f.field_type.vector_quantization(),
1135 Some(QuantizationType::Int8)
1136 );
1137 }
1138
1139 #[test]
1140 fn parse_dense_vector_explicit_int8() {
1141 let json: Value = serde_json::from_str(
1142 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int8"}}}"#,
1143 )
1144 .unwrap();
1145 let mapping = Mapping::from_json(&json).unwrap();
1146 let f = mapping.field(mapping.field_id("emb").unwrap());
1147 assert_eq!(
1148 f.field_type.vector_quantization(),
1149 Some(QuantizationType::Int8)
1150 );
1151 }
1152
1153 #[test]
1154 fn parse_dense_vector_explicit_none() {
1155 let json: Value = serde_json::from_str(
1156 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "none"}}}"#,
1157 )
1158 .unwrap();
1159 let mapping = Mapping::from_json(&json).unwrap();
1160 let f = mapping.field(mapping.field_id("emb").unwrap());
1161 assert_eq!(
1162 f.field_type.vector_quantization(),
1163 Some(QuantizationType::None)
1164 );
1165 }
1166
1167 #[test]
1168 fn parse_dense_vector_int4_is_rejected() {
1169 let json: Value = serde_json::from_str(
1170 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}}}"#,
1171 )
1172 .unwrap();
1173 let err = Mapping::from_json(&json).unwrap_err();
1174 let msg = format!("{err}");
1175 assert!(msg.contains("int4"), "error must name the value: {msg}");
1176 assert!(
1177 msg.contains("not yet implemented"),
1178 "error must explain why: {msg}"
1179 );
1180 }
1181
1182 #[test]
1183 fn parse_dense_vector_bbq_is_rejected() {
1184 let json: Value = serde_json::from_str(
1185 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "bbq"}}}"#,
1186 )
1187 .unwrap();
1188 let err = Mapping::from_json(&json).unwrap_err();
1189 let msg = format!("{err}");
1190 assert!(msg.contains("bbq"), "error must name the value: {msg}");
1191 assert!(
1192 msg.contains("not yet implemented"),
1193 "error must explain why: {msg}"
1194 );
1195 }
1196
1197 #[test]
1198 fn parse_dense_vector_unknown_quantization_is_rejected() {
1199 let json: Value = serde_json::from_str(
1200 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "magic"}}}"#,
1201 )
1202 .unwrap();
1203 let err = Mapping::from_json(&json).unwrap_err();
1204 let msg = format!("{err}");
1205 assert!(msg.contains("magic"), "error must name the value: {msg}");
1206 }
1207
1208 #[test]
1209 fn parse_dense_vector_non_string_quantization_is_rejected() {
1210 let json: Value = serde_json::from_str(
1211 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": 8}}}"#,
1212 )
1213 .unwrap();
1214 let err = Mapping::from_json(&json).unwrap_err();
1215 let msg = format!("{err}");
1216 assert!(
1217 msg.contains("must be a string"),
1218 "error must explain the type mismatch: {msg}"
1219 );
1220 }
1221
1222 #[test]
1227 fn parse_dense_vector_missing_dims_is_rejected() {
1228 let json: Value =
1231 serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector"}}}"#).unwrap();
1232 let err = Mapping::from_json(&json).unwrap_err();
1233 let msg = format!("{err}");
1234 assert!(
1235 msg.contains("dims"),
1236 "error must name the missing option: {msg}"
1237 );
1238 }
1239
1240 #[test]
1241 fn parse_dense_vector_string_dims_is_rejected() {
1242 let json: Value = serde_json::from_str(
1244 r#"{"properties": {"emb": {"type": "dense_vector", "dims": "4"}}}"#,
1245 )
1246 .unwrap();
1247 let err = Mapping::from_json(&json).unwrap_err();
1248 let msg = format!("{err}");
1249 assert!(
1250 msg.contains("dims") && msg.contains("positive integer"),
1251 "error must explain the type mismatch: {msg}"
1252 );
1253 }
1254
1255 #[test]
1256 fn parse_dense_vector_zero_dims_is_rejected() {
1257 let json: Value =
1258 serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 0}}}"#)
1259 .unwrap();
1260 let err = Mapping::from_json(&json).unwrap_err();
1261 let msg = format!("{err}");
1262 assert!(msg.contains("dims"), "error must name the option: {msg}");
1263 }
1264
1265 #[test]
1266 fn parse_dense_vector_negative_dims_is_rejected() {
1267 let json: Value = serde_json::from_str(
1268 r#"{"properties": {"emb": {"type": "dense_vector", "dims": -4}}}"#,
1269 )
1270 .unwrap();
1271 let err = Mapping::from_json(&json).unwrap_err();
1272 let msg = format!("{err}");
1273 assert!(
1274 msg.contains("positive integer"),
1275 "error must explain the type mismatch: {msg}"
1276 );
1277 }
1278
1279 #[test]
1280 fn parse_dense_vector_unknown_key_is_rejected() {
1281 let json: Value = serde_json::from_str(
1283 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "dimensions": 8}}}"#,
1284 )
1285 .unwrap();
1286 let err = Mapping::from_json(&json).unwrap_err();
1287 let msg = format!("{err}");
1288 assert!(
1289 msg.contains("unknown") && msg.contains("dimensions"),
1290 "error must name the unknown option: {msg}"
1291 );
1292 }
1293
1294 #[test]
1295 fn parse_dense_vector_similarity_is_rejected() {
1296 let json: Value = serde_json::from_str(
1299 r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}}}"#,
1300 )
1301 .unwrap();
1302 let err = Mapping::from_json(&json).unwrap_err();
1303 let msg = format!("{err}");
1304 assert!(
1305 msg.contains("similarity") && msg.contains("not yet implemented"),
1306 "error must explain why it is rejected: {msg}"
1307 );
1308 }
1309
#[test]
// `index` given as the string "false" (not a JSON boolean) must be rejected
// with an error containing "index" and "boolean".
fn mapping_string_bool_index_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"sku": {"type": "keyword", "index": "false"}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let names_option = msg.contains("index");
    let names_type = msg.contains("boolean");
    assert!(
        names_option && names_type,
        "error must name the option and the expected type: {msg}"
    );
}
1326
#[test]
// `doc_values` given as the number 1 must be rejected with an error containing
// "doc_values" and "boolean".
fn mapping_non_bool_doc_values_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"sku": {"type": "keyword", "doc_values": 1}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let required = ["doc_values", "boolean"];
    assert!(required.iter().all(|&needle| msg.contains(needle)), "{msg}");
}
1339
#[test]
// The misspelled key `anlyzer` on a text field must be rejected with an error
// containing "unknown option" and the misspelled key itself.
fn mapping_unknown_key_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"body": {"type": "text", "anlyzer": "english"}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let flags_unknown = msg.contains("unknown option");
    let names_key = msg.contains("anlyzer");
    assert!(
        flags_unknown && names_key,
        "error must name the unknown key: {msg}"
    );
}
1353
#[test]
// `analyzer` on an integer field must be rejected; the error must contain
// "analyzer", "not supported for field type" and "integer".
fn mapping_analyzer_on_numeric_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"qty": {"type": "integer", "analyzer": "english"}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let required = ["analyzer", "not supported for field type", "integer"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "error must explain the per-type rejection: {msg}"
    );
}
1369
#[test]
// `analyzer` on a keyword field must be rejected with an error containing
// "analyzer" and "keyword".
fn mapping_analyzer_on_keyword_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"tag": {"type": "keyword", "analyzer": "english"}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let names_option = msg.contains("analyzer");
    let names_type = msg.contains("keyword");
    assert!(names_option && names_type, "{msg}");
}
1380
#[test]
// `ignore_above` on a keyword field must be rejected with an error containing
// "ignore_above" and "not yet supported".
fn mapping_unimplemented_es_param_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"tag": {"type": "keyword", "ignore_above": 256}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let required = ["ignore_above", "not yet supported"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "error must explain it is a deferred feature: {msg}"
    );
}
1394
#[test]
// A `copy_to` array holding a non-string element (`7`) must be rejected with
// an error containing "copy_to" and "strings".
fn mapping_copy_to_non_string_element_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"a": {"type": "keyword", "copy_to": ["ok", 7]}, "ok": {"type": "text"}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let names_option = msg.contains("copy_to");
    let names_shape = msg.contains("strings");
    assert!(names_option && names_shape, "{msg}");
}
1404
#[test]
// `copy_to` given as a bare number must be rejected with an error that
// contains "copy_to".
fn mapping_copy_to_wrong_shape_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"a": {"type": "keyword", "copy_to": 42}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let names_option = msg.contains("copy_to");
    assert!(names_option, "{msg}");
}
1413
#[test]
// An unrecognized key (`indx`) inside a `fields` sub-field must be rejected;
// the error must contain the path "title.raw" and the key "indx".
fn mapping_subfield_unknown_key_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"title": {"type": "text", "fields": {"raw": {"type": "keyword", "indx": "x"}}}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let required = ["title.raw", "indx"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "sub-field strictness: {msg}"
    );
}
1427
#[test]
// A string value for `index` on a child of a nested field must be rejected;
// the error must contain "items.qty", "index" and "boolean".
fn mapping_nested_child_string_bool_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"items": {"type": "nested", "properties": {"qty": {"type": "integer", "index": "no"}}}}}"#,
    )
    .unwrap();
    let failure = Mapping::from_json(&definition).unwrap_err();
    let msg = failure.to_string();
    let required = ["items.qty", "index", "boolean"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "nested strictness: {msg}"
    );
}
1441
#[test]
// A mapping assembled through the builder with an analyzer on a `Long` field
// must fail `validate()`; the error must contain "analyzer",
// "not supported for field type" and "long".
fn mapping_builder_analyzer_on_long_rejected() {
    let analyzed_long = FieldMapping::new("n", FieldType::Long).analyzer("english");
    let mapping = Mapping::builder().field_with_mapping(analyzed_long).build();
    let msg = mapping.validate().unwrap_err().to_string();
    let required = ["analyzer", "not supported for field type", "long"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "builder-path per-type check: {msg}"
    );
}
1458
#[test]
// A mapping that only uses accepted options must parse, and each parsed
// option must be reflected on the resulting field mappings.
fn mapping_strict_happy_paths_parse() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {
            "title": {"type": "text", "analyzer": "english", "search_analyzer": "standard", "norms": false, "store": true},
            "tag": {"type": "keyword", "doc_values": true, "index": false},
            "body": {"type": "text", "copy_to": ["title"]}
        }}"#,
    )
    .unwrap();
    let m = Mapping::from_json(&definition).unwrap();

    // `title` carries both analyzers plus the norms/store flags.
    let title_id = m.field_id("title").unwrap();
    let title = m.field(title_id);
    assert_eq!(title.analyzer.as_deref(), Some("english"));
    assert_eq!(title.search_analyzer.as_deref(), Some("standard"));
    assert!(!title.norms);
    assert!(title.stored);

    // `tag` was declared with `"index": false`.
    let tag_id = m.field_id("tag").unwrap();
    assert!(!m.field(tag_id).indexed);

    // `body` copies into `title`.
    let body_id = m.field_id("body").unwrap();
    assert_eq!(m.field(body_id).copy_to, vec![String::from("title")]);
}
1482
#[test]
// Supplying `element_type` on a dense_vector field must fail to parse; the
// error must mention "element_type" together with "not yet implemented".
fn parse_dense_vector_element_type_is_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "element_type": "byte"}}}"#,
    )
    .unwrap();
    let msg = Mapping::from_json(&definition).unwrap_err().to_string();
    let names_option = msg.contains("element_type");
    let gives_reason = msg.contains("not yet implemented");
    assert!(
        names_option && gives_reason,
        "error must explain why it is rejected: {msg}"
    );
}
1496
#[test]
// A dense_vector declared inside a nested field with a `similarity` option
// must fail to parse; the error must contain "similarity" and
// "not yet implemented".
fn parse_nested_dense_vector_unknown_key_is_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{
            "properties": {
                "doc": {
                    "type": "nested",
                    "properties": {
                        "emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}
                    }
                }
            }
        }"#,
    )
    .unwrap();
    let msg = Mapping::from_json(&definition).unwrap_err().to_string();
    let required = ["similarity", "not yet implemented"];
    assert!(
        required.iter().all(|&needle| msg.contains(needle)),
        "nested dense_vector must reject unwired options too: {msg}"
    );
}
1521
#[test]
// Serializing a mapping with a 768-dimension dense_vector field via `to_json`
// and parsing it back must yield the same dimension count.
fn dense_vector_dims_round_trip_through_json() {
    let vector_type = FieldType::dense_vector(768);
    let original = Mapping::builder().field("emb", vector_type).build();
    let serialized = original.to_json();
    let reparsed = Mapping::from_json(&serialized).unwrap();
    let emb_id = reparsed.field_id("emb").unwrap();
    let dims = reparsed.field(emb_id).field_type.vector_dims();
    assert_eq!(dims, Some(768));
}
1535
#[test]
// A dense_vector built with an explicit `QuantizationType::None` must still
// report `QuantizationType::None` after a `to_json` / `from_json` round trip.
fn dense_vector_explicit_quantization_round_trips() {
    let vector_type = FieldType::DenseVector {
        dims: 4,
        quantization: QuantizationType::None,
    };
    let original = Mapping::builder().field("emb", vector_type).build();
    let serialized = original.to_json();
    let reparsed = Mapping::from_json(&serialized).unwrap();
    let emb_id = reparsed.field_id("emb").unwrap();
    let quantization = reparsed.field(emb_id).field_type.vector_quantization();
    assert_eq!(quantization, Some(QuantizationType::None));
}
1557
#[test]
// A scalar `copy_to` naming a field declared in the same mapping must parse,
// and the source field must list that target.
fn copy_to_existing_target_is_accepted() {
    let definition = serde_json::from_str::<Value>(
        r#"{
            "properties": {
                "title": {"type": "text", "copy_to": "all_text"},
                "all_text": {"type": "text"}
            }
        }"#,
    )
    .unwrap();
    let mapping = Mapping::from_json(&definition).unwrap();
    let title_id = mapping.field_id("title").unwrap();
    let targets = &mapping.field(title_id).copy_to;
    assert_eq!(*targets, vec![String::from("all_text")]);
}
1583
#[test]
// A `copy_to` pointing at a field that is not declared must fail at parse
// time; the error must contain both the source name and the target name.
fn copy_to_missing_target_is_rejected_at_parse() {
    let definition = serde_json::from_str::<Value>(
        r#"{
            "properties": {
                "title": {"type": "text", "copy_to": "all_text"}
            }
        }"#,
    )
    .unwrap();
    let msg = Mapping::from_json(&definition).unwrap_err().to_string();
    let names_source = msg.contains("title");
    assert!(names_source, "error must name the source field: {msg}");
    let names_target = msg.contains("all_text");
    assert!(names_target, "error must name the missing target: {msg}");
}
1605
#[test]
// When a `copy_to` array mixes a declared target with an undeclared one,
// parsing must fail and the error must contain the undeclared name "missing".
fn copy_to_missing_target_in_array_is_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{
            "properties": {
                "title": {"type": "text", "copy_to": ["existing", "missing"]},
                "existing": {"type": "text"}
            }
        }"#,
    )
    .unwrap();
    let msg = Mapping::from_json(&definition).unwrap_err().to_string();
    let names_target = msg.contains("missing");
    assert!(names_target, "error must name the missing target: {msg}");
}
1624
#[test]
// A mapping built through the builder whose only field copies to the
// undeclared target "nope" must fail `validate()` with an error containing
// that target name.
fn validate_catches_builder_api_copy_to_with_missing_target() {
    let mut source = FieldMapping::new("source", FieldType::Text);
    source.copy_to = vec![String::from("nope")];
    let builder = Mapping::builder().field_with_mapping(source);
    let mapping = builder.build();
    let msg = mapping.validate().unwrap_err().to_string();
    let names_target = msg.contains("nope");
    assert!(names_target, "error must name the target: {msg}");
}
1636
#[test]
// A dense_vector inside a nested field that requests `"quantization": "int4"`
// must fail to parse, and the error must contain "int4".
fn parse_nested_dense_vector_int4_is_rejected() {
    let definition = serde_json::from_str::<Value>(
        r#"{
            "properties": {
                "outer": {
                    "type": "nested",
                    "properties": {
                        "emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}
                    }
                }
            }
        }"#,
    )
    .unwrap();
    let msg = Mapping::from_json(&definition).unwrap_err().to_string();
    let names_value = msg.contains("int4");
    assert!(names_value, "error must name the value: {msg}");
}
1658}