// luci/mapping/mapping_def.rs

1use std::collections::HashMap;
2
3use crate::core::{FieldId, LuciError, Result};
4use serde_json::Value;
5
6use crate::mapping::field_type::FieldType;
7use crate::mapping::mapping::FieldMapping;
8use crate::mapping::quantization::QuantizationType;
9
/// How an index treats document fields that the mapping does not declare.
///
/// A mapping is a set of indexing instructions, not a structural
/// constraint: a document is never rejected for carrying an unknown field.
/// The field is either auto-mapped or stored without being indexed.
///
/// See [[architecture-api-surface#Dynamic Mapping]].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DynamicMode {
    /// Default: infer a type for each unknown field and index it.
    True,
    /// Keep unknown fields in `_source` only; do not index them.
    False,
}
24
25impl DynamicMode {
26    /// Parse from a string or boolean value.
27    pub fn from_es_value(s: &str) -> Result<Self> {
28        match s {
29            "true" => Ok(Self::True),
30            "false" => Ok(Self::False),
31            _ => Err(LuciError::InvalidQuery(format!(
32                "invalid dynamic mode: {s} (expected \"true\" or \"false\")"
33            ))),
34        }
35    }
36
37    /// String representation.
38    pub fn es_value(self) -> &'static str {
39        match self {
40            Self::True => "true",
41            Self::False => "false",
42        }
43    }
44}
45
46/// An ordered set of field mappings with name-to-ID lookup.
47///
48/// Fields are assigned sequential [`FieldId`]s starting at 0. The schema is
49/// immutable after construction — new fields are added by building a new
50/// schema that extends the previous one (see schema evolution in
51/// [[architecture-api-surface#Schema Evolution]]).
52///
53/// See [[architecture-api-surface#Schema Definition]].
54#[derive(Clone, Debug)]
55pub struct Mapping {
56    fields: Vec<FieldMapping>,
57    name_to_id: HashMap<String, FieldId>,
58    dynamic: DynamicMode,
59}
60
61impl Mapping {
62    /// Start building a schema.
63    pub fn builder() -> MappingBuilder {
64        MappingBuilder {
65            fields: Vec::new(),
66            dynamic: DynamicMode::True,
67        }
68    }
69
70    /// Ensure the `_id` field exists, appending it if needed.
71    /// Called by `Index` at creation time. See [[feature-document-crud]].
72    pub fn ensure_id_field(&mut self) {
73        if self.name_to_id.contains_key("_id") {
74            return;
75        }
76        let mut id_mapping = FieldMapping::new("_id", FieldType::Keyword);
77        id_mapping.stored = false;
78        let id = FieldId::new(self.fields.len() as u16);
79        self.name_to_id.insert("_id".to_string(), id);
80        self.fields.push(id_mapping);
81    }
82
83    /// Look up a field's ID by name.
84    pub fn field_id(&self, name: &str) -> Option<FieldId> {
85        self.name_to_id.get(name).copied()
86    }
87
88    /// Get a field mapping by ID.
89    ///
90    /// # Panics
91    ///
92    /// Panics if `id` is out of bounds.
93    pub fn field(&self, id: FieldId) -> &FieldMapping {
94        &self.fields[id.as_u16() as usize]
95    }
96
97    /// All field mappings in insertion order.
98    pub fn fields(&self) -> &[FieldMapping] {
99        &self.fields
100    }
101
102    /// Number of fields.
103    pub fn len(&self) -> usize {
104        self.fields.len()
105    }
106
107    /// Whether the schema has no fields.
108    pub fn is_empty(&self) -> bool {
109        self.fields.is_empty()
110    }
111
112    /// How unknown fields are handled.
113    pub fn dynamic_mode(&self) -> DynamicMode {
114        self.dynamic
115    }
116
117    /// Validate cross-field references (currently: `copy_to` targets).
118    ///
119    /// Returns an error if any field's `copy_to` references a target name
120    /// that does not exist in the schema. Without this check the writer
121    /// would silently skip the copy at index time, leaving the user with
122    /// no signal that their configuration is a no-op — see
123    /// [[code-must-not-lie]].
124    ///
125    /// Called automatically by [`Mapping::from_json`]; programmatic
126    /// builder users should call it (or rely on `Index::create_with_mapping`
127    /// to call it on their behalf).
128    pub fn validate(&self) -> Result<()> {
129        for field in &self.fields {
130            // Reject zero-dimension dense_vector fields. The JSON parser
131            // already rejects `dims == 0`, but the programmatic builder
132            // (`FieldType::dense_vector(0)`) does not — and `GlobalHnsw::new`
133            // silently skips `dims == 0` fields, so a knn against such a
134            // field would silently return empty instead of erroring.
135            // See [[feature-knn-query-type]] §4 and [[code-must-not-lie]].
136            if matches!(field.field_type.vector_dims(), Some(0)) {
137                return Err(LuciError::InvalidQuery(format!(
138                    "dense_vector field \"{}\" must have dims >= 1",
139                    field.name,
140                )));
141            }
142            // analyzer / search_analyzer are only meaningful on analyzed
143            // text fields. The JSON parser rejects them per-type at parse
144            // time; this catches the programmatic builder path (e.g.
145            // `FieldMapping::new(name, Long).analyzer("x")`). See
146            // [[fix-strict-mapping-parsing]] and [[code-must-not-lie]].
147            if field.analyzer.is_some()
148                && !matches!(field.field_type, FieldType::Text | FieldType::TokenCount)
149            {
150                return Err(LuciError::InvalidQuery(format!(
151                    "field \"{}\": \"analyzer\" is not supported for field type \"{}\"",
152                    field.name,
153                    field.field_type.es_name()
154                )));
155            }
156            if field.search_analyzer.is_some() && !matches!(field.field_type, FieldType::Text) {
157                return Err(LuciError::InvalidQuery(format!(
158                    "field \"{}\": \"search_analyzer\" is not supported for field type \"{}\"",
159                    field.name,
160                    field.field_type.es_name()
161                )));
162            }
163            for target in &field.copy_to {
164                if !self.name_to_id.contains_key(target) {
165                    return Err(LuciError::InvalidQuery(format!(
166                        "field \"{src}\" has copy_to target \"{target}\" \
167                         that is not defined in the schema",
168                        src = field.name,
169                    )));
170                }
171            }
172        }
173        Ok(())
174    }
175
176    /// Serialize to ES-compatible JSON mapping format.
177    ///
178    /// Produces `{"mappings": {"dynamic": "...", "properties": {...}}}`.
179    pub fn to_json(&self) -> Value {
180        let mut properties = serde_json::Map::new();
181
182        for mapping in &self.fields {
183            // Skip sub-fields — they'll be nested under their parent
184            if mapping.parent_field.is_some() {
185                continue;
186            }
187
188            let mut field_obj = serde_json::Map::new();
189            field_obj.insert(
190                "type".into(),
191                Value::String(mapping.field_type.es_name().into()),
192            );
193
194            // dense_vector carries dims and quantization. These were
195            // previously dropped at to_json time, which made commit →
196            // reload silently zero the dims and reset quantization to
197            // the default — see [[code-must-not-lie]].
198            if let FieldType::DenseVector { dims, quantization } = mapping.field_type {
199                field_obj.insert("dims".into(), Value::Number(dims.into()));
200                if quantization != QuantizationType::DEFAULT {
201                    field_obj.insert(
202                        "quantization".into(),
203                        Value::String(quantization.es_name().into()),
204                    );
205                }
206            }
207
208            if let Some(ref analyzer) = mapping.analyzer {
209                field_obj.insert("analyzer".into(), Value::String(analyzer.clone()));
210            }
211            if let Some(ref search_analyzer) = mapping.search_analyzer {
212                field_obj.insert(
213                    "search_analyzer".into(),
214                    Value::String(search_analyzer.clone()),
215                );
216            }
217
218            // Only include non-default flags.
219            let defaults = FieldMapping::new("", mapping.field_type.clone());
220            if mapping.stored != defaults.stored {
221                field_obj.insert("store".into(), Value::Bool(mapping.stored));
222            }
223            if mapping.indexed != defaults.indexed {
224                field_obj.insert("index".into(), Value::Bool(mapping.indexed));
225            }
226            if mapping.doc_values != defaults.doc_values {
227                field_obj.insert("doc_values".into(), Value::Bool(mapping.doc_values));
228            }
229            if mapping.norms != defaults.norms {
230                field_obj.insert("norms".into(), Value::Bool(mapping.norms));
231            }
232            if !mapping.copy_to.is_empty() {
233                if mapping.copy_to.len() == 1 {
234                    field_obj.insert("copy_to".into(), Value::String(mapping.copy_to[0].clone()));
235                } else {
236                    field_obj.insert(
237                        "copy_to".into(),
238                        Value::Array(
239                            mapping
240                                .copy_to
241                                .iter()
242                                .map(|s| Value::String(s.clone()))
243                                .collect(),
244                        ),
245                    );
246                }
247            }
248
249            // Collect sub-fields for this parent
250            let prefix = format!("{}.", mapping.name);
251            let sub_fields: Vec<&FieldMapping> = self
252                .fields
253                .iter()
254                .filter(|f| f.parent_field.as_deref() == Some(&mapping.name))
255                .collect();
256            if !sub_fields.is_empty() {
257                let mut fields_obj = serde_json::Map::new();
258                for sub in sub_fields {
259                    let sub_name = sub.name.strip_prefix(&prefix).unwrap_or(&sub.name);
260                    let mut sub_obj = serde_json::Map::new();
261                    sub_obj.insert(
262                        "type".into(),
263                        Value::String(sub.field_type.es_name().into()),
264                    );
265                    if let Some(ref a) = sub.analyzer {
266                        sub_obj.insert("analyzer".into(), Value::String(a.clone()));
267                    }
268                    if let Some(ref sa) = sub.search_analyzer {
269                        sub_obj.insert("search_analyzer".into(), Value::String(sa.clone()));
270                    }
271                    fields_obj.insert(sub_name.to_string(), Value::Object(sub_obj));
272                }
273                field_obj.insert("fields".into(), Value::Object(fields_obj));
274            }
275
276            properties.insert(mapping.name.clone(), Value::Object(field_obj));
277        }
278
279        let mut mappings = serde_json::Map::new();
280        if self.dynamic != DynamicMode::True {
281            mappings.insert(
282                "dynamic".into(),
283                Value::String(self.dynamic.es_value().into()),
284            );
285        }
286        mappings.insert("properties".into(), Value::Object(properties));
287
288        let mut root = serde_json::Map::new();
289        root.insert("mappings".into(), Value::Object(mappings));
290        Value::Object(root)
291    }
292
293    /// Parse from ES-compatible JSON mapping format.
294    ///
295    /// Accepts `{"mappings": {"properties": {...}}}` or the shorthand
296    /// `{"properties": {...}}`.
297    ///
298    /// # Errors
299    ///
300    /// Returns `LuciError::InvalidQuery` if the JSON structure is invalid
301    /// or contains unsupported field types.
302    pub fn from_json(json: &Value) -> Result<Self> {
303        // Accept both {"mappings": {...}} and direct {"properties": {...}}.
304        let mappings_obj = if let Some(m) = json.get("mappings") {
305            m
306        } else {
307            json
308        };
309
310        let mut builder = MappingBuilder {
311            fields: Vec::new(),
312            dynamic: DynamicMode::True,
313        };
314
315        // Parse dynamic mode.
316        if let Some(dyn_val) = mappings_obj.get("dynamic") {
317            let mode_str = match dyn_val {
318                Value::String(s) => s.as_str(),
319                Value::Bool(true) => "true",
320                Value::Bool(false) => "false",
321                _ => {
322                    return Err(LuciError::InvalidQuery(
323                        "\"dynamic\" must be a string or boolean".into(),
324                    ));
325                }
326            };
327            builder.dynamic = DynamicMode::from_es_value(mode_str)?;
328        }
329
330        // Parse properties.
331        let properties = mappings_obj
332            .get("properties")
333            .and_then(|p| p.as_object())
334            .ok_or_else(|| {
335                LuciError::InvalidQuery("missing or invalid \"properties\" object".into())
336            })?;
337
338        for (name, field_def) in properties {
339            let field_obj = field_def.as_object().ok_or_else(|| {
340                LuciError::InvalidQuery(format!(
341                    "field \"{name}\": expected object, got {field_def}"
342                ))
343            })?;
344
345            let type_name = field_obj
346                .get("type")
347                .and_then(|t| t.as_str())
348                .ok_or_else(|| {
349                    LuciError::InvalidQuery(format!("field \"{name}\": missing \"type\" property"))
350                })?;
351
352            let mut field_type = FieldType::from_es_name(type_name)?;
353            // Validate and parse dense_vector config (dims, quantization),
354            // rejecting silently-dropped options. See [[code-must-not-lie]].
355            if let FieldType::DenseVector {
356                ref mut dims,
357                ref mut quantization,
358            } = field_type
359            {
360                parse_dense_vector_config(name, field_obj, dims, quantization)?;
361            }
362            let is_nested = matches!(field_type, FieldType::Nested);
363            let is_dense = field_type.is_dense_vector();
364            let mut mapping = FieldMapping::new(name.clone(), field_type);
365
366            // dense_vector options are validated by parse_dense_vector_config
367            // above; every other type runs the strict generic-option parser.
368            if !is_dense {
369                parse_field_options(name, field_obj, &mut mapping, FieldRole::Field)?;
370            }
371
372            builder.fields.push(mapping);
373
374            // Parse nested field properties: recurse into "properties" and
375            // register each child as "parent.child" with dot-notation.
376            // See [[fix-nested-mapping-parser]].
377            if is_nested {
378                if let Some(nested_props) = field_obj.get("properties").and_then(|v| v.as_object())
379                {
380                    parse_nested_properties(&mut builder, name, nested_props)?;
381                }
382            }
383
384            // Parse multi-field sub-fields: "fields": {"raw": {"type": "keyword"}}
385            // Flatten into top-level FieldMappings with dot-notation names.
386            // See [[feature-mapping-multi-fields]].
387            if let Some(sub_fields) = field_obj.get("fields").and_then(|v| v.as_object()) {
388                for (sub_name, sub_def) in sub_fields {
389                    let sub_label = format!("{name}.{sub_name}");
390                    let sub_obj = sub_def.as_object().ok_or_else(|| {
391                        LuciError::InvalidQuery(format!("field \"{sub_label}\": expected object"))
392                    })?;
393                    let sub_type_name =
394                        sub_obj
395                            .get("type")
396                            .and_then(|t| t.as_str())
397                            .ok_or_else(|| {
398                                LuciError::InvalidQuery(format!(
399                                    "field \"{sub_label}\": missing \"type\""
400                                ))
401                            })?;
402                    let sub_type = FieldType::from_es_name(sub_type_name)?;
403                    let mut sub_mapping = FieldMapping::new(sub_label.clone(), sub_type);
404                    sub_mapping.stored = false; // sub-fields never in _source
405                    sub_mapping.parent_field = Some(name.clone());
406                    parse_field_options(
407                        &sub_label,
408                        sub_obj,
409                        &mut sub_mapping,
410                        FieldRole::SubField,
411                    )?;
412                    builder.fields.push(sub_mapping);
413                }
414            }
415        }
416
417        let mapping = builder.build();
418        mapping.validate()?;
419        Ok(mapping)
420    }
421}
422
423// ---------------------------------------------------------------------------
424// Strict generic-option parsing. See [[fix-strict-mapping-parsing]].
425//
426// Replaces the permissive `.and_then(|v| v.as_bool())` extraction (which
427// mapped a wrong-typed value to a silent default), the absent allow-list
428// (which dropped unknown keys and options invalid for the field type), and
429// the `copy_to` `_ => {}` drop. Mirrors `parse_dense_vector_config`'s
430// contract for the generic options. See [[code-must-not-lie]].
431// ---------------------------------------------------------------------------
432
/// The position a field definition occupies in the mapping JSON, which
/// decides which options may appear on it.
#[derive(Clone, Copy, PartialEq, Eq)]
enum FieldRole {
    /// An entry of a `properties` object: a top-level field or a child of
    /// a `nested` field.
    Field,
    /// An entry of a multi-field `"fields": {...}` object. Such a
    /// sub-field is never in `_source` and cannot `copy_to`.
    SubField,
}
442
/// Every generic option Luci honors for at least one field type.
///
/// A key found here but missing from `supported_options(field_type)` is
/// valid for some *other* type, so it is reported as "not supported for
/// field type X" rather than as an unrecognized key.
const LUCI_OPTIONS: &[&str] = &[
    "index",
    "store",
    "doc_values",
    "norms",
    "analyzer",
    "search_analyzer",
    "copy_to",
];
456
/// Genuine Elasticsearch mapping parameters that Luci does not implement
/// yet.
///
/// A key listed here is rejected with "recognized but not yet supported"
/// (tier-ii) instead of "unknown option", mirroring how
/// `parse_dense_vector_config` treats `similarity`/`index_options`. The
/// five parameters of [[feature-mapping-options]] stay in this list until
/// that feature lands, then move to `supported_options`. See
/// [[fix-strict-mapping-parsing]].
const UNIMPLEMENTED_ES_PARAMS: &[&str] = &[
    "ignore_above",
    "null_value",
    "coerce",
    "enabled",
    "ignore_malformed",
    "format",
    "locale",
    "normalizer",
    "similarity",
    "term_vector",
    "index_options",
    "index_prefixes",
    "index_phrases",
    "position_increment_gap",
    "fielddata",
    "fielddata_frequency_filter",
    "ignore_z_value",
    "orientation",
    "eager_global_ordinals",
    "meta",
    "scaling_factor",
    "split_queries_on_whitespace",
    "search_quote_analyzer",
    "time_series_dimension",
    "time_series_metric",
    "boost",
];
490
491/// The options Luci's write/segment path actually honors for `field_type`
492/// — the per-type allow-list. Grounded in the writer/segment-builder code;
493/// see the honoring table in [[fix-strict-mapping-parsing]].
494fn supported_options(field_type: &FieldType) -> &'static [&'static str] {
495    match field_type {
496        FieldType::Text => &[
497            "index",
498            "store",
499            "analyzer",
500            "search_analyzer",
501            "norms",
502            "copy_to",
503        ],
504        FieldType::TokenCount => &["index", "store", "doc_values", "analyzer", "copy_to"],
505        FieldType::Keyword
506        | FieldType::Ip
507        | FieldType::Integer
508        | FieldType::Long
509        | FieldType::Float
510        | FieldType::Double
511        | FieldType::Boolean
512        | FieldType::Date
513        | FieldType::GeoPoint
514        | FieldType::GeoShape => &["index", "store", "doc_values", "copy_to"],
515        // dense_vector goes through parse_dense_vector_config; nested has
516        // only the structural `properties` key.
517        FieldType::DenseVector { .. } | FieldType::Nested => &[],
518    }
519}
520
521/// Reject unknown keys, options invalid for the field type, and
522/// recognized-but-unimplemented ES params on one field definition. The
523/// structural keys (`type`, `fields`, `properties`) are consumed by the
524/// caller and always allowed.
525fn validate_field_options(
526    field_label: &str,
527    field_type: &FieldType,
528    field_obj: &serde_json::Map<String, Value>,
529    role: FieldRole,
530) -> Result<()> {
531    for key in field_obj.keys() {
532        let k = key.as_str();
533        if matches!(k, "type" | "fields" | "properties") {
534            continue; // structural — consumed by the caller
535        }
536        if supported_options(field_type).contains(&k) {
537            if role == FieldRole::SubField && matches!(k, "store" | "copy_to") {
538                return Err(LuciError::InvalidQuery(format!(
539                    "field \"{field_label}\": option \"{k}\" is not supported on a \
540                     multi-field sub-field"
541                )));
542            }
543            continue;
544        }
545        if LUCI_OPTIONS.contains(&k) {
546            return Err(LuciError::InvalidQuery(format!(
547                "field \"{field_label}\": option \"{k}\" is not supported for field type \"{ft}\"",
548                ft = field_type.es_name()
549            )));
550        }
551        if UNIMPLEMENTED_ES_PARAMS.contains(&k) {
552            return Err(LuciError::InvalidQuery(format!(
553                "field \"{field_label}\": option \"{k}\" is recognized but not yet supported"
554            )));
555        }
556        return Err(LuciError::InvalidQuery(format!(
557            "field \"{field_label}\": unknown option \"{k}\""
558        )));
559    }
560    Ok(())
561}
562
563/// Strict boolean accessor: absent or `null` → `Ok(None)`; present but not
564/// a JSON boolean → `Err`. Luci rejects ES's `"true"`/`"false"` string
565/// leniency (see [[fix-strict-mapping-parsing]] Decision 1).
566fn opt_bool(
567    obj: &serde_json::Map<String, Value>,
568    key: &str,
569    field_label: &str,
570) -> Result<Option<bool>> {
571    match obj.get(key) {
572        Some(v) if !v.is_null() => v.as_bool().map(Some).ok_or_else(|| {
573            LuciError::InvalidQuery(format!(
574                "field \"{field_label}\": \"{key}\" must be a boolean, got {v}"
575            ))
576        }),
577        _ => Ok(None),
578    }
579}
580
581/// Strict string accessor: absent or `null` → `Ok(None)`; present but not
582/// a JSON string → `Err`.
583fn opt_str<'a>(
584    obj: &'a serde_json::Map<String, Value>,
585    key: &str,
586    field_label: &str,
587) -> Result<Option<&'a str>> {
588    match obj.get(key) {
589        Some(v) if !v.is_null() => v.as_str().map(Some).ok_or_else(|| {
590            LuciError::InvalidQuery(format!(
591                "field \"{field_label}\": \"{key}\" must be a string, got {v}"
592            ))
593        }),
594        _ => Ok(None),
595    }
596}
597
598/// Strict `copy_to` parser: a string or an array of strings. A wrong shape
599/// (number/object) or a non-string array element is rejected rather than
600/// silently dropped (was `_ => {}` + `filter_map`). See
601/// [[feature-mapping-copy-to]].
602fn parse_copy_to(
603    obj: &serde_json::Map<String, Value>,
604    field_label: &str,
605    mapping: &mut FieldMapping,
606) -> Result<()> {
607    match obj.get("copy_to") {
608        None | Some(Value::Null) => {}
609        Some(Value::String(s)) => mapping.copy_to = vec![s.clone()],
610        Some(Value::Array(arr)) => {
611            mapping.copy_to = arr
612                .iter()
613                .map(|v| {
614                    v.as_str().map(String::from).ok_or_else(|| {
615                        LuciError::InvalidQuery(format!(
616                            "field \"{field_label}\": copy_to entries must be strings, got {v}"
617                        ))
618                    })
619                })
620                .collect::<Result<Vec<_>>>()?;
621        }
622        Some(other) => {
623            return Err(LuciError::InvalidQuery(format!(
624                "field \"{field_label}\": copy_to must be a string or array of strings, got {other}"
625            )));
626        }
627    }
628    Ok(())
629}
630
631/// Validate and apply the generic options on one non-`dense_vector` field
632/// definition: reject wrong-typed values, unknown keys, and options invalid
633/// for the field type, then apply the supported ones with strict accessors.
634/// Shared by the top-level, sub-field, and nested-child parsers so all
635/// three enforce one contract. See [[fix-strict-mapping-parsing]].
636fn parse_field_options(
637    field_label: &str,
638    field_obj: &serde_json::Map<String, Value>,
639    mapping: &mut FieldMapping,
640    role: FieldRole,
641) -> Result<()> {
642    validate_field_options(field_label, &mapping.field_type, field_obj, role)?;
643    if let Some(v) = opt_bool(field_obj, "index", field_label)? {
644        mapping.indexed = v;
645    }
646    if let Some(v) = opt_bool(field_obj, "doc_values", field_label)? {
647        mapping.doc_values = v;
648    }
649    if let Some(v) = opt_bool(field_obj, "norms", field_label)? {
650        mapping.norms = v;
651    }
652    if let Some(v) = opt_str(field_obj, "analyzer", field_label)? {
653        mapping.analyzer = Some(v.to_string());
654    }
655    if let Some(v) = opt_str(field_obj, "search_analyzer", field_label)? {
656        mapping.search_analyzer = Some(v.to_string());
657    }
658    // Sub-fields are never in `_source` and cannot copy_to; the validator
659    // above already rejected `store`/`copy_to` for them.
660    if role == FieldRole::Field {
661        if let Some(v) = opt_bool(field_obj, "store", field_label)? {
662            mapping.stored = v;
663        }
664        parse_copy_to(field_obj, field_label, mapping)?;
665    }
666    Ok(())
667}
668
669/// Validate and parse the `dense_vector`-specific config (`dims`,
670/// `quantization`), rejecting anything Luci would otherwise silently
671/// drop.
672///
673/// Honors [[code-must-not-lie]]: a user who writes `int4`, a
674/// `similarity`, an `element_type`, an unknown key, or a mistyped
675/// `dims` gets an explicit error — never a substituted default. The
676/// previous cherry-pick parser left `dims` at its placeholder `0` on
677/// any type-mismatch and ignored every unrecognized option. Audit
678/// finding E9 in [[vector-recall-investigation-audit]].
679///
680/// Shared by the top-level and nested-properties parsers so both paths
681/// enforce the same contract.
682fn parse_dense_vector_config(
683    field_label: &str,
684    field_obj: &serde_json::Map<String, Value>,
685    dims: &mut usize,
686    quantization: &mut QuantizationType,
687) -> Result<()> {
688    // Strict allow-list: reject every key Luci does not honor for a
689    // dense_vector so a silently-dropped option becomes a loud error.
690    for key in field_obj.keys() {
691        match key.as_str() {
692            "type" | "dims" | "quantization" => {}
693            // Recognized Elasticsearch dense_vector options that Luci
694            // has not wired yet — reject explicitly rather than ignore,
695            // mirroring the int4/bbq quantization rejection.
696            "similarity" | "index" | "index_options" | "element_type" => {
697                return Err(LuciError::InvalidQuery(format!(
698                    "field \"{field_label}\": dense_vector option \"{key}\" is recognized but not yet implemented"
699                )));
700            }
701            other => {
702                return Err(LuciError::InvalidQuery(format!(
703                    "field \"{field_label}\": unknown dense_vector option \"{other}\""
704                )));
705            }
706        }
707    }
708
709    // dims is required and must be a positive integer. A missing or
710    // mistyped value previously left dims at the placeholder 0.
711    let dims_val = field_obj.get("dims").ok_or_else(|| {
712        LuciError::InvalidQuery(format!(
713            "field \"{field_label}\": dense_vector requires \"dims\""
714        ))
715    })?;
716    let d = dims_val.as_u64().ok_or_else(|| {
717        LuciError::InvalidQuery(format!(
718            "field \"{field_label}\": \"dims\" must be a positive integer, got {dims_val}"
719        ))
720    })?;
721    if d == 0 {
722        return Err(LuciError::InvalidQuery(format!(
723            "field \"{field_label}\": \"dims\" must be >= 1"
724        )));
725    }
726    *dims = d as usize;
727
728    // quantization is optional (documented default int8); when present
729    // it must be a string naming a supported scheme.
730    if let Some(q_val) = field_obj.get("quantization") {
731        let q_str = q_val.as_str().ok_or_else(|| {
732            LuciError::InvalidQuery(format!(
733                "field \"{field_label}\": \"quantization\" must be a string, got {q_val}"
734            ))
735        })?;
736        *quantization = QuantizationType::from_es_name(q_str)?;
737    }
738
739    Ok(())
740}
741
742/// Recursively parse `properties` on a nested field, registering each
743/// child as `prefix.child_name` with dot-notation. Handles arbitrary
744/// nesting depth (nested within nested).
745///
746/// See [[fix-nested-mapping-parser]].
747fn parse_nested_properties(
748    builder: &mut MappingBuilder,
749    prefix: &str,
750    properties: &serde_json::Map<String, Value>,
751) -> Result<()> {
752    for (child_name, child_def) in properties {
753        let child_obj = child_def.as_object().ok_or_else(|| {
754            LuciError::InvalidQuery(format!("field \"{prefix}.{child_name}\": expected object"))
755        })?;
756        let type_name = child_obj
757            .get("type")
758            .and_then(|t| t.as_str())
759            .ok_or_else(|| {
760                LuciError::InvalidQuery(format!(
761                    "field \"{prefix}.{child_name}\": missing \"type\""
762                ))
763            })?;
764        let mut field_type = FieldType::from_es_name(type_name)?;
765        let full_name = format!("{prefix}.{child_name}");
766        // Validate and parse dense_vector config (dims, quantization),
767        // rejecting silently-dropped options. See [[code-must-not-lie]].
768        if let FieldType::DenseVector {
769            ref mut dims,
770            ref mut quantization,
771        } = field_type
772        {
773            parse_dense_vector_config(&full_name, child_obj, dims, quantization)?;
774        }
775        let is_nested = matches!(field_type, FieldType::Nested);
776        let is_dense = field_type.is_dense_vector();
777        let mut mapping = FieldMapping::new(full_name.clone(), field_type);
778        // dense_vector child options are validated by parse_dense_vector_config
779        // above; every other type runs the strict generic-option parser.
780        if !is_dense {
781            parse_field_options(&full_name, child_obj, &mut mapping, FieldRole::Field)?;
782        }
783        builder.fields.push(mapping);
784
785        if is_nested {
786            if let Some(sub_props) = child_obj.get("properties").and_then(|v| v.as_object()) {
787                parse_nested_properties(builder, &full_name, sub_props)?;
788            }
789        }
790    }
791    Ok(())
792}
793
/// Incrementally builds a [`Mapping`].
///
/// Each setter consumes the builder and returns it, so calls chain; the
/// final `build()` turns the accumulated fields into a `Mapping`.
///
/// ```ignore
/// let mapping = Mapping::builder()
///     .field("title", FieldType::Text)
///     .field("status", FieldType::Keyword)
///     .field("price", FieldType::Float)
///     .build();
/// ```
pub struct MappingBuilder {
    /// Field mappings in insertion order; a field's position here becomes
    /// its `FieldId` when the mapping is built.
    fields: Vec<FieldMapping>,
    /// Dynamic-mapping mode handed to the built [`Mapping`].
    dynamic: DynamicMode,
}
807
808impl MappingBuilder {
809    /// Add a field with default flags for its type.
810    pub fn field(mut self, name: impl Into<String>, field_type: FieldType) -> Self {
811        self.fields.push(FieldMapping::new(name, field_type));
812        self
813    }
814
815    /// Add a field with a fully customized mapping.
816    pub fn field_with_mapping(mut self, mapping: FieldMapping) -> Self {
817        self.fields.push(mapping);
818        self
819    }
820
821    /// Set the dynamic mapping mode.
822    pub fn dynamic(mut self, mode: DynamicMode) -> Self {
823        self.dynamic = mode;
824        self
825    }
826
827    /// Build the schema, assigning sequential `FieldId`s.
828    pub fn build(self) -> Mapping {
829        let mut name_to_id = HashMap::with_capacity(self.fields.len());
830        for (i, mapping) in self.fields.iter().enumerate() {
831            name_to_id.insert(mapping.name.clone(), FieldId::new(i as u16));
832        }
833
834        Mapping {
835            fields: self.fields,
836            name_to_id,
837            dynamic: self.dynamic,
838        }
839    }
840}
841
842#[cfg(test)]
843mod tests {
844    use super::*;
845
846    #[test]
847    fn builder_basic() {
848        let mapping = Mapping::builder()
849            .field("title", FieldType::Text)
850            .field("status", FieldType::Keyword)
851            .field("price", FieldType::Float)
852            .build();
853
854        assert_eq!(mapping.len(), 3);
855        assert_eq!(mapping.field_id("title"), Some(FieldId::new(0)));
856        assert_eq!(mapping.field_id("status"), Some(FieldId::new(1)));
857        assert_eq!(mapping.field_id("price"), Some(FieldId::new(2)));
858        assert_eq!(mapping.field_id("nonexistent"), None);
859
860        assert_eq!(mapping.field(FieldId::new(0)).field_type, FieldType::Text);
861        assert_eq!(
862            mapping.field(FieldId::new(1)).field_type,
863            FieldType::Keyword
864        );
865        assert_eq!(mapping.field(FieldId::new(2)).field_type, FieldType::Float);
866    }
867
868    #[test]
869    fn builder_with_mapping() {
870        let mapping = Mapping::builder()
871            .field_with_mapping(
872                FieldMapping::new("body", FieldType::Text)
873                    .analyzer("whitespace")
874                    .norms(false),
875            )
876            .build();
877
878        let m = mapping.field(FieldId::new(0));
879        assert_eq!(m.analyzer.as_deref(), Some("whitespace"));
880        assert!(!m.norms);
881    }
882
883    #[test]
884    fn dynamic_mode_default_is_true() {
885        let mapping = Mapping::builder().build();
886        assert_eq!(mapping.dynamic_mode(), DynamicMode::True);
887    }
888
889    #[test]
890    fn dynamic_mode_false() {
891        let mapping = Mapping::builder().dynamic(DynamicMode::False).build();
892        assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
893    }
894
895    #[test]
896    fn json_round_trip() {
897        let mapping = Mapping::builder()
898            .field("title", FieldType::Text)
899            .field("status", FieldType::Keyword)
900            .field("price", FieldType::Float)
901            .field("count", FieldType::Long)
902            .field("active", FieldType::Boolean)
903            .field("created", FieldType::Date)
904            .dynamic(DynamicMode::False)
905            .build();
906
907        let json = mapping.to_json();
908        let parsed = Mapping::from_json(&json).unwrap();
909
910        assert_eq!(parsed.len(), mapping.len());
911        assert_eq!(parsed.dynamic_mode(), DynamicMode::False);
912
913        for mapping in mapping.fields() {
914            let id = parsed.field_id(&mapping.name).unwrap();
915            let parsed_mapping = parsed.field(id);
916            assert_eq!(parsed_mapping.field_type, mapping.field_type);
917            assert_eq!(parsed_mapping.stored, mapping.stored);
918            assert_eq!(parsed_mapping.indexed, mapping.indexed);
919            assert_eq!(parsed_mapping.doc_values, mapping.doc_values);
920            assert_eq!(parsed_mapping.norms, mapping.norms);
921        }
922    }
923
924    #[test]
925    fn json_round_trip_with_analyzer() {
926        let mapping = Mapping::builder()
927            .field_with_mapping(FieldMapping::new("body", FieldType::Text).analyzer("standard"))
928            .build();
929
930        let json = mapping.to_json();
931        let parsed = Mapping::from_json(&json).unwrap();
932
933        assert_eq!(
934            parsed.field(FieldId::new(0)).analyzer.as_deref(),
935            Some("standard")
936        );
937    }
938
939    #[test]
940    fn json_round_trip_with_custom_flags() {
941        let mapping = Mapping::builder()
942            .field_with_mapping(
943                FieldMapping::new("body", FieldType::Text)
944                    .stored(false)
945                    .norms(false),
946            )
947            .build();
948
949        let json = mapping.to_json();
950        let parsed = Mapping::from_json(&json).unwrap();
951
952        let m = parsed.field(FieldId::new(0));
953        assert!(!m.stored);
954        assert!(!m.norms);
955    }
956
957    #[test]
958    fn parse_es_mapping_json() {
959        let json: Value = serde_json::from_str(
960            r#"{
961                "mappings": {
962                    "dynamic": "false",
963                    "properties": {
964                        "title": {"type": "text", "analyzer": "standard"},
965                        "status": {"type": "keyword"},
966                        "price": {"type": "float"}
967                    }
968                }
969            }"#,
970        )
971        .unwrap();
972
973        let mapping = Mapping::from_json(&json).unwrap();
974        assert_eq!(mapping.len(), 3);
975        assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
976        assert!(mapping.field_id("title").is_some());
977        assert!(mapping.field_id("status").is_some());
978        assert!(mapping.field_id("price").is_some());
979    }
980
981    #[test]
982    fn parse_shorthand_json() {
983        let json: Value = serde_json::from_str(
984            r#"{
985                "properties": {
986                    "name": {"type": "keyword"}
987                }
988            }"#,
989        )
990        .unwrap();
991
992        let mapping = Mapping::from_json(&json).unwrap();
993        assert_eq!(mapping.len(), 1);
994    }
995
996    #[test]
997    fn parse_dynamic_as_boolean() {
998        let json: Value = serde_json::from_str(
999            r#"{
1000                "properties": {"x": {"type": "keyword"}},
1001                "dynamic": false
1002            }"#,
1003        )
1004        .unwrap();
1005
1006        let mapping = Mapping::from_json(&json).unwrap();
1007        assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
1008    }
1009
1010    #[test]
1011    fn parse_missing_type_is_error() {
1012        let json: Value = serde_json::from_str(r#"{"properties": {"x": {}}}"#).unwrap();
1013        assert!(Mapping::from_json(&json).is_err());
1014    }
1015
1016    #[test]
1017    fn parse_unknown_type_is_error() {
1018        let json: Value =
1019            serde_json::from_str(r#"{"properties": {"x": {"type": "percolator"}}}"#).unwrap();
1020        assert!(Mapping::from_json(&json).is_err());
1021    }
1022
1023    #[test]
1024    fn parse_missing_properties_is_error() {
1025        let json: Value = serde_json::from_str(r#"{"mappings": {}}"#).unwrap();
1026        assert!(Mapping::from_json(&json).is_err());
1027    }
1028
1029    #[test]
1030    fn dynamic_mode_round_trip() {
1031        for mode in [DynamicMode::True, DynamicMode::False] {
1032            let parsed = DynamicMode::from_es_value(mode.es_value()).unwrap();
1033            assert_eq!(parsed, mode);
1034        }
1035    }
1036
1037    #[test]
1038    fn empty_schema() {
1039        let mapping = Mapping::builder().build();
1040        assert!(mapping.is_empty());
1041        assert_eq!(mapping.len(), 0);
1042    }
1043
1044    #[test]
1045    fn parse_nested_properties_flattened() {
1046        let json: Value = serde_json::from_str(
1047            r#"{
1048                "properties": {
1049                    "title": {"type": "text"},
1050                    "offers": {
1051                        "type": "nested",
1052                        "properties": {
1053                            "seller": {"type": "keyword"},
1054                            "price": {"type": "float"}
1055                        }
1056                    }
1057                }
1058            }"#,
1059        )
1060        .unwrap();
1061
1062        let mapping = Mapping::from_json(&json).unwrap();
1063        assert_eq!(mapping.len(), 4);
1064        assert!(mapping.field_id("offers").is_some());
1065        assert!(mapping.field_id("offers.seller").is_some());
1066        assert!(mapping.field_id("offers.price").is_some());
1067    }
1068
1069    #[test]
1070    fn parse_deeply_nested_properties() {
1071        let json: Value = serde_json::from_str(
1072            r#"{
1073                "properties": {
1074                    "offers": {
1075                        "type": "nested",
1076                        "properties": {
1077                            "variants": {
1078                                "type": "nested",
1079                                "properties": {
1080                                    "color": {"type": "keyword"}
1081                                }
1082                            }
1083                        }
1084                    }
1085                }
1086            }"#,
1087        )
1088        .unwrap();
1089
1090        let mapping = Mapping::from_json(&json).unwrap();
1091        assert_eq!(mapping.len(), 3);
1092        assert!(mapping.field_id("offers.variants.color").is_some());
1093    }
1094
1095    #[test]
1096    fn default_dynamic_mode_omitted_in_json() {
1097        let mapping = Mapping::builder().field("x", FieldType::Keyword).build();
1098        let json = mapping.to_json();
1099        // DynamicMode::True is the default — should not appear in output.
1100        assert!(json["mappings"].get("dynamic").is_none());
1101    }
1102
1103    // ------------------------------------------------------------------
1104    // dense_vector quantization parsing — see [[code-must-not-lie]].
1105    //
1106    // These tests defend the user contract: the value the user writes
1107    // for `quantization` is what the engine sees, or the parse fails
1108    // with a clear error. No silent substitution is acceptable.
1109    // ------------------------------------------------------------------
1110
1111    #[test]
1112    fn field_ids_survive_json_roundtrip() {
1113        let schema = Mapping::builder()
1114            .field("title", FieldType::Text)
1115            .field("tag", FieldType::Keyword)
1116            .field("embedding", FieldType::dense_vector(64))
1117            .build();
1118        let json = schema.to_json();
1119        let parsed = Mapping::from_json(&json).unwrap();
1120        assert_eq!(schema.field_id("title"), parsed.field_id("title"));
1121        assert_eq!(schema.field_id("tag"), parsed.field_id("tag"));
1122        assert_eq!(schema.field_id("embedding"), parsed.field_id("embedding"));
1123    }
1124
1125    #[test]
1126    fn parse_dense_vector_default_quantization_is_int8() {
1127        let json: Value =
1128            serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4}}}"#)
1129                .unwrap();
1130        let mapping = Mapping::from_json(&json).unwrap();
1131        let f = mapping.field(mapping.field_id("emb").unwrap());
1132        assert_eq!(f.field_type.vector_dims(), Some(4));
1133        assert_eq!(
1134            f.field_type.vector_quantization(),
1135            Some(QuantizationType::Int8)
1136        );
1137    }
1138
1139    #[test]
1140    fn parse_dense_vector_explicit_int8() {
1141        let json: Value = serde_json::from_str(
1142            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int8"}}}"#,
1143        )
1144        .unwrap();
1145        let mapping = Mapping::from_json(&json).unwrap();
1146        let f = mapping.field(mapping.field_id("emb").unwrap());
1147        assert_eq!(
1148            f.field_type.vector_quantization(),
1149            Some(QuantizationType::Int8)
1150        );
1151    }
1152
1153    #[test]
1154    fn parse_dense_vector_explicit_none() {
1155        let json: Value = serde_json::from_str(
1156            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "none"}}}"#,
1157        )
1158        .unwrap();
1159        let mapping = Mapping::from_json(&json).unwrap();
1160        let f = mapping.field(mapping.field_id("emb").unwrap());
1161        assert_eq!(
1162            f.field_type.vector_quantization(),
1163            Some(QuantizationType::None)
1164        );
1165    }
1166
1167    #[test]
1168    fn parse_dense_vector_int4_is_rejected() {
1169        let json: Value = serde_json::from_str(
1170            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}}}"#,
1171        )
1172        .unwrap();
1173        let err = Mapping::from_json(&json).unwrap_err();
1174        let msg = format!("{err}");
1175        assert!(msg.contains("int4"), "error must name the value: {msg}");
1176        assert!(
1177            msg.contains("not yet implemented"),
1178            "error must explain why: {msg}"
1179        );
1180    }
1181
1182    #[test]
1183    fn parse_dense_vector_bbq_is_rejected() {
1184        let json: Value = serde_json::from_str(
1185            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "bbq"}}}"#,
1186        )
1187        .unwrap();
1188        let err = Mapping::from_json(&json).unwrap_err();
1189        let msg = format!("{err}");
1190        assert!(msg.contains("bbq"), "error must name the value: {msg}");
1191        assert!(
1192            msg.contains("not yet implemented"),
1193            "error must explain why: {msg}"
1194        );
1195    }
1196
1197    #[test]
1198    fn parse_dense_vector_unknown_quantization_is_rejected() {
1199        let json: Value = serde_json::from_str(
1200            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "magic"}}}"#,
1201        )
1202        .unwrap();
1203        let err = Mapping::from_json(&json).unwrap_err();
1204        let msg = format!("{err}");
1205        assert!(msg.contains("magic"), "error must name the value: {msg}");
1206    }
1207
1208    #[test]
1209    fn parse_dense_vector_non_string_quantization_is_rejected() {
1210        let json: Value = serde_json::from_str(
1211            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": 8}}}"#,
1212        )
1213        .unwrap();
1214        let err = Mapping::from_json(&json).unwrap_err();
1215        let msg = format!("{err}");
1216        assert!(
1217            msg.contains("must be a string"),
1218            "error must explain the type mismatch: {msg}"
1219        );
1220    }
1221
1222    // --- E9: dense_vector parser must not silently drop user input. ---
1223    // A specified-but-mistyped/unknown option must error, never fall back
1224    // to a substituted default. See [[code-must-not-lie]].
1225
1226    #[test]
1227    fn parse_dense_vector_missing_dims_is_rejected() {
1228        // Previously left dims at the placeholder 0, producing a silently
1229        // broken zero-dimension vector field.
1230        let json: Value =
1231            serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector"}}}"#).unwrap();
1232        let err = Mapping::from_json(&json).unwrap_err();
1233        let msg = format!("{err}");
1234        assert!(
1235            msg.contains("dims"),
1236            "error must name the missing option: {msg}"
1237        );
1238    }
1239
1240    #[test]
1241    fn parse_dense_vector_string_dims_is_rejected() {
1242        // `.as_u64()` returned None for a string, silently leaving dims 0.
1243        let json: Value = serde_json::from_str(
1244            r#"{"properties": {"emb": {"type": "dense_vector", "dims": "4"}}}"#,
1245        )
1246        .unwrap();
1247        let err = Mapping::from_json(&json).unwrap_err();
1248        let msg = format!("{err}");
1249        assert!(
1250            msg.contains("dims") && msg.contains("positive integer"),
1251            "error must explain the type mismatch: {msg}"
1252        );
1253    }
1254
1255    #[test]
1256    fn parse_dense_vector_zero_dims_is_rejected() {
1257        let json: Value =
1258            serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 0}}}"#)
1259                .unwrap();
1260        let err = Mapping::from_json(&json).unwrap_err();
1261        let msg = format!("{err}");
1262        assert!(msg.contains("dims"), "error must name the option: {msg}");
1263    }
1264
1265    #[test]
1266    fn parse_dense_vector_negative_dims_is_rejected() {
1267        let json: Value = serde_json::from_str(
1268            r#"{"properties": {"emb": {"type": "dense_vector", "dims": -4}}}"#,
1269        )
1270        .unwrap();
1271        let err = Mapping::from_json(&json).unwrap_err();
1272        let msg = format!("{err}");
1273        assert!(
1274            msg.contains("positive integer"),
1275            "error must explain the type mismatch: {msg}"
1276        );
1277    }
1278
1279    #[test]
1280    fn parse_dense_vector_unknown_key_is_rejected() {
1281        // A typo'd option (here "dimensions") must not be silently ignored.
1282        let json: Value = serde_json::from_str(
1283            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "dimensions": 8}}}"#,
1284        )
1285        .unwrap();
1286        let err = Mapping::from_json(&json).unwrap_err();
1287        let msg = format!("{err}");
1288        assert!(
1289            msg.contains("unknown") && msg.contains("dimensions"),
1290            "error must name the unknown option: {msg}"
1291        );
1292    }
1293
1294    #[test]
1295    fn parse_dense_vector_similarity_is_rejected() {
1296        // `similarity` is a real ES dense_vector option Luci hasn't wired;
1297        // it must be rejected explicitly, not silently dropped.
1298        let json: Value = serde_json::from_str(
1299            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}}}"#,
1300        )
1301        .unwrap();
1302        let err = Mapping::from_json(&json).unwrap_err();
1303        let msg = format!("{err}");
1304        assert!(
1305            msg.contains("similarity") && msg.contains("not yet implemented"),
1306            "error must explain why it is rejected: {msg}"
1307        );
1308    }
1309
1310    // --- Strict generic-option parsing. See [[fix-strict-mapping-parsing]]. ---
1311
1312    #[test]
1313    fn mapping_string_bool_index_rejected() {
1314        // `"index": "false"` (string) must error, not silently leave
1315        // indexing on. See [[code-must-not-lie]].
1316        let json: Value = serde_json::from_str(
1317            r#"{"properties": {"sku": {"type": "keyword", "index": "false"}}}"#,
1318        )
1319        .unwrap();
1320        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1321        assert!(
1322            msg.contains("index") && msg.contains("boolean"),
1323            "error must name the option and the expected type: {msg}"
1324        );
1325    }
1326
1327    #[test]
1328    fn mapping_non_bool_doc_values_rejected() {
1329        let json: Value = serde_json::from_str(
1330            r#"{"properties": {"sku": {"type": "keyword", "doc_values": 1}}}"#,
1331        )
1332        .unwrap();
1333        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1334        assert!(
1335            msg.contains("doc_values") && msg.contains("boolean"),
1336            "{msg}"
1337        );
1338    }
1339
1340    #[test]
1341    fn mapping_unknown_key_rejected() {
1342        // A typo'd option must not be silently dropped.
1343        let json: Value = serde_json::from_str(
1344            r#"{"properties": {"body": {"type": "text", "anlyzer": "english"}}}"#,
1345        )
1346        .unwrap();
1347        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1348        assert!(
1349            msg.contains("unknown option") && msg.contains("anlyzer"),
1350            "error must name the unknown key: {msg}"
1351        );
1352    }
1353
1354    #[test]
1355    fn mapping_analyzer_on_numeric_rejected() {
1356        // analyzer is meaningless on a numeric field; ES rejects it.
1357        let json: Value = serde_json::from_str(
1358            r#"{"properties": {"qty": {"type": "integer", "analyzer": "english"}}}"#,
1359        )
1360        .unwrap();
1361        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1362        assert!(
1363            msg.contains("analyzer")
1364                && msg.contains("not supported for field type")
1365                && msg.contains("integer"),
1366            "error must explain the per-type rejection: {msg}"
1367        );
1368    }
1369
1370    #[test]
1371    fn mapping_analyzer_on_keyword_rejected() {
1372        // keyword is not analyzed; analyzer must be rejected (ES parity).
1373        let json: Value = serde_json::from_str(
1374            r#"{"properties": {"tag": {"type": "keyword", "analyzer": "english"}}}"#,
1375        )
1376        .unwrap();
1377        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1378        assert!(msg.contains("analyzer") && msg.contains("keyword"), "{msg}");
1379    }
1380
1381    #[test]
1382    fn mapping_unimplemented_es_param_rejected() {
1383        // `ignore_above` is a real ES param Luci hasn't built — honest refusal.
1384        let json: Value = serde_json::from_str(
1385            r#"{"properties": {"tag": {"type": "keyword", "ignore_above": 256}}}"#,
1386        )
1387        .unwrap();
1388        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1389        assert!(
1390            msg.contains("ignore_above") && msg.contains("not yet supported"),
1391            "error must explain it is a deferred feature: {msg}"
1392        );
1393    }
1394
1395    #[test]
1396    fn mapping_copy_to_non_string_element_rejected() {
1397        let json: Value = serde_json::from_str(
1398            r#"{"properties": {"a": {"type": "keyword", "copy_to": ["ok", 7]}, "ok": {"type": "text"}}}"#,
1399        )
1400        .unwrap();
1401        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1402        assert!(msg.contains("copy_to") && msg.contains("strings"), "{msg}");
1403    }
1404
1405    #[test]
1406    fn mapping_copy_to_wrong_shape_rejected() {
1407        let json: Value =
1408            serde_json::from_str(r#"{"properties": {"a": {"type": "keyword", "copy_to": 42}}}"#)
1409                .unwrap();
1410        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1411        assert!(msg.contains("copy_to"), "{msg}");
1412    }
1413
1414    #[test]
1415    fn mapping_subfield_unknown_key_rejected() {
1416        // The sub-field block enforces the same strictness.
1417        let json: Value = serde_json::from_str(
1418            r#"{"properties": {"title": {"type": "text", "fields": {"raw": {"type": "keyword", "indx": "x"}}}}}"#,
1419        )
1420        .unwrap();
1421        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1422        assert!(
1423            msg.contains("title.raw") && msg.contains("indx"),
1424            "sub-field strictness: {msg}"
1425        );
1426    }
1427
1428    #[test]
1429    fn mapping_nested_child_string_bool_rejected() {
1430        // The nested-child block enforces the same strictness.
1431        let json: Value = serde_json::from_str(
1432            r#"{"properties": {"items": {"type": "nested", "properties": {"qty": {"type": "integer", "index": "no"}}}}}"#,
1433        )
1434        .unwrap();
1435        let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
1436        assert!(
1437            msg.contains("items.qty") && msg.contains("index") && msg.contains("boolean"),
1438            "nested strictness: {msg}"
1439        );
1440    }
1441
1442    #[test]
1443    fn mapping_builder_analyzer_on_long_rejected() {
1444        // The programmatic builder path is caught by validate().
1445        let err = Mapping::builder()
1446            .field_with_mapping(FieldMapping::new("n", FieldType::Long).analyzer("english"))
1447            .build()
1448            .validate()
1449            .unwrap_err();
1450        let msg = format!("{err}");
1451        assert!(
1452            msg.contains("analyzer")
1453                && msg.contains("not supported for field type")
1454                && msg.contains("long"),
1455            "builder-path per-type check: {msg}"
1456        );
1457    }
1458
1459    #[test]
1460    fn mapping_strict_happy_paths_parse() {
1461        // Valid per-type options still parse and apply.
1462        let json: Value = serde_json::from_str(
1463            r#"{"properties": {
1464                "title": {"type": "text", "analyzer": "english", "search_analyzer": "standard", "norms": false, "store": true},
1465                "tag": {"type": "keyword", "doc_values": true, "index": false},
1466                "body": {"type": "text", "copy_to": ["title"]}
1467            }}"#,
1468        )
1469        .unwrap();
1470        let m = Mapping::from_json(&json).unwrap();
1471        let title = m.field(m.field_id("title").unwrap());
1472        assert_eq!(title.analyzer.as_deref(), Some("english"));
1473        assert_eq!(title.search_analyzer.as_deref(), Some("standard"));
1474        assert!(!title.norms);
1475        assert!(title.stored);
1476        assert!(!m.field(m.field_id("tag").unwrap()).indexed);
1477        assert_eq!(
1478            m.field(m.field_id("body").unwrap()).copy_to,
1479            vec!["title".to_string()]
1480        );
1481    }
1482
1483    #[test]
1484    fn parse_dense_vector_element_type_is_rejected() {
1485        let json: Value = serde_json::from_str(
1486            r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "element_type": "byte"}}}"#,
1487        )
1488        .unwrap();
1489        let err = Mapping::from_json(&json).unwrap_err();
1490        let msg = format!("{err}");
1491        assert!(
1492            msg.contains("element_type") && msg.contains("not yet implemented"),
1493            "error must explain why it is rejected: {msg}"
1494        );
1495    }
1496
1497    #[test]
1498    fn parse_nested_dense_vector_unknown_key_is_rejected() {
1499        // The nested-properties parser had the identical silent-drop bug
1500        // and must enforce the same contract.
1501        let json: Value = serde_json::from_str(
1502            r#"{
1503                "properties": {
1504                    "doc": {
1505                        "type": "nested",
1506                        "properties": {
1507                            "emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}
1508                        }
1509                    }
1510                }
1511            }"#,
1512        )
1513        .unwrap();
1514        let err = Mapping::from_json(&json).unwrap_err();
1515        let msg = format!("{err}");
1516        assert!(
1517            msg.contains("similarity") && msg.contains("not yet implemented"),
1518            "nested dense_vector must reject unwired options too: {msg}"
1519        );
1520    }
1521
1522    #[test]
1523    fn dense_vector_dims_round_trip_through_json() {
1524        // Regression: previously `to_json` did not emit `dims` for
1525        // dense_vector, so commit → reload silently zeroed the
1526        // dimensionality. See [[code-must-not-lie]].
1527        let mapping = Mapping::builder()
1528            .field("emb", FieldType::dense_vector(768))
1529            .build();
1530        let json = mapping.to_json();
1531        let parsed = Mapping::from_json(&json).unwrap();
1532        let f = parsed.field(parsed.field_id("emb").unwrap());
1533        assert_eq!(f.field_type.vector_dims(), Some(768));
1534    }
1535
1536    #[test]
1537    fn dense_vector_explicit_quantization_round_trips() {
1538        // None is non-default, so `to_json` must emit it explicitly
1539        // and `from_json` must read it back.
1540        let mapping = Mapping::builder()
1541            .field(
1542                "emb",
1543                FieldType::DenseVector {
1544                    dims: 4,
1545                    quantization: QuantizationType::None,
1546                },
1547            )
1548            .build();
1549        let json = mapping.to_json();
1550        let parsed = Mapping::from_json(&json).unwrap();
1551        let f = parsed.field(parsed.field_id("emb").unwrap());
1552        assert_eq!(
1553            f.field_type.vector_quantization(),
1554            Some(QuantizationType::None)
1555        );
1556    }
1557
1558    // ------------------------------------------------------------------
1559    // copy_to validation — see [[code-must-not-lie]].
1560    //
1561    // copy_to targets that don't exist in the schema were previously
1562    // silently dropped at write time. The user wrote a mapping; the
1563    // engine accepted it; nothing got copied. validate() now rejects
1564    // the mapping at parse / index-creation time so the user gets a
1565    // clear error instead of a silent no-op.
1566    // ------------------------------------------------------------------
1567
1568    #[test]
1569    fn copy_to_existing_target_is_accepted() {
1570        let json: Value = serde_json::from_str(
1571            r#"{
1572                "properties": {
1573                    "title": {"type": "text", "copy_to": "all_text"},
1574                    "all_text": {"type": "text"}
1575                }
1576            }"#,
1577        )
1578        .unwrap();
1579        let mapping = Mapping::from_json(&json).unwrap();
1580        let title = mapping.field(mapping.field_id("title").unwrap());
1581        assert_eq!(title.copy_to, vec!["all_text".to_string()]);
1582    }
1583
// A `copy_to` that references an undeclared field must make `from_json`
// fail, and the error text has to identify both ends of the broken link.
#[test]
fn copy_to_missing_target_is_rejected_at_parse() {
    const SCHEMA: &str = r#"{
                "properties": {
                    "title": {"type": "text", "copy_to": "all_text"}
                }
            }"#;

    let schema: Value = serde_json::from_str(SCHEMA).unwrap();
    let msg = Mapping::from_json(&schema).unwrap_err().to_string();

    let names_source = msg.contains("title");
    let names_target = msg.contains("all_text");
    assert!(names_source, "error must name the source field: {msg}");
    assert!(names_target, "error must name the missing target: {msg}");
}
1605
// Array form of `copy_to` where one target is declared and one is not:
// parsing must fail and the error must name the undeclared target.
//
// The undeclared target deliberately uses a distinctive name. If it were
// called "missing", the `contains` check below would also pass on generic
// wording such as "target is missing", and the test would not prove that
// the error actually names the offending field.
#[test]
fn copy_to_missing_target_in_array_is_rejected() {
    let json: Value = serde_json::from_str(
        r#"{
                "properties": {
                    "title": {"type": "text", "copy_to": ["existing", "ghost_field"]},
                    "existing": {"type": "text"}
                }
            }"#,
    )
    .unwrap();
    let err = Mapping::from_json(&json).unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("ghost_field"),
        "error must name the missing target: {msg}"
    );
}
1624
// Mappings assembled through the builder never pass through the JSON
// parser, so the dangling copy_to target has to be caught by an explicit
// validate() call — Index::create_with_mapping calls it.
#[test]
fn validate_catches_builder_api_copy_to_with_missing_target() {
    let mut dangling = FieldMapping::new("source", FieldType::Text);
    dangling.copy_to = vec!["nope".to_string()];

    let built = Mapping::builder().field_with_mapping(dangling).build();

    let msg = built.validate().unwrap_err().to_string();
    assert!(msg.contains("nope"), "error must name the target: {msg}");
}
1636
// Defense in depth: a dense_vector declared inside a `nested` field goes
// through the nested-properties parser, which must enforce the same
// contract as the top-level parser and reject "int4" quantization.
#[test]
fn parse_nested_dense_vector_int4_is_rejected() {
    const SCHEMA: &str = r#"{
                "properties": {
                    "outer": {
                        "type": "nested",
                        "properties": {
                            "emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}
                        }
                    }
                }
            }"#;

    let schema: Value = serde_json::from_str(SCHEMA).unwrap();
    let msg = Mapping::from_json(&schema).unwrap_err().to_string();
    assert!(msg.contains("int4"), "error must name the value: {msg}");
}
1658}