use std::collections::HashMap;
use crate::core::{FieldId, LuciError, Result};
use serde_json::Value;
use crate::mapping::field_type::FieldType;
use crate::mapping::mapping::FieldMapping;
use crate::mapping::quantization::QuantizationType;
/// The `dynamic` setting carried by a [`Mapping`].
///
/// Converted to and from the strings `"true"` / `"false"` by
/// `DynamicMode::from_es_value` and `DynamicMode::es_value`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DynamicMode {
// Rendered as the string "true"; this is the mode `Mapping::builder()` starts with.
True,
// Rendered as the string "false".
False,
}
impl DynamicMode {
    /// Parses the textual form of the `dynamic` setting.
    ///
    /// # Errors
    /// Returns [`LuciError::InvalidQuery`] for anything other than the exact
    /// strings `"true"` or `"false"`.
    pub fn from_es_value(s: &str) -> Result<Self> {
        if s == "true" {
            return Ok(Self::True);
        }
        if s == "false" {
            return Ok(Self::False);
        }
        Err(LuciError::InvalidQuery(format!(
            "invalid dynamic mode: {s} (expected \"true\" or \"false\")"
        )))
    }

    /// Returns the textual form accepted by [`DynamicMode::from_es_value`].
    pub fn es_value(self) -> &'static str {
        match self {
            Self::False => "false",
            Self::True => "true",
        }
    }
}
/// An ordered collection of field mappings plus a name lookup table.
///
/// Built through [`MappingBuilder`] or parsed with `Mapping::from_json`.
#[derive(Clone, Debug)]
pub struct Mapping {
// Field definitions; a field's `FieldId` is its position in this vector.
fields: Vec<FieldMapping>,
// Field name -> id, where the id indexes into `fields`.
name_to_id: HashMap<String, FieldId>,
// The mapping-level `dynamic` setting.
dynamic: DynamicMode,
}
impl Mapping {
    /// Returns an empty [`MappingBuilder`] whose dynamic mode is [`DynamicMode::True`].
    pub fn builder() -> MappingBuilder {
        MappingBuilder {
            fields: Vec::new(),
            dynamic: DynamicMode::True,
        }
    }

    /// Appends a `_id` keyword field with `stored = false`, unless a field
    /// named `_id` is already registered.
    ///
    /// # Panics
    /// Panics if the mapping already holds more than `u16::MAX` fields: the
    /// new field's position would not fit in a 16-bit id, and truncating it
    /// would make `_id` resolve to an unrelated field.
    pub fn ensure_id_field(&mut self) {
        if self.name_to_id.contains_key("_id") {
            return;
        }
        let next_index = self.fields.len();
        assert!(
            next_index <= usize::from(u16::MAX),
            "cannot add \"_id\": mapping already holds {} fields and field ids are 16-bit",
            next_index
        );
        let mut id_mapping = FieldMapping::new("_id", FieldType::Keyword);
        id_mapping.stored = false;
        // Lossless: `next_index` was bounds-checked just above.
        let id = FieldId::new(next_index as u16);
        self.name_to_id.insert("_id".to_string(), id);
        self.fields.push(id_mapping);
    }

    /// Looks up the id registered for `name`, if any.
    pub fn field_id(&self, name: &str) -> Option<FieldId> {
        self.name_to_id.get(name).copied()
    }

    /// Returns the field mapping stored at position `id`.
    ///
    /// # Panics
    /// Panics if `id` is not a valid position in this mapping's field list.
    pub fn field(&self, id: FieldId) -> &FieldMapping {
        &self.fields[id.as_u16() as usize]
    }

    /// Returns all field mappings in id order.
    pub fn fields(&self) -> &[FieldMapping] {
        &self.fields
    }

    /// Number of field mappings (sub-fields and flattened nested children included).
    pub fn len(&self) -> usize {
        self.fields.len()
    }

    /// `true` when the mapping holds no fields.
    pub fn is_empty(&self) -> bool {
        self.fields.is_empty()
    }

    /// Returns the mapping-level dynamic mode.
    pub fn dynamic_mode(&self) -> DynamicMode {
        self.dynamic
    }

    /// Checks cross-field and per-type invariants.
    ///
    /// # Errors
    /// Returns [`LuciError::InvalidQuery`] for the first field that
    /// * reports `vector_dims()` of zero,
    /// * sets `analyzer` on a type other than text / token_count,
    /// * sets `search_analyzer` on a type other than text, or
    /// * lists a `copy_to` target that is not a field of this mapping.
    pub fn validate(&self) -> Result<()> {
        for field in &self.fields {
            if matches!(field.field_type.vector_dims(), Some(0)) {
                return Err(LuciError::InvalidQuery(format!(
                    "dense_vector field \"{}\" must have dims >= 1",
                    field.name,
                )));
            }
            if field.analyzer.is_some()
                && !matches!(field.field_type, FieldType::Text | FieldType::TokenCount)
            {
                return Err(LuciError::InvalidQuery(format!(
                    "field \"{}\": \"analyzer\" is not supported for field type \"{}\"",
                    field.name,
                    field.field_type.es_name()
                )));
            }
            if field.search_analyzer.is_some() && !matches!(field.field_type, FieldType::Text) {
                return Err(LuciError::InvalidQuery(format!(
                    "field \"{}\": \"search_analyzer\" is not supported for field type \"{}\"",
                    field.name,
                    field.field_type.es_name()
                )));
            }
            for target in &field.copy_to {
                if !self.name_to_id.contains_key(target) {
                    return Err(LuciError::InvalidQuery(format!(
                        "field \"{src}\" has copy_to target \"{target}\" \
                         that is not defined in the schema",
                        src = field.name,
                    )));
                }
            }
        }
        Ok(())
    }

    /// Serializes the mapping as `{"mappings": {"dynamic"?, "properties": {...}}}`.
    ///
    /// Options are written only when they differ from what
    /// `FieldMapping::new` produces for the same field type, and `dynamic`
    /// is written only when it is not [`DynamicMode::True`]. Fields with a
    /// `parent_field` are emitted under their parent's `"fields"` object
    /// rather than as properties of their own.
    pub fn to_json(&self) -> Value {
        let mut properties = serde_json::Map::new();
        for mapping in &self.fields {
            // Sub-fields are rendered inside their parent below.
            if mapping.parent_field.is_some() {
                continue;
            }
            let mut field_obj = serde_json::Map::new();
            field_obj.insert(
                "type".into(),
                Value::String(mapping.field_type.es_name().into()),
            );
            if let FieldType::DenseVector { dims, quantization } = mapping.field_type {
                field_obj.insert("dims".into(), Value::Number(dims.into()));
                if quantization != QuantizationType::DEFAULT {
                    field_obj.insert(
                        "quantization".into(),
                        Value::String(quantization.es_name().into()),
                    );
                }
            }
            if let Some(ref analyzer) = mapping.analyzer {
                field_obj.insert("analyzer".into(), Value::String(analyzer.clone()));
            }
            if let Some(ref search_analyzer) = mapping.search_analyzer {
                field_obj.insert(
                    "search_analyzer".into(),
                    Value::String(search_analyzer.clone()),
                );
            }
            let defaults = FieldMapping::new("", mapping.field_type.clone());
            if mapping.stored != defaults.stored {
                field_obj.insert("store".into(), Value::Bool(mapping.stored));
            }
            if mapping.indexed != defaults.indexed {
                field_obj.insert("index".into(), Value::Bool(mapping.indexed));
            }
            if mapping.doc_values != defaults.doc_values {
                field_obj.insert("doc_values".into(), Value::Bool(mapping.doc_values));
            }
            if mapping.norms != defaults.norms {
                field_obj.insert("norms".into(), Value::Bool(mapping.norms));
            }
            if !mapping.copy_to.is_empty() {
                // A single target is written as a bare string, several as an array.
                if mapping.copy_to.len() == 1 {
                    field_obj.insert("copy_to".into(), Value::String(mapping.copy_to[0].clone()));
                } else {
                    field_obj.insert(
                        "copy_to".into(),
                        Value::Array(
                            mapping
                                .copy_to
                                .iter()
                                .map(|s| Value::String(s.clone()))
                                .collect(),
                        ),
                    );
                }
            }
            let prefix = format!("{}.", mapping.name);
            let sub_fields: Vec<&FieldMapping> = self
                .fields
                .iter()
                .filter(|f| f.parent_field.as_deref() == Some(&mapping.name))
                .collect();
            if !sub_fields.is_empty() {
                let mut fields_obj = serde_json::Map::new();
                for sub in sub_fields {
                    let sub_name = sub.name.strip_prefix(&prefix).unwrap_or(&sub.name);
                    let mut sub_obj = serde_json::Map::new();
                    sub_obj.insert(
                        "type".into(),
                        Value::String(sub.field_type.es_name().into()),
                    );
                    if let Some(ref a) = sub.analyzer {
                        sub_obj.insert("analyzer".into(), Value::String(a.clone()));
                    }
                    if let Some(ref sa) = sub.search_analyzer {
                        sub_obj.insert("search_analyzer".into(), Value::String(sa.clone()));
                    }
                    // `from_json` accepts these flags on sub-fields, so they must be
                    // written back or a round trip silently resets them. `store` is
                    // deliberately omitted: the parser rejects it on sub-fields.
                    let sub_defaults = FieldMapping::new("", sub.field_type.clone());
                    if sub.indexed != sub_defaults.indexed {
                        sub_obj.insert("index".into(), Value::Bool(sub.indexed));
                    }
                    if sub.doc_values != sub_defaults.doc_values {
                        sub_obj.insert("doc_values".into(), Value::Bool(sub.doc_values));
                    }
                    if sub.norms != sub_defaults.norms {
                        sub_obj.insert("norms".into(), Value::Bool(sub.norms));
                    }
                    fields_obj.insert(sub_name.to_string(), Value::Object(sub_obj));
                }
                field_obj.insert("fields".into(), Value::Object(fields_obj));
            }
            properties.insert(mapping.name.clone(), Value::Object(field_obj));
        }
        let mut mappings = serde_json::Map::new();
        if self.dynamic != DynamicMode::True {
            mappings.insert(
                "dynamic".into(),
                Value::String(self.dynamic.es_value().into()),
            );
        }
        mappings.insert("properties".into(), Value::Object(properties));
        let mut root = serde_json::Map::new();
        root.insert("mappings".into(), Value::Object(mappings));
        Value::Object(root)
    }

    /// Parses a mapping from JSON.
    ///
    /// Accepts either `{"mappings": {...}}` or the inner object directly.
    /// `dynamic` may be a boolean or one of the strings understood by
    /// [`DynamicMode::from_es_value`]. Children of `nested` fields are
    /// flattened to `parent.child` names, and multi-field entries under
    /// `"fields"` become `parent.sub` fields with `parent_field` set and
    /// `stored = false`. The result is run through [`Mapping::validate`].
    ///
    /// # Errors
    /// Returns [`LuciError::InvalidQuery`] when `properties` is missing or not
    /// an object, a field definition is malformed, a `fields` / nested
    /// `properties` value is present but not an object, the mapping declares
    /// more fields than 16-bit ids can address, or validation fails. Errors
    /// from the field-type and quantization parsers are propagated as-is.
    pub fn from_json(json: &Value) -> Result<Self> {
        let mappings_obj = json.get("mappings").unwrap_or(json);
        let mut builder = Self::builder();
        if let Some(dyn_val) = mappings_obj.get("dynamic") {
            let mode_str = match dyn_val {
                Value::String(s) => s.as_str(),
                Value::Bool(true) => "true",
                Value::Bool(false) => "false",
                _ => {
                    return Err(LuciError::InvalidQuery(
                        "\"dynamic\" must be a string or boolean".into(),
                    ));
                }
            };
            builder.dynamic = DynamicMode::from_es_value(mode_str)?;
        }
        let properties = mappings_obj
            .get("properties")
            .and_then(|p| p.as_object())
            .ok_or_else(|| {
                LuciError::InvalidQuery("missing or invalid \"properties\" object".into())
            })?;
        for (name, field_def) in properties {
            let field_obj = field_def.as_object().ok_or_else(|| {
                LuciError::InvalidQuery(format!(
                    "field \"{name}\": expected object, got {field_def}"
                ))
            })?;
            let type_name = field_obj
                .get("type")
                .and_then(|t| t.as_str())
                .ok_or_else(|| {
                    LuciError::InvalidQuery(format!("field \"{name}\": missing \"type\" property"))
                })?;
            let mut field_type = FieldType::from_es_name(type_name)?;
            if let FieldType::DenseVector {
                ref mut dims,
                ref mut quantization,
            } = field_type
            {
                parse_dense_vector_config(name, field_obj, dims, quantization)?;
            }
            let is_nested = matches!(field_type, FieldType::Nested);
            let is_dense = field_type.is_dense_vector();
            let mut mapping = FieldMapping::new(name.clone(), field_type);
            // dense_vector keys were already checked by `parse_dense_vector_config`.
            if !is_dense {
                parse_field_options(name, field_obj, &mut mapping, FieldRole::Field)?;
            }
            builder.fields.push(mapping);
            if is_nested {
                match field_obj.get("properties") {
                    None | Some(Value::Null) => {}
                    Some(Value::Object(nested_props)) => {
                        parse_nested_properties(&mut builder, name, nested_props)?;
                    }
                    // Previously dropped silently, leaving a nested field without children.
                    Some(other) => {
                        return Err(LuciError::InvalidQuery(format!(
                            "field \"{name}\": \"properties\" must be an object, got {other}"
                        )));
                    }
                }
            }
            match field_obj.get("fields") {
                None | Some(Value::Null) => {}
                Some(Value::Object(sub_fields)) => {
                    for (sub_name, sub_def) in sub_fields {
                        let sub_label = format!("{name}.{sub_name}");
                        let sub_obj = sub_def.as_object().ok_or_else(|| {
                            LuciError::InvalidQuery(format!(
                                "field \"{sub_label}\": expected object"
                            ))
                        })?;
                        let sub_type_name = sub_obj
                            .get("type")
                            .and_then(|t| t.as_str())
                            .ok_or_else(|| {
                                LuciError::InvalidQuery(format!(
                                    "field \"{sub_label}\": missing \"type\""
                                ))
                            })?;
                        let sub_type = FieldType::from_es_name(sub_type_name)?;
                        let mut sub_mapping = FieldMapping::new(sub_label.clone(), sub_type);
                        sub_mapping.stored = false;
                        sub_mapping.parent_field = Some(name.clone());
                        parse_field_options(
                            &sub_label,
                            sub_obj,
                            &mut sub_mapping,
                            FieldRole::SubField,
                        )?;
                        builder.fields.push(sub_mapping);
                    }
                }
                // Previously dropped silently, losing the sub-field definitions.
                Some(other) => {
                    return Err(LuciError::InvalidQuery(format!(
                        "field \"{name}\": \"fields\" must be an object, got {other}"
                    )));
                }
            }
        }
        // `MappingBuilder::build` assigns ids with a 16-bit cast; reject input
        // that would make ids wrap and alias earlier fields.
        let max_fields = usize::from(u16::MAX) + 1;
        if builder.fields.len() > max_fields {
            return Err(LuciError::InvalidQuery(format!(
                "mapping defines {} fields, but at most {} are supported",
                builder.fields.len(),
                max_fields
            )));
        }
        let mapping = builder.build();
        mapping.validate()?;
        Ok(mapping)
    }
}
// Position of a field definition while parsing options.
// `validate_field_options` rejects "store" and "copy_to" for `SubField`, and
// `parse_field_options` only reads those two options for `Field`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum FieldRole {
// A property (top-level or flattened nested child).
Field,
// An entry of a parent's "fields" (multi-field) object.
SubField,
}
// Every option name the parser understands for at least one field type.
// `validate_field_options` uses it to report "not supported for field type"
// when a key appears here but not in `supported_options` for the field's type.
const LUCI_OPTIONS: &[&str] = &[
"index",
"store",
"doc_values",
"norms",
"analyzer",
"search_analyzer",
"copy_to",
];
// Option names that `validate_field_options` rejects with
// "recognized but not yet supported" instead of "unknown option".
const UNIMPLEMENTED_ES_PARAMS: &[&str] = &[
"ignore_above",
"null_value",
"coerce",
"enabled",
"ignore_malformed",
"format",
"locale",
"normalizer",
"similarity",
"term_vector",
"index_options",
"index_prefixes",
"index_phrases",
"position_increment_gap",
"fielddata",
"fielddata_frequency_filter",
"ignore_z_value",
"orientation",
"eager_global_ordinals",
"meta",
"scaling_factor",
"split_queries_on_whitespace",
"search_quote_analyzer",
"time_series_dimension",
"time_series_metric",
"boost",
];
/// Returns the option names that may appear on a field of `field_type`.
///
/// `dense_vector` and `nested` yield an empty list.
fn supported_options(field_type: &FieldType) -> &'static [&'static str] {
    const TEXT_OPTIONS: &[&str] = &[
        "index",
        "store",
        "analyzer",
        "search_analyzer",
        "norms",
        "copy_to",
    ];
    const TOKEN_COUNT_OPTIONS: &[&str] = &["index", "store", "doc_values", "analyzer", "copy_to"];
    const STRUCTURED_OPTIONS: &[&str] = &["index", "store", "doc_values", "copy_to"];
    const NO_OPTIONS: &[&str] = &[];

    match field_type {
        FieldType::DenseVector { .. } | FieldType::Nested => NO_OPTIONS,
        FieldType::TokenCount => TOKEN_COUNT_OPTIONS,
        FieldType::Text => TEXT_OPTIONS,
        FieldType::Keyword
        | FieldType::Ip
        | FieldType::Integer
        | FieldType::Long
        | FieldType::Float
        | FieldType::Double
        | FieldType::Boolean
        | FieldType::Date
        | FieldType::GeoPoint
        | FieldType::GeoShape => STRUCTURED_OPTIONS,
    }
}
/// Rejects the first key of `field_obj` that is not a valid option for `field_type`.
///
/// The structural keys `type`, `fields` and `properties` are always skipped.
/// For every other key, in order of precedence:
/// * allowed for the type: accepted, except `store` / `copy_to` on a sub-field;
/// * listed in `LUCI_OPTIONS`: "not supported for field type";
/// * listed in `UNIMPLEMENTED_ES_PARAMS`: "recognized but not yet supported";
/// * anything else: "unknown option".
///
/// # Errors
/// Returns [`LuciError::InvalidQuery`] naming `field_label` and the offending key.
fn validate_field_options(
    field_label: &str,
    field_type: &FieldType,
    field_obj: &serde_json::Map<String, Value>,
    role: FieldRole,
) -> Result<()> {
    let allowed = supported_options(field_type);
    for k in field_obj.keys().map(String::as_str) {
        let message = match k {
            "type" | "fields" | "properties" => continue,
            _ if allowed.contains(&k) => {
                let banned_on_sub_field =
                    role == FieldRole::SubField && matches!(k, "store" | "copy_to");
                if !banned_on_sub_field {
                    continue;
                }
                format!(
                    "field \"{field_label}\": option \"{k}\" is not supported on a multi-field sub-field"
                )
            }
            _ if LUCI_OPTIONS.contains(&k) => format!(
                "field \"{field_label}\": option \"{k}\" is not supported for field type \"{ft}\"",
                ft = field_type.es_name()
            ),
            _ if UNIMPLEMENTED_ES_PARAMS.contains(&k) => format!(
                "field \"{field_label}\": option \"{k}\" is recognized but not yet supported"
            ),
            _ => format!("field \"{field_label}\": unknown option \"{k}\""),
        };
        return Err(LuciError::InvalidQuery(message));
    }
    Ok(())
}
fn opt_bool(
obj: &serde_json::Map<String, Value>,
key: &str,
field_label: &str,
) -> Result<Option<bool>> {
match obj.get(key) {
Some(v) if !v.is_null() => v.as_bool().map(Some).ok_or_else(|| {
LuciError::InvalidQuery(format!(
"field \"{field_label}\": \"{key}\" must be a boolean, got {v}"
))
}),
_ => Ok(None),
}
}
fn opt_str<'a>(
obj: &'a serde_json::Map<String, Value>,
key: &str,
field_label: &str,
) -> Result<Option<&'a str>> {
match obj.get(key) {
Some(v) if !v.is_null() => v.as_str().map(Some).ok_or_else(|| {
LuciError::InvalidQuery(format!(
"field \"{field_label}\": \"{key}\" must be a string, got {v}"
))
}),
_ => Ok(None),
}
}
/// Reads the `copy_to` option into `mapping.copy_to`.
///
/// Accepts a single string or an array of strings. A missing key or JSON
/// `null` leaves `mapping.copy_to` untouched, as does any error.
///
/// # Errors
/// Returns [`LuciError::InvalidQuery`] when the value has any other shape or
/// the array contains a non-string entry.
fn parse_copy_to(
    obj: &serde_json::Map<String, Value>,
    field_label: &str,
    mapping: &mut FieldMapping,
) -> Result<()> {
    let raw = match obj.get("copy_to") {
        Some(value) if !value.is_null() => value,
        _ => return Ok(()),
    };
    let targets = match raw {
        Value::String(single) => vec![single.clone()],
        Value::Array(arr) => {
            let mut collected = Vec::with_capacity(arr.len());
            for v in arr {
                match v {
                    Value::String(target) => collected.push(target.clone()),
                    _ => {
                        return Err(LuciError::InvalidQuery(format!(
                            "field \"{field_label}\": copy_to entries must be strings, got {v}"
                        )));
                    }
                }
            }
            collected
        }
        other => {
            return Err(LuciError::InvalidQuery(format!(
                "field \"{field_label}\": copy_to must be a string or array of strings, got {other}"
            )));
        }
    };
    mapping.copy_to = targets;
    Ok(())
}
/// Validates the keys of `field_obj`, then copies the recognized options into `mapping`.
///
/// `index`, `doc_values`, `norms`, `analyzer` and `search_analyzer` are read
/// for every role; `store` and `copy_to` are read only for [`FieldRole::Field`].
/// Options that are absent (or `null`) leave the corresponding field of
/// `mapping` unchanged.
///
/// # Errors
/// Propagates the first error from key validation or from any option whose
/// value has the wrong JSON type.
fn parse_field_options(
    field_label: &str,
    field_obj: &serde_json::Map<String, Value>,
    mapping: &mut FieldMapping,
    role: FieldRole,
) -> Result<()> {
    validate_field_options(field_label, &mapping.field_type, field_obj, role)?;

    // Boolean flags, read in the same order the options are checked upstream.
    mapping.indexed = opt_bool(field_obj, "index", field_label)?.unwrap_or(mapping.indexed);
    mapping.doc_values =
        opt_bool(field_obj, "doc_values", field_label)?.unwrap_or(mapping.doc_values);
    mapping.norms = opt_bool(field_obj, "norms", field_label)?.unwrap_or(mapping.norms);

    // Analyzer names replace the current value only when explicitly given.
    if let Some(index_time) = opt_str(field_obj, "analyzer", field_label)? {
        mapping.analyzer = Some(index_time.to_owned());
    }
    if let Some(query_time) = opt_str(field_obj, "search_analyzer", field_label)? {
        mapping.search_analyzer = Some(query_time.to_owned());
    }

    if role != FieldRole::Field {
        return Ok(());
    }
    mapping.stored = opt_bool(field_obj, "store", field_label)?.unwrap_or(mapping.stored);
    parse_copy_to(field_obj, field_label, mapping)
}
/// Validates a `dense_vector` definition and writes its `dims` / `quantization`.
///
/// Only `type`, `dims` and `quantization` are accepted as keys. `dims` is
/// required and must be an integer >= 1; `quantization`, when present, must be
/// a string understood by `QuantizationType::from_es_name`. `*dims` is written
/// once the dims value has been validated; `*quantization` is written only
/// when the key is present and valid.
///
/// # Errors
/// Returns [`LuciError::InvalidQuery`] for
/// * `similarity`, `index`, `index_options`, `element_type` ("recognized but
///   not yet implemented"),
/// * any other unexpected key ("unknown dense_vector option"),
/// * a missing, non-integer, negative, zero, or unrepresentably large `dims`,
/// * a non-string `quantization`.
///
/// Errors from the quantization parser are propagated unchanged.
fn parse_dense_vector_config(
    field_label: &str,
    field_obj: &serde_json::Map<String, Value>,
    dims: &mut usize,
    quantization: &mut QuantizationType,
) -> Result<()> {
    for key in field_obj.keys() {
        match key.as_str() {
            "type" | "dims" | "quantization" => {}
            "similarity" | "index" | "index_options" | "element_type" => {
                return Err(LuciError::InvalidQuery(format!(
                    "field \"{field_label}\": dense_vector option \"{key}\" is recognized but not yet implemented"
                )));
            }
            other => {
                return Err(LuciError::InvalidQuery(format!(
                    "field \"{field_label}\": unknown dense_vector option \"{other}\""
                )));
            }
        }
    }
    let dims_val = field_obj.get("dims").ok_or_else(|| {
        LuciError::InvalidQuery(format!(
            "field \"{field_label}\": dense_vector requires \"dims\""
        ))
    })?;
    // `as_u64` is `None` for strings, floats and negative numbers alike.
    let d = dims_val.as_u64().ok_or_else(|| {
        LuciError::InvalidQuery(format!(
            "field \"{field_label}\": \"dims\" must be a positive integer, got {dims_val}"
        ))
    })?;
    if d == 0 {
        return Err(LuciError::InvalidQuery(format!(
            "field \"{field_label}\": \"dims\" must be >= 1"
        )));
    }
    // Checked conversion: a plain `as usize` would silently truncate the
    // requested dimension count on targets where `usize` is narrower than 64 bits.
    // The trait path is spelled out so this does not depend on the prelude.
    *dims = <usize as std::convert::TryFrom<u64>>::try_from(d).map_err(|_| {
        LuciError::InvalidQuery(format!(
            "field \"{field_label}\": \"dims\" value {d} is too large for this platform"
        ))
    })?;
    if let Some(q_val) = field_obj.get("quantization") {
        let q_str = q_val.as_str().ok_or_else(|| {
            LuciError::InvalidQuery(format!(
                "field \"{field_label}\": \"quantization\" must be a string, got {q_val}"
            ))
        })?;
        *quantization = QuantizationType::from_es_name(q_str)?;
    }
    Ok(())
}
fn parse_nested_properties(
builder: &mut MappingBuilder,
prefix: &str,
properties: &serde_json::Map<String, Value>,
) -> Result<()> {
for (child_name, child_def) in properties {
let child_obj = child_def.as_object().ok_or_else(|| {
LuciError::InvalidQuery(format!("field \"{prefix}.{child_name}\": expected object"))
})?;
let type_name = child_obj
.get("type")
.and_then(|t| t.as_str())
.ok_or_else(|| {
LuciError::InvalidQuery(format!(
"field \"{prefix}.{child_name}\": missing \"type\""
))
})?;
let mut field_type = FieldType::from_es_name(type_name)?;
let full_name = format!("{prefix}.{child_name}");
if let FieldType::DenseVector {
ref mut dims,
ref mut quantization,
} = field_type
{
parse_dense_vector_config(&full_name, child_obj, dims, quantization)?;
}
let is_nested = matches!(field_type, FieldType::Nested);
let is_dense = field_type.is_dense_vector();
let mut mapping = FieldMapping::new(full_name.clone(), field_type);
if !is_dense {
parse_field_options(&full_name, child_obj, &mut mapping, FieldRole::Field)?;
}
builder.fields.push(mapping);
if is_nested {
if let Some(sub_props) = child_obj.get("properties").and_then(|v| v.as_object()) {
parse_nested_properties(builder, &full_name, sub_props)?;
}
}
}
Ok(())
}
/// Accumulates field mappings and a dynamic mode, then produces a [`Mapping`]
/// via `build`.
pub struct MappingBuilder {
// Fields in insertion order; `build` uses each position as the field's id.
fields: Vec<FieldMapping>,
// Dynamic mode handed to the built mapping.
dynamic: DynamicMode,
}
impl MappingBuilder {
pub fn field(mut self, name: impl Into<String>, field_type: FieldType) -> Self {
self.fields.push(FieldMapping::new(name, field_type));
self
}
pub fn field_with_mapping(mut self, mapping: FieldMapping) -> Self {
self.fields.push(mapping);
self
}
pub fn dynamic(mut self, mode: DynamicMode) -> Self {
self.dynamic = mode;
self
}
pub fn build(self) -> Mapping {
let mut name_to_id = HashMap::with_capacity(self.fields.len());
for (i, mapping) in self.fields.iter().enumerate() {
name_to_id.insert(mapping.name.clone(), FieldId::new(i as u16));
}
Mapping {
fields: self.fields,
name_to_id,
dynamic: self.dynamic,
}
}
}
// Unit tests for the builder API, JSON parsing/serialization, strict option
// validation, dense_vector configuration and copy_to target checks.
#[cfg(test)]
mod tests {
use super::*;
// Builder assigns ids in insertion order and keeps each field's type.
#[test]
fn builder_basic() {
let mapping = Mapping::builder()
.field("title", FieldType::Text)
.field("status", FieldType::Keyword)
.field("price", FieldType::Float)
.build();
assert_eq!(mapping.len(), 3);
assert_eq!(mapping.field_id("title"), Some(FieldId::new(0)));
assert_eq!(mapping.field_id("status"), Some(FieldId::new(1)));
assert_eq!(mapping.field_id("price"), Some(FieldId::new(2)));
assert_eq!(mapping.field_id("nonexistent"), None);
assert_eq!(mapping.field(FieldId::new(0)).field_type, FieldType::Text);
assert_eq!(
mapping.field(FieldId::new(1)).field_type,
FieldType::Keyword
);
assert_eq!(mapping.field(FieldId::new(2)).field_type, FieldType::Float);
}
// A pre-configured FieldMapping keeps its analyzer and norms settings.
#[test]
fn builder_with_mapping() {
let mapping = Mapping::builder()
.field_with_mapping(
FieldMapping::new("body", FieldType::Text)
.analyzer("whitespace")
.norms(false),
)
.build();
let m = mapping.field(FieldId::new(0));
assert_eq!(m.analyzer.as_deref(), Some("whitespace"));
assert!(!m.norms);
}
// A fresh builder produces DynamicMode::True.
#[test]
fn dynamic_mode_default_is_true() {
let mapping = Mapping::builder().build();
assert_eq!(mapping.dynamic_mode(), DynamicMode::True);
}
// The builder's dynamic() setter is reflected in the built mapping.
#[test]
fn dynamic_mode_false() {
let mapping = Mapping::builder().dynamic(DynamicMode::False).build();
assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
}
// to_json followed by from_json preserves types, flags and dynamic mode.
#[test]
fn json_round_trip() {
let mapping = Mapping::builder()
.field("title", FieldType::Text)
.field("status", FieldType::Keyword)
.field("price", FieldType::Float)
.field("count", FieldType::Long)
.field("active", FieldType::Boolean)
.field("created", FieldType::Date)
.dynamic(DynamicMode::False)
.build();
let json = mapping.to_json();
let parsed = Mapping::from_json(&json).unwrap();
assert_eq!(parsed.len(), mapping.len());
assert_eq!(parsed.dynamic_mode(), DynamicMode::False);
for mapping in mapping.fields() {
let id = parsed.field_id(&mapping.name).unwrap();
let parsed_mapping = parsed.field(id);
assert_eq!(parsed_mapping.field_type, mapping.field_type);
assert_eq!(parsed_mapping.stored, mapping.stored);
assert_eq!(parsed_mapping.indexed, mapping.indexed);
assert_eq!(parsed_mapping.doc_values, mapping.doc_values);
assert_eq!(parsed_mapping.norms, mapping.norms);
}
}
// The analyzer name survives a JSON round trip.
#[test]
fn json_round_trip_with_analyzer() {
let mapping = Mapping::builder()
.field_with_mapping(FieldMapping::new("body", FieldType::Text).analyzer("standard"))
.build();
let json = mapping.to_json();
let parsed = Mapping::from_json(&json).unwrap();
assert_eq!(
parsed.field(FieldId::new(0)).analyzer.as_deref(),
Some("standard")
);
}
// stored=false and norms=false on a text field survive a JSON round trip.
#[test]
fn json_round_trip_with_custom_flags() {
let mapping = Mapping::builder()
.field_with_mapping(
FieldMapping::new("body", FieldType::Text)
.stored(false)
.norms(false),
)
.build();
let json = mapping.to_json();
let parsed = Mapping::from_json(&json).unwrap();
let m = parsed.field(FieldId::new(0));
assert!(!m.stored);
assert!(!m.norms);
}
// The {"mappings": {...}} wrapper with a string "dynamic" value is accepted.
#[test]
fn parse_es_mapping_json() {
let json: Value = serde_json::from_str(
r#"{
"mappings": {
"dynamic": "false",
"properties": {
"title": {"type": "text", "analyzer": "standard"},
"status": {"type": "keyword"},
"price": {"type": "float"}
}
}
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
assert_eq!(mapping.len(), 3);
assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
assert!(mapping.field_id("title").is_some());
assert!(mapping.field_id("status").is_some());
assert!(mapping.field_id("price").is_some());
}
// The "mappings" wrapper may be omitted.
#[test]
fn parse_shorthand_json() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"name": {"type": "keyword"}
}
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
assert_eq!(mapping.len(), 1);
}
// "dynamic" may be given as a JSON boolean.
#[test]
fn parse_dynamic_as_boolean() {
let json: Value = serde_json::from_str(
r#"{
"properties": {"x": {"type": "keyword"}},
"dynamic": false
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
assert_eq!(mapping.dynamic_mode(), DynamicMode::False);
}
// A field definition without "type" is an error.
#[test]
fn parse_missing_type_is_error() {
let json: Value = serde_json::from_str(r#"{"properties": {"x": {}}}"#).unwrap();
assert!(Mapping::from_json(&json).is_err());
}
// The type name "percolator" is rejected.
#[test]
fn parse_unknown_type_is_error() {
let json: Value =
serde_json::from_str(r#"{"properties": {"x": {"type": "percolator"}}}"#).unwrap();
assert!(Mapping::from_json(&json).is_err());
}
// A mapping without "properties" is an error.
#[test]
fn parse_missing_properties_is_error() {
let json: Value = serde_json::from_str(r#"{"mappings": {}}"#).unwrap();
assert!(Mapping::from_json(&json).is_err());
}
// es_value and from_es_value are inverses for both modes.
#[test]
fn dynamic_mode_round_trip() {
for mode in [DynamicMode::True, DynamicMode::False] {
let parsed = DynamicMode::from_es_value(mode.es_value()).unwrap();
assert_eq!(parsed, mode);
}
}
// A builder with no fields yields an empty mapping.
#[test]
fn empty_schema() {
let mapping = Mapping::builder().build();
assert!(mapping.is_empty());
assert_eq!(mapping.len(), 0);
}
// Children of a nested field are registered under dotted names.
#[test]
fn parse_nested_properties_flattened() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"title": {"type": "text"},
"offers": {
"type": "nested",
"properties": {
"seller": {"type": "keyword"},
"price": {"type": "float"}
}
}
}
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
assert_eq!(mapping.len(), 4);
assert!(mapping.field_id("offers").is_some());
assert!(mapping.field_id("offers.seller").is_some());
assert!(mapping.field_id("offers.price").is_some());
}
// Nested-inside-nested children get a multi-segment dotted name.
#[test]
fn parse_deeply_nested_properties() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"offers": {
"type": "nested",
"properties": {
"variants": {
"type": "nested",
"properties": {
"color": {"type": "keyword"}
}
}
}
}
}
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
assert_eq!(mapping.len(), 3);
assert!(mapping.field_id("offers.variants.color").is_some());
}
// to_json omits "dynamic" when the mode is the default (True).
#[test]
fn default_dynamic_mode_omitted_in_json() {
let mapping = Mapping::builder().field("x", FieldType::Keyword).build();
let json = mapping.to_json();
assert!(json["mappings"].get("dynamic").is_none());
}
// Field ids are identical before and after a JSON round trip.
#[test]
fn field_ids_survive_json_roundtrip() {
let schema = Mapping::builder()
.field("title", FieldType::Text)
.field("tag", FieldType::Keyword)
.field("embedding", FieldType::dense_vector(64))
.build();
let json = schema.to_json();
let parsed = Mapping::from_json(&json).unwrap();
assert_eq!(schema.field_id("title"), parsed.field_id("title"));
assert_eq!(schema.field_id("tag"), parsed.field_id("tag"));
assert_eq!(schema.field_id("embedding"), parsed.field_id("embedding"));
}
// Without "quantization", a dense_vector reports Int8.
#[test]
fn parse_dense_vector_default_quantization_is_int8() {
let json: Value =
serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4}}}"#)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
let f = mapping.field(mapping.field_id("emb").unwrap());
assert_eq!(f.field_type.vector_dims(), Some(4));
assert_eq!(
f.field_type.vector_quantization(),
Some(QuantizationType::Int8)
);
}
// "quantization": "int8" is accepted.
#[test]
fn parse_dense_vector_explicit_int8() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int8"}}}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
let f = mapping.field(mapping.field_id("emb").unwrap());
assert_eq!(
f.field_type.vector_quantization(),
Some(QuantizationType::Int8)
);
}
// "quantization": "none" is accepted.
#[test]
fn parse_dense_vector_explicit_none() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "none"}}}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
let f = mapping.field(mapping.field_id("emb").unwrap());
assert_eq!(
f.field_type.vector_quantization(),
Some(QuantizationType::None)
);
}
// "int4" is rejected with a "not yet implemented" message naming the value.
#[test]
fn parse_dense_vector_int4_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("int4"), "error must name the value: {msg}");
assert!(
msg.contains("not yet implemented"),
"error must explain why: {msg}"
);
}
// "bbq" is rejected with a "not yet implemented" message naming the value.
#[test]
fn parse_dense_vector_bbq_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "bbq"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("bbq"), "error must name the value: {msg}");
assert!(
msg.contains("not yet implemented"),
"error must explain why: {msg}"
);
}
// An unrecognized quantization name is rejected and named in the error.
#[test]
fn parse_dense_vector_unknown_quantization_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": "magic"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("magic"), "error must name the value: {msg}");
}
// A numeric "quantization" value is a type error.
#[test]
fn parse_dense_vector_non_string_quantization_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "quantization": 8}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("must be a string"),
"error must explain the type mismatch: {msg}"
);
}
// dense_vector without "dims" is rejected.
#[test]
fn parse_dense_vector_missing_dims_is_rejected() {
let json: Value =
serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector"}}}"#).unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("dims"),
"error must name the missing option: {msg}"
);
}
// A string "dims" value is rejected.
#[test]
fn parse_dense_vector_string_dims_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": "4"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("dims") && msg.contains("positive integer"),
"error must explain the type mismatch: {msg}"
);
}
// "dims": 0 is rejected.
#[test]
fn parse_dense_vector_zero_dims_is_rejected() {
let json: Value =
serde_json::from_str(r#"{"properties": {"emb": {"type": "dense_vector", "dims": 0}}}"#)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("dims"), "error must name the option: {msg}");
}
// A negative "dims" value is rejected.
#[test]
fn parse_dense_vector_negative_dims_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": -4}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("positive integer"),
"error must explain the type mismatch: {msg}"
);
}
// An unexpected dense_vector key is reported as unknown.
#[test]
fn parse_dense_vector_unknown_key_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "dimensions": 8}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("unknown") && msg.contains("dimensions"),
"error must name the unknown option: {msg}"
);
}
// "similarity" on dense_vector is rejected as not yet implemented.
#[test]
fn parse_dense_vector_similarity_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("similarity") && msg.contains("not yet implemented"),
"error must explain why it is rejected: {msg}"
);
}
// "index" given as the string "false" is a type error.
#[test]
fn mapping_string_bool_index_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"sku": {"type": "keyword", "index": "false"}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("index") && msg.contains("boolean"),
"error must name the option and the expected type: {msg}"
);
}
// "doc_values" given as a number is a type error.
#[test]
fn mapping_non_bool_doc_values_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"sku": {"type": "keyword", "doc_values": 1}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("doc_values") && msg.contains("boolean"),
"{msg}"
);
}
// A misspelled option key is reported as an unknown option.
#[test]
fn mapping_unknown_key_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"body": {"type": "text", "anlyzer": "english"}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("unknown option") && msg.contains("anlyzer"),
"error must name the unknown key: {msg}"
);
}
// "analyzer" on an integer field is rejected per type.
#[test]
fn mapping_analyzer_on_numeric_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"qty": {"type": "integer", "analyzer": "english"}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("analyzer")
&& msg.contains("not supported for field type")
&& msg.contains("integer"),
"error must explain the per-type rejection: {msg}"
);
}
// "analyzer" on a keyword field is rejected.
#[test]
fn mapping_analyzer_on_keyword_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"tag": {"type": "keyword", "analyzer": "english"}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(msg.contains("analyzer") && msg.contains("keyword"), "{msg}");
}
// "ignore_above" is rejected as "not yet supported".
#[test]
fn mapping_unimplemented_es_param_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"tag": {"type": "keyword", "ignore_above": 256}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("ignore_above") && msg.contains("not yet supported"),
"error must explain it is a deferred feature: {msg}"
);
}
// A non-string entry inside a copy_to array is rejected.
#[test]
fn mapping_copy_to_non_string_element_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"a": {"type": "keyword", "copy_to": ["ok", 7]}, "ok": {"type": "text"}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(msg.contains("copy_to") && msg.contains("strings"), "{msg}");
}
// A numeric copy_to value is rejected.
#[test]
fn mapping_copy_to_wrong_shape_rejected() {
let json: Value =
serde_json::from_str(r#"{"properties": {"a": {"type": "keyword", "copy_to": 42}}}"#)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(msg.contains("copy_to"), "{msg}");
}
// Unknown keys on a multi-field sub-field are rejected, naming "parent.sub".
#[test]
fn mapping_subfield_unknown_key_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"title": {"type": "text", "fields": {"raw": {"type": "keyword", "indx": "x"}}}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("title.raw") && msg.contains("indx"),
"sub-field strictness: {msg}"
);
}
// Option type checks also apply to children of nested fields.
#[test]
fn mapping_nested_child_string_bool_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"items": {"type": "nested", "properties": {"qty": {"type": "integer", "index": "no"}}}}}"#,
)
.unwrap();
let msg = format!("{}", Mapping::from_json(&json).unwrap_err());
assert!(
msg.contains("items.qty") && msg.contains("index") && msg.contains("boolean"),
"nested strictness: {msg}"
);
}
// validate() applies the per-type analyzer check to builder-made mappings.
#[test]
fn mapping_builder_analyzer_on_long_rejected() {
let err = Mapping::builder()
.field_with_mapping(FieldMapping::new("n", FieldType::Long).analyzer("english"))
.build()
.validate()
.unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("analyzer")
&& msg.contains("not supported for field type")
&& msg.contains("long"),
"builder-path per-type check: {msg}"
);
}
// Valid option combinations parse and land in the expected fields.
#[test]
fn mapping_strict_happy_paths_parse() {
let json: Value = serde_json::from_str(
r#"{"properties": {
"title": {"type": "text", "analyzer": "english", "search_analyzer": "standard", "norms": false, "store": true},
"tag": {"type": "keyword", "doc_values": true, "index": false},
"body": {"type": "text", "copy_to": ["title"]}
}}"#,
)
.unwrap();
let m = Mapping::from_json(&json).unwrap();
let title = m.field(m.field_id("title").unwrap());
assert_eq!(title.analyzer.as_deref(), Some("english"));
assert_eq!(title.search_analyzer.as_deref(), Some("standard"));
assert!(!title.norms);
assert!(title.stored);
assert!(!m.field(m.field_id("tag").unwrap()).indexed);
assert_eq!(
m.field(m.field_id("body").unwrap()).copy_to,
vec!["title".to_string()]
);
}
// "element_type" on dense_vector is rejected as not yet implemented.
#[test]
fn parse_dense_vector_element_type_is_rejected() {
let json: Value = serde_json::from_str(
r#"{"properties": {"emb": {"type": "dense_vector", "dims": 4, "element_type": "byte"}}}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("element_type") && msg.contains("not yet implemented"),
"error must explain why it is rejected: {msg}"
);
}
// dense_vector key checks also apply to children of nested fields.
#[test]
fn parse_nested_dense_vector_unknown_key_is_rejected() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"doc": {
"type": "nested",
"properties": {
"emb": {"type": "dense_vector", "dims": 4, "similarity": "cosine"}
}
}
}
}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("similarity") && msg.contains("not yet implemented"),
"nested dense_vector must reject unwired options too: {msg}"
);
}
// dense_vector dims survive a JSON round trip.
#[test]
fn dense_vector_dims_round_trip_through_json() {
let mapping = Mapping::builder()
.field("emb", FieldType::dense_vector(768))
.build();
let json = mapping.to_json();
let parsed = Mapping::from_json(&json).unwrap();
let f = parsed.field(parsed.field_id("emb").unwrap());
assert_eq!(f.field_type.vector_dims(), Some(768));
}
// QuantizationType::None survives a JSON round trip.
#[test]
fn dense_vector_explicit_quantization_round_trips() {
let mapping = Mapping::builder()
.field(
"emb",
FieldType::DenseVector {
dims: 4,
quantization: QuantizationType::None,
},
)
.build();
let json = mapping.to_json();
let parsed = Mapping::from_json(&json).unwrap();
let f = parsed.field(parsed.field_id("emb").unwrap());
assert_eq!(
f.field_type.vector_quantization(),
Some(QuantizationType::None)
);
}
// copy_to pointing at a defined field is accepted.
#[test]
fn copy_to_existing_target_is_accepted() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"title": {"type": "text", "copy_to": "all_text"},
"all_text": {"type": "text"}
}
}"#,
)
.unwrap();
let mapping = Mapping::from_json(&json).unwrap();
let title = mapping.field(mapping.field_id("title").unwrap());
assert_eq!(title.copy_to, vec!["all_text".to_string()]);
}
// copy_to pointing at an undefined field fails during from_json.
#[test]
fn copy_to_missing_target_is_rejected_at_parse() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"title": {"type": "text", "copy_to": "all_text"}
}
}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("title"),
"error must name the source field: {msg}"
);
assert!(
msg.contains("all_text"),
"error must name the missing target: {msg}"
);
}
// One undefined target inside a copy_to array is enough to fail.
#[test]
fn copy_to_missing_target_in_array_is_rejected() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"title": {"type": "text", "copy_to": ["existing", "missing"]},
"existing": {"type": "text"}
}
}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("missing"),
"error must name the missing target: {msg}"
);
}
// validate() catches an undefined copy_to target on a builder-made mapping.
#[test]
fn validate_catches_builder_api_copy_to_with_missing_target() {
let mut source = FieldMapping::new("source", FieldType::Text);
source.copy_to = vec!["nope".to_string()];
let mapping = Mapping::builder().field_with_mapping(source).build();
let err = mapping.validate().unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("nope"), "error must name the target: {msg}");
}
// Quantization checks also apply to dense_vector children of nested fields.
#[test]
fn parse_nested_dense_vector_int4_is_rejected() {
let json: Value = serde_json::from_str(
r#"{
"properties": {
"outer": {
"type": "nested",
"properties": {
"emb": {"type": "dense_vector", "dims": 4, "quantization": "int4"}
}
}
}
}"#,
)
.unwrap();
let err = Mapping::from_json(&json).unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("int4"), "error must name the value: {msg}");
}
}