use crate::ingest::{
IngestClassification, IngestDType, IngestEntity, IngestEntityAcquired, IngestEntityList,
IngestEntityProperty, IngestJsonNameKeyedStructure, IngestJsonStructure,
IngestJsonStructureList, IngestNamedStructure, IngestRelation, IngestSchema,
IngestStructureProperties, IngestStructureProperty, IngestValidator, IngestValidatorMode,
};
use crate::json_schema::JSONSchemaIngestSchema;
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom;
use std::fmt;
/// Free-text description carried by entity specs, structure properties and relations.
pub type Description = String;
/// Validator pattern text. It is stored as a plain `String`; nothing in this
/// file compiles or validates it.
pub type Regex = String;
/// Threshold value carried by entity specs, structure properties and classifications.
pub type Threshold = f64;
/// Newtype around the `String` used for normalized names.
///
/// `normalize_name` builds values of this type from lowercase ASCII letters,
/// digits and single `_` separators; `ExpandedName::new` itself performs no
/// validation.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Default)]
pub struct ExpandedName(String);
impl ExpandedName {
pub fn new(s: String) -> Self {
Self(s)
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_inner(self) -> String {
self.0
}
}
/// Renders the bare name text by delegating to the string's own `Display`
/// implementation, so formatter flags are handled the same way as for a `str`.
impl fmt::Display for ExpandedName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}
/// Data-type tag attached to entity specs and structure properties.
///
/// `looks_like_dtype` maps textual spellings onto these variants and
/// `dtype_1_to_2` converts the ingest representation into them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum DType {
String,
Int,
Float,
Bool,
}
/// Matching mode of a [`Validator`], mirrored one-to-one from
/// `IngestValidatorMode` by `validator_1_to_2`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum ValidatorMode {
Partial,
Full,
}
/// Pattern-based validator attached to an entity spec or structure property.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Validator {
/// Pattern text, copied verbatim from the ingest form.
pub pattern: Regex,
/// Matching mode; `None` when the ingest form did not specify one.
pub mode: Option<ValidatorMode>,
/// Exclusion flag; `validator_1_to_2` fills in `false` when the ingest form omits it.
pub exclude: bool,
}
/// Normalized description of a single entity: a normalized name plus the
/// optional dtype, validator, threshold and description gathered from whichever
/// ingest form it was written in.
#[derive(Debug, Clone, PartialEq, Serialize, Default)]
pub struct EntitySpec {
/// Normalized entity name.
pub name: ExpandedName,
/// Data type, when one was given.
pub dtype: Option<DType>,
/// Validator, when one was given.
pub validator: Option<Validator>,
/// Threshold, when one was given.
pub threshold: Option<Threshold>,
/// Free-text description, when one was given.
pub description: Option<Description>,
}
/// Normalized specification of one property inside a JSON structure.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct StructureProperty {
/// Normalized choice entities; empty when the ingest form listed none.
pub choices: Vec<EntitySpec>,
/// Free-text description, when one was given.
pub description: Option<Description>,
/// Value string copied from the ingest property, when present.
pub value: Option<String>,
/// Data type, when one was given.
pub dtype: Option<DType>,
/// Validator, when one was given.
pub validator: Option<Validator>,
/// Threshold, when one was given.
pub threshold: Option<Threshold>,
}
/// Structure that carries its own name alongside its properties.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct NamedStructure {
/// Normalized structure name.
pub name: ExpandedName,
/// Properties keyed by their normalized names.
pub props: BTreeMap<ExpandedName, StructureProperty>,
}
/// One normalized JSON structure, in any of the shapes produced by
/// `json_structure_1_to_2`.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum JsonStructure {
/// Structure whose ingest form carried an explicit `name` field.
NamedStructure(NamedStructure),
/// Structure whose name was the key of a name-keyed ingest map.
NameKeyedStructure {
name: ExpandedName,
props: BTreeMap<ExpandedName, StructureProperty>,
},
/// Plain list of entity specs.
EntityList(Vec<EntitySpec>),
}
/// Normalized classification task.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Classification {
/// The task, normalized as an entity spec.
pub task: EntitySpec,
/// Candidate labels, each normalized as an entity spec.
pub labels: Vec<EntitySpec>,
/// Threshold merged from the ingest `threshold` / `cls_threshold` aliases.
pub threshold: Option<Threshold>,
/// Multi-label flag; `classification_1_to_2` fills in `false` when omitted.
pub multi_label: bool,
/// Per-label specs keyed by the normalized label name.
pub label_descriptions: BTreeMap<ExpandedName, EntitySpec>,
}
/// Head/tail information attached to a relation.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum RelationAcquired {
/// Produced by `entity_acquired_to_2` when both head and tail are empty strings.
Empty,
/// Head and tail entities, each normalized as an entity spec.
Entity {
head: Box<EntitySpec>,
tail: Box<EntitySpec>,
},
}
/// Normalized relation.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Relation {
/// Normalized relation name.
pub name: ExpandedName,
/// Description; set only for the name-to-description ingest form.
pub description: Option<Description>,
/// Head/tail information; set only for the name-to-acquired ingest form.
pub acquired: Option<RelationAcquired>,
}
/// Fully normalized schema built from an `IngestSchema`.
///
/// Each list is empty when the corresponding ingest section is absent.
#[derive(Debug, Clone, PartialEq, Serialize, Default)]
pub struct NormalizedSchema {
pub entities: Vec<EntitySpec>,
pub json_structures: Vec<JsonStructure>,
pub classifications: Vec<Classification>,
pub relations: Vec<Relation>,
}
/// Textual location inside the schema document, used in error messages.
///
/// Paths start at `$` and grow by appending `.field`, `[index]` or `["key"]`
/// segments; every builder returns a new value and leaves `self` untouched.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Path(String);

impl Path {
    /// The document root, rendered as `$`.
    pub fn root() -> Self {
        Path(String::from("$"))
    }

    /// Appends a `.name` segment.
    pub fn field(&self, name: &str) -> Self {
        let mut rendered = String::with_capacity(self.0.len() + 1 + name.len());
        rendered.push_str(&self.0);
        rendered.push('.');
        rendered.push_str(name);
        Path(rendered)
    }

    /// Appends an `[idx]` segment.
    pub fn index(&self, idx: usize) -> Self {
        Path(format!("{}[{}]", self.0, idx))
    }

    /// Appends a `["key"]` segment; the key is rendered with `Debug`
    /// formatting, so it is quoted and escaped.
    pub fn key(&self, key: &str) -> Self {
        Path(format!("{}[{:?}]", self.0, key))
    }

    /// Borrows the rendered path text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
/// Errors raised while converting an ingest schema into a [`NormalizedSchema`].
///
/// Every variant records `path`, the rendered [`Path`] of the offending node.
#[derive(Debug, thiserror::Error)]
pub enum SchemaNormalizeError {
/// The raw name is empty after trimming, contains a character outside
/// ASCII letters, digits, `_`, `-` and space, or normalizes to nothing.
#[error("invalid name at {path}: {value:?}")]
InvalidName { path: String, value: String },
/// A map that must hold exactly one entry held `found` entries.
#[error("expected exactly one key at {path}, found {found}")]
ExpectedSingleKey { path: String, found: usize },
/// A `::`-delimited entity string could not be split into any segment.
#[error("invalid colon-delimited entity at {path}: {raw:?}")]
InvalidColonDelimitedEntity { path: String, raw: String },
/// A `::`-delimited entity string has a segment left over after the
/// threshold, dtype and description slots were filled.
#[error("ambiguous colon-delimited entity at {path}: {raw:?}")]
AmbiguousColonDelimitedEntity { path: String, raw: String },
/// Both `threshold` and `cls_threshold` were given with different values.
#[error(
"conflicting threshold aliases at {path}: threshold={threshold:?}, cls_threshold={cls_threshold:?}"
)]
ConflictingThresholdAliases {
path: String,
threshold: Option<f64>,
cls_threshold: Option<f64>,
},
/// Two raw names normalized to the same `name`.
#[error("duplicate normalized name at {path}: {name}")]
DuplicateNormalizedName { path: String, name: String },
// NOTE(review): this variant is not constructed anywhere in the visible source.
#[error("invalid empty-acquired relation at {path}")]
InvalidEmptyAcquiredRelation { path: String },
/// A description-only entity property was given without a name to attach it to.
#[error("invalid structure properties at {path}")]
InvalidStructureProperties { path: String },
/// A textual dtype is not one of the spellings `looks_like_dtype` accepts.
#[error("invalid dtype at {path}: {value:?}")]
InvalidDType { path: String, value: String },
/// Decoding or converting a nested value failed; `message` is the inner error text.
#[error("nested decode failed at {path}: {message}")]
NestedDecode { path: String, message: String },
}
/// Normalizes a raw name into an [`ExpandedName`].
///
/// Surrounding whitespace is trimmed, ASCII letters are lowercased, digits are
/// kept, and each run of `_`, `-` or space collapses into a single `_`.
/// Separators at the very start or end are dropped.
///
/// # Errors
///
/// Returns `SchemaNormalizeError::InvalidName` (carrying `path` and the
/// untouched `raw` text) when any other character appears or when nothing is
/// left after normalization.
fn normalize_name(raw: &str, path: &Path) -> Result<ExpandedName, SchemaNormalizeError> {
    let invalid = || SchemaNormalizeError::InvalidName {
        path: path.as_str().to_string(),
        value: raw.to_string(),
    };

    let mut normalized = String::new();
    for ch in raw.trim().chars() {
        if ch.is_ascii_lowercase() || ch.is_ascii_digit() {
            normalized.push(ch);
        } else if ch.is_ascii_uppercase() {
            normalized.push(ch.to_ascii_lowercase());
        } else if matches!(ch, '_' | '-' | ' ') {
            // Only emit a separator after real content, and never two in a row.
            if !normalized.is_empty() && !normalized.ends_with('_') {
                normalized.push('_');
            }
        } else {
            return Err(invalid());
        }
    }

    // Drop any separator left dangling at the end.
    let kept = normalized.trim_end_matches('_').len();
    normalized.truncate(kept);

    if normalized.is_empty() {
        Err(invalid())
    } else {
        Ok(ExpandedName::new(normalized))
    }
}
/// Converts an ingest dtype into a [`DType`].
///
/// The `Int`, `Float` and `Bool` ingest variants map directly (their payload
/// is ignored); the `String` variant is looked up with `looks_like_dtype`.
///
/// # Errors
///
/// Returns `SchemaNormalizeError::InvalidDType` with `path` and the offending
/// text when a `String` payload is not a recognized dtype spelling.
fn dtype_1_to_2(v: IngestDType, path: &Path) -> Result<DType, SchemaNormalizeError> {
    let text = match v {
        IngestDType::Int(_) => return Ok(DType::Int),
        IngestDType::Float(_) => return Ok(DType::Float),
        IngestDType::Bool(_) => return Ok(DType::Bool),
        IngestDType::String(text) => text,
    };
    match looks_like_dtype(&text) {
        Some(dtype) => Ok(dtype),
        None => Err(SchemaNormalizeError::InvalidDType {
            path: path.as_str().to_string(),
            value: text,
        }),
    }
}
/// Converts an ingest validator into a [`Validator`].
///
/// A bare regex becomes a validator with no mode and `exclude == false`; the
/// dict form carries its pattern and mode across and treats a missing
/// `exclude` as `false`.
fn validator_1_to_2(v: IngestValidator) -> Validator {
    let (pattern, mode, exclude) = match v {
        IngestValidator::Regex(pattern) => (pattern, None, false),
        IngestValidator::Dict(dict) => {
            let mode = match dict.mode {
                Some(IngestValidatorMode::Partial) => Some(ValidatorMode::Partial),
                Some(IngestValidatorMode::Full) => Some(ValidatorMode::Full),
                None => None,
            };
            (dict.pattern, mode, dict.exclude.unwrap_or(false))
        }
    };
    Validator {
        pattern,
        mode,
        exclude,
    }
}
/// Recognizes a textual dtype spelling.
///
/// The input is trimmed and compared ASCII-case-insensitively against
/// `str`/`string`, `int`/`integer`, `float` and `bool`/`boolean`. Returns
/// `None` for anything else.
fn looks_like_dtype(s: &str) -> Option<DType> {
    const SPELLINGS: [(&str, DType); 7] = [
        ("str", DType::String),
        ("string", DType::String),
        ("int", DType::Int),
        ("integer", DType::Int),
        ("float", DType::Float),
        ("bool", DType::Bool),
        ("boolean", DType::Bool),
    ];
    let candidate = s.trim();
    SPELLINGS
        .iter()
        .find(|(spelling, _)| candidate.eq_ignore_ascii_case(spelling))
        .map(|&(_, dtype)| dtype)
}
/// `(name, dtype, threshold, description)` extracted from a `::`-delimited entity string.
type ParsedColonDelimitedEntity = (ExpandedName, Option<DType>, Option<f64>, Option<String>);

/// Parses an entity written as `name::segment::segment…`.
///
/// The first segment is the name (normalized with `normalize_name`). Each
/// later non-empty segment, trimmed, fills the first free slot it fits, tried
/// in this order:
///
/// 1. threshold — if the segment parses as `f64` (note that `f64`'s parser
///    also accepts spellings such as `inf` and `nan`);
/// 2. dtype — if `looks_like_dtype` recognizes it;
/// 3. description — any other text.
///
/// # Errors
///
/// * `InvalidName` (via `normalize_name`) when the name segment is unusable.
/// * `AmbiguousColonDelimitedEntity` when a segment fits no free slot.
/// * `InvalidColonDelimitedEntity` when splitting yields no segment at all;
///   `str::split` always yields at least one item, so this is purely defensive.
fn parse_colon_delimited_entity(
    raw: &str,
    path: &Path,
) -> Result<ParsedColonDelimitedEntity, SchemaNormalizeError> {
    let mut segments = raw.split("::").map(str::trim);
    let Some(raw_name) = segments.next() else {
        return Err(SchemaNormalizeError::InvalidColonDelimitedEntity {
            path: path.as_str().to_string(),
            raw: raw.to_string(),
        });
    };
    let name = normalize_name(raw_name, &path.field("name"))?;

    let mut dtype = None;
    let mut threshold = None;
    let mut description = None;
    for seg in segments.filter(|seg| !seg.is_empty()) {
        if threshold.is_none()
            && let Ok(value) = seg.parse::<f64>()
        {
            threshold = Some(value);
        } else if dtype.is_none()
            && let Some(dt) = looks_like_dtype(seg)
        {
            dtype = Some(dt);
        } else if description.is_none() {
            description = Some(seg.to_string());
        } else {
            return Err(SchemaNormalizeError::AmbiguousColonDelimitedEntity {
                path: path.as_str().to_string(),
                raw: raw.to_string(),
            });
        }
    }
    Ok((name, dtype, threshold, description))
}
fn entity_property_to_spec(
name_hint: Option<&str>,
ep: IngestEntityProperty,
path: &Path,
) -> Result<EntitySpec, SchemaNormalizeError> {
match ep {
IngestEntityProperty::Description(desc) => {
let Some(name_hint) = name_hint else {
return Err(SchemaNormalizeError::InvalidStructureProperties {
path: path.as_str().to_string(),
});
};
Ok(EntitySpec {
name: normalize_name(name_hint, &path.field("name"))?,
dtype: None,
validator: None,
threshold: None,
description: Some(desc),
})
}
IngestEntityProperty::Dict(d) => {
let raw_name = if d.name.trim().is_empty() {
name_hint.unwrap_or_default()
} else {
d.name.as_str()
};
let dtype = match d.dtype {
Some(dt) => Some(dtype_1_to_2(dt, &path.field("dtype"))?),
None => None,
};
Ok(EntitySpec {
name: normalize_name(raw_name, &path.field("name"))?,
dtype,
validator: d.validator.map(validator_1_to_2),
threshold: d.threshold,
description: d.description,
})
}
}
}
/// Extracts the only entry of `map`.
///
/// # Errors
///
/// Returns `SchemaNormalizeError::ExpectedSingleKey` with `path` and the
/// actual entry count when the map does not hold exactly one entry.
fn single_entry_map<K, V>(
    map: BTreeMap<K, V>,
    path: &Path,
) -> Result<(K, V), SchemaNormalizeError> {
    let found = map.len();
    let mut entries = map.into_iter();
    match (entries.next(), entries.next()) {
        (Some(only), None) => Ok(only),
        _ => Err(SchemaNormalizeError::ExpectedSingleKey {
            path: path.as_str().to_string(),
            found,
        }),
    }
}
fn entity_1_to_spec(entity: IngestEntity, path: &Path) -> Result<EntitySpec, SchemaNormalizeError> {
match entity {
IngestEntity::Stringish(s) => {
if s.contains("::") {
let (name, dtype, threshold, description) = parse_colon_delimited_entity(&s, path)?;
Ok(EntitySpec {
name,
dtype,
validator: None,
threshold,
description,
})
} else {
Ok(EntitySpec {
name: normalize_name(&s, &path.field("name"))?,
dtype: None,
validator: None,
threshold: None,
description: None,
})
}
}
IngestEntity::SingleEntityDict(map) => {
let (k, v) = single_entry_map(map, path)?;
entity_property_to_spec(Some(&k), v, path)
}
}
}
/// Converts an ingest entity list into a vector of [`EntitySpec`]s.
///
/// List items are converted with `entity_1_to_spec` under `path[i]`; dict
/// entries are converted with `entity_property_to_spec`, using the key as the
/// name hint, under `path["key"]`. Conversion stops at the first error.
fn entity_list_1_to_vec(
    list: IngestEntityList,
    path: &Path,
) -> Result<Vec<EntitySpec>, SchemaNormalizeError> {
    let mut specs = Vec::new();
    match list {
        IngestEntityList::List(items) => {
            for (position, item) in items.into_iter().enumerate() {
                specs.push(entity_1_to_spec(item, &path.index(position))?);
            }
        }
        IngestEntityList::Dict(map) => {
            for (key, prop) in map {
                specs.push(entity_property_to_spec(Some(&key), prop, &path.key(&key))?);
            }
        }
    }
    Ok(specs)
}
/// Converts an ingest structure property into a [`StructureProperty`].
///
/// The dtype is converted first, then the choices; a missing `choices` field
/// becomes an empty vector. The remaining fields are carried over unchanged,
/// apart from the validator, which goes through `validator_1_to_2`.
fn structure_property_1_to_2(
    v: IngestStructureProperty,
    path: &Path,
) -> Result<StructureProperty, SchemaNormalizeError> {
    let dtype = v
        .dtype
        .map(|dt| dtype_1_to_2(dt, &path.field("dtype")))
        .transpose()?;
    let choices = v
        .choices
        .map(|list| entity_list_1_to_vec(list, &path.field("choices")))
        .transpose()?
        .unwrap_or_default();
    Ok(StructureProperty {
        choices,
        description: v.description,
        value: v.value,
        dtype,
        validator: v.validator.map(validator_1_to_2),
        threshold: v.threshold,
    })
}
/// Defensive check that no key occurs twice in `map`.
///
/// A `BTreeMap` cannot hold two equal keys, so with `ExpandedName`'s derived
/// ordering this is expected to always succeed; the check is kept as a guard
/// for callers. Keys are tracked by reference, so no key text is cloned.
///
/// # Errors
///
/// Returns `SchemaNormalizeError::DuplicateNormalizedName` with `path` and the
/// repeated name if a key is seen twice.
fn ensure_unique_keys<T>(
    map: &BTreeMap<ExpandedName, T>,
    path: &Path,
) -> Result<(), SchemaNormalizeError> {
    let mut seen: BTreeSet<&ExpandedName> = BTreeSet::new();
    for key in map.keys() {
        if !seen.insert(key) {
            return Err(SchemaNormalizeError::DuplicateNormalizedName {
                path: path.as_str().to_string(),
                name: key.to_string(),
            });
        }
    }
    Ok(())
}
fn insert_unique<T>(
map: &mut BTreeMap<ExpandedName, T>,
key: ExpandedName,
value: T,
path: &Path,
) -> Result<(), SchemaNormalizeError> {
if map.insert(key.clone(), value).is_some() {
return Err(SchemaNormalizeError::DuplicateNormalizedName {
path: path.as_str().to_string(),
name: key.to_string(),
});
}
Ok(())
}
fn structure_properties_1_to_map(
v: IngestStructureProperties,
path: &Path,
) -> Result<BTreeMap<ExpandedName, StructureProperty>, SchemaNormalizeError> {
let out = match v {
IngestStructureProperties::EntityDict(map) => {
let mut out = BTreeMap::new();
for (k, v) in map {
let key = normalize_name(&k, &path.key(&k))?;
let spec = entity_property_to_spec(Some(&k), v, &path.key(&k))?;
insert_unique(
&mut out,
key,
StructureProperty {
choices: Vec::new(),
description: spec.description,
value: None,
dtype: spec.dtype,
validator: spec.validator,
threshold: spec.threshold,
},
path,
)?;
}
out
}
IngestStructureProperties::EntityList(list) => {
let specs = entity_list_1_to_vec(list, path)?;
let mut out = BTreeMap::new();
for spec in specs {
insert_unique(
&mut out,
spec.name.clone(),
StructureProperty {
choices: Vec::new(),
description: spec.description,
value: None,
dtype: spec.dtype,
validator: spec.validator,
threshold: spec.threshold,
},
path,
)?;
}
out
}
IngestStructureProperties::StructurePropertiesDict(map) => {
let mut out = BTreeMap::new();
for (k, v) in map {
let key = normalize_name(&k, &path.key(&k))?;
let value = structure_property_1_to_2(v, &path.key(&k))?;
insert_unique(&mut out, key, value, path)?;
}
out
}
};
ensure_unique_keys(&out, path)?;
Ok(out)
}
/// Decodes a raw JSON value as ingest structure properties and normalizes it.
///
/// # Errors
///
/// Returns `SchemaNormalizeError::NestedDecode` with `path` and the decoder's
/// message when the value does not deserialize; otherwise propagates the
/// errors of `structure_properties_1_to_map`.
fn try_parse_structure_properties_value(
    v: serde_json::Value,
    path: &Path,
) -> Result<BTreeMap<ExpandedName, StructureProperty>, SchemaNormalizeError> {
    match serde_json::from_value::<IngestStructureProperties>(v) {
        Ok(parsed) => structure_properties_1_to_map(parsed, path),
        Err(decode_err) => Err(SchemaNormalizeError::NestedDecode {
            path: path.as_str().to_string(),
            message: decode_err.to_string(),
        }),
    }
}
/// Converts an ingest named structure into a [`NamedStructure`].
///
/// The structure name and every property key are normalized; each property is
/// converted with `structure_property_1_to_2`.
///
/// # Errors
///
/// Propagates name and property conversion errors, and returns
/// `DuplicateNormalizedName` (at `path.props`) when two property keys
/// normalize to the same name.
fn named_structure_1_to_2(
    ns: IngestNamedStructure,
    path: &Path,
) -> Result<NamedStructure, SchemaNormalizeError> {
    let props_path = path.field("props");
    let name = normalize_name(&ns.name, &path.field("name"))?;

    let mut props = BTreeMap::new();
    for (raw_key, raw_prop) in ns.props {
        let entry_path = path.key(&raw_key);
        let key = normalize_name(&raw_key, &entry_path)?;
        let property = structure_property_1_to_2(raw_prop, &entry_path)?;
        insert_unique(&mut props, key, property, &props_path)?;
    }
    ensure_unique_keys(&props, &props_path)?;

    Ok(NamedStructure { name, props })
}
/// Converts one ingest JSON structure into zero or more [`JsonStructure`]s.
///
/// Named structures and entity lists each yield exactly one element. A
/// name-keyed map yields one `NameKeyedStructure` per entry, with the key
/// normalized as the structure name and the value decoded as its properties.
fn json_structure_1_to_2(
    v: IngestJsonStructure,
    path: &Path,
) -> Result<Vec<JsonStructure>, SchemaNormalizeError> {
    let structures = match v {
        IngestJsonStructure::NamedStructure(ns) => {
            vec![JsonStructure::NamedStructure(named_structure_1_to_2(
                ns, path,
            )?)]
        }
        IngestJsonStructure::EntityList(list) => {
            vec![JsonStructure::EntityList(entity_list_1_to_vec(list, path)?)]
        }
        IngestJsonStructure::JsonNameKeyedStructure(keyed) => {
            let IngestJsonNameKeyedStructure(map) = keyed;
            map.into_iter()
                .map(
                    |(raw_name, value)| -> Result<JsonStructure, SchemaNormalizeError> {
                        let entry_path = path.key(&raw_name);
                        let name = normalize_name(&raw_name, &entry_path)?;
                        let props = try_parse_structure_properties_value(value, &entry_path)?;
                        Ok(JsonStructure::NameKeyedStructure { name, props })
                    },
                )
                .collect::<Result<Vec<_>, _>>()?
        }
    };
    Ok(structures)
}
fn classification_1_to_2(
v: IngestClassification,
path: &Path,
) -> Result<Classification, SchemaNormalizeError> {
let threshold = match (v.threshold, v.cls_threshold) {
(Some(a), Some(b)) if (a - b).abs() > f64::EPSILON => {
return Err(SchemaNormalizeError::ConflictingThresholdAliases {
path: path.as_str().to_string(),
threshold: Some(a),
cls_threshold: Some(b),
});
}
(Some(a), _) => Some(a),
(_, Some(b)) => Some(b),
_ => None,
};
let task = entity_1_to_spec(v.task, &path.field("task"))?;
let labels = entity_list_1_to_vec(v.labels, &path.field("labels"))?;
let mut label_descriptions = BTreeMap::new();
if let Some(map) = v.label_descriptions {
for (k, ep) in map {
let key = normalize_name(&k, &path.field("label_descriptions").key(&k))?;
let spec =
entity_property_to_spec(Some(&k), ep, &path.field("label_descriptions").key(&k))?;
label_descriptions.insert(key, spec);
}
}
Ok(Classification {
task,
labels,
threshold,
multi_label: v.multi_label.unwrap_or(false),
label_descriptions,
})
}
/// Converts the head/tail pair of a relation into a [`RelationAcquired`].
///
/// Yields `RelationAcquired::Empty` only when both head and tail are string
/// entities that are exactly empty (no trimming is applied). Otherwise both
/// sides are normalized as entity specs, head first.
fn entity_acquired_to_2(
    v: IngestEntityAcquired,
    path: &Path,
) -> Result<RelationAcquired, SchemaNormalizeError> {
    let both_empty = matches!(
        (&v.head, &v.tail),
        (IngestEntity::Stringish(head), IngestEntity::Stringish(tail))
            if head.is_empty() && tail.is_empty()
    );
    if both_empty {
        return Ok(RelationAcquired::Empty);
    }

    let head = entity_1_to_spec(v.head, &path.field("head"))?;
    let tail = entity_1_to_spec(v.tail, &path.field("tail"))?;
    Ok(RelationAcquired::Entity {
        head: Box::new(head),
        tail: Box::new(tail),
    })
}
/// Converts an ingest relation into a [`Relation`].
///
/// * Bare name: only the name is set.
/// * Single-entry name-to-description map: name and description are set.
/// * Single-entry name-to-acquired map: name and head/tail information are set.
///
/// # Errors
///
/// Returns `ExpectedSingleKey` when a map form does not hold exactly one
/// entry, and propagates name / entity normalization errors.
fn relation_1_to_2(v: IngestRelation, path: &Path) -> Result<Relation, SchemaNormalizeError> {
    let (name, description, acquired) = match v {
        IngestRelation::Name(raw_name) => {
            let name = normalize_name(&raw_name, &path.field("name"))?;
            (name, None, None)
        }
        IngestRelation::NameDescription(map) => {
            let (raw_name, text) = single_entry_map(map, path)?;
            let name = normalize_name(&raw_name, &path.key(&raw_name))?;
            (name, Some(text), None)
        }
        IngestRelation::RelationEntityAcquired(map) => {
            let (raw_name, raw_acquired) = single_entry_map(map, path)?;
            let entry_path = path.key(&raw_name);
            let name = normalize_name(&raw_name, &entry_path)?;
            let acquired = entity_acquired_to_2(raw_acquired, &entry_path)?;
            (name, None, Some(acquired))
        }
    };
    Ok(Relation {
        name,
        description,
        acquired,
    })
}
impl TryFrom<IngestSchema> for NormalizedSchema {
type Error = SchemaNormalizeError;
fn try_from(v: IngestSchema) -> Result<Self, Self::Error> {
let root = Path::root();
let entities = match v.entities {
Some(x) => entity_list_1_to_vec(x, &root.field("entities"))?,
None => Vec::new(),
};
let mut json_structures = Vec::new();
match v.json_structures {
None => {}
Some(IngestJsonStructureList::Single(one)) => {
json_structures.extend(json_structure_1_to_2(one, &root.field("json_structures"))?);
}
Some(IngestJsonStructureList::List(list)) => {
for (i, item) in list.into_iter().enumerate() {
json_structures.extend(json_structure_1_to_2(
item,
&root.field("json_structures").index(i),
)?);
}
}
}
let classifications = match v.classifications {
Some(list) => list
.into_iter()
.enumerate()
.map(|(i, cls)| classification_1_to_2(cls, &root.field("classifications").index(i)))
.collect::<Result<Vec<_>, _>>()?,
None => Vec::new(),
};
let relations = match v.relations {
Some(list) => list
.into_iter()
.enumerate()
.map(|(i, rel)| relation_1_to_2(rel, &root.field("relations").index(i)))
.collect::<Result<Vec<_>, _>>()?,
None => Vec::new(),
};
Ok(Self {
entities,
json_structures,
classifications,
relations,
})
}
}
/// Reports whether the error's rendered message contains the text
/// `unknown field`. This is a plain substring match on the message.
fn ingest_error_is_unknown_field(err: &serde_json::Error) -> bool {
    let message = err.to_string();
    message.contains("unknown field")
}
/// Extracts the field name from an error message of the form
/// ``unknown field `NAME` …``.
///
/// Returns the text between the first occurrence of the prefix and the next
/// backtick, or `None` when the message does not have that shape.
fn unknown_field_name_in_ingest_error(err: &serde_json::Error) -> Option<String> {
    let message = err.to_string();
    let (_, after_prefix) = message.split_once("unknown field `")?;
    let (field, _) = after_prefix.split_once('`')?;
    Some(field.to_string())
}
/// Decides whether an unknown-field error looks like the input was a JSON
/// Schema document rather than an ingest schema.
///
/// True when the unknown field name starts with `$` or is one of the listed
/// JSON Schema keywords; false when no field name can be extracted.
fn ingest_error_suggests_json_schema_root(err: &serde_json::Error) -> bool {
    const JSON_SCHEMA_KEYWORDS: [&str; 13] = [
        "type",
        "properties",
        "required",
        "items",
        "title",
        "description",
        "definitions",
        "additionalProperties",
        "patternProperties",
        "allOf",
        "anyOf",
        "oneOf",
        "not",
    ];
    match unknown_field_name_in_ingest_error(err) {
        Some(field) => {
            field.starts_with('$') || JSON_SCHEMA_KEYWORDS.contains(&field.as_str())
        }
        None => false,
    }
}
impl NormalizedSchema {
    /// Loads a normalized schema from JSON bytes.
    ///
    /// The bytes are first decoded as an ingest schema. If that fails, they
    /// are retried as a JSON Schema document — unless the failure was an
    /// unknown field that does not look like a JSON Schema keyword, in which
    /// case the original error is returned straight away.
    ///
    /// # Errors
    ///
    /// * `SchemaLoadError::Json` with the original ingest decode error when
    ///   neither interpretation parses.
    /// * `SchemaLoadError::Normalize` wrapping `NestedDecode` (path `$`) when
    ///   the JSON Schema document cannot be converted to an ingest schema.
    /// * `SchemaLoadError::Normalize` for any normalization failure.
    pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, SchemaLoadError> {
        let json_err = match IngestSchema::from_json_slice(bytes) {
            Ok(ingest) => return Ok(Self::try_from(ingest)?),
            Err(err) => err,
        };

        let plain_unknown_field = ingest_error_is_unknown_field(&json_err)
            && !ingest_error_suggests_json_schema_root(&json_err);
        if plain_unknown_field {
            return Err(SchemaLoadError::Json(json_err));
        }

        // Fall back to JSON Schema; on failure report the original ingest error.
        let Ok(schema) = JSONSchemaIngestSchema::from_json_utf8(bytes) else {
            return Err(SchemaLoadError::Json(json_err));
        };
        let ingest = match IngestSchema::try_from(schema) {
            Ok(ingest) => ingest,
            Err(convert_err) => {
                return Err(SchemaLoadError::Normalize(
                    SchemaNormalizeError::NestedDecode {
                        path: "$".to_string(),
                        message: convert_err.to_string(),
                    },
                ));
            }
        };
        let normalized = Self::try_from(ingest)?;
        Ok(normalized)
    }

    /// Loads a normalized schema from a JSON string; see [`Self::from_json_bytes`].
    pub fn from_json_str(s: &str) -> Result<Self, SchemaLoadError> {
        Self::from_json_bytes(s.as_bytes())
    }
}
/// Errors returned by `NormalizedSchema::from_json_bytes` and
/// `NormalizedSchema::from_json_str`.
#[derive(Debug, thiserror::Error)]
pub enum SchemaLoadError {
/// The input could not be decoded as JSON in an accepted shape.
#[error("json parse error: {0}")]
Json(#[from] serde_json::Error),
/// The input decoded but could not be normalized.
#[error("schema normalization error: {0}")]
Normalize(#[from] SchemaNormalizeError),
}
// End-to-end tests that drive normalization through `NormalizedSchema::from_json_str`.
#[cfg(test)]
mod tests {
use super::*;
// A `name::dtype::threshold::description` string fills all four entity fields.
#[test]
fn normalized_parses_colon_delimited_entity() {
let s = r#"{ "entities": ["Gene::str::0.95::gene symbol"] }"#;
let schema = NormalizedSchema::from_json_str(s).unwrap();
assert_eq!(schema.entities.len(), 1);
let e = &schema.entities[0];
assert_eq!(e.name.as_str(), "gene");
assert_eq!(e.threshold, Some(0.95));
assert_eq!(e.description.as_deref(), Some("gene symbol"));
assert_eq!(e.dtype, Some(DType::String));
}
// Hyphens and underscores both become `_`, and letters are lowercased.
#[test]
fn normalized_normalizes_hyphenated_and_camelish_names() {
let s = r#"{ "entities": ["Gene-Name", "Other_Name"] }"#;
let schema = NormalizedSchema::from_json_str(s).unwrap();
assert_eq!(schema.entities[0].name.as_str(), "gene_name");
assert_eq!(schema.entities[1].name.as_str(), "other_name");
}
// `threshold` and `cls_threshold` with different values are rejected.
#[test]
fn normalized_rejects_conflicting_threshold_aliases() {
let s = r#"
{
"classifications": [
{
"task": "sentiment",
"labels": ["positive", "negative"],
"threshold": 0.4,
"cls_threshold": 0.7
}
]
}
"#;
let err = NormalizedSchema::from_json_str(s).unwrap_err();
match err {
SchemaLoadError::Normalize(SchemaNormalizeError::ConflictingThresholdAliases {
..
}) => {}
other => panic!("unexpected error: {other:?}"),
}
}
// A JSON Schema document is accepted through the fallback path and yields
// one name-keyed structure named after its `$id`.
#[test]
fn normalized_parses_json_schema_root_string() {
let raw = r#"
{
"$id": "BusinessRecord",
"type": "object",
"required": ["business_name"],
"properties": {
"business_name": { "type": "string" },
"status": { "type": "string" }
}
}
"#;
let schema = NormalizedSchema::from_json_str(raw).unwrap();
assert_eq!(schema.json_structures.len(), 1);
let JsonStructure::NameKeyedStructure { name, props } = &schema.json_structures[0] else {
panic!("expected name-keyed structure");
};
assert_eq!(name.as_str(), "businessrecord");
assert!(props.contains_key(&ExpandedName::new("business_name".into())));
assert!(props.contains_key(&ExpandedName::new("status".into())));
}
// An unknown top-level key that is not a JSON Schema keyword is a JSON error,
// with no JSON Schema fallback.
#[test]
fn normalized_rejects_unknown_top_level_ie_key() {
let s = r#"{ "entities": ["gene"], "extra_root": 1 }"#;
let err = NormalizedSchema::from_json_str(s).expect_err("unknown IE key");
match err {
SchemaLoadError::Json(_) => {}
other => panic!("unexpected error: {other:?}"),
}
}
// An entity dict inside a list must hold exactly one key.
#[test]
fn normalized_rejects_multi_key_single_entity_dict() {
let s = r#"
{
"entities": [
{
"gene": "desc",
"protein": "desc2"
}
]
}
"#;
let err = NormalizedSchema::from_json_str(s).unwrap_err();
match err {
SchemaLoadError::Normalize(SchemaNormalizeError::ExpectedSingleKey { .. }) => {}
other => panic!("unexpected error: {other:?}"),
}
}
// The structure name and its property keys are all normalized.
#[test]
fn normalized_named_structure_normalizes_property_keys() {
let s = r#"
{
"json_structures": [
{
"name": "Patient Record",
"Field-A": {
"description": "field a",
"dtype": "str"
},
"Field B": {
"choices": ["x", "y::str::0.5::label"]
}
}
]
}
"#;
let schema = NormalizedSchema::from_json_str(s).unwrap();
let JsonStructure::NamedStructure(ns) = &schema.json_structures[0] else {
panic!("expected named structure");
};
assert_eq!(ns.name.as_str(), "patient_record");
assert!(ns.props.keys().any(|k| k.as_str() == "field_a"));
assert!(ns.props.keys().any(|k| k.as_str() == "field_b"));
}
// Two property keys that normalize to the same name are rejected.
#[test]
fn normalized_rejects_duplicate_normalized_structure_property_names() {
let s = r#"
{
"json_structures": [
{
"name": "Patient Record",
"Field-A": { "dtype": "str" },
"Field A": { "dtype": "str" }
}
]
}
"#;
let err = NormalizedSchema::from_json_str(s).unwrap_err();
match err {
SchemaLoadError::Normalize(SchemaNormalizeError::DuplicateNormalizedName {
name,
..
}) => assert_eq!(name, "field_a"),
other => panic!("unexpected error: {other:?}"),
}
}
}