use crate::normalized::{
Classification, DType, EntitySpec, ExpandedName, JsonStructure, NamedStructure,
NormalizedSchema, Relation, RelationAcquired, StructureProperty, Validator,
};
use serde::Serialize;
use std::collections::BTreeMap;
use std::convert::TryFrom;
/// Text used for the optional `description` fields of the expanded types.
pub type Description = String;
/// `String` alias that no item visible in this part of the file refers to.
/// NOTE(review): presumably holds a regular-expression pattern — confirm at its use sites.
pub type Regex = String;
/// `f64` alias used for the optional `threshold` fields of the expanded types.
/// NOTE(review): presumably a score/confidence cutoff — the conversions here only copy it.
pub type Threshold = f64;
/// Expanded form of a single entity definition.
///
/// Built from a normalized `EntitySpec` by `entity2_to_3`, which moves the
/// five fields below across without modifying them.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ExpandedEntity {
/// Name of the entity, taken from the spec as-is.
pub name: ExpandedName,
/// Optional `DType` carried over from the spec.
pub dtype: Option<DType>,
/// Optional `Validator` carried over from the spec.
pub validator: Option<Validator>,
/// Optional numeric threshold carried over from the spec.
pub threshold: Option<Threshold>,
/// Optional description text carried over from the spec.
pub description: Option<Description>,
}
/// Expanded form of one property of a JSON structure.
///
/// Produced either from a normalized `StructureProperty`
/// (`structure_property2_to_3`) or from a bare `EntitySpec` that appeared in
/// an entity list (`entity_spec_to_structure_property`).
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ExpandedStructureProperty {
/// Expanded choice entities; always empty when the property came from an entity list.
pub choices: Vec<ExpandedEntity>,
/// Optional description text.
pub description: Option<Description>,
/// Optional string value; always `None` when the property came from an entity list.
pub value: Option<String>,
/// Optional `DType` of the property.
pub dtype: Option<DType>,
/// Optional `Validator` of the property.
pub validator: Option<Validator>,
/// Optional numeric threshold of the property.
pub threshold: Option<Threshold>,
}
/// Expanded form of a JSON structure: a name plus its properties.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ExpandedJsonStructure {
/// The structure's name. For structures that were given as a plain entity
/// list, `json_structure2_to_3` generates `unnamed_{index}` instead.
pub name: ExpandedName,
/// Properties keyed by name; being a `BTreeMap`, each key occurs at most once
/// and iteration follows the key ordering of `ExpandedName`.
pub props: BTreeMap<ExpandedName, ExpandedStructureProperty>,
}
/// Expanded form of a classification definition.
///
/// Built from a normalized `Classification` by `classification2_to_3`; every
/// contained `EntitySpec` is expanded, the scalar fields are copied.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ExpandedClassification {
/// The classification task, expanded from the normalized `task` entity.
pub task: ExpandedEntity,
/// The labels, expanded in their original order.
pub labels: Vec<ExpandedEntity>,
/// Optional numeric threshold copied from the normalized classification.
pub threshold: Option<Threshold>,
/// Flag copied from the normalized classification.
/// NOTE(review): presumably allows several labels per item — confirm with the consumer.
pub multi_label: bool,
/// Expanded entities keyed by name, converted from the normalized `label_descriptions` map.
pub label_descriptions: BTreeMap<ExpandedName, ExpandedEntity>,
}
/// Expanded form of a relation.
///
/// Only relations that carry an acquired form can be represented; a relation
/// without one is rejected by `relation2_to_3` with
/// `SchemaExpandError::RelationWithoutAcquired`.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum ExpandedRelation {
/// Produced from `RelationAcquired::Empty`: only the name and description are kept.
EmptyAcquired {
/// Name of the relation.
name: ExpandedName,
/// Optional description text.
description: Option<Description>,
},
/// Produced from `RelationAcquired::Entity`: additionally carries both endpoint entities.
EntityAcquired {
/// Name of the relation.
name: ExpandedName,
/// Optional description text.
description: Option<Description>,
/// Expanded `head` entity of the relation (boxed).
head: Box<ExpandedEntity>,
/// Expanded `tail` entity of the relation (boxed).
tail: Box<ExpandedEntity>,
},
}
/// Fully expanded schema, obtained from a `NormalizedSchema` via `TryFrom`.
///
/// The derived `Default` yields a schema whose four lists are all empty.
#[derive(Debug, Clone, PartialEq, Serialize, Default)]
pub struct ExpandedSchema {
/// Top-level entities, in the order of the normalized schema.
pub entities: Vec<ExpandedEntity>,
/// JSON structures, in the order of the normalized schema.
pub json_structures: Vec<ExpandedJsonStructure>,
/// Relations, in the order of the normalized schema.
pub relations: Vec<ExpandedRelation>,
/// Classifications, in the order of the normalized schema.
pub classifications: Vec<ExpandedClassification>,
}
/// Errors that can occur while expanding a `NormalizedSchema` into an `ExpandedSchema`.
#[derive(Debug, thiserror::Error)]
pub enum SchemaExpandError {
/// A relation had no acquired form (`acquired` was `None`).
///
/// `index` is the zero-based position of the relation in the normalized
/// schema's relation list; `name` is the relation's name rendered with `to_string()`.
#[error("relation without acquired form cannot be expanded at index {index}: {name}")]
RelationWithoutAcquired { index: usize, name: String },
}
fn entity2_to_3(v: EntitySpec) -> ExpandedEntity {
ExpandedEntity {
name: v.name,
dtype: v.dtype,
validator: v.validator,
threshold: v.threshold,
description: v.description,
}
}
fn entity_spec_to_structure_property(spec: EntitySpec) -> ExpandedStructureProperty {
ExpandedStructureProperty {
choices: Vec::new(),
description: spec.description,
value: None,
dtype: spec.dtype,
validator: spec.validator,
threshold: spec.threshold,
}
}
fn structure_property2_to_3(v: StructureProperty) -> ExpandedStructureProperty {
ExpandedStructureProperty {
choices: v.choices.into_iter().map(entity2_to_3).collect(),
description: v.description,
value: v.value,
dtype: v.dtype,
validator: v.validator,
threshold: v.threshold,
}
}
fn json_structure2_to_3(
v: JsonStructure,
index: usize,
) -> Result<ExpandedJsonStructure, SchemaExpandError> {
match v {
JsonStructure::NamedStructure(NamedStructure { name, props }) => {
Ok(ExpandedJsonStructure {
name,
props: props
.into_iter()
.map(|(k, v)| (k, structure_property2_to_3(v)))
.collect(),
})
}
JsonStructure::NameKeyedStructure { name, props } => Ok(ExpandedJsonStructure {
name,
props: props
.into_iter()
.map(|(k, v)| (k, structure_property2_to_3(v)))
.collect(),
}),
JsonStructure::EntityList(list) => {
let props = list
.into_iter()
.map(|spec| {
let key = spec.name.clone();
(key, entity_spec_to_structure_property(spec))
})
.collect();
Ok(ExpandedJsonStructure {
name: ExpandedName::new(format!("unnamed_{index}")),
props,
})
}
}
}
fn classification2_to_3(v: Classification) -> ExpandedClassification {
ExpandedClassification {
task: entity2_to_3(v.task),
labels: v.labels.into_iter().map(entity2_to_3).collect(),
threshold: v.threshold,
multi_label: v.multi_label,
label_descriptions: v
.label_descriptions
.into_iter()
.map(|(k, v)| (k, entity2_to_3(v)))
.collect(),
}
}
fn relation2_to_3(v: Relation, index: usize) -> Result<ExpandedRelation, SchemaExpandError> {
match v.acquired {
Some(RelationAcquired::Empty) => Ok(ExpandedRelation::EmptyAcquired {
name: v.name,
description: v.description,
}),
Some(RelationAcquired::Entity { head, tail }) => Ok(ExpandedRelation::EntityAcquired {
name: v.name,
description: v.description,
head: Box::new(entity2_to_3(*head)),
tail: Box::new(entity2_to_3(*tail)),
}),
None => Err(SchemaExpandError::RelationWithoutAcquired {
index,
name: v.name.to_string(),
}),
}
}
impl TryFrom<NormalizedSchema> for ExpandedSchema {
type Error = SchemaExpandError;
fn try_from(v: NormalizedSchema) -> Result<Self, Self::Error> {
let entities = v.entities.into_iter().map(entity2_to_3).collect();
let json_structures = v
.json_structures
.into_iter()
.enumerate()
.map(|(i, js)| json_structure2_to_3(js, i))
.collect::<Result<Vec<_>, _>>()?;
let relations = v
.relations
.into_iter()
.enumerate()
.map(|(i, rel)| relation2_to_3(rel, i))
.collect::<Result<Vec<_>, _>>()?;
let classifications = v
.classifications
.into_iter()
.map(classification2_to_3)
.collect();
Ok(Self {
entities,
json_structures,
relations,
classifications,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::normalized::NormalizedSchema;

    /// Parses `json` into a `NormalizedSchema`, panicking if parsing fails.
    fn normalize(json: &str) -> NormalizedSchema {
        NormalizedSchema::from_json_str(json).unwrap()
    }

    /// Parses and expands `json`, panicking if either step fails.
    fn expand(json: &str) -> ExpandedSchema {
        ExpandedSchema::try_from(normalize(json)).unwrap()
    }

    /// Builds the map key used to look up the property called `raw`.
    fn key(raw: &str) -> ExpandedName {
        ExpandedName::new(raw.to_string())
    }

    // The JSON literals below are kept flush-left so their contents stay
    // exactly as they were.

    #[test]
    fn expanded_expands_named_structure() {
        let input = r#"
{
"json_structures": [
{
"name": "Patient Record",
"id": { "description": "identifier", "dtype": "str" }
}
],
"relations": [
{ "contains": { "head": "patient", "tail": "record" } }
]
}
"#;
        let expanded = expand(input);

        assert_eq!(expanded.json_structures.len(), 1);
        assert_eq!(expanded.relations.len(), 1);
        assert_eq!(expanded.json_structures[0].name.as_str(), "patient_record");
    }

    #[test]
    fn expanded_rejects_relation_without_acquired_form() {
        let input = r#"
{
"relations": ["interacts_with"]
}
"#;
        let err = ExpandedSchema::try_from(normalize(input)).unwrap_err();

        assert!(matches!(
            err,
            SchemaExpandError::RelationWithoutAcquired { .. }
        ));
    }

    #[test]
    fn expanded_entity_list_becomes_unnamed_structure() {
        let input = r#"
{
"json_structures": [
["gene::str", "score::float::0.9"]
]
}
"#;
        let expanded = expand(input);

        assert_eq!(expanded.json_structures.len(), 1);
        let structure = &expanded.json_structures[0];
        assert_eq!(structure.name.as_str(), "unnamed_0");
        assert_eq!(expanded.entities.len(), 0);
        assert_eq!(structure.props.len(), 2);

        let gene = structure.props.get(&key("gene")).unwrap();
        assert_eq!(gene.dtype, Some(DType::String));
        assert_eq!(gene.threshold, None);

        let score = structure.props.get(&key("score")).unwrap();
        assert_eq!(score.dtype, Some(DType::Float));
        assert_eq!(score.threshold, Some(0.9));
    }

    #[test]
    fn expanded_multiple_entity_lists_get_sequential_names() {
        let input = r#"
{
"json_structures": [
["a::str"],
["b::float"],
["c::bool"]
]
}
"#;
        let expanded = expand(input);

        assert_eq!(expanded.json_structures.len(), 3);
        let expected_names = ["unnamed_0", "unnamed_1", "unnamed_2"];
        for (structure, expected) in expanded.json_structures.iter().zip(expected_names) {
            assert_eq!(structure.name.as_str(), expected);
        }
    }

    #[test]
    fn expanded_mixed_entity_list_and_named_structure() {
        let input = r#"
{
"entities": ["gene::str"],
"json_structures": [
["patient::str"],
{
"name": "Patient Record",
"id": { "dtype": "str" }
}
]
}
"#;
        let expanded = expand(input);

        assert_eq!(expanded.entities.len(), 1);
        assert_eq!(expanded.entities[0].name.as_str(), "gene");
        assert_eq!(expanded.json_structures.len(), 2);
        assert_eq!(expanded.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(expanded.json_structures[1].name.as_str(), "patient_record");
    }

    #[test]
    fn expanded_entity_list_entities_not_promoted_to_top_level() {
        let input = r#"
{
"entities": ["gene::str"],
"json_structures": [
["gene::str", "score::float::0.9"]
]
}
"#;
        let expanded = expand(input);

        assert_eq!(expanded.entities.len(), 1);
        assert_eq!(expanded.entities[0].name.as_str(), "gene");
        assert_eq!(expanded.json_structures.len(), 1);

        let props = &expanded.json_structures[0].props;
        assert!(props.contains_key(&key("gene")));
        assert!(props.contains_key(&key("score")));
    }
}