use std::collections::{BTreeMap, HashMap, HashSet};
use regex::Regex;
use crate::{
attribute::Attribute,
object::{Enumeration, Object},
option::AttrOption,
prelude::DataModel,
};
use super::schema::{EnumObject, PrimitiveType, Property, SchemaObject, SchemaType};
/// JSON Schema type names that are filtered out of an attribute's `dtypes`
/// when a `Property` is converted into an `Attribute`.
const IGNORE_TYPES: [&str; 2] = ["object", "array"];
impl TryFrom<SchemaObject> for DataModel {
type Error = Box<dyn std::error::Error>;
fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
let mut objects = vec![schema_obj.clone().try_into()?];
let mut enums = vec![];
for (name, definition) in schema_obj.definitions {
match definition {
SchemaType::Object(object) => {
let mut object: Object = object.try_into()?;
object.name = name;
objects.push(object);
}
SchemaType::Enum(enum_obj) => {
let mut enum_obj: Enumeration = enum_obj.try_into()?;
enum_obj.name = name;
enums.push(enum_obj);
}
}
}
Ok(DataModel {
objects,
enums,
name: Some(schema_obj.title),
..Default::default()
})
}
}
impl TryFrom<SchemaObject> for Object {
type Error = Box<dyn std::error::Error>;
fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
let mut attributes = schema_obj
.properties
.into_iter()
.map(|(name, property)| {
let mut attribute: Attribute = property.try_into()?;
attribute.name = name.clone();
Ok(attribute)
})
.collect::<Result<Vec<Attribute>, Self::Error>>()?;
for required_attribute in schema_obj.required {
let attribute = attributes
.iter_mut()
.find(|attr| attr.name == required_attribute);
if let Some(attr) = attribute {
attr.required = true;
}
}
Ok(Object {
name: schema_obj.title,
attributes,
docstring: schema_obj.description.unwrap_or_default(),
term: None,
mixins: Vec::new(),
position: None,
})
}
}
impl TryFrom<Property> for Attribute {
    type Error = Box<dyn std::error::Error>;

    /// Converts a schema property into an `Attribute`.
    ///
    /// Data types are gathered, in this order, from:
    ///
    /// 1. the `items` of an array property, or the property's own type when
    ///    it is not an array,
    /// 2. the property's reference,
    /// 3. every entry of `oneOf`,
    /// 4. the single entry of `allOf`.
    ///
    /// Each raw type or reference is reduced to its last `/`-separated
    /// segment. Names listed in `IGNORE_TYPES` are skipped and duplicates are
    /// dropped, keeping the first occurrence. The resulting `dtypes` therefore
    /// follow the declaration order of the schema and are stable between runs.
    ///
    /// # Errors
    ///
    /// Fails when a type or reference has an empty last segment, when `allOf`
    /// does not contain exactly one entry, or when an option cannot be parsed.
    fn try_from(property: Property) -> Result<Self, Self::Error> {
        let is_array = property
            .dtype
            .as_ref()
            .is_some_and(|dtype| dtype.is_array());

        // `dtypes` keeps insertion order; `seen` only answers "already there?".
        let mut dtypes: Vec<String> = Vec::new();
        let mut seen: HashSet<String> = HashSet::new();
        let mut record = |raw: String| -> Result<(), String> {
            let name = extract_reference(raw)?;
            if !IGNORE_TYPES.contains(&name.as_str()) && seen.insert(name.clone()) {
                dtypes.push(name);
            }
            Ok(())
        };

        if is_array {
            // For arrays the element types are what matters, not "array".
            if let Some(items) = &property.items {
                for raw in items.get_types() {
                    record(raw)?;
                }
            }
        } else if let Some(dtype) = &property.dtype {
            record(dtype.to_string())?;
        }

        if let Some(reference) = &property.reference {
            record(reference.clone())?;
        }

        if let Some(one_of) = property.one_of {
            for item in one_of.iter() {
                for raw in item.get_types() {
                    record(raw)?;
                }
            }
        }

        if let Some(all_of) = property.all_of {
            if all_of.len() == 1 {
                for raw in all_of[0].get_types() {
                    record(raw)?;
                }
            } else {
                return Err("allOf with multiple items is not supported yet".into());
            }
        }

        Ok(Attribute {
            // Placeholder name; callers that know the property key overwrite it.
            name: property
                .title
                .unwrap_or_else(|| "MISSING_TITLE".to_string()),
            is_array,
            dtypes,
            is_id: false,
            docstring: property.description.unwrap_or_default(),
            options: parse_options(&property.options)?,
            term: property.term,
            required: false,
            default: None,
            xml: None,
            is_enum: false,
            position: None,
            import_prefix: None,
        })
    }
}
impl TryFrom<EnumObject> for Enumeration {
    type Error = Box<dyn std::error::Error>;

    /// Converts a schema enum into an `Enumeration`, deriving one key per value.
    ///
    /// Key selection for each value:
    ///
    /// * a value that is already a valid key is upper-cased and used as is,
    /// * any other non-empty value shorter than 15 bytes is run through
    ///   `clean_key`,
    /// * everything else gets the positional key `VALUE_<index>`.
    ///
    /// The positional key is also used when the derived key is empty or is
    /// already taken by an earlier value (for example `"m"` and `"M"`), so
    /// such values are kept instead of overwriting each other. A positional
    /// key can still clash with a value that is literally named
    /// `VALUE_<index>`; that case is not handled.
    fn try_from(enum_obj: EnumObject) -> Result<Self, Self::Error> {
        let mut mappings = BTreeMap::new();

        for (i, value) in enum_obj.enum_values.iter().enumerate() {
            let derived = if is_valid_key(value) {
                Some(value.to_uppercase())
            } else if !value.is_empty() && value.len() < 15 {
                // Empty values are excluded above: there is nothing to clean.
                Some(clean_key(value).to_uppercase())
            } else {
                None
            };

            let key = derived
                .filter(|key| !key.is_empty() && !mappings.contains_key(key))
                .unwrap_or_else(|| format!("VALUE_{i}"));

            mappings.insert(key, value.clone());
        }

        Ok(Enumeration {
            name: enum_obj.title,
            docstring: enum_obj.description.unwrap_or_default(),
            position: None,
            mappings,
        })
    }
}
/// Turns an arbitrary string into an upper-case, underscore-separated key.
///
/// Every run of non-alphanumeric characters (underscores included) is
/// collapsed into a single `_`. If the first character of the result is not
/// alphabetic, that one character is removed. The remainder is upper-cased.
///
/// Never panics: an empty input yields an empty string, and a leading
/// multi-byte character is removed on its character boundary.
fn clean_key(key: &str) -> String {
    // Single pass: keep alphanumerics, emit at most one `_` per separator run.
    let mut collapsed = String::with_capacity(key.len());
    for c in key.chars() {
        if c.is_alphanumeric() {
            collapsed.push(c);
        } else if !collapsed.ends_with('_') {
            collapsed.push('_');
        }
    }

    // Drop exactly one leading character when it is not a letter, slicing by
    // its UTF-8 width rather than by a fixed byte count.
    let body = match collapsed.chars().next() {
        Some(first) if !first.is_alphabetic() => &collapsed[first.len_utf8()..],
        _ => collapsed.as_str(),
    };

    body.to_uppercase()
}
/// Converts raw schema options into `AttrOption`s.
///
/// Options are processed in ascending key order so the returned vector is
/// the same on every run, independent of the map's iteration order.
///
/// # Errors
///
/// Returns the first error produced by `AttrOption::from_pair`.
fn parse_options(
    options: &HashMap<String, PrimitiveType>,
) -> Result<Vec<AttrOption>, Box<dyn std::error::Error>> {
    let mut entries: Vec<_> = options.iter().collect();
    // Keys of a map are unique, so an unstable sort cannot reorder equal keys.
    entries.sort_unstable_by(|(left, _), (right, _)| left.cmp(right));

    let mut parsed_options = Vec::with_capacity(entries.len());
    for (key, value) in entries {
        let option = AttrOption::from_pair(key, value.to_string().as_str())?;
        parsed_options.push(option);
    }
    Ok(parsed_options)
}
/// Returns the last `/`-separated segment of `reference`.
///
/// A string without any `/` is returned unchanged.
///
/// # Errors
///
/// Returns `"Invalid reference format"` when the last segment is empty,
/// which includes an empty input and an input ending in `/`.
fn extract_reference(reference: String) -> Result<String, String> {
    match reference.rsplit('/').next() {
        Some(segment) if !segment.is_empty() => Ok(segment.to_owned()),
        _ => Err(String::from("Invalid reference format")),
    }
}
/// Reports whether `s` can be used as a key without cleaning.
///
/// A valid key is non-empty, starts with an alphabetic character or `_`, and
/// consists only of alphanumeric characters and `_`.
fn is_valid_key(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(first) if first.is_alphabetic() || first == '_' => {
            rest.all(|c| c == '_' || c.is_alphanumeric())
        }
        // Empty string, or a first character that cannot start a key.
        _ => false,
    }
}
/// Unit tests for the JSON schema to data model conversions in this file.
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
/// End-to-end conversion of a schema with a root object and three `$defs`
/// entries (one enum, two objects) covering arrays, references and `oneOf`.
#[test]
fn test_parse_schema() {
let schema = json!({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://www.github.com/my/repo/",
"title": "Test",
"type": "object",
"properties": {
"array_valued": {
"title": "array_valued",
"type": "array",
"$term": "http://schema.org/something",
"items": {
"$ref": "#/$defs/Test2"
}
},
"multiple_types": {
"title": "multiple_types",
"oneOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/Test2"
}
]
},
"multiple_types_array": {
"title": "multiple_types_array",
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/Test2"
}
]
}
},
"name": {
"title": "name",
"type": "string",
"default": "test",
"description": "A test description",
"$term": "http://schema.org/hello"
},
"number": {
"title": "number",
"type": "number",
"$term": "http://schema.org/one",
"minimum": 0.0
},
"ontology": {
"title": "ontology",
"$ref": "#/$defs/Ontology"
},
"single_valued": {
"title": "single_valued",
"type": "object",
"$ref": "#/$defs/Test2"
}
},
"$defs": {
"Ontology": {
"title": "Ontology",
"type": "string",
"enum": [
"https://www.evidenceontology.org/term/",
"https://amigo.geneontology.org/amigo/term/",
"http://semanticscience.org/resource/"
]
},
"Test2": {
"title": "Test2",
"type": "object",
"properties": {
"names": {
"title": "names",
"type": "array",
"$term": "http://schema.org/hello",
"items": {
"type": "string"
}
},
"number": {
"title": "number",
"type": "number",
"$term": "http://schema.org/one",
"minimum": 0.0
}
},
"required": [],
"additionalProperties": false
},
"no_title_and_no_required": {
"type": "object",
"properties": {
"val": {
"type": "string"
}
}
}
},
"required": [
"name"
],
"additionalProperties": false
});
let schema: SchemaObject = serde_json::from_value(schema).expect("Failed to parse schema");
let data_model =
DataModel::try_from(schema).expect("Failed to convert schema to data model");
// Root object plus the two object definitions; the enum is counted separately.
assert_eq!(data_model.name, Some("Test".to_string()));
assert_eq!(data_model.objects.len(), 3);
assert_eq!(data_model.enums.len(), 1);
let root = data_model
.objects
.iter()
.find(|object| object.name == "Test")
.expect("Root object not found");
// The index-based assertions below rely on the attributes coming back in
// exactly this order.
assert_eq!(root.attributes.len(), 7);
assert_eq!(root.attributes[0].name, "array_valued");
assert_eq!(root.attributes[1].name, "multiple_types");
assert_eq!(root.attributes[2].name, "multiple_types_array");
assert_eq!(root.attributes[3].name, "name");
assert_eq!(root.attributes[4].name, "number");
assert_eq!(root.attributes[5].name, "ontology");
assert_eq!(root.attributes[6].name, "single_valued");
let test2 = data_model
.objects
.iter()
.find(|object| object.name == "Test2")
.expect("Test2 object not found");
assert_eq!(test2.attributes.len(), 2);
assert_eq!(test2.attributes[0].name, "names");
assert_eq!(test2.attributes[1].name, "number");
let names_attr = &test2.attributes[0];
assert!(names_attr.is_array);
assert_eq!(names_attr.dtypes, vec!["string"]);
assert_eq!(names_attr.term, Some("http://schema.org/hello".to_string()));
let number_attr = &test2.attributes[1];
assert!(!number_attr.is_array);
assert_eq!(number_attr.dtypes, vec!["number"]);
assert_eq!(number_attr.term, Some("http://schema.org/one".to_string()));
// This definition has no `title`; it is looked up by its `$defs` key.
let no_title_and_no_required = data_model
.objects
.iter()
.find(|object| object.name == "no_title_and_no_required")
.expect("no_title_and_no_required object not found");
assert!(!no_title_and_no_required.name.is_empty());
assert_eq!(no_title_and_no_required.attributes.len(), 1);
assert_eq!(no_title_and_no_required.attributes[0].name, "val");
// The enum values are URLs, so they are expected under positional keys.
let ontology = data_model
.enums
.iter()
.find(|e| e.name == "Ontology")
.expect("Ontology enum not found");
assert_eq!(ontology.mappings.len(), 3);
assert_eq!(
ontology.mappings["VALUE_0"],
"https://www.evidenceontology.org/term/"
);
assert_eq!(
ontology.mappings["VALUE_1"],
"https://amigo.geneontology.org/amigo/term/"
);
assert_eq!(
ontology.mappings["VALUE_2"],
"http://semanticscience.org/resource/"
);
let array_valued = &root.attributes[0];
assert!(array_valued.is_array);
assert_eq!(array_valued.dtypes, vec!["Test2"]);
assert_eq!(
array_valued.term,
Some("http://schema.org/something".to_string())
);
let multiple_types = &root.attributes[1];
assert!(!multiple_types.is_array);
// Compared as a set, so the order of the two dtypes is not asserted.
let multiple_types_dtypes: HashSet<_> = multiple_types.dtypes.iter().collect();
assert_eq!(
multiple_types_dtypes,
HashSet::from([&"number".to_string(), &"Test2".to_string()])
);
// "name" is the only entry in the root schema's `required` list.
let name_attr = &root.attributes[3];
assert!(name_attr.required);
assert_eq!(name_attr.term, Some("http://schema.org/hello".to_string()));
}
/// A plain scalar property converts to a non-array attribute that carries
/// over its title, type, description and `$term`.
#[test]
fn test_parse_property() {
let property = json!({
"title": "number",
"type": "number",
"$term": "http://schema.org/one",
"minimum": 0.0,
"description": "test"
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(attribute.dtypes, vec!["number"]);
assert_eq!(attribute.docstring, "test");
assert_eq!(attribute.term, Some("http://schema.org/one".to_string()));
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert!(!attribute.is_array);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// `oneOf` with two primitive types yields both as dtypes (compared as a
/// set, so their order is not asserted).
#[test]
fn test_parse_property_with_one_of() {
let property = json!({
"title": "number",
"oneOf": [
{
"type": "number"
},
{
"type": "string"
}
]
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(
attribute.dtypes.into_iter().collect::<HashSet<_>>(),
vec!["number".to_string(), "string".to_string()]
.into_iter()
.collect::<HashSet<_>>()
);
assert_eq!(attribute.docstring, "");
assert_eq!(attribute.term, None);
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_array);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// `oneOf` mixing a `$ref` and a primitive type yields the referenced name
/// and the primitive as dtypes (compared as a set).
#[test]
fn test_parse_property_with_one_of_mixed() {
let property = json!({
"title": "number",
"oneOf": [
{
"$ref": "#/$defs/Test"
},
{
"type": "string"
}
]
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(
attribute.dtypes.into_iter().collect::<HashSet<_>>(),
vec!["Test".to_string(), "string".to_string()]
.into_iter()
.collect::<HashSet<_>>()
);
assert_eq!(attribute.docstring, "");
assert_eq!(attribute.term, None);
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_array);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// `allOf` with more than one entry is rejected by the conversion; the
/// `unwrap` on that error produces the expected panic.
#[test]
#[should_panic]
fn test_parse_property_with_all_of() {
let property = json!({
"title": "number",
"allOf": [
{
"type": "number"
},
{
"type": "string"
}
]
});
let property: Property = serde_json::from_value(property).unwrap();
Attribute::try_from(property).unwrap();
}
/// A property consisting only of a `$ref` gets the referenced name as its
/// single dtype.
#[test]
fn test_parse_property_with_reference() {
let property = json!({
"title": "number",
"$ref": "#/$defs/Test"
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(attribute.dtypes, vec!["Test".to_string()]);
assert_eq!(attribute.docstring, "");
assert_eq!(attribute.term, None);
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert!(!attribute.is_array);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// An array of primitives is flagged as an array and takes its dtype from
/// `items`.
#[test]
fn test_parse_property_array_without_reference() {
let property = json!({
"title": "number",
"type": "array",
"items": {
"type": "string"
}
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(attribute.dtypes, vec!["string".to_string()]);
assert_eq!(attribute.docstring, "");
assert_eq!(attribute.term, None);
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert!(attribute.is_array);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// Array property whose dtype comes from `items`.
///
/// NOTE(review): fixture and assertions are the same as in
/// `test_parse_property_array_without_reference`.
#[test]
fn test_extract_reference_from_array() {
let property = json!({
"title": "number",
"type": "array",
"items": {
"type": "string"
}
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(attribute.dtypes, vec!["string".to_string()]);
assert_eq!(attribute.docstring, "");
assert_eq!(attribute.term, None);
assert!(!attribute.required);
assert_eq!(attribute.default, None);
assert!(attribute.is_array);
assert_eq!(attribute.xml, None);
assert!(!attribute.is_enum);
assert_eq!(attribute.position, None);
assert_eq!(attribute.import_prefix, None);
}
/// References inside `oneOf` are reduced to their last path segment
/// (dtypes compared as a set).
#[test]
fn test_extract_reference_from_one_of() {
let property = json!({
"title": "number",
"oneOf": [
{
"$ref": "#/$defs/Test"
},
{
"type": "string"
}
]
});
let property: Property = serde_json::from_value(property).unwrap();
let attribute = Attribute::try_from(property).unwrap();
assert_eq!(attribute.name, "number");
assert_eq!(
attribute.dtypes.into_iter().collect::<HashSet<_>>(),
vec!["Test".to_string(), "string".to_string()]
.into_iter()
.collect::<HashSet<_>>()
);
}
/// A schema object converts to an `Object` whose attributes are named after
/// the keys of `properties` and flagged according to `required`.
#[test]
fn test_parse_object() {
let object = json!({
"title": "Test",
"type": "object",
"properties": {
"number": {
"title": "number",
"type": "number"
},
"string": {
"name": "string",
"type": "string"
}
},
"required": ["number"]
});
let object: SchemaObject = serde_json::from_value(object).unwrap();
let data_model = Object::try_from(object).unwrap();
assert_eq!(data_model.name, "Test");
assert_eq!(data_model.attributes.len(), 2);
assert_eq!(data_model.attributes[0].name, "number");
assert_eq!(data_model.attributes[1].name, "string");
let attribute1 = data_model.attributes[0].clone();
assert_eq!(attribute1.name, "number");
assert_eq!(attribute1.dtypes, vec!["number"]);
assert_eq!(attribute1.docstring, "");
assert_eq!(attribute1.term, None);
assert!(attribute1.required);
assert_eq!(attribute1.default, None);
assert!(!attribute1.is_array);
// This property has no `title` in the fixture; the asserted name matches
// its key in `properties`.
let attribute2 = data_model.attributes[1].clone();
assert_eq!(attribute2.name, "string");
assert_eq!(attribute2.dtypes, vec!["string"]);
assert_eq!(attribute2.docstring, "");
assert_eq!(attribute2.term, None);
assert!(!attribute2.required);
assert_eq!(attribute2.default, None);
assert!(!attribute2.is_array);
}
/// Enum values that are already valid keys are stored under their
/// upper-cased form.
#[test]
fn test_parse_enum() {
let enum_obj = json!({
"title": "Test",
"type": "string",
"enum": ["value1", "value2", "value3"]
});
let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
let enumeration = Enumeration::try_from(enum_obj).unwrap();
assert_eq!(enumeration.name, "Test");
assert_eq!(enumeration.mappings.len(), 3);
assert_eq!(enumeration.mappings["VALUE1"], "value1");
assert_eq!(enumeration.mappings["VALUE2"], "value2");
assert_eq!(enumeration.mappings["VALUE3"], "value3");
}
/// Long URL values are not valid keys and are stored under positional
/// `VALUE_<index>` keys.
#[test]
fn test_parse_enum_with_special_characters() {
let enum_obj = json!({
"title": "Test",
"type": "string",
"enum": ["https://www.evidenceontology.org/term/", "https://amigo.geneontology.org/amigo/term/", "http://semanticscience.org/resource/"]
});
let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
let enumeration = Enumeration::try_from(enum_obj).unwrap();
assert_eq!(enumeration.name, "Test");
assert_eq!(enumeration.mappings.len(), 3);
assert_eq!(
enumeration.mappings["VALUE_0"],
"https://www.evidenceontology.org/term/"
);
assert_eq!(
enumeration.mappings["VALUE_1"],
"https://amigo.geneontology.org/amigo/term/"
);
assert_eq!(
enumeration.mappings["VALUE_2"],
"http://semanticscience.org/resource/"
);
}
/// `extract_reference` returns the last path segment, passes through a
/// string without `/`, and rejects an empty string.
#[test]
fn test_extract_reference() {
assert_eq!(
extract_reference("#/$defs/Test".to_string()),
Ok("Test".to_string())
);
assert_eq!(
extract_reference("Test".to_string()),
Ok("Test".to_string())
);
assert_eq!(
extract_reference("".to_string()),
Err("Invalid reference format".to_string())
);
}
/// Converts the schema stored in `tests/data/old_schema.json` and checks the
/// number of resulting objects and enums. The path is relative, so the test
/// depends on the directory it is run from.
#[test]
fn test_enzml_schema() {
let schema_path = "tests/data/old_schema.json";
let schema = std::fs::read_to_string(schema_path).expect("Failed to read schema");
let schema: SchemaObject = serde_json::from_str(&schema).expect("Failed to parse schema");
let data_model =
DataModel::try_from(schema).expect("Failed to convert schema to data model");
assert_eq!(data_model.objects.len(), 14);
assert_eq!(data_model.enums.len(), 2);
}
/// `clean_key` collapses separator runs into one underscore, drops a leading
/// non-letter and upper-cases the result.
#[test]
fn test_clean_key() {
assert_eq!(clean_key("Test:Hello"), "TEST_HELLO");
assert_eq!(clean_key("Test::Hello"), "TEST_HELLO");
assert_eq!(clean_key("Test_Hello"), "TEST_HELLO");
assert_eq!(clean_key("Test__Hello"), "TEST_HELLO");
assert_eq!(clean_key("!Test"), "TEST");
}
/// `additionalProperties` given as a schema object instead of a boolean
/// still parses: the flag reads as `true` and the conversion yields one
/// object with one attribute.
#[test]
fn test_additional_properties_object() {
let schema = json!({
"title": "Test",
"type": "object",
"properties": {
"test": {
"type": "string"
}
},
"additionalProperties": {
"type": "string"
}
});
let schema: SchemaObject = serde_json::from_value(schema).unwrap();
let data_model = DataModel::try_from(schema.clone()).unwrap();
assert!(schema.additional_properties);
assert_eq!(data_model.objects.len(), 1);
assert_eq!(data_model.objects[0].attributes.len(), 1);
}
}