use std::collections::HashMap;
use std::path::PathBuf;
use std::{error::Error, fs, path::Path};
use log::error;
use serde::{Deserialize, Serialize};
use crate::error::DataModelError;
use crate::exporters::{render_jinja_template, Templates};
use crate::json::export::to_json_schema;
use crate::json::schema::SchemaObject;
use crate::json::validation::{validate_json, ValidationError};
use crate::jsonld::export::to_json_ld;
use crate::jsonld::schema::JsonLdHeader;
use crate::linkml::export::serialize_linkml;
use crate::markdown::frontmatter::FrontMatter;
use crate::markdown::parser::{parse_markdown, validate_model};
use crate::object::{Enumeration, Object};
use crate::validation::Validator;
use colored::Colorize;
#[cfg(feature = "python")]
use pyo3::pyclass;
#[cfg(feature = "wasm")]
use tsify_next::Tsify;
/// Type names that receive special treatment during [`DataModel::merge`].
///
/// When the receiving model already defines an object or enumeration with one
/// of these names, `get_ignore_types` reports that name and `merge` skips the
/// incoming definition of the same name entirely (no conflict check, no copy).
const MERGE_IGNORE_TYPES: &[&str] = &["UnitDefinition", "BaseUnit", "UnitType"];
/// A data model: a list of object definitions and enumerations, with an
/// optional name and an optional front-matter configuration.
///
/// The type is (de)serializable with serde; `name` and `config` are left out
/// of the serialized form when they are `None`. With the `python` feature it is
/// exposed as a pyo3 class, with the `wasm` feature it derives `Tsify`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[cfg_attr(feature = "python", pyclass(get_all, from_py_object))]
#[cfg_attr(feature = "wasm", derive(Tsify))]
#[cfg_attr(feature = "wasm", tsify(into_wasm_abi))]
pub struct DataModel {
/// Optional name of the model; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
/// Object definitions contained in the model.
pub objects: Vec<Object>,
/// Enumerations contained in the model.
pub enums: Vec<Enumeration>,
/// Optional front-matter configuration; omitted from serialized output when
/// `None`. `merge` reads and extends its `prefixes` map.
#[serde(skip_serializing_if = "Option::is_none")]
pub config: Option<FrontMatter>,
}
impl DataModel {
pub fn new(name: Option<String>, config: Option<FrontMatter>) -> Self {
DataModel {
name,
objects: Vec::new(),
enums: Vec::new(),
config,
}
}
pub fn validate_json(
&self,
path: &Path,
root: Option<String>,
) -> Result<Vec<ValidationError>, Box<dyn Error>> {
validate_json(path.to_path_buf(), self, root)
}
/// Produces the JSON schema for one object of the model as a pretty-printed
/// JSON string.
///
/// # Arguments
///
/// * `obj_name` - Name of the object to use as schema root. When `None`, the
///   first entry of `objects` is used.
/// * `openai` - Forwarded unchanged to `to_json_schema`.
///
/// # Errors
///
/// Returns an error when `to_json_schema` fails or when the resulting schema
/// cannot be serialized to JSON.
///
/// # Panics
///
/// Panics when the model contains no objects, or when `obj_name` is given but
/// no object of that name exists in the model.
pub fn json_schema(
    &self,
    obj_name: Option<String>,
    openai: bool,
) -> Result<String, Box<dyn Error>> {
    assert!(
        !self.objects.is_empty(),
        "No objects found in the markdown file"
    );

    // Owns the caller-supplied name (if any) so `root` can borrow from it.
    let requested;
    let root: &String = match obj_name {
        Some(candidate) => {
            let known = self.objects.iter().any(|o| o.name == candidate);
            if !known {
                panic!("Object '{candidate}' not found in the markdown file");
            }
            requested = candidate;
            &requested
        }
        None => &self.objects[0].name,
    };

    let schema = to_json_schema(self, root, openai)?;
    Ok(serde_json::to_string_pretty(&schema)?)
}
/// Writes one pretty-printed JSON schema file per object into the directory
/// `path`.
///
/// The directory (including missing parents) is created when it does not
/// exist yet. Each object `X` of the model is written to `<path>/X.json`.
///
/// # Arguments
///
/// * `path` - Target directory for the schema files.
/// * `openai` - Forwarded unchanged to `to_json_schema`.
///
/// # Errors
///
/// Returns an error when the directory cannot be created, when
/// `to_json_schema` or JSON serialization fails for an object, or when a
/// schema file cannot be written. Files written before the failure are left
/// in place.
///
/// # Panics
///
/// Panics when the model contains no objects.
pub fn json_schema_all(&self, path: PathBuf, openai: bool) -> Result<(), Box<dyn Error>> {
    if self.objects.is_empty() {
        panic!("No objects found in the markdown file");
    }

    // `create_dir_all` succeeds when the directory already exists, so no
    // separate existence check is needed; failures are reported to the caller.
    fs::create_dir_all(&path)?;

    for object in &self.objects {
        let schema = to_json_schema(self, &object.name, openai)?;
        // Build the file path with `join` so non-UTF-8 directories and
        // platform-specific separators are handled by the standard library.
        let file_path = path.join(format!("{}.json", object.name));
        fs::write(file_path, serde_json::to_string_pretty(&schema)?)?;
    }

    Ok(())
}
/// Builds the JSON-LD header for this model.
///
/// Thin wrapper around `to_json_ld` from `crate::jsonld::export`; this model
/// and `root` are forwarded unchanged, and the result (header or error) is
/// returned as-is.
pub fn json_ld_header(&self, root: Option<&str>) -> Result<JsonLdHeader, Box<dyn Error>> {
to_json_ld(self, root)
}
/// Serializes the whole model (via its serde `Serialize` implementation) to a
/// pretty-printed JSON string.
///
/// # Panics
///
/// Panics when the model contains no objects, or when serialization fails.
pub fn internal_schema(&self) -> String {
    assert!(
        !self.objects.is_empty(),
        "No objects found in the markdown file"
    );

    let serialized = serde_json::to_string_pretty(self);
    serialized.expect("Could not serialize to internal schema")
}
/// Loads a model from a JSON file holding the serialized form of `DataModel`.
///
/// # Errors
///
/// Returns an error with the message `File does not exist` when `path` does
/// not exist, and otherwise propagates read or JSON deserialization errors.
pub fn from_internal_schema(path: &Path) -> Result<Self, Box<dyn Error>> {
    if path.exists() {
        let raw = fs::read_to_string(path)?;
        let model = serde_json::from_str::<Self>(&raw)?;
        Ok(model)
    } else {
        Err(Box::<dyn Error>::from("File does not exist"))
    }
}
/// Calls `sort_attrs_by_required` on every object of the model, in place.
///
/// The module's own test expects a required attribute to end up before an
/// optional one after this call.
pub fn sort_attrs(&mut self) {
    self.objects.iter_mut().for_each(|object| {
        object.sort_attrs_by_required();
    });
}
pub fn convert_to(
&mut self,
template: &Templates,
config: Option<&HashMap<String, String>>,
) -> Result<String, minijinja::Error> {
self.sort_attrs();
match template {
Templates::JsonLd => {
Ok(serde_json::to_string_pretty(&self.json_ld_header(None).unwrap()).unwrap())
}
Templates::JsonSchema => Ok(self.json_schema(None, false).unwrap()),
Templates::Linkml => Ok(serialize_linkml(self.clone(), None).unwrap()),
_ => render_jinja_template(template, self, config),
}
}
/// Merges the objects, enumerations and prefixes of `other` into this model.
///
/// Steps, in order:
///
/// 1. Names returned by `get_ignore_types` are excluded from everything
///    below.
/// 2. Every remaining object / enumeration of `other` whose name already
///    exists here is compared with the first same-named local definition via
///    `same_hash`; each mismatch is logged with `error!`.
/// 3. If any mismatch was found the method panics.
/// 4. Prefixes are merged with `merge_prefixes`.
/// 5. Definitions of `other` whose names are not yet present here are cloned
///    and appended, preserving their order in `other`.
///
/// # Panics
///
/// Panics with `Merge is not valid` when at least one conflicting object or
/// enumeration was detected.
pub fn merge(&mut self, other: &Self) {
    let ignore_types = self.get_ignore_types();
    let mut valid = true;

    // Report every incoming object that clashes with a local definition.
    for incoming in other
        .objects
        .iter()
        .filter(|o| !ignore_types.contains(&o.name))
    {
        match self.objects.iter().find(|o| o.name == incoming.name) {
            Some(existing) if !existing.same_hash(incoming) => {
                error!(
                    "[{}] {}: Object {} is defined more than once.",
                    "Merge".bold(),
                    "DuplicateError".bold(),
                    incoming.name.red().bold(),
                );
                valid = false;
            }
            _ => {}
        }
    }

    // Same check for enumerations.
    for incoming in other
        .enums
        .iter()
        .filter(|e| !ignore_types.contains(&e.name))
    {
        match self.enums.iter().find(|e| e.name == incoming.name) {
            Some(existing) if !existing.same_hash(incoming) => {
                error!(
                    "[{}] {}: Enumeration {} is defined more than once.",
                    "Merge".bold(),
                    "DuplicateError".bold(),
                    incoming.name.red().bold(),
                );
                valid = false;
            }
            _ => {}
        }
    }

    assert!(valid, "Merge is not valid");

    self.merge_prefixes(other);

    // Collect before extending: the name check must see the pre-merge state
    // and cannot borrow `self.objects` while it is being extended.
    let new_objects: Vec<Object> = other
        .objects
        .iter()
        .filter(|o| {
            !ignore_types.contains(&o.name)
                && self.objects.iter().all(|existing| existing.name != o.name)
        })
        .cloned()
        .collect();
    self.objects.extend(new_objects);

    let new_enums: Vec<Enumeration> = other
        .enums
        .iter()
        .filter(|e| {
            !ignore_types.contains(&e.name)
                && self.enums.iter().all(|existing| existing.name != e.name)
        })
        .cloned()
        .collect();
    self.enums.extend(new_enums);
}
/// Copies prefixes from `other`'s configuration into this model's
/// configuration without overwriting keys that already exist here.
///
/// Does nothing when `other` has no configuration or no prefixes. Otherwise a
/// configuration (`FrontMatter::new`) and an empty prefix map are created on
/// this model if they are missing.
fn merge_prefixes(&mut self, other: &Self) {
    let incoming = match other.config.as_ref().and_then(|c| c.prefixes.as_ref()) {
        Some(prefixes) => prefixes,
        None => return,
    };

    let target = self
        .config
        .get_or_insert_with(FrontMatter::new)
        .prefixes
        .get_or_insert_with(HashMap::new);

    for (prefix, uri) in incoming {
        // Existing entries win; only missing keys are filled in.
        target.entry(prefix.clone()).or_insert_with(|| uri.clone());
    }
}
/// Returns the names of this model's objects and enumerations that appear in
/// `MERGE_IGNORE_TYPES`.
///
/// Object names come first, followed by enumeration names, each group in
/// model order. The result is empty when none of the listed names is defined.
fn get_ignore_types(&self) -> Vec<String> {
    let object_names = self.objects.iter().map(|o| &o.name);
    let enum_names = self.enums.iter().map(|e| &e.name);

    object_names
        .chain(enum_names)
        .filter(|name| MERGE_IGNORE_TYPES.contains(&name.as_str()))
        .cloned()
        .collect()
}
/// Reads the markdown file at `path` and parses it with `parse_markdown`,
/// passing the path along as the source location.
///
/// # Errors
///
/// Returns the `Validator` produced by `parse_markdown` when parsing fails.
///
/// # Panics
///
/// Panics when the file cannot be read.
// Clippy's `result_large_err` is silenced: the `Validator` error is returned by value.
#[allow(clippy::result_large_err)]
pub fn from_markdown(path: &Path) -> Result<Self, Validator> {
    let markdown = fs::read_to_string(path).expect("Could not read file");
    parse_markdown(markdown.as_str(), Some(path))
}
/// Parses a model from markdown text held in memory.
///
/// Thin wrapper around `parse_markdown`; `content` is forwarded unchanged and
/// no source path is supplied (`None`). The parser's result is returned as-is.
// Clippy's `result_large_err` is silenced: the `Validator` error is returned by value.
#[allow(clippy::result_large_err)]
pub fn from_markdown_string(content: &str) -> Result<Self, Validator> {
parse_markdown(content, None)
}
#[allow(clippy::result_large_err)]
pub fn from_json_schema(path: &Path) -> Result<Self, DataModelError> {
let content = fs::read_to_string(path)?;
let schema: SchemaObject = serde_json::from_str(&content)?;
let model: DataModel = schema
.try_into()
.expect("Could not convert schema to data model");
validate_model(&model).map_err(DataModelError::ValidationError)?;
Ok(model)
}
#[allow(clippy::result_large_err)]
pub fn from_json_schema_string(content: &str) -> Result<Self, DataModelError> {
let schema: SchemaObject = serde_json::from_str(content)?;
let model: DataModel = schema
.try_into()
.expect("Could not convert schema to data model");
validate_model(&model).map_err(DataModelError::ValidationError)?;
Ok(model)
}
/// Converts an already parsed `SchemaObject` into a model and validates it
/// with `validate_model`.
///
/// # Errors
///
/// Returns `DataModelError::ValidationError` wrapping the validation result
/// when `validate_model` rejects the converted model.
///
/// # Panics
///
/// Panics when the schema cannot be converted into a `DataModel`.
// Clippy's `result_large_err` is silenced: `DataModelError` is returned by value.
#[allow(clippy::result_large_err)]
pub fn from_json_schema_object(schema: SchemaObject) -> Result<Self, DataModelError> {
    let model: Self = schema
        .try_into()
        .expect("Could not convert schema to data model");

    if let Err(report) = validate_model(&model) {
        return Err(DataModelError::ValidationError(report));
    }

    Ok(model)
}
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use crate::attribute::DataType;
use super::*;
use pretty_assertions::assert_eq;
/// Merging two models with disjoint names appends the other model's object
/// and enumeration after the existing ones.
#[test]
fn test_merge() {
    // Optional, non-array, single `string`-typed attribute with the given name.
    let string_attr = |name: &str| crate::attribute::Attribute {
        name: name.to_string(),
        is_array: false,
        is_id: false,
        dtypes: vec!["string".to_string()],
        docstring: "".to_string(),
        options: vec![],
        term: None,
        required: false,
        xml: None,
        default: None,
        is_enum: false,
        position: None,
        import_prefix: None,
    };
    // Enumeration holding exactly one key/value mapping.
    let single_enum = |name: &str, key: &str, value: &str| Enumeration {
        name: name.to_string(),
        mappings: BTreeMap::from([(key.to_string(), value.to_string())]),
        docstring: "".to_string(),
        position: None,
    };

    let mut first_obj = Object::new("Object1".to_string(), None);
    first_obj.add_attribute(string_attr("test1"));
    let mut second_obj = Object::new("Object2".to_string(), None);
    second_obj.add_attribute(string_attr("test2"));

    let mut target = DataModel::new(None, None);
    target.objects.push(first_obj);
    target.enums.push(single_enum("Enum1", "key1", "value1"));

    let mut source = DataModel::new(None, None);
    source.objects.push(second_obj);
    source.enums.push(single_enum("Enum2", "key2", "value2"));

    target.merge(&source);

    let object_names: Vec<&str> = target.objects.iter().map(|o| o.name.as_str()).collect();
    let enum_names: Vec<&str> = target.enums.iter().map(|e| e.name.as_str()).collect();
    assert_eq!(object_names, vec!["Object1", "Object2"]);
    assert_eq!(enum_names, vec!["Enum1", "Enum2"]);
}
/// After `sort_attrs`, the required attribute precedes the optional one even
/// though it was added second.
#[test]
fn test_sort_attrs() {
    // Single `string`-typed attribute; only name, `required` and `default` vary.
    let attr = |name: &str, required: bool, default: Option<DataType>| {
        crate::attribute::Attribute {
            name: name.to_string(),
            is_array: false,
            is_id: false,
            dtypes: vec!["string".to_string()],
            docstring: "".to_string(),
            options: vec![],
            term: None,
            required,
            xml: None,
            default,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    };

    let mut obj = Object::new("Object1".to_string(), None);
    obj.add_attribute(attr(
        "not_required",
        false,
        Some(DataType::String("".to_string())),
    ));
    obj.add_attribute(attr("required", true, None));

    let mut model = DataModel::new(None, None);
    model.objects.push(obj);
    model.sort_attrs();

    let sorted = &model.objects[0].attributes;
    assert_eq!(sorted[0].name, "required");
    assert_eq!(sorted[1].name, "not_required");
}
/// The stored internal-schema fixture loads into a model with two objects and
/// one enumeration.
#[test]
fn test_from_internal_schema() {
    let fixture = Path::new("tests/data/expected_internal_schema.json");
    let loaded = DataModel::from_internal_schema(fixture);
    let model = loaded.expect("Failed to parse internal schema");

    assert_eq!(model.objects.len(), 2);
    assert_eq!(model.enums.len(), 1);
}
/// The `model_w_html.md` fixture parses into a model with two objects and one
/// enumeration.
#[test]
fn test_from_markdown_w_html() {
    let fixture = Path::new("tests/data/model_w_html.md");
    let parsed = DataModel::from_markdown(fixture);
    let model = parsed.expect("Failed to parse markdown");

    assert_eq!(model.objects.len(), 2);
    assert_eq!(model.enums.len(), 1);
}
/// Parsing the text of `model.md` from memory yields a model with two objects
/// and one enumeration.
#[test]
fn test_from_markdown_string() {
    let content = fs::read_to_string(Path::new("tests/data/model.md")).unwrap();
    let parsed = DataModel::from_markdown_string(&content);
    let model = parsed.expect("Failed to parse markdown");

    assert_eq!(model.objects.len(), 2);
    assert_eq!(model.enums.len(), 1);
}
}