1use std::collections::HashMap;
25use std::path::PathBuf;
26use std::{error::Error, fs, path::Path};
27
28use log::error;
29use serde::{Deserialize, Serialize};
30
31use crate::error::DataModelError;
32use crate::exporters::{render_jinja_template, Templates};
33use crate::json::export::to_json_schema;
34use crate::json::schema::SchemaObject;
35use crate::json::validation::{validate_json, ValidationError};
36use crate::jsonld::export::to_json_ld;
37use crate::jsonld::schema::JsonLdHeader;
38use crate::linkml::export::serialize_linkml;
39use crate::markdown::frontmatter::FrontMatter;
40use crate::markdown::parser::{parse_markdown, validate_model};
41use crate::object::{Enumeration, Object};
42use crate::validation::Validator;
43use colored::Colorize;
44
45#[cfg(feature = "python")]
46use pyo3::pyclass;
47
48#[cfg(feature = "wasm")]
49use tsify_next::Tsify;
50
/// Type names that [`DataModel::merge`] does not take over from the other
/// model when the receiving model already defines an object or enumeration
/// with that name.
const MERGE_IGNORE_TYPES: &[&str] = &["UnitDefinition", "BaseUnit", "UnitType"];
53
/// A data model: a list of objects, a list of enumerations and an optional
/// front-matter configuration.
///
/// The type is serde-serializable; `name` and `config` are left out of the
/// serialized output when they are `None`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[cfg_attr(feature = "python", pyclass(get_all, from_py_object))]
#[cfg_attr(feature = "wasm", derive(Tsify))]
#[cfg_attr(feature = "wasm", tsify(into_wasm_abi))]
pub struct DataModel {
    /// Optional name of the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Object definitions of the model.
    pub objects: Vec<Object>,
    /// Enumeration definitions of the model.
    pub enums: Vec<Enumeration>,
    /// Optional front-matter configuration attached to the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub config: Option<FrontMatter>,
}
88
89impl DataModel {
90 pub fn new(name: Option<String>, config: Option<FrontMatter>) -> Self {
91 DataModel {
92 name,
93 objects: Vec::new(),
94 enums: Vec::new(),
95 config,
96 }
97 }
98
99 pub fn validate_json(
114 &self,
115 path: &Path,
116 root: Option<String>,
117 ) -> Result<Vec<ValidationError>, Box<dyn Error>> {
118 validate_json(path.to_path_buf(), self, root)
119 }
120
121 pub fn json_schema(
142 &self,
143 obj_name: Option<String>,
144 openai: bool,
145 ) -> Result<String, Box<dyn Error>> {
146 if self.objects.is_empty() {
147 panic!("No objects found in the markdown file");
148 }
149
150 match obj_name {
151 Some(name) => {
152 if self.objects.iter().all(|o| o.name != name) {
153 panic!("Object '{name}' not found in the markdown file");
154 }
155 Ok(serde_json::to_string_pretty(&to_json_schema(
156 self, &name, openai,
157 )?)?)
158 }
159 None => Ok(serde_json::to_string_pretty(&to_json_schema(
160 self,
161 &self.objects[0].name,
162 openai,
163 )?)?),
164 }
165 }
166
167 pub fn json_schema_all(&self, path: PathBuf, openai: bool) -> Result<(), Box<dyn Error>> {
185 if self.objects.is_empty() {
186 panic!("No objects found in the markdown file");
187 }
188
189 if !std::path::Path::new(&path).exists() {
191 fs::create_dir_all(&path).expect("Could not create directory");
192 }
193
194 let base_path = path.to_str().ok_or("Failed to convert path to string")?;
195 for object in &self.objects {
196 let schema = to_json_schema(self, &object.name, openai)?;
197 let file_name = format!("{}/{}.json", base_path, object.name);
198 fs::write(file_name, serde_json::to_string_pretty(&schema)?)
199 .expect("Could not write file");
200 }
201
202 Ok(())
203 }
204
205 pub fn json_ld_header(&self, root: Option<&str>) -> Result<JsonLdHeader, Box<dyn Error>> {
219 to_json_ld(self, root)
220 }
221
222 pub fn internal_schema(&self) -> String {
240 if self.objects.is_empty() {
241 panic!("No objects found in the markdown file");
242 }
243
244 serde_json::to_string_pretty(&self).expect("Could not serialize to internal schema")
245 }
246
247 pub fn from_internal_schema(path: &Path) -> Result<Self, Box<dyn Error>> {
263 if !path.exists() {
264 return Err("File does not exist".into());
265 }
266
267 let contents = fs::read_to_string(path)?;
268 let model: DataModel = serde_json::from_str(&contents)?;
269
270 Ok(model)
271 }
272
273 pub fn sort_attrs(&mut self) {
275 for obj in &mut self.objects {
276 obj.sort_attrs_by_required();
277 }
278 }
279
280 pub fn convert_to(
293 &mut self,
294 template: &Templates,
295 config: Option<&HashMap<String, String>>,
296 ) -> Result<String, minijinja::Error> {
297 self.sort_attrs();
298
299 match template {
300 Templates::JsonLd => {
301 Ok(serde_json::to_string_pretty(&self.json_ld_header(None).unwrap()).unwrap())
302 }
303 Templates::JsonSchema => Ok(self.json_schema(None, false).unwrap()),
304 Templates::Linkml => Ok(serialize_linkml(self.clone(), None).unwrap()),
305 _ => render_jinja_template(template, self, config),
306 }
307 }
308
309 pub fn merge(&mut self, other: &Self) {
313 let mut valid = true;
315 let ignore_types = self.get_ignore_types();
316
317 for other_obj in &other.objects {
320 if ignore_types.contains(&other_obj.name) {
321 continue;
322 }
323 if let Some(duplicate_obj) = self.objects.iter().find(|o| o.name == other_obj.name) {
324 if !duplicate_obj.same_hash(other_obj) {
325 error!(
326 "[{}] {}: Object {} is defined more than once.",
327 "Merge".bold(),
328 "DuplicateError".bold(),
329 other_obj.name.red().bold(),
330 );
331 valid = false;
332 }
333 }
334 }
335
336 for other_enm in &other.enums {
337 if ignore_types.contains(&other_enm.name) {
338 continue;
339 }
340 if let Some(duplicate_enm) = self.enums.iter().find(|e| e.name == other_enm.name) {
341 if !duplicate_enm.same_hash(other_enm) {
342 error!(
343 "[{}] {}: Enumeration {} is defined more than once.",
344 "Merge".bold(),
345 "DuplicateError".bold(),
346 other_enm.name.red().bold(),
347 );
348 valid = false;
349 }
350 }
351 }
352
353 if !valid {
355 panic!("Merge is not valid");
356 }
357
358 self.merge_prefixes(other);
360
361 self.objects.extend(
363 other
364 .objects
365 .iter()
366 .filter(|o| !ignore_types.contains(&o.name))
367 .filter(|o| !self.objects.iter().any(|existing| existing.name == o.name))
368 .cloned()
369 .collect::<Vec<Object>>(),
370 );
371 self.enums.extend(
372 other
373 .enums
374 .iter()
375 .filter(|e| !ignore_types.contains(&e.name))
376 .filter(|e| !self.enums.iter().any(|existing| existing.name == e.name))
377 .cloned()
378 .collect::<Vec<Enumeration>>(),
379 );
380 }
381
382 fn merge_prefixes(&mut self, other: &Self) {
385 if let Some(other_prefixes) = other.config.as_ref().and_then(|c| c.prefixes.as_ref()) {
386 let self_config = self.config.get_or_insert_with(FrontMatter::new);
387 let self_prefixes = self_config.prefixes.get_or_insert_with(HashMap::new);
388
389 for (key, value) in other_prefixes {
390 self_prefixes
391 .entry(key.clone())
392 .or_insert_with(|| value.clone());
393 }
394 }
395 }
396
397 fn get_ignore_types(&self) -> Vec<String> {
399 let mut ignore_types = Vec::new();
400 if self
401 .objects
402 .iter()
403 .any(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
404 {
405 ignore_types.extend(
406 self.objects
407 .iter()
408 .filter(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
409 .map(|o| o.name.clone()),
410 );
411 }
412 if self
413 .enums
414 .iter()
415 .any(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
416 {
417 ignore_types.extend(
418 self.enums
419 .iter()
420 .filter(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
421 .map(|e| e.name.clone()),
422 );
423 }
424 ignore_types
425 }
426
427 #[allow(clippy::result_large_err)]
443 pub fn from_markdown(path: &Path) -> Result<Self, Validator> {
444 let content = fs::read_to_string(path).expect("Could not read file");
445 parse_markdown(&content, Some(path))
446 }
447
448 #[allow(clippy::result_large_err)]
466 pub fn from_markdown_string(content: &str) -> Result<Self, Validator> {
467 parse_markdown(content, None)
468 }
469
470 #[allow(clippy::result_large_err)]
477 pub fn from_json_schema(path: &Path) -> Result<Self, DataModelError> {
478 let content = fs::read_to_string(path)?;
479 let schema: SchemaObject = serde_json::from_str(&content)?;
480 let model: DataModel = schema
481 .try_into()
482 .expect("Could not convert schema to data model");
483
484 validate_model(&model).map_err(DataModelError::ValidationError)?;
486
487 Ok(model)
488 }
489
490 #[allow(clippy::result_large_err)]
497 pub fn from_json_schema_string(content: &str) -> Result<Self, DataModelError> {
498 let schema: SchemaObject = serde_json::from_str(content)?;
499 let model: DataModel = schema
500 .try_into()
501 .expect("Could not convert schema to data model");
502
503 validate_model(&model).map_err(DataModelError::ValidationError)?;
505
506 Ok(model)
507 }
508
509 #[allow(clippy::result_large_err)]
516 pub fn from_json_schema_object(schema: SchemaObject) -> Result<Self, DataModelError> {
517 let model: DataModel = schema
518 .try_into()
519 .expect("Could not convert schema to data model");
520
521 validate_model(&model).map_err(DataModelError::ValidationError)?;
523
524 Ok(model)
525 }
526}
527
528#[cfg(test)]
529mod tests {
530 use std::collections::BTreeMap;
531
532 use crate::attribute::DataType;
533
534 use super::*;
535 use pretty_assertions::assert_eq;
536
537 #[test]
538 fn test_merge() {
539 let mut model1 = DataModel::new(None, None);
541 let mut model2 = DataModel::new(None, None);
542
543 let mut obj1 = Object::new("Object1".to_string(), None);
544 obj1.add_attribute(crate::attribute::Attribute {
545 name: "test1".to_string(),
546 is_array: false,
547 is_id: false,
548 dtypes: vec!["string".to_string()],
549 docstring: "".to_string(),
550 options: vec![],
551 term: None,
552 required: false,
553 xml: None,
554 default: None,
555 is_enum: false,
556 position: None,
557 import_prefix: None,
558 });
559
560 let mut obj2 = Object::new("Object2".to_string(), None);
561 obj2.add_attribute(crate::attribute::Attribute {
562 name: "test2".to_string(),
563 is_array: false,
564 is_id: false,
565 dtypes: vec!["string".to_string()],
566 docstring: "".to_string(),
567 options: vec![],
568 term: None,
569 required: false,
570 xml: None,
571 default: None,
572 is_enum: false,
573 position: None,
574 import_prefix: None,
575 });
576
577 let enm1 = Enumeration {
578 name: "Enum1".to_string(),
579 mappings: BTreeMap::from([("key1".to_string(), "value1".to_string())]),
580 docstring: "".to_string(),
581 position: None,
582 };
583
584 let enm2 = Enumeration {
585 name: "Enum2".to_string(),
586 mappings: BTreeMap::from([("key2".to_string(), "value2".to_string())]),
587 docstring: "".to_string(),
588 position: None,
589 };
590
591 model1.objects.push(obj1);
592 model1.enums.push(enm1);
593 model2.objects.push(obj2);
594 model2.enums.push(enm2);
595
596 model1.merge(&model2);
598
599 assert_eq!(model1.objects.len(), 2);
601 assert_eq!(model1.enums.len(), 2);
602 assert_eq!(model1.objects[0].name, "Object1");
603 assert_eq!(model1.objects[1].name, "Object2");
604 assert_eq!(model1.enums[0].name, "Enum1");
605 assert_eq!(model1.enums[1].name, "Enum2");
606 }
607
608 #[test]
609 fn test_sort_attrs() {
610 let mut model = DataModel::new(None, None);
612 let mut obj = Object::new("Object1".to_string(), None);
613 obj.add_attribute(crate::attribute::Attribute {
614 name: "not_required".to_string(),
615 is_array: false,
616 is_id: false,
617 dtypes: vec!["string".to_string()],
618 docstring: "".to_string(),
619 options: vec![],
620 term: None,
621 required: false,
622 xml: None,
623 default: Some(DataType::String("".to_string())),
624 is_enum: false,
625 position: None,
626 import_prefix: None,
627 });
628
629 obj.add_attribute(crate::attribute::Attribute {
630 name: "required".to_string(),
631 is_array: false,
632 is_id: false,
633 dtypes: vec!["string".to_string()],
634 docstring: "".to_string(),
635 options: vec![],
636 term: None,
637 required: true,
638 xml: None,
639 default: None,
640 is_enum: false,
641 position: None,
642 import_prefix: None,
643 });
644
645 model.objects.push(obj);
646
647 model.sort_attrs();
649
650 assert_eq!(model.objects[0].attributes[0].name, "required");
652 assert_eq!(model.objects[0].attributes[1].name, "not_required");
653 }
654
655 #[test]
656 fn test_from_internal_schema() {
657 let path = Path::new("tests/data/expected_internal_schema.json");
659
660 let model = DataModel::from_internal_schema(path).expect("Failed to parse internal schema");
662
663 assert_eq!(model.objects.len(), 2);
665 assert_eq!(model.enums.len(), 1);
666 }
667
668 #[test]
669 fn test_from_markdown_w_html() {
670 let path = Path::new("tests/data/model_w_html.md");
672
673 let model = DataModel::from_markdown(path).expect("Failed to parse markdown");
675
676 assert_eq!(model.objects.len(), 2);
678 assert_eq!(model.enums.len(), 1);
679 }
680
681 #[test]
682 fn test_from_markdown_string() {
683 let path = Path::new("tests/data/model.md");
685 let content = fs::read_to_string(path).unwrap();
686
687 let model =
689 DataModel::from_markdown_string(content.as_str()).expect("Failed to parse markdown");
690
691 assert_eq!(model.objects.len(), 2);
693 assert_eq!(model.enums.len(), 1);
694 }
695}