1use std::collections::HashMap;
25use std::path::PathBuf;
26use std::{error::Error, fs, path::Path};
27
28use log::error;
29use serde::{Deserialize, Serialize};
30
31use crate::error::DataModelError;
32use crate::exporters::{render_jinja_template, Templates};
33#[cfg(not(target_arch = "wasm32"))]
34use crate::git::cache_github_repo;
35use crate::json::export::to_json_schema;
36use crate::json::schema::SchemaObject;
37use crate::json::validation::{validate_json, ValidationError};
38use crate::jsonld::export::to_json_ld;
39use crate::jsonld::schema::JsonLdHeader;
40use crate::linkml::export::serialize_linkml;
41use crate::markdown::frontmatter::FrontMatter;
42use crate::markdown::parser::{parse_markdown, validate_model};
43use crate::object::{Enumeration, Object};
44use crate::validation::Validator;
45use colored::Colorize;
46
47#[cfg(feature = "python")]
48use pyo3::pyclass;
49
50#[cfg(feature = "wasm")]
51use tsify_next::Tsify;
52
/// Type names that receive special treatment in `DataModel::merge`: when the
/// receiving model already defines an object or enumeration with one of these
/// names, the same-named definition of the incoming model is skipped instead
/// of being checked for conflicts or appended.
const MERGE_IGNORE_TYPES: &[&str] = &["UnitDefinition", "BaseUnit", "UnitType"];
55
/// In-memory representation of a data model: a list of objects and
/// enumerations plus an optional name and optional front matter configuration.
///
/// The struct is (de)serializable with serde; `name` and `config` are left out
/// of the serialized output when they are `None`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[cfg_attr(feature = "python", pyclass(get_all, from_py_object))]
#[cfg_attr(feature = "wasm", derive(Tsify))]
#[cfg_attr(feature = "wasm", tsify(into_wasm_abi))]
pub struct DataModel {
    /// Optional name of the model; skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Object definitions contained in the model.
    pub objects: Vec<Object>,
    /// Enumeration definitions contained in the model.
    pub enums: Vec<Enumeration>,
    /// Optional front matter configuration (e.g. the prefixes combined by
    /// `merge`); skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub config: Option<FrontMatter>,
}
90
91impl DataModel {
92 pub fn new(name: Option<String>, config: Option<FrontMatter>) -> Self {
93 DataModel {
94 name,
95 objects: Vec::new(),
96 enums: Vec::new(),
97 config,
98 }
99 }
100
101 pub fn validate_json(
116 &self,
117 path: &Path,
118 root: Option<String>,
119 ) -> Result<Vec<ValidationError>, Box<dyn Error>> {
120 validate_json(path.to_path_buf(), self, root)
121 }
122
123 pub fn json_schema(
144 &self,
145 obj_name: Option<String>,
146 openai: bool,
147 ) -> Result<String, Box<dyn Error>> {
148 if self.objects.is_empty() {
149 panic!("No objects found in the markdown file");
150 }
151
152 match obj_name {
153 Some(name) => {
154 if self.objects.iter().all(|o| o.name != name) {
155 panic!("Object '{name}' not found in the markdown file");
156 }
157 Ok(serde_json::to_string_pretty(&to_json_schema(
158 self, &name, openai,
159 )?)?)
160 }
161 None => Ok(serde_json::to_string_pretty(&to_json_schema(
162 self,
163 &self.objects[0].name,
164 openai,
165 )?)?),
166 }
167 }
168
169 pub fn json_schema_all(&self, path: PathBuf, openai: bool) -> Result<(), Box<dyn Error>> {
187 if self.objects.is_empty() {
188 panic!("No objects found in the markdown file");
189 }
190
191 if !std::path::Path::new(&path).exists() {
193 fs::create_dir_all(&path).expect("Could not create directory");
194 }
195
196 let base_path = path.to_str().ok_or("Failed to convert path to string")?;
197 for object in &self.objects {
198 let schema = to_json_schema(self, &object.name, openai)?;
199 let file_name = format!("{}/{}.json", base_path, object.name);
200 fs::write(file_name, serde_json::to_string_pretty(&schema)?)
201 .expect("Could not write file");
202 }
203
204 Ok(())
205 }
206
207 pub fn json_ld_header(&self, root: Option<&str>) -> Result<JsonLdHeader, Box<dyn Error>> {
221 to_json_ld(self, root)
222 }
223
224 pub fn internal_schema(&self) -> String {
242 if self.objects.is_empty() {
243 panic!("No objects found in the markdown file");
244 }
245
246 serde_json::to_string_pretty(&self).expect("Could not serialize to internal schema")
247 }
248
249 pub fn from_internal_schema(path: &Path) -> Result<Self, Box<dyn Error>> {
265 if !path.exists() {
266 return Err("File does not exist".into());
267 }
268
269 let contents = fs::read_to_string(path)?;
270 let model: DataModel = serde_json::from_str(&contents)?;
271
272 Ok(model)
273 }
274
275 pub fn sort_attrs(&mut self) {
277 for obj in &mut self.objects {
278 obj.sort_attrs_by_required();
279 }
280 }
281
282 pub fn convert_to(
295 &mut self,
296 template: &Templates,
297 config: Option<&HashMap<String, String>>,
298 ) -> Result<String, minijinja::Error> {
299 self.sort_attrs();
300
301 match template {
302 Templates::JsonLd => {
303 Ok(serde_json::to_string_pretty(&self.json_ld_header(None).unwrap()).unwrap())
304 }
305 Templates::JsonSchema => Ok(self.json_schema(None, false).unwrap()),
306 Templates::Linkml => Ok(serialize_linkml(self.clone(), None).unwrap()),
307 _ => render_jinja_template(template, self, config),
308 }
309 }
310
311 pub fn merge(&mut self, other: &Self) {
315 let mut valid = true;
317 let ignore_types = self.get_ignore_types();
318
319 for other_obj in &other.objects {
322 if ignore_types.contains(&other_obj.name) {
323 continue;
324 }
325 if let Some(duplicate_obj) = self.objects.iter().find(|o| o.name == other_obj.name) {
326 if !duplicate_obj.same_hash(other_obj) {
327 error!(
328 "[{}] {}: Object {} is defined more than once.",
329 "Merge".bold(),
330 "DuplicateError".bold(),
331 other_obj.name.red().bold(),
332 );
333 valid = false;
334 }
335 }
336 }
337
338 for other_enm in &other.enums {
339 if ignore_types.contains(&other_enm.name) {
340 continue;
341 }
342 if let Some(duplicate_enm) = self.enums.iter().find(|e| e.name == other_enm.name) {
343 if !duplicate_enm.same_hash(other_enm) {
344 error!(
345 "[{}] {}: Enumeration {} is defined more than once.",
346 "Merge".bold(),
347 "DuplicateError".bold(),
348 other_enm.name.red().bold(),
349 );
350 valid = false;
351 }
352 }
353 }
354
355 if !valid {
357 panic!("Merge is not valid");
358 }
359
360 self.merge_prefixes(other);
362
363 self.objects.extend(
365 other
366 .objects
367 .iter()
368 .filter(|o| !ignore_types.contains(&o.name))
369 .filter(|o| !self.objects.iter().any(|existing| existing.name == o.name))
370 .cloned()
371 .collect::<Vec<Object>>(),
372 );
373 self.enums.extend(
374 other
375 .enums
376 .iter()
377 .filter(|e| !ignore_types.contains(&e.name))
378 .filter(|e| !self.enums.iter().any(|existing| existing.name == e.name))
379 .cloned()
380 .collect::<Vec<Enumeration>>(),
381 );
382 }
383
384 fn merge_prefixes(&mut self, other: &Self) {
387 if let Some(other_prefixes) = other.config.as_ref().and_then(|c| c.prefixes.as_ref()) {
388 let self_config = self.config.get_or_insert_with(FrontMatter::new);
389 let self_prefixes = self_config.prefixes.get_or_insert_with(HashMap::new);
390
391 for (key, value) in other_prefixes {
392 self_prefixes
393 .entry(key.clone())
394 .or_insert_with(|| value.clone());
395 }
396 }
397 }
398
399 fn get_ignore_types(&self) -> Vec<String> {
401 let mut ignore_types = Vec::new();
402 if self
403 .objects
404 .iter()
405 .any(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
406 {
407 ignore_types.extend(
408 self.objects
409 .iter()
410 .filter(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
411 .map(|o| o.name.clone()),
412 );
413 }
414 if self
415 .enums
416 .iter()
417 .any(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
418 {
419 ignore_types.extend(
420 self.enums
421 .iter()
422 .filter(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
423 .map(|e| e.name.clone()),
424 );
425 }
426 ignore_types
427 }
428
429 #[allow(clippy::result_large_err)]
445 pub fn from_markdown(path: &Path) -> Result<Self, Validator> {
446 let content = fs::read_to_string(path).expect("Could not read file");
447 parse_markdown(&content, Some(path))
448 }
449
450 #[cfg(not(target_arch = "wasm32"))]
451 pub fn from_github(repo: &str, path: &str) -> Result<Self, Box<dyn Error>> {
452 let cached = cache_github_repo(repo)?;
453 let path = path.trim_start_matches('/');
454 let model_path = cached.root.join(path);
455
456 if !model_path.exists() {
457 return Err(format!(
458 "Model path '{}' does not exist in cached repo {} at {}",
459 path, repo, cached.commit
460 )
461 .into());
462 }
463
464 let model = DataModel::from_markdown(&model_path)?;
465 Ok(model)
466 }
467
468 #[allow(clippy::result_large_err)]
486 pub fn from_markdown_string(content: &str) -> Result<Self, Validator> {
487 parse_markdown(content, None)
488 }
489
490 #[allow(clippy::result_large_err)]
497 pub fn from_json_schema(path: &Path) -> Result<Self, DataModelError> {
498 let content = fs::read_to_string(path)?;
499 let schema: SchemaObject = serde_json::from_str(&content)?;
500 let model: DataModel = schema
501 .try_into()
502 .expect("Could not convert schema to data model");
503
504 validate_model(&model).map_err(DataModelError::ValidationError)?;
506
507 Ok(model)
508 }
509
510 #[allow(clippy::result_large_err)]
517 pub fn from_json_schema_string(content: &str) -> Result<Self, DataModelError> {
518 let schema: SchemaObject = serde_json::from_str(content)?;
519 let model: DataModel = schema
520 .try_into()
521 .expect("Could not convert schema to data model");
522
523 validate_model(&model).map_err(DataModelError::ValidationError)?;
525
526 Ok(model)
527 }
528
529 #[allow(clippy::result_large_err)]
536 pub fn from_json_schema_object(schema: SchemaObject) -> Result<Self, DataModelError> {
537 let model: DataModel = schema
538 .try_into()
539 .expect("Could not convert schema to data model");
540
541 validate_model(&model).map_err(DataModelError::ValidationError)?;
543
544 Ok(model)
545 }
546}
547
548#[cfg(test)]
549mod tests {
550 use std::collections::BTreeMap;
551
552 use crate::attribute::DataType;
553
554 use super::*;
555 use pretty_assertions::assert_eq;
556
/// Merging two models with disjoint names appends the objects and
/// enumerations of the second model after those of the first.
#[test]
fn test_merge() {
    // Builds an optional, scalar string attribute with the given name.
    fn string_attribute(name: &str) -> crate::attribute::Attribute {
        crate::attribute::Attribute {
            name: name.to_string(),
            is_array: false,
            is_id: false,
            dtypes: vec!["string".to_string()],
            docstring: String::new(),
            options: vec![],
            term: None,
            required: false,
            xml: None,
            default: None,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    }

    // Builds an enumeration holding exactly one mapping.
    fn single_mapping_enum(name: &str, key: &str, value: &str) -> Enumeration {
        Enumeration {
            name: name.to_string(),
            mappings: BTreeMap::from([(key.to_string(), value.to_string())]),
            docstring: String::new(),
            position: None,
        }
    }

    // Arrange
    let mut first_object = Object::new("Object1".to_string(), None);
    first_object.add_attribute(string_attribute("test1"));

    let mut second_object = Object::new("Object2".to_string(), None);
    second_object.add_attribute(string_attribute("test2"));

    let mut receiver = DataModel::new(None, None);
    receiver.objects.push(first_object);
    receiver
        .enums
        .push(single_mapping_enum("Enum1", "key1", "value1"));

    let mut incoming = DataModel::new(None, None);
    incoming.objects.push(second_object);
    incoming
        .enums
        .push(single_mapping_enum("Enum2", "key2", "value2"));

    // Act
    receiver.merge(&incoming);

    // Assert
    let object_names: Vec<&str> = receiver.objects.iter().map(|o| o.name.as_str()).collect();
    let enum_names: Vec<&str> = receiver.enums.iter().map(|e| e.name.as_str()).collect();

    assert_eq!(object_names, vec!["Object1", "Object2"]);
    assert_eq!(enum_names, vec!["Enum1", "Enum2"]);
}
627
/// After `sort_attrs`, a required attribute precedes an optional one even
/// when it was added later.
#[test]
fn test_sort_attrs() {
    // Builds a scalar string attribute; only name, requiredness and default
    // vary between the fixtures of this test.
    fn string_attribute(
        name: &str,
        required: bool,
        default: Option<DataType>,
    ) -> crate::attribute::Attribute {
        crate::attribute::Attribute {
            name: name.to_string(),
            is_array: false,
            is_id: false,
            dtypes: vec!["string".to_string()],
            docstring: String::new(),
            options: vec![],
            term: None,
            required,
            xml: None,
            default,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    }

    // Arrange: the optional attribute is deliberately added first.
    let mut object = Object::new("Object1".to_string(), None);
    object.add_attribute(string_attribute(
        "not_required",
        false,
        Some(DataType::String(String::new())),
    ));
    object.add_attribute(string_attribute("required", true, None));

    let mut model = DataModel::new(None, None);
    model.objects.push(object);

    // Act
    model.sort_attrs();

    // Assert
    let sorted = &model.objects[0].attributes;
    assert_eq!(sorted[0].name, "required");
    assert_eq!(sorted[1].name, "not_required");
}
674
/// The internal schema fixture deserializes into two objects and one
/// enumeration.
#[test]
fn test_from_internal_schema() {
    let fixture = Path::new("tests/data/expected_internal_schema.json");

    let parsed =
        DataModel::from_internal_schema(fixture).expect("Failed to parse internal schema");

    let (object_count, enum_count) = (parsed.objects.len(), parsed.enums.len());
    assert_eq!(object_count, 2);
    assert_eq!(enum_count, 1);
}
687
/// The markdown fixture `model_w_html.md` parses into two objects and one
/// enumeration.
#[test]
fn test_from_markdown_w_html() {
    let fixture = Path::new("tests/data/model_w_html.md");

    let parsed = DataModel::from_markdown(fixture).expect("Failed to parse markdown");

    let (object_count, enum_count) = (parsed.objects.len(), parsed.enums.len());
    assert_eq!(object_count, 2);
    assert_eq!(enum_count, 1);
}
700
/// Parsing the contents of `model.md` from memory yields two objects and one
/// enumeration.
#[test]
fn test_from_markdown_string() {
    let markdown = fs::read_to_string(Path::new("tests/data/model.md")).unwrap();

    let parsed = DataModel::from_markdown_string(&markdown).expect("Failed to parse markdown");

    let (object_count, enum_count) = (parsed.objects.len(), parsed.enums.len());
    assert_eq!(object_count, 2);
    assert_eq!(enum_count, 1);
}
715}