1use std::collections::{BTreeMap, HashMap, HashSet};
31
32use regex::Regex;
33
34use crate::{
35 attribute::Attribute,
36 object::{Enumeration, Object},
37 option::AttrOption,
38 prelude::DataModel,
39};
40
41use super::schema::{EnumObject, PrimitiveType, Property, SchemaObject, SchemaType};
42
/// Type names that are dropped from an attribute's `dtypes` when a `Property`
/// is converted into an `Attribute`.
const IGNORE_TYPES: [&str; 2] = ["object", "array"];
46
47impl TryFrom<SchemaObject> for DataModel {
52 type Error = Box<dyn std::error::Error>;
53
54 fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
55 let mut objects = vec![schema_obj.clone().try_into()?];
56 let mut enums = vec![];
57
58 for (name, definition) in schema_obj.definitions {
60 match definition {
61 SchemaType::Object(object) => {
62 let mut object: Object = object.try_into()?;
63 object.name = name;
64 objects.push(object);
65 }
66 SchemaType::Enum(enum_obj) => {
67 let mut enum_obj: Enumeration = enum_obj.try_into()?;
68 enum_obj.name = name;
69 enums.push(enum_obj);
70 }
71 }
72 }
73
74 Ok(DataModel {
75 objects,
76 enums,
77 name: Some(schema_obj.title),
78 ..Default::default()
79 })
80 }
81}
82
83impl TryFrom<SchemaObject> for Object {
88 type Error = Box<dyn std::error::Error>;
89
90 fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
91 let mut attributes = schema_obj
93 .properties
94 .into_iter()
95 .map(|(name, property)| {
96 let mut attribute: Attribute = property.try_into()?;
97 attribute.name = name.clone();
98 Ok(attribute)
99 })
100 .collect::<Result<Vec<Attribute>, Self::Error>>()?;
101
102 for required_attribute in schema_obj.required {
104 let attribute = attributes
105 .iter_mut()
106 .find(|attr| attr.name == required_attribute);
107 if let Some(attr) = attribute {
108 attr.required = true;
109 }
110 }
111
112 Ok(Object {
113 name: schema_obj.title,
114 attributes,
115 docstring: schema_obj.description.unwrap_or_default(),
116 term: None,
117 mixins: Vec::new(),
118 position: None,
119 })
120 }
121}
122
123impl TryFrom<Property> for Attribute {
131 type Error = Box<dyn std::error::Error>;
132
133 fn try_from(property: Property) -> Result<Self, Self::Error> {
134 let is_array = property
135 .dtype
136 .as_ref()
137 .is_some_and(|dtype| dtype.is_array());
138
139 let mut dtypes = HashSet::new();
140
141 if is_array {
143 if let Some(items) = &property.items {
147 dtypes.extend(
148 items
149 .get_types()
150 .into_iter()
151 .map(extract_reference)
152 .collect::<Result<Vec<String>, String>>()?,
153 );
154 }
155 } else if let Some(dtype) = &property.dtype {
156 dtypes.insert(extract_reference(dtype.to_string())?);
158 }
159
160 if let Some(reference) = &property.reference {
162 dtypes.insert(extract_reference(reference.clone())?);
163 }
164
165 if let Some(one_of) = property.one_of {
167 for item in one_of.iter() {
168 dtypes.extend(
169 item.get_types()
170 .into_iter()
171 .map(extract_reference)
172 .collect::<Result<Vec<String>, String>>()?,
173 );
174 }
175 }
176
177 if let Some(all_of) = property.all_of {
178 if all_of.len() == 1 {
179 dtypes.extend(
180 all_of[0]
181 .get_types()
182 .into_iter()
183 .map(extract_reference)
184 .collect::<Result<Vec<String>, String>>()?,
185 );
186 } else {
187 return Err("allOf with multiple items is not supported yet".into());
188 }
189 }
190
191 Ok(Attribute {
192 name: property.title.unwrap_or("MISSING_TITLE".to_string()),
193 is_array,
194 dtypes: dtypes
195 .into_iter()
196 .filter(|dtype| !IGNORE_TYPES.contains(&dtype.as_str()))
197 .collect::<Vec<String>>(),
198 is_id: false,
199 docstring: property.description.unwrap_or_default(),
200 options: parse_options(&property.options)?,
201 term: property.term,
202 required: false,
203 default: None,
204 xml: None,
205 is_enum: false,
206 position: None,
207 import_prefix: None,
208 })
209 }
210}
211
212impl TryFrom<EnumObject> for Enumeration {
217 type Error = Box<dyn std::error::Error>;
218
219 fn try_from(enum_obj: EnumObject) -> Result<Self, Self::Error> {
220 let mappings = enum_obj
221 .enum_values
222 .iter()
223 .enumerate()
224 .map(|(i, value)| {
225 if is_valid_key(value) {
226 (value.clone().to_uppercase(), value.clone())
228 } else if value.len() < 15 {
229 let cleaned_key = clean_key(value);
231 (cleaned_key.to_uppercase(), value.clone())
232 } else {
233 (format!("VALUE_{i}"), value.clone())
234 }
235 })
236 .collect::<BTreeMap<String, String>>();
237
238 Ok(Enumeration {
239 name: enum_obj.title,
240 docstring: enum_obj.description.unwrap_or_default(),
241 position: None,
242 mappings,
243 })
244 }
245}
246
/// Normalizes an arbitrary string into an upper-case key.
///
/// Every run of non-alphanumeric characters is collapsed into a single `_`.
/// If the result does not start with an alphabetic character, its first
/// character is removed. The remainder is upper-cased.
///
/// The first character is removed on a character boundary, so empty input
/// and input starting with a multi-byte character are handled without
/// panicking. Empty input yields an empty string.
fn clean_key(key: &str) -> String {
    let mut cleaned = String::with_capacity(key.len());
    for c in key.chars() {
        if c.is_alphanumeric() {
            cleaned.push(c);
        } else if !cleaned.ends_with('_') {
            // First separator of a run; later ones in the same run are skipped.
            cleaned.push('_');
        }
    }

    let mut chars = cleaned.chars();
    let trimmed = match chars.next() {
        // Drop exactly one leading character when it is not a letter.
        Some(first) if !first.is_alphabetic() => chars.as_str(),
        _ => cleaned.as_str(),
    };

    trimmed.to_uppercase()
}
259
260fn parse_options(
265 options: &HashMap<String, PrimitiveType>,
266) -> Result<Vec<AttrOption>, Box<dyn std::error::Error>> {
267 let mut parsed_options = Vec::new();
268
269 for (key, value) in options {
270 let option = AttrOption::from_pair(key, value.to_string().as_str())?;
271 parsed_options.push(option);
272 }
273
274 Ok(parsed_options)
275}
276
/// Returns the last `/`-separated segment of a reference.
///
/// A string without any `/` is returned unchanged.
///
/// # Errors
///
/// Returns `"Invalid reference format"` when the last segment is empty,
/// which includes the empty string and references ending in `/`.
fn extract_reference(reference: String) -> Result<String, String> {
    match reference.rsplit('/').next() {
        Some(segment) if !segment.is_empty() => Ok(segment.to_owned()),
        _ => Err("Invalid reference format".to_string()),
    }
}
289
/// Reports whether `s` can be used as a key as-is.
///
/// A valid key is non-empty, starts with an alphabetic character or `_`,
/// and consists solely of alphanumeric characters and `_`.
fn is_valid_key(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(first) if first.is_alphabetic() || first == '_' => {
            rest.all(|c| c == '_' || c.is_alphanumeric())
        }
        // Empty input, or a first character that cannot start a key.
        _ => false,
    }
}
310
311#[cfg(test)]
312mod tests {
313 use serde_json::json;
314
315 use super::*;
316
317 #[test]
328 fn test_parse_schema() {
329 let schema = json!({
330 "$schema": "https://json-schema.org/draft/2020-12/schema",
331 "$id": "https://www.github.com/my/repo/",
332 "title": "Test",
333 "type": "object",
334 "properties": {
335 "array_valued": {
336 "title": "array_valued",
337 "type": "array",
338 "$term": "http://schema.org/something",
339 "items": {
340 "$ref": "#/$defs/Test2"
341 }
342 },
343 "multiple_types": {
344 "title": "multiple_types",
345 "oneOf": [
346 {
347 "type": "number"
348 },
349 {
350 "$ref": "#/$defs/Test2"
351 }
352 ]
353 },
354 "multiple_types_array": {
355 "title": "multiple_types_array",
356 "type": "array",
357 "items": {
358 "oneOf": [
359 {
360 "type": "number"
361 },
362 {
363 "$ref": "#/$defs/Test2"
364 }
365 ]
366 }
367 },
368 "name": {
369 "title": "name",
370 "type": "string",
371 "default": "test",
372 "description": "A test description",
373 "$term": "http://schema.org/hello"
374 },
375 "number": {
376 "title": "number",
377 "type": "number",
378 "$term": "http://schema.org/one",
379 "minimum": 0.0
380 },
381 "ontology": {
382 "title": "ontology",
383 "$ref": "#/$defs/Ontology"
384 },
385 "single_valued": {
386 "title": "single_valued",
387 "type": "object",
388 "$ref": "#/$defs/Test2"
389 }
390 },
391 "$defs": {
392 "Ontology": {
393 "title": "Ontology",
394 "type": "string",
395 "enum": [
396 "https://www.evidenceontology.org/term/",
397 "https://amigo.geneontology.org/amigo/term/",
398 "http://semanticscience.org/resource/"
399 ]
400 },
401 "Test2": {
402 "title": "Test2",
403 "type": "object",
404 "properties": {
405 "names": {
406 "title": "names",
407 "type": "array",
408 "$term": "http://schema.org/hello",
409 "items": {
410 "type": "string"
411 }
412 },
413 "number": {
414 "title": "number",
415 "type": "number",
416 "$term": "http://schema.org/one",
417 "minimum": 0.0
418 }
419 },
420 "required": [],
421 "additionalProperties": false
422 },
423 "no_title_and_no_required": {
424 "type": "object",
425 "properties": {
426 "val": {
427 "type": "string"
428 }
429 }
430 }
431 },
432 "required": [
433 "name"
434 ],
435 "additionalProperties": false
436 });
437
438 let schema: SchemaObject = serde_json::from_value(schema).expect("Failed to parse schema");
439 let data_model =
440 DataModel::try_from(schema).expect("Failed to convert schema to data model");
441
442 assert_eq!(data_model.name, Some("Test".to_string()));
443 assert_eq!(data_model.objects.len(), 3);
444 assert_eq!(data_model.enums.len(), 1);
445
446 let root = data_model
448 .objects
449 .iter()
450 .find(|object| object.name == "Test")
451 .expect("Root object not found");
452
453 assert_eq!(root.attributes.len(), 7);
454 assert_eq!(root.attributes[0].name, "array_valued");
455 assert_eq!(root.attributes[1].name, "multiple_types");
456 assert_eq!(root.attributes[2].name, "multiple_types_array");
457 assert_eq!(root.attributes[3].name, "name");
458 assert_eq!(root.attributes[4].name, "number");
459 assert_eq!(root.attributes[5].name, "ontology");
460 assert_eq!(root.attributes[6].name, "single_valued");
461
462 let test2 = data_model
464 .objects
465 .iter()
466 .find(|object| object.name == "Test2")
467 .expect("Test2 object not found");
468
469 assert_eq!(test2.attributes.len(), 2);
470 assert_eq!(test2.attributes[0].name, "names");
471 assert_eq!(test2.attributes[1].name, "number");
472
473 let names_attr = &test2.attributes[0];
475 assert!(names_attr.is_array);
476 assert_eq!(names_attr.dtypes, vec!["string"]);
477 assert_eq!(names_attr.term, Some("http://schema.org/hello".to_string()));
478
479 let number_attr = &test2.attributes[1];
480 assert!(!number_attr.is_array);
481 assert_eq!(number_attr.dtypes, vec!["number"]);
482 assert_eq!(number_attr.term, Some("http://schema.org/one".to_string()));
483
484 let no_title_and_no_required = data_model
486 .objects
487 .iter()
488 .find(|object| object.name == "no_title_and_no_required")
489 .expect("no_title_and_no_required object not found");
490
491 assert!(!no_title_and_no_required.name.is_empty());
492 assert_eq!(no_title_and_no_required.attributes.len(), 1);
493 assert_eq!(no_title_and_no_required.attributes[0].name, "val");
494
495 let ontology = data_model
497 .enums
498 .iter()
499 .find(|e| e.name == "Ontology")
500 .expect("Ontology enum not found");
501
502 assert_eq!(ontology.mappings.len(), 3);
503 assert_eq!(
504 ontology.mappings["VALUE_0"],
505 "https://www.evidenceontology.org/term/"
506 );
507 assert_eq!(
508 ontology.mappings["VALUE_1"],
509 "https://amigo.geneontology.org/amigo/term/"
510 );
511 assert_eq!(
512 ontology.mappings["VALUE_2"],
513 "http://semanticscience.org/resource/"
514 );
515
516 let array_valued = &root.attributes[0];
518 assert!(array_valued.is_array);
519 assert_eq!(array_valued.dtypes, vec!["Test2"]);
520 assert_eq!(
521 array_valued.term,
522 Some("http://schema.org/something".to_string())
523 );
524
525 let multiple_types = &root.attributes[1];
526 assert!(!multiple_types.is_array);
527 let multiple_types_dtypes: HashSet<_> = multiple_types.dtypes.iter().collect();
528 assert_eq!(
529 multiple_types_dtypes,
530 HashSet::from([&"number".to_string(), &"Test2".to_string()])
531 );
532
533 let name_attr = &root.attributes[3];
534 assert!(name_attr.required);
535 assert_eq!(name_attr.term, Some("http://schema.org/hello".to_string()));
536 }
537
538 #[test]
543 fn test_parse_property() {
544 let property = json!({
545 "title": "number",
546 "type": "number",
547 "$term": "http://schema.org/one",
548 "minimum": 0.0,
549 "description": "test"
550 });
551
552 let property: Property = serde_json::from_value(property).unwrap();
553 let attribute = Attribute::try_from(property).unwrap();
554 assert_eq!(attribute.name, "number");
555 assert_eq!(attribute.dtypes, vec!["number"]);
556 assert_eq!(attribute.docstring, "test");
557 assert_eq!(attribute.term, Some("http://schema.org/one".to_string()));
558 assert!(!attribute.required);
559 assert_eq!(attribute.default, None);
560 assert!(!attribute.is_array);
561 assert_eq!(attribute.xml, None);
562 assert!(!attribute.is_enum);
563 assert_eq!(attribute.position, None);
564 assert_eq!(attribute.import_prefix, None);
565 }
566
567 #[test]
572 fn test_parse_property_with_one_of() {
573 let property = json!({
574 "title": "number",
575 "oneOf": [
576 {
577 "type": "number"
578 },
579 {
580 "type": "string"
581 }
582 ]
583 });
584
585 let property: Property = serde_json::from_value(property).unwrap();
586 let attribute = Attribute::try_from(property).unwrap();
587
588 assert_eq!(attribute.name, "number");
589 assert_eq!(
590 attribute.dtypes.into_iter().collect::<HashSet<_>>(),
591 vec!["number".to_string(), "string".to_string()]
592 .into_iter()
593 .collect::<HashSet<_>>()
594 );
595 assert_eq!(attribute.docstring, "");
596 assert_eq!(attribute.term, None);
597 assert!(!attribute.required);
598 assert_eq!(attribute.default, None);
599 assert_eq!(attribute.xml, None);
600 assert!(!attribute.is_array);
601 assert!(!attribute.is_enum);
602 assert_eq!(attribute.position, None);
603 assert_eq!(attribute.import_prefix, None);
604 }
605
606 #[test]
611 fn test_parse_property_with_one_of_mixed() {
612 let property = json!({
613 "title": "number",
614 "oneOf": [
615 {
616 "$ref": "#/$defs/Test"
617 },
618 {
619 "type": "string"
620 }
621 ]
622 });
623
624 let property: Property = serde_json::from_value(property).unwrap();
625 let attribute = Attribute::try_from(property).unwrap();
626
627 assert_eq!(attribute.name, "number");
628 assert_eq!(
629 attribute.dtypes.into_iter().collect::<HashSet<_>>(),
630 vec!["Test".to_string(), "string".to_string()]
631 .into_iter()
632 .collect::<HashSet<_>>()
633 );
634 assert_eq!(attribute.docstring, "");
635 assert_eq!(attribute.term, None);
636 assert!(!attribute.required);
637 assert_eq!(attribute.default, None);
638 assert_eq!(attribute.xml, None);
639 assert!(!attribute.is_array);
640 assert!(!attribute.is_enum);
641 assert_eq!(attribute.position, None);
642 assert_eq!(attribute.import_prefix, None);
643 }
644
645 #[test]
650 #[should_panic]
651 fn test_parse_property_with_all_of() {
652 let property = json!({
653 "title": "number",
654 "allOf": [
655 {
656 "type": "number"
657 },
658 {
659 "type": "string"
660 }
661 ]
662 });
663
664 let property: Property = serde_json::from_value(property).unwrap();
665 Attribute::try_from(property).unwrap();
666 }
667
668 #[test]
673 fn test_parse_property_with_reference() {
674 let property = json!({
675 "title": "number",
676 "$ref": "#/$defs/Test"
677 });
678
679 let property: Property = serde_json::from_value(property).unwrap();
680 let attribute = Attribute::try_from(property).unwrap();
681
682 assert_eq!(attribute.name, "number");
683 assert_eq!(attribute.dtypes, vec!["Test".to_string()]);
684 assert_eq!(attribute.docstring, "");
685 assert_eq!(attribute.term, None);
686 assert!(!attribute.required);
687 assert_eq!(attribute.default, None);
688 assert!(!attribute.is_array);
689 assert_eq!(attribute.xml, None);
690 assert!(!attribute.is_enum);
691 assert_eq!(attribute.position, None);
692 assert_eq!(attribute.import_prefix, None);
693 }
694
695 #[test]
700 fn test_parse_property_array_without_reference() {
701 let property = json!({
702 "title": "number",
703 "type": "array",
704 "items": {
705 "type": "string"
706 }
707 });
708
709 let property: Property = serde_json::from_value(property).unwrap();
710 let attribute = Attribute::try_from(property).unwrap();
711
712 assert_eq!(attribute.name, "number");
713 assert_eq!(attribute.dtypes, vec!["string".to_string()]);
714 assert_eq!(attribute.docstring, "");
715 assert_eq!(attribute.term, None);
716 assert!(!attribute.required);
717 assert_eq!(attribute.default, None);
718 assert!(attribute.is_array);
719 assert_eq!(attribute.xml, None);
720 assert!(!attribute.is_enum);
721 assert_eq!(attribute.position, None);
722 assert_eq!(attribute.import_prefix, None);
723 }
724
725 #[test]
730 fn test_extract_reference_from_array() {
731 let property = json!({
732 "title": "number",
733 "type": "array",
734 "items": {
735 "type": "string"
736 }
737 });
738
739 let property: Property = serde_json::from_value(property).unwrap();
740 let attribute = Attribute::try_from(property).unwrap();
741
742 assert_eq!(attribute.name, "number");
743 assert_eq!(attribute.dtypes, vec!["string".to_string()]);
744 assert_eq!(attribute.docstring, "");
745 assert_eq!(attribute.term, None);
746 assert!(!attribute.required);
747 assert_eq!(attribute.default, None);
748 assert!(attribute.is_array);
749 assert_eq!(attribute.xml, None);
750 assert!(!attribute.is_enum);
751 assert_eq!(attribute.position, None);
752 assert_eq!(attribute.import_prefix, None);
753 }
754
755 #[test]
760 fn test_extract_reference_from_one_of() {
761 let property = json!({
762 "title": "number",
763 "oneOf": [
764 {
765 "$ref": "#/$defs/Test"
766 },
767 {
768 "type": "string"
769 }
770 ]
771 });
772
773 let property: Property = serde_json::from_value(property).unwrap();
774 let attribute = Attribute::try_from(property).unwrap();
775
776 assert_eq!(attribute.name, "number");
777 assert_eq!(
778 attribute.dtypes.into_iter().collect::<HashSet<_>>(),
779 vec!["Test".to_string(), "string".to_string()]
780 .into_iter()
781 .collect::<HashSet<_>>()
782 );
783 }
784
785 #[test]
790 fn test_parse_object() {
791 let object = json!({
792 "title": "Test",
793 "type": "object",
794 "properties": {
795 "number": {
796 "title": "number",
797 "type": "number"
798 },
799 "string": {
800 "name": "string",
801 "type": "string"
802 }
803 },
804 "required": ["number"]
805 });
806
807 let object: SchemaObject = serde_json::from_value(object).unwrap();
808 let data_model = Object::try_from(object).unwrap();
809
810 assert_eq!(data_model.name, "Test");
811 assert_eq!(data_model.attributes.len(), 2);
812 assert_eq!(data_model.attributes[0].name, "number");
813 assert_eq!(data_model.attributes[1].name, "string");
814
815 let attribute1 = data_model.attributes[0].clone();
816
817 assert_eq!(attribute1.name, "number");
818 assert_eq!(attribute1.dtypes, vec!["number"]);
819 assert_eq!(attribute1.docstring, "");
820 assert_eq!(attribute1.term, None);
821 assert!(attribute1.required);
822 assert_eq!(attribute1.default, None);
823 assert!(!attribute1.is_array);
824
825 let attribute2 = data_model.attributes[1].clone();
826
827 assert_eq!(attribute2.name, "string");
828 assert_eq!(attribute2.dtypes, vec!["string"]);
829 assert_eq!(attribute2.docstring, "");
830 assert_eq!(attribute2.term, None);
831 assert!(!attribute2.required);
832 assert_eq!(attribute2.default, None);
833 assert!(!attribute2.is_array);
834 }
835
836 #[test]
841 fn test_parse_enum() {
842 let enum_obj = json!({
843 "title": "Test",
844 "type": "string",
845 "enum": ["value1", "value2", "value3"]
846 });
847
848 let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
849 let enumeration = Enumeration::try_from(enum_obj).unwrap();
850
851 assert_eq!(enumeration.name, "Test");
852 assert_eq!(enumeration.mappings.len(), 3);
853 assert_eq!(enumeration.mappings["VALUE1"], "value1");
854 assert_eq!(enumeration.mappings["VALUE2"], "value2");
855 assert_eq!(enumeration.mappings["VALUE3"], "value3");
856 }
857
858 #[test]
863 fn test_parse_enum_with_special_characters() {
864 let enum_obj = json!({
865 "title": "Test",
866 "type": "string",
867 "enum": ["https://www.evidenceontology.org/term/", "https://amigo.geneontology.org/amigo/term/", "http://semanticscience.org/resource/"]
868 });
869
870 let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
871 let enumeration = Enumeration::try_from(enum_obj).unwrap();
872
873 assert_eq!(enumeration.name, "Test");
874 assert_eq!(enumeration.mappings.len(), 3);
875 assert_eq!(
876 enumeration.mappings["VALUE_0"],
877 "https://www.evidenceontology.org/term/"
878 );
879 assert_eq!(
880 enumeration.mappings["VALUE_1"],
881 "https://amigo.geneontology.org/amigo/term/"
882 );
883 assert_eq!(
884 enumeration.mappings["VALUE_2"],
885 "http://semanticscience.org/resource/"
886 );
887 }
888
889 #[test]
894 fn test_extract_reference() {
895 assert_eq!(
896 extract_reference("#/$defs/Test".to_string()),
897 Ok("Test".to_string())
898 );
899 assert_eq!(
900 extract_reference("Test".to_string()),
901 Ok("Test".to_string())
902 );
903 assert_eq!(
904 extract_reference("".to_string()),
905 Err("Invalid reference format".to_string())
906 );
907 }
908
909 #[test]
910 fn test_enzml_schema() {
911 let schema_path = "tests/data/old_schema.json";
913 let schema = std::fs::read_to_string(schema_path).expect("Failed to read schema");
914 let schema: SchemaObject = serde_json::from_str(&schema).expect("Failed to parse schema");
915
916 let data_model =
918 DataModel::try_from(schema).expect("Failed to convert schema to data model");
919
920 assert_eq!(data_model.objects.len(), 14);
922 assert_eq!(data_model.enums.len(), 2);
923 }
924
925 #[test]
926 fn test_clean_key() {
927 assert_eq!(clean_key("Test:Hello"), "TEST_HELLO");
928 assert_eq!(clean_key("Test::Hello"), "TEST_HELLO");
929 assert_eq!(clean_key("Test_Hello"), "TEST_HELLO");
930 assert_eq!(clean_key("Test__Hello"), "TEST_HELLO");
931 assert_eq!(clean_key("!Test"), "TEST");
932 }
933
934 #[test]
935 fn test_additional_properties_object() {
936 let schema = json!({
937 "title": "Test",
938 "type": "object",
939 "properties": {
940 "test": {
941 "type": "string"
942 }
943 },
944 "additionalProperties": {
945 "type": "string"
946 }
947 });
948
949 let schema: SchemaObject = serde_json::from_value(schema).unwrap();
950 let data_model = DataModel::try_from(schema.clone()).unwrap();
951
952 assert!(schema.additional_properties);
953 assert_eq!(data_model.objects.len(), 1);
954 assert_eq!(data_model.objects[0].attributes.len(), 1);
955 }
956}