Skip to main content

mdmodels_core/json/
import.rs

1/*
2 * Copyright (c) 2025 Jan Range, Felix Neubauer
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24//! JSON Schema import functionality for converting JSON Schema objects to DataModel structures.
25//!
26//! This module provides the implementation for converting JSON Schema objects to the internal
27//! data model representation. It handles the conversion of schema objects, properties, and enumerations
28//! to their corresponding data model types.
29
30use std::collections::{BTreeMap, HashMap, HashSet};
31
32use convert_case::{Case, Casing};
33use regex::Regex;
34
35use crate::{
36    attribute::{self, Attribute},
37    object::{Enumeration, Object},
38    option::AttrOption,
39    prelude::DataModel,
40};
41
42use super::schema::{EnumObject, PrimitiveType, Property, SchemaObject, SchemaType};
43
/// JSON Schema type names that are dropped from an attribute's data types.
///
/// "object" and "array" describe containers rather than concrete data types, so
/// `property_to_attribute` filters them out of the `dtypes` it collects.
const IGNORE_TYPES: [&str; 2] = ["object", "array"];
47
48/// Converts a JSON Schema object to a DataModel
49///
50/// This implementation handles the conversion of the root schema object and all its definitions
51/// to the corresponding DataModel structure, including objects and enumerations.
52impl TryFrom<SchemaObject> for DataModel {
53    type Error = Box<dyn std::error::Error>;
54
55    fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
56        let mut nested_objects = Vec::new();
57        let mut nested_enums = Vec::new();
58        let root =
59            schema_object_to_object(schema_obj.clone(), &mut nested_objects, &mut nested_enums)?;
60        let mut objects = vec![root];
61        objects.extend(nested_objects);
62        let mut enums = nested_enums;
63
64        // Process all definitions in the schema
65        for (name, definition) in schema_obj.definitions {
66            match definition {
67                SchemaType::Object(object) => {
68                    let mut object: Object = object.try_into()?;
69                    object.name = name;
70                    objects.push(object);
71                }
72                SchemaType::Enum(enum_obj) => {
73                    let mut enum_obj: Enumeration = enum_obj.try_into()?;
74                    enum_obj.name = name;
75                    enums.push(enum_obj);
76                }
77            }
78        }
79
80        Ok(DataModel {
81            objects,
82            enums,
83            name: Some(schema_obj.title),
84            ..Default::default()
85        })
86    }
87}
88
89/// Converts a JSON Schema object to an Object
90///
91/// This implementation handles the conversion of a schema object's properties
92/// to attributes, and processes required fields.
93impl TryFrom<SchemaObject> for Object {
94    type Error = Box<dyn std::error::Error>;
95
96    fn try_from(schema_obj: SchemaObject) -> Result<Self, Self::Error> {
97        schema_object_to_object(schema_obj, &mut Vec::new(), &mut Vec::new())
98    }
99}
100
101/// Converts a JSON Schema property to an Attribute
102///
103/// This implementation handles various property types including:
104/// - Array properties with items
105/// - Properties with direct data types
106/// - Properties with references
107/// - Properties with oneOf (multiple possible types)
108impl TryFrom<Property> for Attribute {
109    type Error = Box<dyn std::error::Error>;
110
111    fn try_from(property: Property) -> Result<Self, Self::Error> {
112        let name = property
113            .title
114            .clone()
115            .unwrap_or_else(|| "MISSING_TITLE".to_string());
116        property_to_attribute(&name, property, None, &mut Vec::new(), &mut Vec::new())
117    }
118}
119
120fn schema_object_to_object(
121    schema_obj: SchemaObject,
122    nested_objects: &mut Vec<Object>,
123    nested_enums: &mut Vec<Enumeration>,
124) -> Result<Object, Box<dyn std::error::Error>> {
125    let object_name = schema_obj.title.clone();
126
127    let mut attributes = schema_obj
128        .properties
129        .into_iter()
130        .map(|(name, property)| {
131            let mut attribute =
132                property_to_attribute(&name, property, None, nested_objects, nested_enums)?;
133            attribute.name = name;
134            Ok(attribute)
135        })
136        .collect::<Result<Vec<Attribute>, Box<dyn std::error::Error>>>()?;
137
138    apply_required_fields(&mut attributes, &schema_obj.required);
139
140    Ok(Object {
141        name: object_name,
142        attributes,
143        docstring: schema_obj.description.unwrap_or_default(),
144        term: None,
145        mixins: Vec::new(),
146        position: None,
147    })
148}
149
150fn inline_object_to_object(
151    property_name: &str,
152    property: Property,
153    parent_property: Option<&str>,
154    nested_objects: &mut Vec<Object>,
155    nested_enums: &mut Vec<Enumeration>,
156) -> Result<Object, Box<dyn std::error::Error>> {
157    let object_name =
158        synthetic_type_name(property_name, property.title.as_deref(), parent_property);
159
160    let mut attributes = property
161        .properties
162        .into_iter()
163        .map(|(name, nested_property)| {
164            let mut attribute = property_to_attribute(
165                &name,
166                nested_property,
167                Some(property_name),
168                nested_objects,
169                nested_enums,
170            )?;
171            attribute.name = name;
172            Ok(attribute)
173        })
174        .collect::<Result<Vec<Attribute>, Box<dyn std::error::Error>>>()?;
175
176    apply_required_fields(&mut attributes, &property.required);
177
178    Ok(Object {
179        name: object_name,
180        attributes,
181        docstring: property.description.unwrap_or_default(),
182        term: property.term,
183        mixins: Vec::new(),
184        position: None,
185    })
186}
187
188fn apply_required_fields(attributes: &mut [Attribute], required: &[String]) {
189    for required_attribute in required {
190        if let Some(attr) = attributes
191            .iter_mut()
192            .find(|attr| attr.name == *required_attribute)
193        {
194            attr.required = true;
195        }
196    }
197}
198
199fn synthetic_type_name(
200    property_name: &str,
201    title: Option<&str>,
202    parent_property: Option<&str>,
203) -> String {
204    let base = title
205        .filter(|value| !value.is_empty())
206        .map(|value| {
207            value
208                .chars()
209                .filter(|c| !c.is_whitespace())
210                .collect::<String>()
211        })
212        .unwrap_or_else(|| property_name.to_case(Case::Pascal));
213
214    match parent_property {
215        Some(parent) => format!("{}{}", parent.to_case(Case::Pascal), base),
216        None => base,
217    }
218}
219
220fn property_to_attribute(
221    property_name: &str,
222    property: Property,
223    parent_property: Option<&str>,
224    nested_objects: &mut Vec<Object>,
225    nested_enums: &mut Vec<Enumeration>,
226) -> Result<Attribute, Box<dyn std::error::Error>> {
227    if property.has_inline_object() {
228        let docstring = property.description.clone().unwrap_or_default();
229        let term = property.term.clone();
230        let options = parse_options(&property.options)?;
231        let object = inline_object_to_object(
232            property_name,
233            property,
234            parent_property,
235            nested_objects,
236            nested_enums,
237        )?;
238        let type_name = object.name.clone();
239        nested_objects.push(object);
240
241        return Ok(Attribute {
242            name: property_name.to_string(),
243            is_array: false,
244            dtypes: vec![type_name],
245            is_id: false,
246            docstring,
247            options,
248            term,
249            required: false,
250            default: None,
251            xml: None,
252            is_enum: false,
253            position: None,
254            import_prefix: None,
255        });
256    }
257
258    let is_array = property
259        .dtype
260        .as_ref()
261        .is_some_and(|dtype| dtype.is_array());
262
263    let mut dtypes = HashSet::new();
264    let mut is_enum = false;
265
266    if is_array {
267        if let Some(items) = &property.items {
268            if let Some(item_property) = items.as_property() {
269                if item_property.has_inline_object() {
270                    let object = inline_object_to_object(
271                        property_name,
272                        item_property.clone(),
273                        parent_property,
274                        nested_objects,
275                        nested_enums,
276                    )?;
277                    let type_name = object.name.clone();
278                    nested_objects.push(object);
279                    dtypes.insert(type_name);
280                } else if let Some(values) = enum_values(item_property) {
281                    // Array of inline enum values: synthesize a named enumeration.
282                    let enum_name = make_inline_enum(
283                        property_name,
284                        parent_property,
285                        values,
286                        item_property.description.as_deref(),
287                        nested_enums,
288                    );
289                    dtypes.insert(enum_name);
290                    is_enum = true;
291                } else {
292                    dtypes.extend(
293                        items
294                            .get_types()
295                            .into_iter()
296                            .map(extract_reference)
297                            .collect::<Result<Vec<String>, String>>()?,
298                    );
299                }
300            } else {
301                dtypes.extend(
302                    items
303                        .get_types()
304                        .into_iter()
305                        .map(extract_reference)
306                        .collect::<Result<Vec<String>, String>>()?,
307                );
308            }
309        }
310    } else if let Some(values) = enum_values(&property) {
311        // Inline enum values (e.g. `{ "type": "string", "enum": [...] }`) are
312        // converted into a named enumeration instead of being flattened to a plain
313        // string, so the allowed values are preserved.
314        let enum_name = make_inline_enum(
315            property_name,
316            parent_property,
317            values,
318            property.description.as_deref(),
319            nested_enums,
320        );
321        dtypes.insert(enum_name);
322        is_enum = true;
323    } else if let Some(dtype) = &property.dtype {
324        dtypes.insert(extract_reference(dtype.to_string())?);
325    }
326
327    if let Some(reference) = &property.reference {
328        dtypes.insert(extract_reference(reference.clone())?);
329    }
330
331    if let Some(one_of) = property.one_of {
332        for item in one_of.iter() {
333            dtypes.extend(
334                item.get_types()
335                    .into_iter()
336                    .map(extract_reference)
337                    .collect::<Result<Vec<String>, String>>()?,
338            );
339        }
340    }
341
342    if let Some(all_of) = property.all_of {
343        if all_of.len() == 1 {
344            dtypes.extend(
345                all_of[0]
346                    .get_types()
347                    .into_iter()
348                    .map(extract_reference)
349                    .collect::<Result<Vec<String>, String>>()?,
350            );
351        } else {
352            return Err("allOf with multiple items is not supported yet".into());
353        }
354    }
355
356    Ok(Attribute {
357        name: property.title.unwrap_or_else(|| property_name.to_string()),
358        is_array,
359        dtypes: dtypes
360            .into_iter()
361            .filter(|dtype| !IGNORE_TYPES.contains(&dtype.as_str()))
362            .collect::<Vec<String>>(),
363        is_id: false,
364        docstring: property.description.unwrap_or_default(),
365        options: parse_options(&property.options)?,
366        term: property.term,
367        required: false,
368        default: property.default.map(|p| primitive_to_datatype(&p)),
369        xml: None,
370        is_enum,
371        position: None,
372        import_prefix: None,
373    })
374}
375
376/// Converts a JSON Schema enum object to an Enumeration
377///
378/// This implementation handles the conversion of enum values to mappings,
379/// escaping invalid keys as needed.
380impl TryFrom<EnumObject> for Enumeration {
381    type Error = Box<dyn std::error::Error>;
382
383    fn try_from(enum_obj: EnumObject) -> Result<Self, Self::Error> {
384        Ok(Enumeration {
385            name: enum_obj.title,
386            docstring: enum_obj.description.unwrap_or_default(),
387            position: None,
388            mappings: enum_values_to_mappings(&enum_obj.enum_values),
389        })
390    }
391}
392
393/// Builds the `{KEY: value}` mappings for an enumeration from its raw values,
394/// escaping values that are not valid identifier keys.
395fn enum_values_to_mappings(values: &[String]) -> BTreeMap<String, String> {
396    values
397        .iter()
398        .enumerate()
399        .map(|(i, value)| {
400            if is_valid_key(value) {
401                // If there are no special characters, we can use the value as is
402                (value.clone().to_uppercase(), value.clone())
403            } else if value.len() < 15 {
404                // If there are special characters, we need to escape them
405                let cleaned_key = clean_key(value);
406                (cleaned_key.to_uppercase(), value.clone())
407            } else {
408                (format!("VALUE_{i}"), value.clone())
409            }
410        })
411        .collect::<BTreeMap<String, String>>()
412}
413
414/// Returns the inline enum values of a property, if it declares a non-empty
415/// `enum` list.
416fn enum_values(property: &Property) -> Option<&[String]> {
417    property
418        .enum_values
419        .as_deref()
420        .filter(|values| !values.is_empty())
421}
422
423/// Synthesizes a named enumeration from inline enum values and records it in the
424/// accumulator, returning the enumeration's name.
425///
426/// Identical value sets are de-duplicated (the existing enum's name is reused),
427/// and name collisions between distinct value sets are disambiguated with a
428/// numeric suffix.
429fn make_inline_enum(
430    property_name: &str,
431    parent_property: Option<&str>,
432    values: &[String],
433    docstring: Option<&str>,
434    nested_enums: &mut Vec<Enumeration>,
435) -> String {
436    let mappings = enum_values_to_mappings(values);
437
438    // Reuse an existing enumeration with the exact same mappings.
439    if let Some(existing) = nested_enums.iter().find(|e| e.mappings == mappings) {
440        return existing.name.clone();
441    }
442
443    let base_name = synthetic_type_name(property_name, None, parent_property);
444    let mut name = base_name.clone();
445    let mut suffix = 2;
446    while nested_enums.iter().any(|e| e.name == name) {
447        name = format!("{base_name}{suffix}");
448        suffix += 1;
449    }
450
451    nested_enums.push(Enumeration {
452        name: name.clone(),
453        docstring: docstring.unwrap_or_default().to_string(),
454        position: None,
455        mappings,
456    });
457
458    name
459}
460
461/// Converts a JSON Schema primitive default value into the model's `DataType`.
462///
463/// String defaults are stored quoted to match the representation produced by the
464/// markdown parser, so both import paths agree.
465fn primitive_to_datatype(value: &PrimitiveType) -> attribute::DataType {
466    match value {
467        PrimitiveType::String(s) => attribute::DataType::String(format!("\"{s}\"")),
468        PrimitiveType::Number(n) if n.fract() == 0.0 && n.is_finite() => {
469            attribute::DataType::Integer(*n as i64)
470        }
471        PrimitiveType::Number(n) => attribute::DataType::Float(*n),
472        PrimitiveType::Integer(i) => attribute::DataType::Integer(*i),
473        PrimitiveType::Boolean(b) => attribute::DataType::Boolean(*b),
474    }
475}
476
477fn clean_key(key: &str) -> String {
478    let cleaned_key = key.replace(|c: char| !c.is_alphanumeric(), "_");
479    let pattern = Regex::new(r"_+").unwrap();
480    let mut cleaned_key = pattern.replace_all(&cleaned_key, "_").to_string();
481
482    // If the first character is not a letter, remove it
483    if !cleaned_key.starts_with(|c: char| c.is_alphabetic()) {
484        cleaned_key = cleaned_key[1..].to_string();
485    }
486
487    cleaned_key.to_uppercase()
488}
489
490/// Parses JSON Schema options into AttrOption objects
491///
492/// This function converts the key-value pairs from the JSON Schema options
493/// into AttrOption objects that can be used in the data model.
494fn parse_options(
495    options: &HashMap<String, PrimitiveType>,
496) -> Result<Vec<AttrOption>, Box<dyn std::error::Error>> {
497    let mut parsed_options = Vec::new();
498
499    for (key, value) in options {
500        let option = AttrOption::from_pair(key, value.to_string().as_str())?;
501        parsed_options.push(option);
502    }
503
504    Ok(parsed_options)
505}
506
/// Returns the last `/`-separated segment of a reference string
///
/// For a JSON Schema reference such as "#/$defs/Test" this is the type name
/// ("Test"); a string without any `/` is returned unchanged.
///
/// # Errors
///
/// Returns "Invalid reference format" when the last segment is empty, i.e. for
/// an empty string or a string ending in `/`.
fn extract_reference(reference: String) -> Result<String, String> {
    match reference.rsplit('/').next() {
        Some(segment) if !segment.is_empty() => Ok(segment.to_owned()),
        _ => Err(String::from("Invalid reference format")),
    }
}
519
/// Tells whether a string can be used as an identifier key without escaping
///
/// The string qualifies when it:
/// - is not empty,
/// - begins with a letter or an underscore, and
/// - consists solely of alphanumeric characters and underscores.
fn is_valid_key(s: &str) -> bool {
    let mut remaining = s.chars();

    match remaining.next() {
        // A leading letter or underscore is required; the rest may also be digits.
        Some(first) if first.is_alphabetic() || first == '_' => {
            remaining.all(|c| c.is_alphanumeric() || c == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
540
541#[cfg(test)]
542mod tests {
543    use serde_json::json;
544
545    use super::*;
546
/// Tests the parsing of a complete JSON Schema into a DataModel
///
/// This test verifies that:
/// - The schema is correctly parsed into a DataModel
/// - Objects and enums are correctly extracted
/// - Attributes are correctly parsed with their properties
/// - Required fields are marked as such
/// - Array types are correctly handled
/// - References are correctly resolved
/// - OneOf types are correctly handled
#[test]
fn test_parse_schema() {
    let schema = json!({
      "$schema": "https://json-schema.org/draft/2020-12/schema",
      "$id": "https://www.github.com/my/repo/",
      "title": "Test",
      "type": "object",
      "properties": {
        "array_valued": {
          "title": "array_valued",
          "type": "array",
          "$term": "http://schema.org/something",
          "items": {
            "$ref": "#/$defs/Test2"
          }
        },
        "multiple_types": {
          "title": "multiple_types",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "$ref": "#/$defs/Test2"
            }
          ]
        },
        "multiple_types_array": {
          "title": "multiple_types_array",
          "type": "array",
          "items": {
            "oneOf": [
              {
                "type": "number"
              },
              {
                "$ref": "#/$defs/Test2"
              }
            ]
          }
        },
        "name": {
          "title": "name",
          "type": "string",
          "default": "test",
          "description": "A test description",
          "$term": "http://schema.org/hello"
        },
        "number": {
          "title": "number",
          "type": "number",
          "$term": "http://schema.org/one",
          "minimum": 0.0
        },
        "ontology": {
          "title": "ontology",
          "$ref": "#/$defs/Ontology"
        },
        "single_valued": {
          "title": "single_valued",
          "type": "object",
          "$ref": "#/$defs/Test2"
        }
      },
      "$defs": {
        "Ontology": {
          "title": "Ontology",
          "type": "string",
          "enum": [
            "https://www.evidenceontology.org/term/",
            "https://amigo.geneontology.org/amigo/term/",
            "http://semanticscience.org/resource/"
          ]
        },
        "Test2": {
          "title": "Test2",
          "type": "object",
          "properties": {
            "names": {
              "title": "names",
              "type": "array",
              "$term": "http://schema.org/hello",
              "items": {
                "type": "string"
              }
            },
            "number": {
              "title": "number",
              "type": "number",
              "$term": "http://schema.org/one",
              "minimum": 0.0
            }
          },
          "required": [],
          "additionalProperties": false
        },
        "no_title_and_no_required": {
            "type": "object",
            "properties": {
                "val": {
                    "type": "string"
                }
            }
        }
      },
      "required": [
        "name"
      ],
      "additionalProperties": false
    });

    let schema: SchemaObject = serde_json::from_value(schema).expect("Failed to parse schema");
    let data_model =
        DataModel::try_from(schema).expect("Failed to convert schema to data model");

    // Three objects: the root plus the two object definitions; the only enum
    // is the `Ontology` definition.
    assert_eq!(data_model.name, Some("Test".to_string()));
    assert_eq!(data_model.objects.len(), 3);
    assert_eq!(data_model.enums.len(), 1);

    // Test root object (Test)
    let root = data_model
        .objects
        .iter()
        .find(|object| object.name == "Test")
        .expect("Root object not found");

    // NOTE(review): the index-based checks below rely on the attributes coming
    // out in alphabetical order of their property keys — presumably the
    // properties map is ordered; confirm against `SchemaObject`.
    assert_eq!(root.attributes.len(), 7);
    assert_eq!(root.attributes[0].name, "array_valued");
    assert_eq!(root.attributes[1].name, "multiple_types");
    assert_eq!(root.attributes[2].name, "multiple_types_array");
    assert_eq!(root.attributes[3].name, "name");
    assert_eq!(root.attributes[4].name, "number");
    assert_eq!(root.attributes[5].name, "ontology");
    assert_eq!(root.attributes[6].name, "single_valued");

    // Test Test2 object
    let test2 = data_model
        .objects
        .iter()
        .find(|object| object.name == "Test2")
        .expect("Test2 object not found");

    assert_eq!(test2.attributes.len(), 2);
    assert_eq!(test2.attributes[0].name, "names");
    assert_eq!(test2.attributes[1].name, "number");

    // Verify Test2 attributes in detail
    let names_attr = &test2.attributes[0];
    assert!(names_attr.is_array);
    assert_eq!(names_attr.dtypes, vec!["string"]);
    assert_eq!(names_attr.term, Some("http://schema.org/hello".to_string()));

    let number_attr = &test2.attributes[1];
    assert!(!number_attr.is_array);
    assert_eq!(number_attr.dtypes, vec!["number"]);
    assert_eq!(number_attr.term, Some("http://schema.org/one".to_string()));

    // Verify no_title_and_no_required object: it has no title of its own and
    // is found under its definition key.
    let no_title_and_no_required = data_model
        .objects
        .iter()
        .find(|object| object.name == "no_title_and_no_required")
        .expect("no_title_and_no_required object not found");

    assert!(!no_title_and_no_required.name.is_empty());
    assert_eq!(no_title_and_no_required.attributes.len(), 1);
    assert_eq!(no_title_and_no_required.attributes[0].name, "val");

    // Test Ontology enum
    let ontology = data_model
        .enums
        .iter()
        .find(|e| e.name == "Ontology")
        .expect("Ontology enum not found");

    // The URLs are not valid keys and are 15 bytes or longer, so each one is
    // stored under an index-based `VALUE_{i}` key.
    assert_eq!(ontology.mappings.len(), 3);
    assert_eq!(
        ontology.mappings["VALUE_0"],
        "https://www.evidenceontology.org/term/"
    );
    assert_eq!(
        ontology.mappings["VALUE_1"],
        "https://amigo.geneontology.org/amigo/term/"
    );
    assert_eq!(
        ontology.mappings["VALUE_2"],
        "http://semanticscience.org/resource/"
    );

    // Verify root object attribute details
    let array_valued = &root.attributes[0];
    assert!(array_valued.is_array);
    assert_eq!(array_valued.dtypes, vec!["Test2"]);
    assert_eq!(
        array_valued.term,
        Some("http://schema.org/something".to_string())
    );

    // Compared as a set: the order of the data types is not asserted here.
    let multiple_types = &root.attributes[1];
    assert!(!multiple_types.is_array);
    let multiple_types_dtypes: HashSet<_> = multiple_types.dtypes.iter().collect();
    assert_eq!(
        multiple_types_dtypes,
        HashSet::from([&"number".to_string(), &"Test2".to_string()])
    );

    // `name` is the only entry in the root schema's `required` list.
    let name_attr = &root.attributes[3];
    assert!(name_attr.required);
    assert_eq!(name_attr.term, Some("http://schema.org/hello".to_string()));
}
767
/// Tests the conversion of a plain, single-typed property
///
/// A `number` property with a title, term and description must yield an
/// Attribute carrying exactly that information and default values for
/// everything else.
#[test]
fn test_parse_property() {
    let raw = json!({
      "title": "number",
      "type": "number",
      "$term": "http://schema.org/one",
      "minimum": 0.0,
      "description": "test"
    });

    let parsed: Property = serde_json::from_value(raw).unwrap();
    let attr = Attribute::try_from(parsed).unwrap();

    // Values taken from the property
    assert_eq!(attr.name, "number");
    assert_eq!(attr.dtypes, vec!["number"]);
    assert_eq!(attr.docstring, "test");
    assert_eq!(attr.term, Some("http://schema.org/one".to_string()));

    // Everything else stays at its default
    assert!(!attr.required);
    assert!(!attr.is_array);
    assert!(!attr.is_enum);
    assert_eq!(attr.default, None);
    assert_eq!(attr.xml, None);
    assert_eq!(attr.position, None);
    assert_eq!(attr.import_prefix, None);
}
796
/// Tests the conversion of a oneOf property with two primitive types
///
/// Both alternatives must show up among the attribute's data types; their
/// order is not checked.
#[test]
fn test_parse_property_with_one_of() {
    let raw = json!({
        "title": "number",
        "oneOf": [
            {
                "type": "number"
            },
            {
                "type": "string"
            }
        ]
    });

    let parsed: Property = serde_json::from_value(raw).unwrap();
    let attr = Attribute::try_from(parsed).unwrap();

    assert_eq!(attr.name, "number");

    let expected_dtypes = HashSet::from(["number".to_string(), "string".to_string()]);
    let actual_dtypes: HashSet<String> = attr.dtypes.into_iter().collect();
    assert_eq!(actual_dtypes, expected_dtypes);

    assert_eq!(attr.docstring, "");
    assert_eq!(attr.term, None);
    assert!(!attr.required);
    assert_eq!(attr.default, None);
    assert_eq!(attr.xml, None);
    assert!(!attr.is_array);
    assert!(!attr.is_enum);
    assert_eq!(attr.position, None);
    assert_eq!(attr.import_prefix, None);
}
835
/// Tests the conversion of a oneOf property mixing a reference and a primitive
///
/// The referenced type must be reduced to its bare name ("Test") and appear
/// next to the primitive type among the attribute's data types; their order is
/// not checked.
#[test]
fn test_parse_property_with_one_of_mixed() {
    let raw = json!({
        "title": "number",
        "oneOf": [
            {
                "$ref": "#/$defs/Test"
            },
            {
                "type": "string"
            }
        ]
    });

    let parsed: Property = serde_json::from_value(raw).unwrap();
    let attr = Attribute::try_from(parsed).unwrap();

    assert_eq!(attr.name, "number");

    let expected_dtypes = HashSet::from(["Test".to_string(), "string".to_string()]);
    let actual_dtypes: HashSet<String> = attr.dtypes.into_iter().collect();
    assert_eq!(actual_dtypes, expected_dtypes);

    assert_eq!(attr.docstring, "");
    assert_eq!(attr.term, None);
    assert!(!attr.required);
    assert_eq!(attr.default, None);
    assert_eq!(attr.xml, None);
    assert!(!attr.is_array);
    assert!(!attr.is_enum);
    assert_eq!(attr.position, None);
    assert_eq!(attr.import_prefix, None);
}
874
875    /// Tests the parsing of a property with allOf (multiple types)
876    ///
877    /// This test verifies that a property with multiple possible types
878    /// is correctly converted to an Attribute with all types included.
879    #[test]
880    #[should_panic]
881    fn test_parse_property_with_all_of() {
882        let property = json!({
883            "title": "number",
884            "allOf": [
885                {
886                    "type": "number"
887                },
888                {
889                    "type": "string"
890                }
891            ]
892        });
893
894        let property: Property = serde_json::from_value(property).unwrap();
895        Attribute::try_from(property).unwrap();
896    }
897
898    /// Tests the parsing of a property with a reference
899    ///
900    /// This test verifies that a property with a reference to another type
901    /// is correctly converted to an Attribute with the referenced type.
902    #[test]
903    fn test_parse_property_with_reference() {
904        let property = json!({
905            "title": "number",
906            "$ref": "#/$defs/Test"
907        });
908
909        let property: Property = serde_json::from_value(property).unwrap();
910        let attribute = Attribute::try_from(property).unwrap();
911
912        assert_eq!(attribute.name, "number");
913        assert_eq!(attribute.dtypes, vec!["Test".to_string()]);
914        assert_eq!(attribute.docstring, "");
915        assert_eq!(attribute.term, None);
916        assert!(!attribute.required);
917        assert_eq!(attribute.default, None);
918        assert!(!attribute.is_array);
919        assert_eq!(attribute.xml, None);
920        assert!(!attribute.is_enum);
921        assert_eq!(attribute.position, None);
922        assert_eq!(attribute.import_prefix, None);
923    }
924
925    /// Tests the parsing of an array property without a reference
926    ///
927    /// This test verifies that an array property with a simple type
928    /// is correctly converted to an Attribute with is_array=true.
929    #[test]
930    fn test_parse_property_array_without_reference() {
931        let property = json!({
932            "title": "number",
933            "type": "array",
934            "items": {
935                "type": "string"
936            }
937        });
938
939        let property: Property = serde_json::from_value(property).unwrap();
940        let attribute = Attribute::try_from(property).unwrap();
941
942        assert_eq!(attribute.name, "number");
943        assert_eq!(attribute.dtypes, vec!["string".to_string()]);
944        assert_eq!(attribute.docstring, "");
945        assert_eq!(attribute.term, None);
946        assert!(!attribute.required);
947        assert_eq!(attribute.default, None);
948        assert!(attribute.is_array);
949        assert_eq!(attribute.xml, None);
950        assert!(!attribute.is_enum);
951        assert_eq!(attribute.position, None);
952        assert_eq!(attribute.import_prefix, None);
953    }
954
955    /// Tests extracting a reference from an array property
956    ///
957    /// This test verifies that the type information is correctly extracted
958    /// from an array property with items of a specific type.
959    #[test]
960    fn test_extract_reference_from_array() {
961        let property = json!({
962            "title": "number",
963            "type": "array",
964            "items": {
965                "type": "string"
966            }
967        });
968
969        let property: Property = serde_json::from_value(property).unwrap();
970        let attribute = Attribute::try_from(property).unwrap();
971
972        assert_eq!(attribute.name, "number");
973        assert_eq!(attribute.dtypes, vec!["string".to_string()]);
974        assert_eq!(attribute.docstring, "");
975        assert_eq!(attribute.term, None);
976        assert!(!attribute.required);
977        assert_eq!(attribute.default, None);
978        assert!(attribute.is_array);
979        assert_eq!(attribute.xml, None);
980        assert!(!attribute.is_enum);
981        assert_eq!(attribute.position, None);
982        assert_eq!(attribute.import_prefix, None);
983    }
984
985    /// Tests extracting references from a oneOf property
986    ///
987    /// This test verifies that all type information is correctly extracted
988    /// from a property with oneOf containing both a reference and a simple type.
989    #[test]
990    fn test_extract_reference_from_one_of() {
991        let property = json!({
992            "title": "number",
993            "oneOf": [
994                {
995                    "$ref": "#/$defs/Test"
996                },
997                {
998                    "type": "string"
999                }
1000            ]
1001        });
1002
1003        let property: Property = serde_json::from_value(property).unwrap();
1004        let attribute = Attribute::try_from(property).unwrap();
1005
1006        assert_eq!(attribute.name, "number");
1007        assert_eq!(
1008            attribute.dtypes.into_iter().collect::<HashSet<_>>(),
1009            vec!["Test".to_string(), "string".to_string()]
1010                .into_iter()
1011                .collect::<HashSet<_>>()
1012        );
1013    }
1014
1015    /// Tests parsing a schema object into an Object
1016    ///
1017    /// This test verifies that a schema object with properties is correctly
1018    /// converted to an Object with attributes, and required fields are marked.
1019    #[test]
1020    fn test_parse_object() {
1021        let object = json!({
1022            "title": "Test",
1023            "type": "object",
1024            "properties": {
1025                "number": {
1026                    "title": "number",
1027                    "type": "number"
1028                },
1029                "string": {
1030                    "name": "string",
1031                    "type": "string"
1032                }
1033            },
1034            "required": ["number"]
1035        });
1036
1037        let object: SchemaObject = serde_json::from_value(object).unwrap();
1038        let data_model = Object::try_from(object).unwrap();
1039
1040        assert_eq!(data_model.name, "Test");
1041        assert_eq!(data_model.attributes.len(), 2);
1042        assert_eq!(data_model.attributes[0].name, "number");
1043        assert_eq!(data_model.attributes[1].name, "string");
1044
1045        let attribute1 = data_model.attributes[0].clone();
1046
1047        assert_eq!(attribute1.name, "number");
1048        assert_eq!(attribute1.dtypes, vec!["number"]);
1049        assert_eq!(attribute1.docstring, "");
1050        assert_eq!(attribute1.term, None);
1051        assert!(attribute1.required);
1052        assert_eq!(attribute1.default, None);
1053        assert!(!attribute1.is_array);
1054
1055        let attribute2 = data_model.attributes[1].clone();
1056
1057        assert_eq!(attribute2.name, "string");
1058        assert_eq!(attribute2.dtypes, vec!["string"]);
1059        assert_eq!(attribute2.docstring, "");
1060        assert_eq!(attribute2.term, None);
1061        assert!(!attribute2.required);
1062        assert_eq!(attribute2.default, None);
1063        assert!(!attribute2.is_array);
1064    }
1065
1066    /// Tests parsing an enum object into an Enumeration
1067    ///
1068    /// This test verifies that an enum object with simple values
1069    /// is correctly converted to an Enumeration with mappings.
1070    #[test]
1071    fn test_parse_enum() {
1072        let enum_obj = json!({
1073            "title": "Test",
1074            "type": "string",
1075            "enum": ["value1", "value2", "value3"]
1076        });
1077
1078        let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
1079        let enumeration = Enumeration::try_from(enum_obj).unwrap();
1080
1081        assert_eq!(enumeration.name, "Test");
1082        assert_eq!(enumeration.mappings.len(), 3);
1083        assert_eq!(enumeration.mappings["VALUE1"], "value1");
1084        assert_eq!(enumeration.mappings["VALUE2"], "value2");
1085        assert_eq!(enumeration.mappings["VALUE3"], "value3");
1086    }
1087
1088    /// Tests parsing an enum object with special characters
1089    ///
1090    /// This test verifies that an enum object with values containing special characters
1091    /// is correctly converted to an Enumeration with escaped mappings.
1092    #[test]
1093    fn test_parse_enum_with_special_characters() {
1094        let enum_obj = json!({
1095            "title": "Test",
1096            "type": "string",
1097            "enum": ["https://www.evidenceontology.org/term/", "https://amigo.geneontology.org/amigo/term/", "http://semanticscience.org/resource/"]
1098        });
1099
1100        let enum_obj: EnumObject = serde_json::from_value(enum_obj).unwrap();
1101        let enumeration = Enumeration::try_from(enum_obj).unwrap();
1102
1103        assert_eq!(enumeration.name, "Test");
1104        assert_eq!(enumeration.mappings.len(), 3);
1105        assert_eq!(
1106            enumeration.mappings["VALUE_0"],
1107            "https://www.evidenceontology.org/term/"
1108        );
1109        assert_eq!(
1110            enumeration.mappings["VALUE_1"],
1111            "https://amigo.geneontology.org/amigo/term/"
1112        );
1113        assert_eq!(
1114            enumeration.mappings["VALUE_2"],
1115            "http://semanticscience.org/resource/"
1116        );
1117    }
1118
1119    /// Tests the extract_reference function
1120    ///
1121    /// This test verifies that the extract_reference function correctly
1122    /// extracts type names from reference strings and handles edge cases.
1123    #[test]
1124    fn test_extract_reference() {
1125        assert_eq!(
1126            extract_reference("#/$defs/Test".to_string()),
1127            Ok("Test".to_string())
1128        );
1129        assert_eq!(
1130            extract_reference("Test".to_string()),
1131            Ok("Test".to_string())
1132        );
1133        assert_eq!(
1134            extract_reference("".to_string()),
1135            Err("Invalid reference format".to_string())
1136        );
1137    }
1138
1139    #[test]
1140    fn test_enzml_schema() {
1141        // Arrange
1142        let schema_path = "tests/data/old_schema.json";
1143        let schema = std::fs::read_to_string(schema_path).expect("Failed to read schema");
1144        let schema: SchemaObject = serde_json::from_str(&schema).expect("Failed to parse schema");
1145
1146        // Act
1147        let data_model =
1148            DataModel::try_from(schema).expect("Failed to convert schema to data model");
1149
1150        // Assert
1151        assert_eq!(data_model.objects.len(), 14);
1152        assert_eq!(data_model.enums.len(), 2);
1153    }
1154
1155    #[test]
1156    fn test_clean_key() {
1157        assert_eq!(clean_key("Test:Hello"), "TEST_HELLO");
1158        assert_eq!(clean_key("Test::Hello"), "TEST_HELLO");
1159        assert_eq!(clean_key("Test_Hello"), "TEST_HELLO");
1160        assert_eq!(clean_key("Test__Hello"), "TEST_HELLO");
1161        assert_eq!(clean_key("!Test"), "TEST");
1162    }
1163
1164    #[test]
1165    fn test_additional_properties_object() {
1166        let schema = json!({
1167            "title": "Test",
1168            "type": "object",
1169            "properties": {
1170                "test": {
1171                    "type": "string"
1172                }
1173            },
1174            "additionalProperties": {
1175                "type": "string"
1176            }
1177        });
1178
1179        let schema: SchemaObject = serde_json::from_value(schema).unwrap();
1180        let data_model = DataModel::try_from(schema.clone()).unwrap();
1181
1182        assert!(schema.additional_properties);
1183        assert_eq!(data_model.objects.len(), 1);
1184        assert_eq!(data_model.objects[0].attributes.len(), 1);
1185    }
1186
1187    #[test]
1188    fn test_parse_inline_nested_objects() {
1189        let schema = json!({
1190            "title": "Root",
1191            "type": "object",
1192            "properties": {
1193                "settings": {
1194                    "type": "object",
1195                    "properties": {
1196                        "enabled": { "type": "boolean" },
1197                        "limit": { "type": "number" }
1198                    },
1199                    "required": ["enabled"]
1200                },
1201                "items": {
1202                    "type": "array",
1203                    "items": {
1204                        "type": "object",
1205                        "properties": {
1206                            "name": { "type": "string" }
1207                        },
1208                        "required": ["name"]
1209                    }
1210                }
1211            },
1212            "required": ["settings"]
1213        });
1214
1215        let schema: SchemaObject = serde_json::from_value(schema).expect("Failed to parse schema");
1216        let data_model =
1217            DataModel::try_from(schema).expect("Failed to convert schema to data model");
1218
1219        assert!(data_model.objects.len() >= 3);
1220
1221        let root = data_model
1222            .objects
1223            .iter()
1224            .find(|object| object.name == "Root")
1225            .expect("root object");
1226        let settings_attr = root
1227            .attributes
1228            .iter()
1229            .find(|attr| attr.name == "settings")
1230            .expect("settings attribute");
1231        assert_eq!(settings_attr.dtypes, vec!["Settings"]);
1232        assert!(settings_attr.required);
1233
1234        let settings = data_model
1235            .objects
1236            .iter()
1237            .find(|object| object.name == "Settings")
1238            .expect("settings object");
1239        assert_eq!(settings.attributes.len(), 2);
1240        assert!(settings
1241            .attributes
1242            .iter()
1243            .any(|attr| attr.name == "enabled" && attr.required));
1244
1245        let items_attr = root
1246            .attributes
1247            .iter()
1248            .find(|attr| attr.name == "items")
1249            .expect("items attribute");
1250        assert!(items_attr.is_array);
1251        assert_eq!(items_attr.dtypes, vec!["Items"]);
1252
1253        let item_object = data_model
1254            .objects
1255            .iter()
1256            .find(|object| object.name == "Items")
1257            .expect("items object");
1258        assert!(item_object
1259            .attributes
1260            .iter()
1261            .any(|attr| attr.name == "name" && attr.required));
1262    }
1263
1264    #[test]
1265    fn test_parse_precice_topology_schema() {
1266        let schema_path = "tests/data/precice_topology_schema.json";
1267        let schema = std::fs::read_to_string(schema_path).expect("Failed to read schema");
1268        let schema: SchemaObject = serde_json::from_str(&schema).expect("Failed to parse schema");
1269
1270        assert_eq!(schema.optional, vec!["acceleration"]);
1271        assert!(schema.properties.contains_key("acceleration"));
1272
1273        let acceleration = schema.properties.get("acceleration").unwrap();
1274        assert_eq!(acceleration.properties.len(), 3);
1275        assert!(acceleration.properties.contains_key("filter"));
1276
1277        let data_model =
1278            DataModel::try_from(schema).expect("Failed to convert schema to data model");
1279
1280        let root = data_model
1281            .objects
1282            .iter()
1283            .find(|object| object.name == "preCICETopologyConfiguration")
1284            .expect("root object");
1285
1286        let acceleration_attr = root
1287            .attributes
1288            .iter()
1289            .find(|attr| attr.name == "acceleration")
1290            .expect("acceleration attribute");
1291        assert_eq!(acceleration_attr.dtypes, vec!["Acceleration"]);
1292        assert!(!acceleration_attr.required);
1293
1294        let coupling_attr = root
1295            .attributes
1296            .iter()
1297            .find(|attr| attr.name == "coupling-scheme")
1298            .expect("coupling-scheme attribute");
1299        assert_eq!(coupling_attr.dtypes, vec!["CouplingScheme"]);
1300        assert!(coupling_attr.required);
1301
1302        let exchanges_attr = root
1303            .attributes
1304            .iter()
1305            .find(|attr| attr.name == "exchanges")
1306            .expect("exchanges attribute");
1307        assert!(exchanges_attr.is_array);
1308        assert_eq!(exchanges_attr.dtypes, vec!["Exchanges"]);
1309
1310        let participants_attr = root
1311            .attributes
1312            .iter()
1313            .find(|attr| attr.name == "participants")
1314            .expect("participants attribute");
1315        assert!(participants_attr.is_array, "participants is a collection");
1316        assert_eq!(participants_attr.dtypes, vec!["Participants"]);
1317
1318        let filter_object = data_model
1319            .objects
1320            .iter()
1321            .find(|object| object.name == "AccelerationFilter")
1322            .expect("filter object");
1323        assert!(filter_object
1324            .attributes
1325            .iter()
1326            .any(|attr| attr.name == "limit" && attr.required));
1327
1328        // Inline enum values are converted into named enumerations rather than
1329        // being flattened to a plain string.
1330        let coupling_scheme = data_model
1331            .objects
1332            .iter()
1333            .find(|object| object.name == "CouplingScheme")
1334            .expect("coupling scheme object");
1335        let coupling = coupling_scheme
1336            .attributes
1337            .iter()
1338            .find(|attr| attr.name == "coupling")
1339            .expect("coupling attribute");
1340        assert!(coupling.is_enum, "coupling should be an enum");
1341        let coupling_enum = data_model
1342            .enums
1343            .iter()
1344            .find(|e| e.name == coupling.dtypes[0])
1345            .expect("coupling enumeration");
1346        let values: std::collections::HashSet<_> = coupling_enum.mappings.values().collect();
1347        assert_eq!(
1348            values,
1349            ["parallel".to_string(), "serial".to_string()]
1350                .iter()
1351                .collect()
1352        );
1353
1354        // Default values are captured from the schema.
1355        assert_eq!(
1356            coupling.default,
1357            Some(attribute::DataType::String("\"parallel\"".to_string()))
1358        );
1359        let max_iter = coupling_scheme
1360            .attributes
1361            .iter()
1362            .find(|attr| attr.name == "max-iterations")
1363            .expect("max-iterations attribute");
1364        assert_eq!(max_iter.default, Some(attribute::DataType::Integer(50)));
1365    }
1366}