Skip to main content

mdmodels_core/
datamodel.rs

1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24use std::collections::HashMap;
25use std::path::PathBuf;
26use std::{error::Error, fs, path::Path};
27
28use log::error;
29use serde::{Deserialize, Serialize};
30
31use crate::error::DataModelError;
32use crate::exporters::{render_jinja_template, Templates};
33use crate::json::export::to_json_schema;
34use crate::json::schema::SchemaObject;
35use crate::json::validation::{validate_json, ValidationError};
36use crate::jsonld::export::to_json_ld;
37use crate::jsonld::schema::JsonLdHeader;
38use crate::linkml::export::serialize_linkml;
39use crate::markdown::frontmatter::FrontMatter;
40use crate::markdown::parser::{parse_markdown, validate_model};
41use crate::object::{Enumeration, Object};
42use crate::validation::Validator;
43use colored::Colorize;
44
45#[cfg(feature = "python")]
46use pyo3::pyclass;
47
48#[cfg(feature = "wasm")]
49use tsify_next::Tsify;
50
/// Names of the types that may be skipped when two data models are merged.
///
/// A name listed here is only skipped if the receiving model already defines
/// an object or enumeration carrying that name.
const MERGE_IGNORE_TYPES: &[&str] = &["UnitDefinition", "BaseUnit", "UnitType"];
53
54// Data model
55//
56// Contains a list of objects that represent the data model
57// written in the markdown format
58//
59// # Examples
60//
61// ```
62// let model = DataModel::new();
63// ```
64//
65// # Fields
66//
67// * `objects` - A list of objects
68//
69// # Methods
70//
71// * `new` - Create a new data model
72// * `parse` - Parse a markdown file and create a data model
73// * `json_schema` - Generate a JSON schema from the data model
74// * `json_schema_all` - Generate JSON schemas for all objects in the data model
75// * `internal_schema` - Generate an internal schema from the data model
76#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
77#[cfg_attr(feature = "python", pyclass(get_all, from_py_object))]
78#[cfg_attr(feature = "wasm", derive(Tsify))]
79#[cfg_attr(feature = "wasm", tsify(into_wasm_abi))]
80pub struct DataModel {
81    #[serde(skip_serializing_if = "Option::is_none")]
82    pub name: Option<String>,
83    pub objects: Vec<Object>,
84    pub enums: Vec<Enumeration>,
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub config: Option<FrontMatter>,
87}
88
89impl DataModel {
90    pub fn new(name: Option<String>, config: Option<FrontMatter>) -> Self {
91        DataModel {
92            name,
93            objects: Vec::new(),
94            enums: Vec::new(),
95            config,
96        }
97    }
98
99    /// Validates a dataset against the data model.
100    ///
101    /// This function takes the path to a dataset and validates it against the
102    /// current data model. It returns a vector of validation errors if any
103    /// validation issues are found, or an empty vector if the validation is successful.
104    ///
105    /// # Arguments
106    ///
107    /// * `path` - A reference to the path of the dataset to validate.
108    /// * `root` - An optional root path for the schema. Will use the first object if not provided.
109    ///
110    /// # Returns
111    /// A Result containing a vector of `ValidationError` if validation fails,
112    /// or an empty vector if successful.
113    pub fn validate_json(
114        &self,
115        path: &Path,
116        root: Option<String>,
117    ) -> Result<Vec<ValidationError>, Box<dyn Error>> {
118        validate_json(path.to_path_buf(), self, root)
119    }
120
121    // Get the JSON schema for an object
122    //
123    // * `obj_name` - Name of the object
124    // * `openai` - Whether to remove options from the schema properties. OpenAI does not support options.
125    //
126    // # Panics
127    // If no objects are found in the markdown file
128    // If the object is not found in the markdown file
129    //
130    // # Examples
131    //
132    // ```
133    // let model = DataModel::new();
134    // model.parse("path/to/file.md".to_string());
135    // let schema = model.json_schema("object_name".to_string());
136    // ```
137    //
138    // # Returns
139    //
140    // A JSON schema string
141    pub fn json_schema(
142        &self,
143        obj_name: Option<String>,
144        openai: bool,
145    ) -> Result<String, Box<dyn Error>> {
146        if self.objects.is_empty() {
147            panic!("No objects found in the markdown file");
148        }
149
150        match obj_name {
151            Some(name) => {
152                if self.objects.iter().all(|o| o.name != name) {
153                    panic!("Object '{name}' not found in the markdown file");
154                }
155                Ok(serde_json::to_string_pretty(&to_json_schema(
156                    self, &name, openai,
157                )?)?)
158            }
159            None => Ok(serde_json::to_string_pretty(&to_json_schema(
160                self,
161                &self.objects[0].name,
162                openai,
163            )?)?),
164        }
165    }
166
167    // Get the JSON schema for all objects in the markdown file
168    // and write them to a file
169    //
170    // * `path` - Path to the directory where the JSON schema files will be written
171    // * `openai` - Whether to remove options from the schema properties. OpenAI does not support options.
172    //
173    // # Panics
174    //
175    // If no objects are found in the markdown file
176    //
177    // # Examples
178    //
179    // ```
180    // let model = DataModel::new();
181    // model.parse("path/to/file.md".to_string());
182    // model.json_schema_all("path/to/directory".to_string());
183    // ```
184    pub fn json_schema_all(&self, path: PathBuf, openai: bool) -> Result<(), Box<dyn Error>> {
185        if self.objects.is_empty() {
186            panic!("No objects found in the markdown file");
187        }
188
189        // Create the directory if it does not exist
190        if !std::path::Path::new(&path).exists() {
191            fs::create_dir_all(&path).expect("Could not create directory");
192        }
193
194        let base_path = path.to_str().ok_or("Failed to convert path to string")?;
195        for object in &self.objects {
196            let schema = to_json_schema(self, &object.name, openai)?;
197            let file_name = format!("{}/{}.json", base_path, object.name);
198            fs::write(file_name, serde_json::to_string_pretty(&schema)?)
199                .expect("Could not write file");
200        }
201
202        Ok(())
203    }
204
205    /// Generates a JSON-LD header (`JsonLdHeader`) for the data model, suitable for use in JSON-LD serialization.
206    ///
207    /// This method constructs a `JsonLdHeader` using the model's configuration and optionally a specified root object.
208    /// The header contains the appropriate JSON-LD `@context`, `@id`, and `@type` for the model or selected object.
209    ///
210    /// # Arguments
211    ///
212    /// * `root` - Optional. The name of the root object to use. If `None`, the first object in the model is used.
213    ///
214    /// # Returns
215    ///
216    /// * `Ok(JsonLdHeader)` with the generated JSON-LD header if successful.
217    /// * `Err(Box<dyn Error>)` if the generation fails (for example, if the root is not found).
218    pub fn json_ld_header(&self, root: Option<&str>) -> Result<JsonLdHeader, Box<dyn Error>> {
219        to_json_ld(self, root)
220    }
221
222    // Get the internal schema for the markdown file
223    //
224    // # Panics
225    //
226    // If no objects are found in the markdown file
227    //
228    // # Examples
229    //
230    // ```
231    // let model = DataModel::new();
232    // model.parse("path/to/file.md".to_string());
233    // let schema = model.internal_schema();
234    // ```
235    //
236    // # Returns
237    //
238    // An internal schema string
239    pub fn internal_schema(&self) -> String {
240        if self.objects.is_empty() {
241            panic!("No objects found in the markdown file");
242        }
243
244        serde_json::to_string_pretty(&self).expect("Could not serialize to internal schema")
245    }
246
247    // Parse a markdown file and create a data model
248    //
249    // * `path` - Path to the markdown file
250    //
251    // # Examples
252    //
253    // ```
254    // let path = Path::new("path/to/file.md");
255    // let model = DataModel::from_internal_schema(path);
256    // ```
257    //
258    // # Returns
259    //
260    // A data model
261    //
262    pub fn from_internal_schema(path: &Path) -> Result<Self, Box<dyn Error>> {
263        if !path.exists() {
264            return Err("File does not exist".into());
265        }
266
267        let contents = fs::read_to_string(path)?;
268        let model: DataModel = serde_json::from_str(&contents)?;
269
270        Ok(model)
271    }
272
273    /// Sort the attributes of all objects by required
274    pub fn sort_attrs(&mut self) {
275        for obj in &mut self.objects {
276            obj.sort_attrs_by_required();
277        }
278    }
279
280    // Convert the data model to a template using Jinja
281    //
282    // * `template` - The Jinja template
283    //
284    // # Returns
285    //
286    // A string containing the Jinja template
287    //
288    // # Errors
289    //
290    // If the Jinja template is invalid
291    //
292    pub fn convert_to(
293        &mut self,
294        template: &Templates,
295        config: Option<&HashMap<String, String>>,
296    ) -> Result<String, minijinja::Error> {
297        self.sort_attrs();
298
299        match template {
300            Templates::JsonLd => {
301                Ok(serde_json::to_string_pretty(&self.json_ld_header(None).unwrap()).unwrap())
302            }
303            Templates::JsonSchema => Ok(self.json_schema(None, false).unwrap()),
304            Templates::Linkml => Ok(serialize_linkml(self.clone(), None).unwrap()),
305            _ => render_jinja_template(template, self, config),
306        }
307    }
308
309    // Merge two data models
310    //
311    // * `other` - The other data model to merge
312    pub fn merge(&mut self, other: &Self) {
313        // Initialize a variable to check if the merge is valid
314        let mut valid = true;
315        let ignore_types = self.get_ignore_types();
316
317        // Check if there are any duplicate objects or enums
318        // Types that are internally defined do not throw an error
319        for other_obj in &other.objects {
320            if ignore_types.contains(&other_obj.name) {
321                continue;
322            }
323            if let Some(duplicate_obj) = self.objects.iter().find(|o| o.name == other_obj.name) {
324                if !duplicate_obj.same_hash(other_obj) {
325                    error!(
326                        "[{}] {}: Object {} is defined more than once.",
327                        "Merge".bold(),
328                        "DuplicateError".bold(),
329                        other_obj.name.red().bold(),
330                    );
331                    valid = false;
332                }
333            }
334        }
335
336        for other_enm in &other.enums {
337            if ignore_types.contains(&other_enm.name) {
338                continue;
339            }
340            if let Some(duplicate_enm) = self.enums.iter().find(|e| e.name == other_enm.name) {
341                if !duplicate_enm.same_hash(other_enm) {
342                    error!(
343                        "[{}] {}: Enumeration {} is defined more than once.",
344                        "Merge".bold(),
345                        "DuplicateError".bold(),
346                        other_enm.name.red().bold(),
347                    );
348                    valid = false;
349                }
350            }
351        }
352
353        // If the merge is not valid, panic
354        if !valid {
355            panic!("Merge is not valid");
356        }
357
358        // Merge prefixes: only add new ones, preserve existing
359        self.merge_prefixes(other);
360
361        // Merge the objects and enums
362        self.objects.extend(
363            other
364                .objects
365                .iter()
366                .filter(|o| !ignore_types.contains(&o.name))
367                .filter(|o| !self.objects.iter().any(|existing| existing.name == o.name))
368                .cloned()
369                .collect::<Vec<Object>>(),
370        );
371        self.enums.extend(
372            other
373                .enums
374                .iter()
375                .filter(|e| !ignore_types.contains(&e.name))
376                .filter(|e| !self.enums.iter().any(|existing| existing.name == e.name))
377                .cloned()
378                .collect::<Vec<Enumeration>>(),
379        );
380    }
381
382    /// Merge prefixes from another data model into this one.
383    /// Only adds new prefixes, preserving existing ones.
384    fn merge_prefixes(&mut self, other: &Self) {
385        if let Some(other_prefixes) = other.config.as_ref().and_then(|c| c.prefixes.as_ref()) {
386            let self_config = self.config.get_or_insert_with(FrontMatter::new);
387            let self_prefixes = self_config.prefixes.get_or_insert_with(HashMap::new);
388
389            for (key, value) in other_prefixes {
390                self_prefixes
391                    .entry(key.clone())
392                    .or_insert_with(|| value.clone());
393            }
394        }
395    }
396
397    /// Get the types that should be ignored when merging.
398    fn get_ignore_types(&self) -> Vec<String> {
399        let mut ignore_types = Vec::new();
400        if self
401            .objects
402            .iter()
403            .any(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
404        {
405            ignore_types.extend(
406                self.objects
407                    .iter()
408                    .filter(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
409                    .map(|o| o.name.clone()),
410            );
411        }
412        if self
413            .enums
414            .iter()
415            .any(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
416        {
417            ignore_types.extend(
418                self.enums
419                    .iter()
420                    .filter(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
421                    .map(|e| e.name.clone()),
422            );
423        }
424        ignore_types
425    }
426
427    /// Parse a markdown file and create a data model
428    ///
429    /// * `path` - Path to the markdown file
430    ///
431    /// # Examples
432    ///
433    /// ```
434    /// use std::path::Path;
435    /// use mdmodels_core::datamodel::DataModel;
436    ///
437    /// let path = Path::new("tests/data/model.md");
438    /// let model = DataModel::from_markdown(path);
439    /// ```
440    /// # Returns
441    /// A data model
442    #[allow(clippy::result_large_err)]
443    pub fn from_markdown(path: &Path) -> Result<Self, Validator> {
444        let content = fs::read_to_string(path).expect("Could not read file");
445        parse_markdown(&content, Some(path))
446    }
447
448    /// Parse a markdown file and create a data model
449    ///
450    /// * `path` - Path to the markdown file
451    ///
452    /// # Examples
453    ///
454    /// ```
455    /// use std::path::Path;
456    /// use std::fs;
457    /// use mdmodels_core::datamodel::DataModel;
458    ///
459    /// let path = Path::new("tests/data/model.md");
460    /// let content = fs::read_to_string(path).unwrap();
461    /// let model = DataModel::from_markdown_string(content.as_str());
462    /// ```
463    /// # Returns
464    /// A data model
465    #[allow(clippy::result_large_err)]
466    pub fn from_markdown_string(content: &str) -> Result<Self, Validator> {
467        parse_markdown(content, None)
468    }
469
470    /// Parse a JSON schema file and create a data model
471    ///
472    /// * `path` - Path to the JSON schema file
473    ///
474    /// # Returns
475    /// A data model
476    #[allow(clippy::result_large_err)]
477    pub fn from_json_schema(path: &Path) -> Result<Self, DataModelError> {
478        let content = fs::read_to_string(path)?;
479        let schema: SchemaObject = serde_json::from_str(&content)?;
480        let model: DataModel = schema
481            .try_into()
482            .expect("Could not convert schema to data model");
483
484        // Validate the data model
485        validate_model(&model).map_err(DataModelError::ValidationError)?;
486
487        Ok(model)
488    }
489
490    /// Parse a JSON schema string and create a data model
491    ///
492    /// * `content` - The JSON schema string
493    ///
494    /// # Returns
495    /// A data model
496    #[allow(clippy::result_large_err)]
497    pub fn from_json_schema_string(content: &str) -> Result<Self, DataModelError> {
498        let schema: SchemaObject = serde_json::from_str(content)?;
499        let model: DataModel = schema
500            .try_into()
501            .expect("Could not convert schema to data model");
502
503        // Validate the data model
504        validate_model(&model).map_err(DataModelError::ValidationError)?;
505
506        Ok(model)
507    }
508
509    /// Parse a JSON schema object and create a data model
510    ///
511    /// * `schema` - The JSON schema object
512    ///
513    /// # Returns
514    /// A data model
515    #[allow(clippy::result_large_err)]
516    pub fn from_json_schema_object(schema: SchemaObject) -> Result<Self, DataModelError> {
517        let model: DataModel = schema
518            .try_into()
519            .expect("Could not convert schema to data model");
520
521        // Validate the data model
522        validate_model(&model).map_err(DataModelError::ValidationError)?;
523
524        Ok(model)
525    }
526}
527
528#[cfg(test)]
529mod tests {
530    use std::collections::BTreeMap;
531
532    use crate::attribute::DataType;
533
534    use super::*;
535    use pretty_assertions::assert_eq;
536
537    #[test]
538    fn test_merge() {
539        // Arrange
540        let mut model1 = DataModel::new(None, None);
541        let mut model2 = DataModel::new(None, None);
542
543        let mut obj1 = Object::new("Object1".to_string(), None);
544        obj1.add_attribute(crate::attribute::Attribute {
545            name: "test1".to_string(),
546            is_array: false,
547            is_id: false,
548            dtypes: vec!["string".to_string()],
549            docstring: "".to_string(),
550            options: vec![],
551            term: None,
552            required: false,
553            xml: None,
554            default: None,
555            is_enum: false,
556            position: None,
557            import_prefix: None,
558        });
559
560        let mut obj2 = Object::new("Object2".to_string(), None);
561        obj2.add_attribute(crate::attribute::Attribute {
562            name: "test2".to_string(),
563            is_array: false,
564            is_id: false,
565            dtypes: vec!["string".to_string()],
566            docstring: "".to_string(),
567            options: vec![],
568            term: None,
569            required: false,
570            xml: None,
571            default: None,
572            is_enum: false,
573            position: None,
574            import_prefix: None,
575        });
576
577        let enm1 = Enumeration {
578            name: "Enum1".to_string(),
579            mappings: BTreeMap::from([("key1".to_string(), "value1".to_string())]),
580            docstring: "".to_string(),
581            position: None,
582        };
583
584        let enm2 = Enumeration {
585            name: "Enum2".to_string(),
586            mappings: BTreeMap::from([("key2".to_string(), "value2".to_string())]),
587            docstring: "".to_string(),
588            position: None,
589        };
590
591        model1.objects.push(obj1);
592        model1.enums.push(enm1);
593        model2.objects.push(obj2);
594        model2.enums.push(enm2);
595
596        // Act
597        model1.merge(&model2);
598
599        // Assert
600        assert_eq!(model1.objects.len(), 2);
601        assert_eq!(model1.enums.len(), 2);
602        assert_eq!(model1.objects[0].name, "Object1");
603        assert_eq!(model1.objects[1].name, "Object2");
604        assert_eq!(model1.enums[0].name, "Enum1");
605        assert_eq!(model1.enums[1].name, "Enum2");
606    }
607
608    #[test]
609    fn test_sort_attrs() {
610        // Arrange
611        let mut model = DataModel::new(None, None);
612        let mut obj = Object::new("Object1".to_string(), None);
613        obj.add_attribute(crate::attribute::Attribute {
614            name: "not_required".to_string(),
615            is_array: false,
616            is_id: false,
617            dtypes: vec!["string".to_string()],
618            docstring: "".to_string(),
619            options: vec![],
620            term: None,
621            required: false,
622            xml: None,
623            default: Some(DataType::String("".to_string())),
624            is_enum: false,
625            position: None,
626            import_prefix: None,
627        });
628
629        obj.add_attribute(crate::attribute::Attribute {
630            name: "required".to_string(),
631            is_array: false,
632            is_id: false,
633            dtypes: vec!["string".to_string()],
634            docstring: "".to_string(),
635            options: vec![],
636            term: None,
637            required: true,
638            xml: None,
639            default: None,
640            is_enum: false,
641            position: None,
642            import_prefix: None,
643        });
644
645        model.objects.push(obj);
646
647        // Act
648        model.sort_attrs();
649
650        // Assert
651        assert_eq!(model.objects[0].attributes[0].name, "required");
652        assert_eq!(model.objects[0].attributes[1].name, "not_required");
653    }
654
655    #[test]
656    fn test_from_internal_schema() {
657        // Arrange
658        let path = Path::new("tests/data/expected_internal_schema.json");
659
660        // Act
661        let model = DataModel::from_internal_schema(path).expect("Failed to parse internal schema");
662
663        // Assert
664        assert_eq!(model.objects.len(), 2);
665        assert_eq!(model.enums.len(), 1);
666    }
667
668    #[test]
669    fn test_from_markdown_w_html() {
670        // Arrange
671        let path = Path::new("tests/data/model_w_html.md");
672
673        // Act
674        let model = DataModel::from_markdown(path).expect("Failed to parse markdown");
675
676        // Assert
677        assert_eq!(model.objects.len(), 2);
678        assert_eq!(model.enums.len(), 1);
679    }
680
681    #[test]
682    fn test_from_markdown_string() {
683        // Arrange
684        let path = Path::new("tests/data/model.md");
685        let content = fs::read_to_string(path).unwrap();
686
687        // Act
688        let model =
689            DataModel::from_markdown_string(content.as_str()).expect("Failed to parse markdown");
690
691        // Assert
692        assert_eq!(model.objects.len(), 2);
693        assert_eq!(model.enums.len(), 1);
694    }
695}