Skip to main content

mdmodels_core/
datamodel.rs

1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24use std::collections::HashMap;
25use std::path::PathBuf;
26use std::{error::Error, fs, path::Path};
27
28use log::error;
29use serde::{Deserialize, Serialize};
30
31use crate::error::DataModelError;
32use crate::exporters::{render_jinja_template, Templates};
33#[cfg(not(target_arch = "wasm32"))]
34use crate::git::cache_github_repo;
35use crate::json::export::to_json_schema;
36use crate::json::schema::SchemaObject;
37use crate::json::validation::{validate_json, ValidationError};
38use crate::jsonld::export::to_json_ld;
39use crate::jsonld::schema::JsonLdHeader;
40use crate::linkml::export::serialize_linkml;
41use crate::markdown::frontmatter::FrontMatter;
42use crate::markdown::parser::{parse_markdown, validate_model};
43use crate::object::{Enumeration, Object};
44use crate::validation::Validator;
45use colored::Colorize;
46
47#[cfg(feature = "python")]
48use pyo3::pyclass;
49
50#[cfg(feature = "wasm")]
51use tsify_next::Tsify;
52
/// Names of definitions that `DataModel::merge` leaves alone.
///
/// When the receiving model already contains an object or enumeration with
/// one of these names, the same-named definition of the other model is
/// neither checked for conflicts nor copied over.
const MERGE_IGNORE_TYPES: &[&str] = &["UnitDefinition", "BaseUnit", "UnitType"];
55
56// Data model
57//
58// Contains a list of objects that represent the data model
59// written in the markdown format
60//
61// # Examples
62//
63// ```
64// let model = DataModel::new();
65// ```
66//
67// # Fields
68//
69// * `objects` - A list of objects
70//
71// # Methods
72//
73// * `new` - Create a new data model
74// * `parse` - Parse a markdown file and create a data model
75// * `json_schema` - Generate a JSON schema from the data model
76// * `json_schema_all` - Generate JSON schemas for all objects in the data model
77// * `internal_schema` - Generate an internal schema from the data model
78#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
79#[cfg_attr(feature = "python", pyclass(get_all, from_py_object))]
80#[cfg_attr(feature = "wasm", derive(Tsify))]
81#[cfg_attr(feature = "wasm", tsify(into_wasm_abi))]
82pub struct DataModel {
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub name: Option<String>,
85    pub objects: Vec<Object>,
86    pub enums: Vec<Enumeration>,
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub config: Option<FrontMatter>,
89}
90
91impl DataModel {
92    pub fn new(name: Option<String>, config: Option<FrontMatter>) -> Self {
93        DataModel {
94            name,
95            objects: Vec::new(),
96            enums: Vec::new(),
97            config,
98        }
99    }
100
101    /// Validates a dataset against the data model.
102    ///
103    /// This function takes the path to a dataset and validates it against the
104    /// current data model. It returns a vector of validation errors if any
105    /// validation issues are found, or an empty vector if the validation is successful.
106    ///
107    /// # Arguments
108    ///
109    /// * `path` - A reference to the path of the dataset to validate.
110    /// * `root` - An optional root path for the schema. Will use the first object if not provided.
111    ///
112    /// # Returns
113    /// A Result containing a vector of `ValidationError` if validation fails,
114    /// or an empty vector if successful.
115    pub fn validate_json(
116        &self,
117        path: &Path,
118        root: Option<String>,
119    ) -> Result<Vec<ValidationError>, Box<dyn Error>> {
120        validate_json(path.to_path_buf(), self, root)
121    }
122
123    // Get the JSON schema for an object
124    //
125    // * `obj_name` - Name of the object
126    // * `openai` - Whether to remove options from the schema properties. OpenAI does not support options.
127    //
128    // # Panics
129    // If no objects are found in the markdown file
130    // If the object is not found in the markdown file
131    //
132    // # Examples
133    //
134    // ```
135    // let model = DataModel::new();
136    // model.parse("path/to/file.md".to_string());
137    // let schema = model.json_schema("object_name".to_string());
138    // ```
139    //
140    // # Returns
141    //
142    // A JSON schema string
143    pub fn json_schema(
144        &self,
145        obj_name: Option<String>,
146        openai: bool,
147    ) -> Result<String, Box<dyn Error>> {
148        if self.objects.is_empty() {
149            panic!("No objects found in the markdown file");
150        }
151
152        match obj_name {
153            Some(name) => {
154                if self.objects.iter().all(|o| o.name != name) {
155                    panic!("Object '{name}' not found in the markdown file");
156                }
157                Ok(serde_json::to_string_pretty(&to_json_schema(
158                    self, &name, openai,
159                )?)?)
160            }
161            None => Ok(serde_json::to_string_pretty(&to_json_schema(
162                self,
163                &self.objects[0].name,
164                openai,
165            )?)?),
166        }
167    }
168
169    // Get the JSON schema for all objects in the markdown file
170    // and write them to a file
171    //
172    // * `path` - Path to the directory where the JSON schema files will be written
173    // * `openai` - Whether to remove options from the schema properties. OpenAI does not support options.
174    //
175    // # Panics
176    //
177    // If no objects are found in the markdown file
178    //
179    // # Examples
180    //
181    // ```
182    // let model = DataModel::new();
183    // model.parse("path/to/file.md".to_string());
184    // model.json_schema_all("path/to/directory".to_string());
185    // ```
186    pub fn json_schema_all(&self, path: PathBuf, openai: bool) -> Result<(), Box<dyn Error>> {
187        if self.objects.is_empty() {
188            panic!("No objects found in the markdown file");
189        }
190
191        // Create the directory if it does not exist
192        if !std::path::Path::new(&path).exists() {
193            fs::create_dir_all(&path).expect("Could not create directory");
194        }
195
196        let base_path = path.to_str().ok_or("Failed to convert path to string")?;
197        for object in &self.objects {
198            let schema = to_json_schema(self, &object.name, openai)?;
199            let file_name = format!("{}/{}.json", base_path, object.name);
200            fs::write(file_name, serde_json::to_string_pretty(&schema)?)
201                .expect("Could not write file");
202        }
203
204        Ok(())
205    }
206
207    /// Generates a JSON-LD header (`JsonLdHeader`) for the data model, suitable for use in JSON-LD serialization.
208    ///
209    /// This method constructs a `JsonLdHeader` using the model's configuration and optionally a specified root object.
210    /// The header contains the appropriate JSON-LD `@context`, `@id`, and `@type` for the model or selected object.
211    ///
212    /// # Arguments
213    ///
214    /// * `root` - Optional. The name of the root object to use. If `None`, the first object in the model is used.
215    ///
216    /// # Returns
217    ///
218    /// * `Ok(JsonLdHeader)` with the generated JSON-LD header if successful.
219    /// * `Err(Box<dyn Error>)` if the generation fails (for example, if the root is not found).
220    pub fn json_ld_header(&self, root: Option<&str>) -> Result<JsonLdHeader, Box<dyn Error>> {
221        to_json_ld(self, root)
222    }
223
224    // Get the internal schema for the markdown file
225    //
226    // # Panics
227    //
228    // If no objects are found in the markdown file
229    //
230    // # Examples
231    //
232    // ```
233    // let model = DataModel::new();
234    // model.parse("path/to/file.md".to_string());
235    // let schema = model.internal_schema();
236    // ```
237    //
238    // # Returns
239    //
240    // An internal schema string
241    pub fn internal_schema(&self) -> String {
242        if self.objects.is_empty() {
243            panic!("No objects found in the markdown file");
244        }
245
246        serde_json::to_string_pretty(&self).expect("Could not serialize to internal schema")
247    }
248
249    // Parse a markdown file and create a data model
250    //
251    // * `path` - Path to the markdown file
252    //
253    // # Examples
254    //
255    // ```
256    // let path = Path::new("path/to/file.md");
257    // let model = DataModel::from_internal_schema(path);
258    // ```
259    //
260    // # Returns
261    //
262    // A data model
263    //
264    pub fn from_internal_schema(path: &Path) -> Result<Self, Box<dyn Error>> {
265        if !path.exists() {
266            return Err("File does not exist".into());
267        }
268
269        let contents = fs::read_to_string(path)?;
270        let model: DataModel = serde_json::from_str(&contents)?;
271
272        Ok(model)
273    }
274
275    /// Sort the attributes of all objects by required
276    pub fn sort_attrs(&mut self) {
277        for obj in &mut self.objects {
278            obj.sort_attrs_by_required();
279        }
280    }
281
282    // Convert the data model to a template using Jinja
283    //
284    // * `template` - The Jinja template
285    //
286    // # Returns
287    //
288    // A string containing the Jinja template
289    //
290    // # Errors
291    //
292    // If the Jinja template is invalid
293    //
294    pub fn convert_to(
295        &mut self,
296        template: &Templates,
297        config: Option<&HashMap<String, String>>,
298    ) -> Result<String, minijinja::Error> {
299        self.sort_attrs();
300
301        match template {
302            Templates::JsonLd => {
303                Ok(serde_json::to_string_pretty(&self.json_ld_header(None).unwrap()).unwrap())
304            }
305            Templates::JsonSchema => Ok(self.json_schema(None, false).unwrap()),
306            Templates::Linkml => Ok(serialize_linkml(self.clone(), None).unwrap()),
307            _ => render_jinja_template(template, self, config),
308        }
309    }
310
311    // Merge two data models
312    //
313    // * `other` - The other data model to merge
314    pub fn merge(&mut self, other: &Self) {
315        // Initialize a variable to check if the merge is valid
316        let mut valid = true;
317        let ignore_types = self.get_ignore_types();
318
319        // Check if there are any duplicate objects or enums
320        // Types that are internally defined do not throw an error
321        for other_obj in &other.objects {
322            if ignore_types.contains(&other_obj.name) {
323                continue;
324            }
325            if let Some(duplicate_obj) = self.objects.iter().find(|o| o.name == other_obj.name) {
326                if !duplicate_obj.same_hash(other_obj) {
327                    error!(
328                        "[{}] {}: Object {} is defined more than once.",
329                        "Merge".bold(),
330                        "DuplicateError".bold(),
331                        other_obj.name.red().bold(),
332                    );
333                    valid = false;
334                }
335            }
336        }
337
338        for other_enm in &other.enums {
339            if ignore_types.contains(&other_enm.name) {
340                continue;
341            }
342            if let Some(duplicate_enm) = self.enums.iter().find(|e| e.name == other_enm.name) {
343                if !duplicate_enm.same_hash(other_enm) {
344                    error!(
345                        "[{}] {}: Enumeration {} is defined more than once.",
346                        "Merge".bold(),
347                        "DuplicateError".bold(),
348                        other_enm.name.red().bold(),
349                    );
350                    valid = false;
351                }
352            }
353        }
354
355        // If the merge is not valid, panic
356        if !valid {
357            panic!("Merge is not valid");
358        }
359
360        // Merge prefixes: only add new ones, preserve existing
361        self.merge_prefixes(other);
362
363        // Merge the objects and enums
364        self.objects.extend(
365            other
366                .objects
367                .iter()
368                .filter(|o| !ignore_types.contains(&o.name))
369                .filter(|o| !self.objects.iter().any(|existing| existing.name == o.name))
370                .cloned()
371                .collect::<Vec<Object>>(),
372        );
373        self.enums.extend(
374            other
375                .enums
376                .iter()
377                .filter(|e| !ignore_types.contains(&e.name))
378                .filter(|e| !self.enums.iter().any(|existing| existing.name == e.name))
379                .cloned()
380                .collect::<Vec<Enumeration>>(),
381        );
382    }
383
384    /// Merge prefixes from another data model into this one.
385    /// Only adds new prefixes, preserving existing ones.
386    fn merge_prefixes(&mut self, other: &Self) {
387        if let Some(other_prefixes) = other.config.as_ref().and_then(|c| c.prefixes.as_ref()) {
388            let self_config = self.config.get_or_insert_with(FrontMatter::new);
389            let self_prefixes = self_config.prefixes.get_or_insert_with(HashMap::new);
390
391            for (key, value) in other_prefixes {
392                self_prefixes
393                    .entry(key.clone())
394                    .or_insert_with(|| value.clone());
395            }
396        }
397    }
398
399    /// Get the types that should be ignored when merging.
400    fn get_ignore_types(&self) -> Vec<String> {
401        let mut ignore_types = Vec::new();
402        if self
403            .objects
404            .iter()
405            .any(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
406        {
407            ignore_types.extend(
408                self.objects
409                    .iter()
410                    .filter(|o| MERGE_IGNORE_TYPES.contains(&o.name.as_str()))
411                    .map(|o| o.name.clone()),
412            );
413        }
414        if self
415            .enums
416            .iter()
417            .any(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
418        {
419            ignore_types.extend(
420                self.enums
421                    .iter()
422                    .filter(|e| MERGE_IGNORE_TYPES.contains(&e.name.as_str()))
423                    .map(|e| e.name.clone()),
424            );
425        }
426        ignore_types
427    }
428
429    /// Parse a markdown file and create a data model
430    ///
431    /// * `path` - Path to the markdown file
432    ///
433    /// # Examples
434    ///
435    /// ```
436    /// use std::path::Path;
437    /// use mdmodels_core::datamodel::DataModel;
438    ///
439    /// let path = Path::new("tests/data/model.md");
440    /// let model = DataModel::from_markdown(path);
441    /// ```
442    /// # Returns
443    /// A data model
444    #[allow(clippy::result_large_err)]
445    pub fn from_markdown(path: &Path) -> Result<Self, Validator> {
446        let content = fs::read_to_string(path).expect("Could not read file");
447        parse_markdown(&content, Some(path))
448    }
449
450    #[cfg(not(target_arch = "wasm32"))]
451    pub fn from_github(repo: &str, path: &str) -> Result<Self, Box<dyn Error>> {
452        let cached = cache_github_repo(repo)?;
453        let path = path.trim_start_matches('/');
454        let model_path = cached.root.join(path);
455
456        if !model_path.exists() {
457            return Err(format!(
458                "Model path '{}' does not exist in cached repo {} at {}",
459                path, repo, cached.commit
460            )
461            .into());
462        }
463
464        let model = DataModel::from_markdown(&model_path)?;
465        Ok(model)
466    }
467
468    /// Parse a markdown file and create a data model
469    ///
470    /// * `path` - Path to the markdown file
471    ///
472    /// # Examples
473    ///
474    /// ```
475    /// use std::path::Path;
476    /// use std::fs;
477    /// use mdmodels_core::datamodel::DataModel;
478    ///
479    /// let path = Path::new("tests/data/model.md");
480    /// let content = fs::read_to_string(path).unwrap();
481    /// let model = DataModel::from_markdown_string(content.as_str());
482    /// ```
483    /// # Returns
484    /// A data model
485    #[allow(clippy::result_large_err)]
486    pub fn from_markdown_string(content: &str) -> Result<Self, Validator> {
487        parse_markdown(content, None)
488    }
489
490    /// Parse a JSON schema file and create a data model
491    ///
492    /// * `path` - Path to the JSON schema file
493    ///
494    /// # Returns
495    /// A data model
496    #[allow(clippy::result_large_err)]
497    pub fn from_json_schema(path: &Path) -> Result<Self, DataModelError> {
498        let content = fs::read_to_string(path)?;
499        let schema: SchemaObject = serde_json::from_str(&content)?;
500        let model: DataModel = schema
501            .try_into()
502            .expect("Could not convert schema to data model");
503
504        // Validate the data model
505        validate_model(&model).map_err(DataModelError::ValidationError)?;
506
507        Ok(model)
508    }
509
510    /// Parse a JSON schema string and create a data model
511    ///
512    /// * `content` - The JSON schema string
513    ///
514    /// # Returns
515    /// A data model
516    #[allow(clippy::result_large_err)]
517    pub fn from_json_schema_string(content: &str) -> Result<Self, DataModelError> {
518        let schema: SchemaObject = serde_json::from_str(content)?;
519        let model: DataModel = schema
520            .try_into()
521            .expect("Could not convert schema to data model");
522
523        // Validate the data model
524        validate_model(&model).map_err(DataModelError::ValidationError)?;
525
526        Ok(model)
527    }
528
529    /// Parse a JSON schema object and create a data model
530    ///
531    /// * `schema` - The JSON schema object
532    ///
533    /// # Returns
534    /// A data model
535    #[allow(clippy::result_large_err)]
536    pub fn from_json_schema_object(schema: SchemaObject) -> Result<Self, DataModelError> {
537        let model: DataModel = schema
538            .try_into()
539            .expect("Could not convert schema to data model");
540
541        // Validate the data model
542        validate_model(&model).map_err(DataModelError::ValidationError)?;
543
544        Ok(model)
545    }
546}
547
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use crate::attribute::{Attribute, DataType};

    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds a scalar, non-id `string` attribute with empty documentation.
    fn string_attribute(name: &str, required: bool, default: Option<DataType>) -> Attribute {
        Attribute {
            name: name.to_string(),
            is_array: false,
            is_id: false,
            dtypes: vec!["string".to_string()],
            docstring: String::new(),
            options: vec![],
            term: None,
            required,
            xml: None,
            default,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    }

    /// Builds an object that carries exactly one optional string attribute.
    fn object_with_attribute(object_name: &str, attribute_name: &str) -> Object {
        let mut object = Object::new(object_name.to_string(), None);
        object.add_attribute(string_attribute(attribute_name, false, None));
        object
    }

    /// Builds an enumeration with a single key/value mapping.
    fn single_entry_enum(name: &str, key: &str, value: &str) -> Enumeration {
        Enumeration {
            name: name.to_string(),
            mappings: BTreeMap::from([(key.to_string(), value.to_string())]),
            docstring: String::new(),
            position: None,
        }
    }

    #[test]
    fn test_merge() {
        // Arrange
        let mut model1 = DataModel::new(None, None);
        model1.objects.push(object_with_attribute("Object1", "test1"));
        model1.enums.push(single_entry_enum("Enum1", "key1", "value1"));

        let mut model2 = DataModel::new(None, None);
        model2.objects.push(object_with_attribute("Object2", "test2"));
        model2.enums.push(single_entry_enum("Enum2", "key2", "value2"));

        // Act
        model1.merge(&model2);

        // Assert
        let object_names: Vec<&str> = model1.objects.iter().map(|o| o.name.as_str()).collect();
        let enum_names: Vec<&str> = model1.enums.iter().map(|e| e.name.as_str()).collect();
        assert_eq!(object_names, vec!["Object1", "Object2"]);
        assert_eq!(enum_names, vec!["Enum1", "Enum2"]);
    }

    #[test]
    fn test_sort_attrs() {
        // Arrange
        let mut object = Object::new("Object1".to_string(), None);
        object.add_attribute(string_attribute(
            "not_required",
            false,
            Some(DataType::String("".to_string())),
        ));
        object.add_attribute(string_attribute("required", true, None));

        let mut model = DataModel::new(None, None);
        model.objects.push(object);

        // Act
        model.sort_attrs();

        // Assert
        let sorted = &model.objects[0].attributes;
        assert_eq!(sorted[0].name, "required");
        assert_eq!(sorted[1].name, "not_required");
    }

    #[test]
    fn test_from_internal_schema() {
        // Arrange
        let schema_file = Path::new("tests/data/expected_internal_schema.json");

        // Act
        let model =
            DataModel::from_internal_schema(schema_file).expect("Failed to parse internal schema");

        // Assert
        assert_eq!(model.objects.len(), 2);
        assert_eq!(model.enums.len(), 1);
    }

    #[test]
    fn test_from_markdown_w_html() {
        // Arrange
        let markdown_file = Path::new("tests/data/model_w_html.md");

        // Act
        let model = DataModel::from_markdown(markdown_file).expect("Failed to parse markdown");

        // Assert
        assert_eq!(model.objects.len(), 2);
        assert_eq!(model.enums.len(), 1);
    }

    #[test]
    fn test_from_markdown_string() {
        // Arrange
        let markdown = fs::read_to_string(Path::new("tests/data/model.md")).unwrap();

        // Act
        let model = DataModel::from_markdown_string(&markdown).expect("Failed to parse markdown");

        // Assert
        assert_eq!(model.objects.len(), 2);
        assert_eq!(model.enums.len(), 1);
    }
}