Skip to main content

mdmodels_core/linkml/
export.rs

1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24//! Provides functionality to export data models to LinkML format.
25//!
26//! This module contains implementations for converting internal data model representations
27//! to LinkML schema format. It handles the conversion of objects, attributes, and enumerations
28//! to their corresponding LinkML representations.
29//!
30//! The module provides several key components:
31//! - Serialization of DataModel instances to LinkML YAML format
32//! - Conversion implementations between internal model types and LinkML schema types
33//! - Utilities for handling global slots and attribute sharing between classes
34//! - Pattern constraint management through slot usage
35//!
36//! The conversion process preserves:
37//! - Documentation and descriptions
38//! - Data types and ranges
39//! - Cardinality constraints
40//! - Identifier flags
41//! - Required/optional status
42//! - URI/term mappings
43//! - Enumeration values and meanings
44//! - Minimum/maximum value constraints
45//! - Pattern validation rules
46
47use std::{error::Error, path::PathBuf};
48
49use indexmap::IndexMap;
50
51use crate::{
52    attribute::Attribute,
53    object::{Enumeration, Object},
54    prelude::DataModel,
55    tree::{self},
56};
57
58use super::schema::{
59    AttributeDefinition, ClassDefinition, EnumDefinition, Example, LinkML, PermissibleValue,
60    SlotUsage,
61};
62
63/// Serializes a DataModel to LinkML YAML format and writes it to a file.
64///
65/// This function takes a DataModel instance and converts it to LinkML schema format,
66/// then serializes it to YAML. If an output path is provided, the YAML will be written
67/// to that file. The function returns the serialized YAML string regardless of whether
68/// it was written to a file.
69///
70/// # Arguments
71///
72/// * `model` - The DataModel to serialize
73/// * `out` - Optional output path to write the YAML to
74///
75/// # Returns
76///
77/// * `Ok(String)` - The serialized YAML string
78/// * `Err(Box<dyn Error>)` - If serialization or file writing fails
79pub fn serialize_linkml(model: DataModel, out: Option<&PathBuf>) -> Result<String, Box<dyn Error>> {
80    let linkml = LinkML::from(model);
81    let yaml = serde_yaml::to_string(&linkml)?;
82    if let Some(out) = out {
83        std::fs::write(out, &yaml)?;
84    }
85
86    Ok(yaml)
87}
88
89/// Implements conversion from DataModel to LinkML schema format.
90impl From<DataModel> for LinkML {
91    /// Converts a DataModel instance into a LinkML schema.
92    ///
93    /// This conversion process handles:
94    /// - Basic schema configuration including ID, prefixes, and name
95    /// - Class definitions and their attributes
96    /// - Global slots (shared attributes across classes)
97    /// - Enumeration definitions
98    /// - Import declarations
99    /// - Default type configurations
100    ///
101    /// The conversion maintains the hierarchical structure of the data model while
102    /// adapting it to LinkML's schema format requirements.
103    fn from(model: DataModel) -> Self {
104        // Basic configuration
105        let config = model.clone().config.unwrap_or_default();
106        let id = &config.prefix;
107        let prefixes: IndexMap<String, String> =
108            config.prefixes.unwrap_or_default().into_iter().collect();
109        let name = model
110            .name
111            .clone()
112            .unwrap_or("Unnamed Data Model".to_string());
113
114        // Classes - ensure sorting by collecting into a BTreeMap
115        let mut classes: IndexMap<String, ClassDefinition> = IndexMap::from_iter(
116            model
117                .objects
118                .iter()
119                .map(|c| (c.name.clone(), c.clone().into())),
120        );
121
122        // Extract slots and update classes
123        let slots = extract_slots(&model);
124
125        classes.values_mut().for_each(|c| {
126            remove_global_slots(c, &slots);
127        });
128
129        // Determine the order of classes based on dependencies
130        let graph = tree::dependency_graph(&model);
131        let class_order = tree::get_topological_order(&graph);
132
133        // Set the root class
134        if let Some(root) = class_order.first() {
135            if let Some(class) = classes.get_mut(root) {
136                class.tree_root = Some(true);
137            }
138        }
139
140        // Enums
141        let enums: IndexMap<String, EnumDefinition> = model
142            .enums
143            .iter()
144            .map(|e| (e.name.clone(), e.clone().into()))
145            .collect::<IndexMap<String, EnumDefinition>>();
146
147        Self {
148            id: id.clone(),
149            name: name.clone(),
150            title: name,
151            description: None,
152            license: None,
153            see_also: Vec::new(),
154            prefixes: prefixes.clone(),
155            default_prefix: id.clone(),
156            default_range: Some("string".to_string()),
157            imports: vec!["linkml:types".to_string()],
158            classes,
159            slots,
160            enums,
161        }
162    }
163}
164
165/// Extracts global slots (shared attributes) from a data model.
166///
167/// Global slots are attributes that appear in multiple classes with identical definitions.
168/// This function identifies such attributes and extracts them to be defined at the schema level
169/// rather than within individual classes.
170///
171/// The extraction process:
172/// 1. Collects all attributes from all classes
173/// 2. Identifies attributes that appear multiple times with identical definitions
174/// 3. Returns these as global slots
175///
176/// # Arguments
177///
178/// * `model` - The data model to extract slots from
179///
180/// # Returns
181///
182/// A HashMap mapping slot names to their definitions
183fn extract_slots(model: &DataModel) -> IndexMap<String, AttributeDefinition> {
184    // Extract and convert attributes to a map
185    let attributes: IndexMap<String, AttributeDefinition> = model
186        .objects
187        .iter()
188        .flat_map(|o| o.attributes.iter())
189        .map(|a| (a.name.clone(), a.clone().into()))
190        .collect();
191
192    // Filter out non-duplicate attributes (global slots)
193    attributes
194        .clone()
195        .into_iter()
196        .filter(
197            // Check if the attribute is defined in more than one class
198            |(name_a, def_a)| {
199                attributes
200                    .iter()
201                    .filter(|(name_b, def_b)| name_a == *name_b && def_a == *def_b)
202                    .count()
203                    > 1
204            },
205        )
206        .collect()
207}
208
209/// Updates a class definition to use global slots where appropriate.
210///
211/// This function modifies a class definition to reference global slots instead of
212/// duplicating attribute definitions. It performs the following steps:
213/// 1. Identifies which of the class's attributes match global slot definitions
214/// 2. Adds references to those slots in the class's slots list
215/// 3. Removes the matching attributes from the class's local attributes
216///
217/// This process helps reduce redundancy and maintain consistency across the schema.
218///
219/// # Arguments
220///
221/// * `class` - The class definition to update
222/// * `slots` - The map of global slots to reference
223fn remove_global_slots(class: &mut ClassDefinition, slots: &IndexMap<String, AttributeDefinition>) {
224    // Get the class's attributes
225    let class_attrs = class.attributes.clone().unwrap_or_default();
226
227    // Fill slots with globally defined duplicate attributes that exist in this class
228    class.slots = class_attrs
229        .keys()
230        .filter(|name| slots.contains_key(*name))
231        .cloned()
232        .collect();
233
234    // Keep only non-duplicate attributes in the class
235    class.attributes = Some(
236        class_attrs
237            .iter()
238            .filter(|(name, _)| !slots.contains_key(*name))
239            .map(|(name, def)| (name.clone(), def.clone()))
240            .collect(),
241    );
242}
243
244/// Implements conversion from Object to LinkML ClassDefinition.
245impl From<Object> for ClassDefinition {
246    /// Converts an Object into a LinkML ClassDefinition.
247    ///
248    /// This conversion process handles:
249    /// - Converting attributes to LinkML format
250    /// - Setting up slot usage for pattern constraints
251    /// - Preserving documentation and URI terms
252    /// - Maintaining inheritance relationships
253    /// - Managing attribute constraints and validations
254    fn from(obj: Object) -> Self {
255        // Create a map of attributes
256        let attrib = obj
257            .attributes
258            .iter()
259            .map(|a| (a.name.clone(), a.clone().into()))
260            .collect::<IndexMap<String, AttributeDefinition>>();
261
262        // Derive slot usage from attributes
263        let mut slot_usage = IndexMap::new();
264        for attr in obj.attributes.iter() {
265            let pattern_option = attr.options.iter().find(|o| o.key() == "pattern");
266            if let Some(pattern) = pattern_option {
267                slot_usage.insert(
268                    attr.name.clone(),
269                    SlotUsage {
270                        pattern: Some(pattern.value().to_string()),
271                    },
272                );
273            }
274        }
275
276        ClassDefinition {
277            description: Some(obj.docstring),
278            class_uri: obj.term.clone(),
279            slots: Vec::new(),
280            is_a: obj.term,
281            mixins: obj.mixins,
282            tree_root: None,
283            attributes: Some(attrib),
284            slot_usage: if slot_usage.is_empty() {
285                None
286            } else {
287                Some(slot_usage)
288            },
289        }
290    }
291}
292
293/// Implements conversion from Attribute to LinkML AttributeDefinition.
294impl From<Attribute> for AttributeDefinition {
295    /// Converts an Attribute into a LinkML AttributeDefinition.
296    ///
297    /// This conversion preserves:
298    /// - Array/multivalued status
299    /// - Data type (range)
300    /// - Documentation
301    /// - ID status
302    /// - Required status
303    /// - Minimum and maximum values
304    /// - Examples
305    /// - Term mappings
306    fn from(attribute: Attribute) -> Self {
307        let minimum_value = attribute.options.iter().find(|o| o.key() == "minimum");
308        let maximum_value = attribute.options.iter().find(|o| o.key() == "maximum");
309        let example = attribute
310            .options
311            .iter()
312            .filter(|o| o.key() == "example")
313            .map(|o| Example {
314                value: Some(o.value()),
315                description: None,
316            })
317            .collect::<Vec<_>>();
318
319        AttributeDefinition {
320            slot_uri: attribute.term,
321            multivalued: Some(attribute.is_array),
322            range: if attribute.dtypes[0] == "string" {
323                None
324            } else {
325                Some(attribute.dtypes[0].clone())
326            },
327            description: Some(attribute.docstring),
328            identifier: Some(attribute.is_id),
329            required: Some(attribute.required),
330            readonly: None,
331            minimum_value: minimum_value.map(|v| v.value().parse::<i64>().unwrap()),
332            maximum_value: maximum_value.map(|v| v.value().parse::<i64>().unwrap()),
333            recommended: None,
334            examples: example,
335            annotations: None,
336        }
337    }
338}
339
340/// Implements conversion from Enumeration to LinkML EnumDefinition.
341impl From<Enumeration> for EnumDefinition {
342    /// Converts an Enumeration into a LinkML EnumDefinition.
343    ///
344    /// This conversion process handles:
345    /// - Documentation preservation
346    /// - Enumeration values and their meanings
347    /// - Value descriptions
348    /// - Semantic mappings
349    fn from(enum_: Enumeration) -> Self {
350        let mut values = IndexMap::new();
351        for (key, value) in enum_.mappings.iter() {
352            values.insert(
353                key.clone(),
354                PermissibleValue {
355                    text: None,
356                    description: Some(value.clone()),
357                    meaning: Some(value.clone()),
358                },
359            );
360        }
361        EnumDefinition {
362            description: Some(enum_.docstring),
363            permissible_values: values,
364        }
365    }
366}
367
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{collections::BTreeMap, path::PathBuf};

    use crate::option::AttrOption;

    use super::*;

    /// The markdown fixture must serialize to the schema stored in the expected
    /// YAML fixture; both sides are compared as parsed `LinkML` values.
    #[test]
    fn serialize_linkml_test() {
        let model_path = PathBuf::from("tests/data/model.md");
        let model = DataModel::from_markdown(&model_path).unwrap();
        let serialized = serialize_linkml(model, None).unwrap();
        let actual: LinkML = serde_yaml::from_str(&serialized).unwrap();

        let expected_source = std::fs::read_to_string("tests/data/expected_linkml.yml").unwrap();
        let expected: LinkML = serde_yaml::from_str(&expected_source).unwrap();

        assert_eq!(actual, expected);
    }

    /// An object with a term and a pattern-constrained attribute yields a class
    /// with description, class URI, `is_a` and slot usage populated.
    #[test]
    fn test_class_definition_conversion() {
        let attr = Attribute {
            name: "test_attr".to_string(),
            options: vec![AttrOption::Pattern("^test.*$".to_string())],
            dtypes: vec!["string".to_string()],
            ..Attribute::default()
        };
        let obj = Object {
            name: "TestClass".to_string(),
            docstring: "Test description".to_string(),
            term: Some("http://example.org/TestClass".to_string()),
            attributes: vec![attr],
            ..Object::default()
        };

        let class_def = ClassDefinition::from(obj);

        assert_eq!(class_def.description, Some("Test description".to_string()));
        assert_eq!(
            class_def.class_uri,
            Some("http://example.org/TestClass".to_string())
        );
        assert!(class_def.is_a.is_some());
        assert!(class_def.slot_usage.is_some());
    }

    /// Flags, data type and docstring of an attribute are carried over.
    #[test]
    fn test_attribute_definition_conversion() {
        let attr = Attribute {
            is_array: true,
            dtypes: vec!["integer".to_string()],
            docstring: "Test attribute".to_string(),
            is_id: true,
            required: true,
            ..Attribute::default()
        };

        let attr_def = AttributeDefinition::from(attr);

        assert_eq!(attr_def.multivalued, Some(true));
        assert_eq!(attr_def.range, Some("integer".to_string()));
        assert_eq!(attr_def.description, Some("Test attribute".to_string()));
        assert_eq!(attr_def.identifier, Some(true));
        assert_eq!(attr_def.required, Some(true));
    }

    /// Every mapping becomes a permissible value whose meaning is the mapped value.
    #[test]
    fn test_enum_definition_conversion() {
        let enum_ = Enumeration {
            docstring: "Test enum".to_string(),
            mappings: BTreeMap::from([
                ("KEY1".to_string(), "value1".to_string()),
                ("KEY2".to_string(), "value2".to_string()),
            ]),
            ..Enumeration::default()
        };

        let enum_def = EnumDefinition::from(enum_);

        assert_eq!(enum_def.description, Some("Test enum".to_string()));
        assert_eq!(enum_def.permissible_values.len(), 2);
        assert!(enum_def.permissible_values.contains_key("KEY1"));
        assert_eq!(
            enum_def.permissible_values["KEY1"].meaning,
            Some("value1".to_string())
        );
    }
}
451}