mdmodels_core/linkml/export.rs
1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24//! Provides functionality to export data models to LinkML format.
25//!
26//! This module contains implementations for converting internal data model representations
27//! to LinkML schema format. It handles the conversion of objects, attributes, and enumerations
28//! to their corresponding LinkML representations.
29//!
30//! The module provides several key components:
31//! - Serialization of DataModel instances to LinkML YAML format
32//! - Conversion implementations between internal model types and LinkML schema types
33//! - Utilities for handling global slots and attribute sharing between classes
34//! - Pattern constraint management through slot usage
35//!
36//! The conversion process preserves:
37//! - Documentation and descriptions
38//! - Data types and ranges
39//! - Cardinality constraints
40//! - Identifier flags
41//! - Required/optional status
42//! - URI/term mappings
43//! - Enumeration values and meanings
44//! - Minimum/maximum value constraints
45//! - Pattern validation rules
46
47use std::{error::Error, path::PathBuf};
48
49use indexmap::IndexMap;
50
51use crate::{
52 attribute::Attribute,
53 object::{Enumeration, Object},
54 prelude::DataModel,
55 tree::{self},
56};
57
58use super::schema::{
59 AttributeDefinition, ClassDefinition, EnumDefinition, Example, LinkML, PermissibleValue,
60 SlotUsage,
61};
62
63/// Serializes a DataModel to LinkML YAML format and writes it to a file.
64///
65/// This function takes a DataModel instance and converts it to LinkML schema format,
66/// then serializes it to YAML. If an output path is provided, the YAML will be written
67/// to that file. The function returns the serialized YAML string regardless of whether
68/// it was written to a file.
69///
70/// # Arguments
71///
72/// * `model` - The DataModel to serialize
73/// * `out` - Optional output path to write the YAML to
74///
75/// # Returns
76///
77/// * `Ok(String)` - The serialized YAML string
78/// * `Err(Box<dyn Error>)` - If serialization or file writing fails
79pub fn serialize_linkml(model: DataModel, out: Option<&PathBuf>) -> Result<String, Box<dyn Error>> {
80 let linkml = LinkML::from(model);
81 let yaml = serde_yaml::to_string(&linkml)?;
82 if let Some(out) = out {
83 std::fs::write(out, &yaml)?;
84 }
85
86 Ok(yaml)
87}
88
89/// Implements conversion from DataModel to LinkML schema format.
90impl From<DataModel> for LinkML {
91 /// Converts a DataModel instance into a LinkML schema.
92 ///
93 /// This conversion process handles:
94 /// - Basic schema configuration including ID, prefixes, and name
95 /// - Class definitions and their attributes
96 /// - Global slots (shared attributes across classes)
97 /// - Enumeration definitions
98 /// - Import declarations
99 /// - Default type configurations
100 ///
101 /// The conversion maintains the hierarchical structure of the data model while
102 /// adapting it to LinkML's schema format requirements.
103 fn from(model: DataModel) -> Self {
104 // Basic configuration
105 let config = model.clone().config.unwrap_or_default();
106 let id = &config.prefix;
107 let prefixes: IndexMap<String, String> =
108 config.prefixes.unwrap_or_default().into_iter().collect();
109 let name = model
110 .name
111 .clone()
112 .unwrap_or("Unnamed Data Model".to_string());
113
114 // Classes - ensure sorting by collecting into a BTreeMap
115 let mut classes: IndexMap<String, ClassDefinition> = IndexMap::from_iter(
116 model
117 .objects
118 .iter()
119 .map(|c| (c.name.clone(), c.clone().into())),
120 );
121
122 // Extract slots and update classes
123 let slots = extract_slots(&model);
124
125 classes.values_mut().for_each(|c| {
126 remove_global_slots(c, &slots);
127 });
128
129 // Determine the order of classes based on dependencies
130 let graph = tree::dependency_graph(&model);
131 let class_order = tree::get_topological_order(&graph);
132
133 // Set the root class
134 if let Some(root) = class_order.first() {
135 if let Some(class) = classes.get_mut(root) {
136 class.tree_root = Some(true);
137 }
138 }
139
140 // Enums
141 let enums: IndexMap<String, EnumDefinition> = model
142 .enums
143 .iter()
144 .map(|e| (e.name.clone(), e.clone().into()))
145 .collect::<IndexMap<String, EnumDefinition>>();
146
147 Self {
148 id: id.clone(),
149 name: name.clone(),
150 title: name,
151 description: None,
152 license: None,
153 see_also: Vec::new(),
154 prefixes: prefixes.clone(),
155 default_prefix: id.clone(),
156 default_range: Some("string".to_string()),
157 imports: vec!["linkml:types".to_string()],
158 classes,
159 slots,
160 enums,
161 }
162 }
163}
164
165/// Extracts global slots (shared attributes) from a data model.
166///
167/// Global slots are attributes that appear in multiple classes with identical definitions.
168/// This function identifies such attributes and extracts them to be defined at the schema level
169/// rather than within individual classes.
170///
171/// The extraction process:
172/// 1. Collects all attributes from all classes
173/// 2. Identifies attributes that appear multiple times with identical definitions
174/// 3. Returns these as global slots
175///
176/// # Arguments
177///
178/// * `model` - The data model to extract slots from
179///
180/// # Returns
181///
182/// A HashMap mapping slot names to their definitions
183fn extract_slots(model: &DataModel) -> IndexMap<String, AttributeDefinition> {
184 // Extract and convert attributes to a map
185 let attributes: IndexMap<String, AttributeDefinition> = model
186 .objects
187 .iter()
188 .flat_map(|o| o.attributes.iter())
189 .map(|a| (a.name.clone(), a.clone().into()))
190 .collect();
191
192 // Filter out non-duplicate attributes (global slots)
193 attributes
194 .clone()
195 .into_iter()
196 .filter(
197 // Check if the attribute is defined in more than one class
198 |(name_a, def_a)| {
199 attributes
200 .iter()
201 .filter(|(name_b, def_b)| name_a == *name_b && def_a == *def_b)
202 .count()
203 > 1
204 },
205 )
206 .collect()
207}
208
209/// Updates a class definition to use global slots where appropriate.
210///
211/// This function modifies a class definition to reference global slots instead of
212/// duplicating attribute definitions. It performs the following steps:
213/// 1. Identifies which of the class's attributes match global slot definitions
214/// 2. Adds references to those slots in the class's slots list
215/// 3. Removes the matching attributes from the class's local attributes
216///
217/// This process helps reduce redundancy and maintain consistency across the schema.
218///
219/// # Arguments
220///
221/// * `class` - The class definition to update
222/// * `slots` - The map of global slots to reference
223fn remove_global_slots(class: &mut ClassDefinition, slots: &IndexMap<String, AttributeDefinition>) {
224 // Get the class's attributes
225 let class_attrs = class.attributes.clone().unwrap_or_default();
226
227 // Fill slots with globally defined duplicate attributes that exist in this class
228 class.slots = class_attrs
229 .keys()
230 .filter(|name| slots.contains_key(*name))
231 .cloned()
232 .collect();
233
234 // Keep only non-duplicate attributes in the class
235 class.attributes = Some(
236 class_attrs
237 .iter()
238 .filter(|(name, _)| !slots.contains_key(*name))
239 .map(|(name, def)| (name.clone(), def.clone()))
240 .collect(),
241 );
242}
243
244/// Implements conversion from Object to LinkML ClassDefinition.
245impl From<Object> for ClassDefinition {
246 /// Converts an Object into a LinkML ClassDefinition.
247 ///
248 /// This conversion process handles:
249 /// - Converting attributes to LinkML format
250 /// - Setting up slot usage for pattern constraints
251 /// - Preserving documentation and URI terms
252 /// - Maintaining inheritance relationships
253 /// - Managing attribute constraints and validations
254 fn from(obj: Object) -> Self {
255 // Create a map of attributes
256 let attrib = obj
257 .attributes
258 .iter()
259 .map(|a| (a.name.clone(), a.clone().into()))
260 .collect::<IndexMap<String, AttributeDefinition>>();
261
262 // Derive slot usage from attributes
263 let mut slot_usage = IndexMap::new();
264 for attr in obj.attributes.iter() {
265 let pattern_option = attr.options.iter().find(|o| o.key() == "pattern");
266 if let Some(pattern) = pattern_option {
267 slot_usage.insert(
268 attr.name.clone(),
269 SlotUsage {
270 pattern: Some(pattern.value().to_string()),
271 },
272 );
273 }
274 }
275
276 ClassDefinition {
277 description: Some(obj.docstring),
278 class_uri: obj.term.clone(),
279 slots: Vec::new(),
280 is_a: obj.term,
281 mixins: obj.mixins,
282 tree_root: None,
283 attributes: Some(attrib),
284 slot_usage: if slot_usage.is_empty() {
285 None
286 } else {
287 Some(slot_usage)
288 },
289 }
290 }
291}
292
293/// Implements conversion from Attribute to LinkML AttributeDefinition.
294impl From<Attribute> for AttributeDefinition {
295 /// Converts an Attribute into a LinkML AttributeDefinition.
296 ///
297 /// This conversion preserves:
298 /// - Array/multivalued status
299 /// - Data type (range)
300 /// - Documentation
301 /// - ID status
302 /// - Required status
303 /// - Minimum and maximum values
304 /// - Examples
305 /// - Term mappings
306 fn from(attribute: Attribute) -> Self {
307 let minimum_value = attribute.options.iter().find(|o| o.key() == "minimum");
308 let maximum_value = attribute.options.iter().find(|o| o.key() == "maximum");
309 let example = attribute
310 .options
311 .iter()
312 .filter(|o| o.key() == "example")
313 .map(|o| Example {
314 value: Some(o.value()),
315 description: None,
316 })
317 .collect::<Vec<_>>();
318
319 AttributeDefinition {
320 slot_uri: attribute.term,
321 multivalued: Some(attribute.is_array),
322 range: if attribute.dtypes[0] == "string" {
323 None
324 } else {
325 Some(attribute.dtypes[0].clone())
326 },
327 description: Some(attribute.docstring),
328 identifier: Some(attribute.is_id),
329 required: Some(attribute.required),
330 readonly: None,
331 minimum_value: minimum_value.map(|v| v.value().parse::<i64>().unwrap()),
332 maximum_value: maximum_value.map(|v| v.value().parse::<i64>().unwrap()),
333 recommended: None,
334 examples: example,
335 annotations: None,
336 }
337 }
338}
339
340/// Implements conversion from Enumeration to LinkML EnumDefinition.
341impl From<Enumeration> for EnumDefinition {
342 /// Converts an Enumeration into a LinkML EnumDefinition.
343 ///
344 /// This conversion process handles:
345 /// - Documentation preservation
346 /// - Enumeration values and their meanings
347 /// - Value descriptions
348 /// - Semantic mappings
349 fn from(enum_: Enumeration) -> Self {
350 let mut values = IndexMap::new();
351 for (key, value) in enum_.mappings.iter() {
352 values.insert(
353 key.clone(),
354 PermissibleValue {
355 text: None,
356 description: Some(value.clone()),
357 meaning: Some(value.clone()),
358 },
359 );
360 }
361 EnumDefinition {
362 description: Some(enum_.docstring),
363 permissible_values: values,
364 }
365 }
366}
367
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{collections::BTreeMap, path::PathBuf};

    use crate::option::AttrOption;

    use super::*;

    /// Exports the Markdown fixture model and compares the parsed result with
    /// the stored LinkML expectation.
    #[test]
    fn serialize_linkml_test() {
        let model_path = PathBuf::from("tests/data/model.md");
        let model = DataModel::from_markdown(&model_path).unwrap();
        let exported = serialize_linkml(model, None).unwrap();
        let actual: LinkML = serde_yaml::from_str(&exported).unwrap();

        let fixture = std::fs::read_to_string("tests/data/expected_linkml.yml").unwrap();
        let expected: LinkML = serde_yaml::from_str(&fixture).unwrap();

        assert_eq!(actual, expected);
    }

    /// An object with a term and a pattern-constrained attribute yields a
    /// class with description, URI, `is_a` and slot usage populated.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn test_class_definition_conversion() {
        let mut pattern_attr = Attribute::default();
        pattern_attr.name = "test_attr".to_string();
        pattern_attr.dtypes = vec!["string".to_string()];
        pattern_attr.options = vec![AttrOption::Pattern("^test.*$".to_string())];

        let mut object = Object::default();
        object.name = "TestClass".to_string();
        object.docstring = "Test description".to_string();
        object.term = Some("http://example.org/TestClass".to_string());
        object.attributes = vec![pattern_attr];

        let converted = ClassDefinition::from(object);

        assert_eq!(converted.description.as_deref(), Some("Test description"));
        assert_eq!(
            converted.class_uri.as_deref(),
            Some("http://example.org/TestClass")
        );
        assert!(converted.is_a.is_some());
        assert!(converted.slot_usage.is_some());
    }

    /// Flags, data type and documentation of an attribute are carried over.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn test_attribute_definition_conversion() {
        let mut source = Attribute::default();
        source.docstring = "Test attribute".to_string();
        source.dtypes = vec!["integer".to_string()];
        source.is_array = true;
        source.is_id = true;
        source.required = true;

        let converted = AttributeDefinition::from(source);

        assert_eq!(converted.multivalued, Some(true));
        assert_eq!(converted.range.as_deref(), Some("integer"));
        assert_eq!(converted.description.as_deref(), Some("Test attribute"));
        assert_eq!(converted.identifier, Some(true));
        assert_eq!(converted.required, Some(true));
    }

    /// Every mapping becomes a permissible value whose meaning is the mapped
    /// value.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn test_enum_definition_conversion() {
        let mut mappings = BTreeMap::new();
        mappings.insert("KEY1".to_string(), "value1".to_string());
        mappings.insert("KEY2".to_string(), "value2".to_string());

        let mut source = Enumeration::default();
        source.docstring = "Test enum".to_string();
        source.mappings = mappings;

        let converted = EnumDefinition::from(source);

        assert_eq!(converted.description.as_deref(), Some("Test enum"));
        assert_eq!(converted.permissible_values.len(), 2);
        assert!(converted.permissible_values.contains_key("KEY1"));
        assert_eq!(
            converted.permissible_values["KEY1"].meaning.as_deref(),
            Some("value1")
        );
    }
}
451}