data_modelling_core/convert/
openapi_to_odcs.rs

1//! OpenAPI to ODCS converter
2//!
3//! Provides functionality to convert OpenAPI schema components to ODCS table definitions.
4
5use crate::convert::ConversionError;
6use crate::models::{Column, Table};
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use serde_json::Value as JsonValue;
10use std::collections::HashMap;
11
12/// Strategy for handling nested objects in OpenAPI schemas
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
14#[serde(rename_all = "PascalCase")]
15pub enum NestedObjectStrategy {
16    /// Create separate tables for nested objects
17    SeparateTables,
18    /// Flatten nested objects into parent table
19    Flatten,
20    /// Hybrid: flatten simple objects, separate complex ones
21    Hybrid,
22}
23
24/// Type mapping rule for OpenAPI to ODCS conversion
25#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
26pub struct TypeMappingRule {
27    /// OpenAPI type
28    pub openapi_type: String,
29    /// OpenAPI format (if any)
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub openapi_format: Option<String>,
32    /// ODCS type
33    pub odcs_type: String,
34    /// Quality rules to apply
35    #[serde(default)]
36    pub quality_rules: Vec<serde_json::Value>,
37    /// Field name pattern (if any)
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub field_name: Option<String>,
40}
41
42/// Conversion report for OpenAPI to ODCS conversion
43#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
44pub struct ConversionReport {
45    /// Component name in OpenAPI
46    pub component_name: String,
47    /// Generated table name in ODCS
48    pub table_name: String,
49    /// Field mappings
50    #[serde(default)]
51    pub mappings: Vec<TypeMappingRule>,
52    /// Warnings during conversion
53    #[serde(default)]
54    pub warnings: Vec<String>,
55    /// Fields that were skipped
56    #[serde(default)]
57    pub skipped_fields: Vec<String>,
58    /// Estimated structure (for nested objects)
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub estimated_structure: Option<HashMap<String, serde_json::Value>>,
61}
62
63/// OpenAPI to ODCS Converter
64///
65/// Converts OpenAPI schema components to ODCS table definitions.
66#[derive(Debug)]
67pub struct OpenAPIToODCSConverter {
68    /// Strategy for handling nested objects
69    pub nested_object_strategy: NestedObjectStrategy,
70    /// Whether to flatten simple nested objects
71    pub flatten_simple_objects: bool,
72}
73
74impl Default for OpenAPIToODCSConverter {
75    fn default() -> Self {
76        Self {
77            nested_object_strategy: NestedObjectStrategy::Hybrid,
78            flatten_simple_objects: true,
79        }
80    }
81}
82
83impl OpenAPIToODCSConverter {
84    /// Create a new OpenAPI to ODCS converter with default settings
85    pub fn new() -> Self {
86        Self::default()
87    }
88
89    /// Create a new OpenAPI to ODCS converter with custom strategy
90    pub fn with_strategy(nested_object_strategy: NestedObjectStrategy) -> Self {
91        OpenAPIToODCSConverter {
92            nested_object_strategy,
93            flatten_simple_objects: matches!(
94                nested_object_strategy,
95                NestedObjectStrategy::Flatten | NestedObjectStrategy::Hybrid
96            ),
97        }
98    }
99
100    /// Convert an OpenAPI component to an ODCS table
101    ///
102    /// # Arguments
103    ///
104    /// * `openapi_content` - The OpenAPI YAML or JSON content.
105    /// * `component_name` - The name of the OpenAPI component to convert.
106    /// * `table_name` - Optional desired ODCS table name (uses component_name if None).
107    ///
108    /// # Returns
109    ///
110    /// A `Result` containing the converted ODCS Table.
111    pub fn convert_component(
112        &self,
113        openapi_content: &str,
114        component_name: &str,
115        table_name: Option<&str>,
116    ) -> Result<Table, ConversionError> {
117        // Parse OpenAPI content
118        let openapi_value: JsonValue = if openapi_content.trim_start().starts_with('{') {
119            serde_json::from_str(openapi_content).map_err(|e| {
120                ConversionError::OpenAPISchemaInvalid(format!("Invalid JSON: {}", e))
121            })?
122        } else {
123            serde_yaml::from_str(openapi_content).map_err(|e| {
124                ConversionError::OpenAPISchemaInvalid(format!("Invalid YAML: {}", e))
125            })?
126        };
127
128        // Extract components section
129        let components = openapi_value
130            .get("components")
131            .and_then(|v| v.get("schemas"))
132            .and_then(|v| v.as_object())
133            .ok_or_else(|| {
134                ConversionError::OpenAPIComponentNotFound(
135                    "components.schemas section not found".to_string(),
136                )
137            })?;
138
139        // Get the component schema
140        let component_schema = components
141            .get(component_name)
142            .ok_or_else(|| {
143                ConversionError::OpenAPIComponentNotFound(format!(
144                    "Component '{}' not found in schemas",
145                    component_name
146                ))
147            })?
148            .as_object()
149            .ok_or_else(|| {
150                ConversionError::OpenAPISchemaInvalid(format!(
151                    "Component '{}' is not an object",
152                    component_name
153                ))
154            })?;
155
156        // Determine table name
157        let target_table_name = table_name.unwrap_or(component_name);
158
159        // Convert schema to table
160        self.convert_schema_to_table(component_schema, target_table_name, component_name)
161    }
162
163    /// Convert an OpenAPI schema object to an ODCS table
164    fn convert_schema_to_table(
165        &self,
166        schema: &serde_json::Map<String, JsonValue>,
167        table_name: &str,
168        _component_name: &str,
169    ) -> Result<Table, ConversionError> {
170        let mut columns = Vec::new();
171        let mut warnings = Vec::new();
172
173        // Get properties
174        if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
175            // Get required fields
176            let required_fields: Vec<&str> = schema
177                .get("required")
178                .and_then(|v| v.as_array())
179                .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
180                .unwrap_or_default();
181
182            // Convert each property to a column
183            for (field_name, field_schema) in properties {
184                match self.convert_field_to_column(field_name, field_schema, &required_fields) {
185                    Ok(column) => columns.push(column),
186                    Err(e) => {
187                        warnings.push(format!("Failed to convert field '{}': {}", field_name, e));
188                    }
189                }
190            }
191        } else {
192            return Err(ConversionError::OpenAPISchemaInvalid(
193                "Schema has no properties".to_string(),
194            ));
195        }
196
197        // Create table
198        let table = Table::new(table_name.to_string(), columns);
199        Ok(table)
200    }
201
202    /// Convert an OpenAPI field schema to an ODCS column
203    fn convert_field_to_column(
204        &self,
205        field_name: &str,
206        field_schema: &JsonValue,
207        required_fields: &[&str],
208    ) -> Result<Column, ConversionError> {
209        let schema_obj = field_schema.as_object().ok_or_else(|| {
210            ConversionError::OpenAPISchemaInvalid("Field schema is not an object".to_string())
211        })?;
212
213        // Determine if field is required
214        let nullable = !required_fields.contains(&field_name);
215
216        // Get type
217        let openapi_type = schema_obj
218            .get("type")
219            .and_then(|v| v.as_str())
220            .ok_or_else(|| {
221                ConversionError::OpenAPISchemaInvalid(format!("Field '{}' has no type", field_name))
222            })?;
223
224        // Get format
225        let format = schema_obj.get("format").and_then(|v| v.as_str());
226
227        // Map OpenAPI type to ODCS type
228        let odcs_type = self.map_openapi_type_to_odcs(openapi_type, format)?;
229
230        // Create column
231        let mut column = Column::new(field_name.to_string(), odcs_type.clone());
232        column.nullable = nullable;
233
234        // Get description
235        if let Some(desc) = schema_obj.get("description").and_then(|v| v.as_str()) {
236            column.description = desc.to_string();
237        }
238
239        // Add quality rules for constraints
240        self.add_constraints_to_column(&mut column, schema_obj, openapi_type, format)?;
241
242        Ok(column)
243    }
244
245    /// Map OpenAPI type to ODCS type
246    fn map_openapi_type_to_odcs(
247        &self,
248        openapi_type: &str,
249        format: Option<&str>,
250    ) -> Result<String, ConversionError> {
251        match (openapi_type, format) {
252            ("string", Some("date")) => Ok("date".to_string()),
253            ("string", Some("date-time")) => Ok("timestamp".to_string()),
254            ("string", Some("email")) => Ok("text".to_string()),
255            ("string", Some("uri")) => Ok("text".to_string()),
256            ("string", Some("uuid")) => Ok("text".to_string()),
257            ("string", Some("password")) => Ok("text".to_string()),
258            ("string", _) => Ok("text".to_string()),
259            ("integer", _) => Ok("long".to_string()),
260            ("number", _) => Ok("double".to_string()),
261            ("boolean", _) => Ok("boolean".to_string()),
262            ("array", _) => Err(ConversionError::NestedObjectConversionFailed(
263                "Arrays require special handling - not yet implemented".to_string(),
264            )),
265            ("object", _) => Err(ConversionError::NestedObjectConversionFailed(
266                "Nested objects require special handling - not yet implemented".to_string(),
267            )),
268            _ => Err(ConversionError::UnsupportedFormat(format!(
269                "Unsupported OpenAPI type: {}",
270                openapi_type
271            ))),
272        }
273    }
274
275    /// Add constraints from OpenAPI schema to column quality rules
276    fn add_constraints_to_column(
277        &self,
278        column: &mut Column,
279        schema_obj: &serde_json::Map<String, JsonValue>,
280        openapi_type: &str,
281        format: Option<&str>,
282    ) -> Result<(), ConversionError> {
283        let mut quality_rules = Vec::new();
284
285        // Add format constraint
286        if let Some(fmt) = format {
287            let mut rule = HashMap::new();
288            rule.insert("type".to_string(), JsonValue::String("text".to_string()));
289            rule.insert(
290                "description".to_string(),
291                JsonValue::String(format!("Format: {}", fmt)),
292            );
293            rule.insert("format".to_string(), JsonValue::String(fmt.to_string()));
294            quality_rules.push(rule);
295        }
296
297        // Add minLength/maxLength for strings
298        if openapi_type == "string" {
299            if let Some(min_len) = schema_obj.get("minLength").and_then(|v| v.as_u64()) {
300                let mut rule = HashMap::new();
301                rule.insert("type".to_string(), JsonValue::String("text".to_string()));
302                rule.insert("minLength".to_string(), JsonValue::Number(min_len.into()));
303                quality_rules.push(rule);
304            }
305            if let Some(max_len) = schema_obj.get("maxLength").and_then(|v| v.as_u64()) {
306                let mut rule = HashMap::new();
307                rule.insert("type".to_string(), JsonValue::String("text".to_string()));
308                rule.insert("maxLength".to_string(), JsonValue::Number(max_len.into()));
309                quality_rules.push(rule);
310            }
311            if let Some(pattern) = schema_obj.get("pattern").and_then(|v| v.as_str()) {
312                let mut rule = HashMap::new();
313                rule.insert("type".to_string(), JsonValue::String("text".to_string()));
314                rule.insert(
315                    "pattern".to_string(),
316                    JsonValue::String(pattern.to_string()),
317                );
318                quality_rules.push(rule);
319            }
320        }
321
322        // Add minimum/maximum for numbers
323        if openapi_type == "integer" || openapi_type == "number" {
324            if let Some(min_val) = schema_obj.get("minimum")
325                && let Some(min_num) = min_val.as_number()
326            {
327                let mut rule = HashMap::new();
328                rule.insert("type".to_string(), JsonValue::String("sql".to_string()));
329                rule.insert(
330                    "mustBeGreaterThan".to_string(),
331                    JsonValue::Number(min_num.clone()),
332                );
333                quality_rules.push(rule);
334            }
335            if let Some(max_val) = schema_obj.get("maximum")
336                && let Some(max_num) = max_val.as_number()
337            {
338                let mut rule = HashMap::new();
339                rule.insert("type".to_string(), JsonValue::String("sql".to_string()));
340                rule.insert(
341                    "mustBeLessThan".to_string(),
342                    JsonValue::Number(max_num.clone()),
343                );
344                quality_rules.push(rule);
345            }
346        }
347
348        // Add enum values
349        if let Some(enum_values) = schema_obj.get("enum").and_then(|v| v.as_array()) {
350            let enum_strings: Vec<String> = enum_values
351                .iter()
352                .filter_map(|v| v.as_str().map(|s| s.to_string()))
353                .collect();
354            if !enum_strings.is_empty() {
355                column.enum_values = enum_strings;
356            }
357        }
358
359        column.quality = quality_rules;
360        Ok(())
361    }
362
363    /// Convert multiple OpenAPI components to ODCS tables
364    ///
365    /// # Arguments
366    ///
367    /// * `openapi_content` - The OpenAPI YAML or JSON content.
368    /// * `component_names` - Names of components to convert.
369    ///
370    /// # Returns
371    ///
372    /// A `Result` containing a vector of converted ODCS Tables.
373    pub fn convert_components(
374        &self,
375        openapi_content: &str,
376        component_names: &[&str],
377    ) -> Result<Vec<Table>, ConversionError> {
378        let mut tables = Vec::new();
379        for component_name in component_names {
380            match self.convert_component(openapi_content, component_name, None) {
381                Ok(table) => tables.push(table),
382                Err(e) => {
383                    return Err(ConversionError::OpenAPIToODCSError(format!(
384                        "Failed to convert component '{}': {}",
385                        component_name, e
386                    )));
387                }
388            }
389        }
390        Ok(tables)
391    }
392
393    /// Analyze an OpenAPI component for conversion feasibility
394    ///
395    /// # Arguments
396    ///
397    /// * `openapi_content` - The OpenAPI YAML or JSON content.
398    /// * `component_name` - The name of the OpenAPI component to analyze.
399    ///
400    /// # Returns
401    ///
402    /// A `Result` containing a conversion report with analysis.
403    pub fn analyze_conversion(
404        &self,
405        openapi_content: &str,
406        component_name: &str,
407    ) -> Result<ConversionReport, ConversionError> {
408        // Parse OpenAPI content
409        let openapi_value: JsonValue = if openapi_content.trim_start().starts_with('{') {
410            serde_json::from_str(openapi_content).map_err(|e| {
411                ConversionError::OpenAPISchemaInvalid(format!("Invalid JSON: {}", e))
412            })?
413        } else {
414            serde_yaml::from_str(openapi_content).map_err(|e| {
415                ConversionError::OpenAPISchemaInvalid(format!("Invalid YAML: {}", e))
416            })?
417        };
418
419        // Extract components section
420        let components = openapi_value
421            .get("components")
422            .and_then(|v| v.get("schemas"))
423            .and_then(|v| v.as_object())
424            .ok_or_else(|| {
425                ConversionError::OpenAPIComponentNotFound(
426                    "components.schemas section not found".to_string(),
427                )
428            })?;
429
430        // Get the component schema
431        let component_schema = components
432            .get(component_name)
433            .ok_or_else(|| {
434                ConversionError::OpenAPIComponentNotFound(format!(
435                    "Component '{}' not found in schemas",
436                    component_name
437                ))
438            })?
439            .as_object()
440            .ok_or_else(|| {
441                ConversionError::OpenAPISchemaInvalid(format!(
442                    "Component '{}' is not an object",
443                    component_name
444                ))
445            })?;
446
447        // Analyze the schema
448        let mut mappings = Vec::new();
449        let mut warnings = Vec::new();
450        let mut skipped_fields = Vec::new();
451
452        if let Some(properties) = component_schema
453            .get("properties")
454            .and_then(|v| v.as_object())
455        {
456            for (field_name, field_schema) in properties {
457                if let Some(schema_obj) = field_schema.as_object() {
458                    let openapi_type = schema_obj
459                        .get("type")
460                        .and_then(|v| v.as_str())
461                        .unwrap_or("unknown");
462                    let format = schema_obj.get("format").and_then(|v| v.as_str());
463
464                    match self.map_openapi_type_to_odcs(openapi_type, format) {
465                        Ok(odcs_type) => {
466                            mappings.push(TypeMappingRule {
467                                openapi_type: openapi_type.to_string(),
468                                openapi_format: format.map(|s| s.to_string()),
469                                odcs_type: odcs_type.clone(),
470                                quality_rules: Vec::new(), // Simplified for analysis
471                                field_name: Some(field_name.clone()),
472                            });
473                        }
474                        Err(e) => {
475                            warnings.push(format!("Field '{}': {}", field_name, e));
476                            skipped_fields.push(field_name.clone());
477                        }
478                    }
479                }
480            }
481        }
482
483        Ok(ConversionReport {
484            component_name: component_name.to_string(),
485            table_name: component_name.to_string(),
486            mappings,
487            warnings,
488            skipped_fields,
489            estimated_structure: None,
490        })
491    }
492}