data_modelling_sdk/convert/
converter.rs

1//! Universal format converter
2//!
3//! Converts any import format to ODCS v3.1.0 format.
4
5use crate::export::{ExportError, ODCSExporter};
6use crate::import::{
7    AvroImporter, CADSImporter, ImportError, JSONSchemaImporter, ODCSImporter, ODPSImporter,
8    ProtobufImporter, SQLImporter,
9};
10use crate::models::{DataModel, Domain};
11
12/// Error during format conversion
13#[derive(Debug, thiserror::Error)]
14pub enum ConversionError {
15    #[error("Import error: {0}")]
16    ImportError(#[from] ImportError),
17    #[error("Export error: {0}")]
18    ExportError(#[from] ExportError),
19    #[error("Unsupported format: {0}")]
20    UnsupportedFormat(String),
21    #[error("Auto-detection failed: {0}")]
22    AutoDetectionFailed(String),
23    #[error("OpenAPI to ODCS conversion error: {0}")]
24    OpenAPIToODCSError(String),
25    #[error("OpenAPI component not found: {0}")]
26    OpenAPIComponentNotFound(String),
27    #[error("OpenAPI schema invalid: {0}")]
28    OpenAPISchemaInvalid(String),
29    #[error("Nested object conversion failed: {0}")]
30    NestedObjectConversionFailed(String),
31}
32
33/// Convert any import format to ODCS v3.1.0 YAML format.
34///
35/// # Arguments
36///
37/// * `input` - Format-specific content as a string
38/// * `format` - Optional format identifier. If None, attempts auto-detection.
39///   Supported formats: "sql", "json_schema", "avro", "protobuf", "odcl", "odcs", "cads", "odps", "domain"
40///
41/// # Returns
42///
43/// ODCS v3.1.0 YAML string, or ConversionError
44pub fn convert_to_odcs(input: &str, format: Option<&str>) -> Result<String, ConversionError> {
45    // Determine format (auto-detect if not specified)
46    let detected_format = if let Some(fmt) = format {
47        fmt
48    } else {
49        auto_detect_format(input)?
50    };
51
52    // Import using appropriate importer
53    let import_result = match detected_format {
54        "odcs" => {
55            let mut importer = ODCSImporter::new();
56            importer
57                .import(input)
58                .map_err(ConversionError::ImportError)?
59        }
60        "odcl" => {
61            let mut importer = ODCSImporter::new();
62            importer
63                .import(input)
64                .map_err(ConversionError::ImportError)?
65        }
66        "sql" => {
67            let importer = SQLImporter::new("postgresql");
68            importer
69                .parse(input)
70                .map_err(|e| ConversionError::ImportError(ImportError::ParseError(e.to_string())))?
71        }
72        "json_schema" => {
73            let importer = JSONSchemaImporter::new();
74            importer
75                .import(input)
76                .map_err(ConversionError::ImportError)?
77        }
78        "avro" => {
79            let importer = AvroImporter::new();
80            importer
81                .import(input)
82                .map_err(ConversionError::ImportError)?
83        }
84        "protobuf" => {
85            let importer = ProtobufImporter::new();
86            importer
87                .import(input)
88                .map_err(ConversionError::ImportError)?
89        }
90        "cads" => {
91            // CADS assets are compute assets, not data contracts
92            // For CADS → ODCS conversion, we create a minimal ODCS representation
93            // that captures metadata but doesn't represent a true data contract
94            // This is a placeholder - full conversion would require understanding
95            // the data schema produced by the CADS asset
96            let importer = CADSImporter::new();
97            let _asset = importer
98                .import(input)
99                .map_err(ConversionError::ImportError)?;
100
101            // For now, return an error indicating CADS → ODCS conversion
102            // requires additional context about the data schema
103            return Err(ConversionError::UnsupportedFormat(
104                "CADS → ODCS conversion requires data schema information. CADS assets represent compute resources, not data contracts.".to_string()
105            ));
106        }
107        "odps" => {
108            // ODPS Data Products link to ODCS Tables via contractId
109            // For ODPS → ODCS conversion, we extract the referenced ODCS Tables
110            // from the input/output ports and export them
111            let importer = ODPSImporter::new();
112            let product = importer
113                .import(input)
114                .map_err(ConversionError::ImportError)?;
115
116            // Extract contractIds from input and output ports
117            let mut contract_ids = Vec::new();
118            if let Some(input_ports) = &product.input_ports {
119                for port in input_ports {
120                    contract_ids.push(port.contract_id.clone());
121                }
122            }
123            if let Some(output_ports) = &product.output_ports {
124                for port in output_ports {
125                    if let Some(contract_id) = &port.contract_id {
126                        contract_ids.push(contract_id.clone());
127                    }
128                }
129            }
130
131            if contract_ids.is_empty() {
132                return Err(ConversionError::UnsupportedFormat(
133                    "ODPS → ODCS conversion requires contractId references. No contractIds found in input/output ports.".to_string()
134                ));
135            }
136
137            // For now, return an error indicating that ODPS → ODCS conversion
138            // requires the actual ODCS Table definitions to be provided
139            // In a full implementation, we would look up the ODCS Tables by contractId
140            return Err(ConversionError::UnsupportedFormat(format!(
141                "ODPS → ODCS conversion requires ODCS Table definitions for contractIds: {}. Please provide the referenced ODCS Tables.",
142                contract_ids.join(", ")
143            )));
144        }
145        "domain" => {
146            // Domain schema stores references to ODCS Tables (ODCSNode with table_id)
147            // but doesn't contain the full Table definitions
148            // For Domain → ODCS conversion, we need the actual Table definitions
149            let domain: Domain = serde_yaml::from_str(input).map_err(|e| {
150                ConversionError::ImportError(ImportError::ParseError(format!(
151                    "Failed to parse Domain YAML: {}",
152                    e
153                )))
154            })?;
155
156            // Extract ODCS node references
157            let odcs_node_count = domain.odcs_nodes.len();
158            if odcs_node_count == 0 {
159                return Err(ConversionError::UnsupportedFormat(
160                    "Domain → ODCS conversion: Domain contains no ODCS nodes.".to_string(),
161                ));
162            }
163
164            // Domain schema only stores references, not full Table definitions
165            // To convert Domain → ODCS, we need the actual Table definitions
166            // This would require looking up Tables by table_id from a DataModel or similar
167            return Err(ConversionError::UnsupportedFormat(format!(
168                "Domain → ODCS conversion requires Table definitions. Domain contains {} ODCS node references, but full Table definitions must be provided separately (e.g., from a DataModel).",
169                odcs_node_count
170            )));
171        }
172        _ => {
173            return Err(ConversionError::UnsupportedFormat(
174                detected_format.to_string(),
175            ));
176        }
177    };
178
179    // Convert ImportResult to DataModel
180    // For conversion purposes, we create a temporary DataModel
181    // The actual table reconstruction from ColumnData would require additional logic
182    // For now, we'll create a minimal DataModel and export each table individually
183    let model = DataModel::new(
184        "converted_model".to_string(),
185        "".to_string(),
186        "".to_string(),
187    );
188
189    // If we have tables, export them using ODCSExporter
190    // Note: This is a simplified version - full implementation would reconstruct
191    // Table structs from TableData/ColumnData
192    if import_result.tables.is_empty() {
193        return Err(ConversionError::ImportError(ImportError::ParseError(
194            "No tables found in input".to_string(),
195        )));
196    }
197
198    // Export using ODCSExporter
199    // For now, we'll return a basic ODCS structure
200    // TODO: Reconstruct full Table structs from ImportResult for proper export
201    let exports = ODCSExporter::export_model(&model, None, "odcs_v3_1_0");
202
203    // Combine all YAML documents
204    let yaml_docs: Vec<String> = exports.values().cloned().collect();
205    Ok(yaml_docs.join("\n---\n"))
206}
207
208/// Auto-detect format from input content
209fn auto_detect_format(input: &str) -> Result<&str, ConversionError> {
210    // Check for ODCS format
211    if input.contains("apiVersion:") && input.contains("kind: DataContract") {
212        return Ok("odcs");
213    }
214
215    // Check for ODCL format
216    if input.contains("dataContractSpecification:") {
217        return Ok("odcl");
218    }
219
220    // Check for SQL format
221    if input.to_uppercase().contains("CREATE TABLE") {
222        return Ok("sql");
223    }
224
225    // Check for JSON Schema format
226    if input.trim_start().starts_with('{')
227        && (input.contains("\"$schema\"") || input.contains("\"type\""))
228    {
229        return Ok("json_schema");
230    }
231
232    // Check for AVRO format
233    if input.contains("\"type\"") && input.contains("\"fields\"") && input.contains("\"name\"") {
234        return Ok("avro");
235    }
236
237    // Check for Protobuf format
238    if input.contains("syntax") || input.contains("message") || input.contains("service") {
239        return Ok("protobuf");
240    }
241
242    // Check for CADS format
243    if input.contains("apiVersion:")
244        && (input.contains("kind: AIModel")
245            || input.contains("kind: MLPipeline")
246            || input.contains("kind: Application")
247            || input.contains("kind: ETLPipeline")
248            || input.contains("kind: SourceSystem")
249            || input.contains("kind: DestinationSystem"))
250    {
251        return Ok("cads");
252    }
253
254    // Check for ODPS format
255    if input.contains("apiVersion:") && input.contains("kind: DataProduct") {
256        return Ok("odps");
257    }
258
259    // Check for Domain format (Business Domain schema)
260    if input.contains("systems:")
261        && (input.contains("cads_nodes:") || input.contains("odcs_nodes:"))
262    {
263        return Ok("domain");
264    }
265
266    Err(ConversionError::AutoDetectionFailed(
267        "Could not auto-detect format. Please specify format explicitly.".to_string(),
268    ))
269}