data_modelling_core/import/
mod.rs

//! Import functionality
//!
//! Provides parsers for importing data models from various formats:
//! - SQL (CREATE TABLE statements)
//! - ODCS (Open Data Contract Standard) v3.1.0 YAML format (legacy ODCL formats supported for import)
//! - JSON Schema
//! - AVRO
//! - Protobuf
//! - Decision (MADR-compliant decision records)
//! - Knowledge (Knowledge Base articles)
11
12pub mod avro;
13#[cfg(feature = "bpmn")]
14pub mod bpmn;
15pub mod cads;
16pub mod decision;
17#[cfg(feature = "dmn")]
18pub mod dmn;
19pub mod json_schema;
20pub mod knowledge;
21pub mod odcl;
22pub mod odcs;
23pub mod odcs_shared;
24pub mod odps;
25#[cfg(feature = "openapi")]
26pub mod openapi;
27pub mod protobuf;
28pub mod sql;
29
30// anyhow::Result not currently used in this module
31
32/// Result of an import operation.
33///
34/// Contains extracted tables and any errors/warnings from the import process.
35#[derive(Debug, serde::Serialize, serde::Deserialize)]
36#[must_use = "import results should be processed or errors checked"]
37pub struct ImportResult {
38    /// Tables extracted from the import
39    pub tables: Vec<TableData>,
40    /// Tables that require name input (for SQL imports with unnamed tables)
41    pub tables_requiring_name: Vec<TableRequiringName>,
42    /// Parse errors/warnings
43    pub errors: Vec<ImportError>,
44    /// Whether AI suggestions are available
45    pub ai_suggestions: Option<Vec<serde_json::Value>>,
46}
47
48/// Error during import
49#[derive(Debug, thiserror::Error, serde::Serialize, serde::Deserialize)]
50pub enum ImportError {
51    #[error("Parse error: {0}")]
52    ParseError(String),
53    #[error("Validation error: {0}")]
54    ValidationError(String),
55    #[error("IO error: {0}")]
56    IoError(String),
57    #[error("BPMN validation error: {0}")]
58    BPMNValidationError(String),
59    #[error("DMN validation error: {0}")]
60    DMNValidationError(String),
61    #[error("OpenAPI validation error: {0}")]
62    OpenAPIValidationError(String),
63    #[error("BPMN parse error: {0}")]
64    BPMNParseError(String),
65    #[error("DMN parse error: {0}")]
66    DMNParseError(String),
67    #[error("OpenAPI parse error: {0}")]
68    OpenAPIParseError(String),
69}
70
71/// Table data from import - preserves all ODCS v3.1.0 contract-level fields
72#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
73#[serde(rename_all = "camelCase")]
74pub struct TableData {
75    /// Index of this table in the import result
76    pub table_index: usize,
77
78    // === ODCS Contract Identity Fields ===
79    /// Table/Contract UUID from ODCS `id` field (preserved from source file)
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub id: Option<String>,
82    /// Contract/table name (ODCS: name)
83    pub name: Option<String>,
84    /// ODCS API version (e.g., "v3.1.0")
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub api_version: Option<String>,
87    /// Contract version (ODCS: version)
88    #[serde(skip_serializing_if = "Option::is_none")]
89    pub version: Option<String>,
90    /// Contract status (ODCS: status) - e.g., "draft", "active", "deprecated"
91    #[serde(skip_serializing_if = "Option::is_none")]
92    pub status: Option<String>,
93    /// Contract kind (ODCS: kind) - typically "DataContract"
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub kind: Option<String>,
96
97    // === Domain & Organization ===
98    /// Domain name (ODCS: domain)
99    #[serde(skip_serializing_if = "Option::is_none")]
100    pub domain: Option<String>,
101    /// Data product name (ODCS: dataProduct)
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub data_product: Option<String>,
104    /// Tenant identifier (ODCS: tenant)
105    #[serde(skip_serializing_if = "Option::is_none")]
106    pub tenant: Option<String>,
107
108    // === Description (ODCS description object) ===
109    /// High-level description object containing usage, purpose, limitations
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub description: Option<serde_json::Value>,
112
113    // === Schema Object Level Fields (ODCS v3.1.0) ===
114    /// Physical name of the schema object (ODCS: schema[].physicalName)
115    #[serde(skip_serializing_if = "Option::is_none")]
116    pub physical_name: Option<String>,
117    /// Physical type of the schema object (ODCS: schema[].physicalType)
118    #[serde(skip_serializing_if = "Option::is_none")]
119    pub physical_type: Option<String>,
120    /// Business name of the schema object (ODCS: schema[].businessName)
121    #[serde(skip_serializing_if = "Option::is_none")]
122    pub business_name: Option<String>,
123    /// Data granularity description (ODCS: schema[].dataGranularityDescription)
124    #[serde(skip_serializing_if = "Option::is_none")]
125    pub data_granularity_description: Option<String>,
126
127    // === Schema/Columns ===
128    /// Column definitions (from ODCS schema.properties)
129    pub columns: Vec<ColumnData>,
130
131    // === Server Configuration ===
132    /// Server definitions (ODCS: servers)
133    #[serde(default, skip_serializing_if = "Vec::is_empty")]
134    pub servers: Vec<serde_json::Value>,
135
136    // === Team & Support ===
137    /// Team information (ODCS: team)
138    #[serde(skip_serializing_if = "Option::is_none")]
139    pub team: Option<serde_json::Value>,
140    /// Support information (ODCS: support)
141    #[serde(skip_serializing_if = "Option::is_none")]
142    pub support: Option<serde_json::Value>,
143
144    // === Roles & Access ===
145    /// Role definitions (ODCS: roles)
146    #[serde(default, skip_serializing_if = "Vec::is_empty")]
147    pub roles: Vec<serde_json::Value>,
148
149    // === SLA & Quality ===
150    /// SLA properties (ODCS: slaProperties)
151    #[serde(default, skip_serializing_if = "Vec::is_empty")]
152    pub sla_properties: Vec<serde_json::Value>,
153    /// Contract-level quality rules
154    #[serde(default, skip_serializing_if = "Vec::is_empty")]
155    pub quality: Vec<std::collections::HashMap<String, serde_json::Value>>,
156
157    // === Pricing ===
158    /// Pricing information (ODCS: price)
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub price: Option<serde_json::Value>,
161
162    // === Tags & Custom Properties ===
163    /// Contract-level tags (ODCS: tags)
164    #[serde(default, skip_serializing_if = "Vec::is_empty")]
165    pub tags: Vec<String>,
166    /// Custom properties (ODCS: customProperties)
167    #[serde(default, skip_serializing_if = "Vec::is_empty")]
168    pub custom_properties: Vec<serde_json::Value>,
169    /// Authoritative definitions (ODCS: authoritativeDefinitions)
170    #[serde(default, skip_serializing_if = "Vec::is_empty")]
171    pub authoritative_definitions: Vec<serde_json::Value>,
172
173    // === Timestamps ===
174    /// Contract creation timestamp (ODCS: contractCreatedTs)
175    #[serde(skip_serializing_if = "Option::is_none")]
176    pub contract_created_ts: Option<String>,
177
178    // === Legacy/Metadata Storage ===
179    /// Additional ODCS metadata not captured in specific fields (for backward compatibility)
180    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
181    pub odcs_metadata: std::collections::HashMap<String, serde_json::Value>,
182}
183
184/// Column data from import - mirrors Column struct exactly to preserve all ODCS v3.1.0 fields
185#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
186#[serde(rename_all = "camelCase")]
187pub struct ColumnData {
188    // === Core Identity Fields ===
189    /// Stable technical identifier (ODCS: id)
190    #[serde(skip_serializing_if = "Option::is_none")]
191    pub id: Option<String>,
192    /// Column name (ODCS: name)
193    pub name: String,
194    /// Business name for the column (ODCS: businessName)
195    #[serde(skip_serializing_if = "Option::is_none")]
196    pub business_name: Option<String>,
197    /// Column description/documentation (ODCS: description)
198    #[serde(skip_serializing_if = "Option::is_none")]
199    pub description: Option<String>,
200
201    // === Type Information ===
202    /// Logical data type (ODCS: logicalType)
203    #[serde(rename = "dataType")]
204    pub data_type: String,
205    /// Physical database type (ODCS: physicalType)
206    #[serde(skip_serializing_if = "Option::is_none")]
207    pub physical_type: Option<String>,
208    /// Physical name in the data source (ODCS: physicalName)
209    #[serde(skip_serializing_if = "Option::is_none")]
210    pub physical_name: Option<String>,
211    /// Additional type options (ODCS: logicalTypeOptions)
212    #[serde(skip_serializing_if = "Option::is_none")]
213    pub logical_type_options: Option<crate::models::LogicalTypeOptions>,
214
215    // === Key Constraints ===
216    /// Whether this column is part of the primary key (ODCS: primaryKey)
217    #[serde(default)]
218    pub primary_key: bool,
219    /// Position in composite primary key, 1-based (ODCS: primaryKeyPosition)
220    #[serde(skip_serializing_if = "Option::is_none")]
221    pub primary_key_position: Option<i32>,
222    /// Whether the column contains unique values (ODCS: unique)
223    #[serde(default)]
224    pub unique: bool,
225    /// Whether the column allows NULL values (inverse of ODCS: required)
226    #[serde(default = "default_true")]
227    pub nullable: bool,
228
229    // === Partitioning & Clustering ===
230    /// Whether the column is used for partitioning (ODCS: partitioned)
231    #[serde(default)]
232    pub partitioned: bool,
233    /// Position in partition key, 1-based (ODCS: partitionKeyPosition)
234    #[serde(skip_serializing_if = "Option::is_none")]
235    pub partition_key_position: Option<i32>,
236    /// Whether the column is used for clustering
237    #[serde(default)]
238    pub clustered: bool,
239
240    // === Data Classification & Security ===
241    /// Data classification level (ODCS: classification)
242    #[serde(skip_serializing_if = "Option::is_none")]
243    pub classification: Option<String>,
244    /// Whether this is a critical data element (ODCS: criticalDataElement)
245    #[serde(default)]
246    pub critical_data_element: bool,
247    /// Name of the encrypted version of this column (ODCS: encryptedName)
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub encrypted_name: Option<String>,
250
251    // === Transformation Metadata ===
252    /// Source objects used in transformation (ODCS: transformSourceObjects)
253    #[serde(default, skip_serializing_if = "Vec::is_empty")]
254    pub transform_source_objects: Vec<String>,
255    /// Transformation logic/expression (ODCS: transformLogic)
256    #[serde(skip_serializing_if = "Option::is_none")]
257    pub transform_logic: Option<String>,
258    /// Human-readable transformation description (ODCS: transformDescription)
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub transform_description: Option<String>,
261
262    // === Examples & Documentation ===
263    /// Example values for this column (ODCS: examples)
264    #[serde(default, skip_serializing_if = "Vec::is_empty")]
265    pub examples: Vec<serde_json::Value>,
266    /// Default value for the column
267    #[serde(skip_serializing_if = "Option::is_none")]
268    pub default_value: Option<serde_json::Value>,
269
270    // === Relationships & References ===
271    /// ODCS v3.1.0 relationships (property-level references)
272    #[serde(default, skip_serializing_if = "Vec::is_empty")]
273    pub relationships: Vec<crate::models::PropertyRelationship>,
274    /// Authoritative definitions (ODCS: authoritativeDefinitions)
275    #[serde(default, skip_serializing_if = "Vec::is_empty")]
276    pub authoritative_definitions: Vec<crate::models::AuthoritativeDefinition>,
277
278    // === Quality & Validation ===
279    /// Quality rules and checks (ODCS: quality)
280    #[serde(skip_serializing_if = "Option::is_none")]
281    pub quality: Option<Vec<std::collections::HashMap<String, serde_json::Value>>>,
282    /// Enum values if this column is an enumeration type
283    #[serde(skip_serializing_if = "Option::is_none")]
284    pub enum_values: Option<Vec<String>>,
285
286    // === Tags & Custom Properties ===
287    /// Property-level tags (ODCS: tags)
288    #[serde(default, skip_serializing_if = "Vec::is_empty")]
289    pub tags: Vec<String>,
290    /// Custom properties for format-specific metadata
291    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
292    pub custom_properties: std::collections::HashMap<String, serde_json::Value>,
293}
294
/// Serde default provider that always yields `true`.
///
/// Referenced by name from `#[serde(default = "default_true")]` so that a
/// boolean field missing from the input deserializes as `true`.
fn default_true() -> bool {
    true
}
298
299impl Default for ColumnData {
300    fn default() -> Self {
301        Self {
302            // Core Identity
303            id: None,
304            name: String::new(),
305            business_name: None,
306            description: None,
307            // Type Information
308            data_type: String::new(),
309            physical_type: None,
310            physical_name: None,
311            logical_type_options: None,
312            // Key Constraints
313            primary_key: false,
314            primary_key_position: None,
315            unique: false,
316            nullable: true,
317            // Partitioning & Clustering
318            partitioned: false,
319            partition_key_position: None,
320            clustered: false,
321            // Data Classification & Security
322            classification: None,
323            critical_data_element: false,
324            encrypted_name: None,
325            // Transformation Metadata
326            transform_source_objects: Vec::new(),
327            transform_logic: None,
328            transform_description: None,
329            // Examples & Documentation
330            examples: Vec::new(),
331            default_value: None,
332            // Relationships & References
333            relationships: Vec::new(),
334            authoritative_definitions: Vec::new(),
335            // Quality & Validation
336            quality: None,
337            enum_values: None,
338            // Tags & Custom Properties
339            tags: Vec::new(),
340            custom_properties: std::collections::HashMap::new(),
341        }
342    }
343}
344
345// Re-export for convenience
346pub use avro::AvroImporter;
347pub use cads::CADSImporter;
348pub use decision::DecisionImporter;
349pub use json_schema::JSONSchemaImporter;
350pub use knowledge::KnowledgeImporter;
351pub use odcl::ODCLImporter;
352pub use odcs::ODCSImporter;
353pub use odcs_shared::ParserError;
354pub use odps::ODPSImporter;
355pub use protobuf::ProtobufImporter;
356pub use sql::SQLImporter;
357
358/// Table requiring name input (for SQL imports)
359#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
360pub struct TableRequiringName {
361    pub table_index: usize,
362    pub suggested_name: Option<String>,
363}