// Source: data_modelling_core/import/mod.rs

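//! Import functionality
//!
//! Provides parsers for importing data models from various formats:
//! - SQL (`CREATE TABLE` statements)
//! - ODCS (Open Data Contract Standard) v3.1.0 YAML format (legacy ODCL formats are also supported for import)
//! - JSON Schema
//! - AVRO
//! - Protobuf
//! - Decision (MADR-compliant decision records)
//! - Knowledge (Knowledge Base articles)
//!
//! The `cads`, `dbmv`, `odps`, and `sketch` submodules provide further importers; the `bpmn`,
//! `dmn`, and `openapi` submodules are compiled only when the Cargo feature of the same name is
//! enabled.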
11
12pub mod avro;
13#[cfg(feature = "bpmn")]
14pub mod bpmn;
15pub mod cads;
16pub mod dbmv;
17pub mod decision;
18#[cfg(feature = "dmn")]
19pub mod dmn;
20pub mod json_schema;
21pub mod knowledge;
22pub mod odcl;
23pub mod odcs;
24pub mod odcs_shared;
25pub mod odps;
26#[cfg(feature = "openapi")]
27pub mod openapi;
28pub mod protobuf;
29pub mod sketch;
30pub mod sql;
31
// NOTE: `anyhow::Result` is not currently used in this module.
33
34/// Result of an import operation.
35///
36/// Contains extracted tables and any errors/warnings from the import process.
37#[derive(Debug, serde::Serialize, serde::Deserialize)]
38#[must_use = "import results should be processed or errors checked"]
39pub struct ImportResult {
40    /// Tables extracted from the import
41    pub tables: Vec<TableData>,
42    /// Tables that require name input (for SQL imports with unnamed tables)
43    pub tables_requiring_name: Vec<TableRequiringName>,
44    /// Parse errors/warnings
45    pub errors: Vec<ImportError>,
46    /// Whether AI suggestions are available
47    pub ai_suggestions: Option<Vec<serde_json::Value>>,
48}
49
50/// Error during import
51#[derive(Debug, thiserror::Error, serde::Serialize, serde::Deserialize)]
52pub enum ImportError {
53    #[error("Parse error: {0}")]
54    ParseError(String),
55    #[error("Validation error: {0}")]
56    ValidationError(String),
57    #[error("IO error: {0}")]
58    IoError(String),
59    #[error("BPMN validation error: {0}")]
60    BPMNValidationError(String),
61    #[error("DMN validation error: {0}")]
62    DMNValidationError(String),
63    #[error("OpenAPI validation error: {0}")]
64    OpenAPIValidationError(String),
65    #[error("BPMN parse error: {0}")]
66    BPMNParseError(String),
67    #[error("DMN parse error: {0}")]
68    DMNParseError(String),
69    #[error("OpenAPI parse error: {0}")]
70    OpenAPIParseError(String),
71}
72
73/// Table data from import - preserves all ODCS v3.1.0 contract-level fields
74#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
75#[serde(rename_all = "camelCase")]
76pub struct TableData {
77    /// Index of this table in the import result
78    pub table_index: usize,
79
80    // === ODCS Contract Identity Fields ===
81    /// Table/Contract UUID from ODCS `id` field (preserved from source file)
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub id: Option<String>,
84    /// Contract/table name (ODCS: name)
85    pub name: Option<String>,
86    /// ODCS API version (e.g., "v3.1.0")
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub api_version: Option<String>,
89    /// Contract version (ODCS: version)
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub version: Option<String>,
92    /// Contract status (ODCS: status) - e.g., "draft", "active", "deprecated"
93    #[serde(skip_serializing_if = "Option::is_none")]
94    pub status: Option<String>,
95    /// Contract kind (ODCS: kind) - typically "DataContract"
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub kind: Option<String>,
98
99    // === Domain & Organization ===
100    /// Domain name (ODCS: domain)
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub domain: Option<String>,
103    /// Data product name (ODCS: dataProduct)
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub data_product: Option<String>,
106    /// Tenant identifier (ODCS: tenant)
107    #[serde(skip_serializing_if = "Option::is_none")]
108    pub tenant: Option<String>,
109
110    // === Description (ODCS description object) ===
111    /// High-level description object containing usage, purpose, limitations
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub description: Option<serde_json::Value>,
114
115    // === Schema Object Level Fields (ODCS v3.1.0) ===
116    /// Physical name of the schema object (ODCS: schema[].physicalName)
117    #[serde(skip_serializing_if = "Option::is_none")]
118    pub physical_name: Option<String>,
119    /// Physical type of the schema object (ODCS: schema[].physicalType)
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub physical_type: Option<String>,
122    /// Business name of the schema object (ODCS: schema[].businessName)
123    #[serde(skip_serializing_if = "Option::is_none")]
124    pub business_name: Option<String>,
125    /// Data granularity description (ODCS: schema[].dataGranularityDescription)
126    #[serde(skip_serializing_if = "Option::is_none")]
127    pub data_granularity_description: Option<String>,
128
129    // === Schema/Columns ===
130    /// Column definitions (from ODCS schema.properties)
131    pub columns: Vec<ColumnData>,
132
133    // === Server Configuration ===
134    /// Server definitions (ODCS: servers)
135    #[serde(default, skip_serializing_if = "Vec::is_empty")]
136    pub servers: Vec<serde_json::Value>,
137
138    // === Team & Support ===
139    /// Team information (ODCS: team)
140    #[serde(skip_serializing_if = "Option::is_none")]
141    pub team: Option<serde_json::Value>,
142    /// Support information (ODCS: support)
143    #[serde(skip_serializing_if = "Option::is_none")]
144    pub support: Option<serde_json::Value>,
145
146    // === Roles & Access ===
147    /// Role definitions (ODCS: roles)
148    #[serde(default, skip_serializing_if = "Vec::is_empty")]
149    pub roles: Vec<serde_json::Value>,
150
151    // === SLA & Quality ===
152    /// SLA properties (ODCS: slaProperties)
153    #[serde(default, skip_serializing_if = "Vec::is_empty")]
154    pub sla_properties: Vec<serde_json::Value>,
155    /// Contract-level quality rules
156    #[serde(default, skip_serializing_if = "Vec::is_empty")]
157    pub quality: Vec<std::collections::HashMap<String, serde_json::Value>>,
158
159    // === Pricing ===
160    /// Pricing information (ODCS: price)
161    #[serde(skip_serializing_if = "Option::is_none")]
162    pub price: Option<serde_json::Value>,
163
164    // === Tags & Custom Properties ===
165    /// Contract-level tags (ODCS: tags)
166    #[serde(default, skip_serializing_if = "Vec::is_empty")]
167    pub tags: Vec<String>,
168    /// Custom properties (ODCS: customProperties)
169    #[serde(default, skip_serializing_if = "Vec::is_empty")]
170    pub custom_properties: Vec<serde_json::Value>,
171    /// Authoritative definitions (ODCS: authoritativeDefinitions)
172    #[serde(default, skip_serializing_if = "Vec::is_empty")]
173    pub authoritative_definitions: Vec<serde_json::Value>,
174
175    // === Timestamps ===
176    /// Contract creation timestamp (ODCS: contractCreatedTs)
177    #[serde(skip_serializing_if = "Option::is_none")]
178    pub contract_created_ts: Option<String>,
179
180    // === Legacy/Metadata Storage ===
181    /// Additional ODCS metadata not captured in specific fields (for backward compatibility)
182    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
183    pub odcs_metadata: std::collections::HashMap<String, serde_json::Value>,
184}
185
186/// Column data from import - mirrors Column struct exactly to preserve all ODCS v3.1.0 fields
187#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
188#[serde(rename_all = "camelCase")]
189pub struct ColumnData {
190    // === Core Identity Fields ===
191    /// Stable technical identifier (ODCS: id)
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub id: Option<String>,
194    /// Column name (ODCS: name)
195    pub name: String,
196    /// Business name for the column (ODCS: businessName)
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub business_name: Option<String>,
199    /// Column description/documentation (ODCS: description)
200    #[serde(skip_serializing_if = "Option::is_none")]
201    pub description: Option<String>,
202
203    // === Type Information ===
204    /// Logical data type (ODCS: logicalType)
205    #[serde(rename = "dataType")]
206    pub data_type: String,
207    /// Physical database type (ODCS: physicalType)
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub physical_type: Option<String>,
210    /// Physical name in the data source (ODCS: physicalName)
211    #[serde(skip_serializing_if = "Option::is_none")]
212    pub physical_name: Option<String>,
213    /// Additional type options (ODCS: logicalTypeOptions)
214    #[serde(skip_serializing_if = "Option::is_none")]
215    pub logical_type_options: Option<crate::models::LogicalTypeOptions>,
216
217    // === Key Constraints ===
218    /// Whether this column is part of the primary key (ODCS: primaryKey)
219    #[serde(default)]
220    pub primary_key: bool,
221    /// Position in composite primary key, 1-based (ODCS: primaryKeyPosition)
222    #[serde(skip_serializing_if = "Option::is_none")]
223    pub primary_key_position: Option<i32>,
224    /// Whether the column contains unique values (ODCS: unique)
225    #[serde(default)]
226    pub unique: bool,
227    /// Whether the column allows NULL values (inverse of ODCS: required)
228    #[serde(default = "default_true")]
229    pub nullable: bool,
230
231    // === Partitioning & Clustering ===
232    /// Whether the column is used for partitioning (ODCS: partitioned)
233    #[serde(default)]
234    pub partitioned: bool,
235    /// Position in partition key, 1-based (ODCS: partitionKeyPosition)
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub partition_key_position: Option<i32>,
238    /// Whether the column is used for clustering
239    #[serde(default)]
240    pub clustered: bool,
241
242    // === Data Classification & Security ===
243    /// Data classification level (ODCS: classification)
244    #[serde(skip_serializing_if = "Option::is_none")]
245    pub classification: Option<String>,
246    /// Whether this is a critical data element (ODCS: criticalDataElement)
247    #[serde(default)]
248    pub critical_data_element: bool,
249    /// Name of the encrypted version of this column (ODCS: encryptedName)
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub encrypted_name: Option<String>,
252
253    // === Transformation Metadata ===
254    /// Source objects used in transformation (ODCS: transformSourceObjects)
255    #[serde(default, skip_serializing_if = "Vec::is_empty")]
256    pub transform_source_objects: Vec<String>,
257    /// Transformation logic/expression (ODCS: transformLogic)
258    #[serde(skip_serializing_if = "Option::is_none")]
259    pub transform_logic: Option<String>,
260    /// Human-readable transformation description (ODCS: transformDescription)
261    #[serde(skip_serializing_if = "Option::is_none")]
262    pub transform_description: Option<String>,
263
264    // === Examples & Documentation ===
265    /// Example values for this column (ODCS: examples)
266    #[serde(default, skip_serializing_if = "Vec::is_empty")]
267    pub examples: Vec<serde_json::Value>,
268    /// Default value for the column
269    #[serde(skip_serializing_if = "Option::is_none")]
270    pub default_value: Option<serde_json::Value>,
271
272    // === Relationships & References ===
273    /// ODCS v3.1.0 relationships (property-level references)
274    #[serde(default, skip_serializing_if = "Vec::is_empty")]
275    pub relationships: Vec<crate::models::PropertyRelationship>,
276    /// Authoritative definitions (ODCS: authoritativeDefinitions)
277    #[serde(default, skip_serializing_if = "Vec::is_empty")]
278    pub authoritative_definitions: Vec<crate::models::AuthoritativeDefinition>,
279
280    // === Quality & Validation ===
281    /// Quality rules and checks (ODCS: quality)
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub quality: Option<Vec<std::collections::HashMap<String, serde_json::Value>>>,
284    /// Enum values if this column is an enumeration type
285    #[serde(skip_serializing_if = "Option::is_none")]
286    pub enum_values: Option<Vec<String>>,
287
288    // === Tags & Custom Properties ===
289    /// Property-level tags (ODCS: tags)
290    #[serde(default, skip_serializing_if = "Vec::is_empty")]
291    pub tags: Vec<String>,
292    /// Custom properties for format-specific metadata
293    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
294    pub custom_properties: std::collections::HashMap<String, serde_json::Value>,
295}
296
/// Serde default provider that always yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on `ColumnData::nullable`, so a column
/// deserialized without a `nullable` key is treated as nullable.
fn default_true() -> bool {
    true
}
300
301impl Default for ColumnData {
302    fn default() -> Self {
303        Self {
304            // Core Identity
305            id: None,
306            name: String::new(),
307            business_name: None,
308            description: None,
309            // Type Information
310            data_type: String::new(),
311            physical_type: None,
312            physical_name: None,
313            logical_type_options: None,
314            // Key Constraints
315            primary_key: false,
316            primary_key_position: None,
317            unique: false,
318            nullable: true,
319            // Partitioning & Clustering
320            partitioned: false,
321            partition_key_position: None,
322            clustered: false,
323            // Data Classification & Security
324            classification: None,
325            critical_data_element: false,
326            encrypted_name: None,
327            // Transformation Metadata
328            transform_source_objects: Vec::new(),
329            transform_logic: None,
330            transform_description: None,
331            // Examples & Documentation
332            examples: Vec::new(),
333            default_value: None,
334            // Relationships & References
335            relationships: Vec::new(),
336            authoritative_definitions: Vec::new(),
337            // Quality & Validation
338            quality: None,
339            enum_values: None,
340            // Tags & Custom Properties
341            tags: Vec::new(),
342            custom_properties: std::collections::HashMap::new(),
343        }
344    }
345}
346
347// Re-export for convenience
348pub use avro::AvroImporter;
349pub use cads::CADSImporter;
350pub use dbmv::DBMVImporter;
351pub use decision::DecisionImporter;
352pub use json_schema::JSONSchemaImporter;
353pub use knowledge::KnowledgeImporter;
354pub use odcl::ODCLImporter;
355pub use odcs::ODCSImporter;
356pub use odcs_shared::ParserError;
357pub use odps::ODPSImporter;
358pub use protobuf::ProtobufImporter;
359pub use sketch::SketchImporter;
360pub use sql::SQLImporter;
361
362/// Table requiring name input (for SQL imports)
363#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
364pub struct TableRequiringName {
365    pub table_index: usize,
366    pub suggested_name: Option<String>,
367}