Skip to main content

data_modelling_core/import/
mod.rs

1//! Import functionality
2//!
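//! Provides parsers for importing data models from various formats:
//! - SQL (CREATE TABLE statements)
//! - ODCS (Open Data Contract Standard) v3.1.0 YAML format (legacy ODCL formats supported for import)
//! - JSON Schema
//! - Avro
//! - Protobuf
//! - Decision (MADR-compliant decision records)
//! - Knowledge (Knowledge Base articles)
//! - CADS, ODPS and Sketch (see the `cads`, `odps` and `sketch` submodules)
//! - BPMN, DMN and OpenAPI (only compiled when the `bpmn`, `dmn` and `openapi` cargo features are enabled)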
11
12pub mod avro;
13#[cfg(feature = "bpmn")]
14pub mod bpmn;
15pub mod cads;
16pub mod decision;
17#[cfg(feature = "dmn")]
18pub mod dmn;
19pub mod json_schema;
20pub mod knowledge;
21pub mod odcl;
22pub mod odcs;
23pub mod odcs_shared;
24pub mod odps;
25#[cfg(feature = "openapi")]
26pub mod openapi;
27pub mod protobuf;
28pub mod sketch;
29pub mod sql;
30
31// anyhow::Result not currently used in this module
32
/// Result of an import operation.
///
/// Contains extracted tables and any errors/warnings from the import process.
///
/// The type is `#[must_use]`, so dropping a returned value without inspecting it
/// produces a compiler warning. No `rename_all` is applied here, so the serialized
/// keys are the snake_case field names as written below.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[must_use = "import results should be processed or errors checked"]
pub struct ImportResult {
    /// Tables extracted from the import
    pub tables: Vec<TableData>,
    /// Tables that require name input (for SQL imports with unnamed tables)
    pub tables_requiring_name: Vec<TableRequiringName>,
    /// Parse errors/warnings
    pub errors: Vec<ImportError>,
    /// AI suggestions as raw JSON values, when any were produced.
    /// NOTE(review): presumably `None` means no suggestions are available — confirm with the importers.
    pub ai_suggestions: Option<Vec<serde_json::Value>>,
}
48
/// Error during import
///
/// Every variant carries a human-readable message as a `String`. The `Display`
/// output is the variant-specific prefix from its `#[error(...)]` attribute
/// followed by that message. The enum also derives `Serialize`/`Deserialize`,
/// so values can be embedded in a serialized [`ImportResult`].
#[derive(Debug, thiserror::Error, serde::Serialize, serde::Deserialize)]
pub enum ImportError {
    /// Generic parse failure; displayed as `Parse error: <message>`.
    #[error("Parse error: {0}")]
    ParseError(String),
    /// Generic validation failure; displayed as `Validation error: <message>`.
    #[error("Validation error: {0}")]
    ValidationError(String),
    /// I/O failure, stored as a message string; displayed as `IO error: <message>`.
    #[error("IO error: {0}")]
    IoError(String),
    /// BPMN validation failure; displayed as `BPMN validation error: <message>`.
    #[error("BPMN validation error: {0}")]
    BPMNValidationError(String),
    /// DMN validation failure; displayed as `DMN validation error: <message>`.
    #[error("DMN validation error: {0}")]
    DMNValidationError(String),
    /// OpenAPI validation failure; displayed as `OpenAPI validation error: <message>`.
    #[error("OpenAPI validation error: {0}")]
    OpenAPIValidationError(String),
    /// BPMN parse failure; displayed as `BPMN parse error: <message>`.
    #[error("BPMN parse error: {0}")]
    BPMNParseError(String),
    /// DMN parse failure; displayed as `DMN parse error: <message>`.
    #[error("DMN parse error: {0}")]
    DMNParseError(String),
    /// OpenAPI parse failure; displayed as `OpenAPI parse error: <message>`.
    #[error("OpenAPI parse error: {0}")]
    OpenAPIParseError(String),
}
71
/// Table data from import - preserves all ODCS v3.1.0 contract-level fields
///
/// Serialized with camelCase keys (`rename_all = "camelCase"`), e.g. `table_index`
/// becomes `tableIndex`. Most optional fields are omitted from the output when they
/// are `None` or empty. When deserializing, `table_index` and `columns` carry no
/// serde default and therefore must be present in the input.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TableData {
    /// Index of this table in the import result
    pub table_index: usize,

    // === ODCS Contract Identity Fields ===
    /// Table/Contract UUID from ODCS `id` field (preserved from source file)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Contract/table name (ODCS: name)
    ///
    /// Unlike the other optional fields there is no `skip_serializing_if` here,
    /// so a missing name is serialized explicitly as `null`.
    pub name: Option<String>,
    /// ODCS API version (e.g., "v3.1.0")
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_version: Option<String>,
    /// Contract version (ODCS: version)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,
    /// Contract status (ODCS: status) - e.g., "draft", "active", "deprecated"
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status: Option<String>,
    /// Contract kind (ODCS: kind) - typically "DataContract"
    #[serde(skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,

    // === Domain & Organization ===
    /// Domain name (ODCS: domain)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
    /// Data product name (ODCS: dataProduct)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data_product: Option<String>,
    /// Tenant identifier (ODCS: tenant)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tenant: Option<String>,

    // === Description (ODCS description object) ===
    /// High-level description object containing usage, purpose, limitations
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<serde_json::Value>,

    // === Schema Object Level Fields (ODCS v3.1.0) ===
    /// Physical name of the schema object (ODCS: schema[].physicalName)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub physical_name: Option<String>,
    /// Physical type of the schema object (ODCS: schema[].physicalType)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub physical_type: Option<String>,
    /// Business name of the schema object (ODCS: schema[].businessName)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub business_name: Option<String>,
    /// Data granularity description (ODCS: schema[].dataGranularityDescription)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data_granularity_description: Option<String>,

    // === Schema/Columns ===
    /// Column definitions (from ODCS schema.properties)
    ///
    /// Always serialized, even when empty, and required on deserialization
    /// (no `#[serde(default)]` on this field).
    pub columns: Vec<ColumnData>,

    // === Server Configuration ===
    /// Server definitions (ODCS: servers)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub servers: Vec<serde_json::Value>,

    // === Team & Support ===
    /// Team information (ODCS: team)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub team: Option<serde_json::Value>,
    /// Support information (ODCS: support)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub support: Option<serde_json::Value>,

    // === Roles & Access ===
    /// Role definitions (ODCS: roles)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub roles: Vec<serde_json::Value>,

    // === SLA & Quality ===
    /// SLA properties (ODCS: slaProperties)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub sla_properties: Vec<serde_json::Value>,
    /// Contract-level quality rules
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub quality: Vec<std::collections::HashMap<String, serde_json::Value>>,

    // === Pricing ===
    /// Pricing information (ODCS: price)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub price: Option<serde_json::Value>,

    // === Tags & Custom Properties ===
    /// Contract-level tags (ODCS: tags)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Custom properties (ODCS: customProperties)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub custom_properties: Vec<serde_json::Value>,
    /// Authoritative definitions (ODCS: authoritativeDefinitions)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub authoritative_definitions: Vec<serde_json::Value>,

    // === Timestamps ===
    /// Contract creation timestamp (ODCS: contractCreatedTs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub contract_created_ts: Option<String>,

    // === Legacy/Metadata Storage ===
    /// Additional ODCS metadata not captured in specific fields (for backward compatibility)
    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
    pub odcs_metadata: std::collections::HashMap<String, serde_json::Value>,
}
184
/// Column data from import - mirrors Column struct exactly to preserve all ODCS v3.1.0 fields
///
/// Serialized with camelCase keys (`rename_all = "camelCase"`). When deserializing,
/// `name` and `dataType` carry no serde default and must be present; boolean flags
/// default to `false` when absent, except `nullable`, which defaults to `true`.
/// `Default` is implemented by hand (rather than derived) so that `nullable` is
/// `true` there as well.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ColumnData {
    // === Core Identity Fields ===
    /// Stable technical identifier (ODCS: id)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Column name (ODCS: name)
    pub name: String,
    /// Business name for the column (ODCS: businessName)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub business_name: Option<String>,
    /// Column description/documentation (ODCS: description)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    // === Type Information ===
    /// Logical data type (ODCS: logicalType)
    ///
    /// Serialized under the key `dataType` (explicit rename).
    #[serde(rename = "dataType")]
    pub data_type: String,
    /// Physical database type (ODCS: physicalType)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub physical_type: Option<String>,
    /// Physical name in the data source (ODCS: physicalName)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub physical_name: Option<String>,
    /// Additional type options (ODCS: logicalTypeOptions)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logical_type_options: Option<crate::models::LogicalTypeOptions>,

    // === Key Constraints ===
    /// Whether this column is part of the primary key (ODCS: primaryKey)
    #[serde(default)]
    pub primary_key: bool,
    /// Position in composite primary key, 1-based (ODCS: primaryKeyPosition)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_key_position: Option<i32>,
    /// Whether the column contains unique values (ODCS: unique)
    #[serde(default)]
    pub unique: bool,
    /// Whether the column allows NULL values (inverse of ODCS: required)
    ///
    /// Defaults to `true` when the key is absent from the input (via `default_true`).
    #[serde(default = "default_true")]
    pub nullable: bool,

    // === Partitioning & Clustering ===
    /// Whether the column is used for partitioning (ODCS: partitioned)
    #[serde(default)]
    pub partitioned: bool,
    /// Position in partition key, 1-based (ODCS: partitionKeyPosition)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub partition_key_position: Option<i32>,
    /// Whether the column is used for clustering
    #[serde(default)]
    pub clustered: bool,

    // === Data Classification & Security ===
    /// Data classification level (ODCS: classification)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub classification: Option<String>,
    /// Whether this is a critical data element (ODCS: criticalDataElement)
    #[serde(default)]
    pub critical_data_element: bool,
    /// Name of the encrypted version of this column (ODCS: encryptedName)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encrypted_name: Option<String>,

    // === Transformation Metadata ===
    /// Source objects used in transformation (ODCS: transformSourceObjects)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub transform_source_objects: Vec<String>,
    /// Transformation logic/expression (ODCS: transformLogic)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transform_logic: Option<String>,
    /// Human-readable transformation description (ODCS: transformDescription)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transform_description: Option<String>,

    // === Examples & Documentation ===
    /// Example values for this column (ODCS: examples)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<serde_json::Value>,
    /// Default value for the column
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default_value: Option<serde_json::Value>,

    // === Relationships & References ===
    /// ODCS v3.1.0 relationships (property-level references)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub relationships: Vec<crate::models::PropertyRelationship>,
    /// Authoritative definitions (ODCS: authoritativeDefinitions)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub authoritative_definitions: Vec<crate::models::AuthoritativeDefinition>,

    // === Quality & Validation ===
    /// Quality rules and checks (ODCS: quality)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub quality: Option<Vec<std::collections::HashMap<String, serde_json::Value>>>,
    /// Enum values if this column is an enumeration type
    #[serde(skip_serializing_if = "Option::is_none")]
    pub enum_values: Option<Vec<String>>,

    // === Tags & Custom Properties ===
    /// Property-level tags (ODCS: tags)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Custom properties for format-specific metadata
    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
    pub custom_properties: std::collections::HashMap<String, serde_json::Value>,
}
295
/// Serde default provider that always yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on `ColumnData::nullable`, so a
/// column whose input omits the `nullable` key is treated as nullable. Declared
/// `const` so the same value can also be used in constant contexts.
const fn default_true() -> bool {
    true
}
299
300impl Default for ColumnData {
301    fn default() -> Self {
302        Self {
303            // Core Identity
304            id: None,
305            name: String::new(),
306            business_name: None,
307            description: None,
308            // Type Information
309            data_type: String::new(),
310            physical_type: None,
311            physical_name: None,
312            logical_type_options: None,
313            // Key Constraints
314            primary_key: false,
315            primary_key_position: None,
316            unique: false,
317            nullable: true,
318            // Partitioning & Clustering
319            partitioned: false,
320            partition_key_position: None,
321            clustered: false,
322            // Data Classification & Security
323            classification: None,
324            critical_data_element: false,
325            encrypted_name: None,
326            // Transformation Metadata
327            transform_source_objects: Vec::new(),
328            transform_logic: None,
329            transform_description: None,
330            // Examples & Documentation
331            examples: Vec::new(),
332            default_value: None,
333            // Relationships & References
334            relationships: Vec::new(),
335            authoritative_definitions: Vec::new(),
336            // Quality & Validation
337            quality: None,
338            enum_values: None,
339            // Tags & Custom Properties
340            tags: Vec::new(),
341            custom_properties: std::collections::HashMap::new(),
342        }
343    }
344}
345
346// Re-export for convenience
347pub use avro::AvroImporter;
348pub use cads::CADSImporter;
349pub use decision::DecisionImporter;
350pub use json_schema::JSONSchemaImporter;
351pub use knowledge::KnowledgeImporter;
352pub use odcl::ODCLImporter;
353pub use odcs::ODCSImporter;
354pub use odcs_shared::ParserError;
355pub use odps::ODPSImporter;
356pub use protobuf::ProtobufImporter;
357pub use sketch::SketchImporter;
358pub use sql::SQLImporter;
359
/// Table requiring name input (for SQL imports)
///
/// Instances are collected in `ImportResult::tables_requiring_name`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TableRequiringName {
    /// Index of the table that still needs a name.
    /// NOTE(review): presumably matches `TableData::table_index` — confirm against the SQL importer.
    pub table_index: usize,
    /// Suggested name for the table, if one is available.
    pub suggested_name: Option<String>,
}