// data_modelling_core/import/mod.rs

//! Import functionality
//!
//! Provides parsers for importing data models from various formats:
//! - SQL (CREATE TABLE statements)
//! - ODCS (Open Data Contract Standard) v3.1.0 YAML format (legacy ODCL formats supported for import)
//! - JSON Schema
//! - AVRO
//! - Protobuf
//! - Decision (MADR-compliant decision records)
//! - Knowledge (Knowledge Base articles)
//! - CADS
//! - ODPS
//! - BPMN, DMN and OpenAPI (each only when the cargo feature of the same name is enabled)

12pub mod avro;
13#[cfg(feature = "bpmn")]
14pub mod bpmn;
15pub mod cads;
16pub mod decision;
17#[cfg(feature = "dmn")]
18pub mod dmn;
19pub mod json_schema;
20pub mod knowledge;
21pub mod odcl;
22pub mod odcs;
23pub mod odcs_shared;
24pub mod odps;
25#[cfg(feature = "openapi")]
26pub mod openapi;
27pub mod protobuf;
28pub mod sql;
29
// Note: `anyhow::Result` is not currently used in this module.

32/// Result of an import operation.
33///
34/// Contains extracted tables and any errors/warnings from the import process.
35#[derive(Debug, serde::Serialize, serde::Deserialize)]
36#[must_use = "import results should be processed or errors checked"]
37pub struct ImportResult {
38    /// Tables extracted from the import
39    pub tables: Vec<TableData>,
40    /// Tables that require name input (for SQL imports with unnamed tables)
41    pub tables_requiring_name: Vec<TableRequiringName>,
42    /// Parse errors/warnings
43    pub errors: Vec<ImportError>,
44    /// Whether AI suggestions are available
45    pub ai_suggestions: Option<Vec<serde_json::Value>>,
46}
47
48/// Error during import
49#[derive(Debug, thiserror::Error, serde::Serialize, serde::Deserialize)]
50pub enum ImportError {
51    #[error("Parse error: {0}")]
52    ParseError(String),
53    #[error("Validation error: {0}")]
54    ValidationError(String),
55    #[error("IO error: {0}")]
56    IoError(String),
57    #[error("BPMN validation error: {0}")]
58    BPMNValidationError(String),
59    #[error("DMN validation error: {0}")]
60    DMNValidationError(String),
61    #[error("OpenAPI validation error: {0}")]
62    OpenAPIValidationError(String),
63    #[error("BPMN parse error: {0}")]
64    BPMNParseError(String),
65    #[error("DMN parse error: {0}")]
66    DMNParseError(String),
67    #[error("OpenAPI parse error: {0}")]
68    OpenAPIParseError(String),
69}
70
71/// Table data from import - preserves all ODCS v3.1.0 contract-level fields
72#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
73#[serde(rename_all = "camelCase")]
74pub struct TableData {
75    /// Index of this table in the import result
76    pub table_index: usize,
77
78    // === ODCS Contract Identity Fields ===
79    /// Table/Contract UUID from ODCS `id` field (preserved from source file)
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub id: Option<String>,
82    /// Contract/table name (ODCS: name)
83    pub name: Option<String>,
84    /// ODCS API version (e.g., "v3.1.0")
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub api_version: Option<String>,
87    /// Contract version (ODCS: version)
88    #[serde(skip_serializing_if = "Option::is_none")]
89    pub version: Option<String>,
90    /// Contract status (ODCS: status) - e.g., "draft", "active", "deprecated"
91    #[serde(skip_serializing_if = "Option::is_none")]
92    pub status: Option<String>,
93    /// Contract kind (ODCS: kind) - typically "DataContract"
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub kind: Option<String>,
96
97    // === Domain & Organization ===
98    /// Domain name (ODCS: domain)
99    #[serde(skip_serializing_if = "Option::is_none")]
100    pub domain: Option<String>,
101    /// Data product name (ODCS: dataProduct)
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub data_product: Option<String>,
104    /// Tenant identifier (ODCS: tenant)
105    #[serde(skip_serializing_if = "Option::is_none")]
106    pub tenant: Option<String>,
107
108    // === Description (ODCS description object) ===
109    /// High-level description object containing usage, purpose, limitations
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub description: Option<serde_json::Value>,
112
113    // === Schema/Columns ===
114    /// Column definitions (from ODCS schema.properties)
115    pub columns: Vec<ColumnData>,
116
117    // === Server Configuration ===
118    /// Server definitions (ODCS: servers)
119    #[serde(default, skip_serializing_if = "Vec::is_empty")]
120    pub servers: Vec<serde_json::Value>,
121
122    // === Team & Support ===
123    /// Team information (ODCS: team)
124    #[serde(skip_serializing_if = "Option::is_none")]
125    pub team: Option<serde_json::Value>,
126    /// Support information (ODCS: support)
127    #[serde(skip_serializing_if = "Option::is_none")]
128    pub support: Option<serde_json::Value>,
129
130    // === Roles & Access ===
131    /// Role definitions (ODCS: roles)
132    #[serde(default, skip_serializing_if = "Vec::is_empty")]
133    pub roles: Vec<serde_json::Value>,
134
135    // === SLA & Quality ===
136    /// SLA properties (ODCS: slaProperties)
137    #[serde(default, skip_serializing_if = "Vec::is_empty")]
138    pub sla_properties: Vec<serde_json::Value>,
139    /// Contract-level quality rules
140    #[serde(default, skip_serializing_if = "Vec::is_empty")]
141    pub quality: Vec<std::collections::HashMap<String, serde_json::Value>>,
142
143    // === Pricing ===
144    /// Pricing information (ODCS: price)
145    #[serde(skip_serializing_if = "Option::is_none")]
146    pub price: Option<serde_json::Value>,
147
148    // === Tags & Custom Properties ===
149    /// Contract-level tags (ODCS: tags)
150    #[serde(default, skip_serializing_if = "Vec::is_empty")]
151    pub tags: Vec<String>,
152    /// Custom properties (ODCS: customProperties)
153    #[serde(default, skip_serializing_if = "Vec::is_empty")]
154    pub custom_properties: Vec<serde_json::Value>,
155    /// Authoritative definitions (ODCS: authoritativeDefinitions)
156    #[serde(default, skip_serializing_if = "Vec::is_empty")]
157    pub authoritative_definitions: Vec<serde_json::Value>,
158
159    // === Timestamps ===
160    /// Contract creation timestamp (ODCS: contractCreatedTs)
161    #[serde(skip_serializing_if = "Option::is_none")]
162    pub contract_created_ts: Option<String>,
163
164    // === Legacy/Metadata Storage ===
165    /// Additional ODCS metadata not captured in specific fields (for backward compatibility)
166    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
167    pub odcs_metadata: std::collections::HashMap<String, serde_json::Value>,
168}
169
170/// Column data from import - mirrors Column struct exactly to preserve all ODCS v3.1.0 fields
171#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
172#[serde(rename_all = "camelCase")]
173pub struct ColumnData {
174    // === Core Identity Fields ===
175    /// Stable technical identifier (ODCS: id)
176    #[serde(skip_serializing_if = "Option::is_none")]
177    pub id: Option<String>,
178    /// Column name (ODCS: name)
179    pub name: String,
180    /// Business name for the column (ODCS: businessName)
181    #[serde(skip_serializing_if = "Option::is_none")]
182    pub business_name: Option<String>,
183    /// Column description/documentation (ODCS: description)
184    #[serde(skip_serializing_if = "Option::is_none")]
185    pub description: Option<String>,
186
187    // === Type Information ===
188    /// Logical data type (ODCS: logicalType)
189    #[serde(rename = "dataType")]
190    pub data_type: String,
191    /// Physical database type (ODCS: physicalType)
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub physical_type: Option<String>,
194    /// Physical name in the data source (ODCS: physicalName)
195    #[serde(skip_serializing_if = "Option::is_none")]
196    pub physical_name: Option<String>,
197    /// Additional type options (ODCS: logicalTypeOptions)
198    #[serde(skip_serializing_if = "Option::is_none")]
199    pub logical_type_options: Option<crate::models::LogicalTypeOptions>,
200
201    // === Key Constraints ===
202    /// Whether this column is part of the primary key (ODCS: primaryKey)
203    #[serde(default)]
204    pub primary_key: bool,
205    /// Position in composite primary key, 1-based (ODCS: primaryKeyPosition)
206    #[serde(skip_serializing_if = "Option::is_none")]
207    pub primary_key_position: Option<i32>,
208    /// Whether the column contains unique values (ODCS: unique)
209    #[serde(default)]
210    pub unique: bool,
211    /// Whether the column allows NULL values (inverse of ODCS: required)
212    #[serde(default = "default_true")]
213    pub nullable: bool,
214
215    // === Partitioning & Clustering ===
216    /// Whether the column is used for partitioning (ODCS: partitioned)
217    #[serde(default)]
218    pub partitioned: bool,
219    /// Position in partition key, 1-based (ODCS: partitionKeyPosition)
220    #[serde(skip_serializing_if = "Option::is_none")]
221    pub partition_key_position: Option<i32>,
222    /// Whether the column is used for clustering
223    #[serde(default)]
224    pub clustered: bool,
225
226    // === Data Classification & Security ===
227    /// Data classification level (ODCS: classification)
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub classification: Option<String>,
230    /// Whether this is a critical data element (ODCS: criticalDataElement)
231    #[serde(default)]
232    pub critical_data_element: bool,
233    /// Name of the encrypted version of this column (ODCS: encryptedName)
234    #[serde(skip_serializing_if = "Option::is_none")]
235    pub encrypted_name: Option<String>,
236
237    // === Transformation Metadata ===
238    /// Source objects used in transformation (ODCS: transformSourceObjects)
239    #[serde(default, skip_serializing_if = "Vec::is_empty")]
240    pub transform_source_objects: Vec<String>,
241    /// Transformation logic/expression (ODCS: transformLogic)
242    #[serde(skip_serializing_if = "Option::is_none")]
243    pub transform_logic: Option<String>,
244    /// Human-readable transformation description (ODCS: transformDescription)
245    #[serde(skip_serializing_if = "Option::is_none")]
246    pub transform_description: Option<String>,
247
248    // === Examples & Documentation ===
249    /// Example values for this column (ODCS: examples)
250    #[serde(default, skip_serializing_if = "Vec::is_empty")]
251    pub examples: Vec<serde_json::Value>,
252    /// Default value for the column
253    #[serde(skip_serializing_if = "Option::is_none")]
254    pub default_value: Option<serde_json::Value>,
255
256    // === Relationships & References ===
257    /// ODCS v3.1.0 relationships (property-level references)
258    #[serde(default, skip_serializing_if = "Vec::is_empty")]
259    pub relationships: Vec<crate::models::PropertyRelationship>,
260    /// Authoritative definitions (ODCS: authoritativeDefinitions)
261    #[serde(default, skip_serializing_if = "Vec::is_empty")]
262    pub authoritative_definitions: Vec<crate::models::AuthoritativeDefinition>,
263
264    // === Quality & Validation ===
265    /// Quality rules and checks (ODCS: quality)
266    #[serde(skip_serializing_if = "Option::is_none")]
267    pub quality: Option<Vec<std::collections::HashMap<String, serde_json::Value>>>,
268    /// Enum values if this column is an enumeration type
269    #[serde(skip_serializing_if = "Option::is_none")]
270    pub enum_values: Option<Vec<String>>,
271
272    // === Tags & Custom Properties ===
273    /// Property-level tags (ODCS: tags)
274    #[serde(default, skip_serializing_if = "Vec::is_empty")]
275    pub tags: Vec<String>,
276    /// Custom properties for format-specific metadata
277    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
278    pub custom_properties: std::collections::HashMap<String, serde_json::Value>,
279}
280
/// Serde default provider that yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on
/// `ColumnData::nullable`, so a column whose input omits `nullable` is
/// treated as nullable.
fn default_true() -> bool {
    true
}

285impl Default for ColumnData {
286    fn default() -> Self {
287        Self {
288            // Core Identity
289            id: None,
290            name: String::new(),
291            business_name: None,
292            description: None,
293            // Type Information
294            data_type: String::new(),
295            physical_type: None,
296            physical_name: None,
297            logical_type_options: None,
298            // Key Constraints
299            primary_key: false,
300            primary_key_position: None,
301            unique: false,
302            nullable: true,
303            // Partitioning & Clustering
304            partitioned: false,
305            partition_key_position: None,
306            clustered: false,
307            // Data Classification & Security
308            classification: None,
309            critical_data_element: false,
310            encrypted_name: None,
311            // Transformation Metadata
312            transform_source_objects: Vec::new(),
313            transform_logic: None,
314            transform_description: None,
315            // Examples & Documentation
316            examples: Vec::new(),
317            default_value: None,
318            // Relationships & References
319            relationships: Vec::new(),
320            authoritative_definitions: Vec::new(),
321            // Quality & Validation
322            quality: None,
323            enum_values: None,
324            // Tags & Custom Properties
325            tags: Vec::new(),
326            custom_properties: std::collections::HashMap::new(),
327        }
328    }
329}
330
331// Re-export for convenience
332pub use avro::AvroImporter;
333pub use cads::CADSImporter;
334pub use decision::DecisionImporter;
335pub use json_schema::JSONSchemaImporter;
336pub use knowledge::KnowledgeImporter;
337pub use odcl::ODCLImporter;
338pub use odcs::ODCSImporter;
339pub use odcs_shared::ParserError;
340pub use odps::ODPSImporter;
341pub use protobuf::ProtobufImporter;
342pub use sql::SQLImporter;
343
344/// Table requiring name input (for SQL imports)
345#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
346pub struct TableRequiringName {
347    pub table_index: usize,
348    pub suggested_name: Option<String>,
349}