data_modelling_sdk/import/
json_schema.rs

1//! JSON Schema parser for importing JSON Schema into data models.
2//!
3//! # Validation
4//!
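//! All imported table and column names are validated for:
//! - Valid identifier format
//! - Maximum length limits
//!
//! The SQL data type mapped from each property's JSON type is validated as well.
//! A failed check is only logged as a warning; the table or column is still imported.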
8
9use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, Table};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use tracing::{info, warn};
16
/// Parser for JSON Schema format.
///
/// The importer is a stateless unit struct: it holds no configuration and every
/// method only reads its arguments. `Debug` and `Clone` are derived so the type
/// can be logged and embedded in other derivable types, matching `ParserError`.
#[derive(Debug, Clone)]
pub struct JSONSchemaImporter;
19
20impl Default for JSONSchemaImporter {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl JSONSchemaImporter {
27    /// Create a new JSON Schema parser instance.
28    ///
29    /// # Example
30    ///
31    /// ```rust
32    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
33    ///
34    /// let importer = JSONSchemaImporter::new();
35    /// ```
36    pub fn new() -> Self {
37        Self
38    }
39
40    /// Import JSON Schema content and create Table(s) (SDK interface).
41    ///
42    /// # Arguments
43    ///
44    /// * `json_content` - JSON Schema string (can be a single schema or schema with definitions)
45    ///
46    /// # Returns
47    ///
48    /// An `ImportResult` containing extracted tables and any parse errors.
49    ///
50    /// # Example
51    ///
52    /// ```rust
53    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
54    ///
55    /// let importer = JSONSchemaImporter::new();
56    /// let schema = r#"
57    /// {
58    ///   "type": "object",
59    ///   "properties": {
60    ///     "id": {"type": "integer"},
61    ///     "name": {"type": "string"}
62    ///   },
63    ///   "required": ["id"]
64    /// }
65    /// "#;
66    /// let result = importer.import(schema).unwrap();
67    /// ```
68    pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
69        match self.parse(json_content) {
70            Ok((tables, errors)) => {
71                let mut sdk_tables = Vec::new();
72                for (idx, table) in tables.iter().enumerate() {
73                    sdk_tables.push(TableData {
74                        table_index: idx,
75                        name: Some(table.name.clone()),
76                        columns: table
77                            .columns
78                            .iter()
79                            .map(|c| super::ColumnData {
80                                name: c.name.clone(),
81                                data_type: c.data_type.clone(),
82                                nullable: c.nullable,
83                                primary_key: c.primary_key,
84                            })
85                            .collect(),
86                    });
87                }
88                let sdk_errors: Vec<ImportError> = errors
89                    .iter()
90                    .map(|e| ImportError::ParseError(e.message.clone()))
91                    .collect();
92                Ok(ImportResult {
93                    tables: sdk_tables,
94                    tables_requiring_name: Vec::new(),
95                    errors: sdk_errors,
96                    ai_suggestions: None,
97                })
98            }
99            Err(e) => Err(ImportError::ParseError(e.to_string())),
100        }
101    }
102
103    /// Parse JSON Schema content and create Table(s) (internal method).
104    ///
105    /// # Returns
106    ///
107    /// Returns a tuple of (Tables, list of errors/warnings).
108    fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
109        let mut errors = Vec::new();
110
111        // Parse JSON
112        let schema: Value =
113            serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
114
115        let mut tables = Vec::new();
116
117        // Check if it's a schema with definitions (multiple tables)
118        if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
119            // Multiple schemas in definitions
120            for (name, def_schema) in definitions {
121                match self.parse_schema(def_schema, Some(name), &mut errors) {
122                    Ok(table) => tables.push(table),
123                    Err(e) => {
124                        errors.push(ParserError {
125                            error_type: "parse_error".to_string(),
126                            field: Some(format!("definitions.{}", name)),
127                            message: format!("Failed to parse schema: {}", e),
128                        });
129                    }
130                }
131            }
132        } else {
133            // Single schema
134            match self.parse_schema(&schema, None, &mut errors) {
135                Ok(table) => tables.push(table),
136                Err(e) => {
137                    errors.push(ParserError {
138                        error_type: "parse_error".to_string(),
139                        field: None,
140                        message: format!("Failed to parse schema: {}", e),
141                    });
142                }
143            }
144        }
145
146        Ok((tables, errors))
147    }
148
149    /// Parse a single JSON Schema object.
150    fn parse_schema(
151        &self,
152        schema: &Value,
153        name_override: Option<&str>,
154        errors: &mut Vec<ParserError>,
155    ) -> Result<Table> {
156        let schema_obj = schema
157            .as_object()
158            .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
159
160        // Extract name/title
161        let name = name_override
162            .map(|s| s.to_string())
163            .or_else(|| {
164                schema_obj
165                    .get("title")
166                    .or_else(|| schema_obj.get("name"))
167                    .and_then(|v| v.as_str())
168                    .map(|s| s.to_string())
169            })
170            .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
171
172        // Validate table name
173        if let Err(e) = validate_table_name(&name) {
174            warn!("Table name validation warning for '{}': {}", name, e);
175        }
176
177        // Extract description
178        let description = schema_obj
179            .get("description")
180            .and_then(|v| v.as_str())
181            .map(|s| s.to_string())
182            .unwrap_or_default();
183
184        // Extract properties
185        let properties = schema_obj
186            .get("properties")
187            .and_then(|v| v.as_object())
188            .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
189
190        // Extract required fields
191        let required_fields: Vec<String> = schema_obj
192            .get("required")
193            .and_then(|v| v.as_array())
194            .map(|arr| {
195                arr.iter()
196                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
197                    .collect()
198            })
199            .unwrap_or_default();
200
201        let mut columns = Vec::new();
202        for (prop_name, prop_schema) in properties {
203            let nullable = !required_fields.contains(prop_name);
204            match self.parse_property(prop_name, prop_schema, nullable, errors) {
205                Ok(mut cols) => columns.append(&mut cols),
206                Err(e) => {
207                    errors.push(ParserError {
208                        error_type: "parse_error".to_string(),
209                        field: Some(format!("properties.{}", prop_name)),
210                        message: format!("Failed to parse property: {}", e),
211                    });
212                }
213            }
214        }
215
216        // Build table metadata
217        let mut odcl_metadata = HashMap::new();
218        if !description.is_empty() {
219            odcl_metadata.insert("description".to_string(), json!(description));
220        }
221
222        let table = Table {
223            id: crate::models::table::Table::generate_id(&name, None, None, None),
224            name: name.clone(),
225            columns,
226            database_type: None,
227            catalog_name: None,
228            schema_name: None,
229            medallion_layers: Vec::new(),
230            scd_pattern: None,
231            data_vault_classification: None,
232            modeling_level: None,
233            tags: Vec::new(),
234            odcl_metadata,
235            owner: None,
236            sla: None,
237            contact_details: None,
238            infrastructure_type: None,
239            notes: None,
240            position: None,
241            yaml_file_path: None,
242            drawio_cell_id: None,
243            quality: Vec::new(),
244            errors: Vec::new(),
245            created_at: chrono::Utc::now(),
246            updated_at: chrono::Utc::now(),
247        };
248
249        info!(
250            "Parsed JSON Schema: {} with {} columns",
251            name,
252            table.columns.len()
253        );
254        Ok(table)
255    }
256
257    /// Parse a JSON Schema property (which can be a simple property or nested object).
258    fn parse_property(
259        &self,
260        prop_name: &str,
261        prop_schema: &Value,
262        nullable: bool,
263        errors: &mut Vec<ParserError>,
264    ) -> Result<Vec<Column>> {
265        // Validate column name
266        if let Err(e) = validate_column_name(prop_name) {
267            warn!("Column name validation warning for '{}': {}", prop_name, e);
268        }
269
270        let prop_obj = prop_schema
271            .as_object()
272            .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
273
274        let prop_type = prop_obj
275            .get("type")
276            .and_then(|v| v.as_str())
277            .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
278
279        // Validate data type
280        let mapped_type = self.map_json_type_to_sql(prop_type);
281        if let Err(e) = validate_data_type(&mapped_type) {
282            warn!("Data type validation warning for '{}': {}", mapped_type, e);
283        }
284
285        let description = prop_obj
286            .get("description")
287            .and_then(|v| v.as_str())
288            .map(|s| s.to_string())
289            .unwrap_or_default();
290
291        let mut columns = Vec::new();
292
293        match prop_type {
294            "object" => {
295                // Nested object - create nested columns with dot notation
296                if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
297                    let nested_required: Vec<String> = prop_obj
298                        .get("required")
299                        .and_then(|v| v.as_array())
300                        .map(|arr| {
301                            arr.iter()
302                                .filter_map(|v| v.as_str().map(|s| s.to_string()))
303                                .collect()
304                        })
305                        .unwrap_or_default();
306
307                    for (nested_name, nested_schema) in nested_props {
308                        let nested_nullable = !nested_required.contains(nested_name);
309                        match self.parse_property(
310                            nested_name,
311                            nested_schema,
312                            nested_nullable,
313                            errors,
314                        ) {
315                            Ok(mut nested_cols) => {
316                                // Prefix nested columns with parent property name
317                                for col in nested_cols.iter_mut() {
318                                    col.name = format!("{}.{}", prop_name, col.name);
319                                }
320                                columns.append(&mut nested_cols);
321                            }
322                            Err(e) => {
323                                errors.push(ParserError {
324                                    error_type: "parse_error".to_string(),
325                                    field: Some(format!("{}.{}", prop_name, nested_name)),
326                                    message: format!("Failed to parse nested property: {}", e),
327                                });
328                            }
329                        }
330                    }
331                } else {
332                    // Object without properties - treat as STRUCT
333                    columns.push(Column {
334                        name: prop_name.to_string(),
335                        data_type: "STRUCT".to_string(),
336                        nullable,
337                        primary_key: false,
338                        secondary_key: false,
339                        composite_key: None,
340                        foreign_key: None,
341                        constraints: Vec::new(),
342                        description,
343                        quality: Vec::new(),
344                        enum_values: Vec::new(),
345                        errors: Vec::new(),
346                        column_order: 0,
347                    });
348                }
349            }
350            "array" => {
351                // Array type
352                let items = prop_obj
353                    .get("items")
354                    .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
355
356                let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
357                {
358                    if items_str == "object" {
359                        // Array of objects - create nested columns
360                        if let Some(nested_props) =
361                            items.get("properties").and_then(|v| v.as_object())
362                        {
363                            let nested_required: Vec<String> = items
364                                .get("required")
365                                .and_then(|v| v.as_array())
366                                .map(|arr| {
367                                    arr.iter()
368                                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
369                                        .collect()
370                                })
371                                .unwrap_or_default();
372
373                            for (nested_name, nested_schema) in nested_props {
374                                let nested_nullable = !nested_required.contains(nested_name);
375                                match self.parse_property(
376                                    nested_name,
377                                    nested_schema,
378                                    nested_nullable,
379                                    errors,
380                                ) {
381                                    Ok(mut nested_cols) => {
382                                        for col in nested_cols.iter_mut() {
383                                            col.name = format!("{}.{}", prop_name, col.name);
384                                        }
385                                        columns.append(&mut nested_cols);
386                                    }
387                                    Err(e) => {
388                                        errors.push(ParserError {
389                                            error_type: "parse_error".to_string(),
390                                            field: Some(format!("{}.{}", prop_name, nested_name)),
391                                            message: format!(
392                                                "Failed to parse array item property: {}",
393                                                e
394                                            ),
395                                        });
396                                    }
397                                }
398                            }
399                            return Ok(columns);
400                        } else {
401                            "ARRAY<STRUCT>".to_string()
402                        }
403                    } else {
404                        format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
405                    }
406                } else {
407                    "ARRAY<STRING>".to_string()
408                };
409
410                columns.push(Column {
411                    name: prop_name.to_string(),
412                    data_type,
413                    nullable,
414                    primary_key: false,
415                    secondary_key: false,
416                    composite_key: None,
417                    foreign_key: None,
418                    constraints: Vec::new(),
419                    description,
420                    quality: Vec::new(),
421                    enum_values: Vec::new(),
422                    errors: Vec::new(),
423                    column_order: 0,
424                });
425            }
426            _ => {
427                // Simple type
428                let data_type = self.map_json_type_to_sql(prop_type);
429                columns.push(Column {
430                    name: prop_name.to_string(),
431                    data_type,
432                    nullable,
433                    primary_key: false,
434                    secondary_key: false,
435                    composite_key: None,
436                    foreign_key: None,
437                    constraints: Vec::new(),
438                    description,
439                    quality: Vec::new(),
440                    enum_values: Vec::new(),
441                    errors: Vec::new(),
442                    column_order: 0,
443                });
444            }
445        }
446
447        Ok(columns)
448    }
449
450    /// Map JSON Schema type to SQL/ODCL data type.
451    fn map_json_type_to_sql(&self, json_type: &str) -> String {
452        match json_type {
453            "integer" => "INTEGER".to_string(),
454            "number" => "DOUBLE".to_string(),
455            "boolean" => "BOOLEAN".to_string(),
456            "string" => "STRING".to_string(),
457            "null" => "NULL".to_string(),
458            _ => "STRING".to_string(), // Default fallback
459        }
460    }
461}
462
/// Parser error structure (matches ODCL parser format).
///
/// `PartialEq`/`Eq` are derived so collected errors can be compared by value,
/// for example in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
    /// Category of the problem; this importer always reports `"parse_error"`.
    pub error_type: String,
    /// Location of the problem inside the schema (for example `properties.id`),
    /// or `None` when the problem concerns the schema as a whole.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}