data_modelling_sdk/import/
json_schema.rs

1//! JSON Schema parser for importing JSON Schema into data models.
2//!
3//! # Validation
4//!
//! All imported table and column names are validated for:
//! - Valid identifier format
//! - Maximum length limits
//!
//! Validation failures are logged as warnings; they do not abort the import.
8
9use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, Table};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use tracing::{info, warn};
16
/// Parser for JSON Schema format.
///
/// This is a unit struct with no fields; create it with
/// `JSONSchemaImporter::new()` or `JSONSchemaImporter::default()`.
#[derive(Debug, Clone)]
pub struct JSONSchemaImporter;
19
20impl Default for JSONSchemaImporter {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl JSONSchemaImporter {
27    /// Create a new JSON Schema parser instance.
28    ///
29    /// # Example
30    ///
31    /// ```rust
32    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
33    ///
34    /// let importer = JSONSchemaImporter::new();
35    /// ```
36    pub fn new() -> Self {
37        Self
38    }
39
40    /// Import JSON Schema content and create Table(s) (SDK interface).
41    ///
42    /// # Arguments
43    ///
44    /// * `json_content` - JSON Schema string (can be a single schema or schema with definitions)
45    ///
46    /// # Returns
47    ///
48    /// An `ImportResult` containing extracted tables and any parse errors.
49    ///
50    /// # Example
51    ///
52    /// ```rust
53    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
54    ///
55    /// let importer = JSONSchemaImporter::new();
56    /// let schema = r#"
57    /// {
58    ///   "type": "object",
59    ///   "properties": {
60    ///     "id": {"type": "integer"},
61    ///     "name": {"type": "string"}
62    ///   },
63    ///   "required": ["id"]
64    /// }
65    /// "#;
66    /// let result = importer.import(schema).unwrap();
67    /// ```
68    pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
69        match self.parse(json_content) {
70            Ok((tables, errors)) => {
71                let mut sdk_tables = Vec::new();
72                for (idx, table) in tables.iter().enumerate() {
73                    sdk_tables.push(TableData {
74                        table_index: idx,
75                        name: Some(table.name.clone()),
76                        columns: table
77                            .columns
78                            .iter()
79                            .map(|c| super::ColumnData {
80                                name: c.name.clone(),
81                                data_type: c.data_type.clone(),
82                                nullable: c.nullable,
83                                primary_key: c.primary_key,
84                            })
85                            .collect(),
86                    });
87                }
88                let sdk_errors: Vec<ImportError> = errors
89                    .iter()
90                    .map(|e| ImportError::ParseError(e.message.clone()))
91                    .collect();
92                Ok(ImportResult {
93                    tables: sdk_tables,
94                    tables_requiring_name: Vec::new(),
95                    errors: sdk_errors,
96                    ai_suggestions: None,
97                })
98            }
99            Err(e) => Err(ImportError::ParseError(e.to_string())),
100        }
101    }
102
103    /// Parse JSON Schema content and create Table(s) (internal method).
104    ///
105    /// # Returns
106    ///
107    /// Returns a tuple of (Tables, list of errors/warnings).
108    fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
109        let mut errors = Vec::new();
110
111        // Parse JSON
112        let schema: Value =
113            serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
114
115        let mut tables = Vec::new();
116
117        // Check if it's a schema with definitions (multiple tables)
118        if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
119            // Multiple schemas in definitions
120            for (name, def_schema) in definitions {
121                match self.parse_schema(def_schema, Some(name), &mut errors) {
122                    Ok(table) => tables.push(table),
123                    Err(e) => {
124                        errors.push(ParserError {
125                            error_type: "parse_error".to_string(),
126                            field: Some(format!("definitions.{}", name)),
127                            message: format!("Failed to parse schema: {}", e),
128                        });
129                    }
130                }
131            }
132        } else {
133            // Single schema
134            match self.parse_schema(&schema, None, &mut errors) {
135                Ok(table) => tables.push(table),
136                Err(e) => {
137                    errors.push(ParserError {
138                        error_type: "parse_error".to_string(),
139                        field: None,
140                        message: format!("Failed to parse schema: {}", e),
141                    });
142                }
143            }
144        }
145
146        Ok((tables, errors))
147    }
148
149    /// Parse a single JSON Schema object.
150    fn parse_schema(
151        &self,
152        schema: &Value,
153        name_override: Option<&str>,
154        errors: &mut Vec<ParserError>,
155    ) -> Result<Table> {
156        let schema_obj = schema
157            .as_object()
158            .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
159
160        // Extract name/title
161        let name = name_override
162            .map(|s| s.to_string())
163            .or_else(|| {
164                schema_obj
165                    .get("title")
166                    .or_else(|| schema_obj.get("name"))
167                    .and_then(|v| v.as_str())
168                    .map(|s| s.to_string())
169            })
170            .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
171
172        // Validate table name
173        if let Err(e) = validate_table_name(&name) {
174            warn!("Table name validation warning for '{}': {}", name, e);
175        }
176
177        // Extract description
178        let description = schema_obj
179            .get("description")
180            .and_then(|v| v.as_str())
181            .map(|s| s.to_string())
182            .unwrap_or_default();
183
184        // Extract properties
185        let properties = schema_obj
186            .get("properties")
187            .and_then(|v| v.as_object())
188            .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
189
190        // Extract required fields
191        let required_fields: Vec<String> = schema_obj
192            .get("required")
193            .and_then(|v| v.as_array())
194            .map(|arr| {
195                arr.iter()
196                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
197                    .collect()
198            })
199            .unwrap_or_default();
200
201        let mut columns = Vec::new();
202        for (prop_name, prop_schema) in properties {
203            let nullable = !required_fields.contains(prop_name);
204            match self.parse_property(prop_name, prop_schema, nullable, errors) {
205                Ok(mut cols) => columns.append(&mut cols),
206                Err(e) => {
207                    errors.push(ParserError {
208                        error_type: "parse_error".to_string(),
209                        field: Some(format!("properties.{}", prop_name)),
210                        message: format!("Failed to parse property: {}", e),
211                    });
212                }
213            }
214        }
215
216        // Build table metadata
217        let mut odcl_metadata = HashMap::new();
218        if !description.is_empty() {
219            odcl_metadata.insert("description".to_string(), json!(description));
220        }
221
222        let table = Table {
223            id: crate::models::table::Table::generate_id(&name, None, None, None),
224            name: name.clone(),
225            columns,
226            database_type: None,
227            catalog_name: None,
228            schema_name: None,
229            medallion_layers: Vec::new(),
230            scd_pattern: None,
231            data_vault_classification: None,
232            modeling_level: None,
233            tags: Vec::new(),
234            odcl_metadata,
235            position: None,
236            yaml_file_path: None,
237            drawio_cell_id: None,
238            quality: Vec::new(),
239            errors: Vec::new(),
240            created_at: chrono::Utc::now(),
241            updated_at: chrono::Utc::now(),
242        };
243
244        info!(
245            "Parsed JSON Schema: {} with {} columns",
246            name,
247            table.columns.len()
248        );
249        Ok(table)
250    }
251
252    /// Parse a JSON Schema property (which can be a simple property or nested object).
253    fn parse_property(
254        &self,
255        prop_name: &str,
256        prop_schema: &Value,
257        nullable: bool,
258        errors: &mut Vec<ParserError>,
259    ) -> Result<Vec<Column>> {
260        // Validate column name
261        if let Err(e) = validate_column_name(prop_name) {
262            warn!("Column name validation warning for '{}': {}", prop_name, e);
263        }
264
265        let prop_obj = prop_schema
266            .as_object()
267            .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
268
269        let prop_type = prop_obj
270            .get("type")
271            .and_then(|v| v.as_str())
272            .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
273
274        // Validate data type
275        let mapped_type = self.map_json_type_to_sql(prop_type);
276        if let Err(e) = validate_data_type(&mapped_type) {
277            warn!("Data type validation warning for '{}': {}", mapped_type, e);
278        }
279
280        let description = prop_obj
281            .get("description")
282            .and_then(|v| v.as_str())
283            .map(|s| s.to_string())
284            .unwrap_or_default();
285
286        let mut columns = Vec::new();
287
288        match prop_type {
289            "object" => {
290                // Nested object - create nested columns with dot notation
291                if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
292                    let nested_required: Vec<String> = prop_obj
293                        .get("required")
294                        .and_then(|v| v.as_array())
295                        .map(|arr| {
296                            arr.iter()
297                                .filter_map(|v| v.as_str().map(|s| s.to_string()))
298                                .collect()
299                        })
300                        .unwrap_or_default();
301
302                    for (nested_name, nested_schema) in nested_props {
303                        let nested_nullable = !nested_required.contains(nested_name);
304                        match self.parse_property(
305                            nested_name,
306                            nested_schema,
307                            nested_nullable,
308                            errors,
309                        ) {
310                            Ok(mut nested_cols) => {
311                                // Prefix nested columns with parent property name
312                                for col in nested_cols.iter_mut() {
313                                    col.name = format!("{}.{}", prop_name, col.name);
314                                }
315                                columns.append(&mut nested_cols);
316                            }
317                            Err(e) => {
318                                errors.push(ParserError {
319                                    error_type: "parse_error".to_string(),
320                                    field: Some(format!("{}.{}", prop_name, nested_name)),
321                                    message: format!("Failed to parse nested property: {}", e),
322                                });
323                            }
324                        }
325                    }
326                } else {
327                    // Object without properties - treat as STRUCT
328                    columns.push(Column {
329                        name: prop_name.to_string(),
330                        data_type: "STRUCT".to_string(),
331                        nullable,
332                        primary_key: false,
333                        secondary_key: false,
334                        composite_key: None,
335                        foreign_key: None,
336                        constraints: Vec::new(),
337                        description,
338                        quality: Vec::new(),
339                        enum_values: Vec::new(),
340                        errors: Vec::new(),
341                        column_order: 0,
342                    });
343                }
344            }
345            "array" => {
346                // Array type
347                let items = prop_obj
348                    .get("items")
349                    .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
350
351                let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
352                {
353                    if items_str == "object" {
354                        // Array of objects - create nested columns
355                        if let Some(nested_props) =
356                            items.get("properties").and_then(|v| v.as_object())
357                        {
358                            let nested_required: Vec<String> = items
359                                .get("required")
360                                .and_then(|v| v.as_array())
361                                .map(|arr| {
362                                    arr.iter()
363                                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
364                                        .collect()
365                                })
366                                .unwrap_or_default();
367
368                            for (nested_name, nested_schema) in nested_props {
369                                let nested_nullable = !nested_required.contains(nested_name);
370                                match self.parse_property(
371                                    nested_name,
372                                    nested_schema,
373                                    nested_nullable,
374                                    errors,
375                                ) {
376                                    Ok(mut nested_cols) => {
377                                        for col in nested_cols.iter_mut() {
378                                            col.name = format!("{}.{}", prop_name, col.name);
379                                        }
380                                        columns.append(&mut nested_cols);
381                                    }
382                                    Err(e) => {
383                                        errors.push(ParserError {
384                                            error_type: "parse_error".to_string(),
385                                            field: Some(format!("{}.{}", prop_name, nested_name)),
386                                            message: format!(
387                                                "Failed to parse array item property: {}",
388                                                e
389                                            ),
390                                        });
391                                    }
392                                }
393                            }
394                            return Ok(columns);
395                        } else {
396                            "ARRAY<STRUCT>".to_string()
397                        }
398                    } else {
399                        format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
400                    }
401                } else {
402                    "ARRAY<STRING>".to_string()
403                };
404
405                columns.push(Column {
406                    name: prop_name.to_string(),
407                    data_type,
408                    nullable,
409                    primary_key: false,
410                    secondary_key: false,
411                    composite_key: None,
412                    foreign_key: None,
413                    constraints: Vec::new(),
414                    description,
415                    quality: Vec::new(),
416                    enum_values: Vec::new(),
417                    errors: Vec::new(),
418                    column_order: 0,
419                });
420            }
421            _ => {
422                // Simple type
423                let data_type = self.map_json_type_to_sql(prop_type);
424                columns.push(Column {
425                    name: prop_name.to_string(),
426                    data_type,
427                    nullable,
428                    primary_key: false,
429                    secondary_key: false,
430                    composite_key: None,
431                    foreign_key: None,
432                    constraints: Vec::new(),
433                    description,
434                    quality: Vec::new(),
435                    enum_values: Vec::new(),
436                    errors: Vec::new(),
437                    column_order: 0,
438                });
439            }
440        }
441
442        Ok(columns)
443    }
444
445    /// Map JSON Schema type to SQL/ODCL data type.
446    fn map_json_type_to_sql(&self, json_type: &str) -> String {
447        match json_type {
448            "integer" => "INTEGER".to_string(),
449            "number" => "DOUBLE".to_string(),
450            "boolean" => "BOOLEAN".to_string(),
451            "string" => "STRING".to_string(),
452            "null" => "NULL".to_string(),
453            _ => "STRING".to_string(), // Default fallback
454        }
455    }
456}
457
/// Parser error structure (matches ODCL parser format).
///
/// Describes one problem found while importing. Problems are collected
/// rather than aborting the import.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
    /// Category of the problem; this importer always uses `"parse_error"`.
    pub error_type: String,
    /// Dotted path of the offending element (e.g. `properties.<name>` or
    /// `definitions.<name>`), or `None` when the whole schema is at fault.
    pub field: Option<String>,
    /// Human-readable description of what went wrong.
    pub message: String,
}