data_modelling_sdk/import/
json_schema.rs

1//! JSON Schema parser for importing JSON Schema into data models.
2//!
3//! # Validation
4//!
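//! All imported table and column names are validated for:
//! - Valid identifier format
//! - Maximum length limits
//!
//! Mapped data types are checked as well. Validation failures are logged as
//! warnings only; they do not abort the import or drop the offending item.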
8
9use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, Table, Tag};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use std::str::FromStr;
16use tracing::{info, warn};
17
/// Parser for JSON Schema format.
///
/// This is a unit struct: it carries no configuration or state, so a single
/// value can be reused for any number of imports.
pub struct JSONSchemaImporter;
20
21impl Default for JSONSchemaImporter {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl JSONSchemaImporter {
28    /// Create a new JSON Schema parser instance.
29    ///
30    /// # Example
31    ///
32    /// ```rust
33    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
34    ///
35    /// let importer = JSONSchemaImporter::new();
36    /// ```
37    pub fn new() -> Self {
38        Self
39    }
40
41    /// Import JSON Schema content and create Table(s) (SDK interface).
42    ///
43    /// # Arguments
44    ///
45    /// * `json_content` - JSON Schema string (can be a single schema or schema with definitions)
46    ///
47    /// # Returns
48    ///
49    /// An `ImportResult` containing extracted tables and any parse errors.
50    ///
51    /// # Example
52    ///
53    /// ```rust
54    /// use data_modelling_sdk::import::json_schema::JSONSchemaImporter;
55    ///
56    /// let importer = JSONSchemaImporter::new();
57    /// let schema = r#"
58    /// {
59    ///   "type": "object",
60    ///   "properties": {
61    ///     "id": {"type": "integer"},
62    ///     "name": {"type": "string"}
63    ///   },
64    ///   "required": ["id"]
65    /// }
66    /// "#;
67    /// let result = importer.import(schema).unwrap();
68    /// ```
69    pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
70        match self.parse(json_content) {
71            Ok((tables, errors)) => {
72                let mut sdk_tables = Vec::new();
73                for (idx, table) in tables.iter().enumerate() {
74                    sdk_tables.push(TableData {
75                        table_index: idx,
76                        name: Some(table.name.clone()),
77                        columns: table
78                            .columns
79                            .iter()
80                            .map(|c| super::ColumnData {
81                                name: c.name.clone(),
82                                data_type: c.data_type.clone(),
83                                nullable: c.nullable,
84                                primary_key: c.primary_key,
85                                description: if c.description.is_empty() {
86                                    None
87                                } else {
88                                    Some(c.description.clone())
89                                },
90                                quality: if c.quality.is_empty() {
91                                    None
92                                } else {
93                                    Some(c.quality.clone())
94                                },
95                                ref_path: c.ref_path.clone(),
96                            })
97                            .collect(),
98                    });
99                }
100                let sdk_errors: Vec<ImportError> = errors
101                    .iter()
102                    .map(|e| ImportError::ParseError(e.message.clone()))
103                    .collect();
104                Ok(ImportResult {
105                    tables: sdk_tables,
106                    tables_requiring_name: Vec::new(),
107                    errors: sdk_errors,
108                    ai_suggestions: None,
109                })
110            }
111            Err(e) => Err(ImportError::ParseError(e.to_string())),
112        }
113    }
114
115    /// Parse JSON Schema content and create Table(s) (internal method).
116    ///
117    /// # Returns
118    ///
119    /// Returns a tuple of (Tables, list of errors/warnings).
120    fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
121        let mut errors = Vec::new();
122
123        // Parse JSON
124        let schema: Value =
125            serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
126
127        let mut tables = Vec::new();
128
129        // Check if it's a schema with definitions (multiple tables)
130        if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
131            // Multiple schemas in definitions
132            for (name, def_schema) in definitions {
133                match self.parse_schema(def_schema, Some(name), &mut errors) {
134                    Ok(table) => tables.push(table),
135                    Err(e) => {
136                        errors.push(ParserError {
137                            error_type: "parse_error".to_string(),
138                            field: Some(format!("definitions.{}", name)),
139                            message: format!("Failed to parse schema: {}", e),
140                        });
141                    }
142                }
143            }
144        } else {
145            // Single schema
146            match self.parse_schema(&schema, None, &mut errors) {
147                Ok(table) => tables.push(table),
148                Err(e) => {
149                    errors.push(ParserError {
150                        error_type: "parse_error".to_string(),
151                        field: None,
152                        message: format!("Failed to parse schema: {}", e),
153                    });
154                }
155            }
156        }
157
158        Ok((tables, errors))
159    }
160
161    /// Parse a single JSON Schema object.
162    fn parse_schema(
163        &self,
164        schema: &Value,
165        name_override: Option<&str>,
166        errors: &mut Vec<ParserError>,
167    ) -> Result<Table> {
168        let schema_obj = schema
169            .as_object()
170            .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
171
172        // Extract name/title
173        let name = name_override
174            .map(|s| s.to_string())
175            .or_else(|| {
176                schema_obj
177                    .get("title")
178                    .or_else(|| schema_obj.get("name"))
179                    .and_then(|v| v.as_str())
180                    .map(|s| s.to_string())
181            })
182            .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
183
184        // Validate table name
185        if let Err(e) = validate_table_name(&name) {
186            warn!("Table name validation warning for '{}': {}", name, e);
187        }
188
189        // Extract description
190        let description = schema_obj
191            .get("description")
192            .and_then(|v| v.as_str())
193            .map(|s| s.to_string())
194            .unwrap_or_default();
195
196        // Extract properties
197        let properties = schema_obj
198            .get("properties")
199            .and_then(|v| v.as_object())
200            .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
201
202        // Extract required fields
203        let required_fields: Vec<String> = schema_obj
204            .get("required")
205            .and_then(|v| v.as_array())
206            .map(|arr| {
207                arr.iter()
208                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
209                    .collect()
210            })
211            .unwrap_or_default();
212
213        let mut columns = Vec::new();
214        for (prop_name, prop_schema) in properties {
215            let nullable = !required_fields.contains(prop_name);
216            match self.parse_property(prop_name, prop_schema, nullable, errors) {
217                Ok(mut cols) => columns.append(&mut cols),
218                Err(e) => {
219                    errors.push(ParserError {
220                        error_type: "parse_error".to_string(),
221                        field: Some(format!("properties.{}", prop_name)),
222                        message: format!("Failed to parse property: {}", e),
223                    });
224                }
225            }
226        }
227
228        // Extract tags from JSON Schema (can be in root or in customProperties)
229        let mut tags: Vec<Tag> = Vec::new();
230        if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
231            for item in tags_arr {
232                if let Some(s) = item.as_str() {
233                    if let Ok(tag) = Tag::from_str(s) {
234                        tags.push(tag);
235                    } else {
236                        tags.push(Tag::Simple(s.to_string()));
237                    }
238                }
239            }
240        }
241        // Also check customProperties for tags
242        if let Some(custom_props) = schema_obj
243            .get("customProperties")
244            .and_then(|v| v.as_object())
245            && let Some(tags_val) = custom_props.get("tags")
246            && let Some(tags_arr) = tags_val.as_array()
247        {
248            for item in tags_arr {
249                if let Some(s) = item.as_str() {
250                    if let Ok(tag) = Tag::from_str(s) {
251                        if !tags.contains(&tag) {
252                            tags.push(tag);
253                        }
254                    } else {
255                        let simple_tag = Tag::Simple(s.to_string());
256                        if !tags.contains(&simple_tag) {
257                            tags.push(simple_tag);
258                        }
259                    }
260                }
261            }
262        }
263
264        // Build table metadata
265        let mut odcl_metadata = HashMap::new();
266        if !description.is_empty() {
267            odcl_metadata.insert("description".to_string(), json!(description));
268        }
269
270        let table = Table {
271            id: crate::models::table::Table::generate_id(&name, None, None, None),
272            name: name.clone(),
273            columns,
274            database_type: None,
275            catalog_name: None,
276            schema_name: None,
277            medallion_layers: Vec::new(),
278            scd_pattern: None,
279            data_vault_classification: None,
280            modeling_level: None,
281            tags,
282            odcl_metadata,
283            owner: None,
284            sla: None,
285            contact_details: None,
286            infrastructure_type: None,
287            notes: None,
288            position: None,
289            yaml_file_path: None,
290            drawio_cell_id: None,
291            quality: Vec::new(),
292            errors: Vec::new(),
293            created_at: chrono::Utc::now(),
294            updated_at: chrono::Utc::now(),
295        };
296
297        info!(
298            "Parsed JSON Schema: {} with {} columns",
299            name,
300            table.columns.len()
301        );
302        Ok(table)
303    }
304
305    /// Parse a JSON Schema property (which can be a simple property or nested object).
306    fn parse_property(
307        &self,
308        prop_name: &str,
309        prop_schema: &Value,
310        nullable: bool,
311        errors: &mut Vec<ParserError>,
312    ) -> Result<Vec<Column>> {
313        // Validate column name
314        if let Err(e) = validate_column_name(prop_name) {
315            warn!("Column name validation warning for '{}': {}", prop_name, e);
316        }
317
318        let prop_obj = prop_schema
319            .as_object()
320            .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
321
322        let prop_type = prop_obj
323            .get("type")
324            .and_then(|v| v.as_str())
325            .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
326
327        // Validate data type
328        let mapped_type = self.map_json_type_to_sql(prop_type);
329        if let Err(e) = validate_data_type(&mapped_type) {
330            warn!("Data type validation warning for '{}': {}", mapped_type, e);
331        }
332
333        let description = prop_obj
334            .get("description")
335            .and_then(|v| v.as_str())
336            .map(|s| s.to_string())
337            .unwrap_or_default();
338
339        let mut columns = Vec::new();
340
341        match prop_type {
342            "object" => {
343                // Nested object - create nested columns with dot notation
344                if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
345                    let nested_required: Vec<String> = prop_obj
346                        .get("required")
347                        .and_then(|v| v.as_array())
348                        .map(|arr| {
349                            arr.iter()
350                                .filter_map(|v| v.as_str().map(|s| s.to_string()))
351                                .collect()
352                        })
353                        .unwrap_or_default();
354
355                    for (nested_name, nested_schema) in nested_props {
356                        let nested_nullable = !nested_required.contains(nested_name);
357                        match self.parse_property(
358                            nested_name,
359                            nested_schema,
360                            nested_nullable,
361                            errors,
362                        ) {
363                            Ok(mut nested_cols) => {
364                                // Prefix nested columns with parent property name
365                                for col in nested_cols.iter_mut() {
366                                    col.name = format!("{}.{}", prop_name, col.name);
367                                }
368                                columns.append(&mut nested_cols);
369                            }
370                            Err(e) => {
371                                errors.push(ParserError {
372                                    error_type: "parse_error".to_string(),
373                                    field: Some(format!("{}.{}", prop_name, nested_name)),
374                                    message: format!("Failed to parse nested property: {}", e),
375                                });
376                            }
377                        }
378                    }
379                } else {
380                    // Object without properties - treat as STRUCT
381                    columns.push(Column {
382                        name: prop_name.to_string(),
383                        data_type: "STRUCT".to_string(),
384                        nullable,
385                        primary_key: false,
386                        secondary_key: false,
387                        composite_key: None,
388                        foreign_key: None,
389                        constraints: Vec::new(),
390                        description,
391                        quality: Vec::new(),
392                        ref_path: None,
393                        enum_values: Vec::new(),
394                        errors: Vec::new(),
395                        column_order: 0,
396                    });
397                }
398            }
399            "array" => {
400                // Array type
401                let items = prop_obj
402                    .get("items")
403                    .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
404
405                let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
406                {
407                    if items_str == "object" {
408                        // Array of objects - create nested columns
409                        if let Some(nested_props) =
410                            items.get("properties").and_then(|v| v.as_object())
411                        {
412                            let nested_required: Vec<String> = items
413                                .get("required")
414                                .and_then(|v| v.as_array())
415                                .map(|arr| {
416                                    arr.iter()
417                                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
418                                        .collect()
419                                })
420                                .unwrap_or_default();
421
422                            for (nested_name, nested_schema) in nested_props {
423                                let nested_nullable = !nested_required.contains(nested_name);
424                                match self.parse_property(
425                                    nested_name,
426                                    nested_schema,
427                                    nested_nullable,
428                                    errors,
429                                ) {
430                                    Ok(mut nested_cols) => {
431                                        for col in nested_cols.iter_mut() {
432                                            col.name = format!("{}.{}", prop_name, col.name);
433                                        }
434                                        columns.append(&mut nested_cols);
435                                    }
436                                    Err(e) => {
437                                        errors.push(ParserError {
438                                            error_type: "parse_error".to_string(),
439                                            field: Some(format!("{}.{}", prop_name, nested_name)),
440                                            message: format!(
441                                                "Failed to parse array item property: {}",
442                                                e
443                                            ),
444                                        });
445                                    }
446                                }
447                            }
448                            return Ok(columns);
449                        } else {
450                            "ARRAY<STRUCT>".to_string()
451                        }
452                    } else {
453                        format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
454                    }
455                } else {
456                    "ARRAY<STRING>".to_string()
457                };
458
459                columns.push(Column {
460                    name: prop_name.to_string(),
461                    data_type,
462                    nullable,
463                    primary_key: false,
464                    secondary_key: false,
465                    composite_key: None,
466                    foreign_key: None,
467                    constraints: Vec::new(),
468                    description,
469                    quality: Vec::new(),
470                    ref_path: None,
471                    enum_values: Vec::new(),
472                    errors: Vec::new(),
473                    column_order: 0,
474                });
475            }
476            _ => {
477                // Simple type
478                let data_type = self.map_json_type_to_sql(prop_type);
479                columns.push(Column {
480                    name: prop_name.to_string(),
481                    data_type,
482                    nullable,
483                    primary_key: false,
484                    secondary_key: false,
485                    composite_key: None,
486                    foreign_key: None,
487                    constraints: Vec::new(),
488                    description,
489                    quality: Vec::new(),
490                    ref_path: None,
491                    enum_values: Vec::new(),
492                    errors: Vec::new(),
493                    column_order: 0,
494                });
495            }
496        }
497
498        Ok(columns)
499    }
500
501    /// Map JSON Schema type to SQL/ODCL data type.
502    fn map_json_type_to_sql(&self, json_type: &str) -> String {
503        match json_type {
504            "integer" => "INTEGER".to_string(),
505            "number" => "DOUBLE".to_string(),
506            "boolean" => "BOOLEAN".to_string(),
507            "string" => "STRING".to_string(),
508            "null" => "NULL".to_string(),
509            _ => "STRING".to_string(), // Default fallback
510        }
511    }
512}
513
/// Parser error structure (matches ODCL parser format).
///
/// Describes one recoverable problem found while parsing; the importer
/// collects these instead of aborting on the first failure.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Error category; this importer always uses `"parse_error"`.
    pub error_type: String,
    /// Dotted location of the failing schema element (for example
    /// `definitions.<name>` or `properties.<name>`), or `None` when the
    /// failure concerns the root schema.
    pub field: Option<String>,
    /// Human-readable description of the failure.
    pub message: String,
}