1use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, Table, Tag};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use std::str::FromStr;
16use tracing::{info, warn};
17
/// Stateless importer that converts JSON Schema documents into table definitions.
///
/// The type is a unit struct: it carries no configuration, so copies are free
/// and every instance behaves identically. `Debug` is derived so the importer
/// can be logged or embedded in other `Debug` types.
#[derive(Debug, Clone, Copy)]
pub struct JSONSchemaImporter;
20
21impl Default for JSONSchemaImporter {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl JSONSchemaImporter {
28 pub fn new() -> Self {
38 Self
39 }
40
41 pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
70 match self.parse(json_content) {
71 Ok((tables, errors)) => {
72 let mut sdk_tables = Vec::new();
73 for (idx, table) in tables.iter().enumerate() {
74 sdk_tables.push(TableData {
75 table_index: idx,
76 name: Some(table.name.clone()),
77 columns: table
78 .columns
79 .iter()
80 .map(|c| super::ColumnData {
81 name: c.name.clone(),
82 data_type: c.data_type.clone(),
83 nullable: c.nullable,
84 primary_key: c.primary_key,
85 description: if c.description.is_empty() {
86 None
87 } else {
88 Some(c.description.clone())
89 },
90 quality: if c.quality.is_empty() {
91 None
92 } else {
93 Some(c.quality.clone())
94 },
95 ref_path: c.ref_path.clone(),
96 enum_values: if c.enum_values.is_empty() {
97 None
98 } else {
99 Some(c.enum_values.clone())
100 },
101 })
102 .collect(),
103 });
104 }
105 let sdk_errors: Vec<ImportError> = errors
106 .iter()
107 .map(|e| ImportError::ParseError(e.message.clone()))
108 .collect();
109 Ok(ImportResult {
110 tables: sdk_tables,
111 tables_requiring_name: Vec::new(),
112 errors: sdk_errors,
113 ai_suggestions: None,
114 })
115 }
116 Err(e) => Err(ImportError::ParseError(e.to_string())),
117 }
118 }
119
120 fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
126 let mut errors = Vec::new();
127
128 let schema: Value =
130 serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
131
132 let mut tables = Vec::new();
133
134 if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
136 for (name, def_schema) in definitions {
138 match self.parse_schema(def_schema, Some(name), &mut errors) {
139 Ok(table) => tables.push(table),
140 Err(e) => {
141 errors.push(ParserError {
142 error_type: "parse_error".to_string(),
143 field: Some(format!("definitions.{}", name)),
144 message: format!("Failed to parse schema: {}", e),
145 });
146 }
147 }
148 }
149 } else {
150 match self.parse_schema(&schema, None, &mut errors) {
152 Ok(table) => tables.push(table),
153 Err(e) => {
154 errors.push(ParserError {
155 error_type: "parse_error".to_string(),
156 field: None,
157 message: format!("Failed to parse schema: {}", e),
158 });
159 }
160 }
161 }
162
163 Ok((tables, errors))
164 }
165
166 fn parse_schema(
168 &self,
169 schema: &Value,
170 name_override: Option<&str>,
171 errors: &mut Vec<ParserError>,
172 ) -> Result<Table> {
173 let schema_obj = schema
174 .as_object()
175 .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
176
177 let name = name_override
179 .map(|s| s.to_string())
180 .or_else(|| {
181 schema_obj
182 .get("title")
183 .or_else(|| schema_obj.get("name"))
184 .and_then(|v| v.as_str())
185 .map(|s| s.to_string())
186 })
187 .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
188
189 if let Err(e) = validate_table_name(&name) {
191 warn!("Table name validation warning for '{}': {}", name, e);
192 }
193
194 let description = schema_obj
196 .get("description")
197 .and_then(|v| v.as_str())
198 .map(|s| s.to_string())
199 .unwrap_or_default();
200
201 let properties = schema_obj
203 .get("properties")
204 .and_then(|v| v.as_object())
205 .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
206
207 let required_fields: Vec<String> = schema_obj
209 .get("required")
210 .and_then(|v| v.as_array())
211 .map(|arr| {
212 arr.iter()
213 .filter_map(|v| v.as_str().map(|s| s.to_string()))
214 .collect()
215 })
216 .unwrap_or_default();
217
218 let mut columns = Vec::new();
219 for (prop_name, prop_schema) in properties {
220 let nullable = !required_fields.contains(prop_name);
221 match self.parse_property(prop_name, prop_schema, nullable, errors) {
222 Ok(mut cols) => columns.append(&mut cols),
223 Err(e) => {
224 errors.push(ParserError {
225 error_type: "parse_error".to_string(),
226 field: Some(format!("properties.{}", prop_name)),
227 message: format!("Failed to parse property: {}", e),
228 });
229 }
230 }
231 }
232
233 let mut tags: Vec<Tag> = Vec::new();
235 if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
236 for item in tags_arr {
237 if let Some(s) = item.as_str() {
238 if let Ok(tag) = Tag::from_str(s) {
239 tags.push(tag);
240 } else {
241 tags.push(Tag::Simple(s.to_string()));
242 }
243 }
244 }
245 }
246 if let Some(custom_props) = schema_obj
248 .get("customProperties")
249 .and_then(|v| v.as_object())
250 && let Some(tags_val) = custom_props.get("tags")
251 && let Some(tags_arr) = tags_val.as_array()
252 {
253 for item in tags_arr {
254 if let Some(s) = item.as_str() {
255 if let Ok(tag) = Tag::from_str(s) {
256 if !tags.contains(&tag) {
257 tags.push(tag);
258 }
259 } else {
260 let simple_tag = Tag::Simple(s.to_string());
261 if !tags.contains(&simple_tag) {
262 tags.push(simple_tag);
263 }
264 }
265 }
266 }
267 }
268
269 let mut odcl_metadata = HashMap::new();
271 if !description.is_empty() {
272 odcl_metadata.insert("description".to_string(), json!(description));
273 }
274
275 let table = Table {
276 id: crate::models::table::Table::generate_id(&name, None, None, None),
277 name: name.clone(),
278 columns,
279 database_type: None,
280 catalog_name: None,
281 schema_name: None,
282 medallion_layers: Vec::new(),
283 scd_pattern: None,
284 data_vault_classification: None,
285 modeling_level: None,
286 tags,
287 odcl_metadata,
288 owner: None,
289 sla: None,
290 contact_details: None,
291 infrastructure_type: None,
292 notes: None,
293 position: None,
294 yaml_file_path: None,
295 drawio_cell_id: None,
296 quality: Vec::new(),
297 errors: Vec::new(),
298 created_at: chrono::Utc::now(),
299 updated_at: chrono::Utc::now(),
300 };
301
302 info!(
303 "Parsed JSON Schema: {} with {} columns",
304 name,
305 table.columns.len()
306 );
307 Ok(table)
308 }
309
310 fn parse_property(
312 &self,
313 prop_name: &str,
314 prop_schema: &Value,
315 nullable: bool,
316 errors: &mut Vec<ParserError>,
317 ) -> Result<Vec<Column>> {
318 if let Err(e) = validate_column_name(prop_name) {
320 warn!("Column name validation warning for '{}': {}", prop_name, e);
321 }
322
323 let prop_obj = prop_schema
324 .as_object()
325 .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
326
327 if let Some(ref_path) = prop_obj.get("$ref").and_then(|v| v.as_str()) {
329 let description = prop_obj
331 .get("description")
332 .and_then(|v| v.as_str())
333 .map(|s| s.to_string())
334 .unwrap_or_default();
335
336 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
337
338 return Ok(vec![Column {
339 name: prop_name.to_string(),
340 data_type: "STRING".to_string(), nullable,
342 primary_key: false,
343 secondary_key: false,
344 composite_key: None,
345 foreign_key: None,
346 constraints: Vec::new(),
347 description,
348 quality: quality_rules,
349 ref_path: Some(ref_path.to_string()),
350 enum_values: Vec::new(),
351 errors: Vec::new(),
352 column_order: 0,
353 }]);
354 }
355
356 let prop_type = prop_obj
357 .get("type")
358 .and_then(|v| v.as_str())
359 .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
360
361 let mapped_type = self.map_json_type_to_sql(prop_type);
363 if let Err(e) = validate_data_type(&mapped_type) {
364 warn!("Data type validation warning for '{}': {}", mapped_type, e);
365 }
366
367 let description = prop_obj
368 .get("description")
369 .and_then(|v| v.as_str())
370 .map(|s| s.to_string())
371 .unwrap_or_default();
372
373 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
375 let enum_values = self.extract_enum_values(prop_obj);
376
377 let mut columns = Vec::new();
378
379 match prop_type {
380 "object" => {
381 if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
383 let nested_required: Vec<String> = prop_obj
384 .get("required")
385 .and_then(|v| v.as_array())
386 .map(|arr| {
387 arr.iter()
388 .filter_map(|v| v.as_str().map(|s| s.to_string()))
389 .collect()
390 })
391 .unwrap_or_default();
392
393 for (nested_name, nested_schema) in nested_props {
394 let nested_nullable = !nested_required.contains(nested_name);
395 match self.parse_property(
396 nested_name,
397 nested_schema,
398 nested_nullable,
399 errors,
400 ) {
401 Ok(mut nested_cols) => {
402 for col in nested_cols.iter_mut() {
404 col.name = format!("{}.{}", prop_name, col.name);
405 }
406 columns.append(&mut nested_cols);
407 }
408 Err(e) => {
409 errors.push(ParserError {
410 error_type: "parse_error".to_string(),
411 field: Some(format!("{}.{}", prop_name, nested_name)),
412 message: format!("Failed to parse nested property: {}", e),
413 });
414 }
415 }
416 }
417 let object_quality = self.extract_validation_keywords(prop_obj, prop_name);
420 if !object_quality.is_empty() && !columns.is_empty() {
421 columns[0].quality.extend(object_quality);
423 }
424 } else {
425 let struct_quality = self.extract_validation_keywords(prop_obj, prop_name);
427 columns.push(Column {
428 name: prop_name.to_string(),
429 data_type: "STRUCT".to_string(),
430 nullable,
431 primary_key: false,
432 secondary_key: false,
433 composite_key: None,
434 foreign_key: None,
435 constraints: Vec::new(),
436 description,
437 quality: struct_quality,
438 ref_path: None,
439 enum_values: Vec::new(),
440 errors: Vec::new(),
441 column_order: 0,
442 });
443 }
444 }
445 "array" => {
446 let items = prop_obj
448 .get("items")
449 .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
450
451 let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
452 {
453 if items_str == "object" {
454 if let Some(nested_props) =
456 items.get("properties").and_then(|v| v.as_object())
457 {
458 let nested_required: Vec<String> = items
459 .get("required")
460 .and_then(|v| v.as_array())
461 .map(|arr| {
462 arr.iter()
463 .filter_map(|v| v.as_str().map(|s| s.to_string()))
464 .collect()
465 })
466 .unwrap_or_default();
467
468 for (nested_name, nested_schema) in nested_props {
469 let nested_nullable = !nested_required.contains(nested_name);
470 match self.parse_property(
471 nested_name,
472 nested_schema,
473 nested_nullable,
474 errors,
475 ) {
476 Ok(mut nested_cols) => {
477 for col in nested_cols.iter_mut() {
478 col.name = format!("{}.{}", prop_name, col.name);
479 }
480 columns.append(&mut nested_cols);
481 }
482 Err(e) => {
483 errors.push(ParserError {
484 error_type: "parse_error".to_string(),
485 field: Some(format!("{}.{}", prop_name, nested_name)),
486 message: format!(
487 "Failed to parse array item property: {}",
488 e
489 ),
490 });
491 }
492 }
493 }
494 return Ok(columns);
495 } else {
496 "ARRAY<STRUCT>".to_string()
497 }
498 } else {
499 format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
500 }
501 } else {
502 "ARRAY<STRING>".to_string()
503 };
504
505 let mut array_quality = self.extract_validation_keywords(prop_obj, prop_name);
507 if let Some(items_obj) = items.as_object() {
509 let items_quality = self.extract_validation_keywords(items_obj, prop_name);
510 array_quality.extend(items_quality);
511 }
512
513 columns.push(Column {
514 name: prop_name.to_string(),
515 data_type,
516 nullable,
517 primary_key: false,
518 secondary_key: false,
519 composite_key: None,
520 foreign_key: None,
521 constraints: Vec::new(),
522 description,
523 quality: array_quality,
524 ref_path: None,
525 enum_values: Vec::new(),
526 errors: Vec::new(),
527 column_order: 0,
528 });
529 }
530 _ => {
531 let data_type = self.map_json_type_to_sql(prop_type);
533 columns.push(Column {
534 name: prop_name.to_string(),
535 data_type,
536 nullable,
537 primary_key: false,
538 secondary_key: false,
539 composite_key: None,
540 foreign_key: None,
541 constraints: Vec::new(),
542 description,
543 quality: quality_rules,
544 ref_path: None,
545 enum_values: enum_values.clone(),
546 errors: Vec::new(),
547 column_order: 0,
548 });
549 }
550 }
551
552 Ok(columns)
553 }
554
555 fn map_json_type_to_sql(&self, json_type: &str) -> String {
557 match json_type {
558 "integer" => "INTEGER".to_string(),
559 "number" => "DOUBLE".to_string(),
560 "boolean" => "BOOLEAN".to_string(),
561 "string" => "STRING".to_string(),
562 "null" => "NULL".to_string(),
563 _ => "STRING".to_string(), }
565 }
566
567 fn extract_validation_keywords(
569 &self,
570 prop_obj: &serde_json::Map<String, Value>,
571 _prop_name: &str,
572 ) -> Vec<HashMap<String, serde_json::Value>> {
573 let mut quality_rules = Vec::new();
574
575 if let Some(pattern) = prop_obj.get("pattern").and_then(|v| v.as_str()) {
577 let mut rule = HashMap::new();
578 rule.insert("type".to_string(), json!("pattern"));
579 rule.insert("pattern".to_string(), json!(pattern));
580 rule.insert("source".to_string(), json!("json_schema"));
581 quality_rules.push(rule);
582 }
583
584 if let Some(minimum) = prop_obj.get("minimum") {
586 let mut rule = HashMap::new();
587 rule.insert("type".to_string(), json!("minimum"));
588 rule.insert("value".to_string(), minimum.clone());
589 rule.insert("source".to_string(), json!("json_schema"));
590 if let Some(exclusive_minimum) = prop_obj.get("exclusiveMinimum") {
591 rule.insert("exclusive".to_string(), exclusive_minimum.clone());
592 }
593 quality_rules.push(rule);
594 }
595
596 if let Some(maximum) = prop_obj.get("maximum") {
598 let mut rule = HashMap::new();
599 rule.insert("type".to_string(), json!("maximum"));
600 rule.insert("value".to_string(), maximum.clone());
601 rule.insert("source".to_string(), json!("json_schema"));
602 if let Some(exclusive_maximum) = prop_obj.get("exclusiveMaximum") {
603 rule.insert("exclusive".to_string(), exclusive_maximum.clone());
604 }
605 quality_rules.push(rule);
606 }
607
608 if let Some(min_length) = prop_obj.get("minLength").and_then(|v| v.as_u64()) {
610 let mut rule = HashMap::new();
611 rule.insert("type".to_string(), json!("minLength"));
612 rule.insert("value".to_string(), json!(min_length));
613 rule.insert("source".to_string(), json!("json_schema"));
614 quality_rules.push(rule);
615 }
616
617 if let Some(max_length) = prop_obj.get("maxLength").and_then(|v| v.as_u64()) {
619 let mut rule = HashMap::new();
620 rule.insert("type".to_string(), json!("maxLength"));
621 rule.insert("value".to_string(), json!(max_length));
622 rule.insert("source".to_string(), json!("json_schema"));
623 quality_rules.push(rule);
624 }
625
626 if let Some(multiple_of) = prop_obj.get("multipleOf") {
628 let mut rule = HashMap::new();
629 rule.insert("type".to_string(), json!("multipleOf"));
630 rule.insert("value".to_string(), multiple_of.clone());
631 rule.insert("source".to_string(), json!("json_schema"));
632 quality_rules.push(rule);
633 }
634
635 if let Some(const_val) = prop_obj.get("const") {
637 let mut rule = HashMap::new();
638 rule.insert("type".to_string(), json!("const"));
639 rule.insert("value".to_string(), const_val.clone());
640 rule.insert("source".to_string(), json!("json_schema"));
641 quality_rules.push(rule);
642 }
643
644 if let Some(min_items) = prop_obj.get("minItems").and_then(|v| v.as_u64()) {
646 let mut rule = HashMap::new();
647 rule.insert("type".to_string(), json!("minItems"));
648 rule.insert("value".to_string(), json!(min_items));
649 rule.insert("source".to_string(), json!("json_schema"));
650 quality_rules.push(rule);
651 }
652
653 if let Some(max_items) = prop_obj.get("maxItems").and_then(|v| v.as_u64()) {
655 let mut rule = HashMap::new();
656 rule.insert("type".to_string(), json!("maxItems"));
657 rule.insert("value".to_string(), json!(max_items));
658 rule.insert("source".to_string(), json!("json_schema"));
659 quality_rules.push(rule);
660 }
661
662 if let Some(unique_items) = prop_obj.get("uniqueItems").and_then(|v| v.as_bool())
664 && unique_items
665 {
666 let mut rule = HashMap::new();
667 rule.insert("type".to_string(), json!("uniqueItems"));
668 rule.insert("value".to_string(), json!(true));
669 rule.insert("source".to_string(), json!("json_schema"));
670 quality_rules.push(rule);
671 }
672
673 if let Some(min_props) = prop_obj.get("minProperties").and_then(|v| v.as_u64()) {
675 let mut rule = HashMap::new();
676 rule.insert("type".to_string(), json!("minProperties"));
677 rule.insert("value".to_string(), json!(min_props));
678 rule.insert("source".to_string(), json!("json_schema"));
679 quality_rules.push(rule);
680 }
681
682 if let Some(max_props) = prop_obj.get("maxProperties").and_then(|v| v.as_u64()) {
684 let mut rule = HashMap::new();
685 rule.insert("type".to_string(), json!("maxProperties"));
686 rule.insert("value".to_string(), json!(max_props));
687 rule.insert("source".to_string(), json!("json_schema"));
688 quality_rules.push(rule);
689 }
690
691 if let Some(additional_props) = prop_obj.get("additionalProperties") {
693 let mut rule = HashMap::new();
694 rule.insert("type".to_string(), json!("additionalProperties"));
695 rule.insert("value".to_string(), additional_props.clone());
696 rule.insert("source".to_string(), json!("json_schema"));
697 quality_rules.push(rule);
698 }
699
700 if let Some(format_val) = prop_obj.get("format").and_then(|v| v.as_str()) {
702 let mut rule = HashMap::new();
703 rule.insert("type".to_string(), json!("format"));
704 rule.insert("value".to_string(), json!(format_val));
705 rule.insert("source".to_string(), json!("json_schema"));
706 quality_rules.push(rule);
707 }
708
709 for keyword in &["allOf", "anyOf", "oneOf", "not"] {
711 if let Some(value) = prop_obj.get(*keyword) {
712 let mut rule = HashMap::new();
713 rule.insert("type".to_string(), json!(*keyword));
714 rule.insert("value".to_string(), value.clone());
715 rule.insert("source".to_string(), json!("json_schema"));
716 quality_rules.push(rule);
717 }
718 }
719
720 quality_rules
721 }
722
723 fn extract_enum_values(&self, prop_obj: &serde_json::Map<String, Value>) -> Vec<String> {
725 prop_obj
726 .get("enum")
727 .and_then(|v| v.as_array())
728 .map(|arr| {
729 arr.iter()
730 .filter_map(|v| {
731 match v {
733 Value::String(s) => Some(s.clone()),
734 Value::Number(n) => Some(n.to_string()),
735 Value::Bool(b) => Some(b.to_string()),
736 Value::Null => Some("null".to_string()),
737 _ => serde_json::to_string(v).ok(),
738 }
739 })
740 .collect()
741 })
742 .unwrap_or_default()
743 }
744}
745
/// A non-fatal problem recorded while converting a JSON Schema into tables.
#[derive(Clone, Debug)]
pub struct ParserError {
    /// Category label for the error; this importer always sets `"parse_error"`.
    pub error_type: String,
    /// Path of the schema element that failed (for example
    /// `"definitions.<name>"` or `"properties.<name>"`), when one is known.
    pub field: Option<String>,
    /// Human-readable description of the failure.
    pub message: String,
}