1use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, PropertyRelationship, Table, Tag};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use std::str::FromStr;
16use tracing::{info, warn};
17
18fn ref_to_relationships(ref_path: &Option<String>) -> Vec<PropertyRelationship> {
20 match ref_path {
21 Some(ref_str) => {
22 let to = if ref_str.starts_with("#/definitions/") {
23 let def_path = ref_str.strip_prefix("#/definitions/").unwrap_or(ref_str);
24 format!("definitions/{}", def_path)
25 } else if ref_str.starts_with("#/") {
26 ref_str.strip_prefix("#/").unwrap_or(ref_str).to_string()
27 } else {
28 ref_str.clone()
29 };
30 vec![PropertyRelationship {
31 relationship_type: "foreignKey".to_string(),
32 to,
33 }]
34 }
35 None => Vec::new(),
36 }
37}
38
/// Importer that converts JSON Schema documents into table definitions.
///
/// The type is a stateless unit struct: it holds no configuration, so every
/// instance is interchangeable.
#[derive(Debug, Clone)]
pub struct JSONSchemaImporter;

impl Default for JSONSchemaImporter {
    /// Returns the stateless importer (the type has exactly one value).
    fn default() -> Self {
        Self
    }
}
47
48impl JSONSchemaImporter {
49 pub fn new() -> Self {
59 Self
60 }
61
62 pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
91 match self.parse(json_content) {
92 Ok((tables, errors)) => {
93 let mut sdk_tables = Vec::new();
94 for (idx, table) in tables.iter().enumerate() {
95 sdk_tables.push(TableData {
96 table_index: idx,
97 name: Some(table.name.clone()),
98 columns: table
99 .columns
100 .iter()
101 .map(|c| super::ColumnData {
102 name: c.name.clone(),
103 data_type: c.data_type.clone(),
104 physical_type: c.physical_type.clone(),
105 nullable: c.nullable,
106 primary_key: c.primary_key,
107 description: if c.description.is_empty() {
108 None
109 } else {
110 Some(c.description.clone())
111 },
112 quality: if c.quality.is_empty() {
113 None
114 } else {
115 Some(c.quality.clone())
116 },
117 relationships: c.relationships.clone(),
118 enum_values: if c.enum_values.is_empty() {
119 None
120 } else {
121 Some(c.enum_values.clone())
122 },
123 })
124 .collect(),
125 });
126 }
127 let sdk_errors: Vec<ImportError> = errors
128 .iter()
129 .map(|e| ImportError::ParseError(e.message.clone()))
130 .collect();
131 Ok(ImportResult {
132 tables: sdk_tables,
133 tables_requiring_name: Vec::new(),
134 errors: sdk_errors,
135 ai_suggestions: None,
136 })
137 }
138 Err(e) => Err(ImportError::ParseError(e.to_string())),
139 }
140 }
141
142 fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
148 let mut errors = Vec::new();
149
150 let schema: Value =
152 serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
153
154 let mut tables = Vec::new();
155
156 if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
158 for (name, def_schema) in definitions {
160 match self.parse_schema(def_schema, Some(name), &mut errors) {
161 Ok(table) => tables.push(table),
162 Err(e) => {
163 errors.push(ParserError {
164 error_type: "parse_error".to_string(),
165 field: Some(format!("definitions.{}", name)),
166 message: format!("Failed to parse schema: {}", e),
167 });
168 }
169 }
170 }
171 } else {
172 match self.parse_schema(&schema, None, &mut errors) {
174 Ok(table) => tables.push(table),
175 Err(e) => {
176 errors.push(ParserError {
177 error_type: "parse_error".to_string(),
178 field: None,
179 message: format!("Failed to parse schema: {}", e),
180 });
181 }
182 }
183 }
184
185 Ok((tables, errors))
186 }
187
188 fn parse_schema(
190 &self,
191 schema: &Value,
192 name_override: Option<&str>,
193 errors: &mut Vec<ParserError>,
194 ) -> Result<Table> {
195 let schema_obj = schema
196 .as_object()
197 .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
198
199 let name = name_override
201 .map(|s| s.to_string())
202 .or_else(|| {
203 schema_obj
204 .get("title")
205 .or_else(|| schema_obj.get("name"))
206 .and_then(|v| v.as_str())
207 .map(|s| s.to_string())
208 })
209 .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
210
211 if let Err(e) = validate_table_name(&name) {
213 warn!("Table name validation warning for '{}': {}", name, e);
214 }
215
216 let description = schema_obj
218 .get("description")
219 .and_then(|v| v.as_str())
220 .map(|s| s.to_string())
221 .unwrap_or_default();
222
223 let properties = schema_obj
225 .get("properties")
226 .and_then(|v| v.as_object())
227 .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
228
229 let required_fields: Vec<String> = schema_obj
231 .get("required")
232 .and_then(|v| v.as_array())
233 .map(|arr| {
234 arr.iter()
235 .filter_map(|v| v.as_str().map(|s| s.to_string()))
236 .collect()
237 })
238 .unwrap_or_default();
239
240 let mut columns = Vec::new();
241 for (prop_name, prop_schema) in properties {
242 let nullable = !required_fields.contains(prop_name);
243 match self.parse_property(prop_name, prop_schema, nullable, errors) {
244 Ok(mut cols) => columns.append(&mut cols),
245 Err(e) => {
246 errors.push(ParserError {
247 error_type: "parse_error".to_string(),
248 field: Some(format!("properties.{}", prop_name)),
249 message: format!("Failed to parse property: {}", e),
250 });
251 }
252 }
253 }
254
255 let mut tags: Vec<Tag> = Vec::new();
257 if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
258 for item in tags_arr {
259 if let Some(s) = item.as_str() {
260 if let Ok(tag) = Tag::from_str(s) {
261 tags.push(tag);
262 } else {
263 tags.push(Tag::Simple(s.to_string()));
264 }
265 }
266 }
267 }
268 if let Some(custom_props) = schema_obj
270 .get("customProperties")
271 .and_then(|v| v.as_object())
272 && let Some(tags_val) = custom_props.get("tags")
273 && let Some(tags_arr) = tags_val.as_array()
274 {
275 for item in tags_arr {
276 if let Some(s) = item.as_str() {
277 if let Ok(tag) = Tag::from_str(s) {
278 if !tags.contains(&tag) {
279 tags.push(tag);
280 }
281 } else {
282 let simple_tag = Tag::Simple(s.to_string());
283 if !tags.contains(&simple_tag) {
284 tags.push(simple_tag);
285 }
286 }
287 }
288 }
289 }
290
291 let mut odcl_metadata = HashMap::new();
293 if !description.is_empty() {
294 odcl_metadata.insert("description".to_string(), json!(description));
295 }
296
297 let table = Table {
298 id: crate::models::table::Table::generate_id(&name, None, None, None),
299 name: name.clone(),
300 columns,
301 database_type: None,
302 catalog_name: None,
303 schema_name: None,
304 medallion_layers: Vec::new(),
305 scd_pattern: None,
306 data_vault_classification: None,
307 modeling_level: None,
308 tags,
309 odcl_metadata,
310 owner: None,
311 sla: None,
312 contact_details: None,
313 infrastructure_type: None,
314 notes: None,
315 position: None,
316 yaml_file_path: None,
317 drawio_cell_id: None,
318 quality: Vec::new(),
319 errors: Vec::new(),
320 created_at: chrono::Utc::now(),
321 updated_at: chrono::Utc::now(),
322 };
323
324 info!(
325 "Parsed JSON Schema: {} with {} columns",
326 name,
327 table.columns.len()
328 );
329 Ok(table)
330 }
331
332 fn parse_property(
334 &self,
335 prop_name: &str,
336 prop_schema: &Value,
337 nullable: bool,
338 errors: &mut Vec<ParserError>,
339 ) -> Result<Vec<Column>> {
340 if let Err(e) = validate_column_name(prop_name) {
342 warn!("Column name validation warning for '{}': {}", prop_name, e);
343 }
344
345 let prop_obj = prop_schema
346 .as_object()
347 .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
348
349 if let Some(ref_path) = prop_obj.get("$ref").and_then(|v| v.as_str()) {
351 let description = prop_obj
353 .get("description")
354 .and_then(|v| v.as_str())
355 .map(|s| s.to_string())
356 .unwrap_or_default();
357
358 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
359
360 return Ok(vec![Column {
361 name: prop_name.to_string(),
362 data_type: "STRING".to_string(), physical_type: None,
364 nullable,
365 primary_key: false,
366 secondary_key: false,
367 composite_key: None,
368 foreign_key: None,
369 constraints: Vec::new(),
370 description,
371 quality: quality_rules,
372 relationships: ref_to_relationships(&Some(ref_path.to_string())),
373 enum_values: Vec::new(),
374 errors: Vec::new(),
375 column_order: 0,
376 nested_data: None,
377 }]);
378 }
379
380 let prop_type = prop_obj
381 .get("type")
382 .and_then(|v| v.as_str())
383 .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
384
385 let mapped_type = self.map_json_type_to_sql(prop_type);
387 if let Err(e) = validate_data_type(&mapped_type) {
388 warn!("Data type validation warning for '{}': {}", mapped_type, e);
389 }
390
391 let description = prop_obj
392 .get("description")
393 .and_then(|v| v.as_str())
394 .map(|s| s.to_string())
395 .unwrap_or_default();
396
397 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
399 let enum_values = self.extract_enum_values(prop_obj);
400
401 let mut columns = Vec::new();
402
403 match prop_type {
404 "object" => {
405 if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
407 let nested_required: Vec<String> = prop_obj
408 .get("required")
409 .and_then(|v| v.as_array())
410 .map(|arr| {
411 arr.iter()
412 .filter_map(|v| v.as_str().map(|s| s.to_string()))
413 .collect()
414 })
415 .unwrap_or_default();
416
417 for (nested_name, nested_schema) in nested_props {
418 let nested_nullable = !nested_required.contains(nested_name);
419 match self.parse_property(
420 nested_name,
421 nested_schema,
422 nested_nullable,
423 errors,
424 ) {
425 Ok(mut nested_cols) => {
426 for col in nested_cols.iter_mut() {
428 col.name = format!("{}.{}", prop_name, col.name);
429 }
430 columns.append(&mut nested_cols);
431 }
432 Err(e) => {
433 errors.push(ParserError {
434 error_type: "parse_error".to_string(),
435 field: Some(format!("{}.{}", prop_name, nested_name)),
436 message: format!("Failed to parse nested property: {}", e),
437 });
438 }
439 }
440 }
441 let object_quality = self.extract_validation_keywords(prop_obj, prop_name);
444 if !object_quality.is_empty() && !columns.is_empty() {
445 columns[0].quality.extend(object_quality);
447 }
448 } else {
449 let struct_quality = self.extract_validation_keywords(prop_obj, prop_name);
451 columns.push(Column {
452 name: prop_name.to_string(),
453 data_type: "STRUCT".to_string(),
454 physical_type: None,
455 nullable,
456 primary_key: false,
457 secondary_key: false,
458 composite_key: None,
459 foreign_key: None,
460 constraints: Vec::new(),
461 description,
462 quality: struct_quality,
463 relationships: Vec::new(),
464 enum_values: Vec::new(),
465 errors: Vec::new(),
466 column_order: 0,
467 nested_data: None,
468 });
469 }
470 }
471 "array" => {
472 let items = prop_obj
474 .get("items")
475 .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
476
477 let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
478 {
479 if items_str == "object" {
480 if let Some(nested_props) =
482 items.get("properties").and_then(|v| v.as_object())
483 {
484 let nested_required: Vec<String> = items
485 .get("required")
486 .and_then(|v| v.as_array())
487 .map(|arr| {
488 arr.iter()
489 .filter_map(|v| v.as_str().map(|s| s.to_string()))
490 .collect()
491 })
492 .unwrap_or_default();
493
494 for (nested_name, nested_schema) in nested_props {
495 let nested_nullable = !nested_required.contains(nested_name);
496 match self.parse_property(
497 nested_name,
498 nested_schema,
499 nested_nullable,
500 errors,
501 ) {
502 Ok(mut nested_cols) => {
503 for col in nested_cols.iter_mut() {
504 col.name = format!("{}.{}", prop_name, col.name);
505 }
506 columns.append(&mut nested_cols);
507 }
508 Err(e) => {
509 errors.push(ParserError {
510 error_type: "parse_error".to_string(),
511 field: Some(format!("{}.{}", prop_name, nested_name)),
512 message: format!(
513 "Failed to parse array item property: {}",
514 e
515 ),
516 });
517 }
518 }
519 }
520 return Ok(columns);
521 } else {
522 "ARRAY<STRUCT>".to_string()
523 }
524 } else {
525 format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
526 }
527 } else {
528 "ARRAY<STRING>".to_string()
529 };
530
531 let mut array_quality = self.extract_validation_keywords(prop_obj, prop_name);
533 if let Some(items_obj) = items.as_object() {
535 let items_quality = self.extract_validation_keywords(items_obj, prop_name);
536 array_quality.extend(items_quality);
537 }
538
539 columns.push(Column {
540 name: prop_name.to_string(),
541 data_type,
542 physical_type: None,
543 nullable,
544 primary_key: false,
545 secondary_key: false,
546 composite_key: None,
547 foreign_key: None,
548 constraints: Vec::new(),
549 description,
550 quality: array_quality,
551 relationships: Vec::new(),
552 enum_values: Vec::new(),
553 errors: Vec::new(),
554 column_order: 0,
555 nested_data: None,
556 });
557 }
558 _ => {
559 let data_type = self.map_json_type_to_sql(prop_type);
561 columns.push(Column {
562 name: prop_name.to_string(),
563 data_type,
564 physical_type: None,
565 nullable,
566 primary_key: false,
567 secondary_key: false,
568 composite_key: None,
569 foreign_key: None,
570 constraints: Vec::new(),
571 description,
572 quality: quality_rules,
573 relationships: Vec::new(),
574 enum_values: enum_values.clone(),
575 errors: Vec::new(),
576 column_order: 0,
577 nested_data: None,
578 });
579 }
580 }
581
582 Ok(columns)
583 }
584
585 fn map_json_type_to_sql(&self, json_type: &str) -> String {
587 match json_type {
588 "integer" => "INTEGER".to_string(),
589 "number" => "DOUBLE".to_string(),
590 "boolean" => "BOOLEAN".to_string(),
591 "string" => "STRING".to_string(),
592 "null" => "NULL".to_string(),
593 _ => "STRING".to_string(), }
595 }
596
597 fn extract_validation_keywords(
599 &self,
600 prop_obj: &serde_json::Map<String, Value>,
601 _prop_name: &str,
602 ) -> Vec<HashMap<String, serde_json::Value>> {
603 let mut quality_rules = Vec::new();
604
605 if let Some(pattern) = prop_obj.get("pattern").and_then(|v| v.as_str()) {
607 let mut rule = HashMap::new();
608 rule.insert("type".to_string(), json!("pattern"));
609 rule.insert("pattern".to_string(), json!(pattern));
610 rule.insert("source".to_string(), json!("json_schema"));
611 quality_rules.push(rule);
612 }
613
614 if let Some(minimum) = prop_obj.get("minimum") {
616 let mut rule = HashMap::new();
617 rule.insert("type".to_string(), json!("minimum"));
618 rule.insert("value".to_string(), minimum.clone());
619 rule.insert("source".to_string(), json!("json_schema"));
620 if let Some(exclusive_minimum) = prop_obj.get("exclusiveMinimum") {
621 rule.insert("exclusive".to_string(), exclusive_minimum.clone());
622 }
623 quality_rules.push(rule);
624 }
625
626 if let Some(maximum) = prop_obj.get("maximum") {
628 let mut rule = HashMap::new();
629 rule.insert("type".to_string(), json!("maximum"));
630 rule.insert("value".to_string(), maximum.clone());
631 rule.insert("source".to_string(), json!("json_schema"));
632 if let Some(exclusive_maximum) = prop_obj.get("exclusiveMaximum") {
633 rule.insert("exclusive".to_string(), exclusive_maximum.clone());
634 }
635 quality_rules.push(rule);
636 }
637
638 if let Some(min_length) = prop_obj.get("minLength").and_then(|v| v.as_u64()) {
640 let mut rule = HashMap::new();
641 rule.insert("type".to_string(), json!("minLength"));
642 rule.insert("value".to_string(), json!(min_length));
643 rule.insert("source".to_string(), json!("json_schema"));
644 quality_rules.push(rule);
645 }
646
647 if let Some(max_length) = prop_obj.get("maxLength").and_then(|v| v.as_u64()) {
649 let mut rule = HashMap::new();
650 rule.insert("type".to_string(), json!("maxLength"));
651 rule.insert("value".to_string(), json!(max_length));
652 rule.insert("source".to_string(), json!("json_schema"));
653 quality_rules.push(rule);
654 }
655
656 if let Some(multiple_of) = prop_obj.get("multipleOf") {
658 let mut rule = HashMap::new();
659 rule.insert("type".to_string(), json!("multipleOf"));
660 rule.insert("value".to_string(), multiple_of.clone());
661 rule.insert("source".to_string(), json!("json_schema"));
662 quality_rules.push(rule);
663 }
664
665 if let Some(const_val) = prop_obj.get("const") {
667 let mut rule = HashMap::new();
668 rule.insert("type".to_string(), json!("const"));
669 rule.insert("value".to_string(), const_val.clone());
670 rule.insert("source".to_string(), json!("json_schema"));
671 quality_rules.push(rule);
672 }
673
674 if let Some(min_items) = prop_obj.get("minItems").and_then(|v| v.as_u64()) {
676 let mut rule = HashMap::new();
677 rule.insert("type".to_string(), json!("minItems"));
678 rule.insert("value".to_string(), json!(min_items));
679 rule.insert("source".to_string(), json!("json_schema"));
680 quality_rules.push(rule);
681 }
682
683 if let Some(max_items) = prop_obj.get("maxItems").and_then(|v| v.as_u64()) {
685 let mut rule = HashMap::new();
686 rule.insert("type".to_string(), json!("maxItems"));
687 rule.insert("value".to_string(), json!(max_items));
688 rule.insert("source".to_string(), json!("json_schema"));
689 quality_rules.push(rule);
690 }
691
692 if let Some(unique_items) = prop_obj.get("uniqueItems").and_then(|v| v.as_bool())
694 && unique_items
695 {
696 let mut rule = HashMap::new();
697 rule.insert("type".to_string(), json!("uniqueItems"));
698 rule.insert("value".to_string(), json!(true));
699 rule.insert("source".to_string(), json!("json_schema"));
700 quality_rules.push(rule);
701 }
702
703 if let Some(min_props) = prop_obj.get("minProperties").and_then(|v| v.as_u64()) {
705 let mut rule = HashMap::new();
706 rule.insert("type".to_string(), json!("minProperties"));
707 rule.insert("value".to_string(), json!(min_props));
708 rule.insert("source".to_string(), json!("json_schema"));
709 quality_rules.push(rule);
710 }
711
712 if let Some(max_props) = prop_obj.get("maxProperties").and_then(|v| v.as_u64()) {
714 let mut rule = HashMap::new();
715 rule.insert("type".to_string(), json!("maxProperties"));
716 rule.insert("value".to_string(), json!(max_props));
717 rule.insert("source".to_string(), json!("json_schema"));
718 quality_rules.push(rule);
719 }
720
721 if let Some(additional_props) = prop_obj.get("additionalProperties") {
723 let mut rule = HashMap::new();
724 rule.insert("type".to_string(), json!("additionalProperties"));
725 rule.insert("value".to_string(), additional_props.clone());
726 rule.insert("source".to_string(), json!("json_schema"));
727 quality_rules.push(rule);
728 }
729
730 if let Some(format_val) = prop_obj.get("format").and_then(|v| v.as_str()) {
732 let mut rule = HashMap::new();
733 rule.insert("type".to_string(), json!("format"));
734 rule.insert("value".to_string(), json!(format_val));
735 rule.insert("source".to_string(), json!("json_schema"));
736 quality_rules.push(rule);
737 }
738
739 for keyword in &["allOf", "anyOf", "oneOf", "not"] {
741 if let Some(value) = prop_obj.get(*keyword) {
742 let mut rule = HashMap::new();
743 rule.insert("type".to_string(), json!(*keyword));
744 rule.insert("value".to_string(), value.clone());
745 rule.insert("source".to_string(), json!("json_schema"));
746 quality_rules.push(rule);
747 }
748 }
749
750 quality_rules
751 }
752
753 fn extract_enum_values(&self, prop_obj: &serde_json::Map<String, Value>) -> Vec<String> {
755 prop_obj
756 .get("enum")
757 .and_then(|v| v.as_array())
758 .map(|arr| {
759 arr.iter()
760 .filter_map(|v| {
761 match v {
763 Value::String(s) => Some(s.clone()),
764 Value::Number(n) => Some(n.to_string()),
765 Value::Bool(b) => Some(b.to_string()),
766 Value::Null => Some("null".to_string()),
767 _ => serde_json::to_string(v).ok(),
768 }
769 })
770 .collect()
771 })
772 .unwrap_or_default()
773 }
774}
775
/// A non-fatal problem recorded while parsing a JSON Schema document.
///
/// Derives equality so collected diagnostics can be compared directly
/// (for example in assertions or when de-duplicating).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
    /// Category of the problem; the JSON Schema importer records `"parse_error"`.
    pub error_type: String,
    /// Path of the schema element the problem relates to, when one is known.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}