1use super::odcs_shared::column_to_column_data;
10use super::{ImportError, ImportResult, TableData};
11use crate::models::{Column, PropertyRelationship, Table, Tag};
12use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
13use anyhow::{Context, Result};
14use serde_json::{Value, json};
15use std::collections::HashMap;
16use std::str::FromStr;
17use tracing::{info, warn};
18
19fn ref_to_relationships(ref_path: &Option<String>) -> Vec<PropertyRelationship> {
21 match ref_path {
22 Some(ref_str) => {
23 let to = if ref_str.starts_with("#/definitions/") {
24 let def_path = ref_str.strip_prefix("#/definitions/").unwrap_or(ref_str);
25 format!("definitions/{}", def_path)
26 } else if ref_str.starts_with("#/") {
27 ref_str.strip_prefix("#/").unwrap_or(ref_str).to_string()
28 } else {
29 ref_str.clone()
30 };
31 vec![PropertyRelationship {
32 relationship_type: "foreignKey".to_string(),
33 to,
34 }]
35 }
36 None => Vec::new(),
37 }
38}
39
/// Stateless importer that converts JSON Schema documents into tables.
///
/// The type carries no data; all behaviour lives in its `impl` block.
pub struct JSONSchemaImporter;
42
43impl Default for JSONSchemaImporter {
44 fn default() -> Self {
45 Self::new()
46 }
47}
48
49impl JSONSchemaImporter {
50 pub fn new() -> Self {
60 Self
61 }
62
63 pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
92 match self.parse(json_content) {
93 Ok((tables, errors)) => {
94 let mut sdk_tables = Vec::new();
95 for (idx, table) in tables.iter().enumerate() {
96 sdk_tables.push(TableData {
97 table_index: idx,
98 id: Some(table.id.to_string()),
99 name: Some(table.name.clone()),
100 columns: table.columns.iter().map(column_to_column_data).collect(),
101 ..Default::default()
102 });
103 }
104 let sdk_errors: Vec<ImportError> = errors
105 .iter()
106 .map(|e| ImportError::ParseError(e.message.clone()))
107 .collect();
108 Ok(ImportResult {
109 tables: sdk_tables,
110 tables_requiring_name: Vec::new(),
111 errors: sdk_errors,
112 ai_suggestions: None,
113 })
114 }
115 Err(e) => Err(ImportError::ParseError(e.to_string())),
116 }
117 }
118
119 fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
125 let mut errors = Vec::new();
126
127 let schema: Value =
129 serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
130
131 let mut tables = Vec::new();
132
133 if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
135 for (name, def_schema) in definitions {
137 match self.parse_schema(def_schema, Some(name), &mut errors) {
138 Ok(table) => tables.push(table),
139 Err(e) => {
140 errors.push(ParserError {
141 error_type: "parse_error".to_string(),
142 field: Some(format!("definitions.{}", name)),
143 message: format!("Failed to parse schema: {}", e),
144 });
145 }
146 }
147 }
148 } else {
149 match self.parse_schema(&schema, None, &mut errors) {
151 Ok(table) => tables.push(table),
152 Err(e) => {
153 errors.push(ParserError {
154 error_type: "parse_error".to_string(),
155 field: None,
156 message: format!("Failed to parse schema: {}", e),
157 });
158 }
159 }
160 }
161
162 Ok((tables, errors))
163 }
164
165 fn parse_schema(
167 &self,
168 schema: &Value,
169 name_override: Option<&str>,
170 errors: &mut Vec<ParserError>,
171 ) -> Result<Table> {
172 let schema_obj = schema
173 .as_object()
174 .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
175
176 let name = name_override
178 .map(|s| s.to_string())
179 .or_else(|| {
180 schema_obj
181 .get("title")
182 .or_else(|| schema_obj.get("name"))
183 .and_then(|v| v.as_str())
184 .map(|s| s.to_string())
185 })
186 .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
187
188 if let Err(e) = validate_table_name(&name) {
190 warn!("Table name validation warning for '{}': {}", name, e);
191 }
192
193 let description = schema_obj
195 .get("description")
196 .and_then(|v| v.as_str())
197 .map(|s| s.to_string())
198 .unwrap_or_default();
199
200 let properties = schema_obj
202 .get("properties")
203 .and_then(|v| v.as_object())
204 .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
205
206 let required_fields: Vec<String> = schema_obj
208 .get("required")
209 .and_then(|v| v.as_array())
210 .map(|arr| {
211 arr.iter()
212 .filter_map(|v| v.as_str().map(|s| s.to_string()))
213 .collect()
214 })
215 .unwrap_or_default();
216
217 let mut columns = Vec::new();
218 for (prop_name, prop_schema) in properties {
219 let nullable = !required_fields.contains(prop_name);
220 match self.parse_property(prop_name, prop_schema, nullable, errors) {
221 Ok(mut cols) => columns.append(&mut cols),
222 Err(e) => {
223 errors.push(ParserError {
224 error_type: "parse_error".to_string(),
225 field: Some(format!("properties.{}", prop_name)),
226 message: format!("Failed to parse property: {}", e),
227 });
228 }
229 }
230 }
231
232 let mut tags: Vec<Tag> = Vec::new();
234 if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
235 for item in tags_arr {
236 if let Some(s) = item.as_str() {
237 if let Ok(tag) = Tag::from_str(s) {
238 tags.push(tag);
239 } else {
240 tags.push(Tag::Simple(s.to_string()));
241 }
242 }
243 }
244 }
245 if let Some(custom_props) = schema_obj
247 .get("customProperties")
248 .and_then(|v| v.as_object())
249 && let Some(tags_val) = custom_props.get("tags")
250 && let Some(tags_arr) = tags_val.as_array()
251 {
252 for item in tags_arr {
253 if let Some(s) = item.as_str() {
254 if let Ok(tag) = Tag::from_str(s) {
255 if !tags.contains(&tag) {
256 tags.push(tag);
257 }
258 } else {
259 let simple_tag = Tag::Simple(s.to_string());
260 if !tags.contains(&simple_tag) {
261 tags.push(simple_tag);
262 }
263 }
264 }
265 }
266 }
267
268 let mut odcl_metadata = HashMap::new();
270 if !description.is_empty() {
271 odcl_metadata.insert("description".to_string(), json!(description));
272 }
273
274 let table = Table {
275 id: crate::models::table::Table::generate_id(&name, None, None, None),
276 name: name.clone(),
277 columns,
278 database_type: None,
279 catalog_name: None,
280 schema_name: None,
281 medallion_layers: Vec::new(),
282 scd_pattern: None,
283 data_vault_classification: None,
284 modeling_level: None,
285 tags,
286 odcl_metadata,
287 owner: None,
288 sla: None,
289 contact_details: None,
290 infrastructure_type: None,
291 notes: None,
292 position: None,
293 yaml_file_path: None,
294 drawio_cell_id: None,
295 quality: Vec::new(),
296 errors: Vec::new(),
297 created_at: chrono::Utc::now(),
298 updated_at: chrono::Utc::now(),
299 };
300
301 info!(
302 "Parsed JSON Schema: {} with {} columns",
303 name,
304 table.columns.len()
305 );
306 Ok(table)
307 }
308
309 fn parse_property(
311 &self,
312 prop_name: &str,
313 prop_schema: &Value,
314 nullable: bool,
315 errors: &mut Vec<ParserError>,
316 ) -> Result<Vec<Column>> {
317 if let Err(e) = validate_column_name(prop_name) {
319 warn!("Column name validation warning for '{}': {}", prop_name, e);
320 }
321
322 let prop_obj = prop_schema
323 .as_object()
324 .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
325
326 if let Some(ref_path) = prop_obj.get("$ref").and_then(|v| v.as_str()) {
328 let description = prop_obj
330 .get("description")
331 .and_then(|v| v.as_str())
332 .map(|s| s.to_string())
333 .unwrap_or_default();
334
335 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
336
337 return Ok(vec![Column {
338 name: prop_name.to_string(),
339 data_type: "STRING".to_string(), nullable,
341 description,
342 quality: quality_rules,
343 relationships: ref_to_relationships(&Some(ref_path.to_string())),
344 ..Default::default()
345 }]);
346 }
347
348 let prop_type = prop_obj
349 .get("type")
350 .and_then(|v| v.as_str())
351 .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
352
353 let mapped_type = self.map_json_type_to_sql(prop_type);
355 if let Err(e) = validate_data_type(&mapped_type) {
356 warn!("Data type validation warning for '{}': {}", mapped_type, e);
357 }
358
359 let description = prop_obj
360 .get("description")
361 .and_then(|v| v.as_str())
362 .map(|s| s.to_string())
363 .unwrap_or_default();
364
365 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
367 let enum_values = self.extract_enum_values(prop_obj);
368
369 let mut columns = Vec::new();
370
371 match prop_type {
372 "object" => {
373 if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
375 let nested_required: Vec<String> = prop_obj
376 .get("required")
377 .and_then(|v| v.as_array())
378 .map(|arr| {
379 arr.iter()
380 .filter_map(|v| v.as_str().map(|s| s.to_string()))
381 .collect()
382 })
383 .unwrap_or_default();
384
385 for (nested_name, nested_schema) in nested_props {
386 let nested_nullable = !nested_required.contains(nested_name);
387 match self.parse_property(
388 nested_name,
389 nested_schema,
390 nested_nullable,
391 errors,
392 ) {
393 Ok(mut nested_cols) => {
394 for col in nested_cols.iter_mut() {
396 col.name = format!("{}.{}", prop_name, col.name);
397 }
398 columns.append(&mut nested_cols);
399 }
400 Err(e) => {
401 errors.push(ParserError {
402 error_type: "parse_error".to_string(),
403 field: Some(format!("{}.{}", prop_name, nested_name)),
404 message: format!("Failed to parse nested property: {}", e),
405 });
406 }
407 }
408 }
409 let object_quality = self.extract_validation_keywords(prop_obj, prop_name);
412 if !object_quality.is_empty() && !columns.is_empty() {
413 columns[0].quality.extend(object_quality);
415 }
416 } else {
417 let struct_quality = self.extract_validation_keywords(prop_obj, prop_name);
419 columns.push(Column {
420 name: prop_name.to_string(),
421 data_type: "STRUCT".to_string(),
422 nullable,
423 description,
424 quality: struct_quality,
425 ..Default::default()
426 });
427 }
428 }
429 "array" => {
430 let items = prop_obj
432 .get("items")
433 .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
434
435 let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
436 {
437 if items_str == "object" {
438 if let Some(nested_props) =
440 items.get("properties").and_then(|v| v.as_object())
441 {
442 let nested_required: Vec<String> = items
443 .get("required")
444 .and_then(|v| v.as_array())
445 .map(|arr| {
446 arr.iter()
447 .filter_map(|v| v.as_str().map(|s| s.to_string()))
448 .collect()
449 })
450 .unwrap_or_default();
451
452 for (nested_name, nested_schema) in nested_props {
453 let nested_nullable = !nested_required.contains(nested_name);
454 match self.parse_property(
455 nested_name,
456 nested_schema,
457 nested_nullable,
458 errors,
459 ) {
460 Ok(mut nested_cols) => {
461 for col in nested_cols.iter_mut() {
462 col.name = format!("{}.{}", prop_name, col.name);
463 }
464 columns.append(&mut nested_cols);
465 }
466 Err(e) => {
467 errors.push(ParserError {
468 error_type: "parse_error".to_string(),
469 field: Some(format!("{}.{}", prop_name, nested_name)),
470 message: format!(
471 "Failed to parse array item property: {}",
472 e
473 ),
474 });
475 }
476 }
477 }
478 return Ok(columns);
479 } else {
480 "ARRAY<STRUCT>".to_string()
481 }
482 } else {
483 format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
484 }
485 } else {
486 "ARRAY<STRING>".to_string()
487 };
488
489 let mut array_quality = self.extract_validation_keywords(prop_obj, prop_name);
491 if let Some(items_obj) = items.as_object() {
493 let items_quality = self.extract_validation_keywords(items_obj, prop_name);
494 array_quality.extend(items_quality);
495 }
496
497 columns.push(Column {
498 name: prop_name.to_string(),
499 data_type,
500 nullable,
501 description,
502 quality: array_quality,
503 ..Default::default()
504 });
505 }
506 _ => {
507 let data_type = self.map_json_type_to_sql(prop_type);
509 columns.push(Column {
510 name: prop_name.to_string(),
511 data_type,
512 nullable,
513 description,
514 quality: quality_rules,
515 enum_values: enum_values.clone(),
516 ..Default::default()
517 });
518 }
519 }
520
521 Ok(columns)
522 }
523
524 fn map_json_type_to_sql(&self, json_type: &str) -> String {
526 match json_type {
527 "integer" => "INTEGER".to_string(),
528 "number" => "DOUBLE".to_string(),
529 "boolean" => "BOOLEAN".to_string(),
530 "string" => "STRING".to_string(),
531 "null" => "NULL".to_string(),
532 _ => "STRING".to_string(), }
534 }
535
536 fn extract_validation_keywords(
538 &self,
539 prop_obj: &serde_json::Map<String, Value>,
540 _prop_name: &str,
541 ) -> Vec<HashMap<String, serde_json::Value>> {
542 let mut quality_rules = Vec::new();
543
544 if let Some(pattern) = prop_obj.get("pattern").and_then(|v| v.as_str()) {
546 let mut rule = HashMap::new();
547 rule.insert("type".to_string(), json!("pattern"));
548 rule.insert("pattern".to_string(), json!(pattern));
549 rule.insert("source".to_string(), json!("json_schema"));
550 quality_rules.push(rule);
551 }
552
553 if let Some(minimum) = prop_obj.get("minimum") {
555 let mut rule = HashMap::new();
556 rule.insert("type".to_string(), json!("minimum"));
557 rule.insert("value".to_string(), minimum.clone());
558 rule.insert("source".to_string(), json!("json_schema"));
559 if let Some(exclusive_minimum) = prop_obj.get("exclusiveMinimum") {
560 rule.insert("exclusive".to_string(), exclusive_minimum.clone());
561 }
562 quality_rules.push(rule);
563 }
564
565 if let Some(maximum) = prop_obj.get("maximum") {
567 let mut rule = HashMap::new();
568 rule.insert("type".to_string(), json!("maximum"));
569 rule.insert("value".to_string(), maximum.clone());
570 rule.insert("source".to_string(), json!("json_schema"));
571 if let Some(exclusive_maximum) = prop_obj.get("exclusiveMaximum") {
572 rule.insert("exclusive".to_string(), exclusive_maximum.clone());
573 }
574 quality_rules.push(rule);
575 }
576
577 if let Some(min_length) = prop_obj.get("minLength").and_then(|v| v.as_u64()) {
579 let mut rule = HashMap::new();
580 rule.insert("type".to_string(), json!("minLength"));
581 rule.insert("value".to_string(), json!(min_length));
582 rule.insert("source".to_string(), json!("json_schema"));
583 quality_rules.push(rule);
584 }
585
586 if let Some(max_length) = prop_obj.get("maxLength").and_then(|v| v.as_u64()) {
588 let mut rule = HashMap::new();
589 rule.insert("type".to_string(), json!("maxLength"));
590 rule.insert("value".to_string(), json!(max_length));
591 rule.insert("source".to_string(), json!("json_schema"));
592 quality_rules.push(rule);
593 }
594
595 if let Some(multiple_of) = prop_obj.get("multipleOf") {
597 let mut rule = HashMap::new();
598 rule.insert("type".to_string(), json!("multipleOf"));
599 rule.insert("value".to_string(), multiple_of.clone());
600 rule.insert("source".to_string(), json!("json_schema"));
601 quality_rules.push(rule);
602 }
603
604 if let Some(const_val) = prop_obj.get("const") {
606 let mut rule = HashMap::new();
607 rule.insert("type".to_string(), json!("const"));
608 rule.insert("value".to_string(), const_val.clone());
609 rule.insert("source".to_string(), json!("json_schema"));
610 quality_rules.push(rule);
611 }
612
613 if let Some(min_items) = prop_obj.get("minItems").and_then(|v| v.as_u64()) {
615 let mut rule = HashMap::new();
616 rule.insert("type".to_string(), json!("minItems"));
617 rule.insert("value".to_string(), json!(min_items));
618 rule.insert("source".to_string(), json!("json_schema"));
619 quality_rules.push(rule);
620 }
621
622 if let Some(max_items) = prop_obj.get("maxItems").and_then(|v| v.as_u64()) {
624 let mut rule = HashMap::new();
625 rule.insert("type".to_string(), json!("maxItems"));
626 rule.insert("value".to_string(), json!(max_items));
627 rule.insert("source".to_string(), json!("json_schema"));
628 quality_rules.push(rule);
629 }
630
631 if let Some(unique_items) = prop_obj.get("uniqueItems").and_then(|v| v.as_bool())
633 && unique_items
634 {
635 let mut rule = HashMap::new();
636 rule.insert("type".to_string(), json!("uniqueItems"));
637 rule.insert("value".to_string(), json!(true));
638 rule.insert("source".to_string(), json!("json_schema"));
639 quality_rules.push(rule);
640 }
641
642 if let Some(min_props) = prop_obj.get("minProperties").and_then(|v| v.as_u64()) {
644 let mut rule = HashMap::new();
645 rule.insert("type".to_string(), json!("minProperties"));
646 rule.insert("value".to_string(), json!(min_props));
647 rule.insert("source".to_string(), json!("json_schema"));
648 quality_rules.push(rule);
649 }
650
651 if let Some(max_props) = prop_obj.get("maxProperties").and_then(|v| v.as_u64()) {
653 let mut rule = HashMap::new();
654 rule.insert("type".to_string(), json!("maxProperties"));
655 rule.insert("value".to_string(), json!(max_props));
656 rule.insert("source".to_string(), json!("json_schema"));
657 quality_rules.push(rule);
658 }
659
660 if let Some(additional_props) = prop_obj.get("additionalProperties") {
662 let mut rule = HashMap::new();
663 rule.insert("type".to_string(), json!("additionalProperties"));
664 rule.insert("value".to_string(), additional_props.clone());
665 rule.insert("source".to_string(), json!("json_schema"));
666 quality_rules.push(rule);
667 }
668
669 if let Some(format_val) = prop_obj.get("format").and_then(|v| v.as_str()) {
671 let mut rule = HashMap::new();
672 rule.insert("type".to_string(), json!("format"));
673 rule.insert("value".to_string(), json!(format_val));
674 rule.insert("source".to_string(), json!("json_schema"));
675 quality_rules.push(rule);
676 }
677
678 for keyword in &["allOf", "anyOf", "oneOf", "not"] {
680 if let Some(value) = prop_obj.get(*keyword) {
681 let mut rule = HashMap::new();
682 rule.insert("type".to_string(), json!(*keyword));
683 rule.insert("value".to_string(), value.clone());
684 rule.insert("source".to_string(), json!("json_schema"));
685 quality_rules.push(rule);
686 }
687 }
688
689 quality_rules
690 }
691
692 fn extract_enum_values(&self, prop_obj: &serde_json::Map<String, Value>) -> Vec<String> {
694 prop_obj
695 .get("enum")
696 .and_then(|v| v.as_array())
697 .map(|arr| {
698 arr.iter()
699 .filter_map(|v| {
700 match v {
702 Value::String(s) => Some(s.clone()),
703 Value::Number(n) => Some(n.to_string()),
704 Value::Bool(b) => Some(b.to_string()),
705 Value::Null => Some("null".to_string()),
706 _ => serde_json::to_string(v).ok(),
707 }
708 })
709 .collect()
710 })
711 .unwrap_or_default()
712 }
713}
714
/// A non-fatal problem recorded while parsing a JSON Schema document.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Error category; the importer in this file always sets `"parse_error"`.
    pub error_type: String,
    /// Dotted location of the failing element (for example
    /// `definitions.<name>`, `properties.<name>` or `<parent>.<child>`), or
    /// `None` when the root schema itself failed to parse.
    pub field: Option<String>,
    /// Human-readable description of what went wrong.
    pub message: String,
}
721}