1use super::odcs_shared::column_to_column_data;
10use super::{ImportError, ImportResult, TableData};
11use crate::models::{Column, PropertyRelationship, Table, Tag};
12use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
13use anyhow::{Context, Result};
14use serde_json::{Value, json};
15use std::collections::HashMap;
16use std::str::FromStr;
17use tracing::{info, warn};
18
19fn ref_to_relationships(ref_path: &Option<String>) -> Vec<PropertyRelationship> {
21 match ref_path {
22 Some(ref_str) => {
23 let to = if ref_str.starts_with("#/definitions/") {
24 let def_path = ref_str.strip_prefix("#/definitions/").unwrap_or(ref_str);
25 format!("definitions/{}", def_path)
26 } else if ref_str.starts_with("#/") {
27 ref_str.strip_prefix("#/").unwrap_or(ref_str).to_string()
28 } else {
29 ref_str.clone()
30 };
31 vec![PropertyRelationship {
32 relationship_type: "foreignKey".to_string(),
33 to,
34 }]
35 }
36 None => Vec::new(),
37 }
38}
39
40pub struct JSONSchemaImporter;
42
43impl Default for JSONSchemaImporter {
44 fn default() -> Self {
45 Self::new()
46 }
47}
48
49impl JSONSchemaImporter {
50 pub fn new() -> Self {
60 Self
61 }
62
63 pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
92 match self.parse(json_content) {
93 Ok((tables, errors)) => {
94 let mut sdk_tables = Vec::new();
95 for (idx, table) in tables.iter().enumerate() {
96 sdk_tables.push(TableData {
97 table_index: idx,
98 name: Some(table.name.clone()),
99 columns: table.columns.iter().map(column_to_column_data).collect(),
100 });
101 }
102 let sdk_errors: Vec<ImportError> = errors
103 .iter()
104 .map(|e| ImportError::ParseError(e.message.clone()))
105 .collect();
106 Ok(ImportResult {
107 tables: sdk_tables,
108 tables_requiring_name: Vec::new(),
109 errors: sdk_errors,
110 ai_suggestions: None,
111 })
112 }
113 Err(e) => Err(ImportError::ParseError(e.to_string())),
114 }
115 }
116
117 fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
123 let mut errors = Vec::new();
124
125 let schema: Value =
127 serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
128
129 let mut tables = Vec::new();
130
131 if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
133 for (name, def_schema) in definitions {
135 match self.parse_schema(def_schema, Some(name), &mut errors) {
136 Ok(table) => tables.push(table),
137 Err(e) => {
138 errors.push(ParserError {
139 error_type: "parse_error".to_string(),
140 field: Some(format!("definitions.{}", name)),
141 message: format!("Failed to parse schema: {}", e),
142 });
143 }
144 }
145 }
146 } else {
147 match self.parse_schema(&schema, None, &mut errors) {
149 Ok(table) => tables.push(table),
150 Err(e) => {
151 errors.push(ParserError {
152 error_type: "parse_error".to_string(),
153 field: None,
154 message: format!("Failed to parse schema: {}", e),
155 });
156 }
157 }
158 }
159
160 Ok((tables, errors))
161 }
162
163 fn parse_schema(
165 &self,
166 schema: &Value,
167 name_override: Option<&str>,
168 errors: &mut Vec<ParserError>,
169 ) -> Result<Table> {
170 let schema_obj = schema
171 .as_object()
172 .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
173
174 let name = name_override
176 .map(|s| s.to_string())
177 .or_else(|| {
178 schema_obj
179 .get("title")
180 .or_else(|| schema_obj.get("name"))
181 .and_then(|v| v.as_str())
182 .map(|s| s.to_string())
183 })
184 .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
185
186 if let Err(e) = validate_table_name(&name) {
188 warn!("Table name validation warning for '{}': {}", name, e);
189 }
190
191 let description = schema_obj
193 .get("description")
194 .and_then(|v| v.as_str())
195 .map(|s| s.to_string())
196 .unwrap_or_default();
197
198 let properties = schema_obj
200 .get("properties")
201 .and_then(|v| v.as_object())
202 .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
203
204 let required_fields: Vec<String> = schema_obj
206 .get("required")
207 .and_then(|v| v.as_array())
208 .map(|arr| {
209 arr.iter()
210 .filter_map(|v| v.as_str().map(|s| s.to_string()))
211 .collect()
212 })
213 .unwrap_or_default();
214
215 let mut columns = Vec::new();
216 for (prop_name, prop_schema) in properties {
217 let nullable = !required_fields.contains(prop_name);
218 match self.parse_property(prop_name, prop_schema, nullable, errors) {
219 Ok(mut cols) => columns.append(&mut cols),
220 Err(e) => {
221 errors.push(ParserError {
222 error_type: "parse_error".to_string(),
223 field: Some(format!("properties.{}", prop_name)),
224 message: format!("Failed to parse property: {}", e),
225 });
226 }
227 }
228 }
229
230 let mut tags: Vec<Tag> = Vec::new();
232 if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
233 for item in tags_arr {
234 if let Some(s) = item.as_str() {
235 if let Ok(tag) = Tag::from_str(s) {
236 tags.push(tag);
237 } else {
238 tags.push(Tag::Simple(s.to_string()));
239 }
240 }
241 }
242 }
243 if let Some(custom_props) = schema_obj
245 .get("customProperties")
246 .and_then(|v| v.as_object())
247 && let Some(tags_val) = custom_props.get("tags")
248 && let Some(tags_arr) = tags_val.as_array()
249 {
250 for item in tags_arr {
251 if let Some(s) = item.as_str() {
252 if let Ok(tag) = Tag::from_str(s) {
253 if !tags.contains(&tag) {
254 tags.push(tag);
255 }
256 } else {
257 let simple_tag = Tag::Simple(s.to_string());
258 if !tags.contains(&simple_tag) {
259 tags.push(simple_tag);
260 }
261 }
262 }
263 }
264 }
265
266 let mut odcl_metadata = HashMap::new();
268 if !description.is_empty() {
269 odcl_metadata.insert("description".to_string(), json!(description));
270 }
271
272 let table = Table {
273 id: crate::models::table::Table::generate_id(&name, None, None, None),
274 name: name.clone(),
275 columns,
276 database_type: None,
277 catalog_name: None,
278 schema_name: None,
279 medallion_layers: Vec::new(),
280 scd_pattern: None,
281 data_vault_classification: None,
282 modeling_level: None,
283 tags,
284 odcl_metadata,
285 owner: None,
286 sla: None,
287 contact_details: None,
288 infrastructure_type: None,
289 notes: None,
290 position: None,
291 yaml_file_path: None,
292 drawio_cell_id: None,
293 quality: Vec::new(),
294 errors: Vec::new(),
295 created_at: chrono::Utc::now(),
296 updated_at: chrono::Utc::now(),
297 };
298
299 info!(
300 "Parsed JSON Schema: {} with {} columns",
301 name,
302 table.columns.len()
303 );
304 Ok(table)
305 }
306
307 fn parse_property(
309 &self,
310 prop_name: &str,
311 prop_schema: &Value,
312 nullable: bool,
313 errors: &mut Vec<ParserError>,
314 ) -> Result<Vec<Column>> {
315 if let Err(e) = validate_column_name(prop_name) {
317 warn!("Column name validation warning for '{}': {}", prop_name, e);
318 }
319
320 let prop_obj = prop_schema
321 .as_object()
322 .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
323
324 if let Some(ref_path) = prop_obj.get("$ref").and_then(|v| v.as_str()) {
326 let description = prop_obj
328 .get("description")
329 .and_then(|v| v.as_str())
330 .map(|s| s.to_string())
331 .unwrap_or_default();
332
333 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
334
335 return Ok(vec![Column {
336 name: prop_name.to_string(),
337 data_type: "STRING".to_string(), nullable,
339 description,
340 quality: quality_rules,
341 relationships: ref_to_relationships(&Some(ref_path.to_string())),
342 ..Default::default()
343 }]);
344 }
345
346 let prop_type = prop_obj
347 .get("type")
348 .and_then(|v| v.as_str())
349 .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
350
351 let mapped_type = self.map_json_type_to_sql(prop_type);
353 if let Err(e) = validate_data_type(&mapped_type) {
354 warn!("Data type validation warning for '{}': {}", mapped_type, e);
355 }
356
357 let description = prop_obj
358 .get("description")
359 .and_then(|v| v.as_str())
360 .map(|s| s.to_string())
361 .unwrap_or_default();
362
363 let quality_rules = self.extract_validation_keywords(prop_obj, prop_name);
365 let enum_values = self.extract_enum_values(prop_obj);
366
367 let mut columns = Vec::new();
368
369 match prop_type {
370 "object" => {
371 if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
373 let nested_required: Vec<String> = prop_obj
374 .get("required")
375 .and_then(|v| v.as_array())
376 .map(|arr| {
377 arr.iter()
378 .filter_map(|v| v.as_str().map(|s| s.to_string()))
379 .collect()
380 })
381 .unwrap_or_default();
382
383 for (nested_name, nested_schema) in nested_props {
384 let nested_nullable = !nested_required.contains(nested_name);
385 match self.parse_property(
386 nested_name,
387 nested_schema,
388 nested_nullable,
389 errors,
390 ) {
391 Ok(mut nested_cols) => {
392 for col in nested_cols.iter_mut() {
394 col.name = format!("{}.{}", prop_name, col.name);
395 }
396 columns.append(&mut nested_cols);
397 }
398 Err(e) => {
399 errors.push(ParserError {
400 error_type: "parse_error".to_string(),
401 field: Some(format!("{}.{}", prop_name, nested_name)),
402 message: format!("Failed to parse nested property: {}", e),
403 });
404 }
405 }
406 }
407 let object_quality = self.extract_validation_keywords(prop_obj, prop_name);
410 if !object_quality.is_empty() && !columns.is_empty() {
411 columns[0].quality.extend(object_quality);
413 }
414 } else {
415 let struct_quality = self.extract_validation_keywords(prop_obj, prop_name);
417 columns.push(Column {
418 name: prop_name.to_string(),
419 data_type: "STRUCT".to_string(),
420 nullable,
421 description,
422 quality: struct_quality,
423 ..Default::default()
424 });
425 }
426 }
427 "array" => {
428 let items = prop_obj
430 .get("items")
431 .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
432
433 let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
434 {
435 if items_str == "object" {
436 if let Some(nested_props) =
438 items.get("properties").and_then(|v| v.as_object())
439 {
440 let nested_required: Vec<String> = items
441 .get("required")
442 .and_then(|v| v.as_array())
443 .map(|arr| {
444 arr.iter()
445 .filter_map(|v| v.as_str().map(|s| s.to_string()))
446 .collect()
447 })
448 .unwrap_or_default();
449
450 for (nested_name, nested_schema) in nested_props {
451 let nested_nullable = !nested_required.contains(nested_name);
452 match self.parse_property(
453 nested_name,
454 nested_schema,
455 nested_nullable,
456 errors,
457 ) {
458 Ok(mut nested_cols) => {
459 for col in nested_cols.iter_mut() {
460 col.name = format!("{}.{}", prop_name, col.name);
461 }
462 columns.append(&mut nested_cols);
463 }
464 Err(e) => {
465 errors.push(ParserError {
466 error_type: "parse_error".to_string(),
467 field: Some(format!("{}.{}", prop_name, nested_name)),
468 message: format!(
469 "Failed to parse array item property: {}",
470 e
471 ),
472 });
473 }
474 }
475 }
476 return Ok(columns);
477 } else {
478 "ARRAY<STRUCT>".to_string()
479 }
480 } else {
481 format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
482 }
483 } else {
484 "ARRAY<STRING>".to_string()
485 };
486
487 let mut array_quality = self.extract_validation_keywords(prop_obj, prop_name);
489 if let Some(items_obj) = items.as_object() {
491 let items_quality = self.extract_validation_keywords(items_obj, prop_name);
492 array_quality.extend(items_quality);
493 }
494
495 columns.push(Column {
496 name: prop_name.to_string(),
497 data_type,
498 nullable,
499 description,
500 quality: array_quality,
501 ..Default::default()
502 });
503 }
504 _ => {
505 let data_type = self.map_json_type_to_sql(prop_type);
507 columns.push(Column {
508 name: prop_name.to_string(),
509 data_type,
510 nullable,
511 description,
512 quality: quality_rules,
513 enum_values: enum_values.clone(),
514 ..Default::default()
515 });
516 }
517 }
518
519 Ok(columns)
520 }
521
522 fn map_json_type_to_sql(&self, json_type: &str) -> String {
524 match json_type {
525 "integer" => "INTEGER".to_string(),
526 "number" => "DOUBLE".to_string(),
527 "boolean" => "BOOLEAN".to_string(),
528 "string" => "STRING".to_string(),
529 "null" => "NULL".to_string(),
530 _ => "STRING".to_string(), }
532 }
533
534 fn extract_validation_keywords(
536 &self,
537 prop_obj: &serde_json::Map<String, Value>,
538 _prop_name: &str,
539 ) -> Vec<HashMap<String, serde_json::Value>> {
540 let mut quality_rules = Vec::new();
541
542 if let Some(pattern) = prop_obj.get("pattern").and_then(|v| v.as_str()) {
544 let mut rule = HashMap::new();
545 rule.insert("type".to_string(), json!("pattern"));
546 rule.insert("pattern".to_string(), json!(pattern));
547 rule.insert("source".to_string(), json!("json_schema"));
548 quality_rules.push(rule);
549 }
550
551 if let Some(minimum) = prop_obj.get("minimum") {
553 let mut rule = HashMap::new();
554 rule.insert("type".to_string(), json!("minimum"));
555 rule.insert("value".to_string(), minimum.clone());
556 rule.insert("source".to_string(), json!("json_schema"));
557 if let Some(exclusive_minimum) = prop_obj.get("exclusiveMinimum") {
558 rule.insert("exclusive".to_string(), exclusive_minimum.clone());
559 }
560 quality_rules.push(rule);
561 }
562
563 if let Some(maximum) = prop_obj.get("maximum") {
565 let mut rule = HashMap::new();
566 rule.insert("type".to_string(), json!("maximum"));
567 rule.insert("value".to_string(), maximum.clone());
568 rule.insert("source".to_string(), json!("json_schema"));
569 if let Some(exclusive_maximum) = prop_obj.get("exclusiveMaximum") {
570 rule.insert("exclusive".to_string(), exclusive_maximum.clone());
571 }
572 quality_rules.push(rule);
573 }
574
575 if let Some(min_length) = prop_obj.get("minLength").and_then(|v| v.as_u64()) {
577 let mut rule = HashMap::new();
578 rule.insert("type".to_string(), json!("minLength"));
579 rule.insert("value".to_string(), json!(min_length));
580 rule.insert("source".to_string(), json!("json_schema"));
581 quality_rules.push(rule);
582 }
583
584 if let Some(max_length) = prop_obj.get("maxLength").and_then(|v| v.as_u64()) {
586 let mut rule = HashMap::new();
587 rule.insert("type".to_string(), json!("maxLength"));
588 rule.insert("value".to_string(), json!(max_length));
589 rule.insert("source".to_string(), json!("json_schema"));
590 quality_rules.push(rule);
591 }
592
593 if let Some(multiple_of) = prop_obj.get("multipleOf") {
595 let mut rule = HashMap::new();
596 rule.insert("type".to_string(), json!("multipleOf"));
597 rule.insert("value".to_string(), multiple_of.clone());
598 rule.insert("source".to_string(), json!("json_schema"));
599 quality_rules.push(rule);
600 }
601
602 if let Some(const_val) = prop_obj.get("const") {
604 let mut rule = HashMap::new();
605 rule.insert("type".to_string(), json!("const"));
606 rule.insert("value".to_string(), const_val.clone());
607 rule.insert("source".to_string(), json!("json_schema"));
608 quality_rules.push(rule);
609 }
610
611 if let Some(min_items) = prop_obj.get("minItems").and_then(|v| v.as_u64()) {
613 let mut rule = HashMap::new();
614 rule.insert("type".to_string(), json!("minItems"));
615 rule.insert("value".to_string(), json!(min_items));
616 rule.insert("source".to_string(), json!("json_schema"));
617 quality_rules.push(rule);
618 }
619
620 if let Some(max_items) = prop_obj.get("maxItems").and_then(|v| v.as_u64()) {
622 let mut rule = HashMap::new();
623 rule.insert("type".to_string(), json!("maxItems"));
624 rule.insert("value".to_string(), json!(max_items));
625 rule.insert("source".to_string(), json!("json_schema"));
626 quality_rules.push(rule);
627 }
628
629 if let Some(unique_items) = prop_obj.get("uniqueItems").and_then(|v| v.as_bool())
631 && unique_items
632 {
633 let mut rule = HashMap::new();
634 rule.insert("type".to_string(), json!("uniqueItems"));
635 rule.insert("value".to_string(), json!(true));
636 rule.insert("source".to_string(), json!("json_schema"));
637 quality_rules.push(rule);
638 }
639
640 if let Some(min_props) = prop_obj.get("minProperties").and_then(|v| v.as_u64()) {
642 let mut rule = HashMap::new();
643 rule.insert("type".to_string(), json!("minProperties"));
644 rule.insert("value".to_string(), json!(min_props));
645 rule.insert("source".to_string(), json!("json_schema"));
646 quality_rules.push(rule);
647 }
648
649 if let Some(max_props) = prop_obj.get("maxProperties").and_then(|v| v.as_u64()) {
651 let mut rule = HashMap::new();
652 rule.insert("type".to_string(), json!("maxProperties"));
653 rule.insert("value".to_string(), json!(max_props));
654 rule.insert("source".to_string(), json!("json_schema"));
655 quality_rules.push(rule);
656 }
657
658 if let Some(additional_props) = prop_obj.get("additionalProperties") {
660 let mut rule = HashMap::new();
661 rule.insert("type".to_string(), json!("additionalProperties"));
662 rule.insert("value".to_string(), additional_props.clone());
663 rule.insert("source".to_string(), json!("json_schema"));
664 quality_rules.push(rule);
665 }
666
667 if let Some(format_val) = prop_obj.get("format").and_then(|v| v.as_str()) {
669 let mut rule = HashMap::new();
670 rule.insert("type".to_string(), json!("format"));
671 rule.insert("value".to_string(), json!(format_val));
672 rule.insert("source".to_string(), json!("json_schema"));
673 quality_rules.push(rule);
674 }
675
676 for keyword in &["allOf", "anyOf", "oneOf", "not"] {
678 if let Some(value) = prop_obj.get(*keyword) {
679 let mut rule = HashMap::new();
680 rule.insert("type".to_string(), json!(*keyword));
681 rule.insert("value".to_string(), value.clone());
682 rule.insert("source".to_string(), json!("json_schema"));
683 quality_rules.push(rule);
684 }
685 }
686
687 quality_rules
688 }
689
690 fn extract_enum_values(&self, prop_obj: &serde_json::Map<String, Value>) -> Vec<String> {
692 prop_obj
693 .get("enum")
694 .and_then(|v| v.as_array())
695 .map(|arr| {
696 arr.iter()
697 .filter_map(|v| {
698 match v {
700 Value::String(s) => Some(s.clone()),
701 Value::Number(n) => Some(n.to_string()),
702 Value::Bool(b) => Some(b.to_string()),
703 Value::Null => Some("null".to_string()),
704 _ => serde_json::to_string(v).ok(),
705 }
706 })
707 .collect()
708 })
709 .unwrap_or_default()
710 }
711}
712
/// A non-fatal problem recorded while parsing a JSON Schema document.
///
/// The importer collects these instead of aborting, so one bad schema or
/// property does not prevent the rest of the document from being parsed.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Category of the problem; the importer in this file always sets
    /// `"parse_error"`.
    pub error_type: String,
    /// Dotted path to the offending element (for example `definitions.<name>`
    /// or `properties.<name>`), or `None` when the root schema itself failed.
    pub field: Option<String>,
    /// Human-readable description of what went wrong.
    pub message: String,
}