1use super::{ImportError, ImportResult, TableData};
10use crate::models::{Column, Table, Tag};
11use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
12use anyhow::{Context, Result};
13use serde_json::{Value, json};
14use std::collections::HashMap;
15use std::str::FromStr;
16use tracing::{info, warn};
17
/// Importer that converts JSON Schema documents into table definitions.
///
/// The type is a unit struct: it holds no configuration or state, so every
/// value behaves identically and cloning it is free.
#[derive(Debug, Clone)]
pub struct JSONSchemaImporter;
20
21impl Default for JSONSchemaImporter {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl JSONSchemaImporter {
28 pub fn new() -> Self {
38 Self
39 }
40
41 pub fn import(&self, json_content: &str) -> Result<ImportResult, ImportError> {
70 match self.parse(json_content) {
71 Ok((tables, errors)) => {
72 let mut sdk_tables = Vec::new();
73 for (idx, table) in tables.iter().enumerate() {
74 sdk_tables.push(TableData {
75 table_index: idx,
76 name: Some(table.name.clone()),
77 columns: table
78 .columns
79 .iter()
80 .map(|c| super::ColumnData {
81 name: c.name.clone(),
82 data_type: c.data_type.clone(),
83 nullable: c.nullable,
84 primary_key: c.primary_key,
85 description: if c.description.is_empty() {
86 None
87 } else {
88 Some(c.description.clone())
89 },
90 quality: if c.quality.is_empty() {
91 None
92 } else {
93 Some(c.quality.clone())
94 },
95 ref_path: c.ref_path.clone(),
96 })
97 .collect(),
98 });
99 }
100 let sdk_errors: Vec<ImportError> = errors
101 .iter()
102 .map(|e| ImportError::ParseError(e.message.clone()))
103 .collect();
104 Ok(ImportResult {
105 tables: sdk_tables,
106 tables_requiring_name: Vec::new(),
107 errors: sdk_errors,
108 ai_suggestions: None,
109 })
110 }
111 Err(e) => Err(ImportError::ParseError(e.to_string())),
112 }
113 }
114
115 fn parse(&self, json_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
121 let mut errors = Vec::new();
122
123 let schema: Value =
125 serde_json::from_str(json_content).context("Failed to parse JSON Schema")?;
126
127 let mut tables = Vec::new();
128
129 if let Some(definitions) = schema.get("definitions").and_then(|v| v.as_object()) {
131 for (name, def_schema) in definitions {
133 match self.parse_schema(def_schema, Some(name), &mut errors) {
134 Ok(table) => tables.push(table),
135 Err(e) => {
136 errors.push(ParserError {
137 error_type: "parse_error".to_string(),
138 field: Some(format!("definitions.{}", name)),
139 message: format!("Failed to parse schema: {}", e),
140 });
141 }
142 }
143 }
144 } else {
145 match self.parse_schema(&schema, None, &mut errors) {
147 Ok(table) => tables.push(table),
148 Err(e) => {
149 errors.push(ParserError {
150 error_type: "parse_error".to_string(),
151 field: None,
152 message: format!("Failed to parse schema: {}", e),
153 });
154 }
155 }
156 }
157
158 Ok((tables, errors))
159 }
160
161 fn parse_schema(
163 &self,
164 schema: &Value,
165 name_override: Option<&str>,
166 errors: &mut Vec<ParserError>,
167 ) -> Result<Table> {
168 let schema_obj = schema
169 .as_object()
170 .ok_or_else(|| anyhow::anyhow!("Schema must be an object"))?;
171
172 let name = name_override
174 .map(|s| s.to_string())
175 .or_else(|| {
176 schema_obj
177 .get("title")
178 .or_else(|| schema_obj.get("name"))
179 .and_then(|v| v.as_str())
180 .map(|s| s.to_string())
181 })
182 .ok_or_else(|| anyhow::anyhow!("Missing required field: title or name"))?;
183
184 if let Err(e) = validate_table_name(&name) {
186 warn!("Table name validation warning for '{}': {}", name, e);
187 }
188
189 let description = schema_obj
191 .get("description")
192 .and_then(|v| v.as_str())
193 .map(|s| s.to_string())
194 .unwrap_or_default();
195
196 let properties = schema_obj
198 .get("properties")
199 .and_then(|v| v.as_object())
200 .ok_or_else(|| anyhow::anyhow!("Missing required field: properties"))?;
201
202 let required_fields: Vec<String> = schema_obj
204 .get("required")
205 .and_then(|v| v.as_array())
206 .map(|arr| {
207 arr.iter()
208 .filter_map(|v| v.as_str().map(|s| s.to_string()))
209 .collect()
210 })
211 .unwrap_or_default();
212
213 let mut columns = Vec::new();
214 for (prop_name, prop_schema) in properties {
215 let nullable = !required_fields.contains(prop_name);
216 match self.parse_property(prop_name, prop_schema, nullable, errors) {
217 Ok(mut cols) => columns.append(&mut cols),
218 Err(e) => {
219 errors.push(ParserError {
220 error_type: "parse_error".to_string(),
221 field: Some(format!("properties.{}", prop_name)),
222 message: format!("Failed to parse property: {}", e),
223 });
224 }
225 }
226 }
227
228 let mut tags: Vec<Tag> = Vec::new();
230 if let Some(tags_arr) = schema_obj.get("tags").and_then(|v| v.as_array()) {
231 for item in tags_arr {
232 if let Some(s) = item.as_str() {
233 if let Ok(tag) = Tag::from_str(s) {
234 tags.push(tag);
235 } else {
236 tags.push(Tag::Simple(s.to_string()));
237 }
238 }
239 }
240 }
241 if let Some(custom_props) = schema_obj
243 .get("customProperties")
244 .and_then(|v| v.as_object())
245 && let Some(tags_val) = custom_props.get("tags")
246 && let Some(tags_arr) = tags_val.as_array()
247 {
248 for item in tags_arr {
249 if let Some(s) = item.as_str() {
250 if let Ok(tag) = Tag::from_str(s) {
251 if !tags.contains(&tag) {
252 tags.push(tag);
253 }
254 } else {
255 let simple_tag = Tag::Simple(s.to_string());
256 if !tags.contains(&simple_tag) {
257 tags.push(simple_tag);
258 }
259 }
260 }
261 }
262 }
263
264 let mut odcl_metadata = HashMap::new();
266 if !description.is_empty() {
267 odcl_metadata.insert("description".to_string(), json!(description));
268 }
269
270 let table = Table {
271 id: crate::models::table::Table::generate_id(&name, None, None, None),
272 name: name.clone(),
273 columns,
274 database_type: None,
275 catalog_name: None,
276 schema_name: None,
277 medallion_layers: Vec::new(),
278 scd_pattern: None,
279 data_vault_classification: None,
280 modeling_level: None,
281 tags,
282 odcl_metadata,
283 owner: None,
284 sla: None,
285 contact_details: None,
286 infrastructure_type: None,
287 notes: None,
288 position: None,
289 yaml_file_path: None,
290 drawio_cell_id: None,
291 quality: Vec::new(),
292 errors: Vec::new(),
293 created_at: chrono::Utc::now(),
294 updated_at: chrono::Utc::now(),
295 };
296
297 info!(
298 "Parsed JSON Schema: {} with {} columns",
299 name,
300 table.columns.len()
301 );
302 Ok(table)
303 }
304
305 fn parse_property(
307 &self,
308 prop_name: &str,
309 prop_schema: &Value,
310 nullable: bool,
311 errors: &mut Vec<ParserError>,
312 ) -> Result<Vec<Column>> {
313 if let Err(e) = validate_column_name(prop_name) {
315 warn!("Column name validation warning for '{}': {}", prop_name, e);
316 }
317
318 let prop_obj = prop_schema
319 .as_object()
320 .ok_or_else(|| anyhow::anyhow!("Property schema must be an object"))?;
321
322 let prop_type = prop_obj
323 .get("type")
324 .and_then(|v| v.as_str())
325 .ok_or_else(|| anyhow::anyhow!("Property missing type"))?;
326
327 let mapped_type = self.map_json_type_to_sql(prop_type);
329 if let Err(e) = validate_data_type(&mapped_type) {
330 warn!("Data type validation warning for '{}': {}", mapped_type, e);
331 }
332
333 let description = prop_obj
334 .get("description")
335 .and_then(|v| v.as_str())
336 .map(|s| s.to_string())
337 .unwrap_or_default();
338
339 let mut columns = Vec::new();
340
341 match prop_type {
342 "object" => {
343 if let Some(nested_props) = prop_obj.get("properties").and_then(|v| v.as_object()) {
345 let nested_required: Vec<String> = prop_obj
346 .get("required")
347 .and_then(|v| v.as_array())
348 .map(|arr| {
349 arr.iter()
350 .filter_map(|v| v.as_str().map(|s| s.to_string()))
351 .collect()
352 })
353 .unwrap_or_default();
354
355 for (nested_name, nested_schema) in nested_props {
356 let nested_nullable = !nested_required.contains(nested_name);
357 match self.parse_property(
358 nested_name,
359 nested_schema,
360 nested_nullable,
361 errors,
362 ) {
363 Ok(mut nested_cols) => {
364 for col in nested_cols.iter_mut() {
366 col.name = format!("{}.{}", prop_name, col.name);
367 }
368 columns.append(&mut nested_cols);
369 }
370 Err(e) => {
371 errors.push(ParserError {
372 error_type: "parse_error".to_string(),
373 field: Some(format!("{}.{}", prop_name, nested_name)),
374 message: format!("Failed to parse nested property: {}", e),
375 });
376 }
377 }
378 }
379 } else {
380 columns.push(Column {
382 name: prop_name.to_string(),
383 data_type: "STRUCT".to_string(),
384 nullable,
385 primary_key: false,
386 secondary_key: false,
387 composite_key: None,
388 foreign_key: None,
389 constraints: Vec::new(),
390 description,
391 quality: Vec::new(),
392 ref_path: None,
393 enum_values: Vec::new(),
394 errors: Vec::new(),
395 column_order: 0,
396 });
397 }
398 }
399 "array" => {
400 let items = prop_obj
402 .get("items")
403 .ok_or_else(|| anyhow::anyhow!("Array property missing items"))?;
404
405 let data_type = if let Some(items_str) = items.get("type").and_then(|v| v.as_str())
406 {
407 if items_str == "object" {
408 if let Some(nested_props) =
410 items.get("properties").and_then(|v| v.as_object())
411 {
412 let nested_required: Vec<String> = items
413 .get("required")
414 .and_then(|v| v.as_array())
415 .map(|arr| {
416 arr.iter()
417 .filter_map(|v| v.as_str().map(|s| s.to_string()))
418 .collect()
419 })
420 .unwrap_or_default();
421
422 for (nested_name, nested_schema) in nested_props {
423 let nested_nullable = !nested_required.contains(nested_name);
424 match self.parse_property(
425 nested_name,
426 nested_schema,
427 nested_nullable,
428 errors,
429 ) {
430 Ok(mut nested_cols) => {
431 for col in nested_cols.iter_mut() {
432 col.name = format!("{}.{}", prop_name, col.name);
433 }
434 columns.append(&mut nested_cols);
435 }
436 Err(e) => {
437 errors.push(ParserError {
438 error_type: "parse_error".to_string(),
439 field: Some(format!("{}.{}", prop_name, nested_name)),
440 message: format!(
441 "Failed to parse array item property: {}",
442 e
443 ),
444 });
445 }
446 }
447 }
448 return Ok(columns);
449 } else {
450 "ARRAY<STRUCT>".to_string()
451 }
452 } else {
453 format!("ARRAY<{}>", self.map_json_type_to_sql(items_str))
454 }
455 } else {
456 "ARRAY<STRING>".to_string()
457 };
458
459 columns.push(Column {
460 name: prop_name.to_string(),
461 data_type,
462 nullable,
463 primary_key: false,
464 secondary_key: false,
465 composite_key: None,
466 foreign_key: None,
467 constraints: Vec::new(),
468 description,
469 quality: Vec::new(),
470 ref_path: None,
471 enum_values: Vec::new(),
472 errors: Vec::new(),
473 column_order: 0,
474 });
475 }
476 _ => {
477 let data_type = self.map_json_type_to_sql(prop_type);
479 columns.push(Column {
480 name: prop_name.to_string(),
481 data_type,
482 nullable,
483 primary_key: false,
484 secondary_key: false,
485 composite_key: None,
486 foreign_key: None,
487 constraints: Vec::new(),
488 description,
489 quality: Vec::new(),
490 ref_path: None,
491 enum_values: Vec::new(),
492 errors: Vec::new(),
493 column_order: 0,
494 });
495 }
496 }
497
498 Ok(columns)
499 }
500
501 fn map_json_type_to_sql(&self, json_type: &str) -> String {
503 match json_type {
504 "integer" => "INTEGER".to_string(),
505 "number" => "DOUBLE".to_string(),
506 "boolean" => "BOOLEAN".to_string(),
507 "string" => "STRING".to_string(),
508 "null" => "NULL".to_string(),
509 _ => "STRING".to_string(), }
511 }
512}
513
/// A non-fatal problem recorded while parsing a schema.
///
/// Implements `Display` and `std::error::Error` so it can be printed and
/// propagated like any other error value.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Short category label for the problem (for example `"parse_error"`).
    pub error_type: String,
    /// Path of the schema element the problem relates to, when one is known.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}

impl std::fmt::Display for ParserError {
    /// Formats as `"<error_type> at <field>: <message>"`, or
    /// `"<error_type>: <message>"` when no field is recorded.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.field {
            Some(field) => write!(f, "{} at {}: {}", self.error_type, field, self.message),
            None => write!(f, "{}: {}", self.error_type, self.message),
        }
    }
}

impl std::error::Error for ParserError {}