data_modelling_sdk/import/
odcs_shared.rs

1//! Shared utilities for ODCS and ODCL parsing.
2//!
3//! This module contains common types, utility functions, and parsing helpers
4//! used by both the ODCS (Open Data Contract Standard) and ODCL (legacy Data Contract)
5//! importers. Separating these shared components allows for cleaner code organization
6//! and easier testing.
7
8use crate::models::column::ForeignKey;
9use crate::models::enums::{DataVaultClassification, MedallionLayer, SCDPattern};
10use crate::models::{Column, Tag};
11use anyhow::Result;
12use serde_json::Value as JsonValue;
13use std::collections::HashMap;
14use std::str::FromStr;
15
/// A single diagnostic recorded while parsing a contract document.
///
/// Every field is a plain, owned string so the record can be cloned and
/// handed between parsing stages without borrowing from the source document.
#[derive(Clone, Debug)]
pub struct ParserError {
    /// Short machine-readable category of the problem, e.g. `"parse_error"`.
    pub error_type: String,
    /// Name of the field or column the problem was found in.
    pub field: String,
    /// Human-readable explanation of what went wrong.
    pub message: String,
}
23
24/// Convert YAML Value to JSON Value for easier manipulation.
25pub fn yaml_to_json_value(yaml: &serde_yaml::Value) -> Result<JsonValue> {
26    use anyhow::Context;
27    // Convert YAML to JSON via serialization
28    let json_str = serde_json::to_string(yaml).context("Failed to convert YAML to JSON")?;
29    serde_json::from_str(&json_str).context("Failed to parse JSON")
30}
31
32/// Convert JSON Value to serde_json::Value for storage in HashMap.
33pub fn json_value_to_serde_value(value: &JsonValue) -> serde_json::Value {
34    value.clone()
35}
36
/// Normalize a data type name to uppercase, keeping the parameters of
/// `STRUCT<...>`, `ARRAY<...>` and `MAP<...>` exactly as written.
///
/// * Plain types are uppercased as a whole (`"string"` -> `"STRING"`).
/// * For a type whose uppercased form starts with `STRUCT`, `ARRAY` or `MAP`,
///   only the keyword is uppercased. The text between the first `<` and the
///   last `>` is copied verbatim, and anything after the last `>` is dropped.
/// * If such a type has no well-formed `<...>` pair (a bracket is missing, or
///   the last `>` comes before the first `<`), the keyword is uppercased and
///   the remainder of the input is appended unchanged.
///
/// An empty input yields an empty string. The function never panics.
pub fn normalize_data_type(data_type: &str) -> String {
    // Keywords whose parameter list must keep its original casing.
    const CONTAINER_KEYWORDS: [&str; 3] = ["STRUCT", "ARRAY", "MAP"];

    let upper = data_type.to_uppercase();

    let keyword = match CONTAINER_KEYWORDS
        .iter()
        .copied()
        .find(|kw| upper.starts_with(*kw))
    {
        Some(kw) => kw,
        None => return upper,
    };

    if let (Some(open), Some(close)) = (data_type.find('<'), data_type.rfind('>')) {
        // Only slice when the brackets actually enclose something; a `>` that
        // precedes the `<` would otherwise produce an inverted (panicking) range.
        if open < close {
            return format!("{}<{}>", keyword, &data_type[open + 1..close]);
        }
    }

    // No usable bracket pair: keep whatever follows the keyword as written.
    let remainder = data_type.get(keyword.len()..).unwrap_or("");
    format!("{}{}", keyword, remainder)
}
74
75/// Parse medallion layer from string.
76pub fn parse_medallion_layer(s: &str) -> Result<MedallionLayer> {
77    match s.to_uppercase().as_str() {
78        "BRONZE" => Ok(MedallionLayer::Bronze),
79        "SILVER" => Ok(MedallionLayer::Silver),
80        "GOLD" => Ok(MedallionLayer::Gold),
81        "OPERATIONAL" => Ok(MedallionLayer::Operational),
82        _ => Err(anyhow::anyhow!("Unknown medallion layer: {}", s)),
83    }
84}
85
86/// Parse SCD pattern from string.
87pub fn parse_scd_pattern(s: &str) -> Result<SCDPattern> {
88    match s.to_uppercase().as_str() {
89        "TYPE_1" | "TYPE1" => Ok(SCDPattern::Type1),
90        "TYPE_2" | "TYPE2" => Ok(SCDPattern::Type2),
91        _ => Err(anyhow::anyhow!("Unknown SCD pattern: {}", s)),
92    }
93}
94
95/// Parse Data Vault classification from string.
96pub fn parse_data_vault_classification(s: &str) -> Result<DataVaultClassification> {
97    match s.to_uppercase().as_str() {
98        "HUB" => Ok(DataVaultClassification::Hub),
99        "LINK" => Ok(DataVaultClassification::Link),
100        "SATELLITE" | "SAT" => Ok(DataVaultClassification::Satellite),
101        _ => Err(anyhow::anyhow!("Unknown Data Vault classification: {}", s)),
102    }
103}
104
105/// Extract quality rules from a JSON object.
106pub fn extract_quality_from_obj(
107    obj: &serde_json::Map<String, JsonValue>,
108) -> Vec<HashMap<String, serde_json::Value>> {
109    let mut quality_rules = Vec::new();
110    if let Some(quality_val) = obj.get("quality") {
111        if let Some(arr) = quality_val.as_array() {
112            // Array of quality rules
113            for item in arr {
114                if let Some(rule_obj) = item.as_object() {
115                    let mut rule = HashMap::new();
116                    for (key, value) in rule_obj {
117                        rule.insert(key.clone(), json_value_to_serde_value(value));
118                    }
119                    quality_rules.push(rule);
120                }
121            }
122        } else if let Some(rule_obj) = quality_val.as_object() {
123            // Single quality rule object
124            let mut rule = HashMap::new();
125            for (key, value) in rule_obj {
126                rule.insert(key.clone(), json_value_to_serde_value(value));
127            }
128            quality_rules.push(rule);
129        }
130    }
131    quality_rules
132}
133
134/// Parse foreign key from JSON value.
135pub fn parse_foreign_key(fk_data: &JsonValue) -> Option<ForeignKey> {
136    let obj = fk_data.as_object()?;
137    Some(ForeignKey {
138        table_id: obj
139            .get("table_id")
140            .or_else(|| obj.get("table"))
141            .and_then(|v| v.as_str())
142            .unwrap_or("")
143            .to_string(),
144        column_name: obj
145            .get("column_name")
146            .or_else(|| obj.get("column"))
147            .and_then(|v| v.as_str())
148            .unwrap_or("")
149            .to_string(),
150    })
151}
152
153/// Parse foreign key from Data Contract field data.
154pub fn parse_foreign_key_from_data_contract(
155    field_data: &serde_json::Map<String, JsonValue>,
156) -> Option<ForeignKey> {
157    field_data
158        .get("foreignKey")
159        .and_then(|v| v.as_object())
160        .map(|fk_obj| ForeignKey {
161            table_id: fk_obj
162                .get("table")
163                .or_else(|| fk_obj.get("table_id"))
164                .and_then(|v| v.as_str())
165                .unwrap_or("")
166                .to_string(),
167            column_name: fk_obj
168                .get("column")
169                .or_else(|| fk_obj.get("column_name"))
170                .and_then(|v| v.as_str())
171                .unwrap_or("")
172                .to_string(),
173        })
174}
175
176/// Extract metadata from customProperties in ODCS/ODCL format.
177pub fn extract_metadata_from_custom_properties(
178    data: &JsonValue,
179) -> (
180    Vec<MedallionLayer>,
181    Option<SCDPattern>,
182    Option<DataVaultClassification>,
183    Vec<Tag>,
184) {
185    let mut medallion_layers = Vec::new();
186    let mut scd_pattern = None;
187    let mut data_vault_classification = None;
188    let mut tags: Vec<Tag> = Vec::new();
189
190    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
191        for prop in custom_props {
192            if let Some(prop_obj) = prop.as_object() {
193                let prop_key = prop_obj
194                    .get("property")
195                    .and_then(|v| v.as_str())
196                    .unwrap_or("");
197                let prop_value = prop_obj.get("value");
198
199                match prop_key {
200                    "medallionLayers" | "medallion_layers" => {
201                        if let Some(arr) = prop_value.and_then(|v| v.as_array()) {
202                            for item in arr {
203                                if let Some(s) = item.as_str()
204                                    && let Ok(layer) = parse_medallion_layer(s)
205                                {
206                                    medallion_layers.push(layer);
207                                }
208                            }
209                        } else if let Some(s) = prop_value.and_then(|v| v.as_str()) {
210                            // Comma-separated string
211                            for part in s.split(',') {
212                                if let Ok(layer) = parse_medallion_layer(part.trim()) {
213                                    medallion_layers.push(layer);
214                                }
215                            }
216                        }
217                    }
218                    "scdPattern" | "scd_pattern" => {
219                        if let Some(s) = prop_value.and_then(|v| v.as_str()) {
220                            scd_pattern = parse_scd_pattern(s).ok();
221                        }
222                    }
223                    "dataVaultClassification" | "data_vault_classification" => {
224                        if let Some(s) = prop_value.and_then(|v| v.as_str()) {
225                            data_vault_classification = parse_data_vault_classification(s).ok();
226                        }
227                    }
228                    "tags" => {
229                        if let Some(arr) = prop_value.and_then(|v| v.as_array()) {
230                            for item in arr {
231                                if let Some(s) = item.as_str() {
232                                    // Parse tag string to Tag enum
233                                    if let Ok(tag) = Tag::from_str(s) {
234                                        tags.push(tag);
235                                    } else {
236                                        tags.push(Tag::Simple(s.to_string()));
237                                    }
238                                }
239                            }
240                        } else if let Some(s) = prop_value.and_then(|v| v.as_str()) {
241                            // Comma-separated string
242                            for part in s.split(',') {
243                                let part = part.trim();
244                                if let Ok(tag) = Tag::from_str(part) {
245                                    tags.push(tag);
246                                } else {
247                                    tags.push(Tag::Simple(part.to_string()));
248                                }
249                            }
250                        }
251                    }
252                    "sharedDomains" | "shared_domains" => {
253                        // sharedDomains will be stored in metadata by the caller
254                        // This match is here for completeness but sharedDomains is handled separately
255                    }
256                    _ => {}
257                }
258            }
259        }
260    }
261
262    // Also extract tags from top-level tags field
263    if let Some(tags_arr) = data.get("tags").and_then(|v| v.as_array()) {
264        for item in tags_arr {
265            if let Some(s) = item.as_str() {
266                // Parse tag string to Tag enum
267                let tag = Tag::from_str(s).unwrap_or_else(|_| Tag::Simple(s.to_string()));
268                if !tags.contains(&tag) {
269                    tags.push(tag);
270                }
271            }
272        }
273    }
274
275    (
276        medallion_layers,
277        scd_pattern,
278        data_vault_classification,
279        tags,
280    )
281}
282
283/// Extract catalog and schema from customProperties.
284pub fn extract_catalog_schema(data: &JsonValue) -> (Option<String>, Option<String>) {
285    let mut catalog_name = None;
286    let mut schema_name = None;
287
288    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
289        for prop in custom_props {
290            if let Some(prop_obj) = prop.as_object() {
291                let prop_key = prop_obj
292                    .get("property")
293                    .and_then(|v| v.as_str())
294                    .unwrap_or("");
295                let prop_value = prop_obj.get("value").and_then(|v| v.as_str());
296
297                match prop_key {
298                    "catalogName" | "catalog_name" => {
299                        catalog_name = prop_value.map(|s| s.to_string());
300                    }
301                    "schemaName" | "schema_name" => {
302                        schema_name = prop_value.map(|s| s.to_string());
303                    }
304                    _ => {}
305                }
306            }
307        }
308    }
309
310    // Also check direct fields
311    if catalog_name.is_none() {
312        catalog_name = data
313            .get("catalog_name")
314            .and_then(|v| v.as_str())
315            .map(|s| s.to_string());
316    }
317    if schema_name.is_none() {
318        schema_name = data
319            .get("schema_name")
320            .and_then(|v| v.as_str())
321            .map(|s| s.to_string());
322    }
323
324    (catalog_name, schema_name)
325}
326
327/// Extract sharedDomains from customProperties.
328pub fn extract_shared_domains(data: &JsonValue) -> Vec<String> {
329    let mut shared_domains: Vec<String> = Vec::new();
330    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
331        for prop in custom_props {
332            if let Some(prop_obj) = prop.as_object() {
333                let prop_key = prop_obj
334                    .get("property")
335                    .and_then(|v| v.as_str())
336                    .unwrap_or("");
337                if (prop_key == "sharedDomains" || prop_key == "shared_domains")
338                    && let Some(arr) = prop_obj.get("value").and_then(|v| v.as_array())
339                {
340                    for item in arr {
341                        if let Some(s) = item.as_str() {
342                            shared_domains.push(s.to_string());
343                        }
344                    }
345                }
346            }
347        }
348    }
349    shared_domains
350}
351
352/// Resolve a $ref reference like '#/definitions/orderAction'.
353pub fn resolve_ref<'a>(ref_str: &str, data: &'a JsonValue) -> Option<&'a JsonValue> {
354    if !ref_str.starts_with("#/") {
355        return None;
356    }
357
358    // Remove the leading '#/'
359    let path = &ref_str[2..];
360    let parts: Vec<&str> = path.split('/').collect();
361
362    // Navigate through the data structure
363    let mut current = data;
364    for part in parts {
365        current = current.get(part)?;
366    }
367
368    if current.is_object() {
369        Some(current)
370    } else {
371        None
372    }
373}
374
375/// Expand a nested column from a schema definition, creating columns with dot notation.
376///
377/// This helper function recursively expands nested structures (OBJECT/STRUCT types)
378/// into flat columns with dot notation (e.g., "address.street", "address.city").
379#[allow(clippy::only_used_in_recursion)]
380pub fn expand_nested_column(
381    column_name: &str,
382    schema: &JsonValue,
383    nullable: bool,
384    columns: &mut Vec<Column>,
385    errors: &mut Vec<ParserError>,
386) {
387    let schema_obj = match schema.as_object() {
388        Some(obj) => obj,
389        None => {
390            errors.push(ParserError {
391                error_type: "parse_error".to_string(),
392                field: column_name.to_string(),
393                message: "Nested schema must be an object".to_string(),
394            });
395            return;
396        }
397    };
398
399    // Check both "logicalType" (ODCS v3.1.0) and "type" (legacy/ODCL) for backward compatibility
400    let schema_type_raw = schema_obj
401        .get("logicalType")
402        .and_then(|v| v.as_str())
403        .or_else(|| schema_obj.get("type").and_then(|v| v.as_str()))
404        .unwrap_or("object");
405
406    // Normalize legacy "type" values to "logicalType" equivalents
407    let schema_type = match schema_type_raw {
408        "object" | "struct" => "object",
409        "array" => "array",
410        "string" | "varchar" | "char" | "text" => "string",
411        "integer" | "int" | "bigint" | "smallint" | "tinyint" => "integer",
412        "number" | "decimal" | "double" | "float" | "numeric" => "number",
413        "boolean" | "bool" => "boolean",
414        "date" => "date",
415        "timestamp" | "datetime" => "timestamp",
416        "time" => "time",
417        _ => schema_type_raw,
418    };
419
420    match schema_type {
421        "object" | "struct" => {
422            // Check if it has nested properties - handle both object format (legacy/ODCL)
423            // and array format (ODCS v3.1.0)
424            let properties_obj = schema_obj.get("properties").and_then(|v| v.as_object());
425            let properties_arr = schema_obj.get("properties").and_then(|v| v.as_array());
426
427            if let Some(properties) = properties_obj {
428                // Object format (legacy/ODCL): properties is a map of name -> schema
429                let nested_required: Vec<String> = schema_obj
430                    .get("required")
431                    .and_then(|v| v.as_array())
432                    .map(|arr| {
433                        arr.iter()
434                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
435                            .collect()
436                    })
437                    .unwrap_or_default();
438
439                for (nested_name, nested_schema) in properties {
440                    let nested_nullable = !nested_required.contains(nested_name);
441                    expand_nested_column(
442                        &format!("{}.{}", column_name, nested_name),
443                        nested_schema,
444                        nullable || nested_nullable,
445                        columns,
446                        errors,
447                    );
448                }
449            } else if let Some(properties_list) = properties_arr {
450                // Array format (ODCS v3.1.0): properties is an array with 'name' field
451                for prop_data in properties_list {
452                    if let Some(prop_obj) = prop_data.as_object() {
453                        // Extract name from property object (required in v3.1.0)
454                        let nested_name = prop_obj
455                            .get("name")
456                            .or_else(|| prop_obj.get("id"))
457                            .and_then(|v| v.as_str())
458                            .unwrap_or("");
459
460                        if !nested_name.is_empty() {
461                            let nested_nullable = !prop_obj
462                                .get("required")
463                                .and_then(|v| v.as_bool())
464                                .unwrap_or(false);
465
466                            expand_nested_column(
467                                &format!("{}.{}", column_name, nested_name),
468                                prop_data,
469                                nullable || nested_nullable,
470                                columns,
471                                errors,
472                            );
473                        }
474                    }
475                }
476            } else {
477                // Object without properties - create as OBJECT type
478                let physical_type = schema_obj
479                    .get("physicalType")
480                    .and_then(|v| v.as_str())
481                    .map(|s| s.to_string());
482                let description = schema_obj
483                    .get("description")
484                    .and_then(|v| v.as_str())
485                    .unwrap_or("")
486                    .to_string();
487                columns.push(Column {
488                    name: column_name.to_string(),
489                    data_type: "OBJECT".to_string(),
490                    physical_type,
491                    nullable,
492                    primary_key: false,
493                    secondary_key: false,
494                    composite_key: None,
495                    foreign_key: None,
496                    constraints: Vec::new(),
497                    description,
498                    quality: Vec::new(),
499                    relationships: Vec::new(),
500                    enum_values: Vec::new(),
501                    errors: Vec::new(),
502                    column_order: 0,
503                    nested_data: None,
504                });
505            }
506        }
507        "array" => {
508            // Handle array types
509            let items = schema_obj.get("items").unwrap_or(schema);
510            // Check both "logicalType" (ODCS v3.1.0) and "type" (legacy) for backward compatibility
511            let items_obj = items.as_object();
512            let items_type_raw = items_obj
513                .and_then(|obj| {
514                    obj.get("logicalType")
515                        .and_then(|v| v.as_str())
516                        .or_else(|| obj.get("type").and_then(|v| v.as_str()))
517                })
518                .unwrap_or("string");
519
520            // Normalize legacy "type" values to "logicalType" equivalents for backward compatibility
521            let items_type = match items_type_raw {
522                "object" | "struct" => "object",
523                "array" => "array",
524                "string" | "varchar" | "char" | "text" => "string",
525                "integer" | "int" | "bigint" | "smallint" | "tinyint" => "integer",
526                "number" | "decimal" | "double" | "float" | "numeric" => "number",
527                "boolean" | "bool" => "boolean",
528                "date" => "date",
529                "timestamp" | "datetime" => "timestamp",
530                "time" => "time",
531                _ => items_type_raw,
532            };
533
534            if items_type == "object" {
535                // Array of objects - expand nested structure
536                let physical_type = schema_obj
537                    .get("physicalType")
538                    .and_then(|v| v.as_str())
539                    .map(|s| s.to_string());
540                let description = schema_obj
541                    .get("description")
542                    .and_then(|v| v.as_str())
543                    .unwrap_or("")
544                    .to_string();
545                columns.push(Column {
546                    name: column_name.to_string(),
547                    data_type: "ARRAY<OBJECT>".to_string(),
548                    physical_type,
549                    nullable,
550                    primary_key: false,
551                    secondary_key: false,
552                    composite_key: None,
553                    foreign_key: None,
554                    constraints: Vec::new(),
555                    description,
556                    quality: Vec::new(),
557                    relationships: Vec::new(),
558                    enum_values: Vec::new(),
559                    errors: Vec::new(),
560                    column_order: 0,
561                    nested_data: None,
562                });
563                // Also expand nested properties with array prefix
564                // Handle both object format (legacy) and array format (ODCS v3.1.0)
565                let properties_obj = items
566                    .as_object()
567                    .and_then(|obj| obj.get("properties"))
568                    .and_then(|v| v.as_object());
569                let properties_arr = items
570                    .as_object()
571                    .and_then(|obj| obj.get("properties"))
572                    .and_then(|v| v.as_array());
573
574                if let Some(properties_map) = properties_obj {
575                    // Object format (legacy): properties is a map
576                    let nested_required: Vec<String> = items
577                        .as_object()
578                        .and_then(|obj| obj.get("required").and_then(|v| v.as_array()))
579                        .map(|arr| {
580                            arr.iter()
581                                .filter_map(|v| v.as_str().map(|s| s.to_string()))
582                                .collect()
583                        })
584                        .unwrap_or_default();
585
586                    for (nested_name, nested_schema) in properties_map {
587                        let nested_nullable = !nested_required.contains(nested_name);
588                        expand_nested_column(
589                            &format!("{}.[].{}", column_name, nested_name),
590                            nested_schema,
591                            nullable || nested_nullable,
592                            columns,
593                            errors,
594                        );
595                    }
596                } else if let Some(properties_list) = properties_arr {
597                    // Array format (ODCS v3.1.0): properties is an array with 'name' field
598                    for prop_data in properties_list {
599                        if let Some(prop_obj) = prop_data.as_object() {
600                            // Extract name from property object (required in v3.1.0)
601                            let nested_name = prop_obj
602                                .get("name")
603                                .or_else(|| prop_obj.get("id"))
604                                .and_then(|v| v.as_str())
605                                .unwrap_or("");
606
607                            if !nested_name.is_empty() {
608                                let nested_nullable = !prop_obj
609                                    .get("required")
610                                    .and_then(|v| v.as_bool())
611                                    .unwrap_or(false);
612
613                                expand_nested_column(
614                                    &format!("{}.[].{}", column_name, nested_name),
615                                    prop_data,
616                                    nullable || nested_nullable,
617                                    columns,
618                                    errors,
619                                );
620                            }
621                        }
622                    }
623                }
624            } else {
625                // Array of primitives
626                let data_type = format!("ARRAY<{}>", items_type.to_uppercase());
627                // Extract physicalType (ODCS v3.1.0) - the actual database type
628                let physical_type = schema_obj
629                    .get("physicalType")
630                    .and_then(|v| v.as_str())
631                    .map(|s| s.to_string());
632                let description = schema_obj
633                    .get("description")
634                    .and_then(|v| v.as_str())
635                    .unwrap_or("")
636                    .to_string();
637                columns.push(Column {
638                    name: column_name.to_string(),
639                    data_type,
640                    physical_type,
641                    nullable,
642                    primary_key: false,
643                    secondary_key: false,
644                    composite_key: None,
645                    foreign_key: None,
646                    constraints: Vec::new(),
647                    description,
648                    quality: Vec::new(),
649                    relationships: Vec::new(),
650                    enum_values: Vec::new(),
651                    errors: Vec::new(),
652                    column_order: 0,
653                    nested_data: None,
654                });
655            }
656        }
657        _ => {
658            // Simple type
659            let data_type = schema_type.to_uppercase();
660            // Extract physicalType (ODCS v3.1.0) - the actual database type
661            let physical_type = schema_obj
662                .get("physicalType")
663                .and_then(|v| v.as_str())
664                .map(|s| s.to_string());
665            let description = schema_obj
666                .get("description")
667                .and_then(|v| v.as_str())
668                .unwrap_or("")
669                .to_string();
670            let enum_values = schema_obj
671                .get("enum")
672                .and_then(|v| v.as_array())
673                .map(|arr| {
674                    arr.iter()
675                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
676                        .collect()
677                })
678                .unwrap_or_default();
679            columns.push(Column {
680                name: column_name.to_string(),
681                data_type,
682                physical_type,
683                nullable,
684                primary_key: false,
685                secondary_key: false,
686                composite_key: None,
687                foreign_key: None,
688                constraints: Vec::new(),
689                description,
690                quality: Vec::new(),
691                relationships: Vec::new(),
692                enum_values,
693                errors: Vec::new(),
694                column_order: 0,
695                nested_data: None,
696            });
697        }
698    }
699}
700
701/// Parse STRUCT fields from string (e.g., "ID: STRING, NAME: STRING").
702pub fn parse_struct_fields_from_string(fields_str: &str) -> Result<Vec<(String, String)>> {
703    let mut fields = Vec::new();
704    let mut current_field = String::new();
705    let mut depth = 0;
706    let mut in_string = false;
707    let mut string_char = None;
708
709    for ch in fields_str.chars() {
710        match ch {
711            '\'' | '"' if !in_string || Some(ch) == string_char => {
712                if in_string {
713                    in_string = false;
714                    string_char = None;
715                } else {
716                    in_string = true;
717                    string_char = Some(ch);
718                }
719                current_field.push(ch);
720            }
721            '<' if !in_string => {
722                depth += 1;
723                current_field.push(ch);
724            }
725            '>' if !in_string => {
726                depth -= 1;
727                current_field.push(ch);
728            }
729            ',' if !in_string && depth == 0 => {
730                // End of current field
731                let trimmed = current_field.trim();
732                if !trimmed.is_empty()
733                    && let Some((name, type_part)) = parse_field_definition(trimmed)
734                {
735                    fields.push((name, type_part));
736                }
737                current_field.clear();
738            }
739            _ => {
740                current_field.push(ch);
741            }
742        }
743    }
744
745    // Handle last field
746    let trimmed = current_field.trim();
747    if !trimmed.is_empty()
748        && let Some((name, type_part)) = parse_field_definition(trimmed)
749    {
750        fields.push((name, type_part));
751    }
752
753    Ok(fields)
754}
755
756/// Parse a single field definition (e.g., "ID: STRING" or "DETAILS: STRUCT<...>").
757pub fn parse_field_definition(field_def: &str) -> Option<(String, String)> {
758    // Split by colon, but handle nested STRUCTs
759    let colon_pos = field_def.find(':')?;
760    let name = field_def[..colon_pos].trim().to_string();
761    let type_part = field_def[colon_pos + 1..].trim().to_string();
762
763    if name.is_empty() || type_part.is_empty() {
764        return None;
765    }
766
767    Some((name, type_part))
768}
769
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an owned `(name, type)` pair.
    fn field(name: &str, type_part: &str) -> (String, String) {
        (name.to_string(), type_part.to_string())
    }

    #[test]
    fn test_normalize_data_type() {
        // (input, expected): plain types are uppercased, container parameters
        // keep their casing.
        let cases = [
            ("string", "STRING"),
            ("int", "INT"),
            ("STRUCT<a: INT>", "STRUCT<a: INT>"),
            ("array<string>", "ARRAY<string>"),
            ("MAP<string, int>", "MAP<string, int>"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(normalize_data_type(input), *expected);
        }
    }

    #[test]
    fn test_parse_medallion_layer() {
        // Matching is case-insensitive.
        assert!(matches!(
            parse_medallion_layer("bronze"),
            Ok(MedallionLayer::Bronze)
        ));
        assert!(matches!(
            parse_medallion_layer("SILVER"),
            Ok(MedallionLayer::Silver)
        ));
        assert!(matches!(
            parse_medallion_layer("Gold"),
            Ok(MedallionLayer::Gold)
        ));
        assert!(parse_medallion_layer("invalid").is_err());
    }

    #[test]
    fn test_parse_scd_pattern() {
        // Underscored and compact spellings are both accepted.
        assert!(matches!(parse_scd_pattern("TYPE_1"), Ok(SCDPattern::Type1)));
        assert!(matches!(parse_scd_pattern("type2"), Ok(SCDPattern::Type2)));
        assert!(parse_scd_pattern("invalid").is_err());
    }

    #[test]
    fn test_parse_data_vault_classification() {
        assert!(matches!(
            parse_data_vault_classification("hub"),
            Ok(DataVaultClassification::Hub)
        ));
        assert!(matches!(
            parse_data_vault_classification("LINK"),
            Ok(DataVaultClassification::Link)
        ));
        // "sat" is the short form of "satellite".
        assert!(matches!(
            parse_data_vault_classification("sat"),
            Ok(DataVaultClassification::Satellite)
        ));
        assert!(parse_data_vault_classification("invalid").is_err());
    }

    #[test]
    fn test_parse_field_definition() {
        assert_eq!(
            parse_field_definition("name: STRING"),
            Some(field("name", "STRING"))
        );

        // Only the first colon separates name from type.
        assert_eq!(
            parse_field_definition("nested: STRUCT<a: INT, b: STRING>"),
            Some(field("nested", "STRUCT<a: INT, b: STRING>"))
        );
    }

    #[test]
    fn test_parse_struct_fields_from_string() {
        let flat = parse_struct_fields_from_string("id: INT, name: STRING").unwrap();
        assert_eq!(flat.len(), 2);
        assert_eq!(flat, vec![field("id", "INT"), field("name", "STRING")]);

        // Commas inside a nested STRUCT must not split the field.
        let nested = parse_struct_fields_from_string(
            "id: INT, nested: STRUCT<a: INT, b: STRING>, name: STRING",
        )
        .unwrap();
        assert_eq!(nested.len(), 3);
        assert_eq!(
            nested,
            vec![
                field("id", "INT"),
                field("nested", "STRUCT<a: INT, b: STRING>"),
                field("name", "STRING"),
            ]
        );
    }
}