data_modelling_core/import/
odcs_shared.rs

1//! Shared utilities for ODCS and ODCL parsing.
2//!
3//! This module contains common types, utility functions, and parsing helpers
4//! used by both the ODCS (Open Data Contract Standard) and ODCL (legacy Data Contract)
5//! importers. Separating these shared components allows for cleaner code organization
6//! and easier testing.
7
8use crate::models::column::ForeignKey;
9use crate::models::enums::{DataVaultClassification, MedallionLayer, SCDPattern};
10use crate::models::{Column, Tag};
11use anyhow::Result;
12use serde_json::Value as JsonValue;
13use std::collections::HashMap;
14use std::str::FromStr;
15
/// Parser error structure for detailed error reporting.
///
/// Import helpers in this module append these to a caller-supplied list
/// (see `expand_nested_column`) rather than returning them directly.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Category label for the error, e.g. `"parse_error"`.
    pub error_type: String,
    /// Name of the field or column the error relates to.
    pub field: String,
    /// Human-readable description of what went wrong.
    pub message: String,
}
23
24/// Convert YAML Value to JSON Value for easier manipulation.
25pub fn yaml_to_json_value(yaml: &serde_yaml::Value) -> Result<JsonValue> {
26    use anyhow::Context;
27    // Convert YAML to JSON via serialization
28    let json_str = serde_json::to_string(yaml).context("Failed to convert YAML to JSON")?;
29    serde_json::from_str(&json_str).context("Failed to parse JSON")
30}
31
32/// Convert JSON Value to serde_json::Value for storage in HashMap.
33pub fn json_value_to_serde_value(value: &JsonValue) -> serde_json::Value {
34    value.clone()
35}
36
/// Normalize a data type name to uppercase, preserving the parameters of
/// `STRUCT<...>`, `ARRAY<...>` and `MAP<...>` exactly as written.
///
/// * Plain types are uppercased as a whole (`"string"` -> `"STRING"`).
/// * For the three container keywords only the keyword is uppercased; the
///   text between the first `<` and the last `>` is copied verbatim
///   (`"array<string>"` -> `"ARRAY<string>"`).
/// * A container keyword without a well-formed `<...>` pair keeps whatever
///   follows the keyword unchanged (`"array>x<"` -> `"ARRAY>x<"`).
/// * An empty input yields an empty string.
///
/// This function never panics: a `>` that appears before the `<` is treated
/// as "no brackets" instead of being used as a slice bound.
pub fn normalize_data_type(data_type: &str) -> String {
    if data_type.is_empty() {
        return String::new();
    }

    let upper = data_type.to_uppercase();

    for keyword in ["STRUCT", "ARRAY", "MAP"] {
        if !upper.starts_with(keyword) {
            continue;
        }

        // Only use the brackets when the opening one really comes first;
        // otherwise `start + 1..end` would be an inverted (panicking) range.
        if let (Some(start), Some(end)) = (data_type.find('<'), data_type.rfind('>')) {
            if start < end {
                return format!("{}<{}>", keyword, &data_type[start + 1..end]);
            }
        }

        // No usable brackets: uppercase the keyword, keep the tail as written.
        // `get` guards the byte offset, because uppercasing can change byte
        // lengths and the keyword length is measured on the uppercased text.
        return match data_type.get(keyword.len()..) {
            Some(rest) => format!("{}{}", keyword, rest),
            None => upper,
        };
    }

    upper
}
74
75/// Parse medallion layer from string.
76pub fn parse_medallion_layer(s: &str) -> Result<MedallionLayer> {
77    match s.to_uppercase().as_str() {
78        "BRONZE" => Ok(MedallionLayer::Bronze),
79        "SILVER" => Ok(MedallionLayer::Silver),
80        "GOLD" => Ok(MedallionLayer::Gold),
81        "OPERATIONAL" => Ok(MedallionLayer::Operational),
82        _ => Err(anyhow::anyhow!("Unknown medallion layer: {}", s)),
83    }
84}
85
86/// Parse SCD pattern from string.
87pub fn parse_scd_pattern(s: &str) -> Result<SCDPattern> {
88    match s.to_uppercase().as_str() {
89        "TYPE_1" | "TYPE1" => Ok(SCDPattern::Type1),
90        "TYPE_2" | "TYPE2" => Ok(SCDPattern::Type2),
91        _ => Err(anyhow::anyhow!("Unknown SCD pattern: {}", s)),
92    }
93}
94
95/// Parse Data Vault classification from string.
96pub fn parse_data_vault_classification(s: &str) -> Result<DataVaultClassification> {
97    match s.to_uppercase().as_str() {
98        "HUB" => Ok(DataVaultClassification::Hub),
99        "LINK" => Ok(DataVaultClassification::Link),
100        "SATELLITE" | "SAT" => Ok(DataVaultClassification::Satellite),
101        _ => Err(anyhow::anyhow!("Unknown Data Vault classification: {}", s)),
102    }
103}
104
105/// Extract quality rules from a JSON object.
106pub fn extract_quality_from_obj(
107    obj: &serde_json::Map<String, JsonValue>,
108) -> Vec<HashMap<String, serde_json::Value>> {
109    let mut quality_rules = Vec::new();
110    if let Some(quality_val) = obj.get("quality") {
111        if let Some(arr) = quality_val.as_array() {
112            // Array of quality rules
113            for item in arr {
114                if let Some(rule_obj) = item.as_object() {
115                    let mut rule = HashMap::new();
116                    for (key, value) in rule_obj {
117                        rule.insert(key.clone(), json_value_to_serde_value(value));
118                    }
119                    quality_rules.push(rule);
120                }
121            }
122        } else if let Some(rule_obj) = quality_val.as_object() {
123            // Single quality rule object
124            let mut rule = HashMap::new();
125            for (key, value) in rule_obj {
126                rule.insert(key.clone(), json_value_to_serde_value(value));
127            }
128            quality_rules.push(rule);
129        }
130    }
131    quality_rules
132}
133
134/// Parse foreign key from JSON value.
135pub fn parse_foreign_key(fk_data: &JsonValue) -> Option<ForeignKey> {
136    let obj = fk_data.as_object()?;
137    Some(ForeignKey {
138        table_id: obj
139            .get("table_id")
140            .or_else(|| obj.get("table"))
141            .and_then(|v| v.as_str())
142            .unwrap_or("")
143            .to_string(),
144        column_name: obj
145            .get("column_name")
146            .or_else(|| obj.get("column"))
147            .and_then(|v| v.as_str())
148            .unwrap_or("")
149            .to_string(),
150    })
151}
152
153/// Parse foreign key from Data Contract field data.
154pub fn parse_foreign_key_from_data_contract(
155    field_data: &serde_json::Map<String, JsonValue>,
156) -> Option<ForeignKey> {
157    field_data
158        .get("foreignKey")
159        .and_then(|v| v.as_object())
160        .map(|fk_obj| ForeignKey {
161            table_id: fk_obj
162                .get("table")
163                .or_else(|| fk_obj.get("table_id"))
164                .and_then(|v| v.as_str())
165                .unwrap_or("")
166                .to_string(),
167            column_name: fk_obj
168                .get("column")
169                .or_else(|| fk_obj.get("column_name"))
170                .and_then(|v| v.as_str())
171                .unwrap_or("")
172                .to_string(),
173        })
174}
175
176/// Extract metadata from customProperties in ODCS/ODCL format.
177pub fn extract_metadata_from_custom_properties(
178    data: &JsonValue,
179) -> (
180    Vec<MedallionLayer>,
181    Option<SCDPattern>,
182    Option<DataVaultClassification>,
183    Vec<Tag>,
184) {
185    let mut medallion_layers = Vec::new();
186    let mut scd_pattern = None;
187    let mut data_vault_classification = None;
188    let mut tags: Vec<Tag> = Vec::new();
189
190    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
191        for prop in custom_props {
192            if let Some(prop_obj) = prop.as_object() {
193                let prop_key = prop_obj
194                    .get("property")
195                    .and_then(|v| v.as_str())
196                    .unwrap_or("");
197                let prop_value = prop_obj.get("value");
198
199                match prop_key {
200                    "medallionLayers" | "medallion_layers" => {
201                        if let Some(arr) = prop_value.and_then(|v| v.as_array()) {
202                            for item in arr {
203                                if let Some(s) = item.as_str()
204                                    && let Ok(layer) = parse_medallion_layer(s)
205                                {
206                                    medallion_layers.push(layer);
207                                }
208                            }
209                        } else if let Some(s) = prop_value.and_then(|v| v.as_str()) {
210                            // Comma-separated string
211                            for part in s.split(',') {
212                                if let Ok(layer) = parse_medallion_layer(part.trim()) {
213                                    medallion_layers.push(layer);
214                                }
215                            }
216                        }
217                    }
218                    "scdPattern" | "scd_pattern" => {
219                        if let Some(s) = prop_value.and_then(|v| v.as_str()) {
220                            scd_pattern = parse_scd_pattern(s).ok();
221                        }
222                    }
223                    "dataVaultClassification" | "data_vault_classification" => {
224                        if let Some(s) = prop_value.and_then(|v| v.as_str()) {
225                            data_vault_classification = parse_data_vault_classification(s).ok();
226                        }
227                    }
228                    "tags" => {
229                        if let Some(arr) = prop_value.and_then(|v| v.as_array()) {
230                            for item in arr {
231                                if let Some(s) = item.as_str() {
232                                    // Parse tag string to Tag enum
233                                    if let Ok(tag) = Tag::from_str(s) {
234                                        tags.push(tag);
235                                    } else {
236                                        tags.push(Tag::Simple(s.to_string()));
237                                    }
238                                }
239                            }
240                        } else if let Some(s) = prop_value.and_then(|v| v.as_str()) {
241                            // Comma-separated string
242                            for part in s.split(',') {
243                                let part = part.trim();
244                                if let Ok(tag) = Tag::from_str(part) {
245                                    tags.push(tag);
246                                } else {
247                                    tags.push(Tag::Simple(part.to_string()));
248                                }
249                            }
250                        }
251                    }
252                    "sharedDomains" | "shared_domains" => {
253                        // sharedDomains will be stored in metadata by the caller
254                        // This match is here for completeness but sharedDomains is handled separately
255                    }
256                    _ => {}
257                }
258            }
259        }
260    }
261
262    // Also extract tags from top-level tags field
263    if let Some(tags_arr) = data.get("tags").and_then(|v| v.as_array()) {
264        for item in tags_arr {
265            if let Some(s) = item.as_str() {
266                // Parse tag string to Tag enum
267                let tag = Tag::from_str(s).unwrap_or_else(|_| Tag::Simple(s.to_string()));
268                if !tags.contains(&tag) {
269                    tags.push(tag);
270                }
271            }
272        }
273    }
274
275    (
276        medallion_layers,
277        scd_pattern,
278        data_vault_classification,
279        tags,
280    )
281}
282
283/// Extract catalog and schema from customProperties.
284pub fn extract_catalog_schema(data: &JsonValue) -> (Option<String>, Option<String>) {
285    let mut catalog_name = None;
286    let mut schema_name = None;
287
288    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
289        for prop in custom_props {
290            if let Some(prop_obj) = prop.as_object() {
291                let prop_key = prop_obj
292                    .get("property")
293                    .and_then(|v| v.as_str())
294                    .unwrap_or("");
295                let prop_value = prop_obj.get("value").and_then(|v| v.as_str());
296
297                match prop_key {
298                    "catalogName" | "catalog_name" => {
299                        catalog_name = prop_value.map(|s| s.to_string());
300                    }
301                    "schemaName" | "schema_name" => {
302                        schema_name = prop_value.map(|s| s.to_string());
303                    }
304                    _ => {}
305                }
306            }
307        }
308    }
309
310    // Also check direct fields
311    if catalog_name.is_none() {
312        catalog_name = data
313            .get("catalog_name")
314            .and_then(|v| v.as_str())
315            .map(|s| s.to_string());
316    }
317    if schema_name.is_none() {
318        schema_name = data
319            .get("schema_name")
320            .and_then(|v| v.as_str())
321            .map(|s| s.to_string());
322    }
323
324    (catalog_name, schema_name)
325}
326
327/// Extract sharedDomains from customProperties.
328pub fn extract_shared_domains(data: &JsonValue) -> Vec<String> {
329    let mut shared_domains: Vec<String> = Vec::new();
330    if let Some(custom_props) = data.get("customProperties").and_then(|v| v.as_array()) {
331        for prop in custom_props {
332            if let Some(prop_obj) = prop.as_object() {
333                let prop_key = prop_obj
334                    .get("property")
335                    .and_then(|v| v.as_str())
336                    .unwrap_or("");
337                if (prop_key == "sharedDomains" || prop_key == "shared_domains")
338                    && let Some(arr) = prop_obj.get("value").and_then(|v| v.as_array())
339                {
340                    for item in arr {
341                        if let Some(s) = item.as_str() {
342                            shared_domains.push(s.to_string());
343                        }
344                    }
345                }
346            }
347        }
348    }
349    shared_domains
350}
351
352/// Resolve a $ref reference like '#/definitions/orderAction'.
353pub fn resolve_ref<'a>(ref_str: &str, data: &'a JsonValue) -> Option<&'a JsonValue> {
354    if !ref_str.starts_with("#/") {
355        return None;
356    }
357
358    // Remove the leading '#/'
359    let path = &ref_str[2..];
360    let parts: Vec<&str> = path.split('/').collect();
361
362    // Navigate through the data structure
363    let mut current = data;
364    for part in parts {
365        current = current.get(part)?;
366    }
367
368    if current.is_object() {
369        Some(current)
370    } else {
371        None
372    }
373}
374
375/// Expand a nested column from a schema definition, creating columns with dot notation.
376///
377/// This helper function recursively expands nested structures (OBJECT/STRUCT types)
378/// into flat columns with dot notation (e.g., "address.street", "address.city").
379#[allow(clippy::only_used_in_recursion)]
380pub fn expand_nested_column(
381    column_name: &str,
382    schema: &JsonValue,
383    nullable: bool,
384    columns: &mut Vec<Column>,
385    errors: &mut Vec<ParserError>,
386) {
387    let schema_obj = match schema.as_object() {
388        Some(obj) => obj,
389        None => {
390            errors.push(ParserError {
391                error_type: "parse_error".to_string(),
392                field: column_name.to_string(),
393                message: "Nested schema must be an object".to_string(),
394            });
395            return;
396        }
397    };
398
399    // Check both "logicalType" (ODCS v3.1.0) and "type" (legacy/ODCL) for backward compatibility
400    let schema_type_raw = schema_obj
401        .get("logicalType")
402        .and_then(|v| v.as_str())
403        .or_else(|| schema_obj.get("type").and_then(|v| v.as_str()))
404        .unwrap_or("object");
405
406    // Normalize legacy "type" values to "logicalType" equivalents
407    let schema_type = match schema_type_raw {
408        "object" | "struct" => "object",
409        "array" => "array",
410        "string" | "varchar" | "char" | "text" => "string",
411        "integer" | "int" | "bigint" | "smallint" | "tinyint" => "integer",
412        "number" | "decimal" | "double" | "float" | "numeric" => "number",
413        "boolean" | "bool" => "boolean",
414        "date" => "date",
415        "timestamp" | "datetime" => "timestamp",
416        "time" => "time",
417        _ => schema_type_raw,
418    };
419
420    match schema_type {
421        "object" | "struct" => {
422            // Check if it has nested properties - handle both object format (legacy/ODCL)
423            // and array format (ODCS v3.1.0)
424            let properties_obj = schema_obj.get("properties").and_then(|v| v.as_object());
425            let properties_arr = schema_obj.get("properties").and_then(|v| v.as_array());
426
427            if let Some(properties) = properties_obj {
428                // Object format (legacy/ODCL): properties is a map of name -> schema
429                let nested_required: Vec<String> = schema_obj
430                    .get("required")
431                    .and_then(|v| v.as_array())
432                    .map(|arr| {
433                        arr.iter()
434                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
435                            .collect()
436                    })
437                    .unwrap_or_default();
438
439                for (nested_name, nested_schema) in properties {
440                    let nested_nullable = !nested_required.contains(nested_name);
441                    expand_nested_column(
442                        &format!("{}.{}", column_name, nested_name),
443                        nested_schema,
444                        nullable || nested_nullable,
445                        columns,
446                        errors,
447                    );
448                }
449            } else if let Some(properties_list) = properties_arr {
450                // Array format (ODCS v3.1.0): properties is an array with 'name' field
451                for prop_data in properties_list {
452                    if let Some(prop_obj) = prop_data.as_object() {
453                        // Extract name from property object (required in v3.1.0)
454                        let nested_name = prop_obj
455                            .get("name")
456                            .or_else(|| prop_obj.get("id"))
457                            .and_then(|v| v.as_str())
458                            .unwrap_or("");
459
460                        if !nested_name.is_empty() {
461                            let nested_nullable = !prop_obj
462                                .get("required")
463                                .and_then(|v| v.as_bool())
464                                .unwrap_or(false);
465
466                            expand_nested_column(
467                                &format!("{}.{}", column_name, nested_name),
468                                prop_data,
469                                nullable || nested_nullable,
470                                columns,
471                                errors,
472                            );
473                        }
474                    }
475                }
476            } else {
477                // Object without properties - create as OBJECT type
478                let physical_type = schema_obj
479                    .get("physicalType")
480                    .and_then(|v| v.as_str())
481                    .map(|s| s.to_string());
482                let description = schema_obj
483                    .get("description")
484                    .and_then(|v| v.as_str())
485                    .unwrap_or("")
486                    .to_string();
487                columns.push(Column {
488                    name: column_name.to_string(),
489                    data_type: "OBJECT".to_string(),
490                    physical_type,
491                    nullable,
492                    description,
493                    ..Default::default()
494                });
495            }
496        }
497        "array" => {
498            // Handle array types
499            let items = schema_obj.get("items").unwrap_or(schema);
500            // Check both "logicalType" (ODCS v3.1.0) and "type" (legacy) for backward compatibility
501            let items_obj = items.as_object();
502            let items_type_raw = items_obj
503                .and_then(|obj| {
504                    obj.get("logicalType")
505                        .and_then(|v| v.as_str())
506                        .or_else(|| obj.get("type").and_then(|v| v.as_str()))
507                })
508                .unwrap_or("string");
509
510            // Normalize legacy "type" values to "logicalType" equivalents for backward compatibility
511            let items_type = match items_type_raw {
512                "object" | "struct" => "object",
513                "array" => "array",
514                "string" | "varchar" | "char" | "text" => "string",
515                "integer" | "int" | "bigint" | "smallint" | "tinyint" => "integer",
516                "number" | "decimal" | "double" | "float" | "numeric" => "number",
517                "boolean" | "bool" => "boolean",
518                "date" => "date",
519                "timestamp" | "datetime" => "timestamp",
520                "time" => "time",
521                _ => items_type_raw,
522            };
523
524            if items_type == "object" {
525                // Array of objects - expand nested structure
526                let physical_type = schema_obj
527                    .get("physicalType")
528                    .and_then(|v| v.as_str())
529                    .map(|s| s.to_string());
530                let description = schema_obj
531                    .get("description")
532                    .and_then(|v| v.as_str())
533                    .unwrap_or("")
534                    .to_string();
535                columns.push(Column {
536                    name: column_name.to_string(),
537                    data_type: "ARRAY<OBJECT>".to_string(),
538                    physical_type,
539                    nullable,
540                    description,
541                    ..Default::default()
542                });
543                // Also expand nested properties with array prefix
544                // Handle both object format (legacy) and array format (ODCS v3.1.0)
545                let properties_obj = items
546                    .as_object()
547                    .and_then(|obj| obj.get("properties"))
548                    .and_then(|v| v.as_object());
549                let properties_arr = items
550                    .as_object()
551                    .and_then(|obj| obj.get("properties"))
552                    .and_then(|v| v.as_array());
553
554                if let Some(properties_map) = properties_obj {
555                    // Object format (legacy): properties is a map
556                    let nested_required: Vec<String> = items
557                        .as_object()
558                        .and_then(|obj| obj.get("required").and_then(|v| v.as_array()))
559                        .map(|arr| {
560                            arr.iter()
561                                .filter_map(|v| v.as_str().map(|s| s.to_string()))
562                                .collect()
563                        })
564                        .unwrap_or_default();
565
566                    for (nested_name, nested_schema) in properties_map {
567                        let nested_nullable = !nested_required.contains(nested_name);
568                        expand_nested_column(
569                            &format!("{}.[].{}", column_name, nested_name),
570                            nested_schema,
571                            nullable || nested_nullable,
572                            columns,
573                            errors,
574                        );
575                    }
576                } else if let Some(properties_list) = properties_arr {
577                    // Array format (ODCS v3.1.0): properties is an array with 'name' field
578                    for prop_data in properties_list {
579                        if let Some(prop_obj) = prop_data.as_object() {
580                            // Extract name from property object (required in v3.1.0)
581                            let nested_name = prop_obj
582                                .get("name")
583                                .or_else(|| prop_obj.get("id"))
584                                .and_then(|v| v.as_str())
585                                .unwrap_or("");
586
587                            if !nested_name.is_empty() {
588                                let nested_nullable = !prop_obj
589                                    .get("required")
590                                    .and_then(|v| v.as_bool())
591                                    .unwrap_or(false);
592
593                                expand_nested_column(
594                                    &format!("{}.[].{}", column_name, nested_name),
595                                    prop_data,
596                                    nullable || nested_nullable,
597                                    columns,
598                                    errors,
599                                );
600                            }
601                        }
602                    }
603                }
604            } else {
605                // Array of primitives
606                let data_type = format!("ARRAY<{}>", items_type.to_uppercase());
607                // Extract physicalType (ODCS v3.1.0) - the actual database type
608                let physical_type = schema_obj
609                    .get("physicalType")
610                    .and_then(|v| v.as_str())
611                    .map(|s| s.to_string());
612                let description = schema_obj
613                    .get("description")
614                    .and_then(|v| v.as_str())
615                    .unwrap_or("")
616                    .to_string();
617                columns.push(Column {
618                    name: column_name.to_string(),
619                    data_type,
620                    physical_type,
621                    nullable,
622                    description,
623                    ..Default::default()
624                });
625            }
626        }
627        _ => {
628            // Simple type
629            let data_type = schema_type.to_uppercase();
630            // Extract physicalType (ODCS v3.1.0) - the actual database type
631            let physical_type = schema_obj
632                .get("physicalType")
633                .and_then(|v| v.as_str())
634                .map(|s| s.to_string());
635            let description = schema_obj
636                .get("description")
637                .and_then(|v| v.as_str())
638                .unwrap_or("")
639                .to_string();
640            let enum_values = schema_obj
641                .get("enum")
642                .and_then(|v| v.as_array())
643                .map(|arr| {
644                    arr.iter()
645                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
646                        .collect()
647                })
648                .unwrap_or_default();
649            columns.push(Column {
650                name: column_name.to_string(),
651                data_type,
652                physical_type,
653                nullable,
654                description,
655                enum_values,
656                ..Default::default()
657            });
658        }
659    }
660}
661
662/// Parse STRUCT fields from string (e.g., "ID: STRING, NAME: STRING").
663pub fn parse_struct_fields_from_string(fields_str: &str) -> Result<Vec<(String, String)>> {
664    let mut fields = Vec::new();
665    let mut current_field = String::new();
666    let mut depth = 0;
667    let mut in_string = false;
668    let mut string_char = None;
669
670    for ch in fields_str.chars() {
671        match ch {
672            '\'' | '"' if !in_string || Some(ch) == string_char => {
673                if in_string {
674                    in_string = false;
675                    string_char = None;
676                } else {
677                    in_string = true;
678                    string_char = Some(ch);
679                }
680                current_field.push(ch);
681            }
682            '<' if !in_string => {
683                depth += 1;
684                current_field.push(ch);
685            }
686            '>' if !in_string => {
687                depth -= 1;
688                current_field.push(ch);
689            }
690            ',' if !in_string && depth == 0 => {
691                // End of current field
692                let trimmed = current_field.trim();
693                if !trimmed.is_empty()
694                    && let Some((name, type_part)) = parse_field_definition(trimmed)
695                {
696                    fields.push((name, type_part));
697                }
698                current_field.clear();
699            }
700            _ => {
701                current_field.push(ch);
702            }
703        }
704    }
705
706    // Handle last field
707    let trimmed = current_field.trim();
708    if !trimmed.is_empty()
709        && let Some((name, type_part)) = parse_field_definition(trimmed)
710    {
711        fields.push((name, type_part));
712    }
713
714    Ok(fields)
715}
716
/// Parse a single field definition (e.g., "ID: STRING" or "DETAILS: STRUCT<...>").
///
/// The definition is split at the *first* colon, so colons inside a nested
/// type stay part of the type. Both halves are trimmed.
///
/// Returns `Some((name, type))`, or `None` when there is no colon or when
/// either half is empty after trimming.
pub fn parse_field_definition(field_def: &str) -> Option<(String, String)> {
    let (raw_name, raw_type) = field_def.split_once(':')?;
    let (name, type_part) = (raw_name.trim(), raw_type.trim());

    if name.is_empty() || type_part.is_empty() {
        None
    } else {
        Some((name.to_string(), type_part.to_string()))
    }
}
730
731/// Convert a Column to ColumnData, preserving all ODCS v3.1.0 fields.
732/// This is used when importers create Column objects internally and need to
733/// return ColumnData in the ImportResult.
734pub fn column_to_column_data(c: &Column) -> super::ColumnData {
735    super::ColumnData {
736        // Core Identity
737        id: c.id.clone(),
738        name: c.name.clone(),
739        business_name: c.business_name.clone(),
740        description: if c.description.is_empty() {
741            None
742        } else {
743            Some(c.description.clone())
744        },
745        // Type Information
746        data_type: c.data_type.clone(),
747        physical_type: c.physical_type.clone(),
748        physical_name: c.physical_name.clone(),
749        logical_type_options: c.logical_type_options.clone(),
750        // Key Constraints
751        primary_key: c.primary_key,
752        primary_key_position: c.primary_key_position,
753        unique: c.unique,
754        nullable: c.nullable,
755        // Partitioning & Clustering
756        partitioned: c.partitioned,
757        partition_key_position: c.partition_key_position,
758        clustered: c.clustered,
759        // Data Classification & Security
760        classification: c.classification.clone(),
761        critical_data_element: c.critical_data_element,
762        encrypted_name: c.encrypted_name.clone(),
763        // Transformation Metadata
764        transform_source_objects: c.transform_source_objects.clone(),
765        transform_logic: c.transform_logic.clone(),
766        transform_description: c.transform_description.clone(),
767        // Examples & Documentation
768        examples: c.examples.clone(),
769        default_value: c.default_value.clone(),
770        // Relationships & References
771        relationships: c.relationships.clone(),
772        authoritative_definitions: c.authoritative_definitions.clone(),
773        // Quality & Validation
774        quality: if c.quality.is_empty() {
775            None
776        } else {
777            Some(c.quality.clone())
778        },
779        enum_values: if c.enum_values.is_empty() {
780            None
781        } else {
782            Some(c.enum_values.clone())
783        },
784        // Tags & Custom Properties
785        tags: c.tags.clone(),
786        custom_properties: c.custom_properties.clone(),
787    }
788}
789
#[cfg(test)]
mod tests {
    use super::*;

    /// Simple type names are upper-cased, while STRUCT/ARRAY/MAP keep the text
    /// between the angle brackets exactly as written.
    #[test]
    fn test_normalize_data_type() {
        let cases = [
            ("string", "STRING"),
            ("int", "INT"),
            ("STRUCT<a: INT>", "STRUCT<a: INT>"),
            ("array<string>", "ARRAY<string>"),
            ("MAP<string, int>", "MAP<string, int>"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(normalize_data_type(input), expected);
        }
    }

    /// Layer names are accepted in lower, upper, and mixed case; unknown names fail.
    #[test]
    fn test_parse_medallion_layer() {
        let bronze = parse_medallion_layer("bronze");
        assert!(matches!(bronze, Ok(MedallionLayer::Bronze)));

        let silver = parse_medallion_layer("SILVER");
        assert!(matches!(silver, Ok(MedallionLayer::Silver)));

        let gold = parse_medallion_layer("Gold");
        assert!(matches!(gold, Ok(MedallionLayer::Gold)));

        assert!(parse_medallion_layer("invalid").is_err());
    }

    /// Both the `TYPE_1` and `type2` spellings are recognised; unknown names fail.
    #[test]
    fn test_parse_scd_pattern() {
        let type_1 = parse_scd_pattern("TYPE_1");
        assert!(matches!(type_1, Ok(SCDPattern::Type1)));

        let type_2 = parse_scd_pattern("type2");
        assert!(matches!(type_2, Ok(SCDPattern::Type2)));

        assert!(parse_scd_pattern("invalid").is_err());
    }

    /// Classifications parse regardless of case, and `sat` maps to `Satellite`.
    #[test]
    fn test_parse_data_vault_classification() {
        let hub = parse_data_vault_classification("hub");
        assert!(matches!(hub, Ok(DataVaultClassification::Hub)));

        let link = parse_data_vault_classification("LINK");
        assert!(matches!(link, Ok(DataVaultClassification::Link)));

        let satellite = parse_data_vault_classification("sat");
        assert!(matches!(satellite, Ok(DataVaultClassification::Satellite)));

        assert!(parse_data_vault_classification("invalid").is_err());
    }

    /// A definition yields the field name and its type, including a nested
    /// STRUCT type that itself contains colons.
    #[test]
    fn test_parse_field_definition() {
        let cases = [
            ("name: STRING", "name", "STRING"),
            (
                "nested: STRUCT<a: INT, b: STRING>",
                "nested",
                "STRUCT<a: INT, b: STRING>",
            ),
        ];

        for &(definition, expected_name, expected_type) in &cases {
            // `unwrap` fails the test if the definition was rejected.
            let (name, type_part) = parse_field_definition(definition).unwrap();
            assert_eq!(name, expected_name);
            assert_eq!(type_part, expected_type);
        }
    }

    /// Fields are split on top-level commas only, so a nested STRUCT stays intact.
    #[test]
    fn test_parse_struct_fields_from_string() {
        // Flat list of two fields.
        let flat = parse_struct_fields_from_string("id: INT, name: STRING").unwrap();
        let expected_flat = [("id", "INT"), ("name", "STRING")];
        assert_eq!(flat.len(), expected_flat.len());
        for (idx, &(name, ty)) in expected_flat.iter().enumerate() {
            assert_eq!(flat[idx], (name.to_string(), ty.to_string()));
        }

        // The comma inside the nested STRUCT must not be treated as a separator.
        let with_nested = parse_struct_fields_from_string(
            "id: INT, nested: STRUCT<a: INT, b: STRING>, name: STRING",
        )
        .unwrap();
        let expected_nested = [
            ("id", "INT"),
            ("nested", "STRUCT<a: INT, b: STRING>"),
            ("name", "STRING"),
        ];
        assert_eq!(with_nested.len(), expected_nested.len());
        for (idx, &(name, ty)) in expected_nested.iter().enumerate() {
            assert_eq!(with_nested[idx], (name.to_string(), ty.to_string()));
        }
    }
}