data_modelling_core/models/
table.rs

1//! Table model for the SDK
2
3use super::column::Column;
4use super::enums::{
5    DataVaultClassification, DatabaseType, InfrastructureType, MedallionLayer, ModelingLevel,
6    SCDPattern,
7};
8use super::tag::Tag;
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Deserializer, Serialize};
11use serde_json;
12use std::collections::HashMap;
13use std::str::FromStr;
14use uuid::Uuid;
15
16/// Deserialize tags with backward compatibility (supports Vec<String> and Vec<Tag>)
17fn deserialize_tags<'de, D>(deserializer: D) -> Result<Vec<Tag>, D::Error>
18where
19    D: Deserializer<'de>,
20{
21    // Accept either Vec<String> (backward compatibility) or Vec<Tag>
22    struct TagVisitor;
23
24    impl<'de> serde::de::Visitor<'de> for TagVisitor {
25        type Value = Vec<Tag>;
26
27        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
28            formatter.write_str("a vector of tags (strings or Tag objects)")
29        }
30
31        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
32        where
33            A: serde::de::SeqAccess<'de>,
34        {
35            let mut tags = Vec::new();
36            while let Some(item) = seq.next_element::<serde_json::Value>()? {
37                match item {
38                    serde_json::Value::String(s) => {
39                        // Backward compatibility: parse string as Tag
40                        if let Ok(tag) = Tag::from_str(&s) {
41                            tags.push(tag);
42                        }
43                    }
44                    _ => {
45                        // Try to deserialize as Tag directly (if it's a string in JSON)
46                        if let serde_json::Value::String(s) = item
47                            && let Ok(tag) = Tag::from_str(&s)
48                        {
49                            tags.push(tag);
50                        }
51                    }
52                }
53            }
54            Ok(tags)
55        }
56    }
57
58    deserializer.deserialize_seq(TagVisitor)
59}
60
61/// Position coordinates for table placement on canvas
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
63pub struct Position {
64    /// X coordinate
65    pub x: f64,
66    /// Y coordinate
67    pub y: f64,
68}
69
70/// SLA (Service Level Agreement) property following ODCS-inspired structure
71///
72/// Represents a single SLA property for Data Flow nodes and relationships.
73/// Uses a lightweight format inspired by ODCS servicelevels but separate from ODCS.
74#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
75#[serde(rename_all = "camelCase")]
76pub struct SlaProperty {
77    /// SLA attribute name (e.g., "latency", "availability", "throughput")
78    pub property: String,
79    /// Metric value (flexible type to support numbers, strings, etc.)
80    pub value: serde_json::Value,
81    /// Measurement unit (e.g., "hours", "percent", "requests_per_second")
82    pub unit: String,
83    /// Optional: Data elements this SLA applies to
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub element: Option<String>,
86    /// Optional: Importance driver (e.g., "regulatory", "analytics", "operational")
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub driver: Option<String>,
89    /// Optional: Description of the SLA
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub description: Option<String>,
92    /// Optional: Scheduler type for monitoring
93    #[serde(skip_serializing_if = "Option::is_none")]
94    pub scheduler: Option<String>,
95    /// Optional: Schedule expression (e.g., cron format)
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub schedule: Option<String>,
98}
99
100/// Contact details for Data Flow node/relationship owners/responsible parties
101///
102/// Structured contact information for operational and governance purposes.
103#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
104#[serde(rename_all = "camelCase")]
105pub struct ContactDetails {
106    /// Email address
107    #[serde(skip_serializing_if = "Option::is_none")]
108    pub email: Option<String>,
109    /// Phone number
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub phone: Option<String>,
112    /// Contact name
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub name: Option<String>,
115    /// Role or title
116    #[serde(skip_serializing_if = "Option::is_none")]
117    pub role: Option<String>,
118    /// Other contact methods or additional information
119    #[serde(skip_serializing_if = "Option::is_none")]
120    pub other: Option<String>,
121}
122
123/// Table model representing a database table or data contract
124///
125/// A table represents a structured data entity with columns, metadata, and relationships.
126/// Tables can be imported from various formats (SQL, ODCS, JSON Schema, etc.) and exported
127/// to multiple formats.
128///
129/// # Example
130///
131/// ```rust
132/// use data_modelling_core::models::{Table, Column};
133///
134/// let table = Table::new(
135///     "users".to_string(),
136///     vec![
137///         Column::new("id".to_string(), "INT".to_string()),
138///         Column::new("name".to_string(), "VARCHAR(100)".to_string()),
139///     ],
140/// );
141/// ```
142///
143/// # Example with Metadata (Data Flow Node)
144///
145/// ```rust
146/// use data_modelling_core::models::{Table, Column, InfrastructureType, ContactDetails, SlaProperty};
147/// use serde_json::json;
148///
149/// let mut table = Table::new(
150///     "user_events".to_string(),
151///     vec![Column::new("id".to_string(), "UUID".to_string())],
152/// );
153/// table.owner = Some("Data Engineering Team".to_string());
154/// table.infrastructure_type = Some(InfrastructureType::Kafka);
155/// table.contact_details = Some(ContactDetails {
156///     email: Some("team@example.com".to_string()),
157///     phone: None,
158///     name: Some("Data Team".to_string()),
159///     role: Some("Data Owner".to_string()),
160///     other: None,
161/// });
162/// table.sla = Some(vec![SlaProperty {
163///     property: "latency".to_string(),
164///     value: json!(4),
165///     unit: "hours".to_string(),
166///     description: Some("Data must be available within 4 hours".to_string()),
167///     element: None,
168///     driver: Some("operational".to_string()),
169///     scheduler: None,
170///     schedule: None,
171/// }]);
172/// table.notes = Some("User interaction events from web application".to_string());
173/// ```
174#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
175#[serde(rename_all = "camelCase")]
176pub struct Table {
177    /// Unique identifier for the table (UUIDv4)
178    pub id: Uuid,
179    /// Table name (must be unique within database_type/catalog/schema scope)
180    pub name: String,
181    /// List of columns in the table
182    pub columns: Vec<Column>,
183    /// Database type (PostgreSQL, MySQL, etc.) if applicable
184    #[serde(skip_serializing_if = "Option::is_none", alias = "database_type")]
185    pub database_type: Option<DatabaseType>,
186    /// Catalog name (database name in some systems)
187    #[serde(skip_serializing_if = "Option::is_none", alias = "catalog_name")]
188    pub catalog_name: Option<String>,
189    /// Schema name (namespace within catalog)
190    #[serde(skip_serializing_if = "Option::is_none", alias = "schema_name")]
191    pub schema_name: Option<String>,
192    /// Medallion architecture layers (Bronze, Silver, Gold)
193    #[serde(default, alias = "medallion_layers")]
194    pub medallion_layers: Vec<MedallionLayer>,
195    /// Slowly Changing Dimension pattern (Type 1, Type 2, etc.)
196    #[serde(skip_serializing_if = "Option::is_none", alias = "scd_pattern")]
197    pub scd_pattern: Option<SCDPattern>,
198    /// Data Vault classification (Hub, Link, Satellite)
199    #[serde(
200        skip_serializing_if = "Option::is_none",
201        alias = "data_vault_classification"
202    )]
203    pub data_vault_classification: Option<DataVaultClassification>,
204    /// Modeling level (Conceptual, Logical, Physical)
205    #[serde(skip_serializing_if = "Option::is_none", alias = "modeling_level")]
206    pub modeling_level: Option<ModelingLevel>,
207    /// Tags for categorization and filtering (supports Simple, Pair, and List formats)
208    #[serde(default, deserialize_with = "deserialize_tags")]
209    pub tags: Vec<Tag>,
210    /// ODCL/ODCS metadata (legacy format support)
211    #[serde(default, alias = "odcl_metadata")]
212    pub odcl_metadata: HashMap<String, serde_json::Value>,
213    /// Owner information (person, team, or organization name) for Data Flow nodes
214    #[serde(skip_serializing_if = "Option::is_none")]
215    pub owner: Option<String>,
216    /// SLA (Service Level Agreement) information (ODCS-inspired but lightweight format)
217    #[serde(skip_serializing_if = "Option::is_none")]
218    pub sla: Option<Vec<SlaProperty>>,
219    /// Contact details for responsible parties
220    #[serde(skip_serializing_if = "Option::is_none", alias = "contact_details")]
221    pub contact_details: Option<ContactDetails>,
222    /// Infrastructure type (hosting platform, service, or tool) for Data Flow nodes
223    #[serde(skip_serializing_if = "Option::is_none", alias = "infrastructure_type")]
224    pub infrastructure_type: Option<InfrastructureType>,
225    /// Additional notes and context for Data Flow nodes
226    #[serde(skip_serializing_if = "Option::is_none")]
227    pub notes: Option<String>,
228    /// Canvas position for visual representation
229    #[serde(skip_serializing_if = "Option::is_none")]
230    pub position: Option<Position>,
231    /// Path to YAML file if loaded from file system
232    #[serde(skip_serializing_if = "Option::is_none", alias = "yaml_file_path")]
233    pub yaml_file_path: Option<String>,
234    /// Draw.io cell ID for diagram integration
235    #[serde(skip_serializing_if = "Option::is_none", alias = "drawio_cell_id")]
236    pub drawio_cell_id: Option<String>,
237    /// Quality rules and checks
238    #[serde(default)]
239    pub quality: Vec<HashMap<String, serde_json::Value>>,
240    /// Validation errors and warnings
241    #[serde(default)]
242    pub errors: Vec<HashMap<String, serde_json::Value>>,
243    /// Creation timestamp
244    #[serde(alias = "created_at")]
245    pub created_at: DateTime<Utc>,
246    /// Last update timestamp
247    #[serde(alias = "updated_at")]
248    pub updated_at: DateTime<Utc>,
249}
250
251impl Table {
252    /// Create a new table with the given name and columns
253    ///
254    /// # Arguments
255    ///
256    /// * `name` - The table name (must be valid according to naming conventions)
257    /// * `columns` - Vector of columns for the table
258    ///
259    /// # Returns
260    ///
261    /// A new `Table` instance with a generated UUIDv4 ID and current timestamps.
262    ///
263    /// # Example
264    ///
265    /// ```rust
266    /// use data_modelling_core::models::{Table, Column};
267    ///
268    /// let table = Table::new(
269    ///     "users".to_string(),
270    ///     vec![Column::new("id".to_string(), "INT".to_string())],
271    /// );
272    /// ```
273    pub fn new(name: String, columns: Vec<Column>) -> Self {
274        let now = Utc::now();
275        // UUIDv4 everywhere (do not derive ids from natural keys like name).
276        let id = Self::generate_id(&name, None, None, None);
277        Self {
278            id,
279            name,
280            columns,
281            database_type: None,
282            catalog_name: None,
283            schema_name: None,
284            medallion_layers: Vec::new(),
285            scd_pattern: None,
286            data_vault_classification: None,
287            modeling_level: None,
288            tags: Vec::new(),
289            odcl_metadata: HashMap::new(),
290            owner: None,
291            sla: None,
292            contact_details: None,
293            infrastructure_type: None,
294            notes: None,
295            position: None,
296            yaml_file_path: None,
297            drawio_cell_id: None,
298            quality: Vec::new(),
299            errors: Vec::new(),
300            created_at: now,
301            updated_at: now,
302        }
303    }
304
305    /// Get the unique key tuple for this table
306    ///
307    /// Returns a tuple of (database_type, name, catalog_name, schema_name) that uniquely
308    /// identifies this table within its scope. Used for detecting naming conflicts.
309    ///
310    /// # Returns
311    ///
312    /// A tuple containing the database type (as string), name, catalog name, and schema name.
313    pub fn get_unique_key(&self) -> (Option<String>, String, Option<String>, Option<String>) {
314        (
315            self.database_type.as_ref().map(|dt| format!("{:?}", dt)),
316            self.name.clone(),
317            self.catalog_name.clone(),
318            self.schema_name.clone(),
319        )
320    }
321
322    /// Generate a UUIDv4 for a new table id.
323    ///
324    /// Note: params are retained for backward-compatibility with previous deterministic-v5 API.
325    pub fn generate_id(
326        _name: &str,
327        _database_type: Option<&DatabaseType>,
328        _catalog_name: Option<&str>,
329        _schema_name: Option<&str>,
330    ) -> Uuid {
331        Uuid::new_v4()
332    }
333
334    /// Create a Table from imported TableData.
335    ///
336    /// Converts the import format (TableData) to the internal Table model.
337    /// This is used when exporting ODCS YAML directly to PDF/Markdown.
338    ///
339    /// # Arguments
340    ///
341    /// * `table_data` - The imported table data from ODCS parser
342    ///
343    /// # Returns
344    ///
345    /// A new Table instance populated from the import data
346    pub fn from_table_data(table_data: &crate::import::TableData) -> Self {
347        use crate::models::Column;
348
349        let table_name = table_data
350            .name
351            .clone()
352            .unwrap_or_else(|| format!("table_{}", table_data.table_index));
353
354        let columns: Vec<Column> = table_data
355            .columns
356            .iter()
357            .map(|col_data| Column {
358                id: col_data.id.clone(),
359                name: col_data.name.clone(),
360                business_name: col_data.business_name.clone(),
361                description: col_data.description.clone().unwrap_or_default(),
362                data_type: col_data.data_type.clone(),
363                physical_type: col_data.physical_type.clone(),
364                physical_name: col_data.physical_name.clone(),
365                logical_type_options: col_data.logical_type_options.clone(),
366                primary_key: col_data.primary_key,
367                primary_key_position: col_data.primary_key_position,
368                unique: col_data.unique,
369                nullable: col_data.nullable,
370                partitioned: col_data.partitioned,
371                partition_key_position: col_data.partition_key_position,
372                clustered: col_data.clustered,
373                classification: col_data.classification.clone(),
374                critical_data_element: col_data.critical_data_element,
375                encrypted_name: col_data.encrypted_name.clone(),
376                transform_source_objects: col_data.transform_source_objects.clone(),
377                transform_logic: col_data.transform_logic.clone(),
378                transform_description: col_data.transform_description.clone(),
379                examples: col_data.examples.clone(),
380                default_value: col_data.default_value.clone(),
381                relationships: col_data.relationships.clone(),
382                authoritative_definitions: col_data.authoritative_definitions.clone(),
383                quality: col_data.quality.clone().unwrap_or_default(),
384                enum_values: col_data.enum_values.clone().unwrap_or_default(),
385                tags: col_data.tags.clone(),
386                custom_properties: col_data.custom_properties.clone(),
387                ..Default::default()
388            })
389            .collect();
390
391        let mut table = Self::new(table_name, columns);
392
393        // Preserve ODCS metadata
394        if let Some(ref domain) = table_data.domain {
395            table
396                .odcl_metadata
397                .insert("domain".to_string(), serde_json::json!(domain));
398        }
399        if let Some(ref version) = table_data.version {
400            table
401                .odcl_metadata
402                .insert("version".to_string(), serde_json::json!(version));
403        }
404        if let Some(ref status) = table_data.status {
405            table
406                .odcl_metadata
407                .insert("status".to_string(), serde_json::json!(status));
408        }
409        if let Some(ref description) = table_data.description {
410            table
411                .odcl_metadata
412                .insert("description".to_string(), serde_json::json!(description));
413        }
414        if let Some(ref team) = table_data.team {
415            table.odcl_metadata.insert(
416                "team".to_string(),
417                serde_json::to_value(team).unwrap_or_default(),
418            );
419        }
420        if let Some(ref support) = table_data.support {
421            table.odcl_metadata.insert(
422                "support".to_string(),
423                serde_json::to_value(support).unwrap_or_default(),
424            );
425        }
426
427        table
428    }
429}