data_modelling_sdk/models/table.rs
1//! Table model for the SDK
2
3use super::column::Column;
4use super::enums::{
5 DataVaultClassification, DatabaseType, InfrastructureType, MedallionLayer, ModelingLevel,
6 SCDPattern,
7};
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10use serde_json;
11use std::collections::HashMap;
12use uuid::Uuid;
13
14/// Position coordinates for table placement on canvas
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16pub struct Position {
17 /// X coordinate
18 pub x: f64,
19 /// Y coordinate
20 pub y: f64,
21}
22
23/// SLA (Service Level Agreement) property following ODCS-inspired structure
24///
25/// Represents a single SLA property for Data Flow nodes and relationships.
26/// Uses a lightweight format inspired by ODCS servicelevels but separate from ODCS.
27#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
28pub struct SlaProperty {
29 /// SLA attribute name (e.g., "latency", "availability", "throughput")
30 pub property: String,
31 /// Metric value (flexible type to support numbers, strings, etc.)
32 pub value: serde_json::Value,
33 /// Measurement unit (e.g., "hours", "percent", "requests_per_second")
34 pub unit: String,
35 /// Optional: Data elements this SLA applies to
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub element: Option<String>,
38 /// Optional: Importance driver (e.g., "regulatory", "analytics", "operational")
39 #[serde(skip_serializing_if = "Option::is_none")]
40 pub driver: Option<String>,
41 /// Optional: Description of the SLA
42 #[serde(skip_serializing_if = "Option::is_none")]
43 pub description: Option<String>,
44 /// Optional: Scheduler type for monitoring
45 #[serde(skip_serializing_if = "Option::is_none")]
46 pub scheduler: Option<String>,
47 /// Optional: Schedule expression (e.g., cron format)
48 #[serde(skip_serializing_if = "Option::is_none")]
49 pub schedule: Option<String>,
50}
51
52/// Contact details for Data Flow node/relationship owners/responsible parties
53///
54/// Structured contact information for operational and governance purposes.
55#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
56pub struct ContactDetails {
57 /// Email address
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub email: Option<String>,
60 /// Phone number
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub phone: Option<String>,
63 /// Contact name
64 #[serde(skip_serializing_if = "Option::is_none")]
65 pub name: Option<String>,
66 /// Role or title
67 #[serde(skip_serializing_if = "Option::is_none")]
68 pub role: Option<String>,
69 /// Other contact methods or additional information
70 #[serde(skip_serializing_if = "Option::is_none")]
71 pub other: Option<String>,
72}
73
74/// Table model representing a database table or data contract
75///
76/// A table represents a structured data entity with columns, metadata, and relationships.
77/// Tables can be imported from various formats (SQL, ODCS, JSON Schema, etc.) and exported
78/// to multiple formats.
79///
80/// # Example
81///
82/// ```rust
83/// use data_modelling_sdk::models::{Table, Column};
84///
85/// let table = Table::new(
86/// "users".to_string(),
87/// vec![
88/// Column::new("id".to_string(), "INT".to_string()),
89/// Column::new("name".to_string(), "VARCHAR(100)".to_string()),
90/// ],
91/// );
92/// ```
93///
94/// # Example with Metadata (Data Flow Node)
95///
96/// ```rust
97/// use data_modelling_sdk::models::{Table, Column, InfrastructureType, ContactDetails, SlaProperty};
98/// use serde_json::json;
99///
100/// let mut table = Table::new(
101/// "user_events".to_string(),
102/// vec![Column::new("id".to_string(), "UUID".to_string())],
103/// );
104/// table.owner = Some("Data Engineering Team".to_string());
105/// table.infrastructure_type = Some(InfrastructureType::Kafka);
106/// table.contact_details = Some(ContactDetails {
107/// email: Some("team@example.com".to_string()),
108/// phone: None,
109/// name: Some("Data Team".to_string()),
110/// role: Some("Data Owner".to_string()),
111/// other: None,
112/// });
113/// table.sla = Some(vec![SlaProperty {
114/// property: "latency".to_string(),
115/// value: json!(4),
116/// unit: "hours".to_string(),
117/// description: Some("Data must be available within 4 hours".to_string()),
118/// element: None,
119/// driver: Some("operational".to_string()),
120/// scheduler: None,
121/// schedule: None,
122/// }]);
123/// table.notes = Some("User interaction events from web application".to_string());
124/// ```
125#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
126pub struct Table {
127 /// Unique identifier for the table (UUIDv4)
128 pub id: Uuid,
129 /// Table name (must be unique within database_type/catalog/schema scope)
130 pub name: String,
131 /// List of columns in the table
132 pub columns: Vec<Column>,
133 /// Database type (PostgreSQL, MySQL, etc.) if applicable
134 #[serde(skip_serializing_if = "Option::is_none")]
135 pub database_type: Option<DatabaseType>,
136 /// Catalog name (database name in some systems)
137 #[serde(skip_serializing_if = "Option::is_none")]
138 pub catalog_name: Option<String>,
139 /// Schema name (namespace within catalog)
140 #[serde(skip_serializing_if = "Option::is_none")]
141 pub schema_name: Option<String>,
142 /// Medallion architecture layers (Bronze, Silver, Gold)
143 #[serde(default)]
144 pub medallion_layers: Vec<MedallionLayer>,
145 /// Slowly Changing Dimension pattern (Type 1, Type 2, etc.)
146 #[serde(skip_serializing_if = "Option::is_none")]
147 pub scd_pattern: Option<SCDPattern>,
148 /// Data Vault classification (Hub, Link, Satellite)
149 #[serde(skip_serializing_if = "Option::is_none")]
150 pub data_vault_classification: Option<DataVaultClassification>,
151 /// Modeling level (Conceptual, Logical, Physical)
152 #[serde(skip_serializing_if = "Option::is_none")]
153 pub modeling_level: Option<ModelingLevel>,
154 /// Tags for categorization and filtering
155 #[serde(default)]
156 pub tags: Vec<String>,
157 /// ODCL/ODCS metadata (legacy format support)
158 #[serde(default)]
159 pub odcl_metadata: HashMap<String, serde_json::Value>,
160 /// Owner information (person, team, or organization name) for Data Flow nodes
161 #[serde(skip_serializing_if = "Option::is_none")]
162 pub owner: Option<String>,
163 /// SLA (Service Level Agreement) information (ODCS-inspired but lightweight format)
164 #[serde(skip_serializing_if = "Option::is_none")]
165 pub sla: Option<Vec<SlaProperty>>,
166 /// Contact details for responsible parties
167 #[serde(skip_serializing_if = "Option::is_none")]
168 pub contact_details: Option<ContactDetails>,
169 /// Infrastructure type (hosting platform, service, or tool) for Data Flow nodes
170 #[serde(skip_serializing_if = "Option::is_none")]
171 pub infrastructure_type: Option<InfrastructureType>,
172 /// Additional notes and context for Data Flow nodes
173 #[serde(skip_serializing_if = "Option::is_none")]
174 pub notes: Option<String>,
175 /// Canvas position for visual representation
176 #[serde(skip_serializing_if = "Option::is_none")]
177 pub position: Option<Position>,
178 /// Path to YAML file if loaded from file system
179 #[serde(skip_serializing_if = "Option::is_none")]
180 pub yaml_file_path: Option<String>,
181 /// Draw.io cell ID for diagram integration
182 #[serde(skip_serializing_if = "Option::is_none")]
183 pub drawio_cell_id: Option<String>,
184 /// Quality rules and checks
185 #[serde(default)]
186 pub quality: Vec<HashMap<String, serde_json::Value>>,
187 /// Validation errors and warnings
188 #[serde(default)]
189 pub errors: Vec<HashMap<String, serde_json::Value>>,
190 /// Creation timestamp
191 pub created_at: DateTime<Utc>,
192 /// Last update timestamp
193 pub updated_at: DateTime<Utc>,
194}
195
196impl Table {
197 /// Create a new table with the given name and columns
198 ///
199 /// # Arguments
200 ///
201 /// * `name` - The table name (must be valid according to naming conventions)
202 /// * `columns` - Vector of columns for the table
203 ///
204 /// # Returns
205 ///
206 /// A new `Table` instance with a generated UUIDv4 ID and current timestamps.
207 ///
208 /// # Example
209 ///
210 /// ```rust
211 /// use data_modelling_sdk::models::{Table, Column};
212 ///
213 /// let table = Table::new(
214 /// "users".to_string(),
215 /// vec![Column::new("id".to_string(), "INT".to_string())],
216 /// );
217 /// ```
218 pub fn new(name: String, columns: Vec<Column>) -> Self {
219 let now = Utc::now();
220 // UUIDv4 everywhere (do not derive ids from natural keys like name).
221 let id = Self::generate_id(&name, None, None, None);
222 Self {
223 id,
224 name,
225 columns,
226 database_type: None,
227 catalog_name: None,
228 schema_name: None,
229 medallion_layers: Vec::new(),
230 scd_pattern: None,
231 data_vault_classification: None,
232 modeling_level: None,
233 tags: Vec::new(),
234 odcl_metadata: HashMap::new(),
235 owner: None,
236 sla: None,
237 contact_details: None,
238 infrastructure_type: None,
239 notes: None,
240 position: None,
241 yaml_file_path: None,
242 drawio_cell_id: None,
243 quality: Vec::new(),
244 errors: Vec::new(),
245 created_at: now,
246 updated_at: now,
247 }
248 }
249
250 /// Get the unique key tuple for this table
251 ///
252 /// Returns a tuple of (database_type, name, catalog_name, schema_name) that uniquely
253 /// identifies this table within its scope. Used for detecting naming conflicts.
254 ///
255 /// # Returns
256 ///
257 /// A tuple containing the database type (as string), name, catalog name, and schema name.
258 pub fn get_unique_key(&self) -> (Option<String>, String, Option<String>, Option<String>) {
259 (
260 self.database_type.as_ref().map(|dt| format!("{:?}", dt)),
261 self.name.clone(),
262 self.catalog_name.clone(),
263 self.schema_name.clone(),
264 )
265 }
266
267 /// Generate a UUIDv4 for a new table id.
268 ///
269 /// Note: params are retained for backward-compatibility with previous deterministic-v5 API.
270 pub fn generate_id(
271 _name: &str,
272 _database_type: Option<&DatabaseType>,
273 _catalog_name: Option<&str>,
274 _schema_name: Option<&str>,
275 ) -> Uuid {
276 Uuid::new_v4()
277 }
278}