data_modelling_sdk/models/table.rs
1//! Table model for the SDK
2
3use super::column::Column;
4use super::enums::{
5 DataVaultClassification, DatabaseType, InfrastructureType, MedallionLayer, ModelingLevel,
6 SCDPattern,
7};
8use super::tag::Tag;
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Deserializer, Serialize};
11use serde_json;
12use std::collections::HashMap;
13use std::str::FromStr;
14use uuid::Uuid;
15
16/// Deserialize tags with backward compatibility (supports Vec<String> and Vec<Tag>)
17fn deserialize_tags<'de, D>(deserializer: D) -> Result<Vec<Tag>, D::Error>
18where
19 D: Deserializer<'de>,
20{
21 // Accept either Vec<String> (backward compatibility) or Vec<Tag>
22 struct TagVisitor;
23
24 impl<'de> serde::de::Visitor<'de> for TagVisitor {
25 type Value = Vec<Tag>;
26
27 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
28 formatter.write_str("a vector of tags (strings or Tag objects)")
29 }
30
31 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
32 where
33 A: serde::de::SeqAccess<'de>,
34 {
35 let mut tags = Vec::new();
36 while let Some(item) = seq.next_element::<serde_json::Value>()? {
37 match item {
38 serde_json::Value::String(s) => {
39 // Backward compatibility: parse string as Tag
40 if let Ok(tag) = Tag::from_str(&s) {
41 tags.push(tag);
42 }
43 }
44 _ => {
45 // Try to deserialize as Tag directly (if it's a string in JSON)
46 if let serde_json::Value::String(s) = item
47 && let Ok(tag) = Tag::from_str(&s)
48 {
49 tags.push(tag);
50 }
51 }
52 }
53 }
54 Ok(tags)
55 }
56 }
57
58 deserializer.deserialize_seq(TagVisitor)
59}
60
61/// Position coordinates for table placement on canvas
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
63pub struct Position {
64 /// X coordinate
65 pub x: f64,
66 /// Y coordinate
67 pub y: f64,
68}
69
70/// SLA (Service Level Agreement) property following ODCS-inspired structure
71///
72/// Represents a single SLA property for Data Flow nodes and relationships.
73/// Uses a lightweight format inspired by ODCS servicelevels but separate from ODCS.
74#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
75pub struct SlaProperty {
76 /// SLA attribute name (e.g., "latency", "availability", "throughput")
77 pub property: String,
78 /// Metric value (flexible type to support numbers, strings, etc.)
79 pub value: serde_json::Value,
80 /// Measurement unit (e.g., "hours", "percent", "requests_per_second")
81 pub unit: String,
82 /// Optional: Data elements this SLA applies to
83 #[serde(skip_serializing_if = "Option::is_none")]
84 pub element: Option<String>,
85 /// Optional: Importance driver (e.g., "regulatory", "analytics", "operational")
86 #[serde(skip_serializing_if = "Option::is_none")]
87 pub driver: Option<String>,
88 /// Optional: Description of the SLA
89 #[serde(skip_serializing_if = "Option::is_none")]
90 pub description: Option<String>,
91 /// Optional: Scheduler type for monitoring
92 #[serde(skip_serializing_if = "Option::is_none")]
93 pub scheduler: Option<String>,
94 /// Optional: Schedule expression (e.g., cron format)
95 #[serde(skip_serializing_if = "Option::is_none")]
96 pub schedule: Option<String>,
97}
98
99/// Contact details for Data Flow node/relationship owners/responsible parties
100///
101/// Structured contact information for operational and governance purposes.
102#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
103pub struct ContactDetails {
104 /// Email address
105 #[serde(skip_serializing_if = "Option::is_none")]
106 pub email: Option<String>,
107 /// Phone number
108 #[serde(skip_serializing_if = "Option::is_none")]
109 pub phone: Option<String>,
110 /// Contact name
111 #[serde(skip_serializing_if = "Option::is_none")]
112 pub name: Option<String>,
113 /// Role or title
114 #[serde(skip_serializing_if = "Option::is_none")]
115 pub role: Option<String>,
116 /// Other contact methods or additional information
117 #[serde(skip_serializing_if = "Option::is_none")]
118 pub other: Option<String>,
119}
120
121/// Table model representing a database table or data contract
122///
123/// A table represents a structured data entity with columns, metadata, and relationships.
124/// Tables can be imported from various formats (SQL, ODCS, JSON Schema, etc.) and exported
125/// to multiple formats.
126///
127/// # Example
128///
129/// ```rust
130/// use data_modelling_sdk::models::{Table, Column};
131///
132/// let table = Table::new(
133/// "users".to_string(),
134/// vec![
135/// Column::new("id".to_string(), "INT".to_string()),
136/// Column::new("name".to_string(), "VARCHAR(100)".to_string()),
137/// ],
138/// );
139/// ```
140///
141/// # Example with Metadata (Data Flow Node)
142///
143/// ```rust
144/// use data_modelling_sdk::models::{Table, Column, InfrastructureType, ContactDetails, SlaProperty};
145/// use serde_json::json;
146///
147/// let mut table = Table::new(
148/// "user_events".to_string(),
149/// vec![Column::new("id".to_string(), "UUID".to_string())],
150/// );
151/// table.owner = Some("Data Engineering Team".to_string());
152/// table.infrastructure_type = Some(InfrastructureType::Kafka);
153/// table.contact_details = Some(ContactDetails {
154/// email: Some("team@example.com".to_string()),
155/// phone: None,
156/// name: Some("Data Team".to_string()),
157/// role: Some("Data Owner".to_string()),
158/// other: None,
159/// });
160/// table.sla = Some(vec![SlaProperty {
161/// property: "latency".to_string(),
162/// value: json!(4),
163/// unit: "hours".to_string(),
164/// description: Some("Data must be available within 4 hours".to_string()),
165/// element: None,
166/// driver: Some("operational".to_string()),
167/// scheduler: None,
168/// schedule: None,
169/// }]);
170/// table.notes = Some("User interaction events from web application".to_string());
171/// ```
172#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
173pub struct Table {
174 /// Unique identifier for the table (UUIDv4)
175 pub id: Uuid,
176 /// Table name (must be unique within database_type/catalog/schema scope)
177 pub name: String,
178 /// List of columns in the table
179 pub columns: Vec<Column>,
180 /// Database type (PostgreSQL, MySQL, etc.) if applicable
181 #[serde(skip_serializing_if = "Option::is_none")]
182 pub database_type: Option<DatabaseType>,
183 /// Catalog name (database name in some systems)
184 #[serde(skip_serializing_if = "Option::is_none")]
185 pub catalog_name: Option<String>,
186 /// Schema name (namespace within catalog)
187 #[serde(skip_serializing_if = "Option::is_none")]
188 pub schema_name: Option<String>,
189 /// Medallion architecture layers (Bronze, Silver, Gold)
190 #[serde(default)]
191 pub medallion_layers: Vec<MedallionLayer>,
192 /// Slowly Changing Dimension pattern (Type 1, Type 2, etc.)
193 #[serde(skip_serializing_if = "Option::is_none")]
194 pub scd_pattern: Option<SCDPattern>,
195 /// Data Vault classification (Hub, Link, Satellite)
196 #[serde(skip_serializing_if = "Option::is_none")]
197 pub data_vault_classification: Option<DataVaultClassification>,
198 /// Modeling level (Conceptual, Logical, Physical)
199 #[serde(skip_serializing_if = "Option::is_none")]
200 pub modeling_level: Option<ModelingLevel>,
201 /// Tags for categorization and filtering (supports Simple, Pair, and List formats)
202 #[serde(default, deserialize_with = "deserialize_tags")]
203 pub tags: Vec<Tag>,
204 /// ODCL/ODCS metadata (legacy format support)
205 #[serde(default)]
206 pub odcl_metadata: HashMap<String, serde_json::Value>,
207 /// Owner information (person, team, or organization name) for Data Flow nodes
208 #[serde(skip_serializing_if = "Option::is_none")]
209 pub owner: Option<String>,
210 /// SLA (Service Level Agreement) information (ODCS-inspired but lightweight format)
211 #[serde(skip_serializing_if = "Option::is_none")]
212 pub sla: Option<Vec<SlaProperty>>,
213 /// Contact details for responsible parties
214 #[serde(skip_serializing_if = "Option::is_none")]
215 pub contact_details: Option<ContactDetails>,
216 /// Infrastructure type (hosting platform, service, or tool) for Data Flow nodes
217 #[serde(skip_serializing_if = "Option::is_none")]
218 pub infrastructure_type: Option<InfrastructureType>,
219 /// Additional notes and context for Data Flow nodes
220 #[serde(skip_serializing_if = "Option::is_none")]
221 pub notes: Option<String>,
222 /// Canvas position for visual representation
223 #[serde(skip_serializing_if = "Option::is_none")]
224 pub position: Option<Position>,
225 /// Path to YAML file if loaded from file system
226 #[serde(skip_serializing_if = "Option::is_none")]
227 pub yaml_file_path: Option<String>,
228 /// Draw.io cell ID for diagram integration
229 #[serde(skip_serializing_if = "Option::is_none")]
230 pub drawio_cell_id: Option<String>,
231 /// Quality rules and checks
232 #[serde(default)]
233 pub quality: Vec<HashMap<String, serde_json::Value>>,
234 /// Validation errors and warnings
235 #[serde(default)]
236 pub errors: Vec<HashMap<String, serde_json::Value>>,
237 /// Creation timestamp
238 pub created_at: DateTime<Utc>,
239 /// Last update timestamp
240 pub updated_at: DateTime<Utc>,
241}
242
243impl Table {
244 /// Create a new table with the given name and columns
245 ///
246 /// # Arguments
247 ///
248 /// * `name` - The table name (must be valid according to naming conventions)
249 /// * `columns` - Vector of columns for the table
250 ///
251 /// # Returns
252 ///
253 /// A new `Table` instance with a generated UUIDv4 ID and current timestamps.
254 ///
255 /// # Example
256 ///
257 /// ```rust
258 /// use data_modelling_sdk::models::{Table, Column};
259 ///
260 /// let table = Table::new(
261 /// "users".to_string(),
262 /// vec![Column::new("id".to_string(), "INT".to_string())],
263 /// );
264 /// ```
265 pub fn new(name: String, columns: Vec<Column>) -> Self {
266 let now = Utc::now();
267 // UUIDv4 everywhere (do not derive ids from natural keys like name).
268 let id = Self::generate_id(&name, None, None, None);
269 Self {
270 id,
271 name,
272 columns,
273 database_type: None,
274 catalog_name: None,
275 schema_name: None,
276 medallion_layers: Vec::new(),
277 scd_pattern: None,
278 data_vault_classification: None,
279 modeling_level: None,
280 tags: Vec::new(),
281 odcl_metadata: HashMap::new(),
282 owner: None,
283 sla: None,
284 contact_details: None,
285 infrastructure_type: None,
286 notes: None,
287 position: None,
288 yaml_file_path: None,
289 drawio_cell_id: None,
290 quality: Vec::new(),
291 errors: Vec::new(),
292 created_at: now,
293 updated_at: now,
294 }
295 }
296
297 /// Get the unique key tuple for this table
298 ///
299 /// Returns a tuple of (database_type, name, catalog_name, schema_name) that uniquely
300 /// identifies this table within its scope. Used for detecting naming conflicts.
301 ///
302 /// # Returns
303 ///
304 /// A tuple containing the database type (as string), name, catalog name, and schema name.
305 pub fn get_unique_key(&self) -> (Option<String>, String, Option<String>, Option<String>) {
306 (
307 self.database_type.as_ref().map(|dt| format!("{:?}", dt)),
308 self.name.clone(),
309 self.catalog_name.clone(),
310 self.schema_name.clone(),
311 )
312 }
313
314 /// Generate a UUIDv4 for a new table id.
315 ///
316 /// Note: params are retained for backward-compatibility with previous deterministic-v5 API.
317 pub fn generate_id(
318 _name: &str,
319 _database_type: Option<&DatabaseType>,
320 _catalog_name: Option<&str>,
321 _schema_name: Option<&str>,
322 ) -> Uuid {
323 Uuid::new_v4()
324 }
325}