data_modelling_sdk/models/data_model.rs
1//! DataModel for the SDK
2
3use super::domain::{CADSNode, Domain, NodeConnection, ODCSNode, System, SystemConnection};
4use super::enums::InfrastructureType;
5use super::relationship::Relationship;
6use super::table::Table;
7use super::tag::Tag;
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10use std::str::FromStr;
11use uuid::Uuid;
12
13/// Data model representing a complete data model with tables and relationships
14///
15/// A `DataModel` is a container for a collection of tables and their relationships.
16/// It represents a workspace or domain within a larger data modeling system.
17///
18/// # Example
19///
20/// ```rust
21/// use data_modelling_sdk::models::DataModel;
22///
23/// let model = DataModel::new(
24/// "MyModel".to_string(),
25/// "/path/to/git".to_string(),
26/// "control.yaml".to_string(),
27/// );
28/// ```
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DataModel {
31 /// Unique identifier for the model (UUIDv5 based on name and path)
32 pub id: Uuid,
33 /// Model name
34 pub name: String,
35 /// Optional description of the model
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub description: Option<String>,
38 /// Path to the Git repository directory
39 pub git_directory_path: String,
40 /// Tables in this model
41 #[serde(default)]
42 pub tables: Vec<Table>,
43 /// Relationships between tables
44 #[serde(default)]
45 pub relationships: Vec<Relationship>,
46 /// Business domains in this model
47 #[serde(default)]
48 pub domains: Vec<Domain>,
49 /// Path to the control file (relationships.yaml)
50 pub control_file_path: String,
51 /// Path to diagram file if applicable
52 #[serde(skip_serializing_if = "Option::is_none")]
53 pub diagram_file_path: Option<String>,
54 /// Whether this model is in a subfolder
55 #[serde(default)]
56 pub is_subfolder: bool,
57 /// Parent Git directory if this is a subfolder
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub parent_git_directory: Option<String>,
60 /// Creation timestamp
61 pub created_at: DateTime<Utc>,
62 /// Last update timestamp
63 pub updated_at: DateTime<Utc>,
64}
65
66impl DataModel {
67 /// Create a new data model with the given name and paths
68 ///
69 /// # Arguments
70 ///
71 /// * `name` - The model name
72 /// * `git_directory_path` - Path to the Git repository directory
73 /// * `control_file_path` - Path to the control file (typically "relationships.yaml")
74 ///
75 /// # Returns
76 ///
77 /// A new `DataModel` instance with a UUIDv5 ID (deterministic based on name and path)
78 /// and current timestamps.
79 ///
80 /// # Example
81 ///
82 /// ```rust
83 /// use data_modelling_sdk::models::DataModel;
84 ///
85 /// let model = DataModel::new(
86 /// "MyModel".to_string(),
87 /// "/workspace/models".to_string(),
88 /// "relationships.yaml".to_string(),
89 /// );
90 /// ```
91 pub fn new(name: String, git_directory_path: String, control_file_path: String) -> Self {
92 let now = Utc::now();
93 // Use deterministic UUID v5 based on model name and git path
94 // This avoids requiring random number generation (getrandom/wasm_js)
95 let key = format!("{}:{}", git_directory_path, name);
96 let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, key.as_bytes());
97 Self {
98 id,
99 name,
100 description: None,
101 git_directory_path,
102 tables: Vec::new(),
103 relationships: Vec::new(),
104 domains: Vec::new(),
105 control_file_path,
106 diagram_file_path: None,
107 is_subfolder: false,
108 parent_git_directory: None,
109 created_at: now,
110 updated_at: now,
111 }
112 }
113
114 /// Get a table by its ID
115 ///
116 /// # Arguments
117 ///
118 /// * `table_id` - The UUID of the table to find
119 ///
120 /// # Returns
121 ///
122 /// A reference to the table if found, `None` otherwise.
123 pub fn get_table_by_id(&self, table_id: Uuid) -> Option<&Table> {
124 self.tables.iter().find(|t| t.id == table_id)
125 }
126
127 /// Get a mutable reference to a table by its ID
128 ///
129 /// # Arguments
130 ///
131 /// * `table_id` - The UUID of the table to find
132 ///
133 /// # Returns
134 ///
135 /// A mutable reference to the table if found, `None` otherwise.
136 pub fn get_table_by_id_mut(&mut self, table_id: Uuid) -> Option<&mut Table> {
137 self.tables.iter_mut().find(|t| t.id == table_id)
138 }
139
140 /// Get a table by its name
141 ///
142 /// # Arguments
143 ///
144 /// * `name` - The name of the table to find
145 ///
146 /// # Returns
147 ///
148 /// A reference to the first table with the given name if found, `None` otherwise.
149 ///
150 /// # Note
151 ///
152 /// If multiple tables have the same name (different database_type/catalog/schema),
153 /// use `get_table_by_unique_key` instead.
154 pub fn get_table_by_name(&self, name: &str) -> Option<&Table> {
155 self.tables.iter().find(|t| t.name == name)
156 }
157
158 /// Get a table by its unique key (database_type, name, catalog, schema)
159 ///
160 /// # Arguments
161 ///
162 /// * `database_type` - Optional database type
163 /// * `name` - Table name
164 /// * `catalog_name` - Optional catalog name
165 /// * `schema_name` - Optional schema name
166 ///
167 /// # Returns
168 ///
169 /// A reference to the table if found, `None` otherwise.
170 ///
171 /// # Example
172 ///
173 /// ```rust
174 /// # use data_modelling_sdk::models::DataModel;
175 /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
176 /// // Find table in specific schema
177 /// let table = model.get_table_by_unique_key(
178 /// Some("PostgreSQL"),
179 /// "users",
180 /// Some("mydb"),
181 /// Some("public"),
182 /// );
183 /// ```
184 pub fn get_table_by_unique_key(
185 &self,
186 database_type: Option<&str>,
187 name: &str,
188 catalog_name: Option<&str>,
189 schema_name: Option<&str>,
190 ) -> Option<&Table> {
191 let target_key = (
192 database_type.map(|s| s.to_string()),
193 name.to_string(),
194 catalog_name.map(|s| s.to_string()),
195 schema_name.map(|s| s.to_string()),
196 );
197 self.tables
198 .iter()
199 .find(|t| t.get_unique_key() == target_key)
200 }
201
202 /// Get all relationships involving a specific table
203 ///
204 /// # Arguments
205 ///
206 /// * `table_id` - The UUID of the table
207 ///
208 /// # Returns
209 ///
210 /// A vector of references to relationships where the table is either the source or target.
211 ///
212 /// # Example
213 ///
214 /// ```rust
215 /// # use data_modelling_sdk::models::DataModel;
216 /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
217 /// # let table_id = uuid::Uuid::new_v4();
218 /// // Get all relationships for a table
219 /// let relationships = model.get_relationships_for_table(table_id);
220 /// ```
221 pub fn get_relationships_for_table(&self, table_id: Uuid) -> Vec<&Relationship> {
222 self.relationships
223 .iter()
224 .filter(|r| r.source_table_id == table_id || r.target_table_id == table_id)
225 .collect()
226 }
227
228 /// Filter Data Flow nodes (tables) by owner
229 ///
230 /// # Arguments
231 ///
232 /// * `owner` - The owner name to filter by (case-sensitive exact match)
233 ///
234 /// # Returns
235 ///
236 /// A vector of references to tables matching the owner.
237 ///
238 /// # Example
239 ///
240 /// ```rust
241 /// # use data_modelling_sdk::models::{DataModel, Table, Column};
242 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
243 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
244 /// # table.owner = Some("Data Engineering Team".to_string());
245 /// # model.tables.push(table);
246 /// let owned_nodes = model.filter_nodes_by_owner("Data Engineering Team");
247 /// ```
248 pub fn filter_nodes_by_owner(&self, owner: &str) -> Vec<&Table> {
249 self.tables
250 .iter()
251 .filter(|t| t.owner.as_deref() == Some(owner))
252 .collect()
253 }
254
255 /// Filter Data Flow relationships by owner
256 ///
257 /// # Arguments
258 ///
259 /// * `owner` - The owner name to filter by (case-sensitive exact match)
260 ///
261 /// # Returns
262 ///
263 /// A vector of references to relationships matching the owner.
264 ///
265 /// # Example
266 ///
267 /// ```rust
268 /// # use data_modelling_sdk::models::{DataModel, Relationship};
269 /// # use uuid::Uuid;
270 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
271 /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
272 /// # rel.owner = Some("Data Engineering Team".to_string());
273 /// # model.relationships.push(rel);
274 /// let owned_relationships = model.filter_relationships_by_owner("Data Engineering Team");
275 /// ```
276 pub fn filter_relationships_by_owner(&self, owner: &str) -> Vec<&Relationship> {
277 self.relationships
278 .iter()
279 .filter(|r| r.owner.as_deref() == Some(owner))
280 .collect()
281 }
282
283 /// Filter Data Flow nodes (tables) by infrastructure type
284 ///
285 /// # Arguments
286 ///
287 /// * `infra_type` - The infrastructure type to filter by
288 ///
289 /// # Returns
290 ///
291 /// A vector of references to tables matching the infrastructure type.
292 ///
293 /// # Example
294 ///
295 /// ```rust
296 /// # use data_modelling_sdk::models::{DataModel, Table, Column, InfrastructureType};
297 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
298 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
299 /// # table.infrastructure_type = Some(InfrastructureType::Kafka);
300 /// # model.tables.push(table);
301 /// let kafka_nodes = model.filter_nodes_by_infrastructure_type(InfrastructureType::Kafka);
302 /// ```
303 pub fn filter_nodes_by_infrastructure_type(
304 &self,
305 infra_type: InfrastructureType,
306 ) -> Vec<&Table> {
307 self.tables
308 .iter()
309 .filter(|t| t.infrastructure_type == Some(infra_type))
310 .collect()
311 }
312
313 /// Filter Data Flow relationships by infrastructure type
314 ///
315 /// # Arguments
316 ///
317 /// * `infra_type` - The infrastructure type to filter by
318 ///
319 /// # Returns
320 ///
321 /// A vector of references to relationships matching the infrastructure type.
322 ///
323 /// # Example
324 ///
325 /// ```rust
326 /// # use data_modelling_sdk::models::{DataModel, Relationship, InfrastructureType};
327 /// # use uuid::Uuid;
328 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
329 /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
330 /// # rel.infrastructure_type = Some(InfrastructureType::Kafka);
331 /// # model.relationships.push(rel);
332 /// let kafka_relationships = model.filter_relationships_by_infrastructure_type(InfrastructureType::Kafka);
333 /// ```
334 pub fn filter_relationships_by_infrastructure_type(
335 &self,
336 infra_type: InfrastructureType,
337 ) -> Vec<&Relationship> {
338 self.relationships
339 .iter()
340 .filter(|r| r.infrastructure_type == Some(infra_type))
341 .collect()
342 }
343
344 /// Filter Data Flow nodes and relationships by tag
345 ///
346 /// # Arguments
347 ///
348 /// * `tag` - The tag to filter by
349 ///
350 /// # Returns
351 ///
352 /// A tuple containing vectors of references to tables and relationships containing the tag.
353 ///
354 /// # Example
355 ///
356 /// ```rust
357 /// # use data_modelling_sdk::models::{DataModel, Table, Column, Tag};
358 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
359 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
360 /// # table.tags.push(Tag::Simple("production".to_string()));
361 /// # model.tables.push(table);
362 /// let (tagged_nodes, tagged_relationships) = model.filter_by_tags("production");
363 /// ```
364 pub fn filter_by_tags(&self, tag: &str) -> (Vec<&Table>, Vec<&Relationship>) {
365 // Parse the tag string to Tag enum for comparison
366 let search_tag = Tag::from_str(tag).unwrap_or_else(|_| {
367 // If parsing fails, create a Simple tag
368 Tag::Simple(tag.to_string())
369 });
370
371 let tagged_tables: Vec<&Table> = self
372 .tables
373 .iter()
374 .filter(|t| t.tags.contains(&search_tag))
375 .collect();
376 let tagged_relationships: Vec<&Relationship> = self
377 .relationships
378 .iter()
379 .filter(|_r| {
380 // Relationships don't have tags field, so we return empty for now
381 // This maintains the API contract but relationships don't support tags yet
382 false
383 })
384 .collect();
385 (tagged_tables, tagged_relationships)
386 }
387
388 /// Add a domain to the model
389 ///
390 /// # Arguments
391 ///
392 /// * `domain` - The domain to add
393 ///
394 /// # Example
395 ///
396 /// ```rust
397 /// # use data_modelling_sdk::models::{DataModel, Domain};
398 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
399 /// let domain = Domain::new("customer-service".to_string());
400 /// model.add_domain(domain);
401 /// ```
402 pub fn add_domain(&mut self, domain: Domain) {
403 self.domains.push(domain);
404 self.updated_at = Utc::now();
405 }
406
407 /// Get a domain by its ID
408 ///
409 /// # Arguments
410 ///
411 /// * `domain_id` - The UUID of the domain to find
412 ///
413 /// # Returns
414 ///
415 /// A reference to the domain if found, `None` otherwise.
416 pub fn get_domain_by_id(&self, domain_id: Uuid) -> Option<&Domain> {
417 self.domains.iter().find(|d| d.id == domain_id)
418 }
419
420 /// Get a mutable reference to a domain by its ID
421 ///
422 /// # Arguments
423 ///
424 /// * `domain_id` - The UUID of the domain to find
425 ///
426 /// # Returns
427 ///
428 /// A mutable reference to the domain if found, `None` otherwise.
429 pub fn get_domain_by_id_mut(&mut self, domain_id: Uuid) -> Option<&mut Domain> {
430 self.domains.iter_mut().find(|d| d.id == domain_id)
431 }
432
433 /// Get a domain by its name
434 ///
435 /// # Arguments
436 ///
437 /// * `name` - The name of the domain to find
438 ///
439 /// # Returns
440 ///
441 /// A reference to the first domain with the given name if found, `None` otherwise.
442 pub fn get_domain_by_name(&self, name: &str) -> Option<&Domain> {
443 self.domains.iter().find(|d| d.name == name)
444 }
445
446 /// Add a system to a domain
447 ///
448 /// # Arguments
449 ///
450 /// * `domain_id` - The UUID of the domain
451 /// * `system` - The system to add
452 ///
453 /// # Returns
454 ///
455 /// `Ok(())` if the domain was found and the system was added, `Err` otherwise.
456 ///
457 /// # Example
458 ///
459 /// ```rust
460 /// # use data_modelling_sdk::models::{DataModel, Domain, System, InfrastructureType};
461 /// # use uuid::Uuid;
462 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
463 /// # let domain = Domain::new("customer-service".to_string());
464 /// # let domain_id = domain.id;
465 /// # model.add_domain(domain);
466 /// let system = System::new("kafka-cluster".to_string(), InfrastructureType::Kafka, domain_id);
467 /// model.add_system_to_domain(domain_id, system).unwrap();
468 /// ```
469 pub fn add_system_to_domain(&mut self, domain_id: Uuid, system: System) -> Result<(), String> {
470 let domain = self
471 .get_domain_by_id_mut(domain_id)
472 .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
473 domain.add_system(system);
474 self.updated_at = Utc::now();
475 Ok(())
476 }
477
478 /// Add a CADS node to a domain
479 ///
480 /// # Arguments
481 ///
482 /// * `domain_id` - The UUID of the domain
483 /// * `node` - The CADS node to add
484 ///
485 /// # Returns
486 ///
487 /// `Ok(())` if the domain was found and the node was added, `Err` otherwise.
488 pub fn add_cads_node_to_domain(
489 &mut self,
490 domain_id: Uuid,
491 node: CADSNode,
492 ) -> Result<(), String> {
493 let domain = self
494 .get_domain_by_id_mut(domain_id)
495 .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
496 domain.add_cads_node(node);
497 self.updated_at = Utc::now();
498 Ok(())
499 }
500
501 /// Add an ODCS node to a domain
502 ///
503 /// # Arguments
504 ///
505 /// * `domain_id` - The UUID of the domain
506 /// * `node` - The ODCS node to add
507 ///
508 /// # Returns
509 ///
510 /// `Ok(())` if the domain was found and the node was added, `Err` otherwise.
511 pub fn add_odcs_node_to_domain(
512 &mut self,
513 domain_id: Uuid,
514 node: ODCSNode,
515 ) -> Result<(), String> {
516 let domain = self
517 .get_domain_by_id_mut(domain_id)
518 .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
519 domain.add_odcs_node(node);
520 self.updated_at = Utc::now();
521 Ok(())
522 }
523
524 /// Add a system connection to a domain
525 ///
526 /// # Arguments
527 ///
528 /// * `domain_id` - The UUID of the domain
529 /// * `connection` - The system connection to add
530 ///
531 /// # Returns
532 ///
533 /// `Ok(())` if the domain was found and the connection was added, `Err` otherwise.
534 pub fn add_system_connection_to_domain(
535 &mut self,
536 domain_id: Uuid,
537 connection: SystemConnection,
538 ) -> Result<(), String> {
539 let domain = self
540 .get_domain_by_id_mut(domain_id)
541 .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
542 domain.add_system_connection(connection);
543 self.updated_at = Utc::now();
544 Ok(())
545 }
546
547 /// Add a node connection to a domain
548 ///
549 /// # Arguments
550 ///
551 /// * `domain_id` - The UUID of the domain
552 /// * `connection` - The node connection to add
553 ///
554 /// # Returns
555 ///
556 /// `Ok(())` if the domain was found and the connection was added, `Err` otherwise.
557 pub fn add_node_connection_to_domain(
558 &mut self,
559 domain_id: Uuid,
560 connection: NodeConnection,
561 ) -> Result<(), String> {
562 let domain = self
563 .get_domain_by_id_mut(domain_id)
564 .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
565 domain.add_node_connection(connection);
566 self.updated_at = Utc::now();
567 Ok(())
568 }
569}