data_modelling_sdk/models/
data_model.rs

1//! DataModel for the SDK
2
3use super::domain::{CADSNode, Domain, NodeConnection, ODCSNode, System, SystemConnection};
4use super::enums::InfrastructureType;
5use super::relationship::Relationship;
6use super::table::Table;
7use super::tag::Tag;
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10use std::str::FromStr;
11use uuid::Uuid;
12
13/// Data model representing a complete data model with tables and relationships
14///
15/// A `DataModel` is a container for a collection of tables and their relationships.
16/// It represents a workspace or domain within a larger data modeling system.
17///
18/// # Example
19///
20/// ```rust
21/// use data_modelling_sdk::models::DataModel;
22///
23/// let model = DataModel::new(
24///     "MyModel".to_string(),
25///     "/path/to/git".to_string(),
26///     "control.yaml".to_string(),
27/// );
28/// ```
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DataModel {
31    /// Unique identifier for the model (UUIDv5 based on name and path)
32    pub id: Uuid,
33    /// Model name
34    pub name: String,
35    /// Optional description of the model
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub description: Option<String>,
38    /// Path to the Git repository directory
39    pub git_directory_path: String,
40    /// Tables in this model
41    #[serde(default)]
42    pub tables: Vec<Table>,
43    /// Relationships between tables
44    #[serde(default)]
45    pub relationships: Vec<Relationship>,
46    /// Business domains in this model
47    #[serde(default)]
48    pub domains: Vec<Domain>,
49    /// Path to the control file (relationships.yaml)
50    pub control_file_path: String,
51    /// Path to diagram file if applicable
52    #[serde(skip_serializing_if = "Option::is_none")]
53    pub diagram_file_path: Option<String>,
54    /// Whether this model is in a subfolder
55    #[serde(default)]
56    pub is_subfolder: bool,
57    /// Parent Git directory if this is a subfolder
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub parent_git_directory: Option<String>,
60    /// Creation timestamp
61    pub created_at: DateTime<Utc>,
62    /// Last update timestamp
63    pub updated_at: DateTime<Utc>,
64}
65
66impl DataModel {
67    /// Create a new data model with the given name and paths
68    ///
69    /// # Arguments
70    ///
71    /// * `name` - The model name
72    /// * `git_directory_path` - Path to the Git repository directory
73    /// * `control_file_path` - Path to the control file (typically "relationships.yaml")
74    ///
75    /// # Returns
76    ///
77    /// A new `DataModel` instance with a UUIDv5 ID (deterministic based on name and path)
78    /// and current timestamps.
79    ///
80    /// # Example
81    ///
82    /// ```rust
83    /// use data_modelling_sdk::models::DataModel;
84    ///
85    /// let model = DataModel::new(
86    ///     "MyModel".to_string(),
87    ///     "/workspace/models".to_string(),
88    ///     "relationships.yaml".to_string(),
89    /// );
90    /// ```
91    pub fn new(name: String, git_directory_path: String, control_file_path: String) -> Self {
92        let now = Utc::now();
93        // Use deterministic UUID v5 based on model name and git path
94        // This avoids requiring random number generation (getrandom/wasm_js)
95        let key = format!("{}:{}", git_directory_path, name);
96        let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, key.as_bytes());
97        Self {
98            id,
99            name,
100            description: None,
101            git_directory_path,
102            tables: Vec::new(),
103            relationships: Vec::new(),
104            domains: Vec::new(),
105            control_file_path,
106            diagram_file_path: None,
107            is_subfolder: false,
108            parent_git_directory: None,
109            created_at: now,
110            updated_at: now,
111        }
112    }
113
114    /// Get a table by its ID
115    ///
116    /// # Arguments
117    ///
118    /// * `table_id` - The UUID of the table to find
119    ///
120    /// # Returns
121    ///
122    /// A reference to the table if found, `None` otherwise.
123    pub fn get_table_by_id(&self, table_id: Uuid) -> Option<&Table> {
124        self.tables.iter().find(|t| t.id == table_id)
125    }
126
127    /// Get a mutable reference to a table by its ID
128    ///
129    /// # Arguments
130    ///
131    /// * `table_id` - The UUID of the table to find
132    ///
133    /// # Returns
134    ///
135    /// A mutable reference to the table if found, `None` otherwise.
136    pub fn get_table_by_id_mut(&mut self, table_id: Uuid) -> Option<&mut Table> {
137        self.tables.iter_mut().find(|t| t.id == table_id)
138    }
139
140    /// Get a table by its name
141    ///
142    /// # Arguments
143    ///
144    /// * `name` - The name of the table to find
145    ///
146    /// # Returns
147    ///
148    /// A reference to the first table with the given name if found, `None` otherwise.
149    ///
150    /// # Note
151    ///
152    /// If multiple tables have the same name (different database_type/catalog/schema),
153    /// use `get_table_by_unique_key` instead.
154    pub fn get_table_by_name(&self, name: &str) -> Option<&Table> {
155        self.tables.iter().find(|t| t.name == name)
156    }
157
158    /// Get a table by its unique key (database_type, name, catalog, schema)
159    ///
160    /// # Arguments
161    ///
162    /// * `database_type` - Optional database type
163    /// * `name` - Table name
164    /// * `catalog_name` - Optional catalog name
165    /// * `schema_name` - Optional schema name
166    ///
167    /// # Returns
168    ///
169    /// A reference to the table if found, `None` otherwise.
170    ///
171    /// # Example
172    ///
173    /// ```rust
174    /// # use data_modelling_sdk::models::DataModel;
175    /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
176    /// // Find table in specific schema
177    /// let table = model.get_table_by_unique_key(
178    ///     Some("PostgreSQL"),
179    ///     "users",
180    ///     Some("mydb"),
181    ///     Some("public"),
182    /// );
183    /// ```
184    pub fn get_table_by_unique_key(
185        &self,
186        database_type: Option<&str>,
187        name: &str,
188        catalog_name: Option<&str>,
189        schema_name: Option<&str>,
190    ) -> Option<&Table> {
191        let target_key = (
192            database_type.map(|s| s.to_string()),
193            name.to_string(),
194            catalog_name.map(|s| s.to_string()),
195            schema_name.map(|s| s.to_string()),
196        );
197        self.tables
198            .iter()
199            .find(|t| t.get_unique_key() == target_key)
200    }
201
202    /// Get all relationships involving a specific table
203    ///
204    /// # Arguments
205    ///
206    /// * `table_id` - The UUID of the table
207    ///
208    /// # Returns
209    ///
210    /// A vector of references to relationships where the table is either the source or target.
211    ///
212    /// # Example
213    ///
214    /// ```rust
215    /// # use data_modelling_sdk::models::DataModel;
216    /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
217    /// # let table_id = uuid::Uuid::new_v4();
218    /// // Get all relationships for a table
219    /// let relationships = model.get_relationships_for_table(table_id);
220    /// ```
221    pub fn get_relationships_for_table(&self, table_id: Uuid) -> Vec<&Relationship> {
222        self.relationships
223            .iter()
224            .filter(|r| r.source_table_id == table_id || r.target_table_id == table_id)
225            .collect()
226    }
227
228    /// Filter Data Flow nodes (tables) by owner
229    ///
230    /// # Arguments
231    ///
232    /// * `owner` - The owner name to filter by (case-sensitive exact match)
233    ///
234    /// # Returns
235    ///
236    /// A vector of references to tables matching the owner.
237    ///
238    /// # Example
239    ///
240    /// ```rust
241    /// # use data_modelling_sdk::models::{DataModel, Table, Column};
242    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
243    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
244    /// # table.owner = Some("Data Engineering Team".to_string());
245    /// # model.tables.push(table);
246    /// let owned_nodes = model.filter_nodes_by_owner("Data Engineering Team");
247    /// ```
248    pub fn filter_nodes_by_owner(&self, owner: &str) -> Vec<&Table> {
249        self.tables
250            .iter()
251            .filter(|t| t.owner.as_deref() == Some(owner))
252            .collect()
253    }
254
255    /// Filter Data Flow relationships by owner
256    ///
257    /// # Arguments
258    ///
259    /// * `owner` - The owner name to filter by (case-sensitive exact match)
260    ///
261    /// # Returns
262    ///
263    /// A vector of references to relationships matching the owner.
264    ///
265    /// # Example
266    ///
267    /// ```rust
268    /// # use data_modelling_sdk::models::{DataModel, Relationship};
269    /// # use uuid::Uuid;
270    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
271    /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
272    /// # rel.owner = Some("Data Engineering Team".to_string());
273    /// # model.relationships.push(rel);
274    /// let owned_relationships = model.filter_relationships_by_owner("Data Engineering Team");
275    /// ```
276    pub fn filter_relationships_by_owner(&self, owner: &str) -> Vec<&Relationship> {
277        self.relationships
278            .iter()
279            .filter(|r| r.owner.as_deref() == Some(owner))
280            .collect()
281    }
282
283    /// Filter Data Flow nodes (tables) by infrastructure type
284    ///
285    /// # Arguments
286    ///
287    /// * `infra_type` - The infrastructure type to filter by
288    ///
289    /// # Returns
290    ///
291    /// A vector of references to tables matching the infrastructure type.
292    ///
293    /// # Example
294    ///
295    /// ```rust
296    /// # use data_modelling_sdk::models::{DataModel, Table, Column, InfrastructureType};
297    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
298    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
299    /// # table.infrastructure_type = Some(InfrastructureType::Kafka);
300    /// # model.tables.push(table);
301    /// let kafka_nodes = model.filter_nodes_by_infrastructure_type(InfrastructureType::Kafka);
302    /// ```
303    pub fn filter_nodes_by_infrastructure_type(
304        &self,
305        infra_type: InfrastructureType,
306    ) -> Vec<&Table> {
307        self.tables
308            .iter()
309            .filter(|t| t.infrastructure_type == Some(infra_type))
310            .collect()
311    }
312
313    /// Filter Data Flow relationships by infrastructure type
314    ///
315    /// # Arguments
316    ///
317    /// * `infra_type` - The infrastructure type to filter by
318    ///
319    /// # Returns
320    ///
321    /// A vector of references to relationships matching the infrastructure type.
322    ///
323    /// # Example
324    ///
325    /// ```rust
326    /// # use data_modelling_sdk::models::{DataModel, Relationship, InfrastructureType};
327    /// # use uuid::Uuid;
328    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
329    /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
330    /// # rel.infrastructure_type = Some(InfrastructureType::Kafka);
331    /// # model.relationships.push(rel);
332    /// let kafka_relationships = model.filter_relationships_by_infrastructure_type(InfrastructureType::Kafka);
333    /// ```
334    pub fn filter_relationships_by_infrastructure_type(
335        &self,
336        infra_type: InfrastructureType,
337    ) -> Vec<&Relationship> {
338        self.relationships
339            .iter()
340            .filter(|r| r.infrastructure_type == Some(infra_type))
341            .collect()
342    }
343
344    /// Filter Data Flow nodes and relationships by tag
345    ///
346    /// # Arguments
347    ///
348    /// * `tag` - The tag to filter by
349    ///
350    /// # Returns
351    ///
352    /// A tuple containing vectors of references to tables and relationships containing the tag.
353    ///
354    /// # Example
355    ///
356    /// ```rust
357    /// # use data_modelling_sdk::models::{DataModel, Table, Column, Tag};
358    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
359    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
360    /// # table.tags.push(Tag::Simple("production".to_string()));
361    /// # model.tables.push(table);
362    /// let (tagged_nodes, tagged_relationships) = model.filter_by_tags("production");
363    /// ```
364    pub fn filter_by_tags(&self, tag: &str) -> (Vec<&Table>, Vec<&Relationship>) {
365        // Parse the tag string to Tag enum for comparison
366        let search_tag = Tag::from_str(tag).unwrap_or_else(|_| {
367            // If parsing fails, create a Simple tag
368            Tag::Simple(tag.to_string())
369        });
370
371        let tagged_tables: Vec<&Table> = self
372            .tables
373            .iter()
374            .filter(|t| t.tags.contains(&search_tag))
375            .collect();
376        let tagged_relationships: Vec<&Relationship> = self
377            .relationships
378            .iter()
379            .filter(|_r| {
380                // Relationships don't have tags field, so we return empty for now
381                // This maintains the API contract but relationships don't support tags yet
382                false
383            })
384            .collect();
385        (tagged_tables, tagged_relationships)
386    }
387
388    /// Add a domain to the model
389    ///
390    /// # Arguments
391    ///
392    /// * `domain` - The domain to add
393    ///
394    /// # Example
395    ///
396    /// ```rust
397    /// # use data_modelling_sdk::models::{DataModel, Domain};
398    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
399    /// let domain = Domain::new("customer-service".to_string());
400    /// model.add_domain(domain);
401    /// ```
402    pub fn add_domain(&mut self, domain: Domain) {
403        self.domains.push(domain);
404        self.updated_at = Utc::now();
405    }
406
407    /// Get a domain by its ID
408    ///
409    /// # Arguments
410    ///
411    /// * `domain_id` - The UUID of the domain to find
412    ///
413    /// # Returns
414    ///
415    /// A reference to the domain if found, `None` otherwise.
416    pub fn get_domain_by_id(&self, domain_id: Uuid) -> Option<&Domain> {
417        self.domains.iter().find(|d| d.id == domain_id)
418    }
419
420    /// Get a mutable reference to a domain by its ID
421    ///
422    /// # Arguments
423    ///
424    /// * `domain_id` - The UUID of the domain to find
425    ///
426    /// # Returns
427    ///
428    /// A mutable reference to the domain if found, `None` otherwise.
429    pub fn get_domain_by_id_mut(&mut self, domain_id: Uuid) -> Option<&mut Domain> {
430        self.domains.iter_mut().find(|d| d.id == domain_id)
431    }
432
433    /// Get a domain by its name
434    ///
435    /// # Arguments
436    ///
437    /// * `name` - The name of the domain to find
438    ///
439    /// # Returns
440    ///
441    /// A reference to the first domain with the given name if found, `None` otherwise.
442    pub fn get_domain_by_name(&self, name: &str) -> Option<&Domain> {
443        self.domains.iter().find(|d| d.name == name)
444    }
445
446    /// Add a system to a domain
447    ///
448    /// # Arguments
449    ///
450    /// * `domain_id` - The UUID of the domain
451    /// * `system` - The system to add
452    ///
453    /// # Returns
454    ///
455    /// `Ok(())` if the domain was found and the system was added, `Err` otherwise.
456    ///
457    /// # Example
458    ///
459    /// ```rust
460    /// # use data_modelling_sdk::models::{DataModel, Domain, System, InfrastructureType};
461    /// # use uuid::Uuid;
462    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
463    /// # let domain = Domain::new("customer-service".to_string());
464    /// # let domain_id = domain.id;
465    /// # model.add_domain(domain);
466    /// let system = System::new("kafka-cluster".to_string(), InfrastructureType::Kafka, domain_id);
467    /// model.add_system_to_domain(domain_id, system).unwrap();
468    /// ```
469    pub fn add_system_to_domain(&mut self, domain_id: Uuid, system: System) -> Result<(), String> {
470        let domain = self
471            .get_domain_by_id_mut(domain_id)
472            .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
473        domain.add_system(system);
474        self.updated_at = Utc::now();
475        Ok(())
476    }
477
478    /// Add a CADS node to a domain
479    ///
480    /// # Arguments
481    ///
482    /// * `domain_id` - The UUID of the domain
483    /// * `node` - The CADS node to add
484    ///
485    /// # Returns
486    ///
487    /// `Ok(())` if the domain was found and the node was added, `Err` otherwise.
488    pub fn add_cads_node_to_domain(
489        &mut self,
490        domain_id: Uuid,
491        node: CADSNode,
492    ) -> Result<(), String> {
493        let domain = self
494            .get_domain_by_id_mut(domain_id)
495            .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
496        domain.add_cads_node(node);
497        self.updated_at = Utc::now();
498        Ok(())
499    }
500
501    /// Add an ODCS node to a domain
502    ///
503    /// # Arguments
504    ///
505    /// * `domain_id` - The UUID of the domain
506    /// * `node` - The ODCS node to add
507    ///
508    /// # Returns
509    ///
510    /// `Ok(())` if the domain was found and the node was added, `Err` otherwise.
511    pub fn add_odcs_node_to_domain(
512        &mut self,
513        domain_id: Uuid,
514        node: ODCSNode,
515    ) -> Result<(), String> {
516        let domain = self
517            .get_domain_by_id_mut(domain_id)
518            .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
519        domain.add_odcs_node(node);
520        self.updated_at = Utc::now();
521        Ok(())
522    }
523
524    /// Add a system connection to a domain
525    ///
526    /// # Arguments
527    ///
528    /// * `domain_id` - The UUID of the domain
529    /// * `connection` - The system connection to add
530    ///
531    /// # Returns
532    ///
533    /// `Ok(())` if the domain was found and the connection was added, `Err` otherwise.
534    pub fn add_system_connection_to_domain(
535        &mut self,
536        domain_id: Uuid,
537        connection: SystemConnection,
538    ) -> Result<(), String> {
539        let domain = self
540            .get_domain_by_id_mut(domain_id)
541            .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
542        domain.add_system_connection(connection);
543        self.updated_at = Utc::now();
544        Ok(())
545    }
546
547    /// Add a node connection to a domain
548    ///
549    /// # Arguments
550    ///
551    /// * `domain_id` - The UUID of the domain
552    /// * `connection` - The node connection to add
553    ///
554    /// # Returns
555    ///
556    /// `Ok(())` if the domain was found and the connection was added, `Err` otherwise.
557    pub fn add_node_connection_to_domain(
558        &mut self,
559        domain_id: Uuid,
560        connection: NodeConnection,
561    ) -> Result<(), String> {
562        let domain = self
563            .get_domain_by_id_mut(domain_id)
564            .ok_or_else(|| format!("Domain with ID {} not found", domain_id))?;
565        domain.add_node_connection(connection);
566        self.updated_at = Utc::now();
567        Ok(())
568    }
569}