data_modelling_sdk/models/
data_model.rs

1//! DataModel for the SDK
2
3use super::enums::InfrastructureType;
4use super::relationship::Relationship;
5use super::table::Table;
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use uuid::Uuid;
9
/// Data model representing a complete data model with tables and relationships
///
/// A `DataModel` is a container for a collection of tables and their relationships.
/// It represents a workspace or domain within a larger data modeling system.
///
/// The struct derives `Serialize` and `Deserialize`. The `serde` attributes on the
/// individual fields decide which fields may be left out of the serialized form:
/// `Option` fields marked `skip_serializing_if = "Option::is_none"` are not written
/// when they are `None`, and fields marked `default` fall back to their type's
/// default value when they are missing from the input.
///
/// # Example
///
/// ```rust
/// use data_modelling_sdk::models::DataModel;
///
/// let model = DataModel::new(
///     "MyModel".to_string(),
///     "/path/to/git".to_string(),
///     "control.yaml".to_string(),
/// );
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataModel {
    /// Unique identifier for the model.
    ///
    /// [`DataModel::new`] fills this with a UUIDv5 computed from the Git directory
    /// path and the model name, so the same inputs always produce the same id.
    pub id: Uuid,
    /// Model name
    pub name: String,
    /// Optional description of the model (omitted from serialized output when `None`)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Path to the Git repository directory
    pub git_directory_path: String,
    /// Tables in this model (an empty list when the field is missing on deserialization)
    #[serde(default)]
    pub tables: Vec<Table>,
    /// Relationships between tables (an empty list when the field is missing on
    /// deserialization)
    #[serde(default)]
    pub relationships: Vec<Relationship>,
    /// Path to the control file (relationships.yaml)
    pub control_file_path: String,
    /// Path to diagram file if applicable (omitted from serialized output when `None`)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diagram_file_path: Option<String>,
    /// Whether this model is in a subfolder (`false` when the field is missing on
    /// deserialization)
    #[serde(default)]
    pub is_subfolder: bool,
    /// Parent Git directory if this is a subfolder (omitted from serialized output
    /// when `None`)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_git_directory: Option<String>,
    /// Creation timestamp (UTC)
    pub created_at: DateTime<Utc>,
    /// Last update timestamp (UTC); [`DataModel::new`] initialises it to the same
    /// instant as `created_at`
    pub updated_at: DateTime<Utc>,
}
59
60impl DataModel {
61    /// Create a new data model with the given name and paths
62    ///
63    /// # Arguments
64    ///
65    /// * `name` - The model name
66    /// * `git_directory_path` - Path to the Git repository directory
67    /// * `control_file_path` - Path to the control file (typically "relationships.yaml")
68    ///
69    /// # Returns
70    ///
71    /// A new `DataModel` instance with a UUIDv5 ID (deterministic based on name and path)
72    /// and current timestamps.
73    ///
74    /// # Example
75    ///
76    /// ```rust
77    /// use data_modelling_sdk::models::DataModel;
78    ///
79    /// let model = DataModel::new(
80    ///     "MyModel".to_string(),
81    ///     "/workspace/models".to_string(),
82    ///     "relationships.yaml".to_string(),
83    /// );
84    /// ```
85    pub fn new(name: String, git_directory_path: String, control_file_path: String) -> Self {
86        let now = Utc::now();
87        // Use deterministic UUID v5 based on model name and git path
88        // This avoids requiring random number generation (getrandom/wasm_js)
89        let key = format!("{}:{}", git_directory_path, name);
90        let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, key.as_bytes());
91        Self {
92            id,
93            name,
94            description: None,
95            git_directory_path,
96            tables: Vec::new(),
97            relationships: Vec::new(),
98            control_file_path,
99            diagram_file_path: None,
100            is_subfolder: false,
101            parent_git_directory: None,
102            created_at: now,
103            updated_at: now,
104        }
105    }
106
107    /// Get a table by its ID
108    ///
109    /// # Arguments
110    ///
111    /// * `table_id` - The UUID of the table to find
112    ///
113    /// # Returns
114    ///
115    /// A reference to the table if found, `None` otherwise.
116    pub fn get_table_by_id(&self, table_id: Uuid) -> Option<&Table> {
117        self.tables.iter().find(|t| t.id == table_id)
118    }
119
120    /// Get a mutable reference to a table by its ID
121    ///
122    /// # Arguments
123    ///
124    /// * `table_id` - The UUID of the table to find
125    ///
126    /// # Returns
127    ///
128    /// A mutable reference to the table if found, `None` otherwise.
129    pub fn get_table_by_id_mut(&mut self, table_id: Uuid) -> Option<&mut Table> {
130        self.tables.iter_mut().find(|t| t.id == table_id)
131    }
132
133    /// Get a table by its name
134    ///
135    /// # Arguments
136    ///
137    /// * `name` - The name of the table to find
138    ///
139    /// # Returns
140    ///
141    /// A reference to the first table with the given name if found, `None` otherwise.
142    ///
143    /// # Note
144    ///
145    /// If multiple tables have the same name (different database_type/catalog/schema),
146    /// use `get_table_by_unique_key` instead.
147    pub fn get_table_by_name(&self, name: &str) -> Option<&Table> {
148        self.tables.iter().find(|t| t.name == name)
149    }
150
151    /// Get a table by its unique key (database_type, name, catalog, schema)
152    ///
153    /// # Arguments
154    ///
155    /// * `database_type` - Optional database type
156    /// * `name` - Table name
157    /// * `catalog_name` - Optional catalog name
158    /// * `schema_name` - Optional schema name
159    ///
160    /// # Returns
161    ///
162    /// A reference to the table if found, `None` otherwise.
163    ///
164    /// # Example
165    ///
166    /// ```rust
167    /// # use data_modelling_sdk::models::DataModel;
168    /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
169    /// // Find table in specific schema
170    /// let table = model.get_table_by_unique_key(
171    ///     Some("PostgreSQL"),
172    ///     "users",
173    ///     Some("mydb"),
174    ///     Some("public"),
175    /// );
176    /// ```
177    pub fn get_table_by_unique_key(
178        &self,
179        database_type: Option<&str>,
180        name: &str,
181        catalog_name: Option<&str>,
182        schema_name: Option<&str>,
183    ) -> Option<&Table> {
184        let target_key = (
185            database_type.map(|s| s.to_string()),
186            name.to_string(),
187            catalog_name.map(|s| s.to_string()),
188            schema_name.map(|s| s.to_string()),
189        );
190        self.tables
191            .iter()
192            .find(|t| t.get_unique_key() == target_key)
193    }
194
195    /// Get all relationships involving a specific table
196    ///
197    /// # Arguments
198    ///
199    /// * `table_id` - The UUID of the table
200    ///
201    /// # Returns
202    ///
203    /// A vector of references to relationships where the table is either the source or target.
204    ///
205    /// # Example
206    ///
207    /// ```rust
208    /// # use data_modelling_sdk::models::DataModel;
209    /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
210    /// # let table_id = uuid::Uuid::new_v4();
211    /// // Get all relationships for a table
212    /// let relationships = model.get_relationships_for_table(table_id);
213    /// ```
214    pub fn get_relationships_for_table(&self, table_id: Uuid) -> Vec<&Relationship> {
215        self.relationships
216            .iter()
217            .filter(|r| r.source_table_id == table_id || r.target_table_id == table_id)
218            .collect()
219    }
220
221    /// Filter Data Flow nodes (tables) by owner
222    ///
223    /// # Arguments
224    ///
225    /// * `owner` - The owner name to filter by (case-sensitive exact match)
226    ///
227    /// # Returns
228    ///
229    /// A vector of references to tables matching the owner.
230    ///
231    /// # Example
232    ///
233    /// ```rust
234    /// # use data_modelling_sdk::models::{DataModel, Table, Column};
235    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
236    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
237    /// # table.owner = Some("Data Engineering Team".to_string());
238    /// # model.tables.push(table);
239    /// let owned_nodes = model.filter_nodes_by_owner("Data Engineering Team");
240    /// ```
241    pub fn filter_nodes_by_owner(&self, owner: &str) -> Vec<&Table> {
242        self.tables
243            .iter()
244            .filter(|t| t.owner.as_deref() == Some(owner))
245            .collect()
246    }
247
248    /// Filter Data Flow relationships by owner
249    ///
250    /// # Arguments
251    ///
252    /// * `owner` - The owner name to filter by (case-sensitive exact match)
253    ///
254    /// # Returns
255    ///
256    /// A vector of references to relationships matching the owner.
257    ///
258    /// # Example
259    ///
260    /// ```rust
261    /// # use data_modelling_sdk::models::{DataModel, Relationship};
262    /// # use uuid::Uuid;
263    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
264    /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
265    /// # rel.owner = Some("Data Engineering Team".to_string());
266    /// # model.relationships.push(rel);
267    /// let owned_relationships = model.filter_relationships_by_owner("Data Engineering Team");
268    /// ```
269    pub fn filter_relationships_by_owner(&self, owner: &str) -> Vec<&Relationship> {
270        self.relationships
271            .iter()
272            .filter(|r| r.owner.as_deref() == Some(owner))
273            .collect()
274    }
275
276    /// Filter Data Flow nodes (tables) by infrastructure type
277    ///
278    /// # Arguments
279    ///
280    /// * `infra_type` - The infrastructure type to filter by
281    ///
282    /// # Returns
283    ///
284    /// A vector of references to tables matching the infrastructure type.
285    ///
286    /// # Example
287    ///
288    /// ```rust
289    /// # use data_modelling_sdk::models::{DataModel, Table, Column, InfrastructureType};
290    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
291    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
292    /// # table.infrastructure_type = Some(InfrastructureType::Kafka);
293    /// # model.tables.push(table);
294    /// let kafka_nodes = model.filter_nodes_by_infrastructure_type(InfrastructureType::Kafka);
295    /// ```
296    pub fn filter_nodes_by_infrastructure_type(
297        &self,
298        infra_type: InfrastructureType,
299    ) -> Vec<&Table> {
300        self.tables
301            .iter()
302            .filter(|t| t.infrastructure_type == Some(infra_type))
303            .collect()
304    }
305
306    /// Filter Data Flow relationships by infrastructure type
307    ///
308    /// # Arguments
309    ///
310    /// * `infra_type` - The infrastructure type to filter by
311    ///
312    /// # Returns
313    ///
314    /// A vector of references to relationships matching the infrastructure type.
315    ///
316    /// # Example
317    ///
318    /// ```rust
319    /// # use data_modelling_sdk::models::{DataModel, Relationship, InfrastructureType};
320    /// # use uuid::Uuid;
321    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
322    /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
323    /// # rel.infrastructure_type = Some(InfrastructureType::Kafka);
324    /// # model.relationships.push(rel);
325    /// let kafka_relationships = model.filter_relationships_by_infrastructure_type(InfrastructureType::Kafka);
326    /// ```
327    pub fn filter_relationships_by_infrastructure_type(
328        &self,
329        infra_type: InfrastructureType,
330    ) -> Vec<&Relationship> {
331        self.relationships
332            .iter()
333            .filter(|r| r.infrastructure_type == Some(infra_type))
334            .collect()
335    }
336
337    /// Filter Data Flow nodes and relationships by tag
338    ///
339    /// # Arguments
340    ///
341    /// * `tag` - The tag to filter by
342    ///
343    /// # Returns
344    ///
345    /// A tuple containing vectors of references to tables and relationships containing the tag.
346    ///
347    /// # Example
348    ///
349    /// ```rust
350    /// # use data_modelling_sdk::models::{DataModel, Table, Column};
351    /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
352    /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
353    /// # table.tags.push("production".to_string());
354    /// # model.tables.push(table);
355    /// let (tagged_nodes, tagged_relationships) = model.filter_by_tags("production");
356    /// ```
357    pub fn filter_by_tags(&self, tag: &str) -> (Vec<&Table>, Vec<&Relationship>) {
358        let tagged_tables: Vec<&Table> = self
359            .tables
360            .iter()
361            .filter(|t| t.tags.contains(&tag.to_string()))
362            .collect();
363        let tagged_relationships: Vec<&Relationship> = self
364            .relationships
365            .iter()
366            .filter(|_r| {
367                // Relationships don't have tags field, so we return empty for now
368                // This maintains the API contract but relationships don't support tags yet
369                false
370            })
371            .collect();
372        (tagged_tables, tagged_relationships)
373    }
374}