data_modelling_sdk/models/data_model.rs
//! The `DataModel` container type for the SDK: a named collection of tables and the relationships between them.
2
3use super::enums::InfrastructureType;
4use super::relationship::Relationship;
5use super::table::Table;
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use uuid::Uuid;
9
10/// Data model representing a complete data model with tables and relationships
11///
12/// A `DataModel` is a container for a collection of tables and their relationships.
13/// It represents a workspace or domain within a larger data modeling system.
14///
15/// # Example
16///
17/// ```rust
18/// use data_modelling_sdk::models::DataModel;
19///
20/// let model = DataModel::new(
21/// "MyModel".to_string(),
22/// "/path/to/git".to_string(),
23/// "control.yaml".to_string(),
24/// );
25/// ```
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct DataModel {
28 /// Unique identifier for the model (UUIDv5 based on name and path)
29 pub id: Uuid,
30 /// Model name
31 pub name: String,
32 /// Optional description of the model
33 #[serde(skip_serializing_if = "Option::is_none")]
34 pub description: Option<String>,
35 /// Path to the Git repository directory
36 pub git_directory_path: String,
37 /// Tables in this model
38 #[serde(default)]
39 pub tables: Vec<Table>,
40 /// Relationships between tables
41 #[serde(default)]
42 pub relationships: Vec<Relationship>,
43 /// Path to the control file (relationships.yaml)
44 pub control_file_path: String,
45 /// Path to diagram file if applicable
46 #[serde(skip_serializing_if = "Option::is_none")]
47 pub diagram_file_path: Option<String>,
48 /// Whether this model is in a subfolder
49 #[serde(default)]
50 pub is_subfolder: bool,
51 /// Parent Git directory if this is a subfolder
52 #[serde(skip_serializing_if = "Option::is_none")]
53 pub parent_git_directory: Option<String>,
54 /// Creation timestamp
55 pub created_at: DateTime<Utc>,
56 /// Last update timestamp
57 pub updated_at: DateTime<Utc>,
58}
59
60impl DataModel {
61 /// Create a new data model with the given name and paths
62 ///
63 /// # Arguments
64 ///
65 /// * `name` - The model name
66 /// * `git_directory_path` - Path to the Git repository directory
67 /// * `control_file_path` - Path to the control file (typically "relationships.yaml")
68 ///
69 /// # Returns
70 ///
71 /// A new `DataModel` instance with a UUIDv5 ID (deterministic based on name and path)
72 /// and current timestamps.
73 ///
74 /// # Example
75 ///
76 /// ```rust
77 /// use data_modelling_sdk::models::DataModel;
78 ///
79 /// let model = DataModel::new(
80 /// "MyModel".to_string(),
81 /// "/workspace/models".to_string(),
82 /// "relationships.yaml".to_string(),
83 /// );
84 /// ```
85 pub fn new(name: String, git_directory_path: String, control_file_path: String) -> Self {
86 let now = Utc::now();
87 // Use deterministic UUID v5 based on model name and git path
88 // This avoids requiring random number generation (getrandom/wasm_js)
89 let key = format!("{}:{}", git_directory_path, name);
90 let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, key.as_bytes());
91 Self {
92 id,
93 name,
94 description: None,
95 git_directory_path,
96 tables: Vec::new(),
97 relationships: Vec::new(),
98 control_file_path,
99 diagram_file_path: None,
100 is_subfolder: false,
101 parent_git_directory: None,
102 created_at: now,
103 updated_at: now,
104 }
105 }
106
107 /// Get a table by its ID
108 ///
109 /// # Arguments
110 ///
111 /// * `table_id` - The UUID of the table to find
112 ///
113 /// # Returns
114 ///
115 /// A reference to the table if found, `None` otherwise.
116 pub fn get_table_by_id(&self, table_id: Uuid) -> Option<&Table> {
117 self.tables.iter().find(|t| t.id == table_id)
118 }
119
120 /// Get a mutable reference to a table by its ID
121 ///
122 /// # Arguments
123 ///
124 /// * `table_id` - The UUID of the table to find
125 ///
126 /// # Returns
127 ///
128 /// A mutable reference to the table if found, `None` otherwise.
129 pub fn get_table_by_id_mut(&mut self, table_id: Uuid) -> Option<&mut Table> {
130 self.tables.iter_mut().find(|t| t.id == table_id)
131 }
132
133 /// Get a table by its name
134 ///
135 /// # Arguments
136 ///
137 /// * `name` - The name of the table to find
138 ///
139 /// # Returns
140 ///
141 /// A reference to the first table with the given name if found, `None` otherwise.
142 ///
143 /// # Note
144 ///
145 /// If multiple tables have the same name (different database_type/catalog/schema),
146 /// use `get_table_by_unique_key` instead.
147 pub fn get_table_by_name(&self, name: &str) -> Option<&Table> {
148 self.tables.iter().find(|t| t.name == name)
149 }
150
151 /// Get a table by its unique key (database_type, name, catalog, schema)
152 ///
153 /// # Arguments
154 ///
155 /// * `database_type` - Optional database type
156 /// * `name` - Table name
157 /// * `catalog_name` - Optional catalog name
158 /// * `schema_name` - Optional schema name
159 ///
160 /// # Returns
161 ///
162 /// A reference to the table if found, `None` otherwise.
163 ///
164 /// # Example
165 ///
166 /// ```rust
167 /// # use data_modelling_sdk::models::DataModel;
168 /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
169 /// // Find table in specific schema
170 /// let table = model.get_table_by_unique_key(
171 /// Some("PostgreSQL"),
172 /// "users",
173 /// Some("mydb"),
174 /// Some("public"),
175 /// );
176 /// ```
177 pub fn get_table_by_unique_key(
178 &self,
179 database_type: Option<&str>,
180 name: &str,
181 catalog_name: Option<&str>,
182 schema_name: Option<&str>,
183 ) -> Option<&Table> {
184 let target_key = (
185 database_type.map(|s| s.to_string()),
186 name.to_string(),
187 catalog_name.map(|s| s.to_string()),
188 schema_name.map(|s| s.to_string()),
189 );
190 self.tables
191 .iter()
192 .find(|t| t.get_unique_key() == target_key)
193 }
194
195 /// Get all relationships involving a specific table
196 ///
197 /// # Arguments
198 ///
199 /// * `table_id` - The UUID of the table
200 ///
201 /// # Returns
202 ///
203 /// A vector of references to relationships where the table is either the source or target.
204 ///
205 /// # Example
206 ///
207 /// ```rust
208 /// # use data_modelling_sdk::models::DataModel;
209 /// # let model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
210 /// # let table_id = uuid::Uuid::new_v4();
211 /// // Get all relationships for a table
212 /// let relationships = model.get_relationships_for_table(table_id);
213 /// ```
214 pub fn get_relationships_for_table(&self, table_id: Uuid) -> Vec<&Relationship> {
215 self.relationships
216 .iter()
217 .filter(|r| r.source_table_id == table_id || r.target_table_id == table_id)
218 .collect()
219 }
220
221 /// Filter Data Flow nodes (tables) by owner
222 ///
223 /// # Arguments
224 ///
225 /// * `owner` - The owner name to filter by (case-sensitive exact match)
226 ///
227 /// # Returns
228 ///
229 /// A vector of references to tables matching the owner.
230 ///
231 /// # Example
232 ///
233 /// ```rust
234 /// # use data_modelling_sdk::models::{DataModel, Table, Column};
235 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
236 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
237 /// # table.owner = Some("Data Engineering Team".to_string());
238 /// # model.tables.push(table);
239 /// let owned_nodes = model.filter_nodes_by_owner("Data Engineering Team");
240 /// ```
241 pub fn filter_nodes_by_owner(&self, owner: &str) -> Vec<&Table> {
242 self.tables
243 .iter()
244 .filter(|t| t.owner.as_deref() == Some(owner))
245 .collect()
246 }
247
248 /// Filter Data Flow relationships by owner
249 ///
250 /// # Arguments
251 ///
252 /// * `owner` - The owner name to filter by (case-sensitive exact match)
253 ///
254 /// # Returns
255 ///
256 /// A vector of references to relationships matching the owner.
257 ///
258 /// # Example
259 ///
260 /// ```rust
261 /// # use data_modelling_sdk::models::{DataModel, Relationship};
262 /// # use uuid::Uuid;
263 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
264 /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
265 /// # rel.owner = Some("Data Engineering Team".to_string());
266 /// # model.relationships.push(rel);
267 /// let owned_relationships = model.filter_relationships_by_owner("Data Engineering Team");
268 /// ```
269 pub fn filter_relationships_by_owner(&self, owner: &str) -> Vec<&Relationship> {
270 self.relationships
271 .iter()
272 .filter(|r| r.owner.as_deref() == Some(owner))
273 .collect()
274 }
275
276 /// Filter Data Flow nodes (tables) by infrastructure type
277 ///
278 /// # Arguments
279 ///
280 /// * `infra_type` - The infrastructure type to filter by
281 ///
282 /// # Returns
283 ///
284 /// A vector of references to tables matching the infrastructure type.
285 ///
286 /// # Example
287 ///
288 /// ```rust
289 /// # use data_modelling_sdk::models::{DataModel, Table, Column, InfrastructureType};
290 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
291 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
292 /// # table.infrastructure_type = Some(InfrastructureType::Kafka);
293 /// # model.tables.push(table);
294 /// let kafka_nodes = model.filter_nodes_by_infrastructure_type(InfrastructureType::Kafka);
295 /// ```
296 pub fn filter_nodes_by_infrastructure_type(
297 &self,
298 infra_type: InfrastructureType,
299 ) -> Vec<&Table> {
300 self.tables
301 .iter()
302 .filter(|t| t.infrastructure_type == Some(infra_type))
303 .collect()
304 }
305
306 /// Filter Data Flow relationships by infrastructure type
307 ///
308 /// # Arguments
309 ///
310 /// * `infra_type` - The infrastructure type to filter by
311 ///
312 /// # Returns
313 ///
314 /// A vector of references to relationships matching the infrastructure type.
315 ///
316 /// # Example
317 ///
318 /// ```rust
319 /// # use data_modelling_sdk::models::{DataModel, Relationship, InfrastructureType};
320 /// # use uuid::Uuid;
321 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
322 /// # let mut rel = Relationship::new(Uuid::new_v4(), Uuid::new_v4());
323 /// # rel.infrastructure_type = Some(InfrastructureType::Kafka);
324 /// # model.relationships.push(rel);
325 /// let kafka_relationships = model.filter_relationships_by_infrastructure_type(InfrastructureType::Kafka);
326 /// ```
327 pub fn filter_relationships_by_infrastructure_type(
328 &self,
329 infra_type: InfrastructureType,
330 ) -> Vec<&Relationship> {
331 self.relationships
332 .iter()
333 .filter(|r| r.infrastructure_type == Some(infra_type))
334 .collect()
335 }
336
337 /// Filter Data Flow nodes and relationships by tag
338 ///
339 /// # Arguments
340 ///
341 /// * `tag` - The tag to filter by
342 ///
343 /// # Returns
344 ///
345 /// A tuple containing vectors of references to tables and relationships containing the tag.
346 ///
347 /// # Example
348 ///
349 /// ```rust
350 /// # use data_modelling_sdk::models::{DataModel, Table, Column};
351 /// # let mut model = DataModel::new("test".to_string(), "/path".to_string(), "control.yaml".to_string());
352 /// # let mut table = Table::new("test_table".to_string(), vec![Column::new("id".to_string(), "INT".to_string())]);
353 /// # table.tags.push("production".to_string());
354 /// # model.tables.push(table);
355 /// let (tagged_nodes, tagged_relationships) = model.filter_by_tags("production");
356 /// ```
357 pub fn filter_by_tags(&self, tag: &str) -> (Vec<&Table>, Vec<&Relationship>) {
358 let tagged_tables: Vec<&Table> = self
359 .tables
360 .iter()
361 .filter(|t| t.tags.contains(&tag.to_string()))
362 .collect();
363 let tagged_relationships: Vec<&Relationship> = self
364 .relationships
365 .iter()
366 .filter(|_r| {
367 // Relationships don't have tags field, so we return empty for now
368 // This maintains the API contract but relationships don't support tags yet
369 false
370 })
371 .collect();
372 (tagged_tables, tagged_relationships)
373 }
374}