data_modelling_sdk/model/
loader.rs

1//! Model loading functionality
2//!
3//! Loads models from storage backends, handling YAML parsing and validation.
4//!
5//! Supports both file-based loading (FileSystemStorageBackend, BrowserStorageBackend)
6//! and API-based loading (ApiStorageBackend).
7
8use crate::storage::{StorageBackend, StorageError};
9use anyhow::Result;
10use serde_yaml;
11use std::collections::HashMap;
12use tracing::{info, warn};
13use uuid::Uuid;
14// Serialize/Deserialize not currently used
15
16/// Model loader that uses a storage backend
17pub struct ModelLoader<B: StorageBackend> {
18    storage: B,
19}
20
21impl<B: StorageBackend> ModelLoader<B> {
22    /// Create a new model loader with the given storage backend
23    pub fn new(storage: B) -> Self {
24        Self { storage }
25    }
26
27    /// Load a model from storage
28    ///
29    /// For file-based backends (FileSystemStorageBackend, BrowserStorageBackend):
30    /// - Loads from `tables/` subdirectory with YAML files
31    /// - Loads from `relationships.yaml` file
32    ///
33    /// For API backend (ApiStorageBackend), use `load_model_from_api()` instead.
34    ///
35    /// Returns the loaded model data and a list of orphaned relationships
36    /// (relationships that reference non-existent tables).
37    pub async fn load_model(&self, workspace_path: &str) -> Result<ModelLoadResult, StorageError> {
38        // File-based loading implementation
39        self.load_model_from_files(workspace_path).await
40    }
41
42    /// Load model from file-based storage
43    async fn load_model_from_files(
44        &self,
45        workspace_path: &str,
46    ) -> Result<ModelLoadResult, StorageError> {
47        let tables_dir = format!("{}/tables", workspace_path);
48
49        // Ensure tables directory exists
50        if !self.storage.dir_exists(&tables_dir).await? {
51            self.storage.create_dir(&tables_dir).await?;
52        }
53
54        // Load tables from individual YAML files
55        let mut tables = Vec::new();
56        let mut table_ids: HashMap<Uuid, String> = HashMap::new();
57
58        let files = self.storage.list_files(&tables_dir).await?;
59        for file_name in files {
60            if file_name.ends_with(".yaml") || file_name.ends_with(".yml") {
61                let file_path = format!("{}/{}", tables_dir, file_name);
62                match self.load_table_from_yaml(&file_path, workspace_path).await {
63                    Ok(table_data) => {
64                        table_ids.insert(table_data.id, table_data.name.clone());
65                        tables.push(table_data);
66                    }
67                    Err(e) => {
68                        warn!("Failed to load table from {}: {}", file_path, e);
69                    }
70                }
71            }
72        }
73
74        info!(
75            "Loaded {} tables from workspace {}",
76            tables.len(),
77            workspace_path
78        );
79
80        // Load relationships from control file
81        let relationships_file = format!("{}/relationships.yaml", workspace_path);
82        let mut relationships = Vec::new();
83        let mut orphaned_relationships = Vec::new();
84
85        if self.storage.file_exists(&relationships_file).await? {
86            match self.load_relationships_from_yaml(&relationships_file).await {
87                Ok(loaded_rels) => {
88                    // Separate valid and orphaned relationships
89                    for rel in loaded_rels {
90                        let source_exists = table_ids.contains_key(&rel.source_table_id);
91                        let target_exists = table_ids.contains_key(&rel.target_table_id);
92
93                        if source_exists && target_exists {
94                            relationships.push(rel.clone());
95                        } else {
96                            orphaned_relationships.push(rel.clone());
97                            warn!(
98                                "Orphaned relationship {}: source={} (exists: {}), target={} (exists: {})",
99                                rel.id,
100                                rel.source_table_id,
101                                source_exists,
102                                rel.target_table_id,
103                                target_exists
104                            );
105                        }
106                    }
107                }
108                Err(e) => {
109                    warn!(
110                        "Failed to load relationships from {}: {}",
111                        relationships_file, e
112                    );
113                }
114            }
115        }
116
117        info!(
118            "Loaded {} relationships ({} orphaned) from workspace {}",
119            relationships.len(),
120            orphaned_relationships.len(),
121            workspace_path
122        );
123
124        Ok(ModelLoadResult {
125            tables,
126            relationships,
127            orphaned_relationships,
128        })
129    }
130
131    /// Load a table from a YAML file
132    ///
133    /// Uses ODCSImporter to fully parse the table structure, including all columns,
134    /// metadata, and nested properties. This ensures complete table data is loaded.
135    async fn load_table_from_yaml(
136        &self,
137        yaml_path: &str,
138        workspace_path: &str,
139    ) -> Result<TableData, StorageError> {
140        let content = self.storage.read_file(yaml_path).await?;
141        let yaml_content = String::from_utf8(content)
142            .map_err(|e| StorageError::SerializationError(format!("Invalid UTF-8: {}", e)))?;
143
144        // Use ODCSImporter to fully parse the table structure
145        let mut importer = crate::import::odcs::ODCSImporter::new();
146        let (table, parse_errors) = importer.parse_table(&yaml_content).map_err(|e| {
147            StorageError::SerializationError(format!("Failed to parse ODCS YAML: {}", e))
148        })?;
149
150        // Log any parse warnings/errors but don't fail if table was successfully parsed
151        if !parse_errors.is_empty() {
152            warn!(
153                "Table '{}' parsed with {} warnings/errors",
154                table.name,
155                parse_errors.len()
156            );
157        }
158
159        // Calculate relative path
160        let relative_path = yaml_path
161            .strip_prefix(workspace_path)
162            .map(|s| s.strip_prefix('/').unwrap_or(s).to_string())
163            .unwrap_or_else(|| yaml_path.to_string());
164
165        Ok(TableData {
166            id: table.id,
167            name: table.name,
168            yaml_file_path: Some(relative_path),
169            yaml_content,
170        })
171    }
172
173    /// Load relationships from YAML file
174    async fn load_relationships_from_yaml(
175        &self,
176        yaml_path: &str,
177    ) -> Result<Vec<RelationshipData>, StorageError> {
178        let content = self.storage.read_file(yaml_path).await?;
179        let yaml_content = String::from_utf8(content)
180            .map_err(|e| StorageError::SerializationError(format!("Invalid UTF-8: {}", e)))?;
181
182        let data: serde_yaml::Value = serde_yaml::from_str(&yaml_content).map_err(|e| {
183            StorageError::SerializationError(format!("Failed to parse YAML: {}", e))
184        })?;
185
186        let mut relationships = Vec::new();
187
188        // Handle both formats: direct array or object with "relationships" key
189        let rels_array = data
190            .get("relationships")
191            .and_then(|v| v.as_sequence())
192            .or_else(|| data.as_sequence());
193
194        if let Some(rels_array) = rels_array {
195            for rel_data in rels_array {
196                match self.parse_relationship(rel_data) {
197                    Ok(rel) => relationships.push(rel),
198                    Err(e) => {
199                        warn!("Failed to parse relationship: {}", e);
200                    }
201                }
202            }
203        }
204
205        Ok(relationships)
206    }
207
208    /// Parse a relationship from YAML value
209    fn parse_relationship(
210        &self,
211        data: &serde_yaml::Value,
212    ) -> Result<RelationshipData, StorageError> {
213        let source_table_id = data
214            .get("source_table_id")
215            .and_then(|v| v.as_str())
216            .and_then(|s| Uuid::parse_str(s).ok())
217            .ok_or_else(|| {
218                StorageError::SerializationError("Missing source_table_id".to_string())
219            })?;
220
221        let target_table_id = data
222            .get("target_table_id")
223            .and_then(|v| v.as_str())
224            .and_then(|s| Uuid::parse_str(s).ok())
225            .ok_or_else(|| {
226                StorageError::SerializationError("Missing target_table_id".to_string())
227            })?;
228
229        // Parse existing UUID or generate deterministic one based on source and target table IDs
230        let id = data
231            .get("id")
232            .and_then(|v| v.as_str())
233            .and_then(|s| Uuid::parse_str(s).ok())
234            .unwrap_or_else(|| {
235                crate::models::relationship::Relationship::generate_id(
236                    source_table_id,
237                    target_table_id,
238                )
239            });
240
241        Ok(RelationshipData {
242            id,
243            source_table_id,
244            target_table_id,
245        })
246    }
247}
248
249/// Result of loading a model
250#[derive(Debug)]
251pub struct ModelLoadResult {
252    pub tables: Vec<TableData>,
253    pub relationships: Vec<RelationshipData>,
254    pub orphaned_relationships: Vec<RelationshipData>,
255}
256
257/// Table data loaded from storage
258#[derive(Debug, Clone)]
259pub struct TableData {
260    pub id: Uuid,
261    pub name: String,
262    pub yaml_file_path: Option<String>,
263    pub yaml_content: String,
264}
265
266/// Relationship data loaded from storage
267#[derive(Debug, Clone)]
268pub struct RelationshipData {
269    pub id: Uuid,
270    pub source_table_id: Uuid,
271    pub target_table_id: Uuid,
272}