Skip to main content

mollendorff_forge/parser/
multi_doc.rs

1//! Multi-document YAML parsing for Forge models (v4.4.2)
2//!
3//! Handles parsing of YAML files with multiple documents (--- separators).
4
5use crate::error::{ForgeError, ForgeResult};
6use crate::types::ParsedModel;
7use serde_yaml_ng::Value;
8use std::collections::HashSet;
9use std::path::Path;
10
11use super::includes::resolve_includes;
12use super::model::parse_v1_model;
13
/// Detect whether `content` is a multi-document YAML file.
///
/// A file counts as multi-document when it contains at least two document
/// start markers. A marker is a line that begins at column 0 with `---` and
/// is followed by either nothing, or by a space or tab (optionally with more
/// text after it, e.g. `--- # comment`). Trailing whitespace is ignored.
///
/// Indented `---` lines are *not* markers: YAML only recognises the marker at
/// the start of a line, so dashes inside an indented block scalar are ordinary
/// content and must not make a single document look like several.
///
/// Returns `true` as soon as the second marker is seen; the rest of the input
/// is not scanned.
#[must_use]
pub fn detect_multi_document(content: &str) -> bool {
    content
        .lines()
        .filter(|line| {
            // Only the line end is trimmed so that leading indentation
            // disqualifies the line from being a marker.
            line.trim_end()
                .strip_prefix("---")
                .is_some_and(|rest| rest.is_empty() || rest.starts_with([' ', '\t']))
        })
        // `nth(1)` yields the second marker, if any, and stops iterating there.
        .nth(1)
        .is_some()
}
31
32/// Parse a single YAML document
33///
34/// # Errors
35///
36/// Returns an error if the YAML content is invalid or fails schema validation.
37pub fn parse_single_document_yaml(content: &str, path: &Path) -> ForgeResult<ParsedModel> {
38    // Strip leading document marker if present
39    let content = content.trim_start();
40    let content = content.strip_prefix("---").map_or(content, str::trim_start);
41
42    let yaml: Value = serde_yaml_ng::from_str(content)?;
43
44    let mut model = parse_v1_model(&yaml)?;
45
46    // Resolve includes if any (v4.0)
47    if !model.includes.is_empty() {
48        resolve_includes(&mut model, path, &mut HashSet::new())?;
49    }
50
51    Ok(model)
52}
53
54/// Parse a multi-document YAML file (v4.4.2)
55/// Each document is parsed and merged into a single model.
56/// Document names come from _name field or are auto-generated as "doc1", "doc2", etc.
57///
58/// # Errors
59///
60/// Returns an error if any document in the file contains invalid YAML or fails
61/// schema validation.
62pub fn parse_multi_document_yaml(content: &str, path: &Path) -> ForgeResult<ParsedModel> {
63    let mut merged_model = ParsedModel::new();
64    let mut doc_index = 0;
65
66    // Split by document separator lines (--- on its own line)
67    let docs = split_yaml_documents(content);
68
69    for doc_content in docs {
70        let doc_content = doc_content.trim();
71        if doc_content.is_empty() {
72            continue;
73        }
74
75        // Skip if it's just comments
76        let non_comment_content: String = doc_content
77            .lines()
78            .filter(|line| !line.trim().starts_with('#') && !line.trim().is_empty())
79            .collect::<Vec<_>>()
80            .join("\n");
81        if non_comment_content.is_empty() {
82            continue;
83        }
84
85        doc_index += 1;
86
87        // Parse the document
88        let yaml: Value = match serde_yaml_ng::from_str(doc_content) {
89            Ok(v) => v,
90            Err(e) => {
91                return Err(ForgeError::Parse(format!(
92                    "Failed to parse document {doc_index}: {e}"
93                )));
94            },
95        };
96
97        let doc_model = parse_v1_model(&yaml)?;
98
99        // Get document name from _name field or generate one
100        let doc_name = if let Some(Value::String(name)) = yaml.get("_name") {
101            name.clone()
102        } else {
103            format!("doc{doc_index}")
104        };
105
106        // Merge tables with document prefix
107        for (table_name, table) in doc_model.tables {
108            let prefixed_name = format!("{doc_name}.{table_name}");
109            let mut prefixed_table = table;
110            prefixed_table.name.clone_from(&prefixed_name);
111            merged_model.tables.insert(prefixed_name, prefixed_table);
112        }
113
114        // Merge scalars with document prefix
115        for (scalar_name, mut scalar) in doc_model.scalars {
116            let prefixed_name = format!("{doc_name}.{scalar_name}");
117            scalar.path.clone_from(&prefixed_name);
118            merged_model.scalars.insert(prefixed_name, scalar);
119        }
120
121        // Merge includes (keep original, they'll be resolved with proper paths)
122        for include in doc_model.includes {
123            merged_model.includes.push(include);
124        }
125
126        // Merge scenarios
127        for (scenario_name, scenario) in doc_model.scenarios {
128            let prefixed_name = format!("{doc_name}.{scenario_name}");
129            merged_model.scenarios.insert(prefixed_name, scenario);
130        }
131
132        // Store document metadata
133        merged_model.documents.push(doc_name);
134    }
135
136    // Resolve includes if any (v4.0)
137    if !merged_model.includes.is_empty() {
138        resolve_includes(&mut merged_model, path, &mut HashSet::new())?;
139    }
140
141    Ok(merged_model)
142}
143
/// Split YAML content into separate documents at `---` marker lines.
///
/// A marker is a line that begins at column 0 with `---` and is followed by
/// either nothing, or by a space or tab (any text after that is discarded
/// with the marker line). Indented `---` lines are kept as document content,
/// because YAML only recognises the marker at the start of a line; this keeps
/// dashes inside indented block scalars intact.
///
/// Behaviour worth knowing:
/// - Marker lines themselves never appear in the output.
/// - Text that precedes the first marker is discarded when a marker exists.
/// - Input without any marker is returned as a single document.
/// - Documents that are empty or whitespace-only are omitted.
/// - Lines are re-joined with `\n`; blank lines at the very start of a
///   document are dropped, and there is no trailing newline.
#[must_use]
pub fn split_yaml_documents(content: &str) -> Vec<String> {
    let mut documents = Vec::new();
    let mut current_doc = String::new();
    let mut seen_marker = false;

    for line in content.lines() {
        // Only the line end is trimmed so that leading indentation
        // disqualifies the line from being a marker.
        let is_marker = line
            .trim_end()
            .strip_prefix("---")
            .is_some_and(|rest| rest.is_empty() || rest.starts_with([' ', '\t']));

        if is_marker {
            // `take` hands over the finished text and resets the buffer.
            let finished = std::mem::take(&mut current_doc);
            // Text gathered before the first marker is intentionally dropped.
            if seen_marker && !finished.trim().is_empty() {
                documents.push(finished);
            }
            seen_marker = true;
        } else {
            if !current_doc.is_empty() {
                current_doc.push('\n');
            }
            current_doc.push_str(line);
        }
    }

    // The last document has no closing marker, so flush it here.
    if !current_doc.trim().is_empty() {
        documents.push(current_doc);
    }

    documents
}
176
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write `yaml` into a new temporary file and hand the file back.
    /// The file (and therefore its path) lives as long as the returned handle.
    fn temp_yaml(yaml: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(yaml.as_bytes()).unwrap();
        file
    }

    /// Round-trip `yaml` through a temporary file and parse it as multi-document.
    fn parse_multi(yaml: &str) -> ForgeResult<ParsedModel> {
        let file = temp_yaml(yaml);
        let content = std::fs::read_to_string(file.path()).unwrap();
        parse_multi_document_yaml(&content, file.path())
    }

    // --- detect_multi_document -------------------------------------------

    #[test]
    fn test_detect_multi_document_true() {
        assert!(detect_multi_document("---\nfirst: 1\n---\nsecond: 2\n"));
    }

    #[test]
    fn test_detect_multi_document_false_single_separator() {
        assert!(!detect_multi_document("---\nfirst: 1\n"));
    }

    #[test]
    fn test_detect_multi_document_false_no_separator() {
        assert!(!detect_multi_document("first: 1\nsecond: 2\n"));
    }

    #[test]
    fn test_detect_multi_document_with_trailing_content() {
        assert!(detect_multi_document(
            "--- first doc\nfirst: 1\n--- second\nsecond: 2\n"
        ));
    }

    // --- split_yaml_documents --------------------------------------------

    #[test]
    fn test_split_yaml_documents() {
        let docs = split_yaml_documents("---\nfirst: 1\n---\nsecond: 2\n");
        assert_eq!(docs.len(), 2);
        assert!(docs[0].contains("first: 1"));
        assert!(docs[1].contains("second: 2"));
    }

    #[test]
    fn test_split_yaml_documents_empty() {
        assert!(split_yaml_documents("").is_empty());
    }

    #[test]
    fn test_split_yaml_documents_single() {
        assert_eq!(split_yaml_documents("---\nfirst: 1\n").len(), 1);
    }

    // --- parse_multi_document_yaml ---------------------------------------

    #[test]
    fn test_parse_multi_doc_with_names() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
_name: "revenue"
data:
  values: [100, 200, 300]
---
_forge_version: "5.0.0"
_name: "costs"
expenses:
  amounts: [50, 100, 150]
"#,
        )
        .unwrap();

        assert!(model.tables.contains_key("revenue.data"));
        assert!(model.tables.contains_key("costs.expenses"));
        assert_eq!(model.documents.len(), 2);
        for expected in ["revenue", "costs"] {
            assert!(model.documents.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_parse_multi_doc_auto_names() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
data1:
  values: [1, 2, 3]
---
_forge_version: "5.0.0"
data2:
  values: [4, 5, 6]
"#,
        )
        .unwrap();

        assert!(model.tables.contains_key("doc1.data1"));
        assert!(model.tables.contains_key("doc2.data2"));
    }

    #[test]
    fn test_parse_multi_doc_with_scalars() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
_name: "config"
rate:
  value: 0.05
  formula: null
---
_forge_version: "5.0.0"
_name: "data"
values:
  items: [1, 2, 3]
"#,
        )
        .unwrap();

        assert!(model.scalars.contains_key("config.rate"));
        assert!(model.tables.contains_key("data.values"));
    }

    #[test]
    fn test_parse_multi_doc_skip_comments() {
        let model = parse_multi(
            r#"---
# This is a comment-only document
# No actual content
---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
"#,
        )
        .unwrap();

        assert!(!model.tables.is_empty());
    }

    #[test]
    fn test_parse_multi_doc_with_empty_doc() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
---

---
_forge_version: "5.0.0"
data2:
  values: [4, 5, 6]
"#,
        )
        .unwrap();

        assert_eq!(model.tables.len(), 2);
    }

    #[test]
    fn test_parse_multi_doc_invalid_yaml_error() {
        let outcome = parse_multi(
            r#"---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
---
invalid: yaml: [[[
"#,
        );

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Failed to parse document"));
    }

    #[test]
    fn test_parse_multi_doc_with_scenarios() {
        let model = parse_multi(
            r#"---
_name: doc1
_forge_version: "5.0.0"
budget:
  revenue: [1000, 2000]
scenarios:
  optimistic:
    growth: 1.2
---
_name: doc2
_forge_version: "5.0.0"
budget:
  costs: [500, 600]
scenarios:
  pessimistic:
    growth: 0.8
"#,
        )
        .unwrap();

        assert!(model.scenarios.contains_key("doc1.optimistic"));
        assert!(model.scenarios.contains_key("doc2.pessimistic"));
    }

    // --- parse_single_document_yaml --------------------------------------

    #[test]
    fn test_multi_document_yaml_with_leading_separator() {
        let file = temp_yaml(
            r#"---
_forge_version: "5.0.0"

sales:
  month: ["Jan", "Feb", "Mar"]
  revenue: [100, 200, 300]
"#,
        );
        let content = std::fs::read_to_string(file.path()).unwrap();

        // One document behind a leading marker goes through the single-document path.
        let model = parse_single_document_yaml(&content, file.path()).unwrap();

        assert_eq!(model.tables.len(), 1);
        let sales = model.tables.get("sales").unwrap();
        assert_eq!(sales.row_count(), 3);
    }
}