mollendorff_forge/parser/
multi_doc.rs1use crate::error::{ForgeError, ForgeResult};
6use crate::types::ParsedModel;
7use serde_yaml_ng::Value;
8use std::collections::HashSet;
9use std::path::Path;
10
11use super::includes::resolve_includes;
12use super::model::parse_v1_model;
13
/// Reports whether `content` contains more than one YAML document.
///
/// A line counts as a document separator when, ignoring trailing whitespace,
/// it is exactly `---` or starts with `--- ` (a marker followed by a space and
/// further text, e.g. `--- # comment`). The content is considered
/// multi-document as soon as two such separators have been found.
///
/// The marker must begin at column 0. YAML only recognises document markers at
/// the start of a line, so an indented `---` is ordinary content (typically a
/// line inside a block scalar) and must not be counted.
#[must_use]
pub fn detect_multi_document(content: &str) -> bool {
    content
        .lines()
        .filter(|line| {
            // Strip trailing whitespace only: leading whitespace means the
            // dashes are content, not a document marker.
            let marker = line.trim_end();
            marker == "---" || marker.starts_with("--- ")
        })
        // Lazily stops scanning once the second separator is reached.
        .nth(1)
        .is_some()
}
31
32pub fn parse_single_document_yaml(content: &str, path: &Path) -> ForgeResult<ParsedModel> {
38 let content = content.trim_start();
40 let content = content.strip_prefix("---").map_or(content, str::trim_start);
41
42 let yaml: Value = serde_yaml_ng::from_str(content)?;
43
44 let mut model = parse_v1_model(&yaml)?;
45
46 if !model.includes.is_empty() {
48 resolve_includes(&mut model, path, &mut HashSet::new())?;
49 }
50
51 Ok(model)
52}
53
54pub fn parse_multi_document_yaml(content: &str, path: &Path) -> ForgeResult<ParsedModel> {
63 let mut merged_model = ParsedModel::new();
64 let mut doc_index = 0;
65
66 let docs = split_yaml_documents(content);
68
69 for doc_content in docs {
70 let doc_content = doc_content.trim();
71 if doc_content.is_empty() {
72 continue;
73 }
74
75 let non_comment_content: String = doc_content
77 .lines()
78 .filter(|line| !line.trim().starts_with('#') && !line.trim().is_empty())
79 .collect::<Vec<_>>()
80 .join("\n");
81 if non_comment_content.is_empty() {
82 continue;
83 }
84
85 doc_index += 1;
86
87 let yaml: Value = match serde_yaml_ng::from_str(doc_content) {
89 Ok(v) => v,
90 Err(e) => {
91 return Err(ForgeError::Parse(format!(
92 "Failed to parse document {doc_index}: {e}"
93 )));
94 },
95 };
96
97 let doc_model = parse_v1_model(&yaml)?;
98
99 let doc_name = if let Some(Value::String(name)) = yaml.get("_name") {
101 name.clone()
102 } else {
103 format!("doc{doc_index}")
104 };
105
106 for (table_name, table) in doc_model.tables {
108 let prefixed_name = format!("{doc_name}.{table_name}");
109 let mut prefixed_table = table;
110 prefixed_table.name.clone_from(&prefixed_name);
111 merged_model.tables.insert(prefixed_name, prefixed_table);
112 }
113
114 for (scalar_name, mut scalar) in doc_model.scalars {
116 let prefixed_name = format!("{doc_name}.{scalar_name}");
117 scalar.path.clone_from(&prefixed_name);
118 merged_model.scalars.insert(prefixed_name, scalar);
119 }
120
121 for include in doc_model.includes {
123 merged_model.includes.push(include);
124 }
125
126 for (scenario_name, scenario) in doc_model.scenarios {
128 let prefixed_name = format!("{doc_name}.{scenario_name}");
129 merged_model.scenarios.insert(prefixed_name, scenario);
130 }
131
132 merged_model.documents.push(doc_name);
134 }
135
136 if !merged_model.includes.is_empty() {
138 resolve_includes(&mut merged_model, path, &mut HashSet::new())?;
139 }
140
141 Ok(merged_model)
142}
143
/// Splits a YAML stream into the text of its individual documents.
///
/// A line is a separator when, ignoring trailing whitespace, it is exactly
/// `---` or starts with `--- `. The marker must begin at column 0: YAML only
/// recognises document markers at the start of a line, so an indented `---`
/// (for example inside a block scalar) is kept as document content.
///
/// Behaviour worth knowing:
/// - The separator line itself, including any text after the marker, is not
///   part of any returned document.
/// - Content that appears before the first separator is discarded once a
///   separator is encountered; content with no separator at all is returned
///   as a single document.
/// - Documents consisting only of whitespace are omitted.
/// - Lines are re-joined with `\n` and carry no trailing newline.
#[must_use]
pub fn split_yaml_documents(content: &str) -> Vec<String> {
    let mut documents = Vec::new();
    let mut current_doc = String::new();
    let mut seen_separator = false;

    for line in content.lines() {
        // Strip trailing whitespace only, so indented dashes stay content.
        let marker = line.trim_end();
        if marker == "---" || marker.starts_with("--- ") {
            // Only text collected after an earlier separator forms a document;
            // anything gathered before the first separator is dropped below.
            if seen_separator && !current_doc.trim().is_empty() {
                documents.push(std::mem::take(&mut current_doc));
            }
            seen_separator = true;
            current_doc.clear();
        } else {
            if !current_doc.is_empty() {
                current_doc.push('\n');
            }
            current_doc.push_str(line);
        }
    }

    // Flush the final document, which has no closing separator.
    if !current_doc.trim().is_empty() {
        documents.push(current_doc);
    }

    documents
}
176
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `yaml` to a fresh temporary file and reads it back, returning
    /// the file handle (so its path stays valid) together with the content.
    fn round_trip(yaml: &str) -> (NamedTempFile, String) {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(yaml.as_bytes()).unwrap();
        let content = std::fs::read_to_string(file.path()).unwrap();
        (file, content)
    }

    /// Runs `parse_multi_document_yaml` on `yaml` via a temporary file.
    fn parse_multi(yaml: &str) -> ForgeResult<ParsedModel> {
        let (file, content) = round_trip(yaml);
        parse_multi_document_yaml(&content, file.path())
    }

    // --- detect_multi_document -------------------------------------------

    #[test]
    fn test_detect_multi_document_true() {
        assert!(detect_multi_document("---\nfirst: 1\n---\nsecond: 2\n"));
    }

    #[test]
    fn test_detect_multi_document_false_single_separator() {
        assert!(!detect_multi_document("---\nfirst: 1\n"));
    }

    #[test]
    fn test_detect_multi_document_false_no_separator() {
        assert!(!detect_multi_document("first: 1\nsecond: 2\n"));
    }

    #[test]
    fn test_detect_multi_document_with_trailing_content() {
        assert!(detect_multi_document(
            "--- first doc\nfirst: 1\n--- second\nsecond: 2\n"
        ));
    }

    // --- split_yaml_documents --------------------------------------------

    #[test]
    fn test_split_yaml_documents() {
        let docs = split_yaml_documents("---\nfirst: 1\n---\nsecond: 2\n");
        assert_eq!(docs.len(), 2);
        assert!(docs[0].contains("first: 1"));
        assert!(docs[1].contains("second: 2"));
    }

    #[test]
    fn test_split_yaml_documents_empty() {
        assert!(split_yaml_documents("").is_empty());
    }

    #[test]
    fn test_split_yaml_documents_single() {
        assert_eq!(split_yaml_documents("---\nfirst: 1\n").len(), 1);
    }

    // --- parse_multi_document_yaml ---------------------------------------

    /// Documents with an explicit `_name` are prefixed with that name.
    #[test]
    fn test_parse_multi_doc_with_names() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
_name: "revenue"
data:
  values: [100, 200, 300]
---
_forge_version: "5.0.0"
_name: "costs"
expenses:
  amounts: [50, 100, 150]
"#,
        )
        .unwrap();

        assert!(model.tables.contains_key("revenue.data"));
        assert!(model.tables.contains_key("costs.expenses"));
        assert_eq!(model.documents.len(), 2);
        assert!(model.documents.contains(&"revenue".to_string()));
        assert!(model.documents.contains(&"costs".to_string()));
    }

    /// Documents without `_name` fall back to `doc1`, `doc2`, ...
    #[test]
    fn test_parse_multi_doc_auto_names() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
data1:
  values: [1, 2, 3]
---
_forge_version: "5.0.0"
data2:
  values: [4, 5, 6]
"#,
        )
        .unwrap();

        assert!(model.tables.contains_key("doc1.data1"));
        assert!(model.tables.contains_key("doc2.data2"));
    }

    /// Scalars are prefixed with the document name just like tables.
    #[test]
    fn test_parse_multi_doc_with_scalars() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
_name: "config"
rate:
  value: 0.05
  formula: null
---
_forge_version: "5.0.0"
_name: "data"
values:
  items: [1, 2, 3]
"#,
        )
        .unwrap();

        assert!(model.scalars.contains_key("config.rate"));
        assert!(model.tables.contains_key("data.values"));
    }

    /// A document made only of comments is skipped rather than parsed.
    #[test]
    fn test_parse_multi_doc_skip_comments() {
        let model = parse_multi(
            r#"---
# This is a comment-only document
# No actual content
---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
"#,
        )
        .unwrap();

        assert!(!model.tables.is_empty());
    }

    /// An empty document between two separators is ignored.
    #[test]
    fn test_parse_multi_doc_with_empty_doc() {
        let model = parse_multi(
            r#"---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
---

---
_forge_version: "5.0.0"
data2:
  values: [4, 5, 6]
"#,
        )
        .unwrap();

        assert_eq!(model.tables.len(), 2);
    }

    /// Invalid YAML in any document surfaces as a parse error.
    #[test]
    fn test_parse_multi_doc_invalid_yaml_error() {
        let outcome = parse_multi(
            r#"---
_forge_version: "5.0.0"
data:
  values: [1, 2, 3]
---
invalid: yaml: [[[
"#,
        );

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Failed to parse document"));
    }

    /// Scenario names are prefixed with the owning document's name.
    #[test]
    fn test_parse_multi_doc_with_scenarios() {
        let model = parse_multi(
            r#"---
_name: doc1
_forge_version: "5.0.0"
budget:
  revenue: [1000, 2000]
scenarios:
  optimistic:
    growth: 1.2
---
_name: doc2
_forge_version: "5.0.0"
budget:
  costs: [500, 600]
scenarios:
  pessimistic:
    growth: 0.8
"#,
        )
        .unwrap();

        assert!(model.scenarios.contains_key("doc1.optimistic"));
        assert!(model.scenarios.contains_key("doc2.pessimistic"));
    }

    // --- parse_single_document_yaml --------------------------------------

    /// A single document may start with a `---` marker.
    #[test]
    fn test_multi_document_yaml_with_leading_separator() {
        let (file, content) = round_trip(
            r#"---
_forge_version: "5.0.0"

sales:
  month: ["Jan", "Feb", "Mar"]
  revenue: [100, 200, 300]
"#,
        );
        let model = parse_single_document_yaml(&content, file.path()).unwrap();

        assert_eq!(model.tables.len(), 1);
        let sales = model.tables.get("sales").unwrap();
        assert_eq!(sales.row_count(), 3);
    }
}