Skip to main content

panache_parser/parser/blocks/
metadata.rs

1//! YAML metadata block parsing utilities.
2
3use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7/// Try to parse a YAML metadata block starting at the given position.
8/// Returns the new position after the block if successful, None otherwise.
9///
10/// A YAML block:
11/// - Starts with `---` (not followed by blank line)
12/// - Ends with `---` or `...`
13/// - At document start OR preceded by blank line
14pub(crate) fn try_parse_yaml_block(
15    lines: &[&str],
16    pos: usize,
17    builder: &mut GreenNodeBuilder<'static>,
18    at_document_start: bool,
19) -> Option<usize> {
20    let closing_pos = find_yaml_block_closing_pos(lines, pos, at_document_start)?;
21    emit_yaml_block(lines, pos, closing_pos, builder)
22}
23
/// Locate the closing delimiter of a YAML metadata block opening at
/// `lines[pos]`, without emitting anything.
///
/// Returns the index of the first `---` or `...` line after `pos`, or `None`
/// when:
/// - `pos` is out of range or `lines[pos]` is not `---` (ignoring surrounding
///   whitespace),
/// - `at_document_start` is false, `pos > 0`, and the previous line is not
///   blank,
/// - there is no following line, or the following line is blank (which is
///   treated as a horizontal rule rather than YAML), or
/// - no closing delimiter exists.
pub(crate) fn find_yaml_block_closing_pos(
    lines: &[&str],
    pos: usize,
    at_document_start: bool,
) -> Option<usize> {
    // The opener must exist and consist solely of `---`.
    let opener = lines.get(pos)?;
    if opener.trim() != "---" {
        return None;
    }

    // Away from the document start, the opener has to follow a blank line.
    let follows_text = !at_document_start && pos > 0 && !lines[pos - 1].trim().is_empty();
    if follows_text {
        return None;
    }

    // A missing or blank next line means this cannot be a YAML block; a blank
    // one most likely makes the `---` a horizontal rule.
    let first_content = lines.get(pos + 1)?;
    if first_content.trim().is_empty() {
        return None;
    }

    // The first delimiter-looking line after the opener closes the block.
    lines
        .iter()
        .enumerate()
        .skip(pos + 1)
        .find(|(_, candidate)| matches!(candidate.trim(), "---" | "..."))
        .map(|(idx, _)| idx)
}
70
71pub(crate) fn emit_yaml_block(
72    lines: &[&str],
73    pos: usize,
74    closing_pos: usize,
75    builder: &mut GreenNodeBuilder<'static>,
76) -> Option<usize> {
77    if pos >= lines.len() || closing_pos <= pos || closing_pos >= lines.len() {
78        return None;
79    }
80    // Start metadata node
81    builder.start_node(SyntaxKind::YAML_METADATA.into());
82
83    // Opening delimiter - strip newline before emitting
84    let (text, newline_str) = strip_newline(lines[pos]);
85    builder.token(SyntaxKind::YAML_METADATA_DELIM.into(), text);
86    if !newline_str.is_empty() {
87        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
88    }
89
90    builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
91    for content_line in lines.iter().take(closing_pos).skip(pos + 1) {
92        emit_line_tokens(builder, content_line);
93    }
94    builder.finish_node(); // YAML_METADATA_CONTENT
95
96    let (closing_text, closing_newline) = strip_newline(lines[closing_pos]);
97    builder.token(SyntaxKind::YAML_METADATA_DELIM.into(), closing_text);
98    if !closing_newline.is_empty() {
99        builder.token(SyntaxKind::NEWLINE.into(), closing_newline);
100    }
101
102    builder.finish_node(); // YamlMetadata
103
104    Some(closing_pos + 1)
105}
106
107/// Try to parse a Pandoc title block starting at the beginning of document.
108/// Returns the new position after the block if successful, None otherwise.
109///
110/// A Pandoc title block:
111/// - Must be at document start (pos == 0)
112/// - Has 1-3 lines starting with `%`
113/// - Format: % title, % author(s), % date
114/// - Continuation lines start with leading space
115pub(crate) fn try_parse_pandoc_title_block(
116    lines: &[&str],
117    pos: usize,
118    builder: &mut GreenNodeBuilder<'static>,
119) -> Option<usize> {
120    if pos != 0 || lines.is_empty() {
121        return None;
122    }
123
124    let first_line = lines[0];
125    if !first_line.trim_start().starts_with('%') {
126        return None;
127    }
128
129    // Start title block node
130    builder.start_node(SyntaxKind::PANDOC_TITLE_BLOCK.into());
131
132    let mut current_pos = 0;
133    let mut field_count = 0;
134
135    // Parse up to 3 fields (title, author, date)
136    while current_pos < lines.len() && field_count < 3 {
137        let line = lines[current_pos];
138
139        // Check if this line starts a field (begins with %)
140        if line.trim_start().starts_with('%') {
141            emit_line_tokens(builder, line);
142            field_count += 1;
143            current_pos += 1;
144
145            // Collect continuation lines (start with leading space, not with %)
146            while current_pos < lines.len() {
147                let cont_line = lines[current_pos];
148                if cont_line.is_empty() {
149                    // Blank line ends title block
150                    break;
151                }
152                if cont_line.trim_start().starts_with('%') {
153                    // Next field
154                    break;
155                }
156                if cont_line.starts_with(' ') || cont_line.starts_with('\t') {
157                    // Continuation line
158                    emit_line_tokens(builder, cont_line);
159                    current_pos += 1;
160                } else {
161                    // Non-continuation, non-% line ends title block
162                    break;
163                }
164            }
165        } else {
166            // Line doesn't start with %, title block ends
167            break;
168        }
169    }
170
171    builder.finish_node(); // PandocTitleBlock
172
173    if field_count > 0 {
174        Some(current_pos)
175    } else {
176        None
177    }
178}
179
/// Split `line` at its first `:` into a `(key, value)` pair, both trimmed of
/// surrounding whitespace.
///
/// Returns `None` when the line has no `:` or when the key is empty after
/// trimming. The value may be empty.
fn mmd_key_value(line: &str) -> Option<(String, String)> {
    match line.split_once(':') {
        Some((raw_key, raw_value)) if !raw_key.trim().is_empty() => {
            Some((raw_key.trim().to_owned(), raw_value.trim().to_owned()))
        }
        _ => None,
    }
}
188
189/// Try to parse a MultiMarkdown title block starting at the beginning of document.
190/// Returns the new position after the block if successful, None otherwise.
191///
192/// A MultiMarkdown title block:
193/// - Must be at document start (pos == 0)
194/// - Contains one or more `Key: Value` lines
195/// - The first field value must be non-empty
196/// - Continuation lines start with leading space or tab
197/// - Terminates with a blank line
198pub(crate) fn try_parse_mmd_title_block(
199    lines: &[&str],
200    pos: usize,
201    builder: &mut GreenNodeBuilder<'static>,
202) -> Option<usize> {
203    if pos != 0 || lines.is_empty() {
204        return None;
205    }
206
207    let mut current_pos = pos;
208
209    // First line must be a key-value pair with non-empty value.
210    let first = lines[current_pos];
211    let (_first_key, first_value) = mmd_key_value(first)?;
212    if first_value.is_empty() {
213        return None;
214    }
215
216    builder.start_node(SyntaxKind::MMD_TITLE_BLOCK.into());
217
218    while current_pos < lines.len() {
219        let line = lines[current_pos];
220
221        if line.trim().is_empty() {
222            break;
223        }
224
225        if mmd_key_value(line).is_none() {
226            builder.finish_node();
227            return None;
228        }
229
230        emit_line_tokens(builder, line);
231        current_pos += 1;
232
233        // Optional continuation lines (must be indented and not key-value starts).
234        while current_pos < lines.len() {
235            let cont_line = lines[current_pos];
236            if cont_line.trim().is_empty() {
237                break;
238            }
239
240            let trimmed = cont_line.trim_start();
241            if mmd_key_value(trimmed).is_some() {
242                break;
243            }
244
245            if cont_line.starts_with(' ') || cont_line.starts_with('\t') {
246                emit_line_tokens(builder, cont_line);
247                current_pos += 1;
248            } else {
249                builder.finish_node();
250                return None;
251            }
252        }
253    }
254
255    if current_pos >= lines.len() || !lines[current_pos].trim().is_empty() {
256        builder.finish_node();
257        return None;
258    }
259
260    emit_line_tokens(builder, lines[current_pos]);
261    current_pos += 1;
262
263    builder.finish_node(); // MMD_TITLE_BLOCK
264    Some(current_pos)
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// A `---` … `---` block on the first line is consumed through its closer.
    #[test]
    fn test_yaml_block_at_start() {
        let lines = ["---", "title: Test", "---", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_yaml_block(&lines, 0, &mut builder, true), Some(3));
    }

    /// Away from document start, a block is accepted after a blank line.
    #[test]
    fn test_yaml_block_not_at_start() {
        let lines = ["Paragraph", "", "---", "title: Test", "---", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(
            try_parse_yaml_block(&lines, 2, &mut builder, false),
            Some(5)
        );
    }

    /// `---` followed by a blank line is a horizontal rule, not YAML.
    #[test]
    fn test_horizontal_rule_not_yaml() {
        let lines = ["---", "", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_yaml_block(&lines, 0, &mut builder, true), None);
    }

    /// `...` is accepted as a closing delimiter.
    #[test]
    fn test_yaml_with_dots_closer() {
        let lines = ["---", "title: Test", "...", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_yaml_block(&lines, 0, &mut builder, true), Some(3));
    }

    /// Without a closing delimiter the opener does not start a YAML block.
    #[test]
    fn test_yaml_without_closing_delimiter_is_not_yaml_block() {
        let lines = ["---", "title: Test", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_yaml_block(&lines, 0, &mut builder, true), None);
    }

    /// The finder reports the index of the closing delimiter itself.
    #[test]
    fn test_find_yaml_block_closing_pos() {
        let lines = ["---", "title: Test", "---", "Content"];
        assert_eq!(find_yaml_block_closing_pos(&lines, 0, true), Some(2));
    }

    /// The payload between the delimiters lives in its own content node.
    #[test]
    fn test_yaml_block_emits_content_node() {
        let input = "---\ntitle: Test\nlist:\n  - a\n---\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        let metadata = tree
            .descendants()
            .find(|node| node.kind() == SyntaxKind::YAML_METADATA)
            .expect("yaml metadata node");
        let content = metadata
            .children()
            .find(|node| node.kind() == SyntaxKind::YAML_METADATA_CONTENT)
            .expect("yaml metadata content node");
        assert_eq!(content.text().to_string(), "title: Test\nlist:\n  - a\n");
    }

    /// Three `%` lines form a complete title block.
    #[test]
    fn test_pandoc_title_simple() {
        let lines = ["% My Title", "% Author", "% Date", "", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(
            try_parse_pandoc_title_block(&lines, 0, &mut builder),
            Some(3)
        );
    }

    /// Indented lines continue the preceding field.
    #[test]
    fn test_pandoc_title_with_continuation() {
        let lines = [
            "% My Title",
            "  on multiple lines",
            "% Author One",
            "  Author Two",
            "% June 15, 2006",
            "",
            "Content",
        ];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(
            try_parse_pandoc_title_block(&lines, 0, &mut builder),
            Some(5)
        );
    }

    /// A bare `%` line still counts as a (blank) field.
    #[test]
    fn test_pandoc_title_partial() {
        let lines = ["% My Title", "%", "% June 15, 2006", "", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(
            try_parse_pandoc_title_block(&lines, 0, &mut builder),
            Some(3)
        );
    }

    /// Title blocks are only recognised at position 0.
    #[test]
    fn test_pandoc_title_not_at_start() {
        let lines = ["Content", "% Title"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_pandoc_title_block(&lines, 1, &mut builder), None);
    }

    /// Key/value lines followed by a blank line form an MMD title block.
    #[test]
    fn test_mmd_title_simple() {
        let lines = ["Title: My Title", "Author: Jane Doe", "", "Content"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_mmd_title_block(&lines, 0, &mut builder), Some(3));
    }

    /// An indented line continues the previous field's value.
    #[test]
    fn test_mmd_title_with_continuation() {
        let lines = [
            "Title: My title",
            "Author: John Doe",
            "Comment: This is a sample mmd title block, with",
            "  a field spanning multiple lines.",
            "",
            "Body",
        ];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_mmd_title_block(&lines, 0, &mut builder), Some(5));
    }

    /// The first field must carry a value.
    #[test]
    fn test_mmd_title_requires_non_empty_first_value() {
        let lines = ["Title:", "Author: Jane Doe", "", "Body"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_mmd_title_block(&lines, 0, &mut builder), None);
    }

    /// The block must be terminated by a blank line.
    #[test]
    fn test_mmd_title_requires_trailing_blank_line() {
        let lines = ["Title: My Title", "Author: Jane Doe"];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_mmd_title_block(&lines, 0, &mut builder), None);
    }

    /// MMD title blocks are only recognised at position 0.
    #[test]
    fn test_mmd_title_not_at_start() {
        let lines = ["Body", "Title: My Title", ""];
        let mut builder = GreenNodeBuilder::new();
        assert_eq!(try_parse_mmd_title_block(&lines, 1, &mut builder), None);
    }

    /// Parsing indented delimiters must reproduce the input text exactly.
    #[test]
    fn test_indented_yaml_delimiters_are_lossless() {
        let input = "    ---\n    title: Test\n    ...\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input);
    }
}