use crate::options::Flavor;
use crate::parser::diagnostics::{Diagnostics, SyntaxError, SyntaxErrorSource};
use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
use crate::parser::utils::tree_copy::copy_green_children;
use crate::parser::yaml::{YamlValidationContext, locate_yaml_diagnostic_ctx, parse_stream};
use crate::syntax::SyntaxKind;
use rowan::{GreenNodeBuilder, TextRange};
pub(crate) fn try_parse_yaml_block(
lines: &[&str],
pos: usize,
builder: &mut GreenNodeBuilder<'static>,
at_document_start: bool,
diags: &Diagnostics,
flavor: Flavor,
) -> Option<usize> {
let closing_pos = find_yaml_block_closing_pos(lines, pos, at_document_start)?;
emit_yaml_block(lines, pos, closing_pos, builder, diags, flavor)
}
pub(crate) fn find_yaml_block_closing_pos(
lines: &[&str],
pos: usize,
at_document_start: bool,
) -> Option<usize> {
if pos >= lines.len() {
return None;
}
let line = lines[pos];
if line.trim() != "---" {
return None;
}
if !at_document_start && pos > 0 {
let prev_line = lines[pos - 1];
if !prev_line.trim().is_empty() {
return None;
}
}
if pos + 1 < lines.len() {
let next_line = lines[pos + 1];
if next_line.trim().is_empty() {
return None;
}
} else {
return None;
}
let mut closing_pos = None;
for (i, content_line) in lines.iter().enumerate().skip(pos + 1) {
if content_line.trim() == "---" || content_line.trim() == "..." {
closing_pos = Some(i);
break;
}
}
closing_pos
}
pub(crate) fn emit_yaml_block(
lines: &[&str],
pos: usize,
closing_pos: usize,
builder: &mut GreenNodeBuilder<'static>,
diags: &Diagnostics,
flavor: Flavor,
) -> Option<usize> {
if pos >= lines.len() || closing_pos <= pos || closing_pos >= lines.len() {
return None;
}
builder.start_node(SyntaxKind::YAML_METADATA.into());
let (text, newline_str) = strip_newline(lines[pos]);
builder.token(SyntaxKind::YAML_METADATA_DELIM.into(), text);
if !newline_str.is_empty() {
builder.token(SyntaxKind::NEWLINE.into(), newline_str);
}
builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
let mut content = String::new();
for content_line in lines.iter().take(closing_pos).skip(pos + 1) {
content.push_str(content_line);
}
let yaml_ctx = YamlValidationContext::frontmatter(flavor);
if let Some((diag, start_off, end_off)) = locate_yaml_diagnostic_ctx(&content, "", yaml_ctx) {
let host_start = lines[pos + 1].as_ptr() as usize - lines[0].as_ptr() as usize;
diags.push(SyntaxError {
range: TextRange::new(
((host_start + start_off) as u32).into(),
((host_start + end_off) as u32).into(),
),
message: diag.message.to_string(),
source: SyntaxErrorSource::Yaml,
});
for content_line in lines.iter().take(closing_pos).skip(pos + 1) {
emit_line_tokens(builder, content_line);
}
} else {
let stream_green = parse_stream(&content).green().into_owned();
copy_green_children(builder, &stream_green);
}
builder.finish_node();
let (closing_text, closing_newline) = strip_newline(lines[closing_pos]);
builder.token(SyntaxKind::YAML_METADATA_DELIM.into(), closing_text);
if !closing_newline.is_empty() {
builder.token(SyntaxKind::NEWLINE.into(), closing_newline);
}
builder.finish_node();
Some(closing_pos + 1)
}
pub(crate) fn try_parse_pandoc_title_block(
lines: &[&str],
pos: usize,
builder: &mut GreenNodeBuilder<'static>,
) -> Option<usize> {
if pos != 0 || lines.is_empty() {
return None;
}
let first_line = lines[0];
if !first_line.trim_start().starts_with('%') {
return None;
}
builder.start_node(SyntaxKind::PANDOC_TITLE_BLOCK.into());
let mut current_pos = 0;
let mut field_count = 0;
while current_pos < lines.len() && field_count < 3 {
let line = lines[current_pos];
if line.trim_start().starts_with('%') {
emit_line_tokens(builder, line);
field_count += 1;
current_pos += 1;
while current_pos < lines.len() {
let cont_line = lines[current_pos];
if cont_line.is_empty() {
break;
}
if cont_line.trim_start().starts_with('%') {
break;
}
if cont_line.starts_with(' ') || cont_line.starts_with('\t') {
emit_line_tokens(builder, cont_line);
current_pos += 1;
} else {
break;
}
}
} else {
break;
}
}
builder.finish_node();
if field_count > 0 {
Some(current_pos)
} else {
None
}
}
fn mmd_key_value(line: &str) -> Option<(String, String)> {
let (key, value) = line.split_once(':')?;
let key_trimmed = key.trim();
if key_trimmed.is_empty() {
return None;
}
Some((key_trimmed.to_string(), value.trim().to_string()))
}
pub(crate) fn try_parse_mmd_title_block(
lines: &[&str],
pos: usize,
builder: &mut GreenNodeBuilder<'static>,
) -> Option<usize> {
if pos != 0 || lines.is_empty() {
return None;
}
let mut current_pos = pos;
let first = lines[current_pos];
let (_first_key, first_value) = mmd_key_value(first)?;
if first_value.is_empty() {
return None;
}
builder.start_node(SyntaxKind::MMD_TITLE_BLOCK.into());
while current_pos < lines.len() {
let line = lines[current_pos];
if line.trim().is_empty() {
break;
}
if mmd_key_value(line).is_none() {
builder.finish_node();
return None;
}
emit_line_tokens(builder, line);
current_pos += 1;
while current_pos < lines.len() {
let cont_line = lines[current_pos];
if cont_line.trim().is_empty() {
break;
}
let trimmed = cont_line.trim_start();
if mmd_key_value(trimmed).is_some() {
break;
}
if cont_line.starts_with(' ') || cont_line.starts_with('\t') {
emit_line_tokens(builder, cont_line);
current_pos += 1;
} else {
builder.finish_node();
return None;
}
}
}
if current_pos >= lines.len() || !lines[current_pos].trim().is_empty() {
builder.finish_node();
return None;
}
emit_line_tokens(builder, lines[current_pos]);
current_pos += 1;
builder.finish_node(); Some(current_pos)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_yaml_block_at_start() {
let lines = vec!["---", "title: Test", "---", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_yaml_block(
&lines,
0,
&mut builder,
true,
&Diagnostics::default(),
Flavor::Pandoc,
);
assert_eq!(result, Some(3));
}
#[test]
fn test_yaml_block_not_at_start() {
let lines = vec!["Paragraph", "", "---", "title: Test", "---", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_yaml_block(
&lines,
2,
&mut builder,
false,
&Diagnostics::default(),
Flavor::Pandoc,
);
assert_eq!(result, Some(5));
}
#[test]
fn test_horizontal_rule_not_yaml() {
let lines = vec!["---", "", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_yaml_block(
&lines,
0,
&mut builder,
true,
&Diagnostics::default(),
Flavor::Pandoc,
);
assert_eq!(result, None); }
#[test]
fn test_yaml_with_dots_closer() {
let lines = vec!["---", "title: Test", "...", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_yaml_block(
&lines,
0,
&mut builder,
true,
&Diagnostics::default(),
Flavor::Pandoc,
);
assert_eq!(result, Some(3));
}
#[test]
fn test_yaml_without_closing_delimiter_is_not_yaml_block() {
let lines = vec!["---", "title: Test", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_yaml_block(
&lines,
0,
&mut builder,
true,
&Diagnostics::default(),
Flavor::Pandoc,
);
assert_eq!(result, None);
}
#[test]
fn test_find_yaml_block_closing_pos() {
let lines = vec!["---", "title: Test", "---", "Content"];
let result = find_yaml_block_closing_pos(&lines, 0, true);
assert_eq!(result, Some(2));
}
#[test]
fn test_yaml_block_emits_content_node() {
let input = "---\ntitle: Test\nlist:\n - a\n---\n";
let tree = crate::parse(input, Some(crate::ParserOptions::default()));
let metadata = tree
.descendants()
.find(|n| n.kind() == SyntaxKind::YAML_METADATA)
.expect("yaml metadata node");
let content = metadata
.children()
.find(|n| n.kind() == SyntaxKind::YAML_METADATA_CONTENT)
.expect("yaml metadata content node");
assert_eq!(content.text().to_string(), "title: Test\nlist:\n - a\n");
}
#[test]
fn test_pandoc_title_simple() {
let lines = vec!["% My Title", "% Author", "% Date", "", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_pandoc_title_block(&lines, 0, &mut builder);
assert_eq!(result, Some(3));
}
#[test]
fn test_pandoc_title_with_continuation() {
let lines = vec![
"% My Title",
" on multiple lines",
"% Author One",
" Author Two",
"% June 15, 2006",
"",
"Content",
];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_pandoc_title_block(&lines, 0, &mut builder);
assert_eq!(result, Some(5));
}
#[test]
fn test_pandoc_title_partial() {
let lines = vec!["% My Title", "%", "% June 15, 2006", "", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_pandoc_title_block(&lines, 0, &mut builder);
assert_eq!(result, Some(3));
}
#[test]
fn test_pandoc_title_not_at_start() {
let lines = vec!["Content", "% Title"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_pandoc_title_block(&lines, 1, &mut builder);
assert_eq!(result, None);
}
#[test]
fn test_mmd_title_simple() {
let lines = vec!["Title: My Title", "Author: Jane Doe", "", "Content"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_mmd_title_block(&lines, 0, &mut builder);
assert_eq!(result, Some(3));
}
#[test]
fn test_mmd_title_with_continuation() {
let lines = vec![
"Title: My title",
"Author: John Doe",
"Comment: This is a sample mmd title block, with",
" a field spanning multiple lines.",
"",
"Body",
];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_mmd_title_block(&lines, 0, &mut builder);
assert_eq!(result, Some(5));
}
#[test]
fn test_mmd_title_requires_non_empty_first_value() {
let lines = vec!["Title:", "Author: Jane Doe", "", "Body"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_mmd_title_block(&lines, 0, &mut builder);
assert_eq!(result, None);
}
#[test]
fn test_mmd_title_requires_trailing_blank_line() {
let lines = vec!["Title: My Title", "Author: Jane Doe"];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_mmd_title_block(&lines, 0, &mut builder);
assert_eq!(result, None);
}
#[test]
fn test_mmd_title_not_at_start() {
let lines = vec!["Body", "Title: My Title", ""];
let mut builder = GreenNodeBuilder::new();
let result = try_parse_mmd_title_block(&lines, 1, &mut builder);
assert_eq!(result, None);
}
#[test]
fn test_indented_yaml_delimiters_are_lossless() {
let input = " ---\n title: Test\n ...\n";
let tree = crate::parse(input, Some(crate::ParserOptions::default()));
assert_eq!(tree.text().to_string(), input);
}
#[test]
fn test_valid_yaml_content_embeds_yaml_document_subtree() {
let input = "---\ntitle: Test\nlist:\n - a\n---\n";
let tree = crate::parse(input, Some(crate::ParserOptions::default()));
assert_eq!(tree.text().to_string(), input);
let content = tree
.descendants()
.find(|n| n.kind() == SyntaxKind::YAML_METADATA)
.and_then(|m| {
m.children()
.find(|c| c.kind() == SyntaxKind::YAML_METADATA_CONTENT)
})
.expect("yaml metadata content node");
let first_child = content
.children()
.next()
.expect("embedded yaml subtree child");
assert_eq!(first_child.kind(), SyntaxKind::YAML_DOCUMENT);
assert_eq!(first_child.text_range(), content.text_range());
assert!(
content
.descendants()
.all(|n| n.kind() != SyntaxKind::YAML_STREAM),
"host embed should not carry the redundant YAML_STREAM wrapper"
);
}
#[test]
fn test_invalid_yaml_content_falls_back_to_line_tokens() {
let input = "---\ntitle: 'unterminated\n---\n";
let tree = crate::parse(input, Some(crate::ParserOptions::default()));
assert_eq!(tree.text().to_string(), input);
let content = tree
.descendants()
.find(|n| n.kind() == SyntaxKind::YAML_METADATA)
.and_then(|m| {
m.children()
.find(|c| c.kind() == SyntaxKind::YAML_METADATA_CONTENT)
})
.expect("yaml metadata content node");
assert!(
content
.children()
.all(|c| c.kind() != SyntaxKind::YAML_DOCUMENT),
"invalid YAML must not embed a YAML_DOCUMENT subtree"
);
}
}