use crate::options::ParserOptions;
use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;
use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
use crate::parser::utils::inline_emission;
/// Locates a trailing MultiMarkdown-style `[identifier]` at the end of `content`.
///
/// Trailing spaces and tabs are ignored. On success returns
/// `(normalized, start, end)` where:
///
/// * `normalized` is the bracketed text with all whitespace removed, lowercased;
/// * `start` is the byte offset of the last `[` before the closing bracket;
/// * `end` is the byte offset just past the closing `]`.
///
/// Returns `None` when the text does not end in `]`, contains no `[` before it,
/// or the bracketed text is empty or whitespace-only.
fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
    let line = content.trim_end_matches([' ', '\t']);

    // Everything up to, but excluding, the closing bracket.
    let before_close = line.strip_suffix(']')?;
    let open = before_close.rfind('[')?;
    let label = &before_close[open + 1..];

    // Dropping every whitespace run leaves an empty string exactly when the
    // label was empty or blank, so a single emptiness check covers both cases.
    let id = label.split_whitespace().collect::<String>().to_lowercase();
    if id.is_empty() {
        None
    } else {
        Some((id, open, line.len()))
    }
}
/// Recognizes an ATX heading line and reports its level.
///
/// A single trailing `\r\n` or `\n` is ignored. The line qualifies when, after
/// at most three bytes of leading whitespace, it starts with one to six `#`
/// characters that are followed by a space, a tab, or the end of the line.
///
/// Returns the number of `#` characters (1 to 6), or `None` if the line is not
/// an ATX heading.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let line = content
        .strip_suffix("\r\n")
        .or_else(|| content.strip_suffix('\n'))
        .unwrap_or(content);

    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // `#` is a single byte, so counting bytes gives the same result as counting chars.
    let level = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // The marker must end the line or be separated from the text by a space/tab.
    match body.as_bytes().get(level).copied() {
        None | Some(b' ' | b'\t') => Some(level),
        Some(_) => None,
    }
}
/// Recognizes a setext heading formed by `lines[pos]` and the line after it.
///
/// The text line must be non-blank, and both lines must have fewer than four
/// bytes of leading whitespace. The underline, once trimmed, must be at least
/// three bytes long and consist solely of `=` or solely of `-`.
///
/// Returns `(level, marker)` where the level is 1 for `=` and 2 for `-`, or
/// `None` if either line is missing or the pair is not a setext heading.
pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
    /// Number of bytes of leading whitespace on `line`.
    fn indent_of(line: &str) -> usize {
        line.len() - line.trim_start().len()
    }

    let text = *lines.get(pos)?;
    // `pos` is a valid index here, so `pos + 1` cannot overflow.
    let underline = *lines.get(pos + 1)?;

    if text.trim().is_empty() || indent_of(text) >= 4 || indent_of(underline) >= 4 {
        return None;
    }

    let marks = underline.trim();
    if marks.len() < 3 {
        return None;
    }

    let marker = marks.chars().next()?;
    let level = match marker {
        '=' => 1,
        '-' => 2,
        _ => return None,
    };

    // Mixed underlines such as `==-==` are rejected.
    marks
        .chars()
        .all(|c| c == marker)
        .then_some((level, marker))
}
/// Emits a setext heading (a text line plus its underline line) into `builder`
/// as a single `HEADING` node.
///
/// Tokens are emitted in source order:
///
/// 1. leading whitespace of the text line (`WHITESPACE`);
/// 2. a `HEADING_CONTENT` node holding the inline content;
/// 3. if the text ends in an attribute block, or (when
///    `config.extensions.mmd_header_identifiers` is enabled) in a
///    MultiMarkdown `[identifier]`: the whitespace before it, an `ATTRIBUTE`
///    node with its raw text, and any spaces/tabs that follow an
///    `[identifier]`;
/// 4. the text line's newline (`NEWLINE`), if any;
/// 5. the underline's leading whitespace, a `SETEXT_HEADING_UNDERLINE` node,
///    and the underline's newline, if any.
///
/// Every byte of both input lines ends up in exactly one token.
///
/// `_level` is currently unused.
pub(crate) fn emit_setext_heading(
    builder: &mut GreenNodeBuilder<'static>,
    text_line: &str,
    underline_line: &str,
    _level: usize,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::HEADING.into());

    // Separate the line ending so it can be emitted as its own NEWLINE token.
    let (text_without_newline, text_newline_str) =
        if let Some(stripped) = text_line.strip_suffix("\r\n") {
            (stripped, "\r\n")
        } else if let Some(stripped) = text_line.strip_suffix('\n') {
            (stripped, "\n")
        } else {
            (text_line, "")
        };

    let text_trimmed = text_without_newline.trim_start();
    let leading_spaces = text_without_newline.len() - text_trimmed.len();
    if leading_spaces > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &text_without_newline[..leading_spaces],
        );
    }

    // Split the text into: inline content, optional raw attribute text, the
    // whitespace between the two, and the whitespace after the attribute.
    let (text_content, attr_text, space_before_attrs, space_after_attrs) =
        if let Some((_attrs, text_before, start_brace_pos)) =
            try_parse_trailing_attributes_with_pos(text_trimmed)
        {
            let space = &text_trimmed[text_before.len()..start_brace_pos];
            // The raw attribute text runs to the end of the line, so nothing
            // is left over after it.
            let raw_attrs = &text_trimmed[start_brace_pos..];
            (text_before, Some(raw_attrs), space, "")
        } else if config.extensions.mmd_header_identifiers {
            if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
                try_parse_mmd_header_identifier_with_pos(text_trimmed)
            {
                let text_before = text_trimmed[..start_bracket_pos].trim_end_matches([' ', '\t']);
                let space = &text_trimmed[text_before.len()..start_bracket_pos];
                let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
                // `end_bracket_pos` is measured after trailing spaces/tabs were
                // trimmed; keep that remainder so it is not lost from the tree.
                let trailing = &text_trimmed[end_bracket_pos..];
                (text_before, Some(raw_attrs), space, trailing)
            } else {
                (text_trimmed, None, "", "")
            }
        } else {
            (text_trimmed, None, "", "")
        };

    // The content node is always emitted, even when it has no inline children.
    builder.start_node(SyntaxKind::HEADING_CONTENT.into());
    if !text_content.is_empty() {
        inline_emission::emit_inlines(builder, text_content, config);
    }
    builder.finish_node();

    if !space_before_attrs.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
    }
    if let Some(attr_text) = attr_text {
        builder.start_node(SyntaxKind::ATTRIBUTE.into());
        builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
        builder.finish_node();
    }
    if !space_after_attrs.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), space_after_attrs);
    }
    if !text_newline_str.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
    }

    // Underline line: leading whitespace, the underline itself, then newline.
    let (underline_without_newline, underline_newline_str) =
        if let Some(stripped) = underline_line.strip_suffix("\r\n") {
            (stripped, "\r\n")
        } else if let Some(stripped) = underline_line.strip_suffix('\n') {
            (stripped, "\n")
        } else {
            (underline_line, "")
        };

    let underline_trimmed = underline_without_newline.trim_start();
    let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
    if underline_leading_spaces > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &underline_without_newline[..underline_leading_spaces],
        );
    }

    // Only the start was trimmed, so any trailing whitespace stays inside the
    // underline token.
    builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
    builder.token(
        SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
        underline_trimmed,
    );
    builder.finish_node();

    if !underline_newline_str.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
    }

    builder.finish_node(); // HEADING
}
/// Emits an ATX heading line into `builder` as a single `HEADING` node.
///
/// Tokens are emitted in source order:
///
/// 1. leading whitespace (`WHITESPACE`);
/// 2. an `ATX_HEADING_MARKER` node holding the first `level` bytes after the
///    leading whitespace;
/// 3. the whitespace after the marker;
/// 4. a `HEADING_CONTENT` node holding the inline content;
/// 5. if the content ends in an attribute block, or (when
///    `config.extensions.mmd_header_identifiers` is enabled) in a
///    MultiMarkdown `[identifier]`: the whitespace before it, an `ATTRIBUTE`
///    node with its raw text, and any spaces/tabs that follow an
///    `[identifier]`;
/// 6. an optional closing `#` run with its surrounding spaces/tabs;
/// 7. the line's newline (`NEWLINE`), if any.
///
/// Every byte of `content` ends up in exactly one token.
///
/// # Panics
///
/// Panics if `level` is not a valid slice index into `content` after its
/// leading whitespace has been removed (for example, when it exceeds the
/// remaining length).
pub(crate) fn emit_atx_heading(
    builder: &mut GreenNodeBuilder<'static>,
    content: &str,
    level: usize,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::HEADING.into());

    // Separate the line ending so it can be emitted as its own NEWLINE token.
    let (content_without_newline, newline_str) =
        if let Some(stripped) = content.strip_suffix("\r\n") {
            (stripped, "\r\n")
        } else if let Some(stripped) = content.strip_suffix('\n') {
            (stripped, "\n")
        } else {
            (content, "")
        };

    let trimmed = content_without_newline.trim_start();
    let leading_spaces = content_without_newline.len() - trimmed.len();
    if leading_spaces > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &content_without_newline[..leading_spaces],
        );
    }

    builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
    builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
    builder.finish_node();

    let after_marker = &trimmed[level..];
    let spaces_after_marker_count = after_marker
        .find(|c: char| !c.is_whitespace())
        .unwrap_or(after_marker.len());
    if spaces_after_marker_count > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &after_marker[..spaces_after_marker_count],
        );
    }

    let heading_text = &after_marker[spaces_after_marker_count..];

    // Split off an optional closing `#` run. It only counts as a closing
    // sequence when a space or tab separates it from the preceding text;
    // otherwise the hashes stay part of the heading content.
    let (heading_content, closing_suffix) = {
        let without_trailing_ws = heading_text.trim_end_matches([' ', '\t']);
        let trailing_hashes = without_trailing_ws
            .chars()
            .rev()
            .take_while(|&c| c == '#')
            .count();
        if trailing_hashes > 0 {
            let hashes_start = without_trailing_ws.len() - trailing_hashes;
            let before_hashes = &without_trailing_ws[..hashes_start];
            if before_hashes
                .chars()
                .last()
                .is_some_and(|c| c == ' ' || c == '\t')
            {
                // The suffix keeps the whitespace on both sides of the hashes.
                let content_end = before_hashes.trim_end_matches([' ', '\t']).len();
                (&heading_text[..content_end], &heading_text[content_end..])
            } else {
                (heading_text, "")
            }
        } else {
            (heading_text, "")
        }
    };

    // Split the content into: inline content, optional raw attribute text, the
    // whitespace between the two, and the whitespace after the attribute.
    let (text_content, attr_text, space_before_attrs, space_after_attrs) =
        if let Some((_attrs, text_before, start_brace_pos)) =
            try_parse_trailing_attributes_with_pos(heading_content)
        {
            let space = &heading_content[text_before.len()..start_brace_pos];
            // The raw attribute text runs to the end of the content, so
            // nothing is left over after it.
            let raw_attrs = &heading_content[start_brace_pos..];
            (text_before, Some(raw_attrs), space, "")
        } else if config.extensions.mmd_header_identifiers {
            if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
                try_parse_mmd_header_identifier_with_pos(heading_content)
            {
                let text_before =
                    heading_content[..start_bracket_pos].trim_end_matches([' ', '\t']);
                let space = &heading_content[text_before.len()..start_bracket_pos];
                let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
                // `end_bracket_pos` is measured after trailing spaces/tabs were
                // trimmed; keep that remainder so it is not lost from the tree.
                let trailing = &heading_content[end_bracket_pos..];
                (text_before, Some(raw_attrs), space, trailing)
            } else {
                (heading_content, None, "", "")
            }
        } else {
            (heading_content, None, "", "")
        };

    // The content node is always emitted, even when it has no inline children.
    builder.start_node(SyntaxKind::HEADING_CONTENT.into());
    if !text_content.is_empty() {
        inline_emission::emit_inlines(builder, text_content, config);
    }
    builder.finish_node();

    if !space_before_attrs.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
    }
    if let Some(attr_text) = attr_text {
        builder.start_node(SyntaxKind::ATTRIBUTE.into());
        builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
        builder.finish_node();
    }
    if !space_after_attrs.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), space_after_attrs);
    }

    // Closing sequence: whitespace, the `#` run, then any trailing whitespace.
    if !closing_suffix.is_empty() {
        let closing_trimmed = closing_suffix.trim_matches(|c| c == ' ' || c == '\t');
        let leading_ws_len = closing_suffix
            .find(|c: char| c != ' ' && c != '\t')
            .unwrap_or(closing_suffix.len());
        let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
        if leading_ws_len > 0 {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &closing_suffix[..leading_ws_len],
            );
        }
        if !closing_trimmed.is_empty() {
            builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
        }
        if trailing_ws_len > 0 {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &closing_suffix[closing_suffix.len() - trailing_ws_len..],
            );
        }
    }

    if !newline_str.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
    }

    builder.finish_node(); // HEADING
}
/// Unit tests for ATX/setext heading detection and heading emission.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_heading() {
        assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
    }

    #[test]
    fn test_level_3_heading() {
        assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
    }

    #[test]
    fn test_heading_with_leading_spaces() {
        // Three leading spaces is the largest indent still accepted.
        assert_eq!(try_parse_atx_heading("   # Heading"), Some(1));
    }

    #[test]
    fn test_atx_heading_with_attributes_losslessness() {
        use crate::ParserOptions;
        let input = "# Test {#id}\n";
        let config = ParserOptions::default();
        let tree = crate::parse(input, Some(config));
        assert_eq!(
            tree.text().to_string(),
            input,
            "Parser must preserve all bytes including space before attributes"
        );
        let heading = tree.first_child().unwrap();
        assert_eq!(heading.kind(), SyntaxKind::HEADING);

        // The single space between "Test" and "{#id}" sits at byte offset 6.
        let mut found_whitespace = false;
        for child in heading.children_with_tokens() {
            if child.kind() == SyntaxKind::WHITESPACE
                && let Some(token) = child.as_token()
            {
                let start: usize = token.text_range().start().into();
                if token.text() == " " && start == 6 {
                    found_whitespace = true;
                    break;
                }
            }
        }
        assert!(
            found_whitespace,
            "Whitespace token between heading content and attributes must be present"
        );
    }

    #[test]
    fn test_atx_heading_closing_hashes_are_lossless() {
        let input = "### Extension: `smart` ###\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn test_four_spaces_not_heading() {
        // Four leading spaces exceeds the allowed indent of three.
        assert_eq!(try_parse_atx_heading("    # Not heading"), None);
    }

    #[test]
    fn test_no_space_after_hash() {
        assert_eq!(try_parse_atx_heading("#NoSpace"), None);
    }

    #[test]
    fn test_empty_heading() {
        assert_eq!(try_parse_atx_heading("# "), Some(1));
    }

    #[test]
    fn test_level_7_invalid() {
        assert_eq!(try_parse_atx_heading("####### Too many"), None);
    }

    #[test]
    fn test_setext_level_1() {
        let lines = vec!["Heading", "======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_level_2() {
        let lines = vec!["Heading", "-------"];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
    }

    #[test]
    fn test_setext_minimum_three_chars() {
        let lines = vec!["Heading", "=="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
        let lines = vec!["Heading", "==="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_mixed_chars_invalid() {
        let lines = vec!["Heading", "==-=="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_with_leading_spaces() {
        // Up to three leading spaces are allowed before the underline.
        let lines = vec!["Heading", "   ======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_with_trailing_spaces() {
        let lines = vec!["Heading", "=======   "];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_empty_text_line() {
        let lines = vec!["", "======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_no_next_line() {
        let lines = vec!["Heading"];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_four_spaces_indent() {
        // Four or more leading spaces on either line disqualifies the heading.
        let lines = vec!["    Heading", "    ======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_long_underline() {
        let underline = "=".repeat(100);
        let lines = vec!["Heading", underline.as_str()];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
        let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
            .expect("should parse mmd header identifier");
        assert_eq!(parsed.0, "myid");
        assert_eq!(parsed.1, 10);
    }
}