use crate::options::ParserOptions;
use crate::parser::inlines::inline_html::{parse_close_tag, parse_open_tag};
use crate::syntax::{SyntaxKind, SyntaxNode};
use rowan::GreenNodeBuilder;
use super::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
use crate::parser::utils::helpers::{strip_leading_spaces, strip_newline};
/// Lowercase tag names that start an HTML block when parsing in CommonMark
/// mode (consulted by `try_parse_html_block_start` and
/// `is_html_block_tag_name`).
///
/// NOTE(review): the entries look like the CommonMark "type 6" HTML block tag
/// list — confirm against the spec revision this parser targets.
#[rustfmt::skip]
const BLOCK_TAGS: &[&str] = &[
    "address", "article", "aside",
    "base", "basefont", "blockquote", "body",
    "caption", "center", "col", "colgroup",
    "dd", "details", "dialog", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "iframe",
    "legend", "li", "link",
    "main", "menu", "menuitem",
    "nav", "noframes",
    "ol", "optgroup", "option",
    "p", "param",
    "section", "source", "summary",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
    "ul",
];
/// Lowercase tag names for which `try_parse_html_block_start` sets
/// `is_verbatim` on the resulting block type.
const VERBATIM_TAGS: &[&str] = &[
    "script",
    "style",
    "pre",
    "textarea",
];
/// Lowercase tag names treated as block-level HTML when the parser is *not* in
/// CommonMark mode (consulted by `try_parse_html_block_start` and the
/// `is_pandoc_*` predicates).
///
/// NOTE(review): presumably mirrors pandoc's own block-tag list — confirm
/// against the pandoc reader before editing.
#[rustfmt::skip]
const PANDOC_BLOCK_TAGS: &[&str] = &[
    "address", "article", "aside",
    "blockquote", "body",
    "canvas", "caption", "center", "col", "colgroup",
    "dd", "details", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    "isindex",
    "li",
    "main", "menu", "meta",
    "nav", "noframes",
    "ol", "output",
    "p", "pre",
    "script", "section", "style", "summary",
    "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "tr",
    "ul",
];
/// Returns `true` when `name` is listed in `BLOCK_TAGS`, ignoring ASCII case.
pub fn is_html_block_tag_name(name: &str) -> bool {
    // Table entries are lowercase ASCII, so an ASCII-case-insensitive
    // comparison is equivalent to lowercasing `name` first, without the
    // per-call `String` allocation.
    BLOCK_TAGS
        .iter()
        .any(|tag| name.eq_ignore_ascii_case(tag))
}
/// Returns `true` when `name` is listed in `PANDOC_BLOCK_TAGS`, ignoring ASCII
/// case.
pub fn is_pandoc_block_tag_name(name: &str) -> bool {
    // Table entries are lowercase ASCII, so an ASCII-case-insensitive
    // comparison is equivalent to lowercasing `name` first, without the
    // per-call `String` allocation.
    PANDOC_BLOCK_TAGS
        .iter()
        .any(|tag| name.eq_ignore_ascii_case(tag))
}
/// Lowercase tag names that are not in `PANDOC_BLOCK_TAGS` but that
/// `try_parse_html_block_start` still accepts as an HTML block start outside
/// CommonMark mode.
#[rustfmt::skip]
const PANDOC_INLINE_BLOCK_TAGS: &[&str] = &[
    "applet", "audio",
    "button",
    "del",
    "iframe", "ins",
    "map",
    "noscript",
    "object",
    "progress",
    "svg",
    "video",
];
/// Returns `true` when `name` is listed in `PANDOC_INLINE_BLOCK_TAGS`,
/// ignoring ASCII case.
pub fn is_pandoc_inline_block_tag_name(name: &str) -> bool {
    // Table entries are lowercase ASCII, so an ASCII-case-insensitive
    // comparison is equivalent to lowercasing `name` first, without the
    // per-call `String` allocation.
    PANDOC_INLINE_BLOCK_TAGS
        .iter()
        .any(|tag| name.eq_ignore_ascii_case(tag))
}
/// Lowercase tag names that, outside CommonMark mode, produce a block type
/// with `closes_at_open_tag` set (see `try_parse_html_block_start`).
const PANDOC_VOID_BLOCK_TAGS: &[&str] = &[
    "area",
    "embed",
    "source",
    "track",
];
/// Returns `true` when `name` is listed in `PANDOC_VOID_BLOCK_TAGS`, ignoring
/// ASCII case.
pub fn is_pandoc_void_block_tag_name(name: &str) -> bool {
    // Table entries are lowercase ASCII, so an ASCII-case-insensitive
    // comparison is equivalent to lowercasing `name` first, without the
    // per-call `String` allocation.
    PANDOC_VOID_BLOCK_TAGS
        .iter()
        .any(|tag| name.eq_ignore_ascii_case(tag))
}
/// Returns `true` when `name` (compared ASCII case-insensitively) is in
/// `PANDOC_BLOCK_TAGS` or `PANDOC_INLINE_BLOCK_TAGS` and is none of: a
/// `VERBATIM_TAGS` entry, a `PANDOC_VOID_BLOCK_TAGS` entry, or `div`.
pub(crate) fn is_pandoc_lift_eligible_block_tag(name: &str) -> bool {
    // All tables hold lowercase ASCII names, so comparing in place is
    // equivalent to lowercasing `name` once and avoids the allocation.
    let listed_in = |table: &[&str]| table.iter().any(|tag| name.eq_ignore_ascii_case(tag));

    let excluded = listed_in(VERBATIM_TAGS)
        || listed_in(PANDOC_VOID_BLOCK_TAGS)
        || name.eq_ignore_ascii_case("div");

    !excluded && (listed_in(PANDOC_BLOCK_TAGS) || listed_in(PANDOC_INLINE_BLOCK_TAGS))
}
/// Returns `true` when `name` (compared ASCII case-insensitively) is in
/// `PANDOC_BLOCK_TAGS`, `PANDOC_INLINE_BLOCK_TAGS` or `VERBATIM_TAGS`, and is
/// not a `PANDOC_VOID_BLOCK_TAGS` entry.
pub(crate) fn is_pandoc_matched_pair_tag(name: &str) -> bool {
    // All tables hold lowercase ASCII names, so comparing in place is
    // equivalent to lowercasing `name` once and avoids the allocation.
    let listed_in = |table: &[&str]| table.iter().any(|tag| name.eq_ignore_ascii_case(tag));

    !listed_in(PANDOC_VOID_BLOCK_TAGS)
        && (listed_in(PANDOC_BLOCK_TAGS)
            || listed_in(PANDOC_INLINE_BLOCK_TAGS)
            || listed_in(VERBATIM_TAGS))
}
/// Returns the tag name of `block_type` when all of the following hold, and
/// `None` otherwise:
///
/// * `bq_depth` is non-zero and `wrapper_kind` is `SyntaxKind::HTML_BLOCK`;
/// * `block_type` is an opening, depth-aware, non-verbatim `BlockTag` that is
///   neither blank-line-terminated nor closed at its open tag;
/// * the tag satisfies `is_pandoc_lift_eligible_block_tag`.
fn bq_strict_attr_emit_tag_name(
    wrapper_kind: SyntaxKind,
    block_type: &HtmlBlockType,
    bq_depth: usize,
) -> Option<&str> {
    let inside_blockquote = bq_depth != 0;
    let plain_html_wrapper = wrapper_kind == SyntaxKind::HTML_BLOCK;
    if !(inside_blockquote && plain_html_wrapper) {
        return None;
    }

    let HtmlBlockType::BlockTag {
        tag_name,
        is_verbatim: false,
        closed_by_blank_line: false,
        depth_aware: true,
        closes_at_open_tag: false,
        is_closing: false,
    } = block_type
    else {
        return None;
    };

    if is_pandoc_lift_eligible_block_tag(tag_name) {
        Some(tag_name.as_str())
    } else {
        None
    }
}
/// Classification of the construct that opens an HTML block, as returned by
/// `try_parse_html_block_start`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum HtmlBlockType {
/// Line starts with `<!--`; `is_closing_marker` ends it on a line containing `-->`.
Comment,
/// Line starts with `<?`; `is_closing_marker` ends it on a line containing `?>`.
ProcessingInstruction,
/// CommonMark mode only: `<!` followed by an ASCII letter; ends on a line containing `>`.
Declaration,
/// CommonMark mode only: line starts with `<![CDATA[`; ends on a line containing `]]>`.
CData,
/// An opening or closing tag whose name matched one of the tag tables.
BlockTag {
/// Tag name, lowercased by `try_parse_html_block_start`.
tag_name: String,
/// Set when the tag name is in `VERBATIM_TAGS`.
is_verbatim: bool,
/// When `true`, `ends_at_blank_line` reports the block as blank-line
/// terminated and `is_closing_marker` never matches.
closed_by_blank_line: bool,
/// When `true` (and `closed_by_blank_line` is `false`),
/// `parse_html_block_with_wrapper` tracks open/close nesting of
/// `tag_name` with `count_tag_balance` to find the end of the block.
depth_aware: bool,
/// When `true`, `parse_html_block_with_wrapper` treats the block as
/// finished once the open tag itself ends.
closes_at_open_tag: bool,
/// Whether the line began with `</`.
is_closing: bool,
},
/// CommonMark mode only: the line is a complete open or close tag (per
/// `parse_open_tag` / `parse_close_tag`) followed only by whitespace, and
/// the tag name is not in `VERBATIM_TAGS`. Blank-line terminated.
Type7,
}
/// Classifies `content` as the first line of an HTML block.
///
/// Leading spaces are removed with `strip_leading_spaces` first. Returns
/// `None` when the line does not begin with `<` or matches none of the
/// recognised openers.
///
/// * `is_commonmark == true`: recognises comments, processing instructions,
///   CDATA, declarations, `BLOCK_TAGS` tags, opening `VERBATIM_TAGS` tags, and
///   finally a lone complete open/close tag (`Type7`).
/// * `is_commonmark == false`: recognises comments, processing instructions,
///   and tags from the `PANDOC_*` / `VERBATIM_TAGS` tables, with closing tags
///   handled specially (see inline comments).
pub(crate) fn try_parse_html_block_start(
    content: &str,
    is_commonmark: bool,
) -> Option<HtmlBlockType> {
    let trimmed = strip_leading_spaces(content);
    if !trimmed.starts_with('<') {
        return None;
    }

    // Openers recognised regardless of dialect.
    if trimmed.starts_with("<!--") {
        return Some(HtmlBlockType::Comment);
    }
    if trimmed.starts_with("<?") {
        return Some(HtmlBlockType::ProcessingInstruction);
    }

    // CDATA sections and `<!X...` declarations are CommonMark-only.
    if is_commonmark {
        if trimmed.starts_with("<![CDATA[") {
            return Some(HtmlBlockType::CData);
        }
        let starts_declaration = trimmed
            .strip_prefix("<!")
            .and_then(|rest| rest.chars().next())
            .is_some_and(|c| c.is_ascii_alphabetic());
        if starts_declaration {
            return Some(HtmlBlockType::Declaration);
        }
    }

    if let Some(raw_name) = extract_block_tag_name(trimmed, true) {
        let tag = raw_name.to_lowercase();
        let is_closing = trimmed.starts_with("</");

        // Table membership, computed once.
        let in_commonmark_blocks = BLOCK_TAGS.contains(&tag.as_str());
        let in_pandoc_blocks = PANDOC_BLOCK_TAGS.contains(&tag.as_str());
        let in_pandoc_inline = PANDOC_INLINE_BLOCK_TAGS.contains(&tag.as_str());
        let in_pandoc_void = PANDOC_VOID_BLOCK_TAGS.contains(&tag.as_str());
        let in_verbatim = VERBATIM_TAGS.contains(&tag.as_str());

        // Non-CommonMark closing tags outside the inline/void tables either
        // form a block that ends at the tag itself (block or verbatim names)
        // or are rejected outright.
        if !is_commonmark && is_closing && !in_pandoc_inline && !in_pandoc_void {
            if in_pandoc_blocks || in_verbatim {
                return Some(HtmlBlockType::BlockTag {
                    tag_name: tag,
                    is_verbatim: false,
                    closed_by_blank_line: false,
                    depth_aware: false,
                    closes_at_open_tag: true,
                    is_closing: true,
                });
            }
            return None;
        }

        let is_block_tag = if is_commonmark {
            in_commonmark_blocks
        } else {
            in_pandoc_blocks
        };
        if is_block_tag {
            return Some(HtmlBlockType::BlockTag {
                tag_name: tag,
                is_verbatim: in_verbatim,
                closed_by_blank_line: is_commonmark && !in_verbatim,
                depth_aware: !is_commonmark,
                closes_at_open_tag: false,
                is_closing,
            });
        }

        if !is_commonmark && in_pandoc_inline {
            return Some(HtmlBlockType::BlockTag {
                tag_name: tag,
                is_verbatim: false,
                closed_by_blank_line: false,
                depth_aware: !is_closing,
                closes_at_open_tag: is_closing,
                is_closing,
            });
        }

        if !is_commonmark && in_pandoc_void {
            return Some(HtmlBlockType::BlockTag {
                tag_name: tag,
                is_verbatim: false,
                closed_by_blank_line: false,
                depth_aware: false,
                closes_at_open_tag: true,
                is_closing,
            });
        }

        // Opening verbatim tag that no table above claimed.
        if !is_closing && in_verbatim {
            return Some(HtmlBlockType::BlockTag {
                tag_name: tag,
                is_verbatim: true,
                closed_by_blank_line: false,
                depth_aware: !is_commonmark,
                closes_at_open_tag: false,
                is_closing: false,
            });
        }
    }

    // CommonMark fallback: a single complete open/close tag alone on the line.
    if is_commonmark && let Some(end) = parse_open_tag(trimmed).or_else(|| parse_close_tag(trimmed))
    {
        let rest_is_blank = trimmed[end..]
            .bytes()
            .all(|b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'));
        if rest_is_blank {
            let after_bracket = trimmed.strip_prefix("</").unwrap_or_else(|| &trimmed[1..]);
            let name: String = after_bracket
                .chars()
                .take_while(|c| c.is_ascii_alphanumeric() || *c == '-')
                .map(|c| c.to_ascii_lowercase())
                .collect();
            if !VERBATIM_TAGS.contains(&name.as_str()) {
                return Some(HtmlBlockType::Type7);
            }
        }
    }

    None
}
/// Extracts the tag name from text beginning with `<name` or `</name`.
///
/// The name runs up to the first whitespace, `>` or `/` (or end of input). It
/// must start with an ASCII letter and consist solely of ASCII alphanumerics;
/// the original casing is preserved. A leading `</` yields `None` unless
/// `accept_closing` is `true`.
fn extract_block_tag_name(text: &str, accept_closing: bool) -> Option<String> {
    let after_bracket = text.strip_prefix('<')?;
    let candidate = match after_bracket.strip_prefix('/') {
        Some(_) if !accept_closing => return None,
        Some(after_slash) => after_slash,
        None => after_bracket,
    };

    let name = candidate
        .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .next()
        .unwrap_or("");

    let mut chars = name.chars();
    // An empty name has no first character and is rejected here.
    let first = chars.next()?;
    let well_formed = first.is_ascii_alphabetic() && chars.all(|c| c.is_ascii_alphanumeric());
    well_formed.then(|| name.to_string())
}
/// Returns `true` for block types that end at a blank line: `Type7`, and any
/// `BlockTag` whose `closed_by_blank_line` flag is set.
fn ends_at_blank_line(block_type: &HtmlBlockType) -> bool {
    match block_type {
        HtmlBlockType::Type7 => true,
        HtmlBlockType::BlockTag {
            closed_by_blank_line,
            ..
        } => *closed_by_blank_line,
        HtmlBlockType::Comment
        | HtmlBlockType::ProcessingInstruction
        | HtmlBlockType::Declaration
        | HtmlBlockType::CData => false,
    }
}
/// Returns `true` when `line` contains the terminator for `block_type`.
///
/// Comments, processing instructions, declarations and CDATA look for their
/// literal end marker. A `BlockTag` that is not blank-line terminated looks
/// for `</tag_name>` in the lowercased line. Blank-line terminated block types
/// never match here.
fn is_closing_marker(line: &str, block_type: &HtmlBlockType) -> bool {
    match block_type {
        // Blank-line terminated blocks have no textual closing marker.
        HtmlBlockType::Type7
        | HtmlBlockType::BlockTag {
            closed_by_blank_line: true,
            ..
        } => false,
        HtmlBlockType::BlockTag { tag_name, .. } => {
            let needle = format!("</{}>", tag_name);
            let haystack = line.to_lowercase();
            haystack.contains(needle.as_str())
        }
        HtmlBlockType::Comment => line.contains("-->"),
        HtmlBlockType::ProcessingInstruction => line.contains("?>"),
        HtmlBlockType::Declaration => line.contains('>'),
        HtmlBlockType::CData => line.contains("]]>"),
    }
}
/// Counts opening and closing occurrences of `tag_name` in `line`.
///
/// Returns `(opens, closes)`. Tag names are compared ASCII case-insensitively
/// and must be followed by whitespace, `>`, `/` or end of line. After each
/// `<`, the scan skips to the next `>` that is not inside a single- or
/// double-quoted run; a matching opening tag whose `>` is directly preceded by
/// `/` is self-closing and not counted. Scanning stops at the first `<` with
/// no such `>` after it (that tag is still counted if it matched).
pub(crate) fn count_tag_balance(line: &str, tag_name: &str) -> (usize, usize) {
    let raw = line.as_bytes();
    let wanted = tag_name.to_ascii_lowercase();
    let wanted = wanted.as_bytes();

    let mut opens = 0usize;
    let mut closes = 0usize;
    let mut cursor = 0usize;

    while let Some(offset) = raw[cursor..].iter().position(|&b| b == b'<') {
        let lt = cursor + offset;
        let is_close = raw.get(lt + 1) == Some(&b'/');
        let name_start = if is_close { lt + 2 } else { lt + 1 };
        let name_end = name_start + wanted.len();

        let name_matches = raw
            .get(name_start..name_end)
            .is_some_and(|candidate| candidate.eq_ignore_ascii_case(wanted));
        let at_boundary = name_matches
            && matches!(
                raw.get(name_end).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/')
            );

        // Walk to the `>` that ends this tag, ignoring any `>` inside quotes.
        let scan_from = if name_matches { name_end } else { lt + 1 };
        let mut open_quote: Option<u8> = None;
        let mut gt_at: Option<usize> = None;
        for (pos, &byte) in raw.iter().enumerate().skip(scan_from) {
            match open_quote {
                Some(q) => {
                    if byte == q {
                        open_quote = None;
                    }
                }
                None => match byte {
                    b'"' | b'\'' => open_quote = Some(byte),
                    b'>' => {
                        gt_at = Some(pos);
                        break;
                    }
                    _ => {}
                },
            }
        }

        let self_closing = gt_at.is_some_and(|gt| gt > lt + 1 && raw[gt - 1] == b'/');
        if at_boundary {
            if is_close {
                closes += 1;
            } else if !self_closing {
                opens += 1;
            }
        }

        match gt_at {
            Some(gt) => cursor = gt + 1,
            None => break,
        }
    }

    (opens, closes)
}
/// Handles a comment or processing-instruction block whose closing marker
/// (`-->` or `?>`) is followed by non-whitespace text on the same line.
///
/// Returns `None` without touching `builder` when `block_type` is neither a
/// comment nor a processing instruction, when no line from `start_pos` onward
/// contains the marker, or when only whitespace follows the marker.
///
/// Otherwise emits a `wrapper_kind` node that ends right after the marker,
/// parses the text after the marker with `crate::parser::parse_with_refdefs`,
/// grafts the resulting children after the wrapper, and returns the index of
/// the line following the closing line.
fn try_parse_comment_pi_with_trailing_split(
    builder: &mut GreenNodeBuilder<'static>,
    lines: &[&str],
    start_pos: usize,
    block_type: &HtmlBlockType,
    wrapper_kind: SyntaxKind,
    bq_depth: usize,
    config: &ParserOptions,
) -> Option<usize> {
    /// Drops `depth` blockquote markers from `line`; a no-op at depth 0.
    fn without_bq_markers(line: &str, depth: usize) -> &str {
        if depth > 0 {
            strip_n_blockquote_markers(line, depth)
        } else {
            line
        }
    }

    let marker: &str = match block_type {
        HtmlBlockType::Comment => "-->",
        HtmlBlockType::ProcessingInstruction => "?>",
        _ => return None,
    };

    // First line (from `start_pos`) containing the marker, together with the
    // byte offset just past the marker inside the marker-stripped line.
    let (close_line_idx, marker_end) =
        lines[start_pos..]
            .iter()
            .copied()
            .enumerate()
            .find_map(|(offset, line)| {
                without_bq_markers(line, bq_depth)
                    .find(marker)
                    .map(|at| (start_pos + offset, at + marker.len()))
            })?;

    let close_line = lines[close_line_idx];
    let close_inner = without_bq_markers(close_line, bq_depth);
    let close_prefix_len = close_line.len() - close_inner.len();
    let (close_part, trailing) = close_inner.split_at(marker_end);

    // Nothing but whitespace after the marker: leave it to the regular path.
    if trailing.bytes().all(|b| b.is_ascii_whitespace()) {
        return None;
    }

    let spans_several_lines = close_line_idx != start_pos;

    builder.start_node(wrapper_kind.into());

    if spans_several_lines {
        // Opening line.
        builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
        let (open_text, open_eol) = strip_newline(without_bq_markers(lines[start_pos], bq_depth));
        if !open_text.is_empty() {
            builder.token(SyntaxKind::TEXT.into(), open_text);
        }
        if !open_eol.is_empty() {
            builder.token(SyntaxKind::NEWLINE.into(), open_eol);
        }
        builder.finish_node();

        // Lines strictly between the opening and closing lines.
        let interior = &lines[start_pos + 1..close_line_idx];
        if !interior.is_empty() {
            builder.start_node(SyntaxKind::HTML_BLOCK_CONTENT.into());
            for interior_line in interior {
                emit_html_block_line(builder, interior_line, bq_depth);
            }
            builder.finish_node();
        }
    }

    // Closing tag node: everything up to and including the marker. The
    // blockquote prefix is only emitted here when the close is on a later
    // line than the open.
    builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
    if spans_several_lines && bq_depth > 0 && close_prefix_len > 0 {
        emit_bq_prefix_tokens(builder, &close_line[..close_prefix_len]);
    }
    if !close_part.is_empty() {
        builder.token(SyntaxKind::TEXT.into(), close_part);
    }
    builder.finish_node();

    builder.finish_node();

    // Re-parse what follows the marker and attach it after the wrapper node.
    if !trailing.is_empty() {
        let refdefs = config.refdef_labels.clone().unwrap_or_default();
        let mut inner_options = config.clone();
        inner_options.refdef_labels = Some(refdefs.clone());
        let inner_root = crate::parser::parse_with_refdefs(trailing, Some(inner_options), refdefs);
        let mut bq = None;
        graft_document_children(builder, &inner_root, LastParaDemote::Never, &mut bq);
    }

    Some(close_line_idx + 1)
}
/// Parses one HTML block that starts at `lines[start_pos]` and emits it into
/// `builder` inside a node of kind `wrapper_kind`.
///
/// Returns the index of the first line that is not part of the block.
///
/// * `block_type` — classification of the opening line.
/// * `bq_depth` — number of blockquote markers stripped from each line before
///   the line is inspected.
/// * `config` — parser options; also cloned for the nested re-parses of text
///   that trails a closing tag.
///
/// In several situations ("lift" paths below) the block's interior is not
/// emitted as raw content but handed to the `emit_html_block_body_lifted*`
/// helpers instead.
///
/// # Panics
///
/// Panics if `start_pos` is out of bounds for `lines`.
pub(crate) fn parse_html_block_with_wrapper(
builder: &mut GreenNodeBuilder<'static>,
lines: &[&str],
start_pos: usize,
block_type: HtmlBlockType,
bq_depth: usize,
wrapper_kind: SyntaxKind,
config: &ParserOptions,
) -> usize {
// Pandoc dialect: a comment / processing instruction followed by trailing
// text on its closing line is handled entirely by the helper.
if config.dialect == crate::options::Dialect::Pandoc
&& matches!(
block_type,
HtmlBlockType::Comment | HtmlBlockType::ProcessingInstruction
)
&& let Some(consumed) = try_parse_comment_pi_with_trailing_split(
builder,
lines,
start_pos,
&block_type,
wrapper_kind,
bq_depth,
config,
)
{
return consumed;
}
builder.start_node(wrapper_kind.into());
let first_line = lines[start_pos];
let blank_terminated = ends_at_blank_line(&block_type);
// Opening line with `bq_depth` blockquote markers removed.
let first_inner = if bq_depth > 0 {
strip_n_blockquote_markers(first_line, bq_depth)
} else {
first_line
};
// Index of the line on which a multi-line open tag ends, for the wrapper /
// block-type combinations that ask `find_multiline_open_end`.
// NOTE(review): presumably `None` when the open tag ends on the first line —
// confirm against `find_multiline_open_end`.
let multiline_open_end = match (wrapper_kind, &block_type) {
(SyntaxKind::HTML_BLOCK_DIV, _) => {
find_multiline_open_end(lines, start_pos, first_inner, "div", bq_depth)
}
(
_,
HtmlBlockType::BlockTag {
tag_name,
closes_at_open_tag: true,
..
},
) => find_multiline_open_end(lines, start_pos, first_inner, tag_name, bq_depth),
(
_,
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
},
) if is_pandoc_lift_eligible_block_tag(tag_name) => {
find_multiline_open_end(lines, start_pos, first_inner, tag_name, bq_depth)
}
_ => None,
};
// Tag whose nesting is tracked with `count_tag_balance`, if any.
let depth_aware_tag: Option<String> = match &block_type {
HtmlBlockType::BlockTag {
tag_name,
closed_by_blank_line: false,
depth_aware: true,
..
} => Some(tag_name.clone()),
_ => None,
};
// Nesting depth after the opening line(s): opens minus closes of the tracked
// tag. Stays at 1 when no tag is tracked.
let mut depth: i64 = 1;
if let Some(tag_name) = &depth_aware_tag {
let last_open_line = multiline_open_end.unwrap_or(start_pos);
let mut opens = 0usize;
let mut closes = 0usize;
for line in &lines[start_pos..=last_open_line] {
let inner = if bq_depth > 0 {
strip_n_blockquote_markers(line, bq_depth)
} else {
line
};
let (o, c) = count_tag_balance(inner, tag_name);
opens += o;
closes += c;
}
depth = opens as i64 - closes as i64;
}
// A div wrapper whose tracked tag is already balanced on the first line.
let is_same_line_div = wrapper_kind == SyntaxKind::HTML_BLOCK_DIV
&& multiline_open_end.is_none()
&& depth_aware_tag.is_some()
&& depth <= 0;
let same_line_div_lift_safe = is_same_line_div && bq_depth == 0 && {
let (line_without_newline, _) = strip_newline(first_inner);
probe_same_line_lift(line_without_newline, "div")
};
// Lift-eligible tag name for a plain HTML_BLOCK wrapper outside blockquotes.
let strict_block_tag_name: Option<&str> =
if wrapper_kind == SyntaxKind::HTML_BLOCK && bq_depth == 0 {
match &block_type {
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
} if is_pandoc_lift_eligible_block_tag(tag_name) => Some(tag_name.as_str()),
_ => None,
}
} else {
None
};
let same_line_strict_lift_safe = strict_block_tag_name.is_some_and(|name| {
multiline_open_end.is_none() && depth <= 0 && {
let (line_no_nl, _) = strip_newline(first_inner);
probe_same_line_lift(line_no_nl, name)
}
});
// Whether the strict (non-div, non-blockquote) block takes the lift path:
// the opening shape must be acceptable, and for inline-block tags
// `inline_block_void_interior_abandons` must not veto it.
let strict_block_lift = strict_block_tag_name.is_some_and(|name| {
let (line_no_nl, _) = strip_newline(first_inner);
let shape_ok = if multiline_open_end.is_some() {
true
} else if depth > 0 {
probe_open_tag_line_has_close_gt(line_no_nl, name)
} else {
same_line_strict_lift_safe
};
if !shape_ok {
return false;
}
if !is_pandoc_inline_block_tag_name(name) {
return true;
}
!inline_block_void_interior_abandons(
first_inner,
lines,
start_pos,
multiline_open_end,
bq_depth,
name,
)
});
// Inside a blockquote, block balanced on its single opening line: tag to lift
// with, if `probe_same_line_lift` accepts the line.
let same_line_bq_lift_tag: Option<&str> = if bq_depth > 0
&& multiline_open_end.is_none()
&& depth_aware_tag.is_some()
&& depth <= 0
{
let (line_no_nl, _) = strip_newline(first_inner);
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
if probe_same_line_lift(line_no_nl, "div") {
Some("div")
} else {
None
}
} else if wrapper_kind == SyntaxKind::HTML_BLOCK {
match &block_type {
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
} if is_pandoc_lift_eligible_block_tag(tag_name)
&& probe_same_line_lift(line_no_nl, tag_name.as_str()) =>
{
Some(tag_name.as_str())
}
_ => None,
}
} else {
None
}
} else {
None
};
// Inside a blockquote, block still open after its opening line(s): tag used
// by the "messy" close-line split in the scanning loop below.
let bq_messy_lift_tag: Option<&str> = if bq_depth > 0 && depth_aware_tag.is_some() && depth > 0
{
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
Some("div")
} else if wrapper_kind == SyntaxKind::HTML_BLOCK {
match &block_type {
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
} if is_pandoc_lift_eligible_block_tag(tag_name) => {
if is_pandoc_inline_block_tag_name(tag_name)
&& inline_block_void_interior_abandons(
first_inner,
lines,
start_pos,
multiline_open_end,
bq_depth,
tag_name,
)
{
None
} else {
Some(tag_name.as_str())
}
}
_ => None,
}
} else {
None
}
} else {
None
};
// Inside a blockquote, multi-line open tag that is already balanced by the
// end of the opening lines.
let bq_multiline_close_lift_tag: Option<&str> = if bq_depth > 0
&& multiline_open_end.is_some()
&& depth_aware_tag.is_some()
&& depth <= 0
{
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
Some("div")
} else if wrapper_kind == SyntaxKind::HTML_BLOCK {
match &block_type {
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
} if is_pandoc_lift_eligible_block_tag(tag_name) => {
if is_pandoc_inline_block_tag_name(tag_name)
&& inline_block_void_interior_abandons(
first_inner,
lines,
start_pos,
multiline_open_end,
bq_depth,
tag_name,
)
{
None
} else {
Some(tag_name.as_str())
}
}
_ => None,
}
} else {
None
}
} else {
None
};
// Any of the lift conditions computed above enables lift mode.
let lift_mode = (wrapper_kind == SyntaxKind::HTML_BLOCK_DIV
&& bq_depth == 0
&& (!is_same_line_div || same_line_div_lift_safe))
|| strict_block_lift
|| same_line_bq_lift_tag.is_some()
|| bq_messy_lift_tag.is_some()
|| bq_multiline_close_lift_tag.is_some();
// Text following the open tag on its last line; filled by the open-tag
// emitters and later emitted as (or split out of) the block body.
let mut pre_content = String::new();
// --- Opening tag node ---
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
if let Some(end_line_idx) = multiline_open_end {
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
emit_multiline_open_tag_with_attrs(
builder,
lines,
start_pos,
end_line_idx,
"div",
bq_depth,
lift_mode,
&mut pre_content,
);
} else if let Some(name) = strict_block_tag_name
&& strict_block_lift
{
emit_multiline_open_tag_with_attrs(
builder,
lines,
start_pos,
end_line_idx,
name,
bq_depth,
lift_mode,
&mut pre_content,
);
} else if let Some(name) = bq_strict_attr_emit_tag_name(wrapper_kind, &block_type, bq_depth)
{
let lift_trailing =
bq_messy_lift_tag == Some(name) || bq_multiline_close_lift_tag == Some(name);
emit_multiline_open_tag_with_attrs(
builder,
lines,
start_pos,
end_line_idx,
name,
bq_depth,
lift_trailing,
&mut pre_content,
);
} else {
emit_multiline_open_tag_simple(builder, lines, start_pos, end_line_idx, bq_depth);
}
} else {
let (line_without_newline, newline_str) = strip_newline(first_inner);
if !line_without_newline.is_empty() {
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
let trailing =
emit_open_tag_tokens(builder, line_without_newline, "div", lift_mode);
if !trailing.is_empty() {
pre_content.push_str(trailing);
pre_content.push_str(newline_str);
}
} else if let Some(name) = strict_block_tag_name
&& strict_block_lift
{
let trailing = emit_open_tag_tokens(builder, line_without_newline, name, lift_mode);
if !trailing.is_empty() {
pre_content.push_str(trailing);
pre_content.push_str(newline_str);
}
} else if let Some(name) =
bq_strict_attr_emit_tag_name(wrapper_kind, &block_type, bq_depth)
{
let lift_trailing =
same_line_bq_lift_tag == Some(name) || bq_messy_lift_tag == Some(name);
let trailing =
emit_open_tag_tokens(builder, line_without_newline, name, lift_trailing);
if lift_trailing && !trailing.is_empty() {
pre_content.push_str(trailing);
pre_content.push_str(newline_str);
}
} else {
builder.token(SyntaxKind::TEXT.into(), line_without_newline);
}
}
// The newline stays in the tag node unless it was moved into `pre_content`.
if pre_content.is_empty() && !newline_str.is_empty() {
builder.token(SyntaxKind::NEWLINE.into(), newline_str);
}
}
builder.finish_node();
// --- Blocks that end at their open tag ---
let void_block = matches!(
&block_type,
HtmlBlockType::BlockTag {
closes_at_open_tag: true,
..
}
);
if void_block && let Some(end_line_idx) = multiline_open_end {
log::trace!(
"HTML void block at line {} closes after multi-line open ending at line {}",
start_pos + 1,
end_line_idx + 1
);
builder.finish_node(); return end_line_idx + 1;
}
// --- Multi-line open tag whose last line also balances the block ---
// The text after the open tag is split into body, close marker and anything
// trailing the close marker.
if let Some(end_line_idx) = multiline_open_end
&& !blank_terminated
&& depth_aware_tag.is_some()
&& depth <= 0
&& lift_mode
&& (bq_depth == 0 || bq_multiline_close_lift_tag.is_some())
&& !pre_content.is_empty()
{
let tag_name_opt: Option<&str> = if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
Some("div")
} else if strict_block_lift {
strict_block_tag_name
} else if let Some(name) = bq_multiline_close_lift_tag {
Some(name)
} else {
None
};
if let Some(tag_name) = tag_name_opt {
let (pre_no_nl, post_nl) = strip_newline(&pre_content);
if let Some((leading, close_part)) =
try_split_close_line_depth_aware(pre_no_nl, tag_name)
{
let close_marker_end =
split_close_marker_end(close_part, tag_name).unwrap_or(close_part.len());
let close_marker = &close_part[..close_marker_end];
let same_line_trailing = &close_part[close_marker_end..];
let policy = if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
LastParaDemote::SkipTrailingBlanks
} else {
LastParaDemote::OnlyIfLast
};
emit_html_block_body_lifted(builder, "", &[], leading, policy, config);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
if same_line_trailing.is_empty() {
let mut close_line = String::with_capacity(close_marker.len() + post_nl.len());
close_line.push_str(close_marker);
close_line.push_str(post_nl);
emit_html_block_line(builder, &close_line, 0);
// First finish closes the tag node, second closes the wrapper.
builder.finish_node();
builder.finish_node(); } else {
builder.token(SyntaxKind::TEXT.into(), close_marker);
builder.finish_node(); builder.finish_node();
// Text after the close marker is re-parsed on its own and grafted after
// the (already finished) wrapper node.
let mut trailing_text =
String::with_capacity(same_line_trailing.len() + post_nl.len());
trailing_text.push_str(same_line_trailing);
trailing_text.push_str(post_nl);
let mut inner_options = config.clone();
let refdefs = config.refdef_labels.clone().unwrap_or_default();
inner_options.refdef_labels = Some(refdefs.clone());
let inner_root = crate::parser::parse_with_refdefs(
&trailing_text,
Some(inner_options),
refdefs,
);
let mut bq = None;
graft_document_children(builder, &inner_root, LastParaDemote::Never, &mut bq);
}
return end_line_idx + 1;
}
}
}
// --- Block that opens and closes on its first line ---
let same_line_closed = !blank_terminated
&& multiline_open_end.is_none()
&& (void_block
|| match &depth_aware_tag {
Some(_) => depth <= 0,
None => is_closing_marker(first_inner, &block_type),
});
if same_line_closed {
log::trace!(
"HTML block at line {} opens and closes on same line",
start_pos + 1
);
let same_line_lift_tag: Option<&str> = if !lift_mode || pre_content.is_empty() {
None
} else if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV && same_line_div_lift_safe {
Some("div")
} else if same_line_strict_lift_safe {
strict_block_tag_name
} else if let Some(name) = same_line_bq_lift_tag {
Some(name)
} else {
None
};
if let Some(tag_name) = same_line_lift_tag {
let (pre_no_nl, post_nl) = strip_newline(&pre_content);
if let Some((leading, close_part)) =
try_split_close_line_depth_aware(pre_no_nl, tag_name)
{
let close_marker_end =
split_close_marker_end(close_part, tag_name).unwrap_or(close_part.len());
let close_marker = &close_part[..close_marker_end];
let same_line_trailing = &close_part[close_marker_end..];
let policy = if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
LastParaDemote::SkipTrailingBlanks
} else {
LastParaDemote::OnlyIfLast
};
emit_html_block_body_lifted(builder, "", &[], leading, policy, config);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
if same_line_trailing.is_empty() {
let mut close_line = String::with_capacity(close_marker.len() + post_nl.len());
close_line.push_str(close_marker);
close_line.push_str(post_nl);
emit_html_block_line(builder, &close_line, 0);
// First finish closes the tag node, second closes the wrapper.
builder.finish_node();
builder.finish_node(); } else {
builder.token(SyntaxKind::TEXT.into(), close_marker);
builder.finish_node(); builder.finish_node();
// Text after the close marker is re-parsed on its own and grafted after
// the (already finished) wrapper node.
let mut trailing_text =
String::with_capacity(same_line_trailing.len() + post_nl.len());
trailing_text.push_str(same_line_trailing);
trailing_text.push_str(post_nl);
let mut inner_options = config.clone();
let refdefs = config.refdef_labels.clone().unwrap_or_default();
inner_options.refdef_labels = Some(refdefs.clone());
let inner_root = crate::parser::parse_with_refdefs(
&trailing_text,
Some(inner_options),
refdefs,
);
let mut bq = None;
graft_document_children(builder, &inner_root, LastParaDemote::Never, &mut bq);
}
return start_pos + 1;
}
}
// No lift: the wrapper holds only the opening tag node.
builder.finish_node(); return start_pos + 1;
}
// --- Scan following lines for the end of the block ---
let mut current_pos = multiline_open_end
.map(|end| end + 1)
.unwrap_or(start_pos + 1);
let mut content_lines: Vec<&str> = Vec::new();
let mut found_closing = false;
while current_pos < lines.len() {
let line = lines[current_pos];
let (line_bq_depth, inner) = count_blockquote_markers(line);
// A line with fewer blockquote markers than the block started with ends it.
if line_bq_depth < bq_depth {
break;
}
if blank_terminated && inner.trim().is_empty() {
break;
}
// Either update the running tag depth or look for the textual end marker.
let line_closes = match &depth_aware_tag {
Some(tag_name) => {
let (opens, closes) = count_tag_balance(inner, tag_name);
depth += opens as i64;
depth -= closes as i64;
depth <= 0
}
None => is_closing_marker(inner, &block_type),
};
if line_closes {
log::trace!("Found HTML block closing at line {}", current_pos + 1);
found_closing = true;
// Blockquote "clean" lift candidate: only when nothing trailed the open tag.
let bq_lift_tag: Option<&str> = if bq_depth > 0 && pre_content.is_empty() {
if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
Some("div")
} else if wrapper_kind == SyntaxKind::HTML_BLOCK {
match &block_type {
HtmlBlockType::BlockTag {
tag_name,
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
} if is_pandoc_lift_eligible_block_tag(tag_name) => Some(tag_name.as_str()),
_ => None,
}
} else {
None
}
} else {
None
};
// Clean lift requires the last opening line to end in `>` (ignoring trailing
// spaces/tabs) and the closing line to start with `</` (ignoring leading
// spaces/tabs), plus the inline-block veto.
let bq_clean_lift = bq_lift_tag.is_some_and(|tag_name| {
let last_open_line: &str = match multiline_open_end {
None => first_inner,
Some(end) if bq_depth > 0 => strip_n_blockquote_markers(lines[end], bq_depth),
Some(end) => lines[end],
};
let (open_no_nl, _) = strip_newline(last_open_line);
if !open_no_nl.trim_end_matches([' ', '\t']).ends_with('>') {
return false;
}
let close_stripped = strip_n_blockquote_markers(line, bq_depth);
let (close_no_nl, _) = strip_newline(close_stripped);
if !close_no_nl
.trim_start_matches([' ', '\t'])
.starts_with("</")
{
return false;
}
if is_pandoc_inline_block_tag_name(tag_name)
&& inline_block_void_interior_abandons(
first_inner,
lines,
start_pos,
multiline_open_end,
bq_depth,
tag_name,
)
{
return false;
}
true
});
if bq_clean_lift {
let demote_policy = if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
LastParaDemote::Never
} else {
LastParaDemote::OnlyIfLast
};
emit_html_block_body_lifted_bq(
builder,
&content_lines,
bq_depth,
demote_policy,
config,
);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
emit_html_block_line(builder, line, bq_depth);
builder.finish_node();
current_pos += 1;
break;
}
// Blockquote "messy" lift: split the closing line into body text that
// precedes the close tag and the close tag itself.
if let Some(tag_name) = bq_messy_lift_tag {
let close_stripped = strip_n_blockquote_markers(line, bq_depth);
let close_prefix_len = line.len() - close_stripped.len();
let close_prefix = &line[..close_prefix_len];
if let Some((leading, close_part)) = try_split_close_line(close_stripped, tag_name)
{
let policy = if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
if leading.is_empty() {
LastParaDemote::Never
} else {
LastParaDemote::SkipTrailingBlanks
}
} else {
LastParaDemote::OnlyIfLast
};
emit_html_block_body_lifted_bq_messy(
builder,
&pre_content,
&content_lines,
leading,
close_prefix,
bq_depth,
policy,
config,
);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
// With no leading body text, the blockquote prefix belongs to the close tag node.
if leading.is_empty() {
emit_bq_prefix_tokens(builder, close_prefix);
}
emit_html_block_line(builder, close_part, 0);
builder.finish_node();
current_pos += 1;
break;
}
}
// Non-blockquote lift: tag name used to split the closing line, if lifting.
let close_split_tag = if lift_mode {
if strict_block_lift {
strict_block_tag_name
} else if wrapper_kind == SyntaxKind::HTML_BLOCK_DIV {
Some("div")
} else {
None
}
} else {
None
};
let (close_no_nl, close_post_nl) = strip_newline(line);
let close_split = close_split_tag
.and_then(|name| try_split_close_line_depth_aware(close_no_nl, name));
if let Some((leading, close_part)) = close_split {
// Pure indentation before the close tag is kept as WHITESPACE in the tag
// node rather than being passed to the lifted body.
let leading_is_ws_only =
!leading.is_empty() && leading.bytes().all(|b| b == b' ' || b == b'\t');
let body_leading = if leading_is_ws_only { "" } else { leading };
let policy = if strict_block_lift {
LastParaDemote::OnlyIfLast
} else if !leading.is_empty() {
LastParaDemote::SkipTrailingBlanks
} else {
LastParaDemote::Never
};
// `close_split` is only `Some` when `close_split_tag` was `Some`.
let close_tag_name = close_split_tag.expect("close_split_tag present");
let close_marker_end =
split_close_marker_end(close_part, close_tag_name).unwrap_or(close_part.len());
let close_marker = &close_part[..close_marker_end];
let close_trailing = &close_part[close_marker_end..];
emit_html_block_body_lifted(
builder,
&pre_content,
&content_lines,
body_leading,
policy,
config,
);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
if leading_is_ws_only {
builder.token(SyntaxKind::WHITESPACE.into(), leading);
}
if close_trailing.is_empty() {
let mut close_line =
String::with_capacity(close_marker.len() + close_post_nl.len());
close_line.push_str(close_marker);
close_line.push_str(close_post_nl);
emit_html_block_line(builder, &close_line, 0);
builder.finish_node();
} else {
builder.token(SyntaxKind::TEXT.into(), close_marker);
// Closes the tag node and the wrapper; the trailing text is re-parsed and
// grafted after the wrapper, so this path returns directly.
builder.finish_node(); builder.finish_node();
let mut trailing_text =
String::with_capacity(close_trailing.len() + close_post_nl.len());
trailing_text.push_str(close_trailing);
trailing_text.push_str(close_post_nl);
let mut inner_options = config.clone();
let refdefs = config.refdef_labels.clone().unwrap_or_default();
inner_options.refdef_labels = Some(refdefs.clone());
let inner_root = crate::parser::parse_with_refdefs(
&trailing_text,
Some(inner_options),
refdefs,
);
let mut bq = None;
graft_document_children(builder, &inner_root, LastParaDemote::Never, &mut bq);
current_pos += 1;
return current_pos;
}
} else {
// No split: emit the accumulated body, then the whole closing line as the
// closing tag node.
emit_html_block_body(
builder,
&pre_content,
&content_lines,
bq_depth,
wrapper_kind,
lift_mode,
config,
);
builder.start_node(SyntaxKind::HTML_BLOCK_TAG.into());
emit_html_block_line(builder, line, bq_depth);
builder.finish_node();
}
current_pos += 1;
break;
}
// Not a closing line: accumulate it as body content.
content_lines.push(line);
current_pos += 1;
}
// Ran out of lines (or hit a terminating condition) without a closing line:
// emit whatever was collected as the body.
if !found_closing {
log::trace!("HTML block at line {} has no closing marker", start_pos + 1);
emit_html_block_body(
builder,
&pre_content,
&content_lines,
bq_depth,
wrapper_kind,
lift_mode,
config,
);
}
// Close the wrapper node.
builder.finish_node(); current_pos
}
/// Emits the interior of an HTML block.
///
/// Does nothing when both `pre_content` and `content_lines` are empty. For a
/// `SyntaxKind::HTML_BLOCK_DIV` wrapper in lift mode the interior is forwarded
/// to `emit_html_block_body_lifted` with `LastParaDemote::Never`. Otherwise an
/// `HTML_BLOCK_CONTENT` node is emitted holding `pre_content` as one `TEXT`
/// token (when non-empty) followed by each content line via
/// `emit_html_block_line`.
fn emit_html_block_body(
    builder: &mut GreenNodeBuilder<'static>,
    pre_content: &str,
    content_lines: &[&str],
    bq_depth: usize,
    wrapper_kind: SyntaxKind,
    lift_mode: bool,
    config: &ParserOptions,
) {
    let has_body = !pre_content.is_empty() || !content_lines.is_empty();
    if !has_body {
        return;
    }

    let lift_div_body = lift_mode && wrapper_kind == SyntaxKind::HTML_BLOCK_DIV;
    if lift_div_body {
        emit_html_block_body_lifted(
            builder,
            pre_content,
            content_lines,
            "",
            LastParaDemote::Never,
            config,
        );
    } else {
        builder.start_node(SyntaxKind::HTML_BLOCK_CONTENT.into());
        if !pre_content.is_empty() {
            builder.token(SyntaxKind::TEXT.into(), pre_content);
        }
        for raw_line in content_lines {
            emit_html_block_line(builder, raw_line, bq_depth);
        }
        builder.finish_node();
    }
}
/// Policy value passed as `demote_policy` to the `emit_html_block_body_lifted*`
/// helpers and to `graft_document_children`.
///
/// NOTE(review): judging by the names, this selects whether/when the last
/// paragraph of a lifted body is demoted — the consuming code is not visible
/// here, so confirm the exact semantics there.
#[derive(Copy, Clone, Debug)]
enum LastParaDemote {
/// Used for trailing-text grafts and for div bodies with no text before the
/// close tag.
Never,
/// Used for div wrappers when body text shares a line with the close tag.
SkipTrailingBlanks,
/// Used for non-div (strict block tag) wrappers.
OnlyIfLast,
}
fn emit_html_block_body_lifted(
builder: &mut GreenNodeBuilder<'static>,
pre_content: &str,
content_lines: &[&str],
post_content: &str,
demote_policy: LastParaDemote,
config: &ParserOptions,
) {
emit_html_block_body_lifted_inner(
builder,
pre_content,
content_lines,
post_content,
demote_policy,
config,
&mut None,
)
}
/// Lifted-body emission for content lines that still carry blockquote markers.
///
/// Each line is split into the prefix removed by `strip_n_blockquote_markers`
/// and the remaining text. The remaining text is parsed as the inner document,
/// and the per-line prefixes are handed over as a [`BqPrefixState`] so they can
/// be re-emitted while grafting.
fn emit_html_block_body_lifted_bq(
    builder: &mut GreenNodeBuilder<'static>,
    content_lines: &[&str],
    bq_depth: usize,
    demote_policy: LastParaDemote,
    config: &ParserOptions,
) {
    // One pass: (marker prefix, text after the markers) for every line.
    let (prefixes, stripped_lines): (Vec<String>, Vec<&str>) = content_lines
        .iter()
        .map(|&raw| {
            let body = strip_n_blockquote_markers(raw, bq_depth);
            let marker_len = raw.len() - body.len();
            (raw[..marker_len].to_string(), body)
        })
        .unzip();

    let mut bq_state = Some(BqPrefixState {
        prefixes,
        line_idx: 0,
        at_line_start: true,
    });
    emit_html_block_body_lifted_inner(
        builder,
        "",
        &stripped_lines,
        "",
        demote_policy,
        config,
        &mut bq_state,
    );
}
/// Lifted-body emission inside a blockquote when body text also appears on the
/// open-tag line (`pre_content`) and/or before the close tag (`leading`).
///
/// The prefix list is built to line up with the inner text: an empty prefix for
/// `pre_content` when it is non-empty, one stripped marker prefix per content
/// line, and `close_line_prefix` when `leading` is non-empty.
#[allow(clippy::too_many_arguments)]
fn emit_html_block_body_lifted_bq_messy(
    builder: &mut GreenNodeBuilder<'static>,
    pre_content: &str,
    content_lines: &[&str],
    leading: &str,
    close_line_prefix: &str,
    bq_depth: usize,
    demote_policy: LastParaDemote,
    config: &ParserOptions,
) {
    let has_pre = !pre_content.is_empty();
    let has_leading = !leading.is_empty();

    let mut prefixes: Vec<String> = Vec::new();
    let mut stripped_lines: Vec<&str> = Vec::with_capacity(content_lines.len());

    if has_pre {
        // The open-tag line's own prefix is not re-emitted here.
        prefixes.push(String::new());
    }
    for &raw in content_lines {
        let body = strip_n_blockquote_markers(raw, bq_depth);
        let marker_len = raw.len() - body.len();
        prefixes.push(raw[..marker_len].to_string());
        stripped_lines.push(body);
    }
    if has_leading {
        prefixes.push(close_line_prefix.to_string());
    }

    let mut bq_state = Some(BqPrefixState {
        prefixes,
        line_idx: 0,
        at_line_start: true,
    });
    emit_html_block_body_lifted_inner(
        builder,
        pre_content,
        &stripped_lines,
        leading,
        demote_policy,
        config,
        &mut bq_state,
    );
}
/// Parses the concatenated body text as a nested document and grafts its
/// children into `builder`.
///
/// The inner text is `pre_content`, then every entry of `content_lines`, then
/// `post_content`. Nothing is emitted when `pre_content` and `post_content` are
/// empty and `content_lines` has no entries. The nested parse runs with a clone
/// of `config` whose `refdef_labels` is always `Some` (defaulted when the
/// outer value is `None`).
fn emit_html_block_body_lifted_inner(
    builder: &mut GreenNodeBuilder<'static>,
    pre_content: &str,
    content_lines: &[&str],
    post_content: &str,
    demote_policy: LastParaDemote,
    config: &ParserOptions,
    bq: &mut Option<BqPrefixState>,
) {
    let nothing_to_parse =
        pre_content.is_empty() && content_lines.is_empty() && post_content.is_empty();
    if nothing_to_parse {
        return;
    }

    let lines_len: usize = content_lines.iter().map(|line| line.len()).sum();
    let mut inner_text = String::with_capacity(pre_content.len() + lines_len + post_content.len());
    inner_text.push_str(pre_content);
    inner_text.extend(content_lines.iter().copied());
    inner_text.push_str(post_content);

    let refdefs = config.refdef_labels.clone().unwrap_or_default();
    let mut inner_options = config.clone();
    inner_options.refdef_labels = Some(refdefs.clone());

    let inner_root = crate::parser::parse_with_refdefs(&inner_text, Some(inner_options), refdefs);
    graft_document_children(builder, &inner_root, demote_policy, bq);
}
/// Cursor used while grafting tokens so that stripped blockquote prefixes can
/// be re-emitted at the start of each line (see `emit_grafted_token`).
struct BqPrefixState {
/// Prefix text per line, looked up by `line_idx`; a missing entry emits nothing.
prefixes: Vec<String>,
/// Index of the current line; advanced after a `NEWLINE` or `BLANK_LINE` token.
line_idx: usize,
/// True when the next grafted token is the first one on its line.
at_line_start: bool,
}
/// Re-emits every direct child of `doc` into `builder`.
///
/// Depending on `demote_policy`, at most one `PARAGRAPH` child is re-emitted
/// with kind `PLAIN`; all other nodes keep their kind. Tokens are emitted via
/// [`emit_grafted_token`] so blockquote prefixes in `bq` are restored.
fn graft_document_children(
    builder: &mut GreenNodeBuilder<'static>,
    doc: &SyntaxNode,
    demote_policy: LastParaDemote,
    bq: &mut Option<BqPrefixState>,
) {
    let children: Vec<rowan::NodeOrToken<SyntaxNode, _>> = doc.children_with_tokens().collect();

    // Index of the last node child (optionally ignoring BLANK_LINE nodes),
    // but only when that node is a PARAGRAPH.
    let last_node_if_paragraph = |skip_blank_lines: bool| -> Option<usize> {
        children
            .iter()
            .enumerate()
            .rev()
            .find_map(|(idx, element)| match element {
                rowan::NodeOrToken::Node(n) => {
                    let kind = n.kind();
                    if skip_blank_lines && kind == SyntaxKind::BLANK_LINE {
                        None
                    } else {
                        Some((idx, kind))
                    }
                }
                rowan::NodeOrToken::Token(_) => None,
            })
            .and_then(|(idx, kind)| {
                if kind == SyntaxKind::PARAGRAPH {
                    Some(idx)
                } else {
                    None
                }
            })
    };

    let demote_idx: Option<usize> = match demote_policy {
        LastParaDemote::Never => None,
        LastParaDemote::SkipTrailingBlanks => last_node_if_paragraph(true),
        LastParaDemote::OnlyIfLast => last_node_if_paragraph(false),
    };

    for (idx, element) in children.into_iter().enumerate() {
        match element {
            rowan::NodeOrToken::Token(token) => {
                emit_grafted_token(builder, token.kind(), token.text(), bq);
            }
            rowan::NodeOrToken::Node(node) => {
                if demote_idx == Some(idx) {
                    graft_subtree_as(builder, &node, SyntaxKind::PLAIN, bq);
                } else {
                    graft_subtree(builder, &node, bq);
                }
            }
        }
    }
}
/// Re-emits `node` and its whole subtree into `builder`, keeping the node's
/// own kind.
fn graft_subtree(
    builder: &mut GreenNodeBuilder<'static>,
    node: &SyntaxNode,
    bq: &mut Option<BqPrefixState>,
) {
    let own_kind = node.kind();
    graft_subtree_as(builder, node, own_kind, bq);
}
/// Re-emits `node` into `builder` under the given `kind`.
///
/// Only the outermost node takes `kind`; nested nodes are grafted with their
/// own kinds, and tokens go through [`emit_grafted_token`].
fn graft_subtree_as(
    builder: &mut GreenNodeBuilder<'static>,
    node: &SyntaxNode,
    kind: SyntaxKind,
    bq: &mut Option<BqPrefixState>,
) {
    builder.start_node(kind.into());
    for element in node.children_with_tokens() {
        match element {
            rowan::NodeOrToken::Token(token) => {
                emit_grafted_token(builder, token.kind(), token.text(), bq);
            }
            rowan::NodeOrToken::Node(child) => graft_subtree(builder, &child, bq),
        }
    }
    builder.finish_node();
}
/// Emits one grafted token, restoring a blockquote prefix before it if needed.
///
/// Without prefix state the token is emitted as-is. With prefix state, the
/// prefix for the current line (if one exists) is emitted before the first
/// token of that line, and a `NEWLINE` or `BLANK_LINE` token advances the
/// state to the next line.
fn emit_grafted_token(
    builder: &mut GreenNodeBuilder<'static>,
    kind: SyntaxKind,
    text: &str,
    bq: &mut Option<BqPrefixState>,
) {
    let Some(state) = bq.as_mut() else {
        builder.token(kind.into(), text);
        return;
    };

    if state.at_line_start {
        state.at_line_start = false;
        if let Some(prefix) = state.prefixes.get(state.line_idx) {
            emit_bq_prefix_tokens(builder, prefix);
        }
    }

    let ends_line = kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE;
    builder.token(kind.into(), text);
    if ends_line {
        state.line_idx += 1;
        state.at_line_start = true;
    }
}
/// Emits a blockquote prefix one character at a time: each `>` becomes a
/// `BLOCK_QUOTE_MARKER` token, every other character a `WHITESPACE` token.
fn emit_bq_prefix_tokens(builder: &mut GreenNodeBuilder<'static>, prefix: &str) {
    for (start, ch) in prefix.char_indices() {
        // Slice the original text so the token holds exactly this character.
        let piece = &prefix[start..start + ch.len_utf8()];
        let kind = match ch {
            '>' => SyntaxKind::BLOCK_QUOTE_MARKER,
            _ => SyntaxKind::WHITESPACE,
        };
        builder.token(kind.into(), piece);
    }
}
/// Finds the byte offset (within `line`) of the `>` that ends the open tag.
///
/// `line` may be indented with spaces or tabs and must then start with
/// `<tag_name` (ASCII case-insensitive) followed by at least one more byte.
/// A `>` inside a single- or double-quoted attribute value is ignored.
/// Returns `None` when the line does not start with the tag or has no
/// unquoted `>`.
fn locate_open_tag_close_gt(line: &str, tag_name: &str) -> Option<usize> {
    let all = line.as_bytes();
    let indent_len = all
        .iter()
        .take_while(|&&b| matches!(b, b' ' | b'\t'))
        .count();
    let tag_region = &all[indent_len..];
    let name_end = tag_name.len() + 1;

    if tag_region.len() <= name_end {
        return None;
    }
    let names_tag = tag_region[0] == b'<'
        && tag_region[1..name_end].eq_ignore_ascii_case(tag_name.as_bytes());
    if !names_tag {
        return None;
    }

    let mut open_quote: Option<u8> = None;
    for (offset, &byte) in tag_region.iter().enumerate().skip(name_end) {
        match open_quote {
            Some(q) => {
                if byte == q {
                    open_quote = None;
                }
            }
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'>' => return Some(indent_len + offset),
                _ => {}
            },
        }
    }
    None
}
/// Reports whether `slice`, after leading spaces, tabs and line breaks, begins
/// with an open tag whose name satisfies `is_pandoc_void_block_tag_name`.
///
/// Closing tags and anything `parse_open_tag` rejects yield `false`. The name
/// is the run of ASCII alphanumerics and `-` right after `<`.
fn slice_starts_with_void_block_tag(slice: &str) -> bool {
    let candidate = slice.trim_start_matches([' ', '\t', '\n', '\r']);
    if candidate.starts_with("</") || !candidate.starts_with('<') {
        return false;
    }
    let Some(tag_end) = parse_open_tag(candidate) else {
        return false;
    };

    let name_len = candidate
        .as_bytes()
        .iter()
        .take(tag_end)
        .skip(1)
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'-')
        .count();
    if name_len == 0 {
        return false;
    }
    is_pandoc_void_block_tag_name(&candidate[1..1 + name_len])
}
/// Checks whether the first non-blank content after the open tag starts with
/// a void block tag (per [`slice_starts_with_void_block_tag`]).
///
/// With a single-line open tag, any text after its `>` on the same line is
/// examined first. Otherwise the following lines are scanned (with blockquote
/// markers stripped when `bq_depth > 0`) and the first line that is not blank
/// decides the result. Returns `false` when no such content exists.
fn inline_block_void_interior_abandons(
    first_inner: &str,
    lines: &[&str],
    start_pos: usize,
    multiline_open_end: Option<usize>,
    bq_depth: usize,
    tag_name: &str,
) -> bool {
    let (line_no_nl, _) = strip_newline(first_inner);

    let (body_start_line_idx, open_trailing) = if let Some(end) = multiline_open_end {
        // A multi-line open tag contributes no same-line trailing text here.
        (end + 1, "")
    } else {
        let trailing = match locate_open_tag_close_gt(line_no_nl, tag_name) {
            Some(gt) => &line_no_nl[gt + 1..],
            None => "",
        };
        (start_pos + 1, trailing)
    };

    let same_line_rest = open_trailing.trim_start_matches([' ', '\t']);
    if !same_line_rest.is_empty() {
        return slice_starts_with_void_block_tag(same_line_rest);
    }

    lines[body_start_line_idx..]
        .iter()
        .map(|&line| {
            if bq_depth > 0 {
                strip_n_blockquote_markers(line, bq_depth)
            } else {
                line
            }
        })
        .map(|inner| inner.trim_start_matches([' ', '\t', '\n', '\r']))
        .find(|content| !content.is_empty())
        .map_or(false, |content| slice_starts_with_void_block_tag(content))
}
/// Reports whether `line` holds a complete open tag for `tag_name`.
///
/// After optional space/tab indentation the line must start with `<tag_name`
/// (ASCII case-insensitive) plus at least one further byte, and contain a `>`
/// that is not inside a single- or double-quoted attribute value.
pub(crate) fn probe_open_tag_line_has_close_gt(line: &str, tag_name: &str) -> bool {
    let raw = line.trim_start_matches([' ', '\t']).as_bytes();
    let name_end = 1 + tag_name.len();

    let names_tag = raw.len() > name_end
        && raw[0] == b'<'
        && raw[1..name_end].eq_ignore_ascii_case(tag_name.as_bytes());
    if !names_tag {
        return false;
    }

    let mut open_quote: Option<u8> = None;
    raw[name_end..]
        .iter()
        .any(|&byte| match (open_quote, byte) {
            (Some(q), b) if b == q => {
                open_quote = None;
                false
            }
            (None, b'"') | (None, b'\'') => {
                open_quote = Some(byte);
                false
            }
            (None, b'>') => true,
            _ => false,
        })
}
/// Reports whether `line` opens `tag_name` and also contains its matching
/// close tag later on the same line.
///
/// The open tag must follow optional space/tab indentation and end with an
/// unquoted `>`; the text after that `>` is handed to
/// [`matched_close_offset`].
fn probe_same_line_lift(line: &str, tag_name: &str) -> bool {
    let tag_region = line.trim_start_matches([' ', '\t']);
    let raw = tag_region.as_bytes();
    let name_end = 1 + tag_name.len();

    let names_tag = raw.len() >= name_end
        && raw[0] == b'<'
        && raw[1..name_end].eq_ignore_ascii_case(tag_name.as_bytes());
    if !names_tag {
        return false;
    }

    let mut open_quote: Option<u8> = None;
    let gt_after_name = raw[name_end..]
        .iter()
        .position(|&byte| match (open_quote, byte) {
            (Some(q), b) if b == q => {
                open_quote = None;
                false
            }
            (None, b'"') | (None, b'\'') => {
                open_quote = Some(byte);
                false
            }
            (None, b'>') => true,
            _ => false,
        });

    match gt_after_name {
        Some(gt) => {
            let trailing = &tag_region[name_end + gt + 1..];
            matched_close_offset(trailing, tag_name).is_some()
        }
        None => false,
    }
}
/// Finds the close tag that balances an already-open `tag_name` element.
///
/// `trailing` is scanned tag by tag starting at depth 1. Open tags for
/// `tag_name` (ASCII case-insensitive, followed by whitespace, `>`, `/` or end
/// of input) raise the depth unless they are self-closing (`/>`); close tags
/// lower it. Quoted attribute values may contain `>`. Returns the byte range
/// `(start_of_"<", one_past_">")` of the close tag that brings the depth to
/// zero, or `None` if there is none.
fn matched_close_offset(trailing: &str, tag_name: &str) -> Option<(usize, usize)> {
    let raw = trailing.as_bytes();
    let folded_text = trailing.to_ascii_lowercase();
    let folded = folded_text.as_bytes();
    let wanted_name = tag_name.to_ascii_lowercase();
    let wanted = wanted_name.as_bytes();

    let mut open_count: i32 = 1;
    let mut cursor = 0usize;
    while cursor < raw.len() {
        if raw[cursor] != b'<' {
            cursor += 1;
            continue;
        }

        let tag_start = cursor;
        let closing = raw.get(tag_start + 1) == Some(&b'/');
        let name_start = tag_start + if closing { 2 } else { 1 };
        let name_end = name_start + wanted.len();
        let names_tag = folded.get(name_start..name_end) == Some(wanted);
        let at_boundary = names_tag
            && matches!(
                raw.get(name_end).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/')
            );

        // Look for the `>` ending this tag, skipping quoted attribute values.
        let scan_from = if names_tag { name_end } else { tag_start + 1 };
        let mut open_quote: Option<u8> = None;
        let mut gt_at: Option<usize> = None;
        for (pos, &byte) in raw.iter().enumerate().skip(scan_from) {
            match (open_quote, byte) {
                (Some(q), b) if b == q => open_quote = None,
                (None, b'"') | (None, b'\'') => open_quote = Some(byte),
                (None, b'>') => {
                    gt_at = Some(pos);
                    break;
                }
                _ => {}
            }
        }
        let self_closing = matches!(gt_at, Some(gt) if gt > tag_start + 1 && raw[gt - 1] == b'/');

        if names_tag && at_boundary {
            if closing {
                open_count -= 1;
                if open_count == 0 {
                    if let Some(gt) = gt_at {
                        return Some((tag_start, gt + 1));
                    }
                }
            } else if !self_closing {
                open_count += 1;
            }
        }

        match gt_at {
            Some(gt) => cursor = gt + 1,
            // An unterminated tag ends the scan.
            None => break,
        }
    }
    None
}
/// Returns the byte offset just past the `>` of a close tag at the start of
/// `close_part`.
///
/// `close_part` must begin with `</tag_name` (ASCII case-insensitive). The
/// first `>` outside single or double quotes ends the marker. Returns `None`
/// when the prefix does not match or no such `>` exists.
fn split_close_marker_end(close_part: &str, tag_name: &str) -> Option<usize> {
    let raw = close_part.as_bytes();
    let name_end = tag_name.len() + 2;

    let head = raw.get(..name_end)?;
    if !head.starts_with(b"</") || !head[2..].eq_ignore_ascii_case(tag_name.as_bytes()) {
        return None;
    }

    let mut open_quote: Option<u8> = None;
    for (pos, &byte) in raw.iter().enumerate().skip(name_end) {
        match open_quote {
            Some(q) => {
                if byte == q {
                    open_quote = None;
                }
            }
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'>' => return Some(pos + 1),
                _ => {}
            },
        }
    }
    None
}
/// Splits `line` at its single close tag for `tag_name`.
///
/// Only applies when `count_tag_balance` reports no open tags and exactly one
/// close tag on the line. Returns `(text_before_close, close_tag_and_rest)`,
/// or `None` when the balance differs or `</tag_name` is not found.
///
/// The search is ASCII case-insensitive on both sides: the line and the tag
/// name are lower-cased before matching. Lower-casing ASCII keeps byte offsets
/// stable, so the offset found in the folded copy is valid in `line`.
fn try_split_close_line<'a>(line: &'a str, tag_name: &str) -> Option<(&'a str, &'a str)> {
    let (opens, closes) = count_tag_balance(line, tag_name);
    if opens != 0 || closes != 1 {
        return None;
    }
    // Fold the tag name too; otherwise a name containing uppercase could never
    // match the lower-cased line.
    let needle = format!("</{}", tag_name.to_ascii_lowercase());
    let lower = line.to_ascii_lowercase();
    let close_lt = lower.find(&needle)?;
    Some((&line[..close_lt], &line[close_lt..]))
}
/// Splits `line` at the close tag that balances `tag_name`, as located by
/// [`matched_close_offset`].
///
/// Returns `(text_before_close, close_tag_and_rest)`, or `None` when no
/// balancing close tag exists on the line.
fn try_split_close_line_depth_aware<'a>(
    line: &'a str,
    tag_name: &str,
) -> Option<(&'a str, &'a str)> {
    matched_close_offset(line, tag_name).map(|(close_start, _close_end)| line.split_at(close_start))
}
/// Tokenizes a single-line open tag and returns any text to be lifted.
///
/// Emitted in order: leading spaces as `WHITESPACE`; `<tag_name` as `TEXT`;
/// whitespace before the attributes; the attribute text inside an
/// `HTML_ATTRS` node; for a self-closing tag, the whitespace before `/` and
/// the `/` itself; then `>`. If the line does not start (after spaces) with
/// `<tag_name`, or has no unquoted `>`, the rest of the line is emitted as one
/// `TEXT` token and `""` is returned.
///
/// Text after `>` is returned untouched when `lift_trailing` is set;
/// otherwise it is emitted as `TEXT` (if non-empty) and `""` is returned.
fn emit_open_tag_tokens<'a>(
    builder: &mut GreenNodeBuilder<'static>,
    line: &'a str,
    tag_name: &str,
    lift_trailing: bool,
) -> &'a str {
    // Only spaces count as indentation here.
    let rest = line.trim_start_matches(' ');
    let indent = &line[..line.len() - rest.len()];
    if !indent.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), indent);
    }

    let name_end = 1 + tag_name.len();
    let names_tag = rest.starts_with('<')
        && rest.len() >= name_end
        && rest.as_bytes()[1..name_end].eq_ignore_ascii_case(tag_name.as_bytes());
    if !names_tag {
        builder.token(SyntaxKind::TEXT.into(), rest);
        return "";
    }

    let (open_marker, after_name) = rest.split_at(name_end);

    // First `>` that is not inside a quoted attribute value.
    let mut open_quote: Option<u8> = None;
    let gt_offset = after_name
        .bytes()
        .position(|byte| match (open_quote, byte) {
            (Some(q), b) if b == q => {
                open_quote = None;
                false
            }
            (None, b'"') | (None, b'\'') => {
                open_quote = Some(byte);
                false
            }
            (None, b'>') => true,
            _ => false,
        });
    let Some(gt_offset) = gt_offset else {
        builder.token(SyntaxKind::TEXT.into(), rest);
        return "";
    };

    let attr_region = &after_name[..gt_offset];
    let after_gt = &after_name[gt_offset + 1..];

    let attrs_after_ws = attr_region.trim_start_matches([' ', '\t']);
    let leading_ws = &attr_region[..attr_region.len() - attrs_after_ws.len()];

    // For `... />`, separate the attributes, the gap before `/`, and the `/`.
    let (attrs_text, gap_ws, self_close) = match attrs_after_ws.strip_suffix('/') {
        Some(before_slash) => {
            let attrs = before_slash.trim_end_matches([' ', '\t']);
            (attrs, &before_slash[attrs.len()..], "/")
        }
        None => (attrs_after_ws, "", ""),
    };

    builder.token(SyntaxKind::TEXT.into(), open_marker);
    if !leading_ws.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), leading_ws);
    }
    if !attrs_text.is_empty() {
        builder.start_node(SyntaxKind::HTML_ATTRS.into());
        builder.token(SyntaxKind::TEXT.into(), attrs_text);
        builder.finish_node();
    }
    if !gap_ws.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), gap_ws);
    }
    if !self_close.is_empty() {
        builder.token(SyntaxKind::TEXT.into(), self_close);
    }
    builder.token(SyntaxKind::TEXT.into(), ">");

    if lift_trailing {
        return after_gt;
    }
    if !after_gt.is_empty() {
        builder.token(SyntaxKind::TEXT.into(), after_gt);
    }
    ""
}
/// Finds the line on which a multi-line open tag ends.
///
/// `first_inner` is the first line of the tag (already without blockquote
/// markers). It must start, after `strip_leading_spaces`, with `<tag_name`
/// (ASCII case-insensitive). Returns `None` when the tag name does not match,
/// when the first line already contains an unquoted `>` (the tag is not
/// multi-line), or when no later line contains one. Otherwise returns the
/// index into `lines` of the first later line holding an unquoted `>`.
///
/// Quote state (`"` / `'`) is carried across lines, so a `>` inside an
/// attribute value that spans lines is ignored. Lines after the first have
/// `bq_depth` blockquote markers stripped before scanning.
fn find_multiline_open_end(
    lines: &[&str],
    start_pos: usize,
    first_inner: &str,
    tag_name: &str,
    bq_depth: usize,
) -> Option<usize> {
    let trimmed = strip_leading_spaces(first_inner);
    let prefix_len = 1 + tag_name.len();
    // Compare bytes rather than slicing the `str`: `prefix_len` may fall inside
    // a multi-byte character when the line does not actually hold this tag,
    // and a `str` slice would panic there.
    let trimmed_bytes = trimmed.as_bytes();
    if !trimmed.starts_with('<')
        || trimmed_bytes.len() < prefix_len
        || !trimmed_bytes[1..prefix_len].eq_ignore_ascii_case(tag_name.as_bytes())
    {
        return None;
    }

    let leading_indent = first_inner.len() - trimmed.len();
    let mut quote: Option<u8> = None;
    for &b in first_inner
        .as_bytes()
        .iter()
        .skip(leading_indent + prefix_len)
    {
        match (quote, b) {
            (None, b'"') | (None, b'\'') => quote = Some(b),
            (Some(q), x) if x == q => quote = None,
            // The tag closes on its first line, so it is not multi-line.
            (None, b'>') => return None,
            _ => {}
        }
    }

    let mut line_idx = start_pos + 1;
    while line_idx < lines.len() {
        let raw = lines[line_idx];
        let inner = if bq_depth > 0 {
            strip_n_blockquote_markers(raw, bq_depth)
        } else {
            raw
        };
        for &b in inner.as_bytes() {
            match (quote, b) {
                (None, b'"') | (None, b'\'') => quote = Some(b),
                (Some(q), x) if x == q => quote = None,
                (None, b'>') => return Some(line_idx),
                _ => {}
            }
        }
        line_idx += 1;
    }
    None
}
/// Reports whether the tag starting at `lines[start_pos]` is ever closed by an
/// unquoted `>`, on that line or any later one.
///
/// The first line must start, after optional spaces, with `<`. Every scanned
/// line has `bq_depth` blockquote markers stripped first. Quote state
/// (`"` / `'`) carries over from line to line.
pub(crate) fn pandoc_html_open_tag_closes(
    lines: &[&str],
    start_pos: usize,
    bq_depth: usize,
) -> bool {
    if start_pos >= lines.len() {
        return false;
    }

    let mut open_quote: Option<u8> = None;
    for (line_idx, line) in lines.iter().enumerate().skip(start_pos) {
        let inner = if bq_depth > 0 {
            strip_n_blockquote_markers(line, bq_depth)
        } else {
            line
        };
        let bytes = inner.as_bytes();

        let scan_from = if line_idx == start_pos {
            let indent = bytes.iter().take_while(|&&b| b == b' ').count();
            if bytes.get(indent) != Some(&b'<') {
                return false;
            }
            // Resume right after the `<`.
            indent + 1
        } else {
            0
        };

        for &byte in &bytes[scan_from..] {
            match (open_quote, byte) {
                (Some(q), b) if b == q => open_quote = None,
                (None, b'"') | (None, b'\'') => open_quote = Some(byte),
                (None, b'>') => return true,
                _ => {}
            }
        }
    }
    false
}
/// Tokenizes an open tag that spans `lines[start_pos..=end_line_idx]`,
/// wrapping attribute text in `HTML_ATTRS` nodes.
///
/// Per line (after stripping `bq_depth` blockquote markers):
/// - continuation lines first get their blockquote prefix re-emitted via
///   `emit_bq_prefix_tokens` (the first line's prefix is not emitted here);
/// - first line: leading spaces, `<tag_name` as `TEXT`, then the remainder
///   through `emit_attr_region`;
/// - middle lines: leading whitespace, then the rest as one `HTML_ATTRS` node;
/// - last line: leading whitespace, attribute text up to the closing `>`,
///   trailing whitespace, `>`, and finally any text after `>`.
///
/// Text after the closing `>` is appended to `pre_content` (together with the
/// line's newline) when `lift_trailing` is set; otherwise it is emitted as
/// `TEXT`. If the last line has no closing `>`, its content is emitted as
/// plain `TEXT`.
///
/// Quote state (`"` / `'`) is tracked across all lines of the tag, matching
/// how `find_multiline_open_end` scans, so a `>` inside an attribute value
/// that started on an earlier line is not mistaken for the end of the tag.
#[allow(clippy::too_many_arguments)]
fn emit_multiline_open_tag_with_attrs(
    builder: &mut GreenNodeBuilder<'static>,
    lines: &[&str],
    start_pos: usize,
    end_line_idx: usize,
    tag_name: &str,
    bq_depth: usize,
    lift_trailing: bool,
    pre_content: &mut String,
) {
    // Advances the open-quote state over `bytes` and returns the new state.
    let track_quotes = |mut state: Option<u8>, bytes: &[u8]| -> Option<u8> {
        for &b in bytes {
            match (state, b) {
                (None, b'"') | (None, b'\'') => state = Some(b),
                (Some(q), x) if x == q => state = None,
                _ => {}
            }
        }
        state
    };

    let prefix_len = 1 + tag_name.len();
    // Quote that is currently open, carried from line to line.
    let mut quote: Option<u8> = None;
    for (line_idx, raw) in lines
        .iter()
        .enumerate()
        .take(end_line_idx + 1)
        .skip(start_pos)
    {
        let stripped = if bq_depth > 0 {
            strip_n_blockquote_markers(raw, bq_depth)
        } else {
            raw
        };
        let bq_prefix_len = raw.len() - stripped.len();
        if bq_prefix_len > 0 && line_idx != start_pos {
            emit_bq_prefix_tokens(builder, &raw[..bq_prefix_len]);
        }
        let line = stripped;
        let (line_no_nl, newline_str) = strip_newline(line);
        if line_idx == start_pos {
            // First line: indentation, `<tag`, then the start of the attributes.
            let bytes = line_no_nl.as_bytes();
            let indent_end = bytes.iter().position(|&b| b != b' ').unwrap_or(bytes.len());
            if indent_end > 0 {
                builder.token(SyntaxKind::WHITESPACE.into(), &line_no_nl[..indent_end]);
            }
            let after_indent = &line_no_nl[indent_end..];
            if after_indent.len() >= prefix_len {
                builder.token(SyntaxKind::TEXT.into(), &after_indent[..prefix_len]);
                let rest = &after_indent[prefix_len..];
                quote = track_quotes(quote, rest.as_bytes());
                emit_attr_region(builder, rest);
            } else {
                builder.token(SyntaxKind::TEXT.into(), after_indent);
            }
        } else if line_idx < end_line_idx {
            // Middle line: everything after the indentation is attribute text.
            let bytes = line_no_nl.as_bytes();
            let indent_end = bytes
                .iter()
                .position(|&b| !matches!(b, b' ' | b'\t'))
                .unwrap_or(bytes.len());
            if indent_end > 0 {
                builder.token(SyntaxKind::WHITESPACE.into(), &line_no_nl[..indent_end]);
            }
            let attrs_text = &line_no_nl[indent_end..];
            quote = track_quotes(quote, attrs_text.as_bytes());
            if !attrs_text.is_empty() {
                builder.start_node(SyntaxKind::HTML_ATTRS.into());
                builder.token(SyntaxKind::TEXT.into(), attrs_text);
                builder.finish_node();
            }
        } else {
            // Last line: find the `>` that ends the tag, honouring any quote
            // still open from earlier lines.
            let bytes = line_no_nl.as_bytes();
            let indent_end = bytes
                .iter()
                .position(|&b| !matches!(b, b' ' | b'\t'))
                .unwrap_or(bytes.len());
            if indent_end > 0 {
                builder.token(SyntaxKind::WHITESPACE.into(), &line_no_nl[..indent_end]);
            }
            let mut gt_pos: Option<usize> = None;
            for (j, &b) in bytes[indent_end..].iter().enumerate() {
                let actual_j = indent_end + j;
                match (quote, b) {
                    (None, b'"') | (None, b'\'') => quote = Some(b),
                    (Some(q), x) if x == q => quote = None,
                    (None, b'>') => {
                        gt_pos = Some(actual_j);
                        break;
                    }
                    _ => {}
                }
            }
            let Some(gt) = gt_pos else {
                // No closing `>` on this line: keep the text verbatim.
                builder.token(SyntaxKind::TEXT.into(), &line_no_nl[indent_end..]);
                if !newline_str.is_empty() {
                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
                }
                continue;
            };
            let attrs_region = &line_no_nl[indent_end..gt];
            let region_bytes = attrs_region.as_bytes();
            let mut attr_end = region_bytes.len();
            while attr_end > 0 && matches!(region_bytes[attr_end - 1], b' ' | b'\t') {
                attr_end -= 1;
            }
            let attrs_text = &attrs_region[..attr_end];
            let trailing_ws = &attrs_region[attr_end..];
            if !attrs_text.is_empty() {
                builder.start_node(SyntaxKind::HTML_ATTRS.into());
                builder.token(SyntaxKind::TEXT.into(), attrs_text);
                builder.finish_node();
            }
            if !trailing_ws.is_empty() {
                builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
            }
            builder.token(SyntaxKind::TEXT.into(), ">");
            let after_gt = &line_no_nl[gt + 1..];
            if lift_trailing && !after_gt.is_empty() {
                // The trailing text and its newline become body content.
                pre_content.push_str(after_gt);
                pre_content.push_str(newline_str);
                continue;
            }
            if !after_gt.is_empty() {
                builder.token(SyntaxKind::TEXT.into(), after_gt);
            }
        }
        if !newline_str.is_empty() {
            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
        }
    }
}
/// Emits the lines `lines[start_pos..=end_line_idx]` of a multi-line open tag
/// without attribute structure.
///
/// Each line has `bq_depth` blockquote markers stripped. On continuation lines
/// the stripped prefix is re-emitted via `emit_bq_prefix_tokens`; the first
/// line's prefix is not emitted here. The remaining text becomes one `TEXT`
/// token followed by a `NEWLINE` token, each only when non-empty.
fn emit_multiline_open_tag_simple(
    builder: &mut GreenNodeBuilder<'static>,
    lines: &[&str],
    start_pos: usize,
    end_line_idx: usize,
    bq_depth: usize,
) {
    let tag_lines = lines
        .iter()
        .enumerate()
        .take(end_line_idx + 1)
        .skip(start_pos);

    for (line_idx, raw) in tag_lines {
        let stripped = if bq_depth > 0 {
            strip_n_blockquote_markers(raw, bq_depth)
        } else {
            raw
        };

        let marker_len = raw.len() - stripped.len();
        let is_continuation = line_idx != start_pos;
        if is_continuation && marker_len > 0 {
            emit_bq_prefix_tokens(builder, &raw[..marker_len]);
        }

        let (text, line_ending) = strip_newline(stripped);
        if !text.is_empty() {
            builder.token(SyntaxKind::TEXT.into(), text);
        }
        if !line_ending.is_empty() {
            builder.token(SyntaxKind::NEWLINE.into(), line_ending);
        }
    }
}
/// Emits the part of an open-tag line that follows the tag name.
///
/// Leading spaces and tabs become one `WHITESPACE` token; whatever remains is
/// wrapped in an `HTML_ATTRS` node holding a single `TEXT` token. An empty
/// `region` emits nothing.
fn emit_attr_region(builder: &mut GreenNodeBuilder<'static>, region: &str) {
    if region.is_empty() {
        return;
    }
    let bytes = region.as_bytes();
    let ws_end = bytes
        .iter()
        .position(|&b| !matches!(b, b' ' | b'\t'))
        .unwrap_or(bytes.len());
    if ws_end > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &region[..ws_end]);
    }
    let attrs_text = &region[ws_end..];
    if !attrs_text.is_empty() {
        builder.start_node(SyntaxKind::HTML_ATTRS.into());
        builder.token(SyntaxKind::TEXT.into(), attrs_text);
        builder.finish_node();
    }
}
/// Emits one raw line of an HTML block.
///
/// When `bq_depth > 0`, the blockquote prefix removed by
/// `strip_n_blockquote_markers` is emitted first, one token per character
/// (`>` as `BLOCK_QUOTE_MARKER`, anything else as `WHITESPACE`). The rest of
/// the line becomes a `TEXT` token followed by a `NEWLINE` token, each only
/// when non-empty.
fn emit_html_block_line(builder: &mut GreenNodeBuilder<'static>, line: &str, bq_depth: usize) {
    let body = if bq_depth == 0 {
        line
    } else {
        let after_markers = strip_n_blockquote_markers(line, bq_depth);
        let marker_len = line.len() - after_markers.len();
        for (start, ch) in line[..marker_len].char_indices() {
            let kind = match ch {
                '>' => SyntaxKind::BLOCK_QUOTE_MARKER,
                _ => SyntaxKind::WHITESPACE,
            };
            builder.token(kind.into(), &line[start..start + ch.len_utf8()]);
        }
        after_markers
    };

    let (text, line_ending) = strip_newline(body);
    if !text.is_empty() {
        builder.token(SyntaxKind::TEXT.into(), text);
    }
    if !line_ending.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), line_ending);
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_try_parse_html_comment() {
assert_eq!(
try_parse_html_block_start("<!-- comment -->", false),
Some(HtmlBlockType::Comment)
);
assert_eq!(
try_parse_html_block_start(" <!-- comment -->", false),
Some(HtmlBlockType::Comment)
);
}
#[test]
fn test_try_parse_div_tag() {
assert_eq!(
try_parse_html_block_start("<div>", false),
Some(HtmlBlockType::BlockTag {
tag_name: "div".to_string(),
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
})
);
assert_eq!(
try_parse_html_block_start("<div class=\"test\">", false),
Some(HtmlBlockType::BlockTag {
tag_name: "div".to_string(),
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
})
);
}
#[test]
fn test_try_parse_script_tag() {
assert_eq!(
try_parse_html_block_start("<script>", false),
Some(HtmlBlockType::BlockTag {
tag_name: "script".to_string(),
is_verbatim: true,
closed_by_blank_line: false,
depth_aware: true,
closes_at_open_tag: false,
is_closing: false,
})
);
}
#[test]
fn test_try_parse_processing_instruction() {
assert_eq!(
try_parse_html_block_start("<?xml version=\"1.0\"?>", false),
Some(HtmlBlockType::ProcessingInstruction)
);
}
#[test]
fn test_try_parse_declaration() {
assert_eq!(
try_parse_html_block_start("<!DOCTYPE html>", true),
Some(HtmlBlockType::Declaration)
);
assert_eq!(
try_parse_html_block_start("<!doctype html>", true),
Some(HtmlBlockType::Declaration)
);
assert_eq!(try_parse_html_block_start("<!DOCTYPE html>", false), None);
assert_eq!(try_parse_html_block_start("<!doctype html>", false), None);
}
#[test]
fn test_dialect_specific_block_tag_membership() {
for cm_only in [
"<dialog>",
"<legend>",
"<menuitem>",
"<optgroup>",
"<option>",
"<frame>",
"<base>",
"<basefont>",
"<link>",
"<param>",
] {
assert!(
matches!(
try_parse_html_block_start(cm_only, true),
Some(HtmlBlockType::BlockTag { .. })
),
"{cm_only} should be a block-tag start under CommonMark",
);
assert_eq!(
try_parse_html_block_start(cm_only, false),
None,
"{cm_only} should NOT be a block-tag start under Pandoc",
);
}
for pandoc_only in ["<canvas>", "<hgroup>", "<isindex>", "<meta>", "<output>"] {
assert!(
!matches!(
try_parse_html_block_start(pandoc_only, true),
Some(HtmlBlockType::BlockTag { .. })
),
"{pandoc_only} should NOT be a type-6 block-tag start under CommonMark",
);
assert!(
matches!(
try_parse_html_block_start(pandoc_only, false),
Some(HtmlBlockType::BlockTag { .. })
),
"{pandoc_only} should be a block-tag start under Pandoc",
);
}
}
#[test]
fn test_pandoc_inline_block_tag_membership() {
for tag in [
"<button>",
"<iframe>",
"<video>",
"<audio>",
"<noscript>",
"<object>",
"<map>",
"<progress>",
"<del>",
"<ins>",
"<svg>",
"<applet>",
] {
assert!(
matches!(
try_parse_html_block_start(tag, false),
Some(HtmlBlockType::BlockTag {
depth_aware: true,
..
})
),
"{tag} should be a depth-aware block-tag start under Pandoc",
);
}
for closing in ["</button>", "</iframe>", "</video>", "</audio>"] {
assert!(
matches!(
try_parse_html_block_start(closing, false),
Some(HtmlBlockType::BlockTag {
depth_aware: false,
closes_at_open_tag: true,
..
})
),
"{closing} (closing form) should be a single-line block-tag start under Pandoc",
);
}
}
#[test]
fn test_pandoc_void_block_tag_membership() {
for tag in [
"<area>",
"<embed>",
"<source>",
"<track>",
"<embed src=\"foo.swf\">",
"<source src=\"foo.mp4\" type=\"video/mp4\">",
] {
assert!(
matches!(
try_parse_html_block_start(tag, false),
Some(HtmlBlockType::BlockTag {
depth_aware: false,
closes_at_open_tag: true,
..
})
),
"{tag} should be a void block-tag start under Pandoc",
);
}
for closing in ["</area>", "</embed>", "</source>", "</track>"] {
assert!(
matches!(
try_parse_html_block_start(closing, false),
Some(HtmlBlockType::BlockTag {
depth_aware: false,
closes_at_open_tag: true,
..
})
),
"{closing} (closing form) should be a single-line void block-tag start under Pandoc",
);
}
assert_eq!(
try_parse_html_block_start("<embed>", true),
Some(HtmlBlockType::Type7)
);
assert_eq!(
try_parse_html_block_start("<area>", true),
Some(HtmlBlockType::Type7)
);
assert!(matches!(
try_parse_html_block_start("<source src=\"x\">", true),
Some(HtmlBlockType::BlockTag {
closed_by_blank_line: true,
closes_at_open_tag: false,
..
})
));
assert!(matches!(
try_parse_html_block_start("<track src=\"x\">", true),
Some(HtmlBlockType::BlockTag {
closed_by_blank_line: true,
closes_at_open_tag: false,
..
})
));
}
#[test]
fn test_find_multiline_open_end() {
assert_eq!(
find_multiline_open_end(&["<div id=\"x\">"], 0, "<div id=\"x\">", "div", 0),
None
);
assert_eq!(
find_multiline_open_end(&["<embed src=\"x\">"], 0, "<embed src=\"x\">", "embed", 0),
None
);
assert_eq!(
find_multiline_open_end(&["<embed", " src=\"x\">"], 0, "<embed", "embed", 0),
Some(1)
);
assert_eq!(
find_multiline_open_end(
&["<embed", " src=\"x\"", " type=\"video\">"],
0,
"<embed",
"embed",
0
),
Some(2)
);
assert_eq!(
find_multiline_open_end(&["<embed", " src=\"x\">"], 0, "<embed", "div", 0),
None
);
assert_eq!(
find_multiline_open_end(&["<EMBED", " src=\"x\">"], 0, "<EMBED", "embed", 0),
Some(1)
);
assert_eq!(
find_multiline_open_end(
&["<embed title=\"a>b", " c\">"],
0,
"<embed title=\"a>b",
"embed",
0
),
Some(1)
);
assert_eq!(
find_multiline_open_end(&["<embed", " src=\"x\""], 0, "<embed", "embed", 0),
None
);
assert_eq!(
find_multiline_open_end(&["<div", "> id=\"x\">"], 0, "<div", "div", 1),
Some(1)
);
assert_eq!(
find_multiline_open_end(
&["<section", "> > id=\"x\">"],
0,
"<section",
"section",
2
),
Some(1)
);
}
#[test]
fn test_pandoc_html_open_tag_closes() {
assert!(pandoc_html_open_tag_closes(&["<div>"], 0, 0));
assert!(pandoc_html_open_tag_closes(&["<embed src=\"x\">"], 0, 0));
assert!(pandoc_html_open_tag_closes(
&["<div", " id=\"x\">", "body", "</div>"],
0,
0
));
assert!(pandoc_html_open_tag_closes(
&["<embed", " src=\"x.png\" alt=\"y\">"],
0,
0
));
assert!(!pandoc_html_open_tag_closes(
&["<div title=\"a>b", " c\""],
0,
0
));
assert!(pandoc_html_open_tag_closes(
&["<div title=\"a>b", " c\">"],
0,
0
));
assert!(!pandoc_html_open_tag_closes(&["<embed"], 0, 0));
assert!(!pandoc_html_open_tag_closes(&["<div", "foo", "bar"], 0, 0));
assert!(pandoc_html_open_tag_closes(
&["<div", "", "id=\"x\">"],
0,
0
));
}
#[test]
fn test_try_parse_cdata() {
assert_eq!(
try_parse_html_block_start("<![CDATA[content]]>", true),
Some(HtmlBlockType::CData)
);
assert_eq!(
try_parse_html_block_start("<![CDATA[content]]>", false),
None
);
}
#[test]
fn test_extract_block_tag_name_open_only() {
assert_eq!(
extract_block_tag_name("<div>", false),
Some("div".to_string())
);
assert_eq!(
extract_block_tag_name("<div class=\"test\">", false),
Some("div".to_string())
);
assert_eq!(
extract_block_tag_name("<div/>", false),
Some("div".to_string())
);
assert_eq!(extract_block_tag_name("</div>", false), None);
assert_eq!(extract_block_tag_name("<>", false), None);
assert_eq!(extract_block_tag_name("< div>", false), None);
}
#[test]
fn test_extract_block_tag_name_with_closing() {
assert_eq!(
extract_block_tag_name("</div>", true),
Some("div".to_string())
);
assert_eq!(
extract_block_tag_name("</div >", true),
Some("div".to_string())
);
}
#[test]
fn test_commonmark_type6_closing_tag_start() {
assert_eq!(
try_parse_html_block_start("</div>", true),
Some(HtmlBlockType::BlockTag {
tag_name: "div".to_string(),
is_verbatim: false,
closed_by_blank_line: true,
depth_aware: false,
closes_at_open_tag: false,
is_closing: true,
})
);
}
#[test]
fn test_commonmark_type7_open_tag() {
assert_eq!(
try_parse_html_block_start("<a href=\"foo\">", true),
Some(HtmlBlockType::Type7)
);
assert_eq!(try_parse_html_block_start("<a href=\"foo\">", false), None);
}
#[test]
fn test_commonmark_type7_close_tag() {
assert_eq!(
try_parse_html_block_start("</ins>", true),
Some(HtmlBlockType::Type7)
);
}
#[test]
fn test_commonmark_type7_rejects_with_trailing_text() {
assert_eq!(try_parse_html_block_start("<a> hi", true), None);
}
#[test]
fn test_is_closing_marker_comment() {
let block_type = HtmlBlockType::Comment;
assert!(is_closing_marker("-->", &block_type));
assert!(is_closing_marker("end -->", &block_type));
assert!(!is_closing_marker("<!--", &block_type));
}
#[test]
fn test_is_closing_marker_tag() {
let block_type = HtmlBlockType::BlockTag {
tag_name: "div".to_string(),
is_verbatim: false,
closed_by_blank_line: false,
depth_aware: false,
closes_at_open_tag: false,
is_closing: false,
};
assert!(is_closing_marker("</div>", &block_type));
assert!(is_closing_marker("</DIV>", &block_type)); assert!(is_closing_marker("content</div>", &block_type));
assert!(!is_closing_marker("<div>", &block_type));
}
#[test]
fn test_parse_html_comment_block() {
let input = "<!-- comment -->\n";
let lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(input);
let mut builder = GreenNodeBuilder::new();
let block_type = try_parse_html_block_start(lines[0], false).unwrap();
let opts = ParserOptions::default();
let new_pos = parse_html_block_with_wrapper(
&mut builder,
&lines,
0,
block_type,
0,
SyntaxKind::HTML_BLOCK,
&opts,
);
assert_eq!(new_pos, 1);
}
#[test]
fn test_parse_div_block() {
let input = "<div>\ncontent\n</div>\n";
let lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(input);
let mut builder = GreenNodeBuilder::new();
let block_type = try_parse_html_block_start(lines[0], false).unwrap();
let opts = ParserOptions::default();
let new_pos = parse_html_block_with_wrapper(
&mut builder,
&lines,
0,
block_type,
0,
SyntaxKind::HTML_BLOCK,
&opts,
);
assert_eq!(new_pos, 3);
}
/// A `<div>` that is never closed: the test expects the returned position to
/// be 2 for this two-line input, i.e. the block runs to the end of the input.
#[test]
fn test_parse_html_block_no_closing() {
    let source = "<div>\ncontent\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], false).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK,
        &opts,
    );

    assert_eq!(end_pos, 2);
}
/// Nested `<div>` elements separated by blank lines, wrapped as
/// `HTML_BLOCK_DIV`: the returned position must be 9, so the inner `</div>`
/// must not end the outer block.
#[test]
fn test_parse_div_block_nested_pandoc() {
    let source =
        "<div id=\"outer\">\n\n<div id=\"inner\">\n\ndeep content\n\n</div>\n\n</div>\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], false).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK_DIV,
        &opts,
    );

    assert_eq!(end_pos, 9);
}
/// A `<div>` opened and closed on the same line, wrapped as `HTML_BLOCK_DIV`:
/// the returned position is 1.
#[test]
fn test_parse_div_block_same_line_pandoc() {
    let source = "<div>foo</div>\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], false).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK_DIV,
        &opts,
    );

    assert_eq!(end_pos, 1);
}
/// A `<script>` block whose body mentions `<script>` inside a string literal:
/// the returned position must be 3, i.e. the block ends on the `</script>`
/// line.
// NOTE(review): per the test name, the intent appears to be that the embedded
// `<script>` text does not count as nesting — confirm.
#[test]
fn test_commonmark_verbatim_first_close() {
    let source = "<script>\nlet x = '<script>';\n</script>\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], true).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK,
        &opts,
    );

    assert_eq!(end_pos, 3);
}
/// A `<div` open tag spread over several lines with its `>` on a line of its
/// own. The test checks three things: the returned position is 8, the tree
/// contains at least one `HTML_ATTRS` node, and the tokens reproduce the
/// input exactly.
#[test]
fn test_parse_div_block_multiline_open_close_separate_line_pandoc() {
    let source = "<div\n id=\"x\"\n class=\"y\"\n>\n\nfoo\n\n</div>\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], false).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK_DIV,
        &opts,
    );
    assert_eq!(end_pos, 8);

    let tree = crate::syntax::SyntaxNode::new_root(tree_builder.finish());

    // The attributes of the multi-line open tag must surface as a node.
    let has_attrs = tree
        .descendants()
        .any(|node| node.kind() == SyntaxKind::HTML_ATTRS);
    assert!(has_attrs, "expected at least one HTML_ATTRS node");

    // Concatenating every token's text must give back the input unchanged.
    let mut roundtrip = String::new();
    for element in tree.descendants_with_tokens() {
        if let Some(token) = element.into_token() {
            roundtrip.push_str(token.text());
        }
    }
    assert_eq!(roundtrip, source);
}
/// A `<div` open tag spread over several lines with its `>` at the end of the
/// last attribute line. The test checks three things: the returned position
/// is 5, the tree contains at least one `HTML_ATTRS` node, and the tokens
/// reproduce the input exactly.
#[test]
fn test_parse_div_block_multiline_open_close_inline_pandoc() {
    let source = "<div\n id=\"x\"\n class=\"y\">\nfoo\n</div>\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], false).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK_DIV,
        &opts,
    );
    assert_eq!(end_pos, 5);

    let tree = crate::syntax::SyntaxNode::new_root(tree_builder.finish());

    // The attributes of the multi-line open tag must surface as a node.
    let has_attrs = tree
        .descendants()
        .any(|node| node.kind() == SyntaxKind::HTML_ATTRS);
    assert!(has_attrs, "expected at least one HTML_ATTRS node");

    // Concatenating every token's text must give back the input unchanged.
    let mut roundtrip = String::new();
    for element in tree.descendants_with_tokens() {
        if let Some(token) = element.into_token() {
            roundtrip.push_str(token.text());
        }
    }
    assert_eq!(roundtrip, source);
}
/// A `<div>` block followed by a blank line and more text: the returned
/// position must be 2, so the block ends before the blank line and `bar` is
/// left outside it.
#[test]
fn test_commonmark_type6_blank_line_terminates() {
    let source = "<div>\nfoo\n\nbar\n";
    let opts = ParserOptions::default();
    let source_lines: Vec<&str> = crate::parser::utils::helpers::split_lines_inclusive(source);
    let start = try_parse_html_block_start(source_lines[0], true).unwrap();
    let mut tree_builder = GreenNodeBuilder::new();

    let end_pos = parse_html_block_with_wrapper(
        &mut tree_builder,
        &source_lines,
        0,
        start,
        0,
        SyntaxKind::HTML_BLOCK,
        &opts,
    );

    assert_eq!(end_pos, 2);
}
}