use crate::options::{Dialect, ParserOptions};
use crate::parser::blocks::headings::{emit_atx_heading, try_parse_atx_heading};
use crate::parser::blocks::horizontal_rules::{emit_horizontal_rule, try_parse_horizontal_rule};
use crate::parser::blocks::html_blocks::{
HtmlBlockType, count_tag_balance, is_pandoc_matched_pair_tag, try_parse_html_block_start,
};
use crate::parser::utils::inline_emission;
use crate::parser::utils::text_buffer::ParagraphBuffer;
use crate::syntax::{SyntaxKind, SyntaxNode};
use rowan::{GreenNodeBuilder, TextSize};
/// One recorded piece of a list item's pending content.
///
/// `ListItemBuffer` stores these in the order they were pushed. `Text`
/// segments are concatenated when the buffer is inspected for block-level
/// constructs; `BlockquoteMarker` segments are only forwarded to the
/// paragraph buffer when the item is emitted as a paragraph/plain block.
#[derive(Debug, Clone)]
pub(crate) enum ListItemContent {
/// A run of raw source text (never empty when added via `push_text`).
Text(String),
/// A blockquote marker that appeared inside the list item.
BlockquoteMarker {
/// Presumably the number of spaces preceding the marker — passed through
/// unchanged to `ParagraphBuffer::push_marker`; confirm there.
leading_spaces: usize,
/// Presumably whether a space followed the marker — passed through
/// unchanged to `ParagraphBuffer::push_marker`; confirm there.
has_trailing_space: bool,
},
}
/// Accumulates the content of a list item until it is emitted as a block.
///
/// Content is kept as an ordered list of segments (text runs and blockquote
/// markers) so the emit step can decide how to structure the output.
#[derive(Debug, Default, Clone)]
pub(crate) struct ListItemBuffer {
/// Segments in the order they were pushed.
segments: Vec<ListItemContent>,
}
impl ListItemBuffer {
    /// Creates a buffer that holds no segments.
    pub(crate) fn new() -> Self {
        Self::default()
    }

    /// Appends a text segment. Empty text is dropped and records nothing.
    pub(crate) fn push_text(&mut self, text: impl Into<String>) {
        let owned: String = text.into();
        if !owned.is_empty() {
            self.segments.push(ListItemContent::Text(owned));
        }
    }

    /// Appends a blockquote-marker segment carrying the given marker details.
    pub(crate) fn push_blockquote_marker(
        &mut self,
        leading_spaces: usize,
        has_trailing_space: bool,
    ) {
        let marker = ListItemContent::BlockquoteMarker {
            leading_spaces,
            has_trailing_space,
        };
        self.segments.push(marker);
    }

    /// Returns `true` when no segment has been recorded.
    pub(crate) fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }

    /// Number of recorded segments (text runs and markers alike).
    pub(crate) fn segment_count(&self) -> usize {
        self.segments.len()
    }

    /// Text of the first segment, or `None` when the buffer is empty or the
    /// first segment is a blockquote marker.
    pub(crate) fn first_text(&self) -> Option<&str> {
        self.segments.first().and_then(|segment| match segment {
            ListItemContent::Text(text) => Some(text.as_str()),
            ListItemContent::BlockquoteMarker { .. } => None,
        })
    }

    /// Returns the tag name of an HTML block that the buffered content opens
    /// but has not yet closed.
    ///
    /// Yields `Some(tag_name)` only when all of the following hold:
    /// the dialect is Pandoc; the first segment is text whose first line
    /// parses as an opening block tag; that tag is a Pandoc matched-pair tag;
    /// and, summed over every text segment, the tag is opened more often than
    /// it is closed. In every other case the result is `None`.
    pub(crate) fn unclosed_pandoc_matched_pair_tag(
        &self,
        config: &ParserOptions,
    ) -> Option<String> {
        if config.dialect != Dialect::Pandoc {
            return None;
        }

        // Only the first physical line of the first text segment is examined
        // for the opening tag, with its line terminator removed.
        let head = self.first_text()?;
        let opening_line = head.split_inclusive('\n').next()?;
        let opening_line = opening_line
            .strip_suffix("\r\n")
            .or_else(|| opening_line.strip_suffix('\n'))
            .unwrap_or(opening_line);

        let HtmlBlockType::BlockTag {
            tag_name,
            is_closing: false,
            ..
        } = try_parse_html_block_start(opening_line, false)?
        else {
            return None;
        };
        if !is_pandoc_matched_pair_tag(&tag_name) {
            return None;
        }

        // Tally opens/closes across all text segments; markers are skipped.
        let (opens, closes) = self
            .segments
            .iter()
            .filter_map(|segment| match segment {
                ListItemContent::Text(text) => Some(count_tag_balance(text, &tag_name)),
                ListItemContent::BlockquoteMarker { .. } => None,
            })
            .fold((0usize, 0usize), |(open_sum, close_sum), (o, c)| {
                (open_sum + o, close_sum + c)
            });

        (opens > closes).then_some(tag_name)
    }

    /// Reports whether blank lines separate pieces of buffered content.
    ///
    /// The current implementation always answers `false`; it only emits a
    /// trace log line with the segment count.
    pub(crate) fn has_blank_lines_between_content(&self) -> bool {
        let segment_total = self.segments.len();
        log::trace!(
            "has_blank_lines_between_content: segments={} result=false",
            segment_total
        );
        false
    }

    /// `true` when every segment is a text run (vacuously true when empty).
    fn is_text_only(&self) -> bool {
        self.segments
            .iter()
            .all(|segment| matches!(segment, ListItemContent::Text(_)))
    }

    /// Concatenates all text segments in order, ignoring blockquote markers.
    fn get_text_for_parsing(&self) -> String {
        self.segments
            .iter()
            .filter_map(|segment| match segment {
                ListItemContent::Text(text) => Some(text.as_str()),
                ListItemContent::BlockquoteMarker { .. } => None,
            })
            .collect()
    }

    /// Replays every segment, in order, into a fresh `ParagraphBuffer`.
    fn to_paragraph_buffer(&self) -> ParagraphBuffer {
        let mut paragraph_buffer = ParagraphBuffer::new();
        self.segments.iter().for_each(|segment| match segment {
            ListItemContent::Text(text) => paragraph_buffer.push_text(text),
            ListItemContent::BlockquoteMarker {
                leading_spaces,
                has_trailing_space,
            } => paragraph_buffer.push_marker(*leading_spaces, *has_trailing_space),
        });
        paragraph_buffer
    }

    /// Emits the buffered content into `builder` as block-level syntax.
    ///
    /// Nothing is emitted for an empty buffer. Otherwise the first matching
    /// rule below wins:
    ///
    /// 1. The concatenated text is exactly one newline-terminated line that
    ///    parses as an ATX heading or a horizontal rule: that node is emitted.
    /// 2. All segments are text and the first line is an ATX heading followed
    ///    by more content: the heading is emitted, then the remainder as a
    ///    `PARAGRAPH`/`PLAIN` node of inlines.
    /// 3. Pandoc dialect, all segments are text, and
    ///    `try_emit_html_block_lift` accepts the text: it has emitted it.
    /// 4. Fallback: a `PARAGRAPH`/`PLAIN` node holding the paragraph-buffer
    ///    rendering of the segments.
    ///
    /// `use_paragraph` selects `PARAGRAPH` over `PLAIN`; `content_col` is only
    /// forwarded to the HTML lift; `suppress_footnote_refs` is forwarded to
    /// inline emission.
    pub(crate) fn emit_as_block(
        &self,
        builder: &mut GreenNodeBuilder<'static>,
        use_paragraph: bool,
        config: &ParserOptions,
        content_col: usize,
        suppress_footnote_refs: bool,
    ) {
        if self.is_empty() {
            return;
        }

        let wrapper_kind = if use_paragraph {
            SyntaxKind::PARAGRAPH
        } else {
            SyntaxKind::PLAIN
        };
        let text = self.get_text_for_parsing();

        if !text.is_empty() {
            // Rule 1: a lone line. This requires a trailing line terminator;
            // text without one skips the single-line heading/rule check.
            let single_line = text
                .strip_suffix("\r\n")
                .or_else(|| text.strip_suffix('\n'))
                .filter(|line| !line.contains('\n') && !line.contains('\r'));
            if let Some(line) = single_line {
                if let Some(level) = try_parse_atx_heading(line) {
                    emit_atx_heading(builder, &text, level, config);
                    return;
                }
                if try_parse_horizontal_rule(line).is_some() {
                    emit_horizontal_rule(builder, &text);
                    return;
                }
            }

            let text_only = self.is_text_only();

            // Rule 2: heading on the first line, body after it.
            if text_only
                && let Some((heading_line, body)) = text.split_once('\n')
                && !body.is_empty()
                && let Some(level) = try_parse_atx_heading(heading_line)
            {
                // The heading node receives its line including the `\n`.
                let heading_len = heading_line.len() + 1;
                emit_atx_heading(builder, &text[..heading_len], level, config);
                builder.start_node(wrapper_kind.into());
                inline_emission::emit_inlines(builder, body, config, suppress_footnote_refs);
                builder.finish_node();
                return;
            }

            // Rule 3: Pandoc HTML block lifted out of the list item text.
            if config.dialect == Dialect::Pandoc
                && text_only
                && try_emit_html_block_lift(builder, &text, config, content_col, use_paragraph)
            {
                return;
            }
        }

        // Rule 4: plain paragraph/plain wrapper around the segments.
        builder.start_node(wrapper_kind.into());
        let paragraph_buffer = self.to_paragraph_buffer();
        if paragraph_buffer.is_empty() {
            if !text.is_empty() {
                inline_emission::emit_inlines(builder, &text, config, suppress_footnote_refs);
            }
        } else {
            paragraph_buffer.emit_with_inlines(builder, config, suppress_footnote_refs);
        }
        builder.finish_node();
    }

    /// Discards every recorded segment.
    pub(crate) fn clear(&mut self) {
        self.segments.clear();
    }
}
/// Tries to emit list-item `text` as an HTML block (optionally followed by a
/// trailing paragraph) instead of as inline content.
///
/// The text is re-parsed as a standalone document after removing up to
/// `content_col` columns of indentation from every line but the first. The
/// lift succeeds, and `true` is returned, only when:
///
/// * the first line parses as an HTML block start;
/// * the re-parse yields either one child, or exactly two children with the
///   second being a `PARAGRAPH`;
/// * the first child is `HTML_BLOCK` or `HTML_BLOCK_DIV`;
/// * the last child ends exactly at the end of the parsed text;
/// * an `HTML_BLOCK_DIV` first child contains at least two `HTML_BLOCK_TAG`
///   children.
///
/// On success the nodes are grafted into `builder`, re-inserting the removed
/// indentation as `WHITESPACE` tokens; a trailing paragraph is re-tagged as
/// `PARAGRAPH` or `PLAIN` according to `use_paragraph`. On failure nothing is
/// written to `builder` and `false` is returned.
fn try_emit_html_block_lift(
    builder: &mut GreenNodeBuilder<'static>,
    text: &str,
    config: &ParserOptions,
    content_col: usize,
    use_paragraph: bool,
) -> bool {
    // Cheap pre-check on the first line before paying for a full re-parse.
    let opening_line = text.split_inclusive('\n').next().unwrap_or(text);
    let opening_line = opening_line
        .strip_suffix("\r\n")
        .or_else(|| opening_line.strip_suffix('\n'))
        .unwrap_or(opening_line);
    if try_parse_html_block_start(opening_line, false).is_none() {
        return false;
    }

    let (parse_text, prefixes) = match content_col {
        0 => (text.to_string(), Vec::new()),
        col => strip_list_item_indent(text, col),
    };

    let refdefs = config.refdef_labels.clone().unwrap_or_default();
    let inner_root = crate::parser::parse_with_refdefs(&parse_text, Some(config.clone()), refdefs);
    let children: Vec<SyntaxNode> = inner_root.children().collect();

    let (Some(first), Some(last)) = (children.first(), children.last()) else {
        return false;
    };
    let first_kind = first.kind();
    if !matches!(
        first_kind,
        SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
    ) {
        return false;
    }

    // The parsed children must account for every byte of the input.
    if last.text_range().end() != TextSize::of(parse_text.as_str()) {
        return false;
    }

    // Accept a lone HTML block, or an HTML block followed by one paragraph.
    let trailing_paragraph = match children.as_slice() {
        [_] => None,
        [_, second] if second.kind() == SyntaxKind::PARAGRAPH => Some(second),
        _ => return false,
    };

    if first_kind == SyntaxKind::HTML_BLOCK_DIV {
        let tag_children = first
            .children()
            .filter(|child| child.kind() == SyntaxKind::HTML_BLOCK_TAG)
            .count();
        if tag_children < 2 {
            return false;
        }
    }

    // Prefix tracking is only needed when some indentation was recorded.
    let mut prefix_state = (!prefixes.is_empty()).then(|| LinePrefixState {
        prefixes,
        line_idx: 0,
        at_line_start: true,
    });

    graft_node(builder, first, &mut prefix_state);
    if let Some(paragraph) = trailing_paragraph {
        let trailing_kind = if use_paragraph {
            SyntaxKind::PARAGRAPH
        } else {
            SyntaxKind::PLAIN
        };
        graft_node_retag_root(builder, paragraph, &mut prefix_state, trailing_kind);
    }
    true
}
/// Copies `node` into `builder`, but opens the copied root with `new_kind`
/// instead of the node's own kind.
///
/// Descendant nodes keep their kinds (they are copied via `graft_node`);
/// tokens go through `emit_grafted_token` so recorded line prefixes are
/// re-inserted.
fn graft_node_retag_root(
    builder: &mut GreenNodeBuilder<'static>,
    node: &SyntaxNode,
    prefix: &mut Option<LinePrefixState>,
    new_kind: SyntaxKind,
) {
    use rowan::NodeOrToken::{Node, Token};

    builder.start_node(new_kind.into());
    for element in node.children_with_tokens() {
        match element {
            Token(token) => emit_grafted_token(builder, token.kind(), token.text(), prefix),
            Node(child) => graft_node(builder, &child, prefix),
        }
    }
    builder.finish_node();
}
/// Cursor used while grafting a re-parsed subtree, so that indentation removed
/// before the re-parse can be written back at the start of each line.
struct LinePrefixState {
/// Per-line removed indentation, indexed by line number (entry 0 belongs to
/// the first line).
prefixes: Vec<String>,
/// Index into `prefixes` for the line currently being emitted; advanced after
/// each `NEWLINE` or `BLANK_LINE` token.
line_idx: usize,
/// `true` when the next token emitted is the first one on its line, i.e. the
/// line's prefix has not been written yet.
at_line_start: bool,
}
/// Removes up to `content_col` columns of leading indentation from every line
/// of `text` except the first.
///
/// Returns the de-indented text together with one prefix per input line: the
/// exact characters removed from that line (always empty for the first line).
/// Lines are split on `\n` and keep their terminators.
///
/// Spaces count as one column. A tab advances to the next multiple of four
/// and is only removed when that does not overshoot `content_col`.
fn strip_list_item_indent(text: &str, content_col: usize) -> (String, Vec<String>) {
    /// Byte length of the indentation to remove from the start of `line`.
    fn indent_len(line: &str, content_col: usize) -> usize {
        let mut col = 0usize;
        let mut consumed = 0usize;
        for byte in line.bytes() {
            if col >= content_col {
                break;
            }
            col = match byte {
                b' ' => col + 1,
                b'\t' => {
                    let tab_stop = (col / 4 + 1) * 4;
                    if tab_stop > content_col {
                        // A tab that would pass the content column stays put.
                        break;
                    }
                    tab_stop
                }
                _ => break,
            };
            consumed += 1;
        }
        consumed
    }

    let mut stripped = String::with_capacity(text.len());
    let mut prefixes: Vec<String> = Vec::new();
    let mut lines = text.split_inclusive('\n');

    // The first line is already positioned at the content column.
    if let Some(first) = lines.next() {
        prefixes.push(String::new());
        stripped.push_str(first);
    }

    for line in lines {
        let (indent, rest) = line.split_at(indent_len(line, content_col));
        prefixes.push(indent.to_string());
        stripped.push_str(rest);
    }

    (stripped, prefixes)
}
fn graft_node(
builder: &mut GreenNodeBuilder<'static>,
node: &SyntaxNode,
prefix: &mut Option<LinePrefixState>,
) {
builder.start_node(node.kind().into());
for child in node.children_with_tokens() {
match child {
rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
rowan::NodeOrToken::Token(t) => {
emit_grafted_token(builder, t.kind(), t.text(), prefix);
}
}
}
builder.finish_node();
}
/// Writes one token into `builder`, first restoring the line's recorded
/// indentation when the token is the first on its line.
///
/// With no prefix state the token is written unchanged. Otherwise, at the
/// start of a line a non-empty recorded prefix is emitted as a `WHITESPACE`
/// token ahead of the token itself, and a `NEWLINE` or `BLANK_LINE` token
/// advances the state to the next line.
fn emit_grafted_token(
    builder: &mut GreenNodeBuilder<'static>,
    kind: SyntaxKind,
    text: &str,
    prefix: &mut Option<LinePrefixState>,
) {
    let Some(state) = prefix.as_mut() else {
        builder.token(kind.into(), text);
        return;
    };

    if state.at_line_start {
        state.at_line_start = false;
        match state.prefixes.get(state.line_idx) {
            Some(indent) if !indent.is_empty() => {
                builder.token(SyntaxKind::WHITESPACE.into(), indent);
            }
            // No recorded prefix for this line, or nothing was removed.
            _ => {}
        }
    }

    builder.token(kind.into(), text);

    if matches!(kind, SyntaxKind::NEWLINE | SyntaxKind::BLANK_LINE) {
        state.line_idx += 1;
        state.at_line_start = true;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed buffer holds nothing.
    #[test]
    fn test_new_buffer_is_empty() {
        let fresh = ListItemBuffer::new();
        assert!(fresh.is_empty());
        assert!(!fresh.has_blank_lines_between_content());
    }

    /// One pushed text run is stored and read back verbatim.
    #[test]
    fn test_push_single_text() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Hello, world!");

        assert!(!buf.is_empty());
        assert!(!buf.has_blank_lines_between_content());
        assert_eq!(buf.get_text_for_parsing(), "Hello, world!");
    }

    /// Several text runs are concatenated in push order.
    #[test]
    fn test_push_multiple_text_segments() {
        let mut buf = ListItemBuffer::new();
        for chunk in ["Line 1\n", "Line 2\n", "Line 3"] {
            buf.push_text(chunk);
        }

        assert_eq!(buf.get_text_for_parsing(), "Line 1\nLine 2\nLine 3");
    }

    /// `clear` returns the buffer to its empty state.
    #[test]
    fn test_clear_buffer() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Some text");
        assert!(!buf.is_empty());

        buf.clear();

        assert!(buf.is_empty());
        assert_eq!(buf.get_text_for_parsing(), "");
    }

    /// Pushing an empty string records no segment.
    #[test]
    fn test_empty_text_ignored() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("");
        assert!(buf.is_empty());
    }
}