use crate::options::ParserOptions;
use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;
use super::bookdown::{
try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
};
use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
use super::citations::{
emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
try_parse_bracketed_citation,
};
use super::code_spans::{emit_code_span, try_parse_code_span};
use super::emoji::{emit_emoji, try_parse_emoji};
use super::escapes::{EscapeType, emit_escape, try_parse_escape};
use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
use super::inline_footnotes::{
emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
try_parse_inline_footnote,
};
use super::latex::{parse_latex_command, try_parse_latex_command};
use super::links::{
emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
};
use super::math::{
emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
};
use super::native_spans::{emit_native_span, try_parse_native_span};
use super::raw_inline::is_raw_inline;
use super::shortcodes::{emit_shortcode, try_parse_shortcode};
use super::strikeout::{emit_strikeout, try_parse_strikeout};
use super::subscript::{emit_subscript, try_parse_subscript};
use super::superscript::{emit_superscript, try_parse_superscript};
/// Parses `text` as inline content and appends the result to `builder`.
///
/// The whole string (byte range `0..text.len()`) is handed to
/// `parse_inline_range`. A preview of at most 40 bytes of the input is logged
/// at debug level before parsing, and a completion message afterwards.
///
/// # Parameters
/// * `builder` - green-tree builder that receives the emitted tokens/nodes.
/// * `text` - the inline source text to parse.
/// * `config` - parser options forwarded unchanged to the range parser.
pub fn parse_inline_text_recursive(
    builder: &mut GreenNodeBuilder,
    text: &str,
    config: &ParserOptions,
) {
    // Cap the logged preview at 40 bytes, then back up to a char boundary:
    // slicing a `str` in the middle of a multi-byte UTF-8 sequence panics.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut preview_end = text.len().min(40);
    while !text.is_char_boundary(preview_end) {
        preview_end -= 1;
    }
    log::debug!(
        "Recursive inline parsing: {:?} ({} bytes)",
        &text[..preview_end],
        text.len()
    );
    parse_inline_range(text, 0, text.len(), config, builder);
    log::debug!("Recursive inline parsing complete");
}
/// Parses `text` as inline content by delegating to
/// `parse_inline_text_recursive`.
///
/// A preview of at most 40 bytes of the input is logged at trace level first.
///
/// # Parameters
/// * `builder` - green-tree builder that receives the emitted tokens/nodes.
/// * `text` - the inline source text to parse.
/// * `config` - parser options forwarded unchanged.
/// * `_allow_reference_links` - accepted but not read by this function.
pub fn parse_inline_text(
    builder: &mut GreenNodeBuilder,
    text: &str,
    config: &ParserOptions,
    _allow_reference_links: bool,
) {
    // Cap the logged preview at 40 bytes, then back up to a char boundary:
    // slicing a `str` in the middle of a multi-byte UTF-8 sequence panics.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut preview_end = text.len().min(40);
    while !text.is_char_boundary(preview_end) {
        preview_end -= 1;
    }
    log::trace!(
        "Parsing inline text (recursive): {:?} ({} bytes)",
        &text[..preview_end],
        text.len()
    );
    parse_inline_text_recursive(builder, text, config);
}
/// Tries to parse an emphasis construct whose opening delimiter run (`*` or
/// `_`) begins at byte offset `pos` of `text`.
///
/// The run length selects the parser: 1 -> `try_parse_one`, 2 ->
/// `try_parse_two`, 3 -> `try_parse_three`; runs of four or more are rejected.
/// The attempt is also rejected when the run is immediately followed by
/// whitespace, or when an `_` run is immediately preceded by an alphanumeric
/// character.
///
/// Returns `Some((consumed, count))` on success, where `consumed` is the value
/// reported by the selected parser and `count` is the opening run length;
/// otherwise `None`.
pub fn try_parse_emphasis(
    text: &str,
    pos: usize,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) -> Option<(usize, usize)> {
    let raw = text.as_bytes();
    // `get` doubles as the out-of-range guard for `pos`.
    let opener = *raw.get(pos)?;
    if !matches!(opener, b'*' | b'_') {
        return None;
    }
    let delim_char = opener as char;

    // Length of the run of identical delimiter bytes starting at `pos`.
    let count = raw[pos..].iter().take_while(|&&b| b == opener).count();
    let after_pos = pos + count;
    log::debug!(
        "try_parse_emphasis: '{}' x {} at pos {}",
        delim_char,
        count,
        pos
    );

    // An opener directly followed by whitespace stays literal.
    let next_is_space = text[after_pos..]
        .chars()
        .next()
        .is_some_and(char::is_whitespace);
    if next_is_space {
        log::trace!("Delimiter followed by whitespace, treating as literal");
        return None;
    }

    // Intraword underscores never open emphasis.
    if delim_char == '_'
        && text[..pos]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric)
    {
        log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
        return None;
    }

    let consumed = match count {
        1 => try_parse_one(text, pos, delim_char, end, config, builder),
        2 => try_parse_two(text, pos, delim_char, end, config, builder),
        3 => try_parse_three(text, pos, delim_char, end, config, builder),
        _ => {
            log::trace!("{} delimiters (4+), treating as literal", count);
            None
        }
    }?;
    Some((consumed, count))
}
/// Variant of the emphasis entry point that does not apply the
/// "opener followed by whitespace" rejection.
///
/// The delimiter run (`*` or `_`) starting at byte offset `pos` is measured
/// and dispatched by length: 1 -> `try_parse_one`, 2 -> `try_parse_two`,
/// 3 -> `try_parse_three`; longer runs are rejected. An `_` run immediately
/// preceded by an alphanumeric character is rejected as intraword.
///
/// Returns `Some((consumed, count))` on success, where `consumed` is the value
/// reported by the selected parser and `count` is the opening run length;
/// otherwise `None`.
fn try_parse_emphasis_nested(
    text: &str,
    pos: usize,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) -> Option<(usize, usize)> {
    let raw = text.as_bytes();
    // `get` doubles as the out-of-range guard for `pos`.
    let opener = *raw.get(pos)?;
    if !matches!(opener, b'*' | b'_') {
        return None;
    }
    let delim_char = opener as char;

    // Length of the run of identical delimiter bytes starting at `pos`.
    let count = raw[pos..].iter().take_while(|&&b| b == opener).count();
    log::debug!(
        "try_parse_emphasis_nested: '{}' x {} at pos {}",
        delim_char,
        count,
        pos
    );

    // Intraword underscores never open emphasis.
    if delim_char == '_'
        && text[..pos]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric)
    {
        log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
        return None;
    }

    let consumed = match count {
        1 => try_parse_one(text, pos, delim_char, end, config, builder),
        2 => try_parse_two(text, pos, delim_char, end, config, builder),
        3 => try_parse_three(text, pos, delim_char, end, config, builder),
        _ => {
            log::trace!("{} delimiters (4+), treating as literal", count);
            None
        }
    }?;
    Some((consumed, count))
}
/// Handles an opening run of three delimiters (`***` / `___`) at byte `pos`.
///
/// Candidate closers are located with `find_first_potential_ender` (bounded by
/// `end`) and the length of the delimiter run found there is counted. For each
/// candidate the following cases are tried in order:
///
/// 1. A valid three-delimiter ender: emits `STRONG[ EMPHASIS[ content ] ]`.
/// 2. A valid two-delimiter ender: looks for a later single-delimiter closer
///    and emits `EMPHASIS[ STRONG[ content ] rest ]`; if none exists, emits one
///    literal delimiter as `TEXT` followed by `STRONG[ content ]`.
/// 3. A valid one-delimiter ender: looks for a later double-delimiter closer
///    and emits `STRONG[ EMPHASIS[ content ] rest ]`; if none exists, emits two
///    literal delimiters as `TEXT` followed by `EMPHASIS[ content ]`.
///
/// If none of the cases applies, the search resumes after the candidate run.
///
/// Returns the number of bytes consumed counted from `pos`, or `None` when no
/// potential ender is found (in which case nothing has been emitted).
fn try_parse_three(
text: &str,
pos: usize,
delim_char: char,
end: usize,
config: &ParserOptions,
builder: &mut GreenNodeBuilder,
) -> Option<usize> {
// Content begins right after the three opening delimiters.
let content_start = pos + 3;
// Marker strings used for the emitted tokens: one and two delimiter chars.
let one = delim_char.to_string();
let two = one.repeat(2);
log::debug!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
let mut search_pos = content_start;
loop {
// Next unescaped delimiter byte in `search_pos..end`; give up if there is none.
let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
Some(p) => p,
None => {
log::trace!("No potential ender found for ***");
return None;
}
};
log::debug!("Potential ender at pos {}", closer_start);
// Count the consecutive delimiter bytes starting at the candidate.
// Note: this scan is bounded by the text length, not by `end`.
let bytes = text.as_bytes();
let mut closer_count = 0;
let mut check_pos = closer_start;
while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
closer_count += 1;
check_pos += 1;
}
log::debug!(
"Found {} x {} at pos {}",
delim_char,
closer_count,
closer_start
);
// Case 1: the candidate closes all three delimiters at once.
if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
log::debug!("Matched *** closer, emitting Strong[Emph[content]]");
builder.start_node(SyntaxKind::STRONG.into());
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.start_node(SyntaxKind::EMPHASIS.into());
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
parse_inline_range_nested(text, content_start, closer_start, config, builder);
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
builder.finish_node();
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.finish_node();
return Some(closer_start + 3 - pos);
}
// Case 2: the candidate closes two delimiters; one opener is still pending.
if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
log::debug!("Matched ** closer, wrapping as Strong and continuing with one");
let continue_pos = closer_start + 2;
// Look for the single delimiter that closes the remaining opener.
if let Some(final_closer_pos) =
parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
{
log::debug!(
"Found * closer at pos {}, emitting Emph[Strong[...], ...]",
final_closer_pos
);
builder.start_node(SyntaxKind::EMPHASIS.into());
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
builder.start_node(SyntaxKind::STRONG.into());
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
parse_inline_range_nested(text, content_start, closer_start, config, builder);
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.finish_node();
// Text between the inner closer and the outer closer belongs to the emphasis.
parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
builder.finish_node();
return Some(final_closer_pos + 1 - pos);
}
// No outer closer: the first opening delimiter becomes literal text.
log::debug!("No * closer found after **, emitting * + STRONG");
builder.token(SyntaxKind::TEXT.into(), &one);
builder.start_node(SyntaxKind::STRONG.into());
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
parse_inline_range_nested(text, content_start, closer_start, config, builder);
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.finish_node();
return Some(closer_start + 2 - pos);
}
// Case 3: the candidate closes one delimiter; two openers are still pending.
if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
log::debug!("Matched * closer, wrapping as Emph and continuing with two");
let continue_pos = closer_start + 1;
// Look for the double delimiter that closes the remaining openers.
if let Some(final_closer_pos) =
parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
{
log::debug!(
"Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
final_closer_pos
);
builder.start_node(SyntaxKind::STRONG.into());
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.start_node(SyntaxKind::EMPHASIS.into());
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
parse_inline_range_nested(text, content_start, closer_start, config, builder);
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
builder.finish_node();
// Text between the inner closer and the outer closer belongs to the strong node.
parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
builder.finish_node();
return Some(final_closer_pos + 2 - pos);
}
// No outer closer: the first two opening delimiters become literal text.
log::debug!("No ** closer found after *, emitting ** + EMPH");
builder.token(SyntaxKind::TEXT.into(), &two);
builder.start_node(SyntaxKind::EMPHASIS.into());
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
parse_inline_range_nested(text, content_start, closer_start, config, builder);
builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
builder.finish_node();
return Some(closer_start + 1 - pos);
}
// The run at the candidate is not usable as a closer; skip past the whole run.
log::debug!(
"No valid ender at pos {}, continuing search from {}",
closer_start,
closer_start + closer_count
);
search_pos = closer_start + closer_count;
}
}
/// Finds the first unescaped occurrence of `delim_char` in `text`, scanning
/// byte offsets from `start` up to (but excluding) `end`, clamped to the text
/// length.
///
/// A delimiter counts as escaped when it is directly preceded by an odd number
/// of backslashes; backslashes located before `start` are counted as well.
///
/// Returns the byte offset of the match, or `None` if there is none.
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let raw = text.as_bytes();
    let wanted = delim_char as u8;
    let limit = end.min(raw.len());

    (start..limit).find(|&idx| {
        if raw[idx] != wanted {
            return false;
        }
        // Count the backslashes immediately before the delimiter.
        let slashes = raw[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        slashes % 2 == 0
    })
}
/// Checks whether exactly `delim_count` copies of `delim_char` starting at
/// byte offset `pos` form an acceptable closing delimiter run.
///
/// The run must fit inside `text`, consist solely of the delimiter, and must
/// not be adjacent to another delimiter byte on either side. For `_` the run
/// additionally must not be preceded by whitespace nor followed by an
/// alphanumeric character.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let raw = text.as_bytes();
    let delim = delim_char as u8;
    let run_end = pos + delim_count;

    // The run has to fit and contain nothing but the delimiter.
    if run_end > raw.len() {
        return false;
    }
    if raw[pos..run_end].iter().any(|&b| b != delim) {
        return false;
    }

    // Reject runs that are part of a longer delimiter run (either side).
    if pos > 0 && raw[pos - 1] == delim {
        return false;
    }
    if raw.get(run_end) == Some(&delim) {
        return false;
    }

    // Only underscores carry the extra flanking restrictions.
    if delim_char != '_' {
        return true;
    }
    let after_whitespace = text[..pos]
        .chars()
        .next_back()
        .is_some_and(char::is_whitespace);
    if after_whitespace {
        return false;
    }
    !text[run_end..]
        .chars()
        .next()
        .is_some_and(char::is_alphanumeric)
}
/// Handles an opening run of two delimiters at byte `pos`.
///
/// Searches for the matching two-delimiter closer with
/// `parse_until_closer_with_nested_one`. On success a `STRONG` node is emitted
/// (opening marker, nested inline content, closing marker) and the number of
/// bytes from `pos` through the end of the closer is returned. When no closer
/// is found nothing is emitted and `None` is returned.
fn try_parse_two(
    text: &str,
    pos: usize,
    delim_char: char,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) -> Option<usize> {
    let content_start = pos + 2;
    log::debug!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);

    let Some(closer_pos) =
        parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
    else {
        log::trace!("No closer found for **");
        return None;
    };
    log::debug!("Found ** closer at pos {}", closer_pos);

    let closer_end = closer_pos + 2;
    builder.start_node(SyntaxKind::STRONG.into());
    builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..content_start]);
    parse_inline_range_nested(text, content_start, closer_pos, config, builder);
    builder.token(
        SyntaxKind::STRONG_MARKER.into(),
        &text[closer_pos..closer_end],
    );
    builder.finish_node();
    Some(closer_end - pos)
}
/// Handles a single opening delimiter at byte `pos`.
///
/// Searches for the matching single-delimiter closer with
/// `parse_until_closer_with_nested_two`. On success an `EMPHASIS` node is
/// emitted (opening marker, nested inline content, closing marker) and the
/// number of bytes from `pos` through the closer is returned. When no closer
/// is found nothing is emitted and `None` is returned.
fn try_parse_one(
    text: &str,
    pos: usize,
    delim_char: char,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) -> Option<usize> {
    let content_start = pos + 1;
    log::debug!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);

    let Some(closer_pos) =
        parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
    else {
        log::trace!("No closer found for *");
        return None;
    };
    log::debug!("Found * closer at pos {}", closer_pos);

    let closer_end = closer_pos + 1;
    builder.start_node(SyntaxKind::EMPHASIS.into());
    builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..content_start]);
    parse_inline_range_nested(text, content_start, closer_pos, config, builder);
    builder.token(
        SyntaxKind::EMPHASIS_MARKER.into(),
        &text[closer_pos..closer_end],
    );
    builder.finish_node();
    Some(closer_end - pos)
}
/// Scans `text` from `start` (bounded by `end` and the text length) for a
/// closing run of `delim_count` copies of `delim_char`.
///
/// While scanning, inline executable code, code spans, inline math and inline
/// links are skipped as opaque units. When searching for a single-delimiter
/// closer (`delim_count == 1`), an unescaped run of exactly two delimiters is
/// treated as a nested opener: it is trial-parsed with `try_parse_two` into a
/// throwaway builder and skipped on success; if that trial fails the whole
/// search fails.
///
/// Returns the byte offset of the closer, or `None` if none is found.
fn parse_until_closer_with_nested_two(
    text: &str,
    start: usize,
    delim_char: char,
    delim_count: usize,
    end: usize,
    config: &ParserOptions,
) -> Option<usize> {
    let raw = text.as_bytes();
    let delim = delim_char as u8;
    let limit = end.min(text.len());
    // A byte is escaped when an odd number of backslashes directly precedes it.
    let escaped_at = |at: usize| {
        raw[..at]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count()
            % 2
            == 1
    };

    let mut cursor = start;
    while cursor < limit {
        // Constructs whose content must not be inspected for delimiters.
        match raw[cursor] {
            b'`' => {
                if let Some(m) = try_parse_inline_executable(
                    &text[cursor..],
                    config.extensions.rmarkdown_inline_code,
                    config.extensions.quarto_inline_code,
                ) {
                    log::trace!(
                        "Skipping inline executable span of {} bytes at pos {}",
                        m.total_len,
                        cursor
                    );
                    cursor += m.total_len;
                    continue;
                }
                if let Some((len, _, _, _)) = try_parse_code_span(&text[cursor..]) {
                    log::trace!("Skipping code span of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            b'$' => {
                if let Some((len, _)) = try_parse_inline_math(&text[cursor..]) {
                    log::trace!("Skipping inline math of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            b'[' => {
                if let Some((len, _, _, _)) = try_parse_inline_link(&text[cursor..]) {
                    log::trace!("Skipping inline link of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            _ => {}
        }

        // Nested double-delimiter handling while looking for a single closer.
        if delim_count == 1 && raw[cursor] == delim && raw.get(cursor + 1) == Some(&delim) {
            if escaped_at(cursor) {
                log::trace!(
                    "First * at pos {} is escaped, skipping to check second *",
                    cursor
                );
                cursor += 1;
                continue;
            }
            // Only a run of exactly two (no third delimiter) is a nested opener.
            if raw.get(cursor + 2) != Some(&delim) {
                log::trace!(
                    "try_parse_one: found ** at pos {}, attempting nested two",
                    cursor
                );
                // Trial parse only: the output is discarded, just the length is used.
                let mut scratch = GreenNodeBuilder::new();
                match try_parse_two(text, cursor, delim_char, end, config, &mut scratch) {
                    Some(two_consumed) => {
                        log::debug!(
                            "Nested two succeeded, consumed {} bytes, continuing search",
                            two_consumed
                        );
                        cursor += two_consumed;
                        continue;
                    }
                    None => {
                        log::trace!(
                            "Nested two failed at pos {}, entire one() should fail",
                            cursor
                        );
                        return None;
                    }
                }
            }
        }

        // Generic closer test: `delim_count` delimiter bytes at the cursor.
        let run_end = cursor + delim_count;
        if run_end <= raw.len() && raw[cursor..run_end].iter().all(|&b| b == delim) {
            let escaped = escaped_at(cursor);
            let at_run_start = cursor == 0 || raw[cursor - 1] != delim;
            let at_run_end = raw.get(run_end) != Some(&delim);
            if (at_run_start || at_run_end) && !escaped {
                let underscore_after_space = delim_char == '_'
                    && cursor > start
                    && text[..cursor]
                        .chars()
                        .next_back()
                        .is_some_and(char::is_whitespace);
                if underscore_after_space {
                    log::trace!(
                        "Underscore closer preceded by whitespace at pos {}, not right-flanking",
                        cursor
                    );
                    cursor += 1;
                    continue;
                }
                log::trace!(
                    "Found exact {} x {} closer at pos {}",
                    delim_char,
                    delim_count,
                    cursor
                );
                return Some(cursor);
            }
        }
        cursor += 1;
    }
    None
}
/// Scans `text` from `start` (bounded by `end` and the text length) for a
/// closing run of `delim_count` copies of `delim_char`.
///
/// While scanning, inline executable code, code spans, inline math and inline
/// links are skipped as opaque units. When searching for a double-delimiter
/// closer (`delim_count == 2`), a lone unescaped delimiter that is not
/// followed by whitespace is treated as a nested opener: it is trial-parsed
/// with `try_parse_one` into a throwaway builder and skipped on success; if
/// that trial fails the whole search fails.
///
/// Returns the byte offset of the closer, or `None` if none is found.
fn parse_until_closer_with_nested_one(
    text: &str,
    start: usize,
    delim_char: char,
    delim_count: usize,
    end: usize,
    config: &ParserOptions,
) -> Option<usize> {
    let raw = text.as_bytes();
    let delim = delim_char as u8;
    let limit = end.min(text.len());
    // A byte is escaped when an odd number of backslashes directly precedes it.
    let escaped_at = |at: usize| {
        raw[..at]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count()
            % 2
            == 1
    };

    let mut cursor = start;
    while cursor < limit {
        // Constructs whose content must not be inspected for delimiters.
        match raw[cursor] {
            b'`' => {
                if let Some(m) = try_parse_inline_executable(
                    &text[cursor..],
                    config.extensions.rmarkdown_inline_code,
                    config.extensions.quarto_inline_code,
                ) {
                    log::trace!(
                        "Skipping inline executable span of {} bytes at pos {}",
                        m.total_len,
                        cursor
                    );
                    cursor += m.total_len;
                    continue;
                }
                if let Some((len, _, _, _)) = try_parse_code_span(&text[cursor..]) {
                    log::trace!("Skipping code span of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            b'$' => {
                if let Some((len, _)) = try_parse_inline_math(&text[cursor..]) {
                    log::trace!("Skipping inline math of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            b'[' => {
                if let Some((len, _, _, _)) = try_parse_inline_link(&text[cursor..]) {
                    log::trace!("Skipping inline link of {} bytes at pos {}", len, cursor);
                    cursor += len;
                    continue;
                }
            }
            _ => {}
        }

        // Nested single-delimiter handling while looking for a double closer:
        // applies only to a delimiter that is not followed by a second one.
        if delim_count == 2 && raw[cursor] == delim && raw.get(cursor + 1) != Some(&delim) {
            if escaped_at(cursor) {
                log::trace!("* at pos {} is escaped, skipping", cursor);
                cursor += 1;
                continue;
            }
            let followed_by_whitespace = text[cursor + 1..]
                .chars()
                .next()
                .is_some_and(char::is_whitespace);
            if followed_by_whitespace {
                log::trace!(
                    "* at pos {} followed by whitespace, not an opener, skipping",
                    cursor
                );
                cursor += 1;
                continue;
            }
            log::trace!(
                "try_parse_two: found * at pos {}, attempting nested one",
                cursor
            );
            // Trial parse only: the output is discarded, just the length is used.
            let mut scratch = GreenNodeBuilder::new();
            match try_parse_one(text, cursor, delim_char, end, config, &mut scratch) {
                Some(one_consumed) => {
                    log::debug!(
                        "Nested one succeeded, consumed {} bytes, continuing search",
                        one_consumed
                    );
                    cursor += one_consumed;
                    continue;
                }
                None => {
                    log::debug!(
                        "Nested one failed at pos {}, poisoning outer two (no closer found)",
                        cursor
                    );
                    return None;
                }
            }
        }

        // Generic closer test: `delim_count` delimiter bytes at the cursor.
        let run_end = cursor + delim_count;
        if run_end <= raw.len() && raw[cursor..run_end].iter().all(|&b| b == delim) {
            let escaped = escaped_at(cursor);
            let at_run_start = cursor == 0 || raw[cursor - 1] != delim;
            let at_run_end = raw.get(run_end) != Some(&delim);
            if (at_run_start || at_run_end) && !escaped {
                let underscore_after_space = delim_char == '_'
                    && cursor > start
                    && text[..cursor]
                        .chars()
                        .next_back()
                        .is_some_and(char::is_whitespace);
                if underscore_after_space {
                    log::trace!(
                        "Underscore closer preceded by whitespace at pos {}, not right-flanking",
                        cursor
                    );
                    cursor += 1;
                    continue;
                }
                log::trace!(
                    "Found exact {} x {} closer at pos {}",
                    delim_char,
                    delim_count,
                    cursor
                );
                return Some(cursor);
            }
        }
        cursor += 1;
    }
    None
}
/// Parses the byte range `start..end` of `text` into `builder` by calling
/// `parse_inline_range_impl` with its `nested_emphasis` flag set to `false`.
fn parse_inline_range(
    text: &str,
    start: usize,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) {
    let nested_emphasis = false;
    parse_inline_range_impl(text, start, end, config, builder, nested_emphasis);
}
/// Parses the byte range `start..end` of `text` into `builder` by calling
/// `parse_inline_range_impl` with its `nested_emphasis` flag set to `true`.
fn parse_inline_range_nested(
    text: &str,
    start: usize,
    end: usize,
    config: &ParserOptions,
    builder: &mut GreenNodeBuilder,
) {
    let nested_emphasis = true;
    parse_inline_range_impl(text, start, end, config, builder, nested_emphasis);
}
/// Reports whether byte offset `pos` is an acceptable place for an emoji
/// alias to begin: either the very start of `text`, or a position whose
/// preceding byte is neither an ASCII letter/digit nor `_`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        // Nothing precedes the start of the text.
        None => true,
        Some(before) => {
            let prev = text.as_bytes()[before];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
fn parse_inline_range_impl(
text: &str,
start: usize,
end: usize,
config: &ParserOptions,
builder: &mut GreenNodeBuilder,
nested_emphasis: bool,
) {
log::debug!(
"parse_inline_range: start={}, end={}, text={:?}",
start,
end,
&text[start..end]
);
let mut pos = start;
let mut text_start = start;
while pos < end {
let byte = text.as_bytes()[pos];
if byte == b'\\' {
if config.extensions.tex_math_double_backslash {
if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched double backslash display math at pos {}", pos);
emit_double_backslash_display_math(builder, content);
pos += len;
text_start = pos;
continue;
}
if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched double backslash inline math at pos {}", pos);
emit_double_backslash_inline_math(builder, content);
pos += len;
text_start = pos;
continue;
}
}
if config.extensions.tex_math_single_backslash {
if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched single backslash display math at pos {}", pos);
emit_single_backslash_display_math(builder, content);
pos += len;
text_start = pos;
continue;
}
if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched single backslash inline math at pos {}", pos);
emit_single_backslash_inline_math(builder, content);
pos += len;
text_start = pos;
continue;
}
}
if config.extensions.raw_tex
&& let Some((len, begin_marker, content, end_marker)) =
try_parse_math_environment(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched math environment at pos {}", pos);
emit_display_math_environment(builder, begin_marker, content, end_marker);
pos += len;
text_start = pos;
continue;
}
if config.extensions.bookdown_references
&& let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bookdown reference at pos {}: {}", pos, label);
super::citations::emit_bookdown_crossref(builder, label);
pos += len;
text_start = pos;
continue;
}
if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
let escape_enabled = match escape_type {
EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
EscapeType::Literal => {
const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!";
BASE_ESCAPABLE.contains(ch) || config.extensions.all_symbols_escapable
}
};
if !escape_enabled {
pos += 1;
continue;
}
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched escape at pos {}: \\{}", pos, ch);
emit_escape(builder, ch, escape_type);
pos += len;
text_start = pos;
continue;
}
if config.extensions.raw_tex
&& let Some(len) = try_parse_latex_command(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched LaTeX command at pos {}", pos);
parse_latex_command(builder, &text[pos..], len);
pos += len;
text_start = pos;
continue;
}
}
if byte == b'{'
&& pos + 1 < text.len()
&& text.as_bytes()[pos + 1] == b'{'
&& let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched shortcode at pos {}: {}", pos, &name);
emit_shortcode(builder, &name, attrs);
pos += len;
text_start = pos;
continue;
}
if byte == b'`'
&& let Some(m) = try_parse_inline_executable(
&text[pos..],
config.extensions.rmarkdown_inline_code,
config.extensions.quarto_inline_code,
)
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched inline executable code at pos {}", pos);
emit_inline_executable(builder, &m);
pos += m.total_len;
text_start = pos;
continue;
}
if byte == b'`'
&& let Some((len, content, backtick_count, attributes)) =
try_parse_code_span(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!(
"Matched code span at pos {}: {} backticks",
pos,
backtick_count
);
if let Some(ref attrs) = attributes
&& config.extensions.raw_attribute
&& let Some(format) = is_raw_inline(attrs)
{
use super::raw_inline::emit_raw_inline;
log::debug!("Matched raw inline span at pos {}: format={}", pos, format);
emit_raw_inline(builder, content, backtick_count, format);
} else if !config.extensions.inline_code_attributes && attributes.is_some() {
let code_span_len = backtick_count * 2 + content.len();
emit_code_span(builder, content, backtick_count, None);
pos += code_span_len;
text_start = pos;
continue;
} else {
emit_code_span(builder, content, backtick_count, attributes);
}
pos += len;
text_start = pos;
continue;
}
if byte == b':'
&& config.extensions.emoji
&& is_emoji_boundary(text, pos)
&& let Some((len, _alias)) = try_parse_emoji(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched emoji at pos {}", pos);
emit_emoji(builder, &text[pos..pos + len]);
pos += len;
text_start = pos;
continue;
}
if byte == b'^'
&& pos + 1 < text.len()
&& text.as_bytes()[pos + 1] == b'['
&& config.extensions.inline_footnotes
&& let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched inline footnote at pos {}", pos);
emit_inline_footnote(builder, content, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'^'
&& config.extensions.superscript
&& let Some((len, content)) = try_parse_superscript(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched superscript at pos {}", pos);
emit_superscript(builder, content, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'(' && config.extensions.bookdown_references {
if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bookdown definition at pos {}: {}", pos, label);
builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
pos += len;
text_start = pos;
continue;
}
if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bookdown text reference at pos {}: {}", pos, label);
builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
pos += len;
text_start = pos;
continue;
}
}
if byte == b'~'
&& config.extensions.subscript
&& let Some((len, content)) = try_parse_subscript(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched subscript at pos {}", pos);
emit_subscript(builder, content, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'~'
&& config.extensions.strikeout
&& let Some((len, content)) = try_parse_strikeout(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched strikeout at pos {}", pos);
emit_strikeout(builder, content, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'$'
&& config.extensions.tex_math_gfm
&& let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched GFM inline math at pos {}", pos);
emit_gfm_inline_math(builder, content);
pos += len;
text_start = pos;
continue;
}
if byte == b'$' && config.extensions.tex_math_dollars {
if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
log::debug!(
"Matched display math at pos {}: {} dollars",
pos,
dollar_count
);
let after_math = &text[pos + len..];
let attr_len = if config.extensions.quarto_crossrefs {
use crate::parser::utils::attributes::try_parse_trailing_attributes;
if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
let trimmed_after = after_math.trim_start();
if let Some(open_brace_pos) = trimmed_after.find('{') {
let ws_before_brace = after_math.len() - trimmed_after.len();
let attr_text_len = trimmed_after[open_brace_pos..]
.find('}')
.map(|close| close + 1)
.unwrap_or(0);
ws_before_brace + open_brace_pos + attr_text_len
} else {
0
}
} else {
0
}
} else {
0
};
let total_len = len + attr_len;
emit_display_math(builder, content, dollar_count);
if attr_len > 0 {
use crate::parser::utils::attributes::{
emit_attributes, try_parse_trailing_attributes,
};
let attr_text = &text[pos + len..pos + total_len];
if let Some((attr_block, _text_before)) =
try_parse_trailing_attributes(attr_text)
{
let trimmed_after = attr_text.trim_start();
let ws_len = attr_text.len() - trimmed_after.len();
if ws_len > 0 {
builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
}
emit_attributes(builder, &attr_block);
}
}
pos += total_len;
text_start = pos;
continue;
}
if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched inline math at pos {}", pos);
emit_inline_math(builder, content);
pos += len;
text_start = pos;
continue;
}
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
builder.token(SyntaxKind::TEXT.into(), "$");
pos += 1;
text_start = pos;
continue;
}
if byte == b'<'
&& config.extensions.autolinks
&& let Some((len, url)) = try_parse_autolink(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched autolink at pos {}", pos);
emit_autolink(builder, &text[pos..pos + len], url);
pos += len;
text_start = pos;
continue;
}
if config.extensions.autolink_bare_uris
&& let Some((len, url)) = try_parse_bare_uri(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bare URI at pos {}", pos);
emit_bare_uri_link(builder, url, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'<'
&& config.extensions.native_spans
&& let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched native span at pos {}", pos);
emit_native_span(builder, content, &attributes, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched inline image at pos {}", pos);
emit_inline_image(
builder,
&text[pos..pos + len],
alt_text,
dest,
attributes,
config,
);
pos += len;
text_start = pos;
continue;
}
if config.extensions.reference_links {
let allow_shortcut = config.extensions.shortcut_reference_links;
if let Some((len, alt_text, reference, is_implicit)) =
try_parse_reference_image(&text[pos..], allow_shortcut)
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched reference image at pos {}", pos);
emit_reference_image(builder, alt_text, &reference, is_implicit, config);
pos += len;
text_start = pos;
continue;
}
}
}
if byte == b'[' {
if config.extensions.footnotes
&& let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched footnote reference at pos {}", pos);
emit_footnote_reference(builder, &id);
pos += len;
text_start = pos;
continue;
}
if config.extensions.inline_links
&& let Some((len, link_text, dest, attributes)) =
try_parse_inline_link(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched inline link at pos {}", pos);
emit_inline_link(
builder,
&text[pos..pos + len],
link_text,
dest,
attributes,
config,
);
pos += len;
text_start = pos;
continue;
}
if config.extensions.reference_links {
let allow_shortcut = config.extensions.shortcut_reference_links;
if let Some((len, link_text, reference, is_implicit)) =
try_parse_reference_link(&text[pos..], allow_shortcut)
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched reference link at pos {}", pos);
emit_reference_link(builder, link_text, &reference, is_implicit, config);
pos += len;
text_start = pos;
continue;
}
}
if config.extensions.citations
&& let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bracketed citation at pos {}", pos);
emit_bracketed_citation(builder, content);
pos += len;
text_start = pos;
continue;
}
}
if byte == b'['
&& config.extensions.bracketed_spans
&& let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
{
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
log::debug!("Matched bracketed span at pos {}", pos);
emit_bracketed_span(builder, &text_content, &attrs, config);
pos += len;
text_start = pos;
continue;
}
if byte == b'@'
&& (config.extensions.citations || config.extensions.quarto_crossrefs)
&& let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
{
let is_crossref =
config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
if is_crossref || config.extensions.citations {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
if is_crossref {
log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
super::citations::emit_crossref(builder, key, has_suppress);
} else {
log::debug!("Matched bare citation at pos {}: {}", pos, &key);
emit_bare_citation(builder, key, has_suppress);
}
pos += len;
text_start = pos;
continue;
}
}
if byte == b'-'
&& pos + 1 < text.len()
&& text.as_bytes()[pos + 1] == b'@'
&& (config.extensions.citations || config.extensions.quarto_crossrefs)
&& let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
{
let is_crossref =
config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
if is_crossref || config.extensions.citations {
if pos > text_start {
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
}
if is_crossref {
log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
super::citations::emit_crossref(builder, key, has_suppress);
} else {
log::debug!("Matched suppress-author citation at pos {}: {}", pos, &key);
emit_bare_citation(builder, key, has_suppress);
}
pos += len;
text_start = pos;
continue;
}
}
if byte == b'*' || byte == b'_' {
let bytes = text.as_bytes();
let mut delim_count = 0;
while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
delim_count += 1;
}
if pos > text_start {
log::debug!(
"Emitting TEXT before delimiter: {:?}",
&text[text_start..pos]
);
builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
text_start = pos; }
let emphasis_result = if nested_emphasis {
try_parse_emphasis_nested(text, pos, end, config, builder)
} else {
try_parse_emphasis(text, pos, end, config, builder)
};
if let Some((consumed, _)) = emphasis_result {
log::debug!(
"Parsed emphasis, consumed {} bytes from pos {}",
consumed,
pos
);
pos += consumed;
text_start = pos;
} else {
log::debug!(
"Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
pos,
delim_count
);
pos += delim_count;
}
continue;
}
if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
let text_before = &text[text_start..pos];
let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
if trailing_spaces >= 2 {
let text_content = &text_before[..text_before.len() - trailing_spaces];
if !text_content.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_content);
}
let spaces = " ".repeat(trailing_spaces);
builder.token(
SyntaxKind::HARD_LINE_BREAK.into(),
&format!("{}\r\n", spaces),
);
pos += 2;
text_start = pos;
continue;
}
if config.extensions.hard_line_breaks {
if !text_before.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_before);
}
builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
pos += 2;
text_start = pos;
continue;
}
if !text_before.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_before);
}
builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
pos += 2;
text_start = pos;
continue;
}
if byte == b'\n' {
let text_before = &text[text_start..pos];
let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
if trailing_spaces >= 2 {
let text_content = &text_before[..text_before.len() - trailing_spaces];
if !text_content.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_content);
}
let spaces = " ".repeat(trailing_spaces);
builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
pos += 1;
text_start = pos;
continue;
}
if config.extensions.hard_line_breaks {
if !text_before.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_before);
}
builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
pos += 1;
text_start = pos;
continue;
}
if !text_before.is_empty() {
builder.token(SyntaxKind::TEXT.into(), text_before);
}
builder.token(SyntaxKind::NEWLINE.into(), "\n");
pos += 1;
text_start = pos;
continue;
}
pos += 1;
}
if pos > text_start && text_start < end {
log::debug!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
}
log::debug!("parse_inline_range complete: start={}, end={}", start, end);
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Finishes `builder` and wraps the resulting green tree in a root `SyntaxNode`.
    fn into_root(builder: GreenNodeBuilder<'static>) -> SyntaxNode {
        let green: GreenNode = builder.finish();
        SyntaxNode::new_root(green)
    }

    /// Returns `true` when `root` itself or any node below it has kind `kind`.
    fn has_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` when at least one node of kind `inner` has a proper
    /// ancestor of kind `outer`, i.e. it is really contained in it rather than
    /// merely appearing later in document order.
    fn is_nested_in(root: &SyntaxNode, inner: SyntaxKind, outer: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == inner)
            // `ancestors()` starts at the node itself, so skip it.
            .any(|n| n.ancestors().skip(1).any(|a| a.kind() == outer))
    }

    /// `*test*` parsed through the recursive entry point yields an EMPHASIS node.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        parse_inline_text_recursive(&mut builder, text, &config);
        let node = into_root(builder);

        // The tree text must reproduce the input exactly.
        assert_eq!(node.text().to_string(), text);
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    /// `*foo **bar** baz*` inside a PARAGRAPH yields both EMPHASIS and STRONG.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();
        let node = into_root(builder);

        assert_eq!(node.text().to_string(), text);
        let has_emph = has_kind(&node, SyntaxKind::EMPHASIS);
        let has_strong = has_kind(&node, SyntaxKind::STRONG);
        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// `try_parse_emphasis` on `*test*` reports `(6, 1)` and builds an EMPHASIS root.
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);
        assert_eq!(result, Some((6, 1)));

        let node = into_root(builder);
        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);
        assert_eq!(node.text().to_string(), text);
    }

    /// `parse_inline_range` on `*foo **bar** baz*` yields both EMPHASIS and STRONG.
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        let node = into_root(builder);

        assert_eq!(node.text().to_string(), text);
        let has_emph = has_kind(&node, SyntaxKind::EMPHASIS);
        let has_strong = has_kind(&node, SyntaxKind::STRONG);
        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// `***foo* bar**` must produce an EMPHASIS node contained in a STRONG node.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();
        let node = into_root(builder);

        assert_eq!(node.text().to_string(), text);
        let structure = format!("{:#?}", node);
        assert!(structure.contains("STRONG"), "Should have STRONG node");
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");

        // Check real containment: a pre-order scan alone would also accept an
        // EMPHASIS node that is only a later sibling of STRONG.
        assert!(
            is_nested_in(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// `***foo** bar*` must produce a STRONG node contained in an EMPHASIS node.
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();
        let node = into_root(builder);

        assert_eq!(node.text().to_string(), text);
        let structure = format!("{:#?}", node);
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");
        assert!(structure.contains("STRONG"), "Should have STRONG node");

        // Same reasoning as above: verify ancestry, not just traversal order.
        assert!(
            is_nested_in(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// With `quarto_crossrefs` enabled, `{#eq-einstein}` after display math is
    /// parsed into an ATTRIBUTE node and is not left behind as plain TEXT.
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true;
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();
        let node = into_root(builder);

        assert_eq!(node.text().to_string(), text);
        assert!(
            has_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );
        assert!(
            has_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // TEXT is emitted as a token, so the element following DISPLAY_MATH has
        // to be fetched with `next_sibling_or_token()`; `next_sibling()` only
        // yields nodes and would never see a TEXT token.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|n| n.next_sibling_or_token())
            .filter_map(|element| element.into_token())
            .any(|token| {
                token.kind() == SyntaxKind::TEXT && token.text().contains("{#eq-einstein}")
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }
}
/// Checks that `**bold with *italic***` parses into a STRONG root that has an
/// EMPHASIS child, and that the tree text reproduces the input.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "**bold with *italic***";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &options, &mut sink);

    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);

    assert_eq!(root.text().to_string(), INPUT, "Should be lossless");

    let root_kind = root.kind();
    assert_eq!(
        root_kind,
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root_kind
    );

    let contains_emphasis = root
        .children()
        .map(|child| child.kind())
        .any(|kind| kind == SyntaxKind::EMPHASIS);
    assert!(contains_emphasis, "STRONG should contain EMPHASIS node");
}
/// Checks that `*foo *` (space right before the closing `*`) is accepted by
/// `try_parse_emphasis` and produces an EMPHASIS root covering the whole input.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "*foo *";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    let outcome = try_parse_emphasis(INPUT, 0, INPUT.len(), &options, &mut sink);
    assert_eq!(
        outcome,
        Some((6, 1)),
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);
    assert_eq!(root.text().to_string(), INPUT);
}
/// Checks that `***foo** bar **baz***` yields exactly one EMPHASIS node whose
/// direct children include two STRONG nodes, and that the tree is lossless.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "***foo** bar **baz***";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &options, &mut sink);
    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);

    // Gather every EMPHASIS node in the tree (the root itself is included).
    let all_emphasis: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_total = all_emphasis.len();
    assert_eq!(
        emphasis_total, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_total
    );

    // Count the STRONG nodes that sit directly under that single EMPHASIS.
    let outer = &all_emphasis[0];
    let strong_total = outer
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_total, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_total
    );

    assert_eq!(root.text().to_string(), INPUT);
}
/// Checks that `***foo* bar *baz***` yields exactly one STRONG node whose
/// direct children include two EMPHASIS nodes, and that the tree is lossless.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "***foo* bar *baz***";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &options, &mut sink);
    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);

    // Gather every STRONG node in the tree (the root itself is included).
    let all_strong: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_total = all_strong.len();
    assert_eq!(
        strong_total, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_total
    );

    // Count the EMPHASIS nodes that sit directly under that single STRONG.
    let outer = &all_strong[0];
    let emphasis_total = outer
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_total, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_total
    );

    assert_eq!(root.text().to_string(), INPUT);
}
/// Checks that single-delimiter emphasis may span a line break: the whole
/// input is consumed, the root is EMPHASIS, and the newline is kept.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "*text on\nline two*";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    let outcome = try_parse_emphasis(INPUT, 0, INPUT.len(), &options, &mut sink);
    assert_eq!(
        outcome,
        Some((INPUT.len(), 1)),
        "Emphasis should parse multiline content"
    );

    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    let rendered = root.text().to_string();
    assert_eq!(rendered, INPUT);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
/// Checks that double-delimiter emphasis may span a line break: the whole
/// input is consumed, the root is STRONG, and the newline is kept.
#[test]
fn test_parse_strong_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "**strong on\nline two**";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    let outcome = try_parse_emphasis(INPUT, 0, INPUT.len(), &options, &mut sink);
    assert_eq!(
        outcome,
        Some((INPUT.len(), 2)),
        "Strong emphasis should parse multiline content"
    );

    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);
    assert_eq!(root.kind(), SyntaxKind::STRONG);

    let rendered = root.text().to_string();
    assert_eq!(rendered, INPUT);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
/// Checks that triple-delimiter emphasis may span a line break: the whole
/// input is consumed, a STRONG node is present, and the newline is kept.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};
    use rowan::GreenNode;

    const INPUT: &str = "***both on\nline two***";

    let options = ParserOptions::default();
    let mut sink = GreenNodeBuilder::new();
    let outcome = try_parse_emphasis(INPUT, 0, INPUT.len(), &options, &mut sink);
    assert_eq!(
        outcome,
        Some((INPUT.len(), 3)),
        "Triple emphasis should parse multiline content"
    );

    let tree: GreenNode = sink.finish();
    let root = SyntaxNode::new_root(tree);

    let strong_present = root
        .descendants()
        .map(|candidate| candidate.kind())
        .any(|kind| kind == SyntaxKind::STRONG);
    assert!(strong_present, "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, INPUT);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}