use alloc::{borrow::Cow, string::String, vec, vec::Vec};
use crate::{
ast::*,
diagnostic::{Diagnostic, DiagnosticCode, DiagnosticSeverity},
entities::named_character_reference,
options::{SyntaxConfigError, SyntaxOptions},
span::Span,
validate::is_directive_name,
};
/// Result of a parse: the document tree plus every diagnostic collected while building it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseOutput {
/// Root of the parsed document.
pub document: Document,
/// Diagnostics gathered during the parse.
pub diagnostics: Vec<Diagnostic>,
}
/// Failure returned by `SyntaxOptions::parse_strict`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseStrictError {
/// The syntax options failed validation, so no parsing took place.
Config(SyntaxConfigError),
/// Parsing produced at least one `Error`-severity diagnostic; this is the first one found.
Diagnostic(Diagnostic),
}
// Destination and optional title of a link, each paired with the syntactic form it was written in.
// NOTE(review): not used in this part of the file; presumably produced by the inline link
// parser further down — confirm there.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ParsedLinkResource {
destination: String,
destination_kind: LinkDestinationKind,
title: Option<String>,
title_kind: Option<LinkTitleKind>,
}
// Size limits. The names indicate the first is counted in characters and the second in bytes;
// the code that enforces them is outside this part of the file.
// NOTE(review): presumably caps on reference-label and wikilink length — confirm at the use sites.
const REFERENCE_LABEL_MAX_CHARS: usize = 999;
const WIKILINK_MAX_BYTES: usize = 999;
// One physical line of input, as produced by `collect_lines`.
#[derive(Clone, Copy, Debug)]
struct Line<'a> {
// Line content without its line terminator.
text: &'a str,
// The terminator itself: "\n", "\r\n", "\r", or "" for an unterminated final line.
eol: &'a str,
// Offset of the first byte of `text` (includes the caller-supplied base offset).
start: usize,
// Offset just past the last byte of `text`.
end: usize,
// Offset just past the line terminator.
end_with_eol: usize,
// `collect_lines` always sets this to false; the block-quote and list parsers set it to true
// on child lines that were taken as lazy continuations.
lazy: bool,
}
// Description of a list marker found at the start of a line (returned by `list_marker_info`,
// which is defined outside this part of the file).
#[derive(Clone, Copy, Debug)]
struct ListMarkerInfo<'a> {
// Copied into `List::ordered` by `parse_list`.
ordered: bool,
// Copied into `List::start` by `parse_list`.
start: Option<u64>,
// Copied into `List::delimiter` by `parse_list`.
delimiter: ListDelimiter,
// NOTE(review): presumably the indentation of the marker itself — confirm in `list_marker_info`.
indent: usize,
// NOTE(review): presumably the length of the marker text — confirm in `list_marker_info`.
marker_len: usize,
// `parse_list` compares this against a line's indent columns to decide whether the line
// belongs to the item, and also adds it to the item's start offset for child spans.
content_indent: usize,
// NOTE(review): presumably the text following the marker — confirm in `list_marker_info`.
content: &'a str,
}
// A description-list details marker (`:` or `~`) recognised by `description_marker`.
#[derive(Clone, Copy, Debug)]
struct DescriptionMarker<'a> {
// Byte offset within the line where the details content begins
// (after the marker and any following spaces or tabs).
content_offset: usize,
// The remainder of the line starting at `content_offset`.
content: &'a str,
}
// A description-list term, spanning one or more lines, located by `description_term`.
#[derive(Clone, Debug)]
struct DescriptionTerm {
// Index of the line holding the first details marker after the term.
marker_index: usize,
// Index of the last line that belongs to the term.
term_end: usize,
// True when at least one blank line sits between the term and its first marker.
blank_after_term: bool,
// The term lines, each trimmed on both sides, joined with '\n'.
source: String,
// Start offset of the first term line plus `leading_trim_bytes` of that line.
source_offset: usize,
}
// Classification of an HTML block.
// NOTE(review): the variants are not used in this part of the file; they presumably select how
// the HTML block is terminated (closing text for `Until`, blank line for `UntilBlank`) — confirm
// in `parse_html_block`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum HtmlBlockKind {
RawTag,
BlockTag,
Until(&'static str),
UntilBlank,
}
/// Parses `input` using the default [`SyntaxOptions`].
pub fn parse(input: &str) -> ParseOutput {
    let options = SyntaxOptions::default();
    options.parse(input)
}
impl SyntaxOptions {
pub fn parse(&self, input: &str) -> ParseOutput {
match parse_checked(input, self) {
Ok(output) => output,
Err(error) => ParseOutput {
document: Document::default(),
diagnostics: vec![Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::StrictParse,
Span::new(0, input.len()),
error.message(),
)],
},
}
}
pub fn parse_strict(&self, input: &str) -> Result<ParseOutput, ParseStrictError> {
let output = parse_checked(input, self).map_err(ParseStrictError::Config)?;
if let Some(diagnostic) = output
.diagnostics
.iter()
.find(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
{
return Err(ParseStrictError::Diagnostic(diagnostic.clone()));
}
Ok(output)
}
}
/// Validates `options`, then parses `input` into a document.
///
/// The input is walked twice: once by `collect_definitions` to learn the definition
/// identifiers, and once more to build the tree with those identifiers available.
fn parse_checked(input: &str, options: &SyntaxOptions) -> Result<ParseOutput, SyntaxConfigError> {
    options.validate()?;
    let mut diagnostics = Vec::new();
    let known_definitions = collect_definitions(input, options);
    let children = parse_blocks(
        input,
        0,
        true,
        options,
        &known_definitions,
        &mut diagnostics,
    );
    let whole_input = Span::new(0, input.len());
    let document = Document {
        meta: NodeMeta::new(Some(whole_input)),
        children,
    };
    Ok(ParseOutput {
        document,
        diagnostics,
    })
}
/// Splits `input` into lines (offsets shifted by `base_offset`) and parses them as blocks.
fn parse_blocks(
    input: &str,
    base_offset: usize,
    allow_frontmatter: bool,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Vec<Block> {
    parse_blocks_from_lines(
        &collect_lines(input, base_offset),
        allow_frontmatter,
        options,
        definitions,
        diagnostics,
    )
}
// Parses a sequence of lines into blocks.
//
// Blank lines between blocks are skipped. For every other line the block parsers below are
// tried in a fixed order and the first one that matches wins; a paragraph is the fallback, so
// every non-blank line ends up in some block. Parsers that return `(block, next)` report the
// index of the first line they did not consume; single-line parsers advance by one.
//
// `allow_frontmatter` only has an effect on the very first line (`index == 0`).
fn parse_blocks_from_lines(
lines: &[Line<'_>],
allow_frontmatter: bool,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Vec<Block> {
let mut blocks = Vec::new();
let mut index = 0;
while index < lines.len() {
let line = lines[index];
if line.text.trim().is_empty() {
index += 1;
continue;
}
// True when the previous block is a definition and no blank line separates it from this
// line. It is handed to `parse_definition` and also suppresses indented code below.
let after_definition_unbroken = index > 0
&& !lines[index - 1].text.trim().is_empty()
&& matches!(blocks.last(), Some(Block::Definition(_)));
if allow_frontmatter && index == 0 {
if let Some((block, next)) = parse_frontmatter(lines, index, options) {
blocks.push(block);
index = next;
continue;
}
}
if let Some((block, next)) =
parse_container_directive(lines, index, options, definitions, diagnostics)
{
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) = parse_math_block(lines, index, options) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) = parse_fenced_code(lines, index, options) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) =
parse_block_quote(lines, index, options, definitions, diagnostics)
{
blocks.push(block);
index = next;
continue;
}
if let Some(block) = parse_atx_heading(line, options, definitions) {
blocks.push(block);
index += 1;
continue;
}
// Thematic breaks are tried before lists, so a line that is both is never seen by `parse_list`.
if let Some(block) = parse_thematic_break(line) {
blocks.push(block);
index += 1;
continue;
}
if let Some((block, next)) = parse_list(lines, index, options, definitions, diagnostics) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) =
parse_footnote_definition(lines, index, options, definitions, diagnostics)
{
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) =
parse_definition(lines, index, options, after_definition_unbroken)
{
blocks.push(block);
index = next;
continue;
}
if let Some(block) = parse_leaf_directive(line, options, definitions, diagnostics) {
blocks.push(block);
index += 1;
continue;
}
if let Some((block, next)) = parse_html_block(lines, index, options) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) = parse_mdx_flow(lines, index, options, diagnostics) {
blocks.push(block);
index = next;
continue;
}
// Indented code is not attempted directly after an unbroken definition.
if !after_definition_unbroken {
if let Some((block, next)) = parse_indented_code(lines, index, options) {
blocks.push(block);
index = next;
continue;
}
}
if let Some((block, next)) = parse_table(lines, index, options, definitions, diagnostics) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) = parse_setext_heading(lines, index, options, definitions) {
blocks.push(block);
index = next;
continue;
}
if let Some((block, next)) =
parse_description_list(lines, index, options, definitions, diagnostics)
{
blocks.push(block);
index = next;
continue;
}
// Fallback: nothing else matched, so the line starts a paragraph.
let (block, next) = parse_paragraph(lines, index, options, definitions, diagnostics);
blocks.push(block);
index = next;
}
blocks
}
/// Splits `input` into [`Line`]s, recognising `\n`, `\r\n` and a lone `\r` as terminators.
///
/// All offsets stored in the lines are shifted by `base_offset`. A trailing unterminated
/// line is included, and an empty input yields exactly one empty line. `lazy` is always
/// `false` here.
fn collect_lines(input: &str, base_offset: usize) -> Vec<Line<'_>> {
    let bytes = input.as_bytes();
    let total = bytes.len();
    let mut collected = Vec::new();
    let mut line_start = 0;
    let mut pos = 0;
    while let Some(&byte) = bytes.get(pos) {
        // Work out where the terminator ends; any other byte just advances the scan.
        let eol_end = match byte {
            b'\n' => pos + 1,
            b'\r' if bytes.get(pos + 1) == Some(&b'\n') => pos + 2,
            b'\r' => pos + 1,
            _ => {
                pos += 1;
                continue;
            }
        };
        collected.push(Line {
            text: &input[line_start..pos],
            eol: &input[pos..eol_end],
            start: base_offset + line_start,
            end: base_offset + pos,
            end_with_eol: base_offset + eol_end,
            lazy: false,
        });
        pos = eol_end;
        line_start = eol_end;
    }
    if line_start < total || input.is_empty() {
        collected.push(Line {
            text: &input[line_start..],
            eol: "",
            start: base_offset + line_start,
            end: base_offset + total,
            end_with_eol: base_offset + total,
            lazy: false,
        });
    }
    collected
}
/// Runs a preliminary parse of `input` (with no known definitions) and returns the
/// identifiers of every definition found, in first-seen order without duplicates.
///
/// Diagnostics raised by this preliminary pass are discarded.
fn collect_definitions(input: &str, options: &SyntaxOptions) -> Vec<String> {
    let mut discarded_diagnostics = Vec::new();
    let first_pass = parse_blocks(input, 0, true, options, &[], &mut discarded_diagnostics);
    let mut identifiers = Vec::new();
    collect_definition_refs_from_blocks(&first_pass, &mut identifiers);
    identifiers
}
/// Appends to `definitions` the identifier of every definition found in `blocks`, skipping
/// identifiers that are already present.
///
/// Recurses into block quotes, alerts, list items, description-list details, footnote
/// definitions and container directives; all other block kinds are ignored.
fn collect_definition_refs_from_blocks(blocks: &[Block], definitions: &mut Vec<String>) {
    for block in blocks {
        match block {
            Block::Definition(definition) => {
                let already_known = definitions
                    .iter()
                    .any(|known| known == &definition.identifier);
                if !already_known {
                    definitions.push(definition.identifier.clone());
                }
            }
            Block::BlockQuote(quote) => {
                collect_definition_refs_from_blocks(&quote.children, definitions)
            }
            Block::Alert(alert) => {
                collect_definition_refs_from_blocks(&alert.children, definitions)
            }
            Block::List(list) => {
                for item in list.children.iter() {
                    collect_definition_refs_from_blocks(&item.children, definitions);
                }
            }
            Block::DescriptionList(list) => {
                let all_details = list.children.iter().flat_map(|item| item.details.iter());
                for details in all_details {
                    collect_definition_refs_from_blocks(&details.children, definitions);
                }
            }
            Block::FootnoteDefinition(footnote) => {
                collect_definition_refs_from_blocks(&footnote.children, definitions)
            }
            Block::ContainerDirective(directive) => {
                collect_definition_refs_from_blocks(&directive.children, definitions)
            }
            _ => {}
        }
    }
}
/// Parses a frontmatter block whose opening fence is `lines[index]`.
///
/// Returns `None` when frontmatter is disabled, when the line is not a fence, or when no
/// later line carries a fence of the same kind. On success the second tuple element is the
/// index of the line after the closing fence.
fn parse_frontmatter(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
) -> Option<(Block, usize)> {
    if !options.constructs.frontmatter {
        return None;
    }
    let opening = lines[index];
    let kind = frontmatter_fence_kind(opening.text)?;
    let mut value = String::new();
    for (position, line) in lines.iter().enumerate().skip(index + 1) {
        if frontmatter_fence_kind(line.text) != Some(kind) {
            push_line(&mut value, line.text);
            continue;
        }
        // Matching closing fence: the block spans opener through closer, terminator included.
        let meta = NodeMeta::new(Some(Span::new(opening.start, line.end_with_eol)));
        let block = Block::Frontmatter(Frontmatter { meta, kind, value });
        return Some((block, position + 1));
    }
    None
}
/// Classifies a frontmatter fence line: `---` is YAML and `+++` is TOML.
///
/// Trailing spaces and tabs are ignored; any leading whitespace or other text disqualifies
/// the line.
fn frontmatter_fence_kind(line: &str) -> Option<FrontmatterKind> {
    let fence = line.trim_end_matches([' ', '\t']);
    if fence == "---" {
        Some(FrontmatterKind::Yaml)
    } else if fence == "+++" {
        Some(FrontmatterKind::Toml)
    } else {
        None
    }
}
// Parses a container directive: an opening fence of three or more colons followed by a
// directive opener, body lines, and a closing colon fence at least as long as the opener.
//
// Returns `None` when the construct is disabled, the line is not a colon fence, or the
// opener has no valid name (in that case an `InvalidDirectiveName` error is also recorded).
// A directive with no closing fence still yields a block that runs to the last line, and an
// `UnclosedDirectiveContainer` error is recorded.
fn parse_container_directive(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
if !options.constructs.directive_container {
return None;
}
let trimmed = trim_up_to_three_spaces(lines[index].text)?;
let Some((fence_len, opener_rest)) = directive_container_opener_prefix(trimmed) else {
return None;
};
// Absolute offset of the text that follows the colon fence.
let opener_base = lines[index].start + (lines[index].text.len() - trimmed.len()) + fence_len;
let Some((name, label_source, attributes, _consumed)) = parse_directive_opener(opener_rest)
else {
diagnostics.push(Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::InvalidDirectiveName,
Span::new(lines[index].start, lines[index].end),
"container directive must have a valid name",
));
return None;
};
// NOTE(review): the `+ 1` presumably skips the `[` that opens the label — confirm against
// `parse_directive_opener`.
let label_base = opener_base + name.len() + 1;
let mut content = String::new();
let mut cursor = index + 1;
// Fence lengths of nested container directives that are currently open inside the body.
let mut nested_fences = Vec::new();
while cursor < lines.len() {
let line = lines[cursor].text;
let trimmed = trim_up_to_three_spaces(line);
if let Some(trimmed) = trimmed {
if let Some(nested_len) = nested_fences.last().copied() {
// While a nested directive is open, a closing fence closes that one and stays in the body.
if directive_container_closing_fence(trimmed, nested_len).is_some() {
nested_fences.pop();
push_line(&mut content, line);
cursor += 1;
continue;
}
} else if directive_container_closing_fence(trimmed, fence_len).is_some() {
// Closing fence of this directive: parse label and body, then return.
let label = label_source
.map(|source| {
parse_inlines(source, label_base, options, definitions, diagnostics)
})
.unwrap_or_default();
let children = parse_blocks(
&content,
lines[index + 1].start,
false,
options,
definitions,
diagnostics,
);
return Some((
Block::ContainerDirective(ContainerDirective {
meta: NodeMeta::new(Some(Span::new(
lines[index].start,
lines[cursor].end_with_eol,
))),
name,
label,
attributes,
children,
}),
cursor + 1,
));
}
// A valid opener inside the body starts a nested directive whose fence must be matched first.
if let Some((nested_len, nested_rest)) = directive_container_opener_prefix(trimmed) {
if parse_directive_opener(nested_rest).is_some() {
nested_fences.push(nested_len);
}
}
}
push_line(&mut content, line);
cursor += 1;
}
// Ran out of lines without a closing fence: report it and treat the rest as the body.
diagnostics.push(Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::UnclosedDirectiveContainer,
Span::new(lines[index].start, lines[index].end),
"container directive is missing a closing fence",
));
Some((
Block::ContainerDirective(ContainerDirective {
meta: NodeMeta::new(Some(Span::new(
lines[index].start,
lines.last()?.end_with_eol,
))),
name,
label: label_source
.map(|source| parse_inlines(source, label_base, options, definitions, diagnostics))
.unwrap_or_default(),
attributes,
children: parse_blocks(
&content,
// The opener may be the last line, in which case there is no body line to anchor to.
lines
.get(index + 1)
.map(|line| line.start)
.unwrap_or(lines[index].end),
false,
options,
definitions,
diagnostics,
),
}),
lines.len(),
))
}
/// Recognises the colon fence that opens a container directive.
///
/// Returns the number of leading `:` characters and the text after them when there are at
/// least three; otherwise `None`.
fn directive_container_opener_prefix(input: &str) -> Option<(usize, &str)> {
    let fence_len = input.bytes().take_while(|&byte| byte == b':').count();
    (fence_len >= 3).then(|| (fence_len, &input[fence_len..]))
}
/// Recognises a closing colon fence for a container directive.
///
/// The line must begin with at least `min_len` colons and contain nothing but whitespace
/// after them. Returns the number of colons on success.
fn directive_container_closing_fence(input: &str, min_len: usize) -> Option<usize> {
    let after_fence = input.trim_start_matches(':');
    let fence_len = input.len() - after_fence.len();
    if fence_len < min_len || !after_fence.trim().is_empty() {
        return None;
    }
    Some(fence_len)
}
/// Parses a `$$`-fenced math block whose opening fence is `lines[index]`.
///
/// Body lines have up to the opener's indentation stripped and keep their own line
/// terminators. The block ends at the first line that closes the fence; without one it runs
/// to the last line. Returns `None` when math blocks are disabled or the line is not a fence.
fn parse_math_block(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
) -> Option<(Block, usize)> {
    if !options.constructs.math_block {
        return None;
    }
    let opening = lines[index];
    let opener = trim_up_to_three_spaces(opening.text)?;
    let fence_length = math_block_fence_length(opener)?;
    let opening_indent = leading_indent_columns(opening.text);
    let first_body_line = index + 1;

    let mut value = String::new();
    let mut cursor = first_body_line;
    while let Some(line) = lines.get(cursor) {
        let closes = trim_up_to_three_spaces(line.text)
            .is_some_and(|candidate| math_block_fence_closes(candidate, fence_length));
        if closes {
            break;
        }
        // Every body line after the first is preceded by a separator check.
        if cursor > first_body_line {
            ensure_line_separator(&mut value);
        }
        let stripped = strip_leading_indent_columns(line.text, opening_indent);
        value.push_str(&stripped);
        value.push_str(line.eol);
        cursor += 1;
    }

    // `cursor` rests on the closing fence, or is past the end when the block is unclosed.
    let (span_end, next) = match lines.get(cursor) {
        Some(closing) => (closing.end_with_eol, cursor + 1),
        None => (lines.last()?.end_with_eol, lines.len()),
    };
    let meta = NodeMeta::new(Some(Span::new(opening.start, span_end)));
    Some((Block::MathBlock(MathBlock { meta, value }), next))
}
/// Returns the length of an opening math fence: the count of leading `$` characters.
///
/// Yields `None` when there are fewer than two, or when another `$` appears later on the
/// line.
fn math_block_fence_length(input: &str) -> Option<usize> {
    let after_fence = input.trim_start_matches('$');
    let length = input.len() - after_fence.len();
    (length >= 2 && !after_fence.contains('$')).then_some(length)
}
/// Reports whether `input` closes a math fence of the given `length`: it must start with at
/// least `length` dollar signs and hold only whitespace after them.
fn math_block_fence_closes(input: &str, length: usize) -> bool {
    let tail = input.trim_start_matches('$');
    let dollars = input.len() - tail.len();
    dollars >= length && tail.trim().is_empty()
}
/// Parses a fenced code block whose opening fence is `lines[index]`.
///
/// A backtick fence whose info string contains a backtick is rejected. Body lines have up to
/// the opener's indentation stripped and keep their own terminators. The block ends at the
/// first line accepted by `fence_close`; without one it runs to the last line.
fn parse_fenced_code(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
) -> Option<(Block, usize)> {
    let opening = lines[index];
    let opener = fence_line(opening.text, options)?;
    let (marker, length) = fence_start(opener)?;
    let opening_indent = leading_indent_columns(opening.text);
    let raw_info = opener[length..].trim();
    if marker == FenceMarker::Backtick && raw_info.contains('`') {
        return None;
    }
    let info = (!raw_info.is_empty()).then(|| unescape_string(raw_info));
    let first_body_line = index + 1;

    let mut value = String::new();
    let mut cursor = first_body_line;
    while let Some(line) = lines.get(cursor) {
        let closes = fence_line(line.text, options)
            .is_some_and(|candidate| fence_close(candidate, marker, length));
        if closes {
            break;
        }
        // Every body line after the first is preceded by a separator check.
        if cursor > first_body_line {
            ensure_line_separator(&mut value);
        }
        let stripped = strip_leading_indent_columns(line.text, opening_indent);
        value.push_str(&stripped);
        value.push_str(line.eol);
        cursor += 1;
    }

    // `cursor` rests on the closing fence, or is past the end when the block is unclosed.
    let (span_end, next) = match lines.get(cursor) {
        Some(closing) => (closing.end_with_eol, cursor + 1),
        None => (lines.last()?.end_with_eol, lines.len()),
    };
    let block = Block::CodeBlock(CodeBlock {
        meta: NodeMeta::new(Some(Span::new(opening.start, span_end))),
        kind: CodeBlockKind::Fenced { marker, length },
        info,
        value,
    });
    Some((block, next))
}
/// Strips the indentation allowed in front of a code fence.
///
/// With indented code enabled the line goes through `trim_up_to_three_spaces`, which may
/// reject it; with indented code disabled all leading ASCII whitespace is removed and the
/// line is always accepted.
fn fence_line<'a>(line: &'a str, options: &SyntaxOptions) -> Option<&'a str> {
    if !options.constructs.indented_code {
        return Some(trim_ascii_start(line));
    }
    trim_up_to_three_spaces(line)
}
/// Reports whether a container's collected `content` ends inside an unclosed, still-empty
/// code fence while the container itself has just ended.
///
/// All three conditions must hold: the last content line has a line terminator, the line at
/// `cursor` is blank or past the end of `lines`, and `content_has_unclosed_fenced_code`
/// is true for `content`.
fn container_closed_after_unclosed_fence(
    lines: &[Line<'_>],
    cursor: usize,
    last_content_index: usize,
    content: &str,
    options: &SyntaxOptions,
) -> bool {
    if lines[last_content_index].eol.is_empty() {
        return false;
    }
    let at_end_or_blank = match lines.get(cursor) {
        Some(following) => following.text.trim().is_empty(),
        None => true,
    };
    at_end_or_blank && content_has_unclosed_fenced_code(content, options)
}
/// Scans `content` line by line and reports whether it ends inside a code fence that was
/// opened but never closed and that has seen no non-blank line since opening.
///
/// Lines rejected by `fence_line` are ignored entirely.
fn content_has_unclosed_fenced_code(content: &str, options: &SyntaxOptions) -> bool {
    // State of the currently open fence: (marker, length, saw a non-blank body line).
    let mut open_fence = None;
    for line in collect_lines(content, 0) {
        let Some(candidate) = fence_line(line.text, options) else {
            continue;
        };
        match open_fence {
            Some((marker, length, saw_content)) => {
                open_fence = if fence_close(candidate, marker, length) {
                    None
                } else {
                    Some((
                        marker,
                        length,
                        saw_content || !candidate.trim().is_empty(),
                    ))
                };
            }
            None => {
                if let Some((marker, length)) = fence_start(candidate) {
                    let info = candidate[length..].trim();
                    // A backtick fence with a backtick in its info string is not a fence.
                    if marker != FenceMarker::Backtick || !info.contains('`') {
                        open_fence = Some((marker, length, false));
                    }
                }
            }
        }
    }
    matches!(open_fence, Some((_, _, false)))
}
/// Decides whether a line of container content leaves a paragraph open, i.e. whether a
/// following unmarked line could lazily continue it.
///
/// Nested block-quote markers and list markers are peeled off recursively; the innermost
/// text keeps a paragraph open only if it is non-empty and does not itself start a block.
fn block_quote_content_paragraph_open(content: &str, options: &SyntaxOptions) -> bool {
    let trimmed = match trim_up_to_three_spaces(content) {
        Some(trimmed) if !trimmed.is_empty() => trimmed,
        _ => return false,
    };
    if let Some(after_marker) = trimmed.strip_prefix('>') {
        // One optional space after `>` belongs to the marker.
        let inner = after_marker.strip_prefix(' ').unwrap_or(after_marker);
        return block_quote_content_paragraph_open(inner, options);
    }
    match list_marker_info(trimmed) {
        Some(marker) => {
            let first_content = list_marker_first_content(trimmed, marker);
            block_quote_content_paragraph_open(&first_content, options)
        }
        None => !lazy_line_starts_block(trimmed, options),
    }
}
/// Reports whether `input` begins a new block and therefore cannot be a lazy paragraph
/// continuation.
///
/// True when `likely_block_start` says so, when HTML blocks are enabled and the line starts
/// one, or when the line (after `trim_up_to_three_spaces`) begins with a backtick or tilde.
fn lazy_line_starts_block(input: &str, options: &SyntaxOptions) -> bool {
    if likely_block_start(input, options) {
        return true;
    }
    if options.constructs.html_block && line_starts_html_block(input) {
        return true;
    }
    match trim_up_to_three_spaces(input) {
        Some(trimmed) => trimmed.starts_with(['`', '~']),
        None => false,
    }
}
fn parse_block_quote(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
if !trim_up_to_three_spaces(lines[index].text)?.starts_with('>') {
return None;
}
let mut content = String::new();
let mut lazy_flags: Vec<bool> = Vec::new();
let mut cursor = index;
let mut paragraph_open = false;
let mut in_table = false;
let mut last_content_line: Option<String> = None;
let mut content_base_offset = None;
while cursor < lines.len() {
let raw = lines[cursor].text;
let trimmed_opt = trim_up_to_three_spaces(raw);
let marked = trimmed_opt.is_some_and(|trimmed| trimmed.starts_with('>'));
let quote_rest_owned: String;
if let Some(trimmed) = trimmed_opt {
if trimmed.is_empty() {
break;
}
}
let (line, line_start) = if marked {
let trimmed = trimmed_opt.expect("marked implies a trimmed line");
let trimmed_start = lines[cursor].start + (raw.len() - trimmed.len());
let mut rest_start = 1;
let mut rest = &trimmed[rest_start..];
if rest.starts_with(' ') {
rest_start += 1;
rest = &rest[1..];
} else if rest.starts_with('\t') {
let marker_end_column = leading_indent_columns(raw) + 1;
match strip_leading_indent_columns_from(rest, 1, marker_end_column) {
Cow::Borrowed(stripped) => rest = stripped,
Cow::Owned(stripped) => {
quote_rest_owned = stripped;
rest = "e_rest_owned;
}
}
}
(rest, trimmed_start + rest_start)
} else if in_table {
break;
} else if paragraph_open && !lazy_line_starts_block(raw, options) {
(raw, lines[cursor].start)
} else {
break;
};
let mut escaped_lazy = String::new();
let line = if !marked
&& last_content_line.as_deref().is_some_and(|previous| {
table_can_start_source(
previous,
line,
options.constructs.indented_code,
options.constructs.spoiler,
)
}) {
escaped_lazy.push_str(line);
if let Some(offset) = escaped_lazy.find('-') {
escaped_lazy.insert(offset, '\\');
}
&escaped_lazy
} else {
line
};
let starts_table = last_content_line.as_deref().is_some_and(|previous| {
table_can_start_source(
previous,
line,
options.constructs.indented_code,
options.constructs.spoiler,
)
});
if marked && starts_table {
paragraph_open = false;
in_table = true;
} else if marked && in_table && block_quote_table_body_row(line, options) {
paragraph_open = false;
} else {
in_table = false;
paragraph_open = block_quote_content_paragraph_open(line, options);
}
last_content_line = Some(line.into());
if content_base_offset.is_none() {
content_base_offset = Some(line_start);
}
push_line(&mut content, line);
lazy_flags.push(!marked);
cursor += 1;
}
let span = Span::new(lines[index].start, lines[cursor - 1].end_with_eol);
let child_base_offset = content_base_offset.unwrap_or(lines[index].start);
if !lines[cursor - 1].eol.is_empty() && !ends_with_line_ending(&content) {
content.push_str(lines[cursor - 1].eol);
}
if container_closed_after_unclosed_fence(lines, cursor, cursor - 1, &content, options) {
content.push('\n');
}
if let Some(alert) = parse_alert_from_block_quote(
&content,
child_base_offset,
span,
options,
definitions,
diagnostics,
) {
return Some((alert, cursor));
}
let mut child_lines = collect_lines(&content, child_base_offset);
for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
child.lazy = lazy;
}
let children = parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
Some((
Block::BlockQuote(BlockQuote {
meta: NodeMeta::new(Some(span)),
children,
}),
cursor,
))
}
/// Reinterprets block-quote content as a GFM alert when its first line is an alert marker.
///
/// Returns `None` when alerts are disabled or the first line is not a marker. Everything
/// after the first `\n` is parsed as the alert's children; `span` is used unchanged as the
/// alert's span.
fn parse_alert_from_block_quote(
    content: &str,
    base_offset: usize,
    span: Span,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<Block> {
    if !options.constructs.gfm_alert {
        return None;
    }
    let (first_line, rest) = match content.split_once('\n') {
        Some(parts) => parts,
        None => (content, ""),
    };
    let (kind, title) = parse_alert_marker(first_line)?;
    let mut children = Vec::new();
    if !rest.is_empty() {
        // The body starts after the first line and the newline that terminated it.
        let rest_base_offset = base_offset + first_line.len() + 1;
        children = parse_blocks(
            rest,
            rest_base_offset,
            false,
            options,
            definitions,
            diagnostics,
        );
    }
    let meta = NodeMeta::new(Some(span));
    Some(Block::Alert(Alert {
        meta,
        kind,
        title,
        children,
    }))
}
/// Parses an alert marker such as `[!NOTE]` at the very start of `line`.
///
/// The keyword is matched ASCII case-insensitively against note, tip, important, warning
/// and caution. Any non-whitespace text after the closing bracket becomes the title.
fn parse_alert_marker(line: &str) -> Option<(AlertKind, Option<String>)> {
    let after_open = line.strip_prefix("[!")?;
    let (keyword, after_close) = after_open.split_once(']')?;
    let kind = if keyword.eq_ignore_ascii_case("note") {
        AlertKind::Note
    } else if keyword.eq_ignore_ascii_case("tip") {
        AlertKind::Tip
    } else if keyword.eq_ignore_ascii_case("important") {
        AlertKind::Important
    } else if keyword.eq_ignore_ascii_case("warning") {
        AlertKind::Warning
    } else if keyword.eq_ignore_ascii_case("caution") {
        AlertKind::Caution
    } else {
        return None;
    };
    let title = after_close.trim();
    let title = (!title.is_empty()).then(|| String::from(title));
    Some((kind, title))
}
/// Reports whether `line` can be a table body row inside a block quote: it must be accepted
/// by `table_indent_line`, be non-blank, and contain an unescaped pipe.
fn block_quote_table_body_row(line: &str, options: &SyntaxOptions) -> bool {
    match table_indent_line(line, options.constructs.indented_code) {
        Some(row) => {
            !row.trim().is_empty() && contains_unescaped_pipe(row, options.constructs.spoiler)
        }
        None => false,
    }
}
// Parses a list that starts at `lines[index]`.
//
// The outer loop collects one item per iteration for as long as lines carry a marker that
// `same_list_marker` accepts against the first one. For each item, the inner loop gathers the
// item's lines (with the item indentation stripped) into `content`, which is then parsed as
// nested blocks. The list is tight only if every item is tight.
//
// Returns the list block and the index of the first line after it, or `None` when
// `lines[index]` has no list marker.
//
// NOTE(review): if `lines[index]` were itself a thematic break, the outer loop would exit
// with no items and `lines[cursor - 1]` below would underflow for `index == 0` (or an empty
// list would be returned without advancing). The caller in this file tries
// `parse_thematic_break` before `parse_list`, so that path is not reachable from there.
fn parse_list(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
let first_marker = list_marker_info(lines[index].text)?;
let mut items = Vec::new();
let mut cursor = index;
let mut tight = true;
while cursor < lines.len() {
// A thematic break ends the list even though it may also look like a list marker.
if parse_thematic_break(lines[cursor]).is_some() {
break;
}
let Some(marker) = list_marker_info(lines[cursor].text) else {
break;
};
if !same_list_marker(first_marker, marker) {
break;
}
let item_start = cursor;
let mut item_end = cursor;
let mut item_tight = true;
// Byte offsets within `content` at which blank lines were inserted.
let mut item_blank_offsets: Vec<usize> = Vec::new();
let mut content = String::new();
// One flag per line pushed into `content`; true for lazy paragraph continuations.
let mut lazy_flags: Vec<bool> = Vec::new();
let mut open_fence = None;
let first_content = list_marker_first_content(lines[cursor].text, marker);
let mut last_content_line: Option<String> = Some(first_content.as_ref().into());
let mut paragraph_open = list_item_paragraph_stays_open(None, &first_content, options);
// True while the item has only seen blank content (marker with nothing after it).
let mut item_started_blank = first_content.trim().is_empty();
push_line(&mut content, &first_content);
lazy_flags.push(false);
update_list_item_fence(&first_content, &mut open_fence);
cursor += 1;
while cursor < lines.len() {
if lines[cursor].text.trim().is_empty() {
// Inside an open code fence a blank line is ordinary fence content.
if open_fence.is_some() {
let stripped = strip_list_continuation(
lines[cursor].text,
marker.content_indent,
first_marker.indent,
);
push_line(&mut content, &stripped);
lazy_flags.push(false);
update_list_item_fence(&stripped, &mut open_fence);
item_end = cursor;
cursor += 1;
continue;
}
// Otherwise look past the blank run to decide whether the item continues.
let next = next_nonblank_line(lines, cursor + 1);
if item_started_blank
|| next >= lines.len()
|| sibling_list_marker_at_line(
lines[next].text,
first_marker,
marker.content_indent,
)
|| leading_indent_columns(lines[next].text) < marker.content_indent
{
// The item ends here. A blank line followed by a sibling item makes this item loose.
if next < lines.len()
&& sibling_list_marker_at_line(
lines[next].text,
first_marker,
marker.content_indent,
)
{
item_tight = false;
}
cursor = next;
break;
}
// The blank line belongs to the item; remember where it sits in `content`.
item_blank_offsets.push(content.len() + usize::from(!content.is_empty()));
paragraph_open = false;
push_line(&mut content, "");
lazy_flags.push(false);
item_end = cursor;
cursor += 1;
continue;
}
item_started_blank = false;
if sibling_list_marker_at_line(lines[cursor].text, first_marker, marker.content_indent)
{
break;
}
// An under-indented line with a list marker that is not of this list's kind ends the item.
if leading_indent_columns(lines[cursor].text) < marker.content_indent
&& !same_list_marker_line(lines[cursor].text, first_marker)
&& list_marker_info(lines[cursor].text).is_some()
{
break;
}
// Any other under-indented line is kept only as a lazy continuation of an open paragraph.
if leading_indent_columns(lines[cursor].text) < marker.content_indent {
if likely_block_start(lines[cursor].text, options) || !paragraph_open {
break;
}
}
let lazy = paragraph_open
&& leading_indent_columns(lines[cursor].text) < marker.content_indent;
let stripped = strip_list_continuation(
lines[cursor].text,
marker.content_indent,
first_marker.indent,
);
let starts_table = last_content_line.as_deref().is_some_and(|previous| {
table_can_start_source(
previous,
&stripped,
options.constructs.indented_code,
options.constructs.spoiler,
)
});
paragraph_open = if starts_table {
false
} else {
list_item_paragraph_stays_open(Some(paragraph_open), &stripped, options)
};
push_line(&mut content, &stripped);
lazy_flags.push(lazy);
update_list_item_fence(&stripped, &mut open_fence);
last_content_line = Some(stripped.into_owned());
item_end = cursor;
cursor += 1;
}
// Offsets of the nested blocks are anchored at the item's content column on its first line.
let child_base = lines[item_start].start + marker.content_indent;
if !lines[item_end].eol.is_empty() && !ends_with_line_ending(&content) {
content.push_str(lines[item_end].eol);
}
if container_closed_after_unclosed_fence(lines, cursor, item_end, &content, options) {
content.push('\n');
}
let mut child_lines = collect_lines(&content, child_base);
for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
child.lazy = lazy;
}
let mut children =
parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
let checked = if options.constructs.gfm_task_list_item {
take_task_marker_from_children(&mut children)
} else {
None
};
// A blank line that separates two top-level child blocks also makes the item loose.
if item_tight
&& blank_separates_top_level_blocks(&item_blank_offsets, &children, child_base)
{
item_tight = false;
}
tight = tight && item_tight;
items.push(ListItem {
meta: NodeMeta::new(Some(Span::new(
lines[item_start].start,
lines[item_end].end_with_eol,
))),
checked,
children,
});
}
Some((
Block::List(List {
meta: NodeMeta::new(Some(Span::new(
lines[index].start,
lines[cursor - 1].end_with_eol,
))),
ordered: first_marker.ordered,
start: first_marker.start,
delimiter: first_marker.delimiter,
tight,
children: items,
}),
cursor,
))
}
/// Reports whether any child block starts after the first blank line recorded for an item.
///
/// `blank_offsets` are byte offsets relative to the item content; each child's span start is
/// made relative by subtracting `child_base` (saturating). Always false when there are no
/// blank lines or fewer than two children.
fn blank_separates_top_level_blocks(
    blank_offsets: &[usize],
    children: &[Block],
    child_base: usize,
) -> bool {
    if children.len() < 2 {
        return false;
    }
    let Some(first_blank) = blank_offsets.iter().copied().min() else {
        return false;
    };
    for child in children {
        if let Some(span) = block_span(child) {
            if span.start.saturating_sub(child_base) > first_blank {
                return true;
            }
        }
    }
    false
}
/// Returns the span recorded in a block's metadata, whatever the block's kind.
fn block_span(block: &Block) -> Option<Span> {
    match block {
        Block::Paragraph(node) => node.meta.span,
        Block::Heading(node) => node.meta.span,
        Block::ThematicBreak(node) => node.meta.span,
        Block::BlockQuote(node) => node.meta.span,
        Block::Alert(node) => node.meta.span,
        Block::List(node) => node.meta.span,
        Block::DescriptionList(node) => node.meta.span,
        Block::CodeBlock(node) => node.meta.span,
        Block::HtmlBlock(node) => node.meta.span,
        Block::Definition(node) => node.meta.span,
        Block::FootnoteDefinition(node) => node.meta.span,
        Block::Table(node) => node.meta.span,
        Block::MathBlock(node) => node.meta.span,
        Block::Frontmatter(node) => node.meta.span,
        Block::MdxEsm(node) => node.meta.span,
        Block::MdxExpression(node) => node.meta.span,
        Block::MdxJsx(node) => node.meta.span,
        Block::LeafDirective(node) => node.meta.span,
        Block::ContainerDirective(node) => node.meta.span,
    }
}
/// Decides whether a paragraph is open after `line` has been added to a list item.
///
/// A blank line closes it, and a paragraph that was already closed (`Some(false)`) stays
/// closed. Otherwise the answer comes from `block_quote_content_paragraph_open`. Passing
/// `None` for `previous_open` means there is no earlier state (the item's first line).
fn list_item_paragraph_stays_open(
    previous_open: Option<bool>,
    line: &str,
    options: &SyntaxOptions,
) -> bool {
    let is_blank = line.trim().is_empty();
    let was_closed = matches!(previous_open, Some(false));
    !is_blank && !was_closed && block_quote_content_paragraph_open(line, options)
}
// Parses a description list: one or more terms, each followed by one or more details
// entries introduced by a `:` or `~` marker.
//
// Returns `None` when the construct is disabled, when `lines[index]` is not a term line, or
// when no complete item could be built. The list is tight unless a blank line separates a
// term from its details, two details entries, or two items, or a details entry reports
// itself as loose.
fn parse_description_list(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
if !options.constructs.description_list || !is_description_term_line(lines[index].text, options)
{
return None;
}
let mut cursor = index;
let mut items = Vec::new();
let mut tight = true;
let mut list_end = lines[index].end_with_eol;
// One iteration per item (term plus its details).
while cursor < lines.len() {
if !is_description_term_line(lines[cursor].text, options) {
break;
}
let Some(term) = description_term(lines, cursor, options) else {
break;
};
let term_line = lines[cursor];
let mut details = Vec::new();
let item_start = term_line.start;
let mut item_end = lines[term.term_end].end_with_eol;
tight = tight && !term.blank_after_term;
// `description_term` guarantees `marker_index` is in bounds and holds a marker.
cursor = term.marker_index;
// One iteration per details entry of the current term.
loop {
let Some(marker) = description_marker(lines[cursor].text) else {
break;
};
// NOTE: a details entry with only blank content aborts the whole list via `?`,
// including items that were already collected.
let (detail, next, detail_tight) = parse_description_details(
lines,
cursor,
marker,
options,
definitions,
diagnostics,
)?;
tight = tight && detail_tight;
item_end = detail
.meta
.span
.map(|span| span.end)
.unwrap_or(lines[cursor].end_with_eol);
details.push(detail);
cursor = next;
// Another marker may follow, possibly after blank lines (which make the list loose).
let next_nonblank = next_nonblank_line(lines, cursor);
if next_nonblank < lines.len()
&& description_marker(lines[next_nonblank].text).is_some()
{
if next_nonblank != cursor {
tight = false;
}
cursor = next_nonblank;
continue;
}
break;
}
if details.is_empty() {
return None;
}
list_end = item_end;
items.push(DescriptionItem {
meta: NodeMeta::new(Some(Span::new(item_start, item_end))),
term: parse_inlines(
&term.source,
term.source_offset,
options,
definitions,
diagnostics,
),
details,
});
// Look for a following term; blank lines before it make the list loose.
let next_item = next_nonblank_line(lines, cursor);
if next_item >= lines.len() {
cursor = next_item;
break;
}
if description_term(lines, next_item, options).is_some() {
if next_item != cursor {
tight = false;
}
cursor = next_item;
continue;
}
// No further term: the list ends and parsing resumes at the next non-blank line.
cursor = next_item;
break;
}
(!items.is_empty()).then_some((
Block::DescriptionList(DescriptionList {
meta: NodeMeta::new(Some(Span::new(lines[index].start, list_end))),
tight,
children: items,
}),
cursor,
))
}
// Parses one details entry of a description list, starting at the marker line
// `lines[index]`.
//
// The marker's content plus any continuation lines are gathered and parsed as nested
// blocks. Returns the details node, the index at which the caller should resume, and
// whether the entry is tight. Returns `None` when the gathered content is entirely blank.
fn parse_description_details(
lines: &[Line<'_>],
index: usize,
marker: DescriptionMarker<'_>,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(DescriptionDetails, usize, bool)> {
let mut content = String::new();
push_line(&mut content, marker.content);
let mut cursor = index + 1;
let mut end = lines[index].end_with_eol;
let mut tight = true;
let mut paragraph_open = paragraph_stays_open(marker.content, options);
while cursor < lines.len() {
if lines[cursor].text.trim().is_empty() {
// Blank line: look at the next non-blank line to decide what the blank run means.
let next = next_nonblank_line(lines, cursor + 1);
// End of input or a new term: the entry ends and the blank lines are skipped.
if next >= lines.len() || description_term(lines, next, options).is_some() {
cursor = next;
break;
}
// Another marker after a blank line: the entry ends and is loose.
if description_marker(lines[next].text).is_some() {
tight = false;
cursor = next;
break;
}
// The next line is not an indented continuation: stop at the blank line.
if strip_indent_continuation(lines[next].text).is_none() {
break;
}
// Indented content follows, so the blank line belongs to this entry and makes it loose.
push_line(&mut content, "");
paragraph_open = false;
tight = false;
end = lines[cursor].end_with_eol;
cursor += 1;
continue;
}
if description_marker(lines[cursor].text).is_some()
|| description_term(lines, cursor, options).is_some()
{
break;
}
// Accept an indented continuation, or a lazy continuation of an open paragraph.
let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
{
continuation
} else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
trim_ascii_start(lines[cursor].text)
} else {
break;
};
paragraph_open = paragraph_stays_open(continuation, options);
push_line(&mut content, continuation);
end = lines[cursor].end_with_eol;
cursor += 1;
}
if content.trim().is_empty() {
return None;
}
Some((
DescriptionDetails {
meta: NodeMeta::new(Some(Span::new(lines[index].start, end))),
children: parse_blocks(
&content,
// Nested offsets are anchored where the content begins on the marker line.
lines[index].start + marker.content_offset,
false,
options,
definitions,
diagnostics,
),
},
cursor,
tight,
))
}
/// Recognises a description-list term that begins at `term_index`.
///
/// A term is a run of consecutive term lines, optionally followed by blank lines, that is
/// then followed by a line carrying a details marker. Returns `None` when `term_index` is
/// out of range, when the first line is not a term line, or when no marker line follows.
fn description_term(
    lines: &[Line<'_>],
    term_index: usize,
    options: &SyntaxOptions,
) -> Option<DescriptionTerm> {
    let first = lines.get(term_index)?;
    if !is_description_term_line(first.text, options) {
        return None;
    }
    // Length of the run of term lines starting at `term_index` (always at least one).
    let term_lines = lines[term_index..]
        .iter()
        .take_while(|line| is_description_term_line(line.text, options))
        .count();
    let after_term = term_index + term_lines;
    // The term source is the trimmed lines joined with `\n`.
    let mut source = String::new();
    for line in &lines[term_index..after_term] {
        if !source.is_empty() {
            source.push('\n');
        }
        source.push_str(trim_ascii_start(line.text).trim_end());
    }
    // Blank lines between the term and its marker are allowed but remembered.
    let blank_lines = lines[after_term..]
        .iter()
        .take_while(|line| line.text.trim().is_empty())
        .count();
    let marker_index = after_term + blank_lines;
    description_marker(lines.get(marker_index)?.text)?;
    Some(DescriptionTerm {
        marker_index,
        term_end: after_term - 1,
        blank_after_term: blank_lines > 0,
        source,
        source_offset: first.start + leading_trim_bytes(first.text),
    })
}
/// Returns `true` when `line` may be part of a description-list term: indented by at most
/// three columns, not blank, not itself a details marker, and not the start of another block.
fn is_description_term_line(line: &str, options: &SyntaxOptions) -> bool {
    if leading_indent_columns(line) > 3 || line.trim().is_empty() {
        return false;
    }
    if description_marker(line).is_some() {
        return false;
    }
    !likely_block_start(line, options)
}
/// Recognises a description-details marker (`:` or `~`) at the start of `line`.
///
/// The marker may be indented by at most two columns and must be followed by a space, a
/// tab, or the end of the line. On success the returned marker carries the byte offset of
/// the content (after the marker and any spaces/tabs) and the content slice itself.
fn description_marker(line: &str) -> Option<DescriptionMarker<'_>> {
    let (columns, marker_at) = leading_indent(line);
    if columns > 2 {
        return None;
    }
    let raw = line.as_bytes();
    match raw.get(marker_at) {
        Some(b':' | b'~') => {}
        _ => return None,
    }
    let after_marker = &raw[marker_at + 1..];
    // Anything other than a space or tab directly after the marker disqualifies the line.
    if let Some(byte) = after_marker.first() {
        if !matches!(byte, b' ' | b'\t') {
            return None;
        }
    }
    let padding = after_marker
        .iter()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count();
    let content_offset = marker_at + 1 + padding;
    Some(DescriptionMarker {
        content_offset,
        content: &line[content_offset..],
    })
}
/// Returns `true` when `line` leaves a paragraph open, i.e. it is neither blank nor
/// something that looks like the start of another block.
fn paragraph_stays_open(line: &str, options: &SyntaxOptions) -> bool {
    if line.trim().is_empty() {
        false
    } else {
        !likely_block_start(line, options)
    }
}
/// Strips the continuation indent from `input`: the literal space prefix if present,
/// otherwise a single leading tab. Returns `None` when neither prefix is present.
fn strip_indent_continuation(input: &str) -> Option<&str> {
    match input.strip_prefix(" ") {
        Some(rest) => Some(rest),
        None => input.strip_prefix('\t'),
    }
}
/// Parses an ATX heading (`#` through `######`) from a single line.
///
/// The opening run of `#` must be one to six characters long and be followed by a space,
/// a tab, or the end of the line. The heading text has its closing hashes trimmed and is
/// parsed as inline content. Diagnostics produced by the inline parse are discarded here.
fn parse_atx_heading(
    line: Line<'_>,
    options: &SyntaxOptions,
    definitions: &[String],
) -> Option<Block> {
    let text = trim_up_to_three_spaces(line.text)?;
    let depth = text.bytes().take_while(|&byte| byte == b'#').count();
    if !(1..=6).contains(&depth) {
        return None;
    }
    // A non-whitespace byte glued to the opening run means this is not a heading.
    if let Some(byte) = text.as_bytes().get(depth) {
        if !matches!(byte, b' ' | b'\t') {
            return None;
        }
    }
    let after_opening = &text[depth..];
    let indent_bytes = line.text.len() - text.len();
    let content_start = line.start + indent_bytes + depth + leading_trim_bytes(after_opening);
    let content = trim_closing_hashes(after_opening.trim_start());
    let mut discarded = Vec::new();
    let children = parse_inlines(content, content_start, options, definitions, &mut discarded);
    Some(Block::Heading(Heading {
        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
        depth: depth as u8,
        kind: HeadingKind::Atx,
        children,
    }))
}
/// Parses a thematic break: a line made of at least three identical `-`, `*` or `_`
/// characters, optionally separated by spaces or tabs.
fn parse_thematic_break(line: Line<'_>) -> Option<Block> {
    let text = trim_up_to_three_spaces(line.text)?.trim();
    let mut seen = None;
    let mut run = 0;
    for symbol in text.chars().filter(|symbol| !matches!(symbol, ' ' | '\t')) {
        let kind = match symbol {
            '-' => ThematicBreakMarker::Dash,
            '*' => ThematicBreakMarker::Asterisk,
            '_' => ThematicBreakMarker::Underscore,
            _ => return None,
        };
        // Mixing marker characters is not allowed.
        match seen {
            Some(previous) if previous != kind => return None,
            _ => seen = Some(kind),
        }
        run += 1;
    }
    if run < 3 {
        return None;
    }
    Some(Block::ThematicBreak(ThematicBreak {
        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
        marker: seen?,
    }))
}
/// Parses a link reference definition (`[label]: destination` with an optional title)
/// starting at line `index`.
///
/// Both the label and the destination/title part may span several lines. Returns the
/// definition block and the index of the first line after it, or `None` when the lines do
/// not form a definition. `allow_subsequent_indent` is forwarded to
/// `trim_definition_start` to relax the indentation rule for the first line.
fn parse_definition(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
allow_subsequent_indent: bool,
) -> Option<(Block, usize)> {
let line = lines[index];
let text = trim_definition_start(line.text, allow_subsequent_indent)?;
if !text.starts_with('[') {
return None;
}
// Phase 1: accumulate lines until the label's closing bracket is found.
let mut accumulated = String::from(text);
let mut label_end_line = index;
let close = loop {
if let Some(close) = find_reference_label_end(&accumulated, 0) {
// The closing bracket must be immediately followed by a colon.
if accumulated.as_bytes().get(close + 1) == Some(&b':') {
break close;
}
return None;
}
// The label is still open: it may continue on the next line unless that line is
// blank or would start a different construct.
let next = label_end_line + 1;
if next >= lines.len() || lines[next].text.trim().is_empty() {
return None;
}
if likely_block_start(lines[next].text, options)
|| setext_underline_depth(lines[next].text).is_some()
|| table_can_start(lines, next, options)
{
return None;
}
accumulated.push('\n');
accumulated.push_str(lines[next].text);
label_end_line = next;
};
let label = String::from(&accumulated[1..close]);
if normalize_label(&label).is_empty() {
return None;
}
let label = label.as_str();
// Phase 2: parse destination and optional title from what follows `]:`, pulling in
// further lines as needed.
let mut source = String::from(&accumulated[close + 2..]);
let mut cursor = label_end_line;
// Most recent successful parse that had no title, with the line index following it.
let mut best_without_title = None;
loop {
if let Some(resource) = parse_definition_destination_title(&source) {
// A parse that includes a title is final.
if resource.title.is_some() {
return Some((
Block::Definition(Definition {
meta: NodeMeta::new(Some(Span::new(
line.start,
lines[cursor].end_with_eol,
))),
label: label.into(),
identifier: normalize_label(label),
destination: resource.destination,
destination_kind: resource.destination_kind,
title: resource.title,
title_kind: resource.title_kind,
}),
cursor + 1,
));
}
// Remember the title-less parse, then keep going only if the next line could
// begin a title.
best_without_title = Some((resource, cursor + 1));
let next = cursor + 1;
if next >= lines.len()
|| lines[next].text.trim().is_empty()
|| !line_can_start_definition_title(lines[next].text)
{
break;
}
}
let next = cursor + 1;
if next >= lines.len() || lines[next].text.trim().is_empty() {
break;
}
if likely_block_start(lines[next].text, options)
|| setext_underline_depth(lines[next].text).is_some()
{
break;
}
source.push('\n');
source.push_str(lines[next].text);
cursor = next;
}
// No titled parse succeeded: fall back to the last title-less parse, if any.
let (resource, next) = best_without_title?;
let end = lines[next - 1].end_with_eol;
Some((
Block::Definition(Definition {
meta: NodeMeta::new(Some(Span::new(line.start, end))),
label: label.into(),
identifier: normalize_label(label),
destination: resource.destination,
destination_kind: resource.destination_kind,
title: resource.title,
title_kind: resource.title_kind,
}),
next,
))
}
/// Strips the leading indentation allowed before a definition.
///
/// Normally this is whatever `trim_up_to_three_spaces` accepts. When
/// `allow_subsequent_indent` is set, a line indented by exactly four columns is accepted
/// as well and returned without its indentation.
fn trim_definition_start(input: &str, allow_subsequent_indent: bool) -> Option<&str> {
    trim_up_to_three_spaces(input).or_else(|| {
        if !allow_subsequent_indent {
            return None;
        }
        let (columns, bytes) = leading_indent(input);
        (columns == 4).then(|| &input[bytes..])
    })
}
/// Parses a footnote definition (`[^label]: content`) starting at line `index`.
///
/// The first line contributes whatever follows `]:`. Later lines are appended while they
/// are indented continuations, or lazy paragraph text while a paragraph is still open.
/// Blank lines are kept only when the next non-blank line is an indented continuation.
/// The collected content is parsed as nested blocks.
///
/// Returns the footnote block and the index of the first line after it, or `None` when
/// the construct is disabled or the line is not a footnote definition.
fn parse_footnote_definition(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    if !options.constructs.footnote_definition {
        return None;
    }
    let line = lines[index];
    let text = line.text.trim();
    if !text.starts_with("[^") {
        return None;
    }
    let close = find_footnote_definition_label_end(text)?;
    let label = &text[2..close];
    if !is_footnote_label(label) {
        return None;
    }
    let after_label = &text[close + 2..];
    let rest = after_label.trim();
    // Byte offset of `rest` inside `line.text`. Measuring from the start of the line keeps
    // the offset right when the line ends in whitespace; measuring back from `line.end`
    // would shift it by the number of trailing whitespace bytes.
    let leading = line.text.len() - line.text.trim_start().len();
    let rest_offset = leading + close + 2 + (after_label.len() - after_label.trim_start().len());
    let mut content = String::new();
    push_line(&mut content, rest);
    let mut cursor = index + 1;
    let mut end = line.end_with_eol;
    let mut paragraph_open = paragraph_stays_open(rest, options);
    while cursor < lines.len() {
        if lines[cursor].text.trim().is_empty() {
            // A blank line stays in the footnote only if indented content follows it.
            let next = next_nonblank_line(lines, cursor + 1);
            if next >= lines.len() || !is_footnote_continuation(lines[next].text) {
                break;
            }
            push_line(&mut content, "");
            paragraph_open = false;
            end = lines[cursor].end_with_eol;
            cursor += 1;
            continue;
        }
        let continuation = if let Some(continuation) =
            strip_indent_continuation(lines[cursor].text)
        {
            continuation
        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
            // Lazy continuation of the open paragraph.
            trim_ascii_start(lines[cursor].text)
        } else {
            break;
        };
        paragraph_open = paragraph_stays_open(continuation, options);
        push_line(&mut content, continuation);
        end = lines[cursor].end_with_eol;
        cursor += 1;
    }
    Some((
        Block::FootnoteDefinition(FootnoteDefinition {
            meta: NodeMeta::new(Some(Span::new(line.start, end))),
            label: label.into(),
            identifier: normalize_label(label),
            children: parse_blocks(
                &content,
                line.start + rest_offset,
                false,
                options,
                definitions,
                diagnostics,
            ),
        }),
        cursor,
    ))
}
/// Returns `true` when `input` carries the indentation that marks a footnote continuation.
fn is_footnote_continuation(input: &str) -> bool {
    match strip_indent_continuation(input) {
        Some(_) => true,
        None => false,
    }
}
/// Parses a leaf directive: a line that, after leading whitespace, starts with exactly two
/// colons followed by a directive opener.
///
/// When the line has the `::` prefix but the opener cannot be parsed, an
/// `InvalidDirectiveName` diagnostic covering the line is pushed and `None` is returned.
/// An optional label is parsed as inline content.
fn parse_leaf_directive(
    line: Line<'_>,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<Block> {
    if !options.constructs.directive_leaf {
        return None;
    }
    let trimmed = line.text.trim_start();
    let after_colons = trimmed.strip_prefix("::")?;
    // A third colon means this is not a leaf directive.
    if after_colons.starts_with(':') {
        return None;
    }
    let opener_base = line.start + (line.text.len() - trimmed.len()) + 2;
    let (name, label_source, attributes, _) = match parse_directive_opener(after_colons) {
        Some(opener) => opener,
        None => {
            diagnostics.push(Diagnostic::new(
                DiagnosticSeverity::Error,
                DiagnosticCode::InvalidDirectiveName,
                Span::new(line.start, line.end),
                "leaf directive must have a valid name",
            ));
            return None;
        }
    };
    let label = match label_source {
        Some(source) => parse_inlines(
            source,
            opener_base + name.len() + 1,
            options,
            definitions,
            diagnostics,
        ),
        None => Default::default(),
    };
    Some(Block::LeafDirective(LeafDirective {
        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
        name,
        label,
        attributes,
    }))
}
/// Parses an HTML block starting at line `index`.
///
/// The start line decides the block kind, and the kind decides where the block ends:
/// raw tags run through the first line containing a raw closing tag, `Until` kinds run
/// through the first line containing the terminator, and the remaining kinds run up to
/// (not including) the first blank line. All kinds also stop at the end of input.
fn parse_html_block(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
) -> Option<(Block, usize)> {
    if !options.constructs.html_block {
        return None;
    }
    let first = lines[index];
    let kind = html_block_start(trim_up_to_three_spaces(first.text)?)?;
    let mut value = String::new();
    let mut cursor = index;
    while let Some(line) = lines.get(cursor) {
        match kind {
            HtmlBlockKind::BlockTag | HtmlBlockKind::UntilBlank => {
                if line.text.trim().is_empty() {
                    break;
                }
                push_line(&mut value, line.text);
                cursor += 1;
            }
            HtmlBlockKind::RawTag => {
                push_line(&mut value, line.text);
                cursor += 1;
                let closed = ["script", "pre", "style", "textarea"]
                    .iter()
                    .any(|tag| line_contains_raw_closing_tag(line.text, tag));
                if closed {
                    break;
                }
            }
            HtmlBlockKind::Until(end) => {
                push_line(&mut value, line.text);
                cursor += 1;
                if line.text.contains(end) {
                    break;
                }
            }
        }
    }
    let last = lines[cursor - 1];
    Some((
        Block::HtmlBlock(HtmlBlock {
            meta: NodeMeta::new(Some(Span::new(first.start, last.end_with_eol))),
            value,
        }),
        cursor,
    ))
}
/// Classifies `input` as the first line of an HTML block, if it is one.
///
/// The checks run in a fixed order: raw tags, comments, processing instructions,
/// declarations, CDATA, known block tags, and finally any complete HTML tag that is alone
/// on its line.
fn html_block_start(input: &str) -> Option<HtmlBlockKind> {
    let candidate = input.trim_end();
    if !candidate.starts_with('<') {
        return None;
    }
    let kind = if raw_html_tag_start(candidate) {
        HtmlBlockKind::RawTag
    } else if candidate.starts_with("<!--") {
        HtmlBlockKind::Until("-->")
    } else if candidate.starts_with("<?") {
        HtmlBlockKind::Until("?>")
    } else if is_declaration_start(candidate) {
        HtmlBlockKind::Until(">")
    } else if candidate.starts_with("<![CDATA[") {
        HtmlBlockKind::Until("]]>")
    } else if html_block_tag_start(candidate) {
        HtmlBlockKind::BlockTag
    } else {
        // Any other tag only opens a block when nothing else follows it on the line.
        let (end, _tag_name) = parse_html_tag(candidate, 0)?;
        if !candidate[end..].trim().is_empty() {
            return None;
        }
        HtmlBlockKind::UntilBlank
    };
    Some(kind)
}
pub(crate) fn line_starts_html_block(input: &str) -> bool {
trim_up_to_three_spaces(input)
.and_then(html_block_start)
.is_some()
}
/// Returns `true` when `input` begins with an opening `script`, `pre`, `style` or
/// `textarea` tag.
fn raw_html_tag_start(input: &str) -> bool {
    ["script", "pre", "style", "textarea"]
        .iter()
        .any(|tag| html_raw_open_tag_prefix(input, tag))
}
/// Returns `true` when `input` starts with an opening tag named `tag` (ASCII
/// case-insensitive).
///
/// The tag name must be followed by the end of input, a space, tab, newline, carriage
/// return or `>`, or by `/>` as the very last two bytes of the input. Closing tags
/// (`</...`) never match.
fn html_raw_open_tag_prefix(input: &str, tag: &str) -> bool {
    let Some(after_angle) = input.strip_prefix('<') else {
        return false;
    };
    let body = after_angle.as_bytes();
    if body.first() == Some(&b'/') || body.len() < tag.len() {
        return false;
    }
    let (name, tail) = body.split_at(tag.len());
    if !name.eq_ignore_ascii_case(tag.as_bytes()) {
        return false;
    }
    match tail {
        [] => true,
        [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] => true,
        [b'/', b'>'] => true,
        _ => false,
    }
}
/// Returns `true` when `input` contains a closing tag for `tag` anywhere on the line.
///
/// The tag name is matched ASCII case-insensitively and may be followed by ASCII
/// whitespace before the final `>`.
fn line_contains_raw_closing_tag(input: &str, tag: &str) -> bool {
    let haystack = input.as_bytes();
    let name = tag.as_bytes();
    let opener_len = 2 + name.len();
    (0..haystack.len()).any(|start| {
        let Some(opener) = haystack.get(start..start + opener_len) else {
            return false;
        };
        if !opener.starts_with(b"</") || !opener[2..].eq_ignore_ascii_case(name) {
            return false;
        }
        // After the name only whitespace may precede the closing `>`.
        let tail = &haystack[start + opener_len..];
        match tail.iter().position(|byte| !byte.is_ascii_whitespace()) {
            Some(gap) => tail[gap] == b'>',
            None => false,
        }
    })
}
/// Returns `true` when `input` starts with an opening or closing tag whose name is one of
/// the known block-level HTML tags.
///
/// The name must be followed by the end of input, a space, tab, newline, carriage return,
/// `>`, or `/>`.
fn html_block_tag_start(input: &str) -> bool {
    let Some(after_angle) = input.strip_prefix('<') else {
        return false;
    };
    // An optional `/` turns this into a closing tag; it is accepted either way.
    let rest = after_angle.strip_prefix('/').unwrap_or(after_angle);
    let raw = rest.as_bytes();
    if !raw.first().is_some_and(u8::is_ascii_alphabetic) {
        return false;
    }
    let name_len = 1 + raw[1..]
        .iter()
        .take_while(|byte| html_name_byte(**byte))
        .count();
    if !html_block_tag(&rest[..name_len]) {
        return false;
    }
    match &raw[name_len..] {
        [] | [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] => true,
        [b'/', b'>', ..] => true,
        _ => false,
    }
}
/// Returns `true` when `tag` names one of the block-level HTML elements that may open an
/// HTML block. The comparison is ASCII case-insensitive.
///
/// The lookup compares against a fixed table without building a lowercased copy of `tag`,
/// so no allocation happens per call.
fn html_block_tag(tag: &str) -> bool {
    const BLOCK_TAGS: &[&str] = &[
        "address",
        "article",
        "aside",
        "base",
        "basefont",
        "blockquote",
        "body",
        "caption",
        "center",
        "col",
        "colgroup",
        "dd",
        "details",
        "dialog",
        "dir",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "frame",
        "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hr",
        "html",
        "iframe",
        "legend",
        "li",
        "link",
        "main",
        "menu",
        "menuitem",
        "nav",
        "noframes",
        "ol",
        "optgroup",
        "option",
        "p",
        "param",
        "search",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "title",
        "tr",
        "track",
        "ul",
    ];
    BLOCK_TAGS
        .iter()
        .any(|name| name.eq_ignore_ascii_case(tag))
}
/// Returns `true` when `input` starts with `<!` immediately followed by an ASCII letter.
fn is_declaration_start(input: &str) -> bool {
    if !input.starts_with("<!") {
        return false;
    }
    matches!(input.as_bytes().get(2), Some(byte) if byte.is_ascii_alphabetic())
}
/// Tries the MDX flow constructs at line `index`, in order: ESM (`import`/`export`),
/// expression block (`{ ... }`), then JSX block (`<...>`). Each is attempted only when
/// enabled in `options.constructs`.
///
/// Returns the parsed block and the index of the next line. Returns `None` when nothing
/// matched; in that case an `InvalidMdx` diagnostic may have been pushed for an expression
/// with no closing brace or a JSX element with no closing tag.
fn parse_mdx_flow(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
if options.constructs.mdx_esm {
if let Some((block, next)) = parse_mdx_esm_flow(lines, index, diagnostics) {
return Some((block, next));
}
}
let line = lines[index];
let trimmed = line.text.trim_start();
if options.constructs.mdx_expression_block && trimmed.starts_with('{') {
// Byte offset of the opening brace within the line.
let open_byte = line.text.len() - trimmed.len();
if let Some((close_line, close_byte)) = find_mdx_expression_close(lines, index, open_byte) {
return Some((
Block::MdxExpression(MdxExpression {
meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
value: collect_mdx_expression_value(
lines, index, open_byte, close_line, close_byte,
),
}),
close_line + 1,
));
}
// No usable closing brace was found: report it, spanning to the end of the input.
diagnostics.push(Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::InvalidMdx,
Span::new(line.start + open_byte, lines.last()?.end_with_eol),
"MDX expression block is missing a closing brace",
));
}
if options.constructs.mdx_jsx_block && trimmed.starts_with('<') {
if let Some(close_line) = find_mdx_jsx_close(lines, index) {
return Some((
Block::MdxJsx(MdxJsx {
meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
value: collect_line_range(lines, index, close_line),
}),
close_line + 1,
));
}
// The element did not close. Report it only when the line really starts with a
// complete, non-self-closing opening tag.
let start_byte = line.text.len() - trimmed.len();
if let Some(root) = mdx_jsx_tag_start(line.text, start_byte) {
if !root.closing {
if let Some((_tag_end_line, _tag_end_byte, self_closing)) =
find_mdx_jsx_tag_end(lines, index, start_byte)
{
if !self_closing {
diagnostics.push(Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::InvalidMdx,
Span::new(line.start + start_byte, lines.last()?.end_with_eol),
"MDX JSX block is missing a closing tag",
));
}
}
}
}
}
None
}
/// Lexical state carried from line to line while scanning an MDX ESM block.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct MdxEsmState {
/// Number of `{` seen outside strings and comments that are not yet matched by `}`.
brace_depth: usize,
/// Number of `[` seen outside strings and comments that are not yet matched by `]`.
bracket_depth: usize,
/// Number of `(` seen outside strings and comments that are not yet matched by `)`.
paren_depth: usize,
/// Set while the scanner is inside a `/* ... */` comment.
block_comment: bool,
/// Delimiter byte (`'`, `"` or `` ` ``) of the string currently open, if any.
quote: Option<u8>,
/// Set when the previous byte inside an open string was an unescaped backslash.
escaped: bool,
}
/// Scanning mode used while looking for the closing brace of an MDX expression.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxBraceState {
/// Outside any string or comment; braces are counted in this mode.
Normal,
/// Inside a `'...'` string.
SingleQuoted,
/// Inside a `"..."` string.
DoubleQuoted,
/// Inside a backtick-delimited string.
Template,
/// Inside a `//` comment.
LineComment,
/// Inside a `/* ... */` comment.
BlockComment,
}
/// Identity of a JSX tag, used to pair opening and closing tags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxJsxTag<'a> {
/// The nameless fragment tag (`<>` / `</>`).
Fragment,
/// A named tag; the slice is the tag name as written in the input.
Named(&'a str),
}
/// Result of recognising the beginning of a JSX tag at some `<`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct MdxJsxTagStart<'a> {
/// Which tag this is (fragment or named).
tag: MdxJsxTag<'a>,
/// `true` for a closing tag (`</...`), `false` for an opening tag.
closing: bool,
}
/// Parses an MDX ESM block (`import ...` / `export ...`) starting at line `index`.
///
/// Following lines are absorbed while they count as a continuation of the block. If the
/// input ends while a bracket, block comment or template string is still open, an
/// `InvalidMdx` diagnostic is pushed, and the block is still returned.
fn parse_mdx_esm_flow(
    lines: &[Line<'_>],
    index: usize,
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    let opening = lines[index];
    if !is_mdx_esm_start(opening.text) {
        return None;
    }
    // The first line always belongs to the block; later lines must qualify.
    let mut state = MdxEsmState::default();
    let mut value = String::from(opening.text);
    update_mdx_esm_state(opening.text, &mut state);
    let mut next = index + 1;
    while let Some(line) = lines.get(next) {
        if !is_mdx_esm_continuation(line.text, &state) {
            break;
        }
        value.push('\n');
        value.push_str(line.text);
        update_mdx_esm_state(line.text, &mut state);
        next += 1;
    }
    let last = lines[next - 1];
    if next >= lines.len() && state_has_open_mdx_esm_construct(&state) {
        diagnostics.push(Diagnostic::new(
            DiagnosticSeverity::Error,
            DiagnosticCode::InvalidMdx,
            Span::new(opening.start, last.end_with_eol),
            "MDX ESM block is missing a closing delimiter",
        ));
    }
    Some((
        Block::MdxEsm(MdxEsm {
            meta: NodeMeta::new(Some(Span::new(opening.start, last.end))),
            value,
        }),
        next,
    ))
}
/// Returns `true` when `line` begins (with no leading whitespace) with `import ` or
/// `export `.
fn is_mdx_esm_start(line: &str) -> bool {
    ["import ", "export "]
        .iter()
        .any(|keyword| line.starts_with(*keyword))
}
/// Decides whether `line` continues the ESM block described by `state`.
///
/// Any line continues the block while a construct is still open. Otherwise the line must
/// be non-blank and either start a new `import`/`export` statement or, after leading
/// whitespace, start a `//` or `/*` comment.
fn is_mdx_esm_continuation(line: &str, state: &MdxEsmState) -> bool {
    if state_has_open_mdx_esm_construct(state) {
        return true;
    }
    match line.trim_start() {
        "" => false,
        body => {
            is_mdx_esm_start(line)
                || ["//", "/*"].iter().any(|opener| body.starts_with(*opener))
        }
    }
}
/// Returns `true` while the ESM scanner is inside an unclosed brace, bracket, parenthesis,
/// block comment, or backtick string.
fn state_has_open_mdx_esm_construct(state: &MdxEsmState) -> bool {
    let nested = [state.brace_depth, state.bracket_depth, state.paren_depth]
        .iter()
        .any(|depth| *depth > 0);
    nested || state.block_comment || matches!(state.quote, Some(b'`'))
}
/// Advances the ESM scanner `state` over one line of text.
///
/// Tracks block comments, quoted strings (with backslash escapes) and the nesting depth of
/// braces, brackets and parentheses. A `//` outside strings and comments ends processing
/// of the line. Depth counters never go below zero.
fn update_mdx_esm_state(line: &str, state: &mut MdxEsmState) {
let bytes = line.as_bytes();
let mut index = 0;
while index < bytes.len() {
let byte = bytes[index];
if state.block_comment {
// Inside `/* ... */`: only the terminator matters; it is two bytes long.
if byte == b'*' && bytes.get(index + 1) == Some(&b'/') {
state.block_comment = false;
index += 1;
}
index += 1;
continue;
}
if let Some(delimiter) = state.quote {
// Inside a string: honour backslash escapes, then look for the closing delimiter.
if state.escaped {
state.escaped = false;
} else if byte == b'\\' {
state.escaped = true;
} else if byte == delimiter {
state.quote = None;
}
index += 1;
continue;
}
match byte {
b'\'' | b'"' | b'`' => state.quote = Some(byte),
// Line comment: the rest of the line is ignored.
b'/' if bytes.get(index + 1) == Some(&b'/') => break,
b'/' if bytes.get(index + 1) == Some(&b'*') => {
state.block_comment = true;
// Skip the `*` so that `/*/` is not read as an immediate terminator.
index += 1;
}
b'{' => state.brace_depth += 1,
b'}' => state.brace_depth = state.brace_depth.saturating_sub(1),
b'[' => state.bracket_depth += 1,
b']' => state.bracket_depth = state.bracket_depth.saturating_sub(1),
b'(' => state.paren_depth += 1,
b')' => state.paren_depth = state.paren_depth.saturating_sub(1),
_ => {}
}
index += 1;
}
}
/// Finds the brace that closes the MDX expression opened at `open_byte` on line `index`.
///
/// Braces inside quoted strings, template strings and comments are ignored. Returns the
/// line index and byte index of the matching `}`, but only when nothing except whitespace
/// follows it on that line. Returns `None` when the matching brace is followed by other
/// text or when the input ends first.
fn find_mdx_expression_close(
lines: &[Line<'_>],
index: usize,
open_byte: usize,
) -> Option<(usize, usize)> {
let mut depth = 0usize;
let mut state = MdxBraceState::Normal;
let mut escaped = false;
let mut cursor = index;
while cursor < lines.len() {
let bytes = lines[cursor].text.as_bytes();
// Scanning starts at the opening brace on the first line, at column 0 afterwards.
let mut byte_index = if cursor == index { open_byte } else { 0 };
while byte_index < bytes.len() {
let byte = bytes[byte_index];
match state {
MdxBraceState::Normal => match byte {
b'\'' => state = MdxBraceState::SingleQuoted,
b'"' => state = MdxBraceState::DoubleQuoted,
b'`' => state = MdxBraceState::Template,
b'/' if bytes.get(byte_index + 1) == Some(&b'/') => {
// Line comment: skip the rest of this line.
state = MdxBraceState::LineComment;
break;
}
b'/' if bytes.get(byte_index + 1) == Some(&b'*') => {
state = MdxBraceState::BlockComment;
byte_index += 1;
}
b'{' => depth += 1,
b'}' => {
depth = depth.checked_sub(1)?;
if depth == 0 {
// The matching brace only counts when it ends its line.
return lines[cursor].text[byte_index + 1..]
.trim()
.is_empty()
.then_some((cursor, byte_index));
}
}
_ => {}
},
MdxBraceState::SingleQuoted => {
update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
}
MdxBraceState::DoubleQuoted => {
update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
}
MdxBraceState::Template => {
update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
}
MdxBraceState::LineComment => break,
MdxBraceState::BlockComment => {
if byte == b'*' && bytes.get(byte_index + 1) == Some(&b'/') {
state = MdxBraceState::Normal;
byte_index += 1;
}
}
}
byte_index += 1;
}
// A line comment ends with its line.
if state == MdxBraceState::LineComment {
state = MdxBraceState::Normal;
}
cursor += 1;
}
None
}
/// Advances the scanner by one byte while inside a quoted string.
///
/// A byte following a backslash is skipped, a backslash arms the escape flag, and an
/// unescaped `delimiter` returns the scanner to `Normal`.
fn update_mdx_quote_state(byte: u8, delimiter: u8, state: &mut MdxBraceState, escaped: &mut bool) {
    match (*escaped, byte) {
        (true, _) => *escaped = false,
        (false, b'\\') => *escaped = true,
        (false, closing) if closing == delimiter => *state = MdxBraceState::Normal,
        _ => {}
    }
}
/// Finds the brace that closes the MDX expression opened at `open_byte` within a single
/// string.
///
/// Returns `None` when `open_byte` does not point at `{` or when no matching brace exists.
/// Braces inside quoted strings, template strings and comments are ignored. A line comment
/// ends at the next `\n` byte.
fn find_mdx_expression_inline_close(input: &str, open_byte: usize) -> Option<usize> {
let bytes = input.as_bytes();
if bytes.get(open_byte) != Some(&b'{') {
return None;
}
let mut depth = 0usize;
let mut state = MdxBraceState::Normal;
let mut escaped = false;
// Start on the opening brace itself so that it brings `depth` to 1.
let mut cursor = open_byte;
while cursor < bytes.len() {
let byte = bytes[cursor];
match state {
MdxBraceState::Normal => match byte {
b'\'' => state = MdxBraceState::SingleQuoted,
b'"' => state = MdxBraceState::DoubleQuoted,
b'`' => state = MdxBraceState::Template,
b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
state = MdxBraceState::LineComment;
cursor += 1;
}
b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
state = MdxBraceState::BlockComment;
cursor += 1;
}
b'{' => depth += 1,
b'}' => {
depth = depth.checked_sub(1)?;
if depth == 0 {
return Some(cursor);
}
}
_ => {}
},
MdxBraceState::SingleQuoted => {
update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
}
MdxBraceState::DoubleQuoted => {
update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
}
MdxBraceState::Template => {
update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
}
MdxBraceState::LineComment => {
if byte == b'\n' {
state = MdxBraceState::Normal;
}
}
MdxBraceState::BlockComment => {
// The terminator `*/` is two bytes; skip both.
if byte == b'*' && bytes.get(cursor + 1) == Some(&b'/') {
state = MdxBraceState::Normal;
cursor += 1;
}
}
}
cursor += 1;
}
None
}
/// Collects the text between the opening and closing brace of an MDX expression.
///
/// The first line contributes everything after `open_byte`, the last line everything
/// before `close_byte`, and lines in between are taken whole. Lines are joined with `\n`.
fn collect_mdx_expression_value(
    lines: &[Line<'_>],
    start_line: usize,
    open_byte: usize,
    close_line: usize,
    close_byte: usize,
) -> String {
    let mut value = String::new();
    for cursor in start_line..=close_line {
        if cursor != start_line {
            value.push('\n');
        }
        let line = lines[cursor].text;
        let from = if cursor == start_line { open_byte + 1 } else { 0 };
        let to = if cursor == close_line { close_byte } else { line.len() };
        value.push_str(&line[from..to]);
    }
    value
}
/// Finds the line on which the JSX element that starts line `index` is closed.
///
/// The root tag is the first tag on the line after leading whitespace. A self-closing root
/// ends on the line where its tag ends. Otherwise the following text is scanned for tags
/// matching the root, counting nested openers, until the matching closing tag is found.
/// Returns `None` when the line does not start with an opening tag, when any tag is left
/// unterminated, or when the input ends before the element closes.
fn find_mdx_jsx_close<'a>(lines: &'a [Line<'a>], index: usize) -> Option<usize> {
let line = lines[index];
let trimmed = line.text.trim_start();
let start_byte = line.text.len() - trimmed.len();
let root = mdx_jsx_tag_start(line.text, start_byte)?;
if root.closing {
return None;
}
let (mut cursor_line, mut cursor_byte, self_closing) =
find_mdx_jsx_tag_end(lines, index, start_byte)?;
if self_closing {
return Some(cursor_line);
}
// Nesting depth of elements with the same tag as the root; the root itself counts as 1.
let mut depth = 1usize;
// Resume just after the `>` that ended the root's opening tag.
cursor_byte += 1;
'scan: while cursor_line < lines.len() {
let line = lines[cursor_line].text;
while cursor_byte < line.len() {
let Some(relative_start) = line[cursor_byte..].find('<') else {
break;
};
let tag_start_byte = cursor_byte + relative_start;
// A `<` that does not begin a tag is skipped.
let Some(candidate) = mdx_jsx_tag_start(line, tag_start_byte) else {
cursor_byte = tag_start_byte + 1;
continue;
};
let Some((tag_end_line, tag_end_byte, candidate_self_closing)) =
find_mdx_jsx_tag_end(lines, cursor_line, tag_start_byte)
else {
return None;
};
// Only tags with the same identity as the root affect the depth.
if mdx_jsx_tag_matches(root.tag, candidate.tag) {
if candidate.closing {
depth = depth.saturating_sub(1);
if depth == 0 {
return Some(tag_end_line);
}
} else if !candidate_self_closing {
depth += 1;
}
}
cursor_byte = tag_end_byte + 1;
// The tag ended on a later line: continue from there without resetting the byte
// cursor.
if tag_end_line != cursor_line {
cursor_line = tag_end_line;
continue 'scan;
}
}
cursor_line += 1;
cursor_byte = 0;
}
None
}
/// Parses an inline JSX element that starts at byte `index` of `input`.
///
/// Returns the byte offset just past the element together with the element's source text.
/// Returns `None` when `index` is not the start of an opening tag, when a tag is left
/// unterminated, or when the element is never closed.
fn parse_mdx_jsx_inline(input: &str, index: usize) -> Option<(usize, String)> {
    let root = mdx_jsx_tag_start(input, index)?;
    if root.closing {
        return None;
    }
    let (opener_end, self_closing) = find_mdx_jsx_tag_end_in_text(input, index)?;
    if self_closing {
        let end = opener_end + 1;
        return Some((end, input[index..end].into()));
    }
    // Nesting depth of elements sharing the root's tag; the root itself counts as 1.
    let mut depth = 1usize;
    let mut scan_from = opener_end + 1;
    while scan_from < input.len() {
        let tag_start_byte = scan_from + input[scan_from..].find('<')?;
        let Some(candidate) = mdx_jsx_tag_start(input, tag_start_byte) else {
            // Not a tag: step over the `<` and keep looking.
            scan_from = tag_start_byte + 1;
            continue;
        };
        let (tag_end, candidate_self_closing) =
            find_mdx_jsx_tag_end_in_text(input, tag_start_byte)?;
        scan_from = tag_end + 1;
        if !mdx_jsx_tag_matches(root.tag, candidate.tag) {
            continue;
        }
        if candidate.closing {
            depth = depth.saturating_sub(1);
            if depth == 0 {
                return Some((scan_from, input[index..scan_from].into()));
            }
        } else if !candidate_self_closing {
            depth += 1;
        }
    }
    None
}
/// Recognises the beginning of a JSX tag at byte `start` of `input`.
///
/// Accepts fragments (`<>` and `</>`) and named opening or closing tags. Returns `None`
/// when `start` is not a `<`, when the `<` is the last byte, when it is followed by `!` or
/// `?`, when the name does not begin with a valid name-start byte, or when the name is
/// followed by a byte that is not a name delimiter.
fn mdx_jsx_tag_start(input: &str, start: usize) -> Option<MdxJsxTagStart<'_>> {
    let bytes = input.as_bytes();
    if bytes.get(start) != Some(&b'<') {
        return None;
    }
    // Inspect the byte after `<` once: it settles fragments, rejects `<!`/`<?`, and tells
    // opening tags from closing ones.
    let closing = match bytes.get(start + 1)? {
        b'>' => {
            return Some(MdxJsxTagStart {
                tag: MdxJsxTag::Fragment,
                closing: false,
            });
        }
        b'/' if bytes.get(start + 2) == Some(&b'>') => {
            return Some(MdxJsxTagStart {
                tag: MdxJsxTag::Fragment,
                closing: true,
            });
        }
        b'!' | b'?' => return None,
        b'/' => true,
        _ => false,
    };
    let name_start = start + if closing { 2 } else { 1 };
    if !bytes
        .get(name_start)
        .is_some_and(|byte| is_mdx_jsx_name_start_byte(*byte))
    {
        return None;
    }
    // The name-start byte was just validated, so the name is at least one byte long.
    let name_end = name_start
        + 1
        + bytes[name_start + 1..]
            .iter()
            .take_while(|byte| is_mdx_jsx_name_byte(**byte))
            .count();
    if bytes
        .get(name_end)
        .is_some_and(|byte| !is_mdx_jsx_name_delimiter(*byte))
    {
        return None;
    }
    Some(MdxJsxTagStart {
        tag: MdxJsxTag::Named(&input[name_start..name_end]),
        closing,
    })
}
fn mdx_jsx_tag_matches(left: MdxJsxTag<'_>, right: MdxJsxTag<'_>) -> bool {
match (left, right) {
(MdxJsxTag::Fragment, MdxJsxTag::Fragment) => true,
(MdxJsxTag::Named(left), MdxJsxTag::Named(right)) => left == right,
_ => false,
}
}
/// Finds the `>` that ends the JSX tag whose `<` is at `start_byte` on line `start_line`.
///
/// The scan may cross lines. A `>` inside a quoted attribute value or inside a `{ ... }`
/// attribute expression is ignored. Returns the line index and byte index of the `>`, and
/// whether the tag is self-closing (the last non-whitespace byte before `>` is `/`).
/// Returns `None` when the input ends before the tag does.
fn find_mdx_jsx_tag_end(
lines: &[Line<'_>],
start_line: usize,
start_byte: usize,
) -> Option<(usize, usize, bool)> {
let mut line_index = start_line;
// Start just after the `<`.
let mut byte_index = start_byte + 1;
// Delimiter of the attribute string currently open, if any.
let mut quote = None;
let mut escaped = false;
// State for a `{ ... }` expression; a depth of zero means not inside one.
let mut expression_depth = 0usize;
let mut expression_state = MdxBraceState::Normal;
let mut expression_escaped = false;
while line_index < lines.len() {
let bytes = lines[line_index].text.as_bytes();
while byte_index < bytes.len() {
let byte = bytes[byte_index];
if expression_depth > 0 {
// Inside an expression: the helper returns `true` when it consumed a two-byte
// token, in which case one extra byte is skipped.
if update_mdx_jsx_expression_state(
byte,
bytes.get(byte_index + 1).copied(),
&mut expression_depth,
&mut expression_state,
&mut expression_escaped,
) {
byte_index += 1;
}
byte_index += 1;
continue;
}
if let Some(delimiter) = quote {
if escaped {
escaped = false;
} else if byte == b'\\' {
escaped = true;
} else if byte == delimiter {
quote = None;
}
byte_index += 1;
continue;
}
match byte {
b'\'' | b'"' => quote = Some(byte),
b'{' => {
expression_depth = 1;
expression_state = MdxBraceState::Normal;
expression_escaped = false;
}
b'>' if expression_depth == 0 => {
let self_closing =
previous_nonspace_before(lines, line_index, byte_index) == Some(b'/');
return Some((line_index, byte_index, self_closing));
}
_ => {}
}
byte_index += 1;
}
// A line comment inside an expression ends with its line.
if expression_state == MdxBraceState::LineComment {
expression_state = MdxBraceState::Normal;
}
line_index += 1;
byte_index = 0;
}
None
}
/// Returns the last byte that is not ASCII whitespace before position `byte_index` on line
/// `line_index`, searching backwards through earlier lines when necessary.
///
/// Returns `None` when only whitespace (or nothing) precedes the position.
fn previous_nonspace_before(
    lines: &[Line<'_>],
    line_index: usize,
    byte_index: usize,
) -> Option<u8> {
    // Only the starting line is cut at `byte_index`; earlier lines are searched in full.
    let mut limit = Some(byte_index);
    for line in lines[..=line_index].iter().rev() {
        let raw = line.text.as_bytes();
        let searched = match limit.take() {
            Some(end) => &raw[..end],
            None => raw,
        };
        if let Some(byte) = searched
            .iter()
            .rfind(|byte| !byte.is_ascii_whitespace())
        {
            return Some(*byte);
        }
    }
    None
}
/// Finds the `>` that ends the JSX tag whose `<` is at `start_byte` of `input`.
///
/// A `>` inside a quoted attribute value or inside a `{ ... }` attribute expression is
/// ignored. Returns the byte index of the `>` and whether the tag is self-closing (the
/// last non-whitespace byte before `>` is `/`). Returns `None` when the input ends before
/// the tag does.
fn find_mdx_jsx_tag_end_in_text(input: &str, start_byte: usize) -> Option<(usize, bool)> {
let bytes = input.as_bytes();
// Start just after the `<`.
let mut byte_index = start_byte + 1;
// Delimiter of the attribute string currently open, if any.
let mut quote = None;
let mut escaped = false;
// State for a `{ ... }` expression; a depth of zero means not inside one.
let mut expression_depth = 0usize;
let mut expression_state = MdxBraceState::Normal;
let mut expression_escaped = false;
while byte_index < bytes.len() {
let byte = bytes[byte_index];
if expression_depth > 0 {
// Inside an expression: the helper returns `true` when it consumed a two-byte token,
// in which case one extra byte is skipped.
if update_mdx_jsx_expression_state(
byte,
bytes.get(byte_index + 1).copied(),
&mut expression_depth,
&mut expression_state,
&mut expression_escaped,
) {
byte_index += 1;
}
byte_index += 1;
continue;
}
if let Some(delimiter) = quote {
if escaped {
escaped = false;
} else if byte == b'\\' {
escaped = true;
} else if byte == delimiter {
quote = None;
}
byte_index += 1;
continue;
}
match byte {
b'\'' | b'"' => quote = Some(byte),
b'{' => {
expression_depth = 1;
expression_state = MdxBraceState::Normal;
expression_escaped = false;
}
b'>' if expression_depth == 0 => {
let self_closing = previous_nonspace_before_text(input, byte_index) == Some(b'/');
return Some((byte_index, self_closing));
}
_ => {}
}
byte_index += 1;
}
None
}
/// Returns the last byte of `input` before `byte_index` that is not ASCII whitespace, or
/// `None` when there is no such byte.
fn previous_nonspace_before_text(input: &str, byte_index: usize) -> Option<u8> {
    let preceding = &input.as_bytes()[..byte_index];
    preceding
        .iter()
        .rfind(|byte| !byte.is_ascii_whitespace())
        .copied()
}
/// Advances the scanner by one byte while inside a `{ ... }` expression of a JSX tag.
///
/// `next` is the byte after `byte`, if any. `depth` counts open braces and reaches zero
/// when the expression closes. Returns `true` when a two-byte token (`//`, `/*` or `*/`)
/// was recognised, so the caller should skip one additional byte.
fn update_mdx_jsx_expression_state(
byte: u8,
next: Option<u8>,
depth: &mut usize,
state: &mut MdxBraceState,
escaped: &mut bool,
) -> bool {
match *state {
MdxBraceState::Normal => match byte {
b'\'' => *state = MdxBraceState::SingleQuoted,
b'"' => *state = MdxBraceState::DoubleQuoted,
b'`' => *state = MdxBraceState::Template,
b'/' if next == Some(b'/') => {
*state = MdxBraceState::LineComment;
return true;
}
b'/' if next == Some(b'*') => {
*state = MdxBraceState::BlockComment;
return true;
}
b'{' => *depth += 1,
b'}' => {
*depth = (*depth).saturating_sub(1);
// Leaving the outermost brace resets the scanner for the next expression.
if *depth == 0 {
*state = MdxBraceState::Normal;
*escaped = false;
}
}
_ => {}
},
MdxBraceState::SingleQuoted => {
update_mdx_quote_state(byte, b'\'', state, escaped);
}
MdxBraceState::DoubleQuoted => {
update_mdx_quote_state(byte, b'"', state, escaped);
}
MdxBraceState::Template => {
update_mdx_quote_state(byte, b'`', state, escaped);
}
MdxBraceState::LineComment => {
// A line comment runs until a newline byte.
if byte == b'\n' {
*state = MdxBraceState::Normal;
}
}
MdxBraceState::BlockComment => {
if byte == b'*' && next == Some(b'/') {
*state = MdxBraceState::Normal;
return true;
}
}
}
false
}
/// Returns `true` for bytes that may begin a JSX tag name: ASCII letters, `_` and `$`.
fn is_mdx_jsx_name_start_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
/// Returns `true` for bytes that may appear inside a JSX tag name: ASCII letters and
/// digits, `.`, `:`, `_`, `-` and `$`.
fn is_mdx_jsx_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'.' | b':' | b'_' | b'-' | b'$'
    )
}
/// Returns `true` for bytes that may directly follow a JSX tag name: ASCII whitespace,
/// `/`, `>`, `{` and `}`.
fn is_mdx_jsx_name_delimiter(byte: u8) -> bool {
    match byte {
        b'/' | b'>' | b'{' | b'}' => true,
        other => other.is_ascii_whitespace(),
    }
}
/// Joins the text of lines `start` through `end` (inclusive) with `\n`, without a trailing
/// newline.
fn collect_line_range(lines: &[Line<'_>], start: usize, end: usize) -> String {
    let mut joined = String::new();
    for index in start..=end {
        if index != start {
            joined.push('\n');
        }
        joined.push_str(lines[index].text);
    }
    joined
}
/// Parses an indented code block starting at line `index`.
///
/// The block continues over lines that carry the code indentation and over blank lines in
/// between. Blank lines at the end are consumed (the returned index moves past them) but
/// are excluded from both the code value and the block's span.
fn parse_indented_code(
lines: &[Line<'_>],
index: usize,
options: &SyntaxOptions,
) -> Option<(Block, usize)> {
if !options.constructs.indented_code || strip_indented_code_prefix(lines[index].text).is_none()
{
return None;
}
let mut value = String::new();
let mut cursor = index;
// Index of the last line with non-blank content, and the length of `value` right after
// that line was appended; used to cut off trailing blank lines.
let mut content_end = index;
let mut content_end_len = 0usize;
while cursor < lines.len() {
if let Some(text) = strip_indented_code_prefix(lines[cursor].text) {
ensure_line_separator(&mut value);
value.push_str(text);
value.push_str(lines[cursor].eol);
if !text.trim().is_empty() {
content_end = cursor;
content_end_len = value.len();
}
cursor += 1;
continue;
}
// A non-blank line without the code indentation ends the block.
if !lines[cursor].text.trim().is_empty() {
break;
}
// A blank line without the indentation is kept provisionally, as a bare line ending.
ensure_line_separator(&mut value);
value.push_str(lines[cursor].eol);
cursor += 1;
}
// Drop everything after the last line that had content.
value.truncate(content_end_len);
Some((
Block::CodeBlock(CodeBlock {
meta: NodeMeta::new(Some(Span::new(
lines[index].start,
lines[content_end].end_with_eol,
))),
kind: CodeBlockKind::Indented,
info: None,
value,
}),
cursor,
))
}
/// Strips the leading whitespace that makes a line indented code.
///
/// Spaces advance the column by one and tabs advance it to the next multiple of
/// four. Once the column reaches four, the rest of the line is returned. Returns
/// `None` if a non-whitespace byte (or the end of the line) is reached first.
fn strip_indented_code_prefix(input: &str) -> Option<&str> {
    let mut width = 0usize;
    for (offset, byte) in input.bytes().enumerate() {
        width = match byte {
            b' ' => width + 1,
            b'\t' => width + (4 - width % 4),
            _ => return None,
        };
        if width >= 4 {
            return Some(&input[offset + 1..]);
        }
    }
    None
}
/// Parses a GFM table whose header row is `lines[index]` and whose delimiter row
/// is `lines[index + 1]`.
///
/// Returns `None` when tables are disabled, there is no following line, the
/// delimiter row is not acceptable, or the header and delimiter column counts
/// differ. Otherwise returns the table and the index of the first line after it.
/// Body rows are padded or truncated to the number of delimiter columns.
fn parse_table(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    if !options.constructs.gfm_table {
        return None;
    }
    let delimiter_line = lines.get(index + 1)?;
    let header_line = &lines[index];
    let spoiler = options.constructs.spoiler;

    let delimiter = table_indent_line(delimiter_line.text, options.constructs.indented_code)?;
    if list_marker_info(delimiter).is_some() {
        return None;
    }
    if !table_has_separator(header_line.text, delimiter, spoiler) {
        return None;
    }
    let alignments = parse_table_delimiter(delimiter, spoiler)?;
    let headers = split_table_row(header_line.text, spoiler);
    if headers.len() != alignments.len() {
        return None;
    }

    let header_row = TableRow {
        meta: NodeMeta::new(Some(Span::new(header_line.start, header_line.end))),
        cells: headers
            .iter()
            .map(|cell| TableCell {
                meta: NodeMeta::default(),
                children: parse_inlines(
                    cell.trim(),
                    header_line.start,
                    options,
                    definitions,
                    diagnostics,
                ),
            })
            .collect(),
    };
    let mut rows = Vec::new();
    rows.push(header_row);

    // Body rows start after the delimiter row.
    let mut cursor = index + 2;
    while let Some(line) = lines.get(cursor) {
        let Some(row) = table_indent_line(line.text, options.constructs.indented_code) else {
            break;
        };
        if row.trim().is_empty() || table_body_line_ends_table(line.text, options) {
            break;
        }
        let cells = split_table_row(row, spoiler);
        rows.push(TableRow {
            meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
            // One cell per delimiter column; missing cells become empty.
            cells: (0..alignments.len())
                .map(|column| {
                    let value = cells.get(column).map(String::as_str).unwrap_or("");
                    TableCell {
                        meta: NodeMeta::default(),
                        children: parse_inlines(
                            value.trim(),
                            line.start,
                            options,
                            definitions,
                            diagnostics,
                        ),
                    }
                })
                .collect(),
        });
        cursor += 1;
    }

    let span = Span::new(header_line.start, lines[cursor - 1].end_with_eol);
    let table = Block::Table(Table {
        meta: NodeMeta::new(Some(span)),
        alignments,
        rows,
    });
    Some((table, cursor))
}
/// Parses a setext heading whose content starts at `lines[index]`.
///
/// Scans forward for a non-lazy underline line. The scan gives up (`None`) at a
/// blank line, a line where a table can start, a likely block start, or the end
/// of input. On success returns the heading and the index just past the
/// underline. Diagnostics from inline parsing of the heading text are discarded.
fn parse_setext_heading(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
) -> Option<(Block, usize)> {
    if index + 1 >= lines.len() || lines[index].text.trim().is_empty() {
        return None;
    }
    for underline_index in index + 1..lines.len() {
        let candidate = &lines[underline_index];
        // Lazy continuation lines never act as an underline.
        let found = if candidate.lazy {
            None
        } else {
            setext_underline_depth(candidate.text)
        };
        let Some(depth) = found else {
            let text = candidate.text;
            if text.trim().is_empty()
                || table_can_start(lines, underline_index, options)
                || likely_block_start(text, options)
            {
                return None;
            }
            continue;
        };

        let mut content = String::new();
        for content_line in &lines[index..underline_index] {
            push_line(&mut content, trim_ascii_start(content_line.text));
        }
        let span = Span::new(lines[index].start, candidate.end);
        let children = parse_inlines(
            &content,
            lines[index].start,
            options,
            definitions,
            &mut Vec::new(),
        );
        let heading = Block::Heading(Heading {
            meta: NodeMeta::new(Some(span)),
            depth,
            kind: HeadingKind::Setext,
            children,
        });
        return Some((heading, underline_index + 1));
    }
    None
}
/// Classifies a setext underline: depth 1 for a run of `=`, depth 2 for a run of
/// `-`, `None` for anything else (including an empty line or mixed characters).
/// The line is first passed through `trim_up_to_three_spaces` and then trimmed.
fn setext_underline_depth(input: &str) -> Option<u8> {
    let underline = trim_up_to_three_spaces(input)?.trim();
    let marker = underline.chars().next()?;
    let depth = match marker {
        '=' => 1,
        '-' => 2,
        _ => return None,
    };
    underline
        .chars()
        .all(|symbol| symbol == marker)
        .then_some(depth)
}
/// Parses a paragraph starting at `lines[index]`.
///
/// Lines are gathered until a blank line or, for any non-lazy line after the
/// first, a line where a table can start or that looks like a block start. Each
/// line has its leading ASCII whitespace trimmed and lines are joined with `\n`.
/// Returns the paragraph and the index of the first line not consumed.
fn parse_paragraph(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> (Block, usize) {
    let start = lines[index].start;
    let mut content = String::new();
    let mut next = index;
    while let Some(line) = lines.get(next) {
        if line.text.trim().is_empty() {
            break;
        }
        // Only non-lazy lines after the first can interrupt the paragraph.
        let interrupts = next > index
            && !line.lazy
            && (table_can_start(lines, next, options) || likely_block_start(line.text, options));
        if interrupts {
            break;
        }
        if !content.is_empty() {
            content.push('\n');
        }
        content.push_str(trim_ascii_start(line.text));
        next += 1;
    }
    let end = lines[next - 1].end;
    let paragraph = Block::Paragraph(Paragraph {
        meta: NodeMeta::new(Some(Span::new(start, end))),
        children: parse_inlines(&content, start, options, definitions, diagnostics),
    });
    (paragraph, next)
}
/// One run of emphasis-style delimiter bytes (`*`, `_` or `~`) recorded while
/// scanning inline content, later paired up by `process_emphasis`.
#[derive(Clone, Copy)]
struct DelimMarker {
// Index in the inline node list of the text node that holds this run.
node_index: usize,
// The delimiter byte the run is made of.
marker: u8,
// Number of delimiter bytes still unused in the run.
length: usize,
// Whether the run may open a span.
can_open: bool,
// Whether the run may close a span.
can_close: bool,
// Absolute source offset recorded for the run (base offset + index when recorded).
span_start: usize,
// Set once the run must no longer take part in matching.
inactive: bool,
}
/// Records the delimiter run that starts at `input[index]`.
///
/// Pushes a text node holding the raw run onto `nodes` and a matching
/// `DelimMarker` onto `delimiters`. Underscore runs use the underscore flanking
/// helpers and every other marker uses the generic ones. When `strikethrough` is
/// set and the marker is not `~`, a `~` directly after the run forces
/// `can_open` and a `~` directly before it forces `can_close`.
fn record_emphasis_delimiter(
    nodes: &mut Vec<Inline>,
    delimiters: &mut Vec<DelimMarker>,
    input: &str,
    index: usize,
    base_offset: usize,
    marker: u8,
    strikethrough: bool,
) {
    let run_len = delimiter_byte_run_len(input, index, marker);
    let (mut opens, mut closes) = match marker {
        b'_' => (
            can_open_underscore(input, index, run_len),
            can_close_underscore(input, index, run_len),
        ),
        _ => (
            can_open_delimited(input, index, run_len),
            can_close_delimited(input, index, run_len),
        ),
    };
    if strikethrough && marker != b'~' {
        let preceded_by_tilde = input[..index].ends_with('~');
        let followed_by_tilde = input[index + run_len..].starts_with('~');
        opens = opens || followed_by_tilde;
        closes = closes || preceded_by_tilde;
    }

    let run_start = base_offset + index;
    let node_index = nodes.len();
    nodes.push(Inline::Text(Text {
        meta: NodeMeta::new(Some(Span::new(run_start, run_start + run_len))),
        value: String::from(marker as char).repeat(run_len),
    }));
    delimiters.push(DelimMarker {
        node_index,
        marker,
        length: run_len,
        can_open: opens,
        can_close: closes,
        span_start: run_start,
        inactive: false,
    });
}
/// Pairs up the recorded delimiter runs and wraps the nodes between each matched
/// pair in emphasis, strong or delete nodes, then merges neighbouring text nodes.
///
/// `nodes` is the flat inline list and `delimiters` the runs recorded for it, in
/// source order. Returns the rewritten node list.
fn process_emphasis(mut nodes: Vec<Inline>, mut delimiters: Vec<DelimMarker>) -> Vec<Inline> {
if delimiters.is_empty() {
return nodes;
}
// Lower bound for the backwards opener search, one slot per key produced by
// `openers_bottom_key` (3 markers x 2 flag states x 3 length classes = 18).
let mut openers_bottom: [Option<usize>; 18] = [None; 18];
let mut closer_idx = 0;
while closer_idx < delimiters.len() {
let closer = delimiters[closer_idx];
// Only active runs that can close are considered as closers.
if closer.inactive || !closer.can_close {
closer_idx += 1;
continue;
}
let key = openers_bottom_key(&closer);
let bottom = openers_bottom[key];
let mut opener_idx = None;
// Walk backwards from the closer looking for the nearest usable opener.
let mut search = closer_idx;
while search > 0 {
search -= 1;
if let Some(bottom) = bottom {
if search < bottom {
break;
}
}
let candidate = delimiters[search];
if candidate.inactive || candidate.marker != closer.marker || !candidate.can_open {
continue;
}
if emphasis_delimiters_match(&candidate, &closer) {
opener_idx = Some(search);
break;
}
}
let Some(opener_idx) = opener_idx else {
// No opener: remember where the search stopped for this key, and retire
// the run unless it can still act as an opener later.
openers_bottom[key] = Some(closer_idx);
if !closer.can_open {
delimiters[closer_idx].inactive = true;
}
closer_idx += 1;
continue;
};
// Tilde runs consume the closer's whole length and produce a delete node;
// other markers consume two bytes (strong) when both runs have at least
// two left, otherwise one (emphasis).
let (used, wrap) = if closer.marker == b'~' {
let length = delimiters[closer_idx].length;
let marker = if length >= 2 {
DeleteMarker::DoubleTilde
} else {
DeleteMarker::SingleTilde
};
(length, EmphasisWrap::Delete(marker))
} else {
let strong = delimiters[opener_idx].length >= 2 && delimiters[closer_idx].length >= 2;
let used = if strong { 2 } else { 1 };
let wrap = if strong {
EmphasisWrap::Strong
} else {
EmphasisWrap::Emphasis
};
(used, wrap)
};
apply_emphasis(
&mut nodes,
&mut delimiters,
opener_idx,
closer_idx,
used,
wrap,
);
// Runs strictly between the pair are now inside the wrapper and can no
// longer match anything.
let mut inner = opener_idx + 1;
while inner < closer_idx {
delimiters[inner].inactive = true;
inner += 1;
}
if delimiters[opener_idx].length == 0 {
delimiters[opener_idx].inactive = true;
}
// A closer with bytes left is examined again on the next iteration.
if delimiters[closer_idx].length == 0 {
delimiters[closer_idx].inactive = true;
closer_idx += 1;
}
}
merge_adjacent_text(&mut nodes);
nodes
}
/// Collapses every run of neighbouring `Inline::Text` nodes into a single text
/// node, in place, and does the same recursively inside emphasis, strong and
/// delete nodes.
///
/// A merged node keeps the span start of the first node and takes the span end
/// of the last one (when both carry spans).
fn merge_adjacent_text(nodes: &mut Vec<Inline>) {
    // `write` is the slot the next surviving node goes to; everything before it
    // is already compacted.
    let mut write = 0;
    for read in 0..nodes.len() {
        if read != write {
            nodes.swap(read, write);
        }
        if write > 0 {
            let (head, tail) = nodes.split_at_mut(write);
            if let (Inline::Text(previous), Inline::Text(current)) =
                (&mut head[write - 1], &tail[0])
            {
                previous.value.push_str(&current.value);
                if let (Some(previous_span), Some(current_span)) =
                    (previous.meta.span.as_mut(), current.meta.span)
                {
                    previous_span.end = current_span.end;
                }
                // The current node was absorbed; its slot is reused by the next
                // node and dropped by the final truncate.
                continue;
            }
        }
        write += 1;
    }
    nodes.truncate(write);
    for node in nodes.iter_mut() {
        match node {
            Inline::Emphasis(emphasis) => merge_adjacent_text(&mut emphasis.children),
            Inline::Strong(strong) => merge_adjacent_text(&mut strong.children),
            Inline::Delete(delete) => merge_adjacent_text(&mut delete.children),
            _ => {}
        }
    }
}
/// Maps a closer to its slot in the `openers_bottom` table.
///
/// The slot combines the marker (`_` = 1, `~` = 2, anything else = 0), whether
/// the run can both open and close, and the run length modulo three, giving a
/// value in `0..18`.
fn openers_bottom_key(closer: &DelimMarker) -> usize {
    let marker_slot: usize = match closer.marker {
        b'_' => 1,
        b'~' => 2,
        _ => 0,
    };
    let both_slot: usize = if closer.can_open && closer.can_close {
        1
    } else {
        0
    };
    (marker_slot * 2 + both_slot) * 3 + closer.length % 3
}
/// Decides whether `opener` and `closer` may be paired.
///
/// Tilde runs pair only when their lengths are equal. For other markers, when
/// either run can both open and close, the pair is rejected if the combined
/// length is a multiple of three unless both lengths are multiples of three.
fn emphasis_delimiters_match(opener: &DelimMarker, closer: &DelimMarker) -> bool {
    if opener.marker == b'~' {
        return opener.length == closer.length;
    }
    let either_is_both =
        (opener.can_open && opener.can_close) || (closer.can_open && closer.can_close);
    if !either_is_both {
        return true;
    }
    let sum_is_multiple = (opener.length + closer.length) % 3 == 0;
    let each_is_multiple = opener.length % 3 == 0 && closer.length % 3 == 0;
    !sum_is_multiple || each_is_multiple
}
/// The kind of wrapper node `apply_emphasis` builds for a matched delimiter pair.
#[derive(Clone, Copy)]
enum EmphasisWrap {
// Wrap the children in an `Inline::Emphasis` node.
Emphasis,
// Wrap the children in an `Inline::Strong` node.
Strong,
// Wrap the children in an `Inline::Delete` node that records its tilde marker.
Delete(DeleteMarker),
}
/// Wraps the nodes between a matched opener and closer in a new inline node.
///
/// `used` delimiter bytes are consumed from the end of the opener run and from
/// the start of the closer run. The nodes strictly between the two delimiter
/// text nodes become the children of the wrapper selected by `wrap`. Delimiter
/// text nodes that end up empty are removed, and the `node_index` of every
/// delimiter is adjusted to follow the node-list changes.
///
/// The wrapper span covers exactly the consumed delimiter bytes and the
/// children. The opener's leftover bytes sit before the wrapper, so its
/// `span_start` is unchanged. The closer's leftover bytes sit after the
/// wrapper, so its `span_start` advances by `used`. This matches the spans that
/// `trim_delimiter_text_tail` / `trim_delimiter_text_head` leave on the text
/// nodes.
fn apply_emphasis(
    nodes: &mut Vec<Inline>,
    delimiters: &mut [DelimMarker],
    opener_idx: usize,
    closer_idx: usize,
    used: usize,
    wrap: EmphasisWrap,
) {
    let opener_node = delimiters[opener_idx].node_index;
    let closer_node = delimiters[closer_idx].node_index;

    // Opener: consume from the tail; the wrapper starts right after what is left.
    trim_delimiter_text_tail(&mut nodes[opener_node], used);
    delimiters[opener_idx].length -= used;
    let span_start = delimiters[opener_idx].span_start + delimiters[opener_idx].length;

    // Closer: consume from the head; the wrapper ends right after the consumed
    // bytes and the leftover run now begins there.
    trim_delimiter_text_head(&mut nodes[closer_node], used);
    let span_end = delimiters[closer_idx].span_start + used;
    delimiters[closer_idx].length -= used;
    delimiters[closer_idx].span_start = span_end;

    let children_start = opener_node + 1;
    let children_end = closer_node;
    let children: Vec<Inline> = nodes.drain(children_start..children_end).collect();
    let removed = children.len();
    let meta = NodeMeta::new(Some(Span::new(span_start, span_end)));
    let wrapped = match wrap {
        EmphasisWrap::Strong => Inline::Strong(Strong { meta, children }),
        EmphasisWrap::Emphasis => Inline::Emphasis(Emphasis { meta, children }),
        EmphasisWrap::Delete(marker) => Inline::Delete(Delete {
            meta,
            marker,
            children,
        }),
    };
    nodes.insert(children_start, wrapped);
    // `removed` nodes were replaced by one wrapper, so everything from the
    // closer onwards shifts by `1 - removed`.
    reindex_delimiters(delimiters, children_end, 1 - removed as isize);
    if delimiters[closer_idx].length == 0 {
        let pos = delimiters[closer_idx].node_index;
        nodes.remove(pos);
        reindex_delimiters(delimiters, pos, -1);
    }
    if delimiters[opener_idx].length == 0 {
        let pos = delimiters[opener_idx].node_index;
        nodes.remove(pos);
        reindex_delimiters(delimiters, pos, -1);
    }
}
/// Shifts the `node_index` of every delimiter at or after `from` by `delta`.
/// A zero `delta` is a no-op.
fn reindex_delimiters(delimiters: &mut [DelimMarker], from: usize, delta: isize) {
    if delta == 0 {
        return;
    }
    delimiters
        .iter_mut()
        .filter(|delimiter| delimiter.node_index >= from)
        .for_each(|delimiter| {
            delimiter.node_index = (delimiter.node_index as isize + delta) as usize;
        });
}
/// Drops up to `count` bytes from the end of a text node's value and pulls the
/// end of its span back by the same amount. Non-text nodes are left untouched.
fn trim_delimiter_text_tail(node: &mut Inline, count: usize) {
    let Inline::Text(text) = node else {
        return;
    };
    let kept = text.value.len().saturating_sub(count);
    text.value.truncate(kept);
    if let Some(span) = text.meta.span.as_mut() {
        span.end = span.end.saturating_sub(count);
    }
}
/// Drops up to `count` bytes from the start of a text node's value and advances
/// the start of its span by the number of bytes actually removed. Non-text nodes
/// are left untouched.
fn trim_delimiter_text_head(node: &mut Inline, count: usize) {
    let Inline::Text(text) = node else {
        return;
    };
    let dropped = count.min(text.value.len());
    text.value.drain(..dropped);
    if let Some(span) = text.meta.span.as_mut() {
        span.start += dropped;
    }
}
/// Parses `input` as inline content using the default inline context (links
/// allowed). `base_offset` is added to every byte offset to form node spans.
fn parse_inlines(
    input: &str,
    base_offset: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Vec<Inline> {
    let context = InlineContext::default();
    parse_inlines_with_context(input, base_offset, options, definitions, diagnostics, context)
}
/// Flags passed down through nested inline parsing.
#[derive(Clone, Copy)]
struct InlineContext {
// When false, link parsing is refused and autolinks are not emitted as
// autolink nodes.
allow_links: bool,
}
// The default context allows links.
impl Default for InlineContext {
fn default() -> Self {
Self { allow_links: true }
}
}
/// Parses `input` as inline content and returns the resulting inline nodes.
///
/// The input is scanned left to right. At each position the constructs enabled
/// in `options` are tried in a fixed order; the first one that matches emits a
/// node and advances the cursor. Anything unmatched is accumulated as plain
/// text. Runs of `*`, `_` and (for strikethrough) `~` are recorded as delimiter
/// runs and resolved at the end by `process_emphasis`.
///
/// * `base_offset` - added to every byte index to form absolute node spans.
/// * `definitions` - passed to link and image parsing for reference lookups.
/// * `diagnostics` - receives problems found while parsing (for example an
///   unclosed MDX expression).
/// * `context` - flags inherited by nested inline parsing.
fn parse_inlines_with_context(
    input: &str,
    base_offset: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
    context: InlineContext,
) -> Vec<Inline> {
    let bytes = input.as_bytes();
    let mut nodes = Vec::new();
    let mut text_start = 0;
    let mut text = String::new();
    let mut index = 0;
    let mut delimiters: Vec<DelimMarker> = Vec::new();
    while index < bytes.len() {
        // Backslash escape of ASCII punctuation.
        if bytes[index] == b'\\' {
            if let Some((next_index, char)) = next_char(input, index + 1) {
                if char.is_ascii_punctuation() {
                    if options.parse.preserve_character_escapes {
                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                        nodes.push(Inline::Escape(Escape {
                            meta: NodeMeta::new(Some(Span::new(
                                base_offset + index,
                                base_offset + next_index,
                            ))),
                            value: char,
                        }));
                        index = next_index;
                        text_start = index;
                        continue;
                    }
                    if text.is_empty() {
                        text_start = base_offset + index;
                    }
                    if gfm_link_label_preserves_url_dot_escape(&text, char, options, context) {
                        text.push('\\');
                    }
                    text.push(char);
                    index = next_index;
                    continue;
                }
            }
        }
        // Character reference (`&name;`, `&#...;`).
        if bytes[index] == b'&' {
            if let Some((end, value)) = parse_character_reference(input, index) {
                if options.parse.preserve_character_references {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    nodes.push(Inline::CharacterReference(CharacterReference {
                        meta: NodeMeta::new(Some(Span::new(
                            base_offset + index,
                            base_offset + end,
                        ))),
                        reference: input[index..end].into(),
                        value,
                    }));
                    index = end;
                    text_start = index;
                    continue;
                }
                if text.is_empty() {
                    text_start = base_offset + index;
                }
                text.push_str(&value);
                index = end;
                continue;
            }
        }
        // Line endings: backslash hard break, trailing-space hard break, or soft break.
        if bytes[index] == b'\n' {
            // A backslash hard break needs an *unescaped* backslash directly
            // before the newline. The accumulated text can also end in `\`
            // because of an escaped backslash (`\\`) or a character reference,
            // so the raw input is checked too: only an odd run of backslashes
            // leaves the last one unescaped.
            let raw_backslashes = bytes[..index]
                .iter()
                .rev()
                .take_while(|byte| **byte == b'\\')
                .count();
            if text.ends_with('\\') && raw_backslashes % 2 == 1 {
                text.pop();
                flush_text(
                    &mut nodes,
                    &mut text,
                    text_start,
                    base_offset + index.saturating_sub(1),
                );
                nodes.push(Inline::LineBreak(LineBreak {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index.saturating_sub(1),
                        base_offset + index + 1,
                    ))),
                    kind: LineBreakKind::Backslash,
                }));
                index += 1;
                text_start = index;
                continue;
            }
            let trailing_spaces = trailing_space_count(&text);
            if is_hard_break_suffix(&text, trailing_spaces) {
                text.truncate(text.len() - trailing_spaces);
                flush_text(
                    &mut nodes,
                    &mut text,
                    text_start,
                    base_offset + index.saturating_sub(trailing_spaces),
                );
                nodes.push(Inline::LineBreak(LineBreak {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index.saturating_sub(trailing_spaces),
                        base_offset + index + 1,
                    ))),
                    kind: LineBreakKind::Spaces,
                }));
                index += 1;
                text_start = index;
                continue;
            }
            // Trailing whitespace before a soft break is dropped from the text.
            if trailing_spaces > 0 {
                text.truncate(text.len() - trailing_spaces);
            }
            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
            nodes.push(Inline::SoftBreak(SoftBreak {
                meta: NodeMeta::new(Some(Span::new(
                    base_offset + index,
                    base_offset + index + 1,
                ))),
            }));
            index += 1;
            text_start = index;
            continue;
        }
        // Code span; an unmatched backtick run is kept as literal text.
        if bytes[index] == b'`' {
            if let Some((end, code_span)) = parse_code_span(input, index) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(Inline::Code(CodeInline {
                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
                    value: code_span.value,
                    raw: code_span.raw,
                    fence_length: code_span.fence_length,
                }));
                index = end;
                text_start = index;
                continue;
            } else {
                let run = bytes[index..]
                    .iter()
                    .take_while(|byte| **byte == b'`')
                    .count();
                if text.is_empty() {
                    text_start = base_offset + index;
                }
                for _ in 0..run {
                    text.push('`');
                }
                index += run;
                continue;
            }
        }
        // Spoiler: `||...||` (exactly two pipes).
        if options.constructs.spoiler
            && bytes.get(index) == Some(&b'|')
            && bytes.get(index + 1) == Some(&b'|')
            && bytes.get(index + 2) != Some(&b'|')
        {
            if let Some(end) = find_spoiler_close(input, index + 2) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 2..end];
                nodes.push(Inline::Spoiler(Spoiler {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 2,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 2,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 2;
                text_start = index;
                continue;
            }
        }
        // `*` delimiter run, resolved later by `process_emphasis`.
        if bytes[index] == b'*' && delimiter_byte_run_start(input, index, b'*') == index {
            let run_len = delimiter_byte_run_len(input, index, b'*');
            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
            record_emphasis_delimiter(
                &mut nodes,
                &mut delimiters,
                input,
                index,
                base_offset,
                b'*',
                options.constructs.gfm_strikethrough,
            );
            index += run_len;
            text_start = index;
            continue;
        }
        // Underline extension, `___...___`: emphasis wrapping an underline.
        // NOTE(review): this passes a run length of 1 to `can_open_underscore`
        // while the `__` case below passes 2 - confirm 1 (rather than 3) is intended.
        if options.constructs.underline
            && bytes.get(index) == Some(&b'_')
            && bytes.get(index + 1) == Some(&b'_')
            && bytes.get(index + 2) == Some(&b'_')
            && can_open_underscore(input, index, 1)
        {
            if let Some(end) = find_closing_delimiter(input, index + 3, "___", true) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 3..end];
                let underline = Inline::Underline(Underline {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index + 1,
                        base_offset + end + 2,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 3,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                });
                nodes.push(Inline::Emphasis(Emphasis {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 3,
                    ))),
                    children: vec![underline],
                }));
                index = end + 3;
                text_start = index;
                continue;
            }
        }
        // Underline extension, `__...__`.
        if options.constructs.underline
            && bytes.get(index) == Some(&b'_')
            && bytes.get(index + 1) == Some(&b'_')
            && can_open_underscore(input, index, 2)
        {
            if let Some(end) = find_closing_delimiter(input, index + 2, "__", true) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 2..end];
                nodes.push(Inline::Underline(Underline {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 2,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 2,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 2;
                text_start = index;
                continue;
            }
        }
        // `_` delimiter run; a literal autolink starting here is tried first.
        if bytes[index] == b'_' && delimiter_byte_run_start(input, index, b'_') == index {
            if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
                && context.allow_links
            {
                if let Some((end, destination)) = parse_literal_autolink(
                    input,
                    index,
                    options.constructs.gfm_autolink_literal,
                    options.constructs.relaxed_autolinks,
                ) {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    nodes.push(Inline::Autolink(Autolink {
                        meta: NodeMeta::new(Some(Span::new(
                            base_offset + index,
                            base_offset + end,
                        ))),
                        destination,
                        kind: AutolinkKind::GfmLiteral {
                            original: input[index..end].into(),
                        },
                    }));
                    index = end;
                    text_start = index;
                    continue;
                }
            }
            let run_len = delimiter_byte_run_len(input, index, b'_');
            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
            record_emphasis_delimiter(
                &mut nodes,
                &mut delimiters,
                input,
                index,
                base_offset,
                b'_',
                options.constructs.gfm_strikethrough,
            );
            index += run_len;
            text_start = index;
            continue;
        }
        // Insert extension: `++...++`.
        if options.constructs.insert
            && bytes.get(index) == Some(&b'+')
            && bytes.get(index + 1) == Some(&b'+')
            && bytes.get(index + 2) != Some(&b'+')
            && can_open_delimited(input, index, 2)
        {
            if let Some(end) = find_closing_delimiter(input, index + 2, "++", false) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 2..end];
                nodes.push(Inline::Insert(Insert {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 2,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 2,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 2;
                text_start = index;
                continue;
            }
        }
        // Highlight extension: `==...==`.
        if options.constructs.highlight
            && bytes.get(index) == Some(&b'=')
            && bytes.get(index + 1) == Some(&b'=')
            && bytes.get(index + 2) != Some(&b'=')
            && can_open_delimited(input, index, 2)
        {
            if let Some(end) = find_closing_delimiter(input, index + 2, "==", false) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 2..end];
                nodes.push(Inline::Mark(Mark {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 2,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 2,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 2;
                text_start = index;
                continue;
            }
        }
        // Subscript extension: `~...~`, unless single-tilde delete takes precedence.
        if options.constructs.subscript
            && starts_exact_byte_run(input, index, b'~', 1)
            && !single_tilde_delete_takes_precedence(options, input, index)
        {
            if let Some(end) = find_simple_inline_close(input, index + 1, b'~') {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 1..end];
                nodes.push(Inline::Subscript(Subscript {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 1,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 1,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 1;
                text_start = index;
                continue;
            }
        }
        // Inline footnote: `^[...]` with non-blank content.
        if options.constructs.inline_footnote
            && options.constructs.footnote_reference
            && bytes.get(index) == Some(&b'^')
            && bytes.get(index + 1) == Some(&b'[')
        {
            if let Some(close) = find_inline_footnote_end(input, index + 2) {
                let inner = &input[index + 2..close];
                if !inner.trim().is_empty() {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    nodes.push(Inline::InlineFootnote(InlineFootnote {
                        meta: NodeMeta::new(Some(Span::new(
                            base_offset + index,
                            base_offset + close + 1,
                        ))),
                        children: parse_inlines_with_context(
                            inner,
                            base_offset + index + 2,
                            options,
                            definitions,
                            diagnostics,
                            context,
                        ),
                    }));
                    index = close + 1;
                    text_start = index;
                    continue;
                }
            }
        }
        // Superscript extension: `^...^` (not when it could be an inline footnote).
        if options.constructs.superscript
            && bytes.get(index) == Some(&b'^')
            && !(options.constructs.inline_footnote && bytes.get(index + 1) == Some(&b'['))
        {
            if let Some(end) = find_simple_inline_close(input, index + 1, b'^') {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                let inner = &input[index + 1..end];
                nodes.push(Inline::Superscript(Superscript {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 1,
                    ))),
                    children: parse_inlines_with_context(
                        inner,
                        base_offset + index + 1,
                        options,
                        definitions,
                        diagnostics,
                        context,
                    ),
                }));
                index = end + 1;
                text_start = index;
                continue;
            }
        }
        // Strikethrough delimiter run: `~~`, or `~` when single-tilde is enabled.
        if options.constructs.gfm_strikethrough
            && bytes[index] == b'~'
            && delimiter_byte_run_start(input, index, b'~') == index
        {
            let run_len = delimiter_byte_run_len(input, index, b'~');
            let recordable =
                run_len == 2 || (run_len == 1 && options.parse.single_tilde_strikethrough);
            if recordable {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                record_emphasis_delimiter(
                    &mut nodes,
                    &mut delimiters,
                    input,
                    index,
                    base_offset,
                    b'~',
                    true,
                );
                index += run_len;
                text_start = index;
                continue;
            }
        }
        // Image: `![`.
        if bytes[index] == b'!' && index + 1 < bytes.len() && bytes[index + 1] == b'[' {
            if let Some((end, image)) =
                parse_image(input, index, base_offset, options, definitions, diagnostics)
            {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(image);
                index = end;
                text_start = index;
                continue;
            }
        }
        // `[`: wikilink, then link, then footnote reference.
        if bytes[index] == b'[' {
            if let Some((end, wikilink)) = parse_wikilink(input, index, base_offset, options) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(wikilink);
                index = end;
                text_start = index;
                continue;
            }
            if let Some((end, link)) = parse_link(
                input,
                index,
                base_offset,
                options,
                definitions,
                diagnostics,
                context,
            ) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(link);
                index = end;
                text_start = index;
                continue;
            }
            if options.constructs.footnote_reference
                && bytes.get(index) == Some(&b'[')
                && bytes.get(index + 1) == Some(&b'^')
            {
                if let Some(close) = find_footnote_reference_label_end(input, index + 2) {
                    let label = &input[index + 2..close];
                    if is_footnote_label(label) {
                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                        nodes.push(Inline::FootnoteReference(FootnoteReference {
                            meta: NodeMeta::new(Some(Span::new(
                                base_offset + index,
                                base_offset + close + 1,
                            ))),
                            label: label.into(),
                            identifier: normalize_label(label),
                        }));
                        index = close + 1;
                        text_start = index;
                        continue;
                    }
                }
            }
        }
        // Inline math; an unmatched run of two or more `$` is kept as literal text.
        if bytes[index] == b'$' && options.constructs.math_inline {
            if let Some((end, value, kind)) = parse_math_inline(input, index) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(Inline::Math(MathInline {
                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
                    value,
                    kind,
                }));
                index = end;
                text_start = index;
                continue;
            }
            let run = bytes[index..]
                .iter()
                .take_while(|byte| **byte == b'$')
                .count();
            if run > 1 {
                if text.is_empty() {
                    text_start = base_offset + index;
                }
                text.push_str(&input[index..index + run]);
                index += run;
                continue;
            }
        }
        // Literal (bare) autolink at any position.
        if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
            && context.allow_links
        {
            if let Some((end, destination)) = parse_literal_autolink(
                input,
                index,
                options.constructs.gfm_autolink_literal,
                options.constructs.relaxed_autolinks,
            ) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(Inline::Autolink(Autolink {
                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
                    destination,
                    kind: AutolinkKind::GfmLiteral {
                        original: input[index..end].into(),
                    },
                }));
                index = end;
                text_start = index;
                continue;
            }
        }
        // `<`: angle autolink, then MDX JSX, then raw inline HTML.
        if bytes[index] == b'<' {
            if let Some(end) = parse_autolink_end(input, index) {
                let raw = &input[index..end];
                if is_autolink(raw) {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    if context.allow_links {
                        nodes.push(Inline::Autolink(Autolink {
                            meta: NodeMeta::new(Some(Span::new(
                                base_offset + index,
                                base_offset + end,
                            ))),
                            destination: raw[1..raw.len() - 1].into(),
                            kind: AutolinkKind::Angle,
                        }));
                    } else {
                        // Links are not allowed here: keep the target as plain text.
                        nodes.push(Inline::Text(Text {
                            meta: NodeMeta::new(Some(Span::new(
                                base_offset + index,
                                base_offset + end,
                            ))),
                            value: raw[1..raw.len() - 1].into(),
                        }));
                    }
                    index = end;
                    text_start = index;
                    continue;
                }
            }
            if options.constructs.mdx_jsx_inline {
                if let Some((end, raw)) = parse_mdx_jsx_inline(input, index) {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    nodes.push(Inline::MdxJsx(MdxJsxInline {
                        meta: NodeMeta::new(Some(Span::new(
                            base_offset + index,
                            base_offset + end,
                        ))),
                        value: raw,
                    }));
                    index = end;
                    text_start = index;
                    continue;
                }
            }
            if let Some((end, raw)) = parse_html_inline(input, index) {
                if options.constructs.html_inline {
                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                    nodes.push(Inline::Html(HtmlInline {
                        meta: NodeMeta::new(Some(Span::new(
                            base_offset + index,
                            base_offset + end,
                        ))),
                        value: raw,
                    }));
                    index = end;
                    text_start = index;
                    continue;
                }
            }
        }
        // MDX expression `{...}`; an unclosed brace is reported and kept as text.
        if bytes[index] == b'{' && options.constructs.mdx_expression_inline {
            if let Some(end) = find_mdx_expression_inline_close(input, index) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(Inline::MdxExpression(MdxExpressionInline {
                    meta: NodeMeta::new(Some(Span::new(
                        base_offset + index,
                        base_offset + end + 1,
                    ))),
                    value: input[index + 1..end].into(),
                }));
                index = end + 1;
                text_start = index;
                continue;
            } else {
                diagnostics.push(Diagnostic::new(
                    DiagnosticSeverity::Error,
                    DiagnosticCode::InvalidMdx,
                    Span::new(base_offset + index, base_offset + input.len()),
                    "MDX expression is missing a closing brace",
                ));
            }
        }
        // Shortcode `:name:`.
        if bytes[index] == b':' && options.constructs.shortcode {
            if let Some((end, name)) = parse_shortcode(input, index) {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(Inline::Shortcode(Shortcode {
                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
                    name,
                }));
                index = end;
                text_start = index;
                continue;
            }
        }
        // Text directive starting with `:`.
        if bytes[index] == b':' && options.constructs.directive_text {
            if let Some((end, directive)) =
                parse_text_directive(input, index, base_offset, options, definitions, diagnostics)
            {
                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
                nodes.push(directive);
                index = end;
                text_start = index;
                continue;
            }
        }
        // Nothing matched: take one character as plain text (NUL becomes U+FFFD).
        let (next_index, char) = next_char(input, index).expect("valid UTF-8 byte index");
        if text.is_empty() {
            text_start = base_offset + index;
        }
        text.push(if char == '\0' { '\u{FFFD}' } else { char });
        index = next_index;
    }
    flush_text(&mut nodes, &mut text, text_start, base_offset + input.len());
    process_emphasis(nodes, delimiters)
}
/// Parses a `:name:` shortcode whose opening colon is at `input[index]`.
///
/// The name must be non-empty and consist of ASCII letters, digits, `_`, `-` or
/// `+`. Input that starts with `::` is rejected. Returns the offset just past
/// the closing colon together with the name.
fn parse_shortcode(input: &str, index: usize) -> Option<(usize, String)> {
    if input[index..].starts_with("::") {
        return None;
    }
    let name_start = index + 1;
    let mut cursor = name_start;
    while let Some((next, symbol)) = next_char(input, cursor) {
        match symbol {
            // A closing colon right after the opening one means an empty name.
            ':' if cursor == name_start => return None,
            ':' => return Some((next, input[name_start..cursor].into())),
            '_' | '-' | '+' => cursor = next,
            other if other.is_ascii_alphanumeric() => cursor = next,
            _ => return None,
        }
    }
    None
}
/// Parses a `[[...]]` wikilink whose first bracket is at `input[index]`.
///
/// Returns `None` when neither wikilink construct is enabled, the input does not
/// start with `[[`, no closing `]]` is found, the content is empty or longer
/// than `WIKILINK_MAX_BYTES`, or the unescaped target is empty. When the content
/// has a separator, the configured label order decides which side is the target
/// and which is the label; without one, both are the whole content.
fn parse_wikilink(
    input: &str,
    index: usize,
    base_offset: usize,
    options: &SyntaxOptions,
) -> Option<(usize, Inline)> {
    let label_order = if options.constructs.wikilink_title_after_pipe {
        WikiLinkLabelOrder::AfterPipe
    } else if options.constructs.wikilink_title_before_pipe {
        WikiLinkLabelOrder::BeforePipe
    } else {
        return None;
    };
    let bytes = input.as_bytes();
    if bytes.get(index) != Some(&b'[') || bytes.get(index + 1) != Some(&b'[') {
        return None;
    }
    let close = find_wikilink_close(input, index + 2)?;
    let source = &input[index + 2..close];
    if source.is_empty() || source.len() > WIKILINK_MAX_BYTES {
        return None;
    }

    let (target_source, label_source) = match find_wikilink_separator(source) {
        None => (source, source),
        Some(separator) => {
            let before = &source[..separator];
            let after = &source[separator + 1..];
            match label_order {
                WikiLinkLabelOrder::AfterPipe => (before, after),
                WikiLinkLabelOrder::BeforePipe => (after, before),
            }
        }
    };
    let target = unescape_string(target_source);
    if target.is_empty() {
        return None;
    }
    let label = unescape_string(label_source);

    let end = close + 2;
    let wikilink = Inline::WikiLink(WikiLink {
        meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
        target,
        label,
        label_order,
    });
    Some((end, wikilink))
}
/// Finds the byte offset of the `]]` that closes a wikilink, scanning from
/// `start`.
///
/// A backslash skips the character that follows it. A line feed or carriage
/// return before the closer aborts the search.
fn find_wikilink_close(input: &str, start: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = start;
    while cursor < input.len() {
        let byte = bytes[cursor];
        if byte == b'\n' || byte == b'\r' {
            return None;
        }
        if byte == b']' && bytes.get(cursor + 1) == Some(&b']') {
            return Some(cursor);
        }
        if byte == b'\\' {
            // Step over the backslash, then over the escaped character if any.
            cursor += 1;
            if cursor < input.len() {
                cursor = next_char(input, cursor)?.0;
            }
            continue;
        }
        cursor = next_char(input, cursor)?.0;
    }
    None
}
/// Finds the byte offset of the first unescaped `|` in wikilink content.
/// A backslash skips the character that follows it.
fn find_wikilink_separator(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = 0;
    while cursor < input.len() {
        let byte = bytes[cursor];
        if byte == b'|' {
            return Some(cursor);
        }
        if byte == b'\\' {
            // Step over the backslash, then over the escaped character if any.
            cursor += 1;
            if cursor < input.len() {
                cursor = next_char(input, cursor)?.0;
            }
            continue;
        }
        cursor = next_char(input, cursor)?.0;
    }
    None
}
/// Counts the spaces and tabs at the end of `input`.
fn trailing_space_count(input: &str) -> usize {
    let without_tail = input.trim_end_matches(|symbol: char| symbol == ' ' || symbol == '\t');
    input.len() - without_tail.len()
}
/// Reports whether the last `trailing` bytes of `input` form a hard-break
/// suffix: at least two bytes, all of them spaces (a tab disqualifies it).
///
/// Returns `false`, rather than panicking, when `trailing` is larger than the
/// input.
fn is_hard_break_suffix(input: &str, trailing: usize) -> bool {
    if trailing < 2 {
        return false;
    }
    let bytes = input.as_bytes();
    match bytes.len().checked_sub(trailing) {
        Some(start) => bytes[start..].iter().all(|byte| *byte == b' '),
        None => false,
    }
}
/// Parses an image whose `!` is at `input[index]` (with `[` right after it).
///
/// Three forms are recognised, chosen by the byte that follows the alt label:
/// * `(`: an inline resource, producing `Inline::Image`;
/// * `[`: a full or collapsed reference, accepted only when the referenced
///   definition exists;
/// * anything else: a shortcut reference, accepted only when a definition for
///   the alt text exists.
///
/// Returns the end offset (relative to `input`) and the node, or `None` when
/// nothing matches. The alt text is parsed as inline content.
fn parse_image(
    input: &str,
    index: usize,
    base_offset: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(usize, Inline)> {
    let label_start = index + 2;
    let label_end = find_link_label_end(input, index + 1)?;
    let alt_source = &input[label_start..label_end];
    let after_label = label_end + 1;
    let image_start = base_offset + index;
    let alt_offset = base_offset + label_start;

    match input.as_bytes().get(after_label) {
        Some(b'(') => {
            let (close, resource) = parse_link_resource(input, after_label)?;
            let image = Inline::Image(Image {
                meta: NodeMeta::new(Some(Span::new(image_start, base_offset + close))),
                destination: resource.destination,
                destination_kind: resource.destination_kind,
                title: resource.title,
                title_kind: resource.title_kind,
                alt: parse_inlines(alt_source, alt_offset, options, definitions, diagnostics),
            });
            Some((close, image))
        }
        Some(b'[') => {
            let close = find_reference_label_end(input, after_label)?;
            let label = &input[after_label + 1..close];
            // An empty second label (`![alt][]`) refers back to the alt text.
            let (identifier, kind) = if label.is_empty() {
                (alt_source, ReferenceKind::Collapsed)
            } else {
                (label, ReferenceKind::Full)
            };
            if !definition_exists(definitions, identifier) {
                return None;
            }
            let end = close + 1;
            let reference = Inline::ImageReference(ImageReference {
                meta: NodeMeta::new(Some(Span::new(image_start, base_offset + end))),
                identifier: normalize_label(identifier),
                label: identifier.into(),
                kind,
                alt: parse_inlines(alt_source, alt_offset, options, definitions, diagnostics),
            });
            Some((end, reference))
        }
        _ => {
            if !definition_exists(definitions, alt_source) {
                return None;
            }
            let reference = Inline::ImageReference(ImageReference {
                meta: NodeMeta::new(Some(Span::new(image_start, base_offset + after_label))),
                identifier: normalize_label(alt_source),
                label: alt_source.into(),
                kind: ReferenceKind::Shortcut,
                alt: parse_inlines(alt_source, alt_offset, options, definitions, diagnostics),
            });
            Some((after_label, reference))
        }
    }
}
/// Parses a link whose opening `[` is at byte `index` of `input`.
///
/// Tries, in order: an inline link `[text](resource)`, a full or collapsed
/// reference `[text][label]` / `[text][]`, and a shortcut reference `[text]`.
/// Reference forms require a matching entry in `definitions`.
///
/// Returns the end offset (relative to `input`) and the node, or `None` when
/// links are disabled by `context`, the label itself contains a link, or no
/// form matches. Spans stored in the node are shifted by `base_offset`.
fn parse_link(
input: &str,
index: usize,
base_offset: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
context: InlineContext,
) -> Option<(usize, Inline)> {
if !context.allow_links {
return None;
}
let label_end = find_link_label_end(input, index)?;
let label_source = &input[index + 1..label_end];
// Reject the candidate when its own text would parse to a link.
if label_contains_link(label_source, base_offset + index + 1, options, definitions) {
return None;
}
let after_label = label_end + 1;
if input.as_bytes().get(after_label) == Some(&b'(') {
// A malformed resource is not fatal; the reference forms below still get a chance.
if let Some((close, resource)) = parse_link_resource(input, after_label) {
return Some((
close,
Inline::Link(Link {
meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
destination: resource.destination,
destination_kind: resource.destination_kind,
title: resource.title,
title_kind: resource.title_kind,
// Children are parsed with links disabled.
children: parse_inlines_with_context(
label_source,
base_offset + index + 1,
options,
definitions,
diagnostics,
InlineContext { allow_links: false },
),
}),
));
}
}
if input.as_bytes().get(after_label) == Some(&b'[') {
let close = find_reference_label_end(input, after_label)?;
let label = &input[after_label + 1..close];
// An empty second label (`[]`) reuses the link text as the identifier.
let identifier = if label.is_empty() {
label_source
} else {
label
};
if definition_exists(definitions, identifier) {
return Some((
close + 1,
Inline::LinkReference(LinkReference {
meta: NodeMeta::new(Some(Span::new(
base_offset + index,
base_offset + close + 1,
))),
identifier: normalize_label(identifier),
label: identifier.into(),
kind: if label.is_empty() {
ReferenceKind::Collapsed
} else {
ReferenceKind::Full
},
children: parse_inlines_with_context(
label_source,
base_offset + index + 1,
options,
definitions,
diagnostics,
InlineContext { allow_links: false },
),
}),
));
}
// A following label that does not resolve blocks the shortcut form.
return None;
}
// Shortcut reference: the link text itself must name a definition.
if definition_exists(definitions, label_source) {
return Some((
after_label,
Inline::LinkReference(LinkReference {
meta: NodeMeta::new(Some(Span::new(
base_offset + index,
base_offset + after_label,
))),
identifier: normalize_label(label_source),
label: label_source.into(),
kind: ReferenceKind::Shortcut,
children: parse_inlines_with_context(
label_source,
base_offset + index + 1,
options,
definitions,
diagnostics,
InlineContext { allow_links: false },
),
}),
));
}
None
}
/// Locates the `]` that ends the reference label opened by the `[` at `open`.
///
/// Returns `None` when `open` is not a `[`, when an unescaped `[` appears
/// inside the label, when the label is never closed, or when the enclosed
/// text exceeds the reference-label length limit.
fn find_reference_label_end(input: &str, open: usize) -> Option<usize> {
    if input.as_bytes().get(open).copied() != Some(b'[') {
        return None;
    }
    let label_start = open + 1;
    let mut position = label_start;
    while position < input.len() {
        let (after, current) = next_char(input, position)?;
        position = match current {
            '[' => return None,
            ']' => {
                return if reference_label_is_within_limit(&input[label_start..position]) {
                    Some(position)
                } else {
                    None
                };
            }
            // A backslash also consumes the character it escapes, when present.
            '\\' => match next_char(input, after) {
                Some((after_escape, _)) => after_escape,
                None => after,
            },
            _ => after,
        };
    }
    None
}
/// Reports whether `label_source`, parsed as inline content, contains a link.
///
/// Diagnostics produced by this trial parse go into a scratch buffer and are
/// discarded.
fn label_contains_link(
    label_source: &str,
    base_offset: usize,
    options: &SyntaxOptions,
    definitions: &[String],
) -> bool {
    let mut scratch_diagnostics = Vec::new();
    contains_link_inline(&parse_inlines_with_context(
        label_source,
        base_offset,
        options,
        definitions,
        &mut scratch_diagnostics,
        InlineContext::default(),
    ))
}
/// Reports whether any node in `inlines` is a link or link reference,
/// looking inside emphasis, strong, delete and text-directive labels.
fn contains_link_inline(inlines: &[Inline]) -> bool {
    for inline in inlines {
        let nested: &[Inline] = match inline {
            Inline::Link(_) | Inline::LinkReference(_) => return true,
            Inline::Emphasis(node) => &node.children,
            Inline::Strong(node) => &node.children,
            Inline::Delete(node) => &node.children,
            Inline::TextDirective(node) => &node.label,
            // Other node kinds are not searched.
            _ => continue,
        };
        if contains_link_inline(nested) {
            return true;
        }
    }
    false
}
/// Finds the `]` that balances the `[` at byte offset `open`.
///
/// Nested brackets are tracked by depth. Backslash escapes, code spans,
/// autolinks and inline HTML are skipped as units, so brackets inside them do
/// not count. Returns `None` when `open` is not a `[` or the bracket is never
/// closed.
fn find_link_label_end(input: &str, open: usize) -> Option<usize> {
if input.as_bytes().get(open) != Some(&b'[') {
return None;
}
let mut depth = 1usize;
let mut cursor = open + 1;
while cursor < input.len() {
let (next, char) = next_char(input, cursor)?;
match char {
'\\' => {
// Skip the escaped character as well, if there is one.
cursor = next_char(input, next)
.map(|(after_escape, _)| after_escape)
.unwrap_or(next);
continue;
}
'`' => {
// A complete code span is opaque; a lone backtick falls through as plain text.
if let Some((end, _)) = parse_code_span(input, cursor) {
cursor = end;
continue;
}
}
'<' => {
// Try an autolink first, then inline HTML; otherwise treat `<` as text.
if let Some(end) = parse_autolink_end(input, cursor) {
let raw = &input[cursor..end];
if is_autolink(raw) {
cursor = end;
continue;
}
}
if let Some((end, _)) = parse_html_inline(input, cursor) {
cursor = end;
continue;
}
}
'[' => depth += 1,
']' => {
depth = depth.checked_sub(1)?;
if depth == 0 {
return Some(cursor);
}
}
_ => {}
}
cursor = next;
}
None
}
/// Parses a text directive that starts at byte `index` of `input`.
///
/// The opener (name, optional `[label]`, optional `{attributes}`) is read from
/// the text one byte after `index` — presumably `index` is the `:` introducer;
/// confirm against the caller. Returns the end offset (relative to `input`)
/// and the node, with spans shifted by `base_offset`.
///
/// When the opener fails to parse but looks like a directive, an
/// `InvalidDirectiveName` error diagnostic is recorded before returning `None`.
fn parse_text_directive(
input: &str,
index: usize,
base_offset: usize,
options: &SyntaxOptions,
definitions: &[String],
diagnostics: &mut Vec<Diagnostic>,
) -> Option<(usize, Inline)> {
// Input starting with `::` here is not handled as a text directive.
if input[index..].starts_with("::") {
return None;
}
// The directive must start the input or follow whitespace or an opening bracket.
if index > 0 {
let previous = input[..index].chars().next_back()?;
if !previous.is_whitespace() && !matches!(previous, '(' | '[' | '{') {
return None;
}
}
let opener_source = &input[index + 1..];
let (name, label_source, attributes, consumed) = match parse_directive_opener(opener_source) {
Some(opener) => opener,
None => {
if directive_opener_looks_malformed(opener_source) {
diagnostics.push(Diagnostic::new(
DiagnosticSeverity::Error,
DiagnosticCode::InvalidDirectiveName,
Span::new(base_offset + index, base_offset + input.len()),
"text directive opener is malformed",
));
}
return None;
}
};
// The label text begins after the introducer byte, the name, and the `[`.
let label = label_source
.map(|source| {
parse_inlines(
source,
base_offset + index + 1 + name.len() + 1,
options,
definitions,
diagnostics,
)
})
.unwrap_or_default();
Some((
index + 1 + consumed,
Inline::TextDirective(TextDirective {
meta: NodeMeta::new(Some(Span::new(
base_offset + index,
base_offset + index + 1 + consumed,
))),
name,
label,
attributes,
}),
))
}
/// Parses a directive opener: a name, then an optional `[label]`, then an
/// optional `{attributes}` block.
///
/// Returns the name, the raw label text (if any), the parsed attributes and
/// the number of bytes consumed. Yields `None` when the name is not a valid
/// directive name or when a bracket or brace is opened but never closed.
fn parse_directive_opener(
    input: &str,
) -> Option<(String, Option<&str>, Vec<DirectiveAttribute>, usize)> {
    // The name is the longest prefix of ASCII alphanumerics, `_` and `-`.
    let name_len = input
        .char_indices()
        .find(|&(_, c)| !(c.is_ascii_alphanumeric() || c == '_' || c == '-'))
        .map_or(input.len(), |(offset, _)| offset);
    let name = &input[..name_len];
    if !is_directive_name(name) {
        return None;
    }
    let bytes = input.as_bytes();
    let mut consumed = name_len;
    let label = if bytes.get(consumed) == Some(&b'[') {
        let close = find_link_label_end(input, consumed)?;
        consumed = close + 1;
        Some(&input[name_len + 1..close])
    } else {
        None
    };
    let attributes = if bytes.get(consumed) == Some(&b'{') {
        let close = find_directive_attributes_close(input, consumed)?;
        let parsed = parse_attributes(&input[consumed + 1..close]);
        consumed = close + 1;
        parsed
    } else {
        Vec::new()
    };
    Some((name.into(), label, attributes, consumed))
}
fn directive_opener_looks_malformed(input: &str) -> bool {
let mut index = 0;
while let Some((next, char)) = next_char(input, index) {
if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
index = next;
} else {
break;
}
}
index > 0
&& is_directive_name(&input[..index])
&& matches!(input.as_bytes().get(index), Some(b'[' | b'{'))
}
/// Finds the `}` that closes the attribute block opened by the `{` at `open`.
///
/// A backslash hides the following byte, and a `}` inside single or double
/// quotes does not close the block. Returns `None` when `open` is not a `{`
/// or no closing brace is found.
fn find_directive_attributes_close(input: &str, open: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open).copied() != Some(b'{') {
        return None;
    }
    let mut active_quote: Option<u8> = None;
    let mut skip_next = false;
    for (position, &current) in bytes.iter().enumerate().skip(open + 1) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match (current, active_quote) {
            // Escapes are honoured both inside and outside quotes.
            (b'\\', _) => skip_next = true,
            (_, Some(delimiter)) => {
                if current == delimiter {
                    active_quote = None;
                }
            }
            (b'"' | b'\'', None) => active_quote = Some(current),
            (b'}', None) => return Some(position),
            _ => {}
        }
    }
    None
}
/// Parses the text between `{` and `}` of a directive into attributes.
///
/// Recognised forms, separated by whitespace skipped via `skip_spaces`:
/// - `#token` becomes an `id` attribute;
/// - `.token` becomes a `class` attribute;
/// - `name=value` becomes a named attribute with a value;
/// - `name` alone becomes a named attribute without a value.
///
/// Parsing stops early at the first position where no attribute name can be read.
fn parse_attributes(input: &str) -> Vec<DirectiveAttribute> {
let mut attributes = Vec::new();
let mut cursor = 0;
while cursor < input.len() {
cursor = skip_spaces(input, cursor);
if cursor >= input.len() {
break;
}
if input.as_bytes().get(cursor) == Some(&b'#') {
let (id, next) = parse_attribute_token(input, cursor + 1);
// A bare `#` contributes nothing but is still consumed.
if !id.is_empty() {
attributes.push(DirectiveAttribute {
name: "id".into(),
value: Some(id.into()),
});
}
cursor = next;
continue;
}
if input.as_bytes().get(cursor) == Some(&b'.') {
let (class, next) = parse_attribute_token(input, cursor + 1);
// A bare `.` contributes nothing but is still consumed.
if !class.is_empty() {
attributes.push(DirectiveAttribute {
name: "class".into(),
value: Some(class.into()),
});
}
cursor = next;
continue;
}
let (name, next) = parse_attribute_name(input, cursor);
// An empty name (for example a leading `=`) ends parsing.
if name.is_empty() {
break;
}
cursor = skip_spaces(input, next);
if input.as_bytes().get(cursor) == Some(&b'=') {
cursor = skip_spaces(input, cursor + 1);
if let Some((value, next)) = parse_attribute_value(input, cursor) {
attributes.push(DirectiveAttribute {
name: name.into(),
value: Some(value),
});
cursor = next;
} else {
// No value could be read: record an empty value and leave the cursor where it
// is; the remaining text is then re-read as further attributes.
attributes.push(DirectiveAttribute {
name: name.into(),
value: Some(String::new()),
});
}
} else {
attributes.push(DirectiveAttribute {
name: name.into(),
value: None,
});
}
}
attributes
}
/// Reads the run of non-whitespace characters that starts at `index`.
///
/// Returns the token and the byte offset just past it.
fn parse_attribute_token(input: &str, index: usize) -> (&str, usize) {
    let tail = &input[index..];
    let length = tail.find(|c: char| c.is_whitespace()).unwrap_or(tail.len());
    (&tail[..length], index + length)
}
/// Reads an attribute name starting at `index`, stopping at the first
/// whitespace character or `=`.
///
/// Returns the name and the byte offset just past it.
fn parse_attribute_name(input: &str, index: usize) -> (&str, usize) {
    let tail = &input[index..];
    let length = tail
        .find(|c: char| c == '=' || c.is_whitespace())
        .unwrap_or(tail.len());
    (&tail[..length], index + length)
}
/// Parses an attribute value starting at `index`.
///
/// A value that opens with `"` or `'` runs to the matching unescaped quote and
/// has ASCII-punctuation escapes resolved; `None` is returned when the quote
/// is never closed. Any other value is a bare token ending at whitespace.
///
/// Returns the decoded value and the byte offset just past it.
fn parse_attribute_value(input: &str, index: usize) -> Option<(String, usize)> {
    let quote = match input.as_bytes().get(index) {
        Some(b'"') => Some('"'),
        Some(b'\'') => Some('\''),
        _ => None,
    };
    if let Some(quote) = quote {
        let mut cursor = index + 1;
        while cursor < input.len() {
            let (next, char) = next_char(input, cursor)?;
            // Compare whole characters: truncating `char as u8` would make
            // code points such as U+0122 look like a closing `"`.
            if char == quote && !is_escaped_at(input, cursor) {
                return Some((unescape_ascii_punctuation(&input[index + 1..cursor]), next));
            }
            cursor = next;
        }
        return None;
    }
    let (value, next) = parse_attribute_token(input, index);
    Some((
        unescape_selected(value, |char| matches!(char, '\\' | '&')),
        next,
    ))
}
/// Pieces of a code span as read from the source text.
struct CodeSpanSource {
// Content after line-ending and padding normalization.
value: String,
// Content exactly as written between the fences.
raw: String,
// Number of backticks in the opening fence.
fence_length: usize,
}
/// Parses a code span whose opening backtick run starts at `index`.
///
/// Returns the offset just past the closing run and the span's contents, or
/// `None` when no later backtick run has the same length as the opener.
fn parse_code_span(input: &str, index: usize) -> Option<(usize, CodeSpanSource)> {
    let fence_length = input[index..]
        .bytes()
        .take_while(|&byte| byte == b'`')
        .count();
    let content_start = index + fence_length;
    let content_end = find_code_span_close(input, content_start, fence_length)?;
    let raw = &input[content_start..content_end];
    let source = CodeSpanSource {
        value: normalize_code_span(raw),
        raw: raw.into(),
        fence_length,
    };
    Some((content_end + fence_length, source))
}
/// Searches from `start` for a backtick run of exactly `marker_len` bytes and
/// returns the offset where that run begins.
///
/// Runs of any other length are skipped whole.
fn find_code_span_close(input: &str, start: usize, marker_len: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = start;
    while cursor < bytes.len() {
        if bytes[cursor] == b'`' {
            let run_len = bytes[cursor..]
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            if run_len == marker_len {
                return Some(cursor);
            }
            cursor += run_len;
        } else {
            cursor = match next_char(input, cursor) {
                Some((next, _)) => next,
                None => bytes.len(),
            };
        }
    }
    None
}
/// Normalizes raw code-span content.
///
/// Each line ending (`\r\n`, `\r` or `\n`) becomes a single space. If the
/// result both starts and ends with a space and is not made of spaces only,
/// one space is removed from each end.
fn normalize_code_span(input: &str) -> String {
    let mut normalized = String::new();
    let mut chars = input.chars().peekable();
    while let Some(current) = chars.next() {
        match current {
            '\r' => {
                // Fold a following `\n` into the same line ending.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                normalized.push(' ');
            }
            '\n' => normalized.push(' '),
            other => normalized.push(other),
        }
    }
    let padded = normalized.starts_with(' ') && normalized.ends_with(' ');
    let only_spaces = normalized.chars().all(|char| char == ' ');
    if padded && !only_spaces {
        String::from(&normalized[1..normalized.len() - 1])
    } else {
        normalized
    }
}
fn can_open_delimited(input: &str, index: usize, marker_len: usize) -> bool {
delimiter_flanking(input, index, marker_len).left
}
fn can_close_delimited(input: &str, index: usize, marker_len: usize) -> bool {
delimiter_flanking(input, index, marker_len).right
}
/// Searches from `start` for the occurrence of `marker` that closes a
/// delimited inline, returning its byte offset.
///
/// Candidates are skipped when they are backslash-escaped, when they sit
/// directly before the `]` of an enclosing link, or (for one-byte markers at
/// nesting level zero) when they begin a longer run of the same marker.
/// Occurrences that can open but not close raise the nesting level, and each
/// nested closer lowers it again. `underscore` selects the underscore variants
/// of the open/close checks.
fn find_closing_delimiter(
input: &str,
start: usize,
marker: &str,
underscore: bool,
) -> Option<usize> {
let marker_len = marker.len();
let mut cursor = start;
let mut nested = 0usize;
while cursor <= input.len() {
// No further occurrence of the marker means there is no closer.
let candidate = input[cursor..].find(marker).map(|offset| cursor + offset)?;
if is_escaped_at(input, candidate) {
cursor = candidate + marker_len;
continue;
}
if delimiter_candidate_precedes_link_close(input, start, candidate, marker_len) {
cursor = candidate + marker_len;
continue;
}
// Skip the whole longer run so it is not matched piecewise.
if marker_len == 1 && nested == 0 && starts_longer_delimiter_run(input, candidate, marker) {
cursor = candidate + delimiter_run_len(input, candidate, marker);
continue;
}
let can_open = if underscore {
can_open_underscore(input, candidate, marker_len)
} else {
can_open_delimited(input, candidate, marker_len)
};
let can_close = if underscore {
can_close_underscore(input, candidate, marker_len)
} else {
can_close_delimited(input, candidate, marker_len)
};
// Closing takes priority over opening when a candidate could do both.
if can_close {
if nested == 0 {
return Some(candidate);
}
nested -= 1;
cursor = candidate + marker_len;
continue;
}
if can_open {
nested += 1;
}
cursor = candidate + marker_len;
}
None
}
/// Returns the offset of the first `~` at or after `start` that is unescaped
/// and able to close a single-tilde delete span.
fn find_single_tilde_delete_close(input: &str, start: usize) -> Option<usize> {
    if start >= input.len() {
        return None;
    }
    input[start..]
        .match_indices('~')
        .map(|(offset, _)| start + offset)
        .find(|&candidate| {
            !is_escaped_at(input, candidate) && single_tilde_can_close_delete(input, candidate)
        })
}
/// True when the `~` at `index` stands alone (a run of exactly one), is
/// left-flanking, and is not wedged between two alphanumeric characters.
fn single_tilde_can_open_delete(input: &str, index: usize) -> bool {
    if !starts_exact_byte_run(input, index, b'~', 1) {
        return false;
    }
    if !can_open_delimited(input, index, 1) {
        return false;
    }
    !tilde_is_alphanumeric_interior(input, index)
}
/// True when the `~` at `index` stands alone (a run of exactly one), is
/// right-flanking, and is not wedged between two alphanumeric characters.
fn single_tilde_can_close_delete(input: &str, index: usize) -> bool {
    if !starts_exact_byte_run(input, index, b'~', 1) {
        return false;
    }
    if !can_close_delimited(input, index, 1) {
        return false;
    }
    !tilde_is_alphanumeric_interior(input, index)
}
/// True when single-tilde strikethrough is enabled in `options` and the `~`
/// at `index` both opens a delete span and has a matching closer later on.
fn single_tilde_delete_takes_precedence(
    options: &SyntaxOptions,
    input: &str,
    index: usize,
) -> bool {
    let enabled =
        options.constructs.gfm_strikethrough && options.parse.single_tilde_strikethrough;
    enabled
        && single_tilde_can_open_delete(input, index)
        && find_single_tilde_delete_close(input, index + 1).is_some()
}
/// True when the one-byte marker at `index` has an alphanumeric character
/// directly on both sides.
fn tilde_is_alphanumeric_interior(input: &str, index: usize) -> bool {
    let (before, rest) = input.split_at(index);
    // Drop the marker byte itself to reach the text that follows it.
    let after = &rest[1..];
    matches!(
        (before.chars().next_back(), after.chars().next()),
        (Some(left), Some(right)) if left.is_alphanumeric() && right.is_alphanumeric()
    )
}
/// True when `index` is the first byte of a run of `marker` bytes that is
/// exactly `len` bytes long.
fn starts_exact_byte_run(input: &str, index: usize, marker: u8, len: usize) -> bool {
    if input.as_bytes().get(index) != Some(&marker) {
        return false;
    }
    let begins_run = delimiter_byte_run_start(input, index, marker) == index;
    begins_run && delimiter_byte_run_len(input, index, marker) == len
}
/// Walks backwards from `index` over unescaped `marker` bytes and returns the
/// offset where that run begins (`index` itself when nothing precedes it).
fn delimiter_byte_run_start(input: &str, index: usize, marker: u8) -> usize {
    let bytes = input.as_bytes();
    (0..index)
        .rev()
        .take_while(|&position| bytes[position] == marker && !is_escaped_at(input, position))
        .last()
        .unwrap_or(index)
}
/// Counts consecutive `marker` bytes starting at `index`; zero when `index`
/// is out of range or does not hold `marker`.
fn delimiter_byte_run_len(input: &str, index: usize, marker: u8) -> usize {
    input.as_bytes().get(index..).map_or(0, |tail| {
        tail.iter().take_while(|byte| **byte == marker).count()
    })
}
/// Finds the first unescaped `marker` byte at or after `start` on the same line.
///
/// Returns `None` when a line ending comes first, when no marker is found, or
/// when the marker sits exactly at `start` (empty content).
fn find_simple_inline_close(input: &str, start: usize, marker: u8) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut position = start;
    while let Some(&byte) = bytes.get(position) {
        if byte == b'\\' {
            // Skip the backslash and, when present, the character it escapes.
            position += 1;
            if position >= bytes.len() {
                break;
            }
            position = next_char(input, position)?.0;
            continue;
        }
        if matches!(byte, b'\n' | b'\r') {
            return None;
        }
        if byte == marker {
            return if position > start { Some(position) } else { None };
        }
        position = next_char(input, position)?.0;
    }
    None
}
/// Finds the `||` that closes a spoiler, scanning from `start` on one line.
///
/// The closer must lie after `start` and must not be directly preceded by
/// another `|`. Backslash escapes are skipped; a line ending aborts the search.
fn find_spoiler_close(input: &str, start: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut position = start;
    // Two bytes are needed for a closer, so stop one byte before the end.
    while position + 1 < bytes.len() {
        let byte = bytes[position];
        if byte == b'\n' || byte == b'\r' {
            return None;
        }
        if byte == b'\\' {
            position += 1;
            if position < bytes.len() {
                position = next_char(input, position)?.0;
            }
            continue;
        }
        let closes = byte == b'|'
            && bytes[position + 1] == b'|'
            && position > start
            && bytes[position - 1] != b'|';
        if closes {
            return Some(position);
        }
        position = next_char(input, position)?.0;
    }
    None
}
/// True when `marker` occurs at `index`, is not preceded by another `marker`,
/// and is immediately followed by one — the first marker of a longer run.
fn starts_longer_delimiter_run(input: &str, index: usize, marker: &str) -> bool {
    let (before, from) = input.split_at(index);
    match from.strip_prefix(marker) {
        Some(rest) => !before.ends_with(marker) && rest.starts_with(marker),
        None => false,
    }
}
/// Returns the byte length of the run of back-to-back `marker` copies that
/// starts at `index`.
fn delimiter_run_len(input: &str, index: usize, marker: &str) -> usize {
    let mut rest = &input[index..];
    let mut length = 0;
    while let Some(tail) = rest.strip_prefix(marker) {
        rest = tail;
        length += marker.len();
    }
    length
}
/// Reports whether the delimiter at `candidate` sits directly before the `]`
/// that closes a bracket opened between `start` and `candidate`.
///
/// The check requires `](` or `][` right after the marker, then counts
/// unescaped brackets from `start` (skipping code spans) to see whether one is
/// still open at `candidate`.
fn delimiter_candidate_precedes_link_close(
input: &str,
start: usize,
candidate: usize,
marker_len: usize,
) -> bool {
let bytes = input.as_bytes();
if bytes.get(candidate + marker_len) != Some(&b']') {
return false;
}
if !matches!(bytes.get(candidate + marker_len + 1), Some(b'(' | b'[')) {
return false;
}
let mut depth = 0usize;
let mut cursor = start;
while cursor < candidate {
let Some((next, char)) = next_char(input, cursor) else {
break;
};
match char {
'\\' => {
// Skip the escaped character as well, if there is one.
cursor = next_char(input, next)
.map(|(after_escape, _)| after_escape)
.unwrap_or(next);
continue;
}
'`' => {
// Brackets inside a complete code span do not count.
if let Some((end, _)) = parse_code_span(input, cursor) {
cursor = end;
continue;
}
}
'[' => depth += 1,
// Stray closers never drive the depth below zero.
']' => depth = depth.saturating_sub(1),
_ => {}
}
cursor = next;
}
depth > 0
}
fn can_open_underscore(input: &str, index: usize, marker_len: usize) -> bool {
let flanking = delimiter_flanking(input, index, marker_len);
flanking.left
&& (!flanking.right || flanking.previous.is_some_and(|c| c.is_ascii_punctuation()))
}
fn can_close_underscore(input: &str, index: usize, marker_len: usize) -> bool {
let flanking = delimiter_flanking(input, index, marker_len);
flanking.right && (!flanking.left || flanking.next.is_some_and(|c| c.is_ascii_punctuation()))
}
/// Flanking classification of a delimiter run plus its neighbouring characters.
#[derive(Clone, Copy)]
struct DelimiterFlanking {
// The run is left-flanking.
left: bool,
// The run is right-flanking.
right: bool,
// Character immediately before the run, if any.
previous: Option<char>,
// Character immediately after the run, if any.
next: Option<char>,
}
fn delimiter_flanking(input: &str, index: usize, marker_len: usize) -> DelimiterFlanking {
let previous = input[..index].chars().next_back();
let next = input[index + marker_len..].chars().next();
let previous_whitespace = previous.is_none_or(char::is_whitespace);
let next_whitespace = next.is_none_or(char::is_whitespace);
let previous_punctuation = previous.is_some_and(is_flanking_punctuation);
let next_punctuation = next.is_some_and(is_flanking_punctuation);
let left = next.is_some()
&& !next_whitespace
&& !(next_punctuation && !previous_whitespace && !previous_punctuation);
let right = previous.is_some()
&& !previous_whitespace
&& !(previous_punctuation && !next_whitespace && !next_punctuation);
DelimiterFlanking {
left,
right,
previous,
next,
}
}
/// Parses inline math starting at `index`.
///
/// The `$`-backtick code form is tried first, then a one- or two-dollar
/// delimited form. Single-dollar content is normalized; double-dollar content
/// is kept verbatim. Returns the end offset, the value and the math kind.
fn parse_math_inline(input: &str, index: usize) -> Option<(usize, String, MathInlineKind)> {
    if let Some((end, value)) = parse_math_code_inline(input, index) {
        return Some((end, value, MathInlineKind::Code));
    }
    let open_dollars = input.as_bytes()[index..]
        .iter()
        .take_while(|byte| **byte == b'$')
        .count();
    if !(1..=2).contains(&open_dollars) {
        return None;
    }
    let content_start = index + open_dollars;
    let close = scan_to_closing_dollar(input, content_start, open_dollars)?;
    let content_end = close - open_dollars;
    if content_end <= content_start {
        return None;
    }
    let raw = &input[content_start..content_end];
    let value = match open_dollars {
        1 => normalize_math_text(raw),
        _ => String::from(raw),
    };
    let dollars = u8::try_from(open_dollars).unwrap_or(u8::MAX);
    Some((close, value, MathInlineKind::Dollar { dollars }))
}
/// Scans from `start` for the closing `$` run of dollar-delimited math and
/// returns the offset just past it.
///
/// Extra rules apply only to the single-dollar form (`open_dollars == 1`):
/// the content may not begin with whitespace, a `$` preceded by whitespace
/// aborts the search, a `$` preceded by a backslash is skipped, and a `$`
/// followed by an ASCII digit aborts the search.
///
/// NOTE(review): `bytes[cursor - 1]` assumes `start >= 1` or that `start` is
/// not itself a `$`; the visible caller passes the offset just past the
/// opening run — confirm for other callers.
fn scan_to_closing_dollar(input: &str, start: usize, open_dollars: usize) -> Option<usize> {
let bytes = input.as_bytes();
if open_dollars == 1 && bytes.get(start).is_some_and(|byte| is_math_space(*byte)) {
return None;
}
let mut cursor = start;
loop {
// Advance to the next `$`, or give up at the end of input.
while cursor < bytes.len() && bytes[cursor] != b'$' {
cursor += 1;
}
if cursor >= bytes.len() {
return None;
}
let prev = bytes[cursor - 1];
if open_dollars == 1 && is_math_space(prev) {
return None;
}
if open_dollars == 1 && prev == b'\\' {
cursor += 1;
continue;
}
// Count at most `open_dollars` consecutive `$` bytes.
let run = bytes[cursor..]
.iter()
.take(open_dollars)
.take_while(|byte| **byte == b'$')
.count();
if open_dollars == 1 && bytes.get(cursor + run).is_some_and(u8::is_ascii_digit) {
return None;
}
if run == open_dollars {
return Some(cursor + run);
}
cursor += run;
}
}
/// True for the bytes treated as whitespace around math: tab, line feed,
/// carriage return and space.
fn is_math_space(byte: u8) -> bool {
    [b'\t', b'\n', b'\r', b' '].contains(&byte)
}
/// Normalizes single-dollar math content.
///
/// Each line ending (`\r\n`, `\r` or `\n`) becomes one space. If the result
/// both starts and ends with a space and is not made of spaces only, one
/// space is removed from each end.
fn normalize_math_text(input: &str) -> String {
    let mut normalized = String::new();
    let mut after_carriage_return = false;
    for current in input.chars() {
        // The `\n` of a `\r\n` pair was already covered by the `\r`.
        let second_half_of_crlf = after_carriage_return && current == '\n';
        after_carriage_return = current == '\r';
        if second_half_of_crlf {
            continue;
        }
        normalized.push(if current == '\r' || current == '\n' {
            ' '
        } else {
            current
        });
    }
    let has_other_content = normalized.chars().any(|char| char != ' ');
    if has_other_content && normalized.starts_with(' ') && normalized.ends_with(' ') {
        String::from(&normalized[1..normalized.len() - 1])
    } else {
        normalized
    }
}
/// Parses the `$`-backtick math form: a `$` and backtick opener, non-empty
/// content, then a backtick and `$` closer.
///
/// Returns the offset just past the closer and the content verbatim.
fn parse_math_code_inline(input: &str, index: usize) -> Option<(usize, String)> {
    let body = input[index..].strip_prefix("$`")?;
    let length = body.find("`$")?;
    if length == 0 {
        return None;
    }
    // Two bytes of opener, the content, then two bytes of closer.
    let content_start = index + 2;
    Some((content_start + length + 2, body[..length].into()))
}
/// Parses an inline link resource starting at the `(` at byte offset `open`.
///
/// Accepted shapes are `()`, `( "title" )` (title only, after leading
/// whitespace), `(destination)` and `(destination "title")`. Whitespace
/// between the parts is skipped by `skip_link_resource_space_with_info`,
/// which fails on a second line break.
///
/// Returns the offset just past the closing `)` together with the parsed
/// destination and title, or `None` when the text is not a valid resource.
fn parse_link_resource(input: &str, open: usize) -> Option<(usize, ParsedLinkResource)> {
let bytes = input.as_bytes();
if bytes.get(open) != Some(&b'(') {
return None;
}
let (mut cursor, initial_space) = skip_link_resource_space_with_info(input, open + 1)?;
// Empty resource: `()` or only whitespace inside the parentheses.
if bytes.get(cursor) == Some(&b')') {
return Some((
cursor + 1,
ParsedLinkResource {
destination: String::new(),
destination_kind: LinkDestinationKind::Omitted,
title: None,
title_kind: None,
},
));
}
// After leading whitespace, a title opener means the destination was omitted.
if initial_space && matches!(bytes.get(cursor), Some(b'"' | b'\'' | b'(')) {
let (title, title_kind, next) = parse_link_title(input, cursor)?;
cursor = skip_link_resource_space(input, next)?;
if bytes.get(cursor) == Some(&b')') {
return Some((
cursor + 1,
ParsedLinkResource {
destination: String::new(),
destination_kind: LinkDestinationKind::Omitted,
title: Some(title),
title_kind: Some(title_kind),
},
));
}
return None;
}
let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
let (after_destination, had_space) = skip_link_resource_space_with_info(input, next)?;
cursor = after_destination;
if bytes.get(cursor) == Some(&b')') {
return Some((
cursor + 1,
ParsedLinkResource {
destination,
destination_kind,
title: None,
title_kind: None,
},
));
}
// A title must be separated from the destination by whitespace.
if !had_space {
return None;
}
let (title, title_kind, next) = parse_link_title(input, cursor)?;
cursor = skip_link_resource_space(input, next)?;
if bytes.get(cursor) == Some(&b')') {
Some((
cursor + 1,
ParsedLinkResource {
destination,
destination_kind,
title: Some(title),
title_kind: Some(title_kind),
},
))
} else {
None
}
}
/// Parses a link destination starting at `index`.
///
/// Two forms are supported:
/// - `<...>`: runs to the first unescaped `>` on the same line; an unescaped
///   `<` or a line ending inside makes it invalid.
/// - bare: runs until a space, an ASCII control character or an unbalanced
///   `)`. Unescaped parentheses must balance and may nest at most 32 deep.
///   Spaces and control characters end the destination even inside
///   parentheses, which then leaves them unbalanced and rejects the input.
///
/// Returns the unescaped destination, its kind and the offset just past it.
fn parse_link_destination(
    input: &str,
    index: usize,
) -> Option<(String, LinkDestinationKind, usize)> {
    if input.as_bytes().get(index) == Some(&b'<') {
        let mut cursor = index + 1;
        while cursor < input.len() {
            let (next, char) = next_char(input, cursor)?;
            if char == '>' && !is_escaped_at(input, cursor) {
                return Some((
                    unescape_ascii_punctuation(&input[index + 1..cursor]),
                    LinkDestinationKind::Angle,
                    next,
                ));
            }
            if (char == '<' && !is_escaped_at(input, cursor)) || char == '\n' || char == '\r' {
                return None;
            }
            cursor = next;
        }
        return None;
    }
    let mut cursor = index;
    let mut depth = 0usize;
    while cursor < input.len() {
        let (next, char) = next_char(input, cursor)?;
        // A bare destination can never contain a space or control character,
        // regardless of parenthesis depth.
        if char == ' ' || char.is_ascii_control() {
            break;
        }
        if char == '(' && !is_escaped_at(input, cursor) {
            depth += 1;
            if depth > 32 {
                return None;
            }
        } else if char == ')' && !is_escaped_at(input, cursor) {
            if depth == 0 {
                break;
            }
            depth -= 1;
        }
        cursor = next;
    }
    // Empty destinations and unbalanced parentheses are both invalid.
    if cursor == index || depth > 0 {
        None
    } else {
        Some((
            unescape_ascii_punctuation(&input[index..cursor]),
            LinkDestinationKind::Bare,
            cursor,
        ))
    }
}
/// Parses a link title starting at `index`, delimited by `"..."`, `'...'` or
/// `(...)`.
///
/// The title ends at the first unescaped closer. It is rejected when it
/// contains a blank line, when a parenthesised title contains an unescaped
/// `(`, or when no closer is found. Returns the unescaped title, its kind and
/// the offset just past the closer.
fn parse_link_title(input: &str, index: usize) -> Option<(String, LinkTitleKind, usize)> {
    let (closer, title_kind) = match *input.as_bytes().get(index)? {
        b'"' => ('"', LinkTitleKind::DoubleQuote),
        b'\'' => ('\'', LinkTitleKind::SingleQuote),
        b'(' => (')', LinkTitleKind::Paren),
        _ => return None,
    };
    let body_start = index + 1;
    let mut cursor = body_start;
    while cursor < input.len() {
        let (next, char) = next_char(input, cursor)?;
        if char == closer && !is_escaped_at(input, cursor) {
            let body = &input[body_start..cursor];
            return if contains_blank_line(body) {
                None
            } else {
                Some((unescape_ascii_punctuation(body), title_kind, next))
            };
        }
        // Only the parenthesised form forbids a nested opener.
        if closer == ')' && char == '(' && !is_escaped_at(input, cursor) {
            return None;
        }
        cursor = next;
    }
    None
}
/// Reports whether any line of `input` other than the first and the last is
/// empty or whitespace-only.
fn contains_blank_line(input: &str) -> bool {
    let has_line_break = input.bytes().any(|byte| byte == b'\n' || byte == b'\r');
    if !has_line_break {
        return false;
    }
    let lines = collect_lines(input, 0);
    let line_count = lines.len();
    lines.iter().enumerate().any(|(position, line)| {
        position > 0 && position + 1 < line_count && line.text.trim().is_empty()
    })
}
/// Skips link-resource whitespace from `index` and returns the new offset,
/// discarding the flag that says whether anything was skipped.
fn skip_link_resource_space(input: &str, index: usize) -> Option<usize> {
    let (next, _had_space) = skip_link_resource_space_with_info(input, index)?;
    Some(next)
}
/// Skips spaces, tabs and at most one line ending starting at `index`.
///
/// `\r\n` counts as a single line ending. Returns the offset of the first
/// byte that was not skipped and whether anything was skipped at all, or
/// `None` when a second line ending is encountered.
fn skip_link_resource_space_with_info(input: &str, mut index: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let origin = index;
    let mut saw_line_break = false;
    loop {
        match bytes.get(index) {
            Some(b' ' | b'\t') => index += 1,
            Some(&eol @ (b'\n' | b'\r')) => {
                if saw_line_break {
                    return None;
                }
                saw_line_break = true;
                let is_crlf = eol == b'\r' && bytes.get(index + 1) == Some(&b'\n');
                index += if is_crlf { 2 } else { 1 };
            }
            _ => break,
        }
    }
    // Every skipped byte advances the offset, so movement means whitespace was seen.
    Some((index, index != origin))
}
/// Parses a character reference that begins with `&` at byte `index`.
///
/// Supported forms:
/// - hexadecimal `&#x...;` / `&#X...;` with 1 to 6 hex digits;
/// - decimal `&#...;` with 1 to 7 digits;
/// - named `&name;`, resolved through `named_character_reference`, with at
///   most 32 bytes between the start of the reference and the `;`.
///
/// Returns the offset just past the `;` and the replacement text. `None` is
/// returned when `index` is not a valid offset, when the text there does not
/// start with `&`, or when the reference is malformed or unknown.
pub(crate) fn parse_character_reference(input: &str, index: usize) -> Option<(usize, String)> {
    let rest = input.get(index..)?;
    // Every form starts with `&`; checking once keeps the named branch from
    // decoding arbitrary text or slicing inside a multi-byte character.
    let body = rest.strip_prefix('&')?;
    if let Some(numeric) = body.strip_prefix('#') {
        let (digits_source, radix, max_digits, prefix_len) =
            match numeric.strip_prefix('x').or_else(|| numeric.strip_prefix('X')) {
                Some(hex) => (hex, 16, 6, 3),
                None => (numeric, 10, 7, 2),
            };
        let digits = digits_source.find(';')?;
        if digits == 0 || digits > max_digits {
            return None;
        }
        let text = &digits_source[..digits];
        let all_valid = text.bytes().all(|byte| {
            if radix == 16 {
                byte.is_ascii_hexdigit()
            } else {
                byte.is_ascii_digit()
            }
        });
        if !all_valid {
            return None;
        }
        let value = u32::from_str_radix(text, radix).ok()?;
        return Some((
            index + prefix_len + digits + 1,
            character_reference_value(value).into(),
        ));
    }
    let name_len = body.find(';')?;
    // Same limit as before: the `;` may sit at most 32 bytes after the `&`.
    if name_len + 1 > 32 {
        return None;
    }
    let name = &body[..name_len];
    named_character_reference(name).map(|value| (index + name_len + 2, value.into()))
}
/// Maps a numeric character reference to a character.
///
/// Zero and values that are not valid Unicode scalar values become U+FFFD.
pub(crate) fn character_reference_value(value: u32) -> char {
    match char::from_u32(value) {
        Some('\0') | None => '\u{FFFD}',
        Some(decoded) => decoded,
    }
}
/// Reports whether the byte at `index` is escaped, i.e. preceded by an odd
/// number of consecutive backslashes.
pub(crate) fn is_escaped_at(input: &str, index: usize) -> bool {
    let backslashes = input.as_bytes()[..index]
        .iter()
        .rev()
        .take_while(|byte| **byte == b'\\')
        .count();
    backslashes % 2 == 1
}
/// Parses the part of a link reference definition after the label: a
/// destination, optionally followed by whitespace and a title.
///
/// The whole of `input` must be consumed (apart from surrounding whitespace);
/// otherwise `None` is returned.
fn parse_definition_destination_title(input: &str) -> Option<ParsedLinkResource> {
    let destination_start = skip_link_resource_space(input, 0)?;
    let (destination, destination_kind, destination_end) =
        parse_link_destination(input, destination_start)?;
    let (title_start, had_space) = skip_link_resource_space_with_info(input, destination_end)?;
    if title_start >= input.len() {
        return Some(ParsedLinkResource {
            destination,
            destination_kind,
            title: None,
            title_kind: None,
        });
    }
    // Anything after the destination must be separated from it by whitespace.
    if !had_space {
        return None;
    }
    let (title, title_kind, title_end) = parse_link_title(input, title_start)?;
    if skip_link_resource_space(input, title_end)? != input.len() {
        return None;
    }
    Some(ParsedLinkResource {
        destination,
        destination_kind,
        title: Some(title),
        title_kind: Some(title_kind),
    })
}
/// True when the first non-whitespace character of `input` is one of the
/// title openers `"`, `'` or `(`.
fn line_can_start_definition_title(input: &str) -> bool {
    input
        .trim_start()
        .starts_with(|char: char| matches!(char, '"' | '\'' | '('))
}
/// Unescapes `input`, removing the backslash in front of any ASCII
/// punctuation character.
fn unescape_ascii_punctuation(input: &str) -> String {
    let is_escapable = |candidate: char| candidate.is_ascii_punctuation();
    unescape_selected(input, is_escapable)
}
/// Unescapes `input`, removing the backslash in front of `&` or any ASCII
/// punctuation character.
fn unescape_string(input: &str) -> String {
    let is_escapable = |candidate: char| candidate == '&' || candidate.is_ascii_punctuation();
    unescape_selected(input, is_escapable)
}
/// Decodes character references and backslash escapes in `input`.
///
/// - A valid character reference is replaced by its value.
/// - A backslash before a character accepted by `should_unescape` is dropped;
///   before any other character both are kept. A trailing backslash is kept.
/// - NUL characters outside escapes are replaced with U+FFFD.
fn unescape_selected(input: &str, should_unescape: impl Fn(char) -> bool) -> String {
    let mut output = String::new();
    let mut cursor = 0;
    while cursor < input.len() {
        if input.as_bytes()[cursor] == b'&' {
            if let Some((end, value)) = parse_character_reference(input, cursor) {
                output.push_str(&value);
                cursor = end;
                continue;
            }
        }
        let (next, current) = next_char(input, cursor).expect("valid UTF-8 byte index");
        if current != '\\' {
            output.push(if current == '\0' { '\u{FFFD}' } else { current });
            cursor = next;
            continue;
        }
        match next_char(input, next) {
            Some((after_escape, escaped)) => {
                // Keep the backslash when the escape is not recognised.
                if !should_unescape(escaped) {
                    output.push('\\');
                }
                output.push(escaped);
                cursor = after_escape;
            }
            None => {
                output.push('\\');
                cursor = next;
            }
        }
    }
    output
}
/// Appends `line` to `output`, separated from existing content by `\n`.
fn push_line(output: &mut String, line: &str) {
    let separator = if output.is_empty() { "" } else { "\n" };
    output.push_str(separator);
    output.push_str(line);
}
/// Appends `\n` to `output` unless it is empty or already ends with a line
/// ending.
fn ensure_line_separator(output: &mut String) {
    if output.is_empty() || ends_with_line_ending(output) {
        return;
    }
    output.push('\n');
}
/// True when `input` ends with `\n` or `\r`.
fn ends_with_line_ending(input: &str) -> bool {
    matches!(input.as_bytes().last(), Some(b'\n' | b'\r'))
}
/// Moves any pending `text` into `nodes` as a text node spanning
/// `text_start..end`, leaving `text` empty. Does nothing when `text` is empty.
fn flush_text(nodes: &mut Vec<Inline>, text: &mut String, text_start: usize, end: usize) {
    if text.is_empty() {
        return;
    }
    let meta = NodeMeta::new(Some(Span::new(text_start, end)));
    let value = core::mem::take(text);
    nodes.push(Inline::Text(Text { meta, value }));
}
/// True when an escaped `.` must be preserved: links are disallowed in the
/// current context, GFM literal autolinks are enabled, and `text` starts with
/// `www.`, `http://` or `https://`.
fn gfm_link_label_preserves_url_dot_escape(
    text: &str,
    escaped: char,
    options: &SyntaxOptions,
    context: InlineContext,
) -> bool {
    if escaped != '.' || context.allow_links || !options.constructs.gfm_autolink_literal {
        return false;
    }
    ["www.", "http://", "https://"]
        .iter()
        .any(|prefix| text.starts_with(*prefix))
}
/// Decodes the character at byte `index`, returning the offset just past it
/// together with the character, or `None` at the end of `input`.
fn next_char(input: &str, index: usize) -> Option<(usize, char)> {
    input[index..]
        .chars()
        .next()
        .map(|decoded| (index + decoded.len_utf8(), decoded))
}
/// True when `value` counts as punctuation for delimiter flanking: ASCII
/// punctuation or anything the crate's Unicode punctuation table accepts.
fn is_flanking_punctuation(value: char) -> bool {
    if value.is_ascii_punctuation() {
        return true;
    }
    crate::unicode_punctuation::is_unicode_punctuation(value)
}
/// Normalizes a reference label for matching.
///
/// `ẞ` is expanded to `ss`, surrounding whitespace is removed, inner
/// whitespace runs collapse to one space, and case is folded by upper-casing
/// and then lower-casing the result.
pub(crate) fn normalize_label(label: &str) -> String {
    let expanded = label.replace('ẞ', "ss");
    let mut collapsed = String::with_capacity(expanded.len());
    for word in expanded.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Upper-casing first expands characters such as `ß` before lower-casing.
    collapsed.to_uppercase().to_lowercase()
}
/// Reports whether `definitions` contains the normalized form of `label`.
///
/// Empty labels and labels over the reference-label length limit never match.
fn definition_exists(definitions: &[String], label: &str) -> bool {
    let usable = !label.is_empty() && reference_label_is_within_limit(label);
    if !usable {
        return false;
    }
    let identifier = normalize_label(label);
    definitions.contains(&identifier)
}
fn reference_label_is_within_limit(label: &str) -> bool {
label.chars().take(REFERENCE_LABEL_MAX_CHARS + 1).count() <= REFERENCE_LABEL_MAX_CHARS
}
/// Strips the leading indent from `input` if it is at most three columns wide.
///
/// Returns `None` when the indent is four columns or more.
fn trim_up_to_three_spaces(input: &str) -> Option<&str> {
    let (columns, bytes) = leading_indent(input);
    (columns <= 3).then(|| &input[bytes..])
}
/// Detects an opening code fence at the very start of `input`.
///
/// Returns the fence marker kind and the run length when `input` begins with
/// three or more backticks or three or more tildes.
fn fence_start(input: &str) -> Option<(FenceMarker, usize)> {
    let (marker, fence_byte) = match *input.as_bytes().first()? {
        b'`' => (FenceMarker::Backtick, b'`'),
        b'~' => (FenceMarker::Tilde, b'~'),
        _ => return None,
    };
    let length = input.bytes().take_while(|&byte| byte == fence_byte).count();
    if length < 3 {
        return None;
    }
    Some((marker, length))
}
/// Reports whether `input` closes a fence opened with `length` copies of `marker`.
///
/// The line must start with at least `length` marker characters and contain
/// only whitespace after that run.
fn fence_close(input: &str, marker: FenceMarker, length: usize) -> bool {
    let fence_byte = match marker {
        FenceMarker::Backtick => b'`',
        FenceMarker::Tilde => b'~',
    };
    let run = input.bytes().take_while(|&byte| byte == fence_byte).count();
    if run < length {
        return false;
    }
    input[run..].trim().is_empty()
}
/// Removes an optional closing run of `#` characters from heading content.
///
/// Trailing whitespace is always trimmed. A trailing `#` run is removed only
/// when it makes up the whole text or is preceded by a space or tab; otherwise
/// the hashes are kept as part of the content.
fn trim_closing_hashes(input: &str) -> &str {
    let input = input.trim_end();
    let before = input.trim_end_matches('#');
    if before.len() == input.len() {
        // No trailing hashes at all.
        return input;
    }
    if before.is_empty() {
        // The text consists of hashes only.
        return "";
    }
    if before.ends_with([' ', '\t']) {
        before.trim_end()
    } else {
        input
    }
}
/// Recognises a bullet or ordered list marker at the start of `input`.
///
/// Returns `None` when the line is indented by four or more columns, when it
/// does not begin with a marker, or when the marker is not followed by a
/// space, a tab, or the end of the line. On success the result records the
/// number of indent bytes, the marker length, the content column computed by
/// `list_content_offset`, and the text that follows the marker padding.
fn list_marker_info(input: &str) -> Option<ListMarkerInfo<'_>> {
let trimmed = trim_up_to_three_spaces(input)?;
// Number of leading whitespace bytes removed above.
let indent = input.len() - trimmed.len();
let bytes = trimmed.as_bytes();
match bytes.first()? {
// Bullet markers: a single `-`, `*` or `+`.
b'-' | b'*' | b'+' if is_list_padding_byte(bytes.get(1).copied()) => {
let delimiter = match bytes[0] {
b'-' => ListDelimiter::Dash,
b'*' => ListDelimiter::Asterisk,
_ => ListDelimiter::Plus,
};
let (content_offset, content_indent) = list_content_offset(trimmed, 1, indent);
Some(ListMarkerInfo {
ordered: false,
start: None,
delimiter,
indent,
marker_len: 1,
content_indent,
content: &trimmed[content_offset..],
})
}
// Ordered markers: a run of digits followed by `.` or `)`.
byte if byte.is_ascii_digit() => {
let mut end = 0;
while bytes.get(end).is_some_and(|byte| byte.is_ascii_digit()) {
end += 1;
}
// Numbers longer than nine digits are not treated as list markers.
if end > 9 {
return None;
}
let delimiter = match bytes.get(end)? {
b'.' => ListDelimiter::Period,
b')' => ListDelimiter::Paren,
_ => return None,
};
if !is_list_padding_byte(bytes.get(end + 1).copied()) {
return None;
}
let start = trimmed[..end].parse().ok()?;
// The marker covers the digits plus the delimiter.
let marker_len = end + 1;
let (content_offset, content_indent) = list_content_offset(trimmed, marker_len, indent);
Some(ListMarkerInfo {
ordered: true,
start: Some(start),
delimiter,
indent,
marker_len,
content_indent,
content: &trimmed[content_offset..],
})
}
_ => None,
}
}
/// Computes where list item content starts after a marker.
///
/// `input` begins at the marker, `marker_len` is the marker's byte length and
/// `indent` is the column at which the marker starts. Returns the byte offset
/// of the content within `input` and the column at which content begins.
/// Tabs advance to the next multiple of four columns. When the padding after
/// the marker is blank to the end of the line, or is not one to four columns
/// wide, the content column is one past the marker.
fn list_content_offset(input: &str, marker_len: usize, indent: usize) -> (usize, usize) {
    let marker_end_column = indent + marker_len;
    let after_marker = input
        .as_bytes()
        .get(marker_len..)
        .filter(|rest| !rest.is_empty());
    let Some(after_marker) = after_marker else {
        // Nothing follows the marker.
        return (marker_len, marker_end_column + 1);
    };
    let mut column = marker_end_column;
    let mut consumed = 0usize;
    for &byte in after_marker {
        match byte {
            b' ' => column += 1,
            b'\t' => column += 4 - (column % 4),
            _ => break,
        }
        consumed += 1;
    }
    let cursor = marker_len + consumed;
    if consumed == after_marker.len() {
        // Only whitespace follows the marker.
        return (cursor, marker_end_column + 1);
    }
    match column.saturating_sub(marker_end_column) {
        1..=4 => (cursor, column),
        _ => (marker_len + 1, marker_end_column + 1),
    }
}
/// Returns the first-line content of a list item.
///
/// When the marker is directly followed by a tab, one column of that padding
/// is stripped with tab-stop-aware expansion; otherwise `marker.content` is
/// returned unchanged.
fn list_marker_first_content<'a>(input: &'a str, marker: ListMarkerInfo<'a>) -> Cow<'a, str> {
    match trim_up_to_three_spaces(input) {
        Some(trimmed) if trimmed[marker.marker_len..].starts_with('\t') => {
            let after_marker = &trimmed[marker.marker_len..];
            let marker_end_column = marker.indent + marker.marker_len;
            strip_leading_indent_columns_from(after_marker, 1, marker_end_column)
        }
        _ => Cow::Borrowed(marker.content),
    }
}
/// Reports whether `byte` can follow a list marker: end of line, space or tab.
fn is_list_padding_byte(byte: Option<u8>) -> bool {
    match byte {
        None => true,
        Some(value) => value == b' ' || value == b'\t',
    }
}
/// Reports whether two markers are of the same kind: both ordered or both
/// bullet, and using the same delimiter.
fn same_list_marker(left: ListMarkerInfo<'_>, right: ListMarkerInfo<'_>) -> bool {
    if left.ordered != right.ordered {
        return false;
    }
    left.delimiter == right.delimiter
}
/// Reports whether `input` starts a list item of the same kind as
/// `first_marker` whose marker is indented less than `content_indent`.
fn sibling_list_marker_at_line(
    input: &str,
    first_marker: ListMarkerInfo<'_>,
    content_indent: usize,
) -> bool {
    let Some(candidate) = list_marker_info(input) else {
        return false;
    };
    same_list_marker(first_marker, candidate) && candidate.indent < content_indent
}
/// Reports whether `input` starts with a list marker of the same kind as `first_marker`.
fn same_list_marker_line(input: &str, first_marker: ListMarkerInfo<'_>) -> bool {
    match list_marker_info(input) {
        Some(candidate) => same_list_marker(first_marker, candidate),
        None => false,
    }
}
/// Returns the index of the first line at or after `index` that is not blank.
///
/// Returns `lines.len()` when only blank lines remain, and `index` unchanged
/// when it is already out of range.
fn next_nonblank_line(lines: &[Line<'_>], mut index: usize) -> usize {
    while let Some(line) = lines.get(index) {
        if !line.text.trim().is_empty() {
            break;
        }
        index += 1;
    }
    index
}
/// Measures the leading run of spaces and tabs in `input`.
///
/// Returns `(columns, bytes)`: the visual width of the run, with tabs
/// advancing to the next multiple of four columns, and its length in bytes.
fn leading_indent(input: &str) -> (usize, usize) {
    let mut column = 0usize;
    let bytes = input
        .bytes()
        .take_while(|byte| match byte {
            b' ' => {
                column += 1;
                true
            }
            b'\t' => {
                column += 4 - (column % 4);
                true
            }
            _ => false,
        })
        .count();
    (column, bytes)
}
/// Returns the visual width, in columns, of the leading spaces and tabs of `input`.
fn leading_indent_columns(input: &str) -> usize {
    let (columns, _bytes) = leading_indent(input);
    columns
}
/// Strips up to `max_columns` columns of leading indent, treating `input` as
/// starting at column zero.
fn strip_leading_indent_columns(input: &str, max_columns: usize) -> Cow<'_, str> {
    let start_column = 0;
    strip_leading_indent_columns_from(input, max_columns, start_column)
}
/// Strips up to `max_columns` columns of leading spaces and tabs from `input`.
///
/// `start_column` is the column at which `input` begins; tabs advance to the
/// next multiple of four columns. The result borrows from `input` unless a
/// tab straddles the cut-off column, in which case the part of the tab beyond
/// the cut-off and all whitespace directly after it are expanded to spaces in
/// an owned string. An input consisting only of whitespace that never passes
/// the cut-off yields an empty string.
fn strip_leading_indent_columns_from(
input: &str,
max_columns: usize,
start_column: usize,
) -> Cow<'_, str> {
let mut column = start_column;
let target_column = start_column + max_columns;
for (index, byte) in input.as_bytes().iter().enumerate() {
// Column reached after consuming this byte.
let next = match *byte {
b' ' => column + 1,
b'\t' => column + (4 - (column % 4)),
// First non-whitespace byte: everything before it has been stripped.
_ => return Cow::Borrowed(&input[index..]),
};
if next > target_column {
if *byte == b'\t' && column < target_column {
// The tab spans the cut-off: keep the columns beyond it as spaces.
let residual = next - target_column;
let mut owned = String::with_capacity(residual + input.len() - (index + 1));
for _ in 0..residual {
owned.push(' ');
}
// Expand the remaining leading whitespace so later tabs keep their width.
let mut rest_column = next;
let mut rest_index = index + 1;
while let Some(rest_byte) = input.as_bytes().get(rest_index) {
match *rest_byte {
b' ' => {
owned.push(' ');
rest_column += 1;
rest_index += 1;
}
b'\t' => {
let width = 4 - (rest_column % 4);
for _ in 0..width {
owned.push(' ');
}
rest_column += width;
rest_index += 1;
}
_ => break,
}
}
owned.push_str(&input[rest_index..]);
return Cow::Owned(owned);
}
// The cut-off was already reached before this byte; keep it.
return Cow::Borrowed(&input[index..]);
}
column = next;
}
Cow::Borrowed("")
}
/// Removes the list indent from a continuation line.
///
/// Lines indented to at least `content_indent` lose exactly that many columns.
/// Lines indented less than that lose all their leading spaces and tabs.
fn strip_list_continuation(input: &str, content_indent: usize, list_indent: usize) -> Cow<'_, str> {
    let (indent_columns, indent_bytes) = leading_indent(input);
    if indent_columns >= content_indent {
        return strip_leading_indent_columns(input, content_indent);
    }
    let rest = if indent_columns > list_indent {
        &input[indent_bytes..]
    } else {
        trim_ascii_start(input)
    };
    Cow::Borrowed(rest)
}
/// Extracts a task-list marker from the first child when it is a paragraph.
///
/// Returns the checked state when a marker was found and removed.
fn take_task_marker_from_children(children: &mut [Block]) -> Option<bool> {
    match children.first_mut() {
        Some(Block::Paragraph(paragraph)) => take_task_marker_from_inlines(&mut paragraph.children),
        _ => None,
    }
}
/// Removes a leading task-list marker (`[ ]`, `[x]`, `[X]`) from `inlines`.
///
/// Three layouts are recognised, all requiring the first inline to be text:
/// 1. the marker plus a space or tab at the start of the first text node;
/// 2. the marker alone in the first text node, followed by a soft break and
///    further content;
/// 3. `[` alone in the first text node, a soft break, then a text node that
///    starts with the rest of the marker.
///
/// Returns the checked state when a marker was removed, `None` otherwise.
/// A marker with no content after it is left untouched.
fn take_task_marker_from_inlines(inlines: &mut Vec<Inline>) -> Option<bool> {
    let Some(Inline::Text(text)) = inlines.first() else {
        return None;
    };
    // Borrow the leading text instead of cloning it. Every branch that mutates
    // `inlines` returns straight away, so this shared borrow is never used
    // again once a mutation has happened.
    let first: &str = &text.value;
    if let Some((checked, consumed)) = task_marker_inline_prefix(first) {
        if !first[consumed..].is_empty() || inlines_have_content_after(inlines, 1) {
            remove_text_prefix(inlines, consumed);
            return Some(checked);
        }
    }
    if let Some(checked) = task_marker_at_text_end(first) {
        if inlines
            .get(1)
            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
            && inlines_have_content_after(inlines, 2)
        {
            // Drop the soft break first so index 0 still refers to the marker text.
            inlines.remove(1);
            inlines.remove(0);
            return Some(checked);
        }
    }
    if task_marker_split_open(first)
        && inlines
            .get(1)
            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
    {
        let Some(Inline::Text(next)) = inlines.get(2) else {
            return None;
        };
        if let Some((checked, consumed)) = task_marker_split_close_prefix(&next.value) {
            if !next.value[consumed..].is_empty() || inlines_have_content_after(inlines, 3) {
                inlines.remove(1);
                inlines.remove(0);
                // The former third inline is now first; strip the marker tail from it.
                remove_text_prefix(inlines, consumed);
                return Some(checked);
            }
        }
    }
    None
}
/// Matches a task marker followed by a space or tab at the start of `input`,
/// after optional leading spaces and tabs.
///
/// Returns the checked state and the number of bytes up to and including the
/// blank that follows the marker.
fn task_marker_inline_prefix(input: &str) -> Option<(bool, usize)> {
    let start = leading_trim_bytes(input);
    let checked = task_marker_checked(&input[start..])?;
    // The marker itself is always three bytes long.
    let after_marker = start + 3;
    let followed_by_blank = matches!(input.as_bytes().get(after_marker), Some(b' ' | b'\t'));
    followed_by_blank.then_some((checked, after_marker + 1))
}
/// Matches text that, after leading spaces and tabs, is exactly a task marker.
///
/// Returns the marker's checked state, or `None` when anything follows it.
fn task_marker_at_text_end(input: &str) -> Option<bool> {
    let rest = &input[leading_trim_bytes(input)..];
    task_marker_checked(rest).filter(|_| rest.len() == 3)
}
/// Reports whether `input`, after leading spaces and tabs, is exactly `[`.
fn task_marker_split_open(input: &str) -> bool {
    let rest = &input[leading_trim_bytes(input)..];
    rest == "["
}
/// Matches the second half of a task marker that was split across a line break.
///
/// Accepts `]` followed by a space or tab (unchecked, two bytes consumed), or
/// `x]` / `X]` followed by a space or tab (checked, three bytes consumed).
fn task_marker_split_close_prefix(input: &str) -> Option<(bool, usize)> {
    let bytes = input.as_bytes();
    let (first, second) = (*bytes.first()?, *bytes.get(1)?);
    let is_blank = |byte: u8| byte == b' ' || byte == b'\t';
    if first == b']' && is_blank(second) {
        return Some((false, 2));
    }
    let closes_checked = matches!(first, b'x' | b'X') && second == b']';
    if closes_checked && bytes.get(2).copied().is_some_and(is_blank) {
        Some((true, 3))
    } else {
        None
    }
}
/// Classifies a task marker at the very start of `input`.
///
/// Returns `Some(false)` for `[ ]`, `Some(true)` for `[x]` or `[X]`, and
/// `None` for anything else.
fn task_marker_checked(input: &str) -> Option<bool> {
    match input.as_bytes().get(..3)? {
        b"[ ]" => Some(false),
        b"[x]" | b"[X]" => Some(true),
        _ => None,
    }
}
/// Drops the first `consumed` bytes from the leading text inline.
///
/// The inline is removed altogether if it becomes empty. Nothing happens when
/// the first inline is not text.
fn remove_text_prefix(inlines: &mut Vec<Inline>, consumed: usize) {
    let Some(Inline::Text(text)) = inlines.first_mut() else {
        return;
    };
    text.value.replace_range(..consumed, "");
    if text.value.is_empty() {
        inlines.remove(0);
    }
}
/// Reports whether any inline from position `start` onward carries content.
///
/// Empty text nodes, soft breaks and line breaks do not count as content;
/// every other inline does.
fn inlines_have_content_after(inlines: &[Inline], start: usize) -> bool {
    for inline in inlines.iter().skip(start) {
        let has_content = match inline {
            Inline::Text(text) => !text.value.is_empty(),
            Inline::SoftBreak(_) | Inline::LineBreak(_) => false,
            _ => true,
        };
        if has_content {
            return true;
        }
    }
    false
}
/// Tracks fenced-code state across the lines of a list item.
///
/// While a fence is open, only a matching closing fence clears `open_fence`.
/// While none is open, a fence opener on `line` is recorded. Lines indented by
/// four or more columns leave the state unchanged.
fn update_list_item_fence(line: &str, open_fence: &mut Option<(FenceMarker, usize)>) {
    let Some(trimmed) = trim_up_to_three_spaces(line) else {
        return;
    };
    match *open_fence {
        Some((marker, length)) => {
            if fence_close(trimmed, marker, length) {
                *open_fence = None;
            }
        }
        None => {
            if let Some(opened) = fence_start(trimmed) {
                *open_fence = Some(opened);
            }
        }
    }
}
/// Strips leading spaces and tabs, and nothing else, from `input`.
fn trim_ascii_start(input: &str) -> &str {
    input.trim_start_matches([' ', '\t'])
}
/// Counts the bytes of leading spaces and tabs in `input`.
fn leading_trim_bytes(input: &str) -> usize {
    let trimmed = trim_ascii_start(input);
    input.len() - trimmed.len()
}
/// Parses a table delimiter row into one alignment per column.
///
/// Returns `None` when the row has no cells or when any cell is not a valid
/// delimiter cell.
fn parse_table_delimiter(input: &str, spoiler: bool) -> Option<Vec<TableAlignment>> {
    let cells = split_table_row(input, spoiler);
    if cells.is_empty() {
        return None;
    }
    cells
        .iter()
        .map(|cell| table_delimiter_alignment(cell.trim()))
        .collect()
}
/// Parses one delimiter cell: an optional `:`, one or more `-`, an optional `:`.
///
/// The colons select the alignment. Returns `None` for any other cell content.
fn table_delimiter_alignment(cell: &str) -> Option<TableAlignment> {
    let (left, rest) = match cell.strip_prefix(':') {
        Some(rest) => (true, rest),
        None => (false, cell),
    };
    let (right, dashes) = match rest.strip_suffix(':') {
        Some(dashes) => (true, dashes),
        None => (false, rest),
    };
    let only_dashes = !dashes.is_empty() && dashes.bytes().all(|byte| byte == b'-');
    if !only_dashes {
        return None;
    }
    let alignment = match (left, right) {
        (true, true) => TableAlignment::Center,
        (true, false) => TableAlignment::Left,
        (false, true) => TableAlignment::Right,
        (false, false) => TableAlignment::None,
    };
    Some(alignment)
}
/// Prepares a table line for parsing.
///
/// With indented code enabled the line must be indented less than four columns
/// and that indent is stripped; otherwise the line is returned unchanged.
fn table_indent_line(input: &str, indented_code: bool) -> Option<&str> {
    if !indented_code {
        return Some(input);
    }
    trim_up_to_three_spaces(input)
}
/// Reports whether the backtick run of `length` bytes at `start` has a closing
/// run of exactly the same length later in `input`.
fn backtick_run_has_close(input: &str, start: usize, length: usize) -> bool {
    let bytes = input.as_bytes();
    let mut cursor = start + length;
    while let Some(&byte) = bytes.get(cursor) {
        if byte != b'`' {
            cursor += 1;
            continue;
        }
        let run = bytes[cursor..]
            .iter()
            .take_while(|&&candidate| candidate == b'`')
            .count();
        if run == length {
            return true;
        }
        // A run of a different length cannot close; skip it whole.
        cursor += run;
    }
    false
}
/// Inspects a run of backslashes at `cursor` that ends directly before a `|`.
///
/// Returns the byte position of the pipe and whether the run has odd length,
/// meaning the pipe is escaped. Returns `None` when there is no backslash at
/// `cursor` or the run is not followed by `|`.
fn table_backslash_pipe_run(input: &str, cursor: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let run = bytes
        .get(cursor..)?
        .iter()
        .take_while(|&&byte| byte == b'\\')
        .count();
    if run == 0 {
        return None;
    }
    let pipe = cursor + run;
    (bytes.get(pipe) == Some(&b'|')).then_some((pipe, run % 2 == 1))
}
/// Splits one table row into its raw cell strings.
///
/// Cells are separated by `|` characters that are not escaped by a backslash
/// and not inside an open spoiler. An escaped pipe (`\|`) is emitted as a
/// plain `|`. When `spoiler` is true, `||` pairs that open or close a spoiler,
/// and `||` pairs inside a closed backtick span, are copied into the cell
/// instead of splitting it. An empty first cell produced by a leading pipe and
/// an empty last cell produced by a trailing pipe are dropped. Cell text is
/// not trimmed.
fn split_table_row(input: &str, spoiler: bool) -> Vec<String> {
let trimmed = input.trim();
let mut cells = Vec::new();
let mut cell = String::new();
let mut cursor = 0;
// Length of the backtick run that opened the current code span, if any.
let mut code_fence = None;
let mut spoiler_open = false;
// Byte offset just past the most recent cell-separating pipe.
let mut trailing_delimiter_end = None;
while cursor < trimmed.len() {
let (next, char) = next_char(trimmed, cursor).expect("valid UTF-8 byte index");
if char == '\\' {
if let Some((pipe, escaped)) = table_backslash_pipe_run(trimmed, cursor) {
if escaped {
// Odd run: keep all but the last backslash and emit a literal pipe.
for _ in 0..pipe - cursor - 1 {
cell.push('\\');
}
cell.push('|');
cursor = pipe + 1;
} else {
// Even run: keep every backslash and let the pipe be handled below.
for _ in 0..pipe - cursor {
cell.push('\\');
}
cursor = pipe;
}
continue;
}
}
if char == '`' {
let length = trimmed[cursor..]
.as_bytes()
.iter()
.take_while(|byte| **byte == b'`')
.count();
if code_fence == Some(length) {
code_fence = None;
} else if code_fence.is_none() && backtick_run_has_close(trimmed, cursor, length) {
// Only a run that has a matching closer opens a code span.
code_fence = Some(length);
}
cell.push_str(&trimmed[cursor..cursor + length]);
cursor += length;
continue;
}
// Inside a code span a `||` pair is copied through untouched.
if spoiler
&& char == '|'
&& trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
&& code_fence.is_some()
{
cell.push_str("||");
cursor += 2;
continue;
}
if spoiler
&& char == '|'
&& trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
&& code_fence.is_none()
&& !is_escaped_at(trimmed, cursor)
{
// `wrapping_sub` makes the lookup at cursor 0 fall out of range, i.e. "no pipe before".
let closes_spoiler =
spoiler_open && trimmed.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
let opens_spoiler = !spoiler_open
&& trimmed.as_bytes().get(cursor + 2) != Some(&b'|')
&& find_spoiler_close(trimmed, cursor + 2).is_some();
if closes_spoiler || opens_spoiler {
spoiler_open = opens_spoiler;
cell.push_str("||");
cursor += 2;
continue;
}
}
if char == '|' && !spoiler_open && !is_escaped_at(trimmed, cursor) {
// Cell boundary.
cells.push(core::mem::take(&mut cell));
spoiler_open = false;
trailing_delimiter_end = Some(next);
} else {
cell.push(char);
}
cursor = next;
}
cells.push(cell);
// A leading pipe yields an empty first cell; drop it.
if trimmed.starts_with('|') {
cells.remove(0);
}
// Likewise drop the empty cell that follows a trailing pipe.
if let Some(end) = trailing_delimiter_end {
if trimmed[end..].trim().is_empty() {
cells.pop();
}
}
cells
}
/// Reports whether the line at `index` and the line after it form a table
/// header and delimiter row under `options`.
///
/// Always false when GFM tables are disabled or there is no following line.
fn table_can_start(lines: &[Line<'_>], index: usize, options: &SyntaxOptions) -> bool {
    if !options.constructs.gfm_table {
        return false;
    }
    let (Some(header), Some(delimiter)) = (lines.get(index), lines.get(index + 1)) else {
        return false;
    };
    table_can_start_source(
        header.text,
        delimiter.text,
        options.constructs.indented_code,
        options.constructs.spoiler,
    )
}
/// Reports whether `header` and `delimiter` start a table, with indented code
/// enabled and spoilers disabled.
pub(crate) fn gfm_table_can_start_source(header: &str, delimiter: &str) -> bool {
    let indented_code = true;
    let spoiler = false;
    table_can_start_source(header, delimiter, indented_code, spoiler)
}
/// Reports whether `header` followed by `delimiter` starts a table.
///
/// The delimiter line must not be a list item, must parse as a delimiter row,
/// must pass the separator check, and must have as many columns as the header.
fn table_can_start_source(
    header: &str,
    delimiter: &str,
    indented_code: bool,
    spoiler: bool,
) -> bool {
    let Some(delimiter) = table_indent_line(delimiter, indented_code) else {
        return false;
    };
    if list_marker_info(delimiter).is_some() || !table_has_separator(header, delimiter, spoiler) {
        return false;
    }
    match parse_table_delimiter(delimiter, spoiler) {
        Some(alignments) => split_table_row(header, spoiler).len() == alignments.len(),
        None => false,
    }
}
/// Guards against treating an ordinary line pair as a one-column table.
///
/// When the delimiter row has exactly one column, the header or delimiter
/// must contain an unescaped pipe, or the delimiter must contain a `:`. Every
/// other case, including an unparsable delimiter, passes.
fn table_has_separator(header: &str, delimiter: &str, spoiler: bool) -> bool {
    match parse_table_delimiter(delimiter, spoiler) {
        Some(alignments) if alignments.len() == 1 => {
            contains_unescaped_pipe(header, spoiler)
                || contains_unescaped_pipe(delimiter, spoiler)
                || delimiter.contains(':')
        }
        _ => true,
    }
}
/// Reports whether `input` contains a `|` that would act as a cell separator.
///
/// Pipes escaped by an odd run of backslashes are skipped. When `spoiler` is
/// true, `||` pairs that open or close a spoiler, or that sit inside a
/// backtick span, are skipped as well.
///
/// NOTE(review): unlike `split_table_row`, a backtick run opens a code span
/// here even when no matching closing run exists — confirm this difference is
/// intended.
fn contains_unescaped_pipe(input: &str, spoiler: bool) -> bool {
let mut cursor = 0;
// Length of the backtick run that opened the current code span, if any.
let mut code_fence = None;
let mut spoiler_open = false;
while cursor < input.len() {
let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
if char == '\\' {
if let Some((pipe, escaped)) = table_backslash_pipe_run(input, cursor) {
// Skip an escaped pipe entirely; otherwise resume at the pipe itself.
cursor = if escaped { pipe + 1 } else { pipe };
continue;
}
}
if char == '`' {
let length = input[cursor..]
.as_bytes()
.iter()
.take_while(|byte| **byte == b'`')
.count();
if code_fence == Some(length) {
code_fence = None;
} else if code_fence.is_none() {
code_fence = Some(length);
}
cursor += length;
continue;
}
// Inside a code span a `||` pair is never a separator.
if spoiler
&& char == '|'
&& input.as_bytes().get(cursor + 1) == Some(&b'|')
&& code_fence.is_some()
{
cursor += 2;
continue;
}
if spoiler
&& char == '|'
&& input.as_bytes().get(cursor + 1) == Some(&b'|')
&& code_fence.is_none()
&& !is_escaped_at(input, cursor)
{
// `wrapping_sub` makes the lookup at cursor 0 fall out of range, i.e. "no pipe before".
let closes_spoiler =
spoiler_open && input.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
let opens_spoiler = !spoiler_open
&& input.as_bytes().get(cursor + 2) != Some(&b'|')
&& find_spoiler_close(input, cursor + 2).is_some();
if closes_spoiler || opens_spoiler {
spoiler_open = opens_spoiler;
cursor += 2;
continue;
}
}
if char == '|' && !spoiler_open && !is_escaped_at(input, cursor) {
return true;
}
cursor = next;
}
false
}
/// Heuristically reports whether `input` looks like the start of a new block.
///
/// Lines indented by four or more columns never qualify. Otherwise the line
/// qualifies when it starts with `#`, `>`, a backtick or tilde fence, a list
/// marker that can interrupt a paragraph, a thematic break, or one of the
/// optional constructs enabled in `options` (interrupting HTML block, math
/// fence, directive, footnote definition).
fn likely_block_start(input: &str, options: &SyntaxOptions) -> bool {
    let Some(trimmed) = trim_up_to_three_spaces(input) else {
        return false;
    };
    let has_core_prefix = ["#", ">", "```", "~~~"]
        .iter()
        .any(|prefix| trimmed.starts_with(*prefix));
    if has_core_prefix || list_marker_can_interrupt_paragraph(input) {
        return true;
    }
    // The thematic-break parser works on a `Line`; wrap the text as a
    // stand-alone line without a line ending.
    let whole_line = Line {
        text: input,
        eol: "",
        start: 0,
        end: input.len(),
        end_with_eol: input.len(),
        lazy: false,
    };
    if parse_thematic_break(whole_line).is_some() {
        return true;
    }
    (options.constructs.html_block && line_starts_interrupting_html_block(input))
        || (options.constructs.math_block && math_block_fence_length(trimmed).is_some())
        || (options.constructs.directive_container && trimmed.starts_with(":::"))
        || (options.constructs.directive_leaf && trimmed.starts_with("::"))
        || (options.constructs.footnote_definition && line_starts_footnote_definition(trimmed))
}
/// Reports whether `trimmed` begins with `[^`, has a label end found by
/// `find_footnote_definition_label_end`, and the text between them is a valid
/// footnote label.
fn line_starts_footnote_definition(trimmed: &str) -> bool {
    if !trimmed.starts_with("[^") {
        return false;
    }
    match find_footnote_definition_label_end(trimmed) {
        Some(close) => is_footnote_label(&trimmed[2..close]),
        None => false,
    }
}
/// Reports whether `input` starts a list item that may interrupt a paragraph.
///
/// The item must have non-blank content, and an ordered item must start at 1.
fn list_marker_can_interrupt_paragraph(input: &str) -> bool {
    let Some(marker) = list_marker_info(input) else {
        return false;
    };
    if marker.content.trim().is_empty() {
        return false;
    }
    !marker.ordered || marker.start == Some(1)
}
/// Reports whether `line` ends the table body instead of continuing it.
///
/// True for likely block starts, for any list item, and for HTML block starts
/// when HTML blocks are enabled.
fn table_body_line_ends_table(line: &str, options: &SyntaxOptions) -> bool {
    if likely_block_start(line, options) || list_marker_info(line).is_some() {
        return true;
    }
    options.constructs.html_block && line_starts_html_block(line)
}
fn line_starts_interrupting_html_block(input: &str) -> bool {
match trim_up_to_three_spaces(input).and_then(html_block_start) {
Some(HtmlBlockKind::UntilBlank) | None => false,
Some(_) => true,
}
}
/// Finds the first `>` at or after `index` and returns the offset just past it.
fn parse_autolink_end(input: &str, index: usize) -> Option<usize> {
    let offset = input[index..].find('>')?;
    Some(index + offset + 1)
}
/// Parses an inline HTML construct starting at `index`.
///
/// Recognises comments, processing instructions, CDATA sections, declarations
/// and open/closing tags. Returns the end offset and the raw source text.
fn parse_html_inline(input: &str, index: usize) -> Option<(usize, String)> {
    // Constructs bounded by a fixed opener and the first following closer.
    const DELIMITED: [(&str, &str); 3] = [("<!--", "-->"), ("<?", "?>"), ("<![CDATA[", "]]>")];
    let rest = &input[index..];
    for &(open, close) in &DELIMITED {
        if rest.starts_with(open) {
            let end = rest.find(close)? + close.len();
            return Some((index + end, rest[..end].into()));
        }
    }
    if is_declaration_start(rest) {
        let end = rest.find('>')? + 1;
        return Some((index + end, rest[..end].into()));
    }
    let (end, _name) = parse_html_tag(input, index)?;
    Some((end, input[index..end].into()))
}
/// Parses an HTML open or closing tag starting at `index`.
///
/// Returns the offset just past the tag together with the tag name. A closing
/// tag may only have whitespace between the name and `>`. An open tag may
/// carry attributes, each preceded by whitespace and optionally followed by
/// `=` and a value, and ends with `>` or `/>`.
fn parse_html_tag(input: &str, index: usize) -> Option<(usize, &str)> {
    let bytes = input.as_bytes();
    if bytes.get(index) != Some(&b'<') {
        return None;
    }
    let closing = bytes.get(index + 1) == Some(&b'/');
    let name_start = index + if closing { 2 } else { 1 };
    let first = *bytes.get(name_start)?;
    if !first.is_ascii_alphabetic() {
        return None;
    }
    let mut cursor = name_start + 1;
    while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
        cursor += 1;
    }
    let name = &input[name_start..cursor];
    if closing {
        cursor = skip_spaces(input, cursor);
        if bytes.get(cursor) == Some(&b'>') {
            return Some((cursor + 1, name));
        }
        return None;
    }
    loop {
        let before_spaces = cursor;
        cursor = skip_spaces(input, cursor);
        let had_space = cursor > before_spaces;
        match bytes.get(cursor) {
            Some(b'>') => return Some((cursor + 1, name)),
            Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => return Some((cursor + 2, name)),
            // An attribute must be separated from what precedes it by whitespace.
            Some(byte) if had_space && html_attribute_name_start(*byte) => {
                cursor += 1;
                while bytes
                    .get(cursor)
                    .is_some_and(|byte| html_attribute_name_byte(*byte))
                {
                    cursor += 1;
                }
                // Only step past whitespace after the name when a value follows;
                // otherwise it must stay available as the next separator.
                let after_spaces = skip_spaces(input, cursor);
                if bytes.get(after_spaces) == Some(&b'=') {
                    let value_start = skip_spaces(input, after_spaces + 1);
                    cursor = parse_html_attribute_value(input, value_start)?;
                }
            }
            // Anything else, including end of input, is not a valid tag.
            _ => return None,
        }
    }
}
/// Parses an HTML attribute value starting at `index`.
///
/// Quoted values run to the matching quote. Unquoted values run until
/// whitespace or one of `"`, `'`, `=`, `<`, `>`, `` ` `` and must not be
/// empty. Returns the offset just past the value.
fn parse_html_attribute_value(input: &str, index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let first = *bytes.get(index)?;
    match first {
        b'"' | b'\'' => {
            let body_start = index + 1;
            let close = bytes[body_start..].iter().position(|&byte| byte == first)?;
            Some(body_start + close + 1)
        }
        b'=' | b'<' | b'>' | b'`' => None,
        _ => {
            let length = bytes[index..]
                .iter()
                .take_while(|&&byte| {
                    !byte.is_ascii_whitespace()
                        && !matches!(byte, b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
                })
                .count();
            (length > 0).then_some(index + length)
        }
    }
}
/// Reports whether `byte` may appear in an HTML tag name after the first
/// character: an ASCII letter, a digit, or `-`.
fn html_name_byte(byte: u8) -> bool {
    matches!(byte, b'-' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
/// Reports whether `byte` may start an HTML attribute name: an ASCII letter,
/// `_`, or `:`.
fn html_attribute_name_start(byte: u8) -> bool {
    matches!(byte, b'_' | b':' | b'a'..=b'z' | b'A'..=b'Z')
}
/// Reports whether `byte` may continue an HTML attribute name: an ASCII
/// letter, a digit, or one of `_`, `:`, `.`, `-`.
fn html_attribute_name_byte(byte: u8) -> bool {
    match byte {
        b'_' | b':' | b'.' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
/// Advances `index` past any run of spaces, tabs, `\n` and `\r`.
///
/// An `index` past the end of `input` is returned unchanged.
fn skip_spaces(input: &str, index: usize) -> usize {
    let skipped = input.as_bytes().get(index..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
    });
    index + skipped
}
/// Reports whether `input`, minus its first and last byte, is a valid URI or
/// email autolink body.
///
/// NOTE(review): presumably `input` always carries the surrounding `<` and
/// `>`; shorter input would panic on the slice — verify against callers.
fn is_autolink(input: &str) -> bool {
    let inner = &input[1..input.len() - 1];
    if is_uri_autolink(inner) {
        return true;
    }
    is_email_autolink(inner)
}
/// Reports whether `input` is an absolute URI suitable for an autolink.
///
/// The scheme is 2 to 32 bytes long, starts with an ASCII letter and continues
/// with ASCII letters, digits, `+`, `.` or `-`. The part after the first `:`
/// must not contain `<`, `>`, control characters or whitespace.
fn is_uri_autolink(input: &str) -> bool {
    let Some((scheme, rest)) = input.split_once(':') else {
        return false;
    };
    if !(2..=32).contains(&scheme.len()) {
        return false;
    }
    let Some((first, tail)) = scheme.as_bytes().split_first() else {
        return false;
    };
    let scheme_ok = first.is_ascii_alphabetic()
        && tail
            .iter()
            .all(|&byte| byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'.' | b'-'));
    if !scheme_ok {
        return false;
    }
    !rest
        .chars()
        .any(|char| matches!(char, '<' | '>') || char.is_control() || char.is_whitespace())
}
/// Reports whether `input` is an email address suitable for an autolink.
///
/// The address must contain no whitespace and must split at its first `@`
/// into a non-empty valid local part and a non-empty valid domain.
fn is_email_autolink(input: &str) -> bool {
    if input.chars().any(char::is_whitespace) {
        return false;
    }
    match input.split_once('@') {
        Some((local, domain)) if !local.is_empty() && !domain.is_empty() => {
            is_email_local_part(local) && is_email_domain(domain, 1)
        }
        _ => false,
    }
}
/// Tries to parse a literal (bare) autolink starting at `index`.
///
/// With `gfm` enabled, `http://` / `https://` URLs, `www.` links and email
/// addresses are tried in that order. With `relaxed` enabled, any
/// `scheme://` URL is accepted as a fallback. Returns the end offset and the
/// link destination; `www.` links get an `http://` prefix in the destination.
fn parse_literal_autolink(
input: &str,
index: usize,
gfm: bool,
relaxed: bool,
) -> Option<(usize, String)> {
let rest = &input[index..];
if gfm {
// Byte length of the matched `http://` or `https://` prefix.
if let Some(scheme_len) = rest
.starts_with("http://")
.then_some(7)
.or_else(|| rest.starts_with("https://").then_some(8))
{
if !literal_scheme_prefix_ok(input, index) {
return None;
}
let host = &input[index + scheme_len..];
if !http_literal_host_ok(host) {
// In relaxed mode an invalid host falls through to the later branches.
if relaxed {
} else {
return None;
}
} else {
let end = autolink_url_end(input, index + scheme_len, index + scheme_len, relaxed);
if end <= index + scheme_len {
return None;
}
if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
return None;
}
return Some((end, input[index..end].into()));
}
}
// `www.` prefix, matched case-insensitively.
if rest
.as_bytes()
.get(..4)
.is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"www."))
{
if !literal_www_prefix_ok(input, index) {
return None;
}
check_domain(rest, false)?;
let end = autolink_url_end(input, index, index, relaxed);
// In strict mode, a link trimmed down to at most `www` is only kept at the start of a line.
if end <= index || (!relaxed && end <= index + 3 && !literal_starts_line(input, index))
{
return None;
}
if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
return None;
}
let mut destination = String::from("http://");
destination.push_str(&input[index..end]);
return Some((end, destination));
}
if let Some(email) = parse_literal_email(input, index) {
return Some(email);
}
}
if relaxed {
if literal_scheme_prefix_ok(input, index) {
if let Some(after_slashes) = relaxed_scheme_after_slashes(rest) {
let body_start = index + after_slashes;
let next = input[body_start..].chars().next();
// A bare `://` followed by nothing or by whitespace is not a link.
if next.is_none_or(|char| char.is_whitespace()) && after_slashes == 3 {
return None;
}
let end = autolink_url_end(input, body_start, body_start, true);
if end > index {
if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
return None;
}
return Some((end, input[index..end].into()));
}
}
}
}
None
}
/// Locates the end of a `scheme://` prefix at the start of `rest`.
///
/// Returns the offset just past `://`. A bare `://` with no scheme is accepted
/// and yields 3. A scheme must start with an ASCII letter and continue with
/// ASCII letters, digits, `+`, `.` or `-`.
fn relaxed_scheme_after_slashes(rest: &str) -> Option<usize> {
    let bytes = rest.as_bytes();
    if bytes.starts_with(b"://") {
        return Some(3);
    }
    if !bytes.first()?.is_ascii_alphabetic() {
        return None;
    }
    let mut colon = 1;
    for &byte in &bytes[1..] {
        if byte == b':' {
            break;
        }
        let scheme_byte = byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'.' | b'-');
        if !scheme_byte {
            return None;
        }
        colon += 1;
    }
    bytes[colon..].starts_with(b"://").then_some(colon + 3)
}
/// Reports whether a scheme autolink may start at `index`: the preceding
/// character, if any, must not be an ASCII letter.
fn literal_scheme_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        Some(previous) => !previous.is_ascii_alphabetic(),
        None => true,
    }
}
/// Reports whether a `www.` autolink may start at `index`.
///
/// It may start at the beginning of the input, or after a space, tab, line
/// ending, or one of `*`, `_`, `~`, `(`, `[`, `]`.
fn literal_www_prefix_ok(input: &str, index: usize) -> bool {
    let Some(previous) = input[..index].chars().next_back() else {
        return true;
    };
    matches!(
        previous,
        '*' | '_' | '~' | '(' | '[' | ']' | ' ' | '\t' | '\n' | '\r'
    )
}
/// Reports whether `index` is at the start of the input or directly follows a
/// `\n` or `\r` byte.
fn literal_starts_line(input: &str, index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous) => matches!(input.as_bytes().get(previous), Some(b'\n' | b'\r')),
    }
}
/// Reports whether a literal autolink spanning `index..end` should be
/// suppressed because it sits inside an unclosed link label.
///
/// Inside such a label the autolink is suppressed when it is followed by `](`
/// with no closing `)` on the line, or, when both relaxed mode and GFM literal
/// autolinks are off, when it is directly followed by `]`.
fn literal_autolink_suppressed_by_link_label(
    input: &str,
    index: usize,
    end: usize,
    relaxed: bool,
    gfm_autolink_literal: bool,
) -> bool {
    if !has_unclosed_link_label_opener(input, index) {
        return false;
    }
    let tail = &input[end..];
    if tail.starts_with("](") && !link_resource_tail_has_close(input, end + 2) {
        return true;
    }
    if relaxed || gfm_autolink_literal {
        return false;
    }
    tail.as_bytes().first() == Some(&b']')
}
/// Reports whether an unescaped `[` on the current line, before `index`, is
/// still unmatched by a later `]`.
///
/// A backslash escapes the character after it. Closing brackets with no
/// opener are ignored.
fn has_unclosed_link_label_opener(input: &str, index: usize) -> bool {
    let prefix = &input[..index];
    let line_start = prefix.rfind(['\n', '\r']).map_or(0, |offset| offset + 1);
    let mut depth = 0usize;
    let mut chars = prefix[line_start..].chars();
    while let Some(char) = chars.next() {
        match char {
            '\\' => {
                // Skip the escaped character, if there is one before `index`.
                chars.next();
            }
            '[' => depth += 1,
            ']' => depth = depth.saturating_sub(1),
            _ => {}
        }
    }
    depth > 0
}
/// Reports whether an unescaped `)` appears at or after `start` before the
/// line ends.
///
/// A backslash escapes the character after it, including a line ending.
fn link_resource_tail_has_close(input: &str, start: usize) -> bool {
    if start >= input.len() {
        return false;
    }
    let mut chars = input[start..].chars();
    while let Some(char) = chars.next() {
        match char {
            '\\' => {
                // Skip whatever character is escaped.
                chars.next();
            }
            '\n' | '\r' => return false,
            ')' => return true,
            _ => {}
        }
    }
    false
}
/// Reports whether `host` is an acceptable host after `http://` or `https://`.
///
/// A host starting with `[` must be a bracketed literal. Otherwise the first
/// character must be an ASCII letter or digit, or a non-ASCII valid host
/// character, and the host must pass the domain check with short names allowed.
fn http_literal_host_ok(host: &str) -> bool {
    if host.starts_with('[') {
        return bracketed_ipv6_host_end(host).is_some();
    }
    let Some(first) = host.chars().next() else {
        return false;
    };
    let plausible_start = if first.is_ascii() {
        first.is_ascii_alphanumeric()
    } else {
        is_valid_hostchar(first)
    };
    plausible_start && check_domain(host, true).is_some()
}
/// Finds the end of a bracketed host literal.
///
/// Returns the offset just past the first `]`, provided at least one byte sits
/// between offset 0 and that bracket.
fn bracketed_ipv6_host_end(host: &str) -> Option<usize> {
    match host.find(']') {
        Some(close) if close > 1 => Some(close + 1),
        _ => None,
    }
}
/// Reports whether `char` may appear in a host name: neither whitespace nor
/// punctuation according to `crate::unicode_punctuation::is_unicode_punctuation`.
fn is_valid_hostchar(char: char) -> bool {
    if char.is_whitespace() {
        return false;
    }
    !crate::unicode_punctuation::is_unicode_punctuation(char)
}
/// Scans the start of `data` as a host name and returns the byte length of
/// the accepted prefix.
///
/// Scanning stops at the first character that is not a valid host character,
/// other than `_`, `.`, `-` and backslash escapes. Returns `None` when an
/// underscore occurs in either of the last two dot-separated segments seen
/// while at most ten dots were counted, or when no dot was counted and
/// `allow_short` is false.
fn check_domain(data: &str, allow_short: bool) -> Option<usize> {
// Number of dots counted so far.
let mut np = 0usize;
// Underscores in the previous segment and in the current segment.
let mut uscore1 = 0usize;
let mut uscore2 = 0usize;
let mut host_len = 0usize;
let mut chars = data.char_indices().peekable();
while let Some((offset, char)) = chars.next() {
// The first and last characters of `data` are excluded from dot/underscore counting.
let account = offset != 0 && chars.peek().is_some();
match char {
'\\' => {
// A backslash also consumes the character that follows it.
host_len = offset + char.len_utf8();
if let Some((next_off, next)) = chars.next() {
host_len = next_off + next.len_utf8();
}
}
'_' if account => {
uscore2 += 1;
host_len = offset + char.len_utf8();
}
'.' if account => {
// A new segment starts: shift the underscore counters.
uscore1 = uscore2;
uscore2 = 0;
np += 1;
host_len = offset + char.len_utf8();
}
'_' | '.' | '-' => {
host_len = offset + char.len_utf8();
}
_ => {
if !is_valid_hostchar(char) {
break;
}
host_len = offset + char.len_utf8();
}
}
}
if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
return None;
}
if allow_short || np > 0 {
Some(host_len)
} else {
None
}
}
/// Finds where a literal autolink that is scanned from `start` ends.
///
/// Scanning stops at whitespace, `<`, or a character in U+2066..=U+2069. With
/// `balanced` set, `[]` and `{}` nesting is tracked and an unmatched closer
/// ends the link. Without it, `]` ends the link unless a `[` was seen earlier
/// or the scan is inside backticks. A backslash followed by ASCII punctuation
/// other than `.` also ends the link, before the backslash. The candidate end
/// is then trimmed by `autolink_delim` over `trim_from..end`.
fn autolink_url_end(input: &str, start: usize, trim_from: usize, balanced: bool) -> usize {
let bytes = input.as_bytes();
let mut end = start;
let mut bracket_depth = 0i32;
let mut curly_depth = 0i32;
let mut strict_has_open_bracket = false;
let mut strict_inside_backticks = false;
for (offset, char) in input[start..].char_indices() {
if char.is_whitespace() || char == '<' || is_autolink_terminating_control(char) {
break;
}
if balanced {
match char {
'[' => bracket_depth += 1,
']' => {
if bracket_depth > 0 {
bracket_depth -= 1;
} else {
// Closer with no opener inside the link.
break;
}
}
'{' => curly_depth += 1,
'}' => {
if curly_depth > 0 {
curly_depth -= 1;
} else {
break;
}
}
_ => {}
}
} else {
match char {
'[' => strict_has_open_bracket = true,
'`' => strict_inside_backticks = !strict_inside_backticks,
']' if !strict_has_open_bracket && !strict_inside_backticks => break,
_ => {}
}
}
if char == '\\' {
if let Some(&next) = bytes.get(start + offset + 1) {
// `\.` is kept inside the link; any other escaped punctuation ends it.
let next_is_escapable_punct = next.is_ascii_punctuation() && next != b'.';
if next_is_escapable_punct {
break;
}
}
}
end = start + offset + char.len_utf8();
}
autolink_delim(input, trim_from, end)
}
/// Reports whether `char` lies in U+2066..=U+2069, a range that ends a
/// literal autolink.
fn is_autolink_terminating_control(char: char) -> bool {
    ('\u{2066}'..='\u{2069}').contains(&char)
}
/// Trims trailing delimiters from the autolink candidate `start..end` and
/// returns the new end offset.
///
/// Trailing `?`, `!`, `.`, `,`, `:`, `*`, `_`, `~`, `'` and `"` are removed. A
/// trailing `)` is removed only while closing parentheses outnumber opening
/// ones in the candidate. A trailing `;` removes a whole trailing `&name;` or
/// `&#x…;` sequence when one is present, otherwise just the `;`.
fn autolink_delim(input: &str, start: usize, mut end: usize) -> usize {
let bytes = input.as_bytes();
let mut opening = 0usize;
let mut closing = 0usize;
// Count parentheses over the whole candidate once, up front.
for &byte in &bytes[start..end] {
match byte {
b'(' => opening += 1,
b')' => closing += 1,
_ => {}
}
}
while end > start {
match bytes[end - 1] {
b')' => {
// Balanced parentheses belong to the link.
if closing <= opening {
break;
}
closing -= 1;
end -= 1;
}
b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~' | b'\'' | b'"' => {
end -= 1;
}
b';' => {
if let Some(amp) = trailing_hex_entity_run_start(bytes, start, end) {
end = amp;
} else {
// Walk back over an alphanumeric run to look for a leading `&`.
let mut new_end = end - 1;
while new_end > start && bytes[new_end - 1].is_ascii_alphanumeric() {
new_end -= 1;
}
if new_end > start && new_end < end - 1 && bytes[new_end - 1] == b'&' {
end = new_end - 1;
} else {
end -= 1;
}
}
}
_ => break,
}
}
end
}
/// Detects a hexadecimal character reference (`&#x…;` or `&#X…;`) that ends
/// exactly at `end`.
///
/// Returns the offset of its `&` when the reference lies entirely within
/// `start..end` and has at least one hex digit.
fn trailing_hex_entity_run_start(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    if end <= start || bytes[end - 1] != b';' {
        return None;
    }
    let digits_end = end - 1;
    let digit_count = bytes[start..digits_end]
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_hexdigit())
        .count();
    let cursor = digits_end - digit_count;
    // Need at least one digit plus room for the three-byte `&#x` prefix.
    if digit_count == 0 || cursor < start + 3 {
        return None;
    }
    match &bytes[cursor - 3..cursor] {
        [b'&', b'#', b'x' | b'X'] => Some(cursor - 3),
        _ => None,
    }
}
/// Tries to parse a literal email autolink starting at `index`.
///
/// The text up to the first `@` is the local part, optionally carrying a
/// `mailto:` or `xmpp:` prefix. Returns the end offset and the destination;
/// `mailto:` is prepended when the source has no scheme of its own.
fn parse_literal_email(input: &str, index: usize) -> Option<(usize, String)> {
    let rest = &input[index..];
    let at = rest.find('@').filter(|&at| at != 0)?;
    let local = &rest[..at];
    let (auto_mailto, is_xmpp) = classify_email_local(local);
    let local_ok = email_left_boundary_ok(input, index, auto_mailto)
        && email_local_is_valid(local, auto_mailto);
    if !local_ok {
        return None;
    }
    let domain_start = index + at + 1;
    let domain_end = literal_email_domain_end(input, domain_start, is_xmpp)?;
    let trimmed = autolink_delim(input, domain_start, domain_end);
    if trimmed <= domain_start || !is_gfm_email_domain(&input[domain_start..trimmed], is_xmpp) {
        return None;
    }
    let address = &input[index..trimmed];
    let destination = if auto_mailto {
        let mut prefixed = String::from("mailto:");
        prefixed.push_str(address);
        prefixed
    } else {
        String::from(address)
    };
    Some((trimmed, destination))
}
fn classify_email_local(local: &str) -> (bool, bool) {
if let Some(rest) = strip_ci_prefix(local, "mailto:") {
if !rest.is_empty() {
return (false, false);
}
}
if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
if !rest.is_empty() {
return (false, true);
}
}
(true, false)
}
/// Strips `prefix` from the start of `input`, comparing ASCII
/// case-insensitively. Returns the remainder, or `None` when `input` does not
/// start with the prefix.
fn strip_ci_prefix<'a>(input: &'a str, prefix: &str) -> Option<&'a str> {
    let head = input.as_bytes().get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix.as_bytes()) {
        Some(&input[prefix.len()..])
    } else {
        None
    }
}
/// Reports whether a literal email may start at `index`, judged by the
/// character before it.
///
/// After an ASCII letter or digit, only a scheme-less address starting with
/// `+` that directly follows another valid email is allowed. A scheme-less
/// address is also rejected right after `/`.
fn email_left_boundary_ok(input: &str, index: usize, auto_mailto: bool) -> bool {
    let Some(previous) = input[..index].chars().next_back() else {
        return true;
    };
    if previous.is_ascii_alphanumeric() {
        return auto_mailto
            && input[index..].starts_with('+')
            && prefix_ends_with_gfm_email(input, index);
    }
    !(auto_mailto && previous == '/')
}
/// Reports whether the text immediately before byte offset `end` — back to the
/// nearest whitespace character, or to the start of `input` — is an email
/// address: a local part accepted by `email_local_is_valid` (without a scheme)
/// and, after the last `@`, a domain accepted by `is_gfm_email_domain`.
///
/// Returns `false` when that trailing run contains no `@`.
fn prefix_ends_with_gfm_email(input: &str, end: usize) -> bool {
    let prefix = &input[..end];
    // Take everything after the last whitespace character. `rsplit` yields
    // slices on character boundaries, so multi-byte whitespace (U+00A0,
    // U+3000, ...) is skipped whole; computing the start as the `rfind` byte
    // offset plus one would land inside such a character and make the slice
    // panic.
    let candidate = prefix.rsplit(char::is_whitespace).next().unwrap_or(prefix);
    let Some(at) = candidate.rfind('@') else {
        return false;
    };
    email_local_is_valid(&candidate[..at], true) && is_gfm_email_domain(&candidate[at + 1..], false)
}
/// Validates the local part of a literal email candidate.
///
/// With `auto_mailto` set the whole of `local` is checked. Otherwise a leading
/// `mailto:` — or, failing that, `xmpp:` — (ASCII case-insensitive) is removed
/// first if present. The remaining text must be non-empty and consist only of
/// bytes accepted by `is_gfm_email_local_byte`.
fn email_local_is_valid(local: &str, auto_mailto: bool) -> bool {
    let address = if auto_mailto {
        local
    } else {
        strip_ci_prefix(local, "mailto:")
            .or_else(|| strip_ci_prefix(local, "xmpp:"))
            .unwrap_or(local)
    };

    match address.as_bytes() {
        [] => false,
        bytes => bytes.iter().copied().all(is_gfm_email_local_byte),
    }
}
/// Returns `true` for the bytes permitted in the local part of a literal
/// email: ASCII letters and digits plus `.`, `+`, `_` and `-`.
fn is_gfm_email_local_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'.' | b'+' | b'_' | b'-'
    )
}
/// Checks that `input` is a non-empty sequence of dot-separated segments in
/// which every segment is itself non-empty and made up solely of bytes
/// accepted by `is_email_atext`.
fn is_email_local_part(input: &str) -> bool {
    if input.is_empty() {
        return false;
    }
    for segment in input.split('.') {
        // Rejects leading, trailing and doubled dots as well as foreign bytes.
        if segment.is_empty() || !segment.bytes().all(is_email_atext) {
            return false;
        }
    }
    true
}
/// Returns `true` when `byte` is an ASCII letter or digit, or one of the
/// punctuation bytes ``!#$%&'*+/=?^_`{|}~-``.
fn is_email_atext(byte: u8) -> bool {
    const PUNCTUATION: &[u8] = b"!#$%&'*+/=?^_`{|}~-";
    match byte {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
        other => PUNCTUATION.contains(&other),
    }
}
/// Scans the domain of a literal email starting at byte offset `index` and
/// returns the offset just past its last accepted byte.
///
/// Accepted bytes are ASCII letters and digits, `-`, `_`, a `.` that is
/// directly followed by an ASCII letter or digit, and — only when `is_xmpp`
/// is set — `/`. Scanning stops at the first byte outside that set.
///
/// Returns `None` when nothing was consumed, when no `.` was consumed, or
/// when the final consumed byte is neither an ASCII letter nor `.`.
fn literal_email_domain_end(input: &str, index: usize, is_xmpp: bool) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut dot_count = 0usize;
    let mut cursor = index;

    while let Some(&current) = bytes.get(cursor) {
        let accepted = match current {
            b'.' => {
                // A dot only counts when a letter or digit follows it.
                let followed = bytes
                    .get(cursor + 1)
                    .is_some_and(|next| next.is_ascii_alphanumeric());
                if followed {
                    dot_count += 1;
                }
                followed
            }
            b'-' | b'_' => true,
            b'/' => is_xmpp,
            other => other.is_ascii_alphanumeric(),
        };
        if !accepted {
            break;
        }
        cursor += 1;
    }

    if cursor <= index {
        return None;
    }
    let tail = bytes[cursor - 1];
    let tail_ok = tail.is_ascii_alphabetic() || tail == b'.';
    if dot_count == 0 || !tail_ok {
        return None;
    }
    Some(cursor)
}
/// Validates the domain of a literal email.
///
/// When `is_xmpp` is set, only the text before the first `/` is examined.
/// That host must contain a `.`, must not end in `-` or `_`, and every
/// dot-separated label must be non-empty and consist only of ASCII letters,
/// digits, `-` and `_`.
fn is_gfm_email_domain(input: &str, is_xmpp: bool) -> bool {
    let host = match input.split_once('/') {
        Some((before_slash, _)) if is_xmpp => before_slash,
        _ => input,
    };

    // A host without a dot (which includes the empty host) is never valid.
    let has_dot = host.contains('.');
    let ends_with_joiner = matches!(host.as_bytes().last().copied(), Some(b'-' | b'_'));
    if !has_dot || ends_with_joiner {
        return false;
    }

    host.split('.').all(|label| {
        let raw = label.as_bytes();
        !raw.is_empty()
            && raw
                .iter()
                .all(|byte| matches!(byte, b'-' | b'_') || byte.is_ascii_alphanumeric())
    })
}
/// Validates a dot-separated domain name.
///
/// Every label must be 1 to 63 bytes long, start and end with an ASCII letter
/// or digit, and otherwise contain only ASCII letters, digits and `-`. In
/// addition the domain must have at least `min_labels` labels.
fn is_email_domain(input: &str, min_labels: usize) -> bool {
    let mut seen_labels = 0usize;
    let all_labels_ok = input.split('.').all(|label| {
        seen_labels += 1;
        let raw = label.as_bytes();
        match (raw.first(), raw.last()) {
            (Some(head), Some(tail)) => {
                raw.len() <= 63
                    && head.is_ascii_alphanumeric()
                    && tail.is_ascii_alphanumeric()
                    && raw
                        .iter()
                        .all(|byte| *byte == b'-' || byte.is_ascii_alphanumeric())
            }
            // An empty label has neither a first nor a last byte.
            _ => false,
        }
    });
    all_labels_ok && seen_labels >= min_labels
}
/// Returns `true` when `label` can serve as a footnote label: it is non-empty,
/// passes `reference_label_is_within_limit`, and contains no whitespace
/// character.
fn is_footnote_label(label: &str) -> bool {
    if label.is_empty() || !reference_label_is_within_limit(label) {
        return false;
    }
    label.chars().all(|ch| !ch.is_whitespace())
}
/// Finds the `]` that closes a footnote definition label in `input`.
///
/// The search for the closing bracket starts at byte offset 2 (presumably just
/// past a two-byte opener such as `[^` — confirm against the caller). The
/// bracket only counts when the byte right after it is `:`; its offset is
/// returned in that case, and `None` otherwise.
fn find_footnote_definition_label_end(input: &str) -> Option<usize> {
    let close = find_footnote_reference_label_end(input, 2)?;
    let followed_by_colon = input.as_bytes().get(close + 1).copied() == Some(b':');
    followed_by_colon.then_some(close)
}
/// Walks `input` character by character from byte offset `cursor` and returns
/// the offset of the first `]` for which `is_escaped_at` reports `false`.
///
/// Returns `None` when the end of `input` is reached first, or when
/// `next_char` cannot produce a character at the current offset.
fn find_footnote_reference_label_end(input: &str, mut cursor: usize) -> Option<usize> {
    loop {
        if cursor >= input.len() {
            return None;
        }
        let (after, ch) = next_char(input, cursor)?;
        let closes_label = ch == ']' && !is_escaped_at(input, cursor);
        if closes_label {
            return Some(cursor);
        }
        cursor = after;
    }
}
/// Finds the `]` that terminates an inline footnote, scanning `input` from
/// byte offset `cursor` while keeping track of nested brackets.
///
/// Characters for which `is_escaped_at` reports `true` are ignored. Each
/// remaining `[` opens a nesting level and each `]` closes one; the first `]`
/// seen with no level open is the terminator and its offset is returned.
/// Returns `None` when the input ends first or `next_char` yields nothing.
fn find_inline_footnote_end(input: &str, mut cursor: usize) -> Option<usize> {
    let mut open_brackets = 0usize;
    while cursor < input.len() {
        let (after, ch) = next_char(input, cursor)?;
        if is_escaped_at(input, cursor) {
            cursor = after;
            continue;
        }
        if ch == '[' {
            open_brackets += 1;
        } else if ch == ']' {
            if open_brackets == 0 {
                return Some(cursor);
            }
            open_brackets -= 1;
        }
        cursor = after;
    }
    None
}