use crate::options::ParserOptions;
use crate::parser::inlines::refdef_map::{RefdefMap, normalize_label};
use std::collections::{BTreeMap, HashSet};
/// Strength of a matched emphasis delimiter pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmphasisKind {
/// Regular emphasis; assigned when a pair consumes one delimiter byte per side.
Emph,
/// Strong emphasis; assigned when a pair consumes two delimiter bytes per side.
Strong,
}
/// Disposition recorded for a delimiter position in an [`EmphasisPlan`].
///
/// NOTE(review): the producer of these values is outside this chunk; the field
/// descriptions below are inferred from names and should be confirmed.
#[derive(Debug, Clone, Copy)]
pub enum DelimChar {
/// Position that opens an emphasis span.
Open {
// Presumably the number of delimiter bytes used by this opener.
len: u8,
// Presumably the position of the matching closer.
partner: usize,
// Presumably the number of delimiter bytes used by the matching closer.
partner_len: u8,
// Emphasis strength of the span.
kind: EmphasisKind,
},
/// Position that closes an emphasis span.
Close,
/// Position that stays literal text.
Literal,
}
/// Lookup table from a position to the [`DelimChar`] disposition recorded for it.
#[derive(Debug, Default, Clone)]
pub struct EmphasisPlan {
// Ordered map keyed by position; read through `lookup`.
by_pos: BTreeMap<usize, DelimChar>,
}
impl EmphasisPlan {
pub fn lookup(&self, pos: usize) -> Option<DelimChar> {
self.by_pos.get(&pos).copied()
}
pub fn is_empty(&self) -> bool {
self.by_pos.is_empty()
}
pub fn from_dispositions(by_pos: BTreeMap<usize, DelimChar>) -> Self {
Self { by_pos }
}
}
use super::bracketed_spans::try_parse_bracketed_span;
use super::citations::{try_parse_bare_citation, try_parse_bracketed_citation};
use super::code_spans::try_parse_code_span;
use super::escapes::{EscapeType, try_parse_escape};
use super::inline_footnotes::{try_parse_footnote_reference, try_parse_inline_footnote};
use super::inline_html::try_parse_inline_html;
use super::links::{
LinkScanContext, try_parse_autolink, try_parse_inline_image, try_parse_inline_link,
try_parse_reference_image, try_parse_reference_link,
};
use super::math::{
try_parse_display_math, try_parse_double_backslash_display_math,
try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
};
use super::native_spans::try_parse_native_span;
/// One event of the inline intermediate representation produced by `build_ir_into`.
///
/// Every event covers a byte range of the source text (see `IrEvent::range`).
#[derive(Debug, Clone)]
pub enum IrEvent {
/// Plain text bytes that no construct claimed.
Text { start: usize, end: usize },
/// A span recognised as a single construct of the given `kind`.
Construct {
start: usize,
end: usize,
kind: ConstructKind,
},
/// A maximal run of identical `*` or `_` bytes.
DelimRun {
// The delimiter byte (`b'*'` or `b'_'`).
ch: u8,
start: usize,
end: usize,
// Flanking-derived permissions computed by `compute_flanking`.
can_open: bool,
can_close: bool,
// Pairings recorded by the emphasis passes; empty until a match is found.
matches: Vec<DelimMatch>,
},
/// A `[` (one byte) or `![` (two bytes, `is_image == true`) opener.
OpenBracket {
start: usize,
end: usize,
is_image: bool,
// Starts `true`; cleared once the opener has been resolved or given up on.
active: bool,
// Filled by `commit_resolution` when the bracket pair resolves to a link/image.
resolution: Option<BracketResolution>,
// Set by `process_brackets` (non-CommonMark only) for reference-shaped
// brackets whose label did not resolve.
unresolved_ref: Option<UnresolvedRefShape>,
},
/// A `]` at byte `pos`; `matched` is set when it is paired with an opener.
CloseBracket {
pos: usize,
matched: bool,
},
/// A line ending that is not a hard break.
SoftBreak { start: usize, end: usize },
/// A hard line break (escaped newline, or two or more trailing spaces plus newline).
HardBreak { start: usize, end: usize },
}
impl IrEvent {
    /// Returns the half-open byte range `(start, end)` covered by this event.
    ///
    /// A `CloseBracket` stores only its position, so its range is the single
    /// byte `(pos, pos + 1)`.
    pub fn range(&self) -> (usize, usize) {
        match *self {
            IrEvent::CloseBracket { pos, .. } => (pos, pos + 1),
            IrEvent::Text { start, end }
            | IrEvent::SoftBreak { start, end }
            | IrEvent::HardBreak { start, end }
            | IrEvent::Construct { start, end, .. }
            | IrEvent::DelimRun { start, end, .. }
            | IrEvent::OpenBracket { start, end, .. } => (start, end),
        }
    }
}
/// Which kind of construct an `IrEvent::Construct` span was recognised as.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructKind {
/// A backslash escape (literal or non-breaking-space escape).
Escape,
/// A backtick code span.
CodeSpan,
/// A `<...>` autolink.
Autolink,
/// Inline raw HTML.
InlineHtml,
/// A math span (any of the enabled TeX math syntaxes) in non-CommonMark dialects.
PandocOpaque,
/// An inline footnote starting with `^`.
InlineFootnote,
/// A native span starting with `<`.
NativeSpan,
/// A footnote reference starting with `[^`.
FootnoteReference,
/// A bracketed citation starting with `[`.
BracketedCitation,
/// A bare citation starting with `@` or `-@`.
BareCitation,
/// A bracketed span starting with `[`.
BracketedSpan,
}
/// One half of an emphasis pairing, stored on the `DelimRun` it belongs to.
///
/// The emphasis pass pushes one `DelimMatch` on the opener run and a mirrored
/// one on the closer run.
#[derive(Debug, Clone, Copy)]
pub struct DelimMatch {
/// Byte offset of the consumed delimiters from the start of this run.
pub offset_in_run: u8,
/// Number of delimiter bytes consumed by this match.
pub len: u8,
/// `true` on the opener's half of the pair, `false` on the closer's half.
pub is_opener: bool,
/// Index (into the event list) of the run holding the other half.
pub partner_event: u32,
/// `offset_in_run` of the other half within its own run.
pub partner_offset: u8,
/// Emphasis strength produced by the pair.
pub kind: EmphasisKind,
}
/// Extent of a reference-shaped bracket pair whose label did not resolve.
///
/// Recorded by `process_brackets` for non-CommonMark dialects only.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnresolvedRefShape {
/// Index of the `CloseBracket` event that ends the bracket text.
pub close_event: u32,
/// Byte offset of that `]` (end of the bracket text).
pub text_end: usize,
/// Byte offset just past the whole shape, including any `[label]` / `[]` suffix.
pub end: usize,
}
/// Result of resolving a bracket pair to a link or image.
#[derive(Debug, Clone)]
pub struct BracketResolution {
/// Index of the matching `CloseBracket` event.
pub close_event: u32,
/// Byte offset where the bracket text starts (end of the opener).
pub text_start: usize,
/// Byte offset of the closing `]`.
pub text_end: usize,
/// Byte offset just after the closing `]`, where the suffix starts.
pub suffix_start: usize,
/// Byte offset just past the suffix; equals `suffix_start` for shortcut references.
pub suffix_end: usize,
/// How the link target was specified.
pub kind: LinkKind,
}
/// How a resolved bracket pair specifies its target.
#[derive(Debug, Clone)]
pub enum LinkKind {
/// `[text](dest "title")` — destination and optional title given inline.
Inline { dest: String, title: Option<String> },
/// `[text][label]` — `label` is stored as written (not normalized).
FullReference { label: String },
/// `[text][]` — the bracket text itself is the label.
CollapsedReference,
/// `[text]` — no suffix; the bracket text itself is the label.
ShortcutReference,
}
/// Builds the inline IR for `text[start..end]` into a freshly allocated vector.
///
/// Convenience wrapper around `build_ir_into` for callers that do not reuse
/// an event buffer.
pub fn build_ir(text: &str, start: usize, end: usize, config: &ParserOptions) -> Vec<IrEvent> {
    let mut ir: Vec<IrEvent> = Vec::new();
    build_ir_into(text, start, end, config, &mut ir);
    ir
}
/// Scans `text[start..end]` and fills `events` with the inline IR event stream.
///
/// `events` is cleared first, so the buffer can be reused across calls. The
/// scanner skips bytes that cannot start anything (per `build_ir_byte_mask`)
/// and, at each candidate byte, tries the constructs below in a fixed order;
/// the first one that parses and ends at or before `end` wins:
/// escapes, code spans, math (non-CommonMark), native spans, autolinks, raw
/// HTML, inline footnotes, footnote references, bracketed citations, bare
/// citations, bracketed spans, `![` / `[` openers, `]` closers, `*` / `_`
/// delimiter runs, and line endings. Bytes claimed by nothing accumulate into
/// `Text` events.
pub(super) fn build_ir_into(
text: &str,
start: usize,
end: usize,
config: &ParserOptions,
events: &mut Vec<IrEvent>,
) {
events.clear();
let bytes = text.as_bytes();
let exts = &config.extensions;
let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
// `pos` is the scan cursor; `text_run_start` marks where the pending plain-text run began.
let mut pos = start;
let mut text_run_start = start;
// End offset of the most recent bracket-shaped link/image found by
// `try_pandoc_bracket_link_extent`; positions before it count as "inside a bracket".
let mut pandoc_bracket_extent: usize = 0;
let mask = build_ir_byte_mask(config);
// Emits the pending plain-text run (if non-empty) as a `Text` event.
// The caller is responsible for resetting `text_run_start` afterwards.
macro_rules! flush_text {
() => {
if pos > text_run_start {
events.push(IrEvent::Text {
start: text_run_start,
end: pos,
});
}
};
}
while pos < end {
// Fast path: skip bytes that cannot start any construct.
while pos < end && !mask[bytes[pos] as usize] {
pos += 1;
}
if pos >= end {
break;
}
let b = bytes[pos];
// Non-CommonMark only: when a `[` or `![` here parses as a whole link/image,
// remember its extent so `<`-constructs inside it are not split out below.
if !is_commonmark
&& pos >= pandoc_bracket_extent
&& (b == b'[' || (b == b'!' && pos + 1 < end && bytes[pos + 1] == b'['))
&& let Some(len) = try_pandoc_bracket_link_extent(text, pos, end, config)
{
pandoc_bracket_extent = pos + len;
}
let in_pandoc_bracket = !is_commonmark && pos < pandoc_bracket_extent;
// Backslash escapes. Whether an escape type is honoured depends on dialect/extensions.
if b == b'\\'
&& let Some((len, _ch, escape_type)) = try_parse_escape(&text[pos..])
&& pos + len <= end
{
let enabled = match escape_type {
EscapeType::Literal => is_commonmark || exts.all_symbols_escapable,
EscapeType::HardLineBreak => exts.escaped_line_breaks,
EscapeType::NonbreakingSpace => exts.all_symbols_escapable,
};
if enabled {
flush_text!();
let kind = match escape_type {
// An escaped line break becomes a `HardBreak` event rather than a `Construct`.
EscapeType::HardLineBreak => {
events.push(IrEvent::HardBreak {
start: pos,
end: pos + len,
});
pos += len;
text_run_start = pos;
continue;
}
EscapeType::Literal | EscapeType::NonbreakingSpace => ConstructKind::Escape,
};
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind,
});
pos += len;
text_run_start = pos;
continue;
}
}
// Code spans.
if b == b'`'
&& let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::CodeSpan,
});
pos += len;
text_run_start = pos;
continue;
}
// Math spans (non-CommonMark). Tried after escapes, so an enabled escape at a
// backslash takes precedence over backslash-delimited math.
if !is_commonmark && let Some(len) = try_pandoc_math_opaque(text, pos, end, config) {
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::PandocOpaque,
});
pos += len;
text_run_start = pos;
continue;
}
// Native spans (non-CommonMark), skipped inside a recognised bracket link.
if !is_commonmark
&& !in_pandoc_bracket
&& b == b'<'
&& exts.native_spans
&& let Some((len, _, _)) = try_parse_native_span(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::NativeSpan,
});
pos += len;
text_run_start = pos;
continue;
}
// Autolinks first, then raw inline HTML; both skipped inside a recognised bracket link.
if b == b'<' && !in_pandoc_bracket {
if exts.autolinks
&& let Some((len, _)) = try_parse_autolink(&text[pos..], is_commonmark)
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::Autolink,
});
pos += len;
text_run_start = pos;
continue;
}
if exts.raw_html
&& let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::InlineHtml,
});
pos += len;
text_run_start = pos;
continue;
}
}
// Inline footnotes (`^...`), non-CommonMark only.
if !is_commonmark
&& b == b'^'
&& exts.inline_footnotes
&& let Some((len, _)) = try_parse_inline_footnote(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::InlineFootnote,
});
pos += len;
text_run_start = pos;
continue;
}
// Footnote references (`[^...`), non-CommonMark only.
if !is_commonmark
&& b == b'['
&& pos + 1 < end
&& bytes[pos + 1] == b'^'
&& exts.footnotes
&& let Some((len, _)) = try_parse_footnote_reference(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::FootnoteReference,
});
pos += len;
text_run_start = pos;
continue;
}
// Bracketed citations, non-CommonMark only.
if !is_commonmark
&& b == b'['
&& exts.citations
&& let Some((len, _)) = try_parse_bracketed_citation(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::BracketedCitation,
});
pos += len;
text_run_start = pos;
continue;
}
// Bare citations starting with `@` or `-@`, non-CommonMark only.
if !is_commonmark
&& (b == b'@' || (b == b'-' && pos + 1 < end && bytes[pos + 1] == b'@'))
&& (exts.citations || exts.quarto_crossrefs)
&& let Some((len, _, _)) = try_parse_bare_citation(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::BareCitation,
});
pos += len;
text_run_start = pos;
continue;
}
// Bracketed spans, non-CommonMark only.
if !is_commonmark
&& b == b'['
&& exts.bracketed_spans
&& let Some((len, _, _)) = try_parse_bracketed_span(&text[pos..])
&& pos + len <= end
{
flush_text!();
events.push(IrEvent::Construct {
start: pos,
end: pos + len,
kind: ConstructKind::BracketedSpan,
});
pos += len;
text_run_start = pos;
continue;
}
// Image opener `![` (two bytes). Resolution happens later in `process_brackets`.
if b == b'!'
&& pos + 1 < end
&& bytes[pos + 1] == b'['
&& (exts.inline_images || exts.reference_links)
{
flush_text!();
events.push(IrEvent::OpenBracket {
start: pos,
end: pos + 2,
is_image: true,
active: true,
resolution: None,
unresolved_ref: None,
});
pos += 2;
text_run_start = pos;
continue;
}
// Link opener `[` (one byte).
if b == b'[' && (exts.inline_links || exts.reference_links) {
flush_text!();
events.push(IrEvent::OpenBracket {
start: pos,
end: pos + 1,
is_image: false,
active: true,
resolution: None,
unresolved_ref: None,
});
pos += 1;
text_run_start = pos;
continue;
}
// Every `]` that reaches this point becomes a (so far unmatched) closer.
if b == b']' {
flush_text!();
events.push(IrEvent::CloseBracket {
pos,
matched: false,
});
pos += 1;
text_run_start = pos;
continue;
}
// Emphasis delimiters: take the maximal run of the same byte and record
// whether it may open and/or close emphasis.
if b == b'*' || b == b'_' {
flush_text!();
let mut run_end = pos;
while run_end < end && bytes[run_end] == b {
run_end += 1;
}
let count = run_end - pos;
let (can_open, can_close) = compute_flanking(text, pos, count, b, config.dialect);
events.push(IrEvent::DelimRun {
ch: b,
start: pos,
end: run_end,
can_open,
can_close,
matches: Vec::new(),
});
pos = run_end;
text_run_start = pos;
continue;
}
// Line endings: `\n` or `\r\n`. A lone `\r` is not treated as a line ending here.
if b == b'\n' || (b == b'\r' && pos + 1 < end && bytes[pos + 1] == b'\n') {
let nl_len = if b == b'\r' { 2 } else { 1 };
// Count spaces directly before the line ending, without reaching back
// past the start of the pending text run.
let mut trailing_spaces = 0;
let mut s = pos;
while s > text_run_start && bytes[s - 1] == b' ' {
trailing_spaces += 1;
s -= 1;
}
// Two or more trailing spaces make a hard break; the spaces belong to the
// break event, so the preceding text run ends at `s`.
if trailing_spaces >= 2 {
if s > text_run_start {
events.push(IrEvent::Text {
start: text_run_start,
end: s,
});
}
events.push(IrEvent::HardBreak {
start: s,
end: pos + nl_len,
});
pos += nl_len;
text_run_start = pos;
continue;
}
flush_text!();
events.push(IrEvent::SoftBreak {
start: pos,
end: pos + nl_len,
});
pos += nl_len;
text_run_start = pos;
continue;
}
// Nothing matched: the byte stays part of the pending text run. Advance by a
// whole character so `pos` stays on a UTF-8 boundary.
let ch_len = text[pos..]
.chars()
.next()
.map_or(1, std::primitive::char::len_utf8);
pos += ch_len.max(1);
}
// Emit whatever plain text is still pending at the end of the range.
flush_text!();
}
/// Builds the 256-entry lookup table of bytes at which the IR scanner must
/// stop and try a construct; every other byte is skipped as plain text.
///
/// Line endings, backslash, backtick, `*` and `_` are always significant; the
/// remaining bytes are enabled only when an extension that needs them is on.
fn build_ir_byte_mask(config: &ParserOptions) -> [bool; 256] {
    /// Flags every byte of `significant` in `table`.
    fn mark(table: &mut [bool; 256], significant: &[u8]) {
        for &byte in significant {
            table[usize::from(byte)] = true;
        }
    }

    let exts = &config.extensions;
    let non_commonmark = config.dialect != crate::options::Dialect::CommonMark;
    let mut table = [false; 256];

    mark(&mut table, b"\n\r\\`*_");

    let needs_brackets = exts.inline_links
        || exts.reference_links
        || exts.inline_images
        || exts.bracketed_spans
        || exts.footnotes
        || exts.citations;
    if needs_brackets {
        mark(&mut table, b"[]");
    }

    let needs_bang = exts.inline_images || exts.reference_links;
    if needs_bang {
        mark(&mut table, b"!");
    }

    let needs_angle = exts.autolinks || exts.raw_html || (non_commonmark && exts.native_spans);
    if needs_angle {
        mark(&mut table, b"<");
    }

    if non_commonmark {
        if exts.inline_footnotes {
            mark(&mut table, b"^");
        }
        if exts.citations || exts.quarto_crossrefs {
            mark(&mut table, b"@-");
        }
        let any_dollar_or_backslash_math = exts.tex_math_dollars
            || exts.tex_math_gfm
            || exts.tex_math_single_backslash
            || exts.tex_math_double_backslash;
        if any_dollar_or_backslash_math {
            mark(&mut table, b"$");
        }
    }

    table
}
/// Decides whether the delimiter run of `count` bytes of `ch` at `pos` may
/// open and/or close emphasis, returning `(can_open, can_close)`.
///
/// * Pandoc dialect: a run can open unless it is followed by whitespace or the
///   end of the text, and can always close. For `_`, an alphanumeric character
///   before the run forbids opening and one after it forbids closing.
/// * Other dialects: `*` uses left/right flanking directly; `_` additionally
///   requires the run to be one-sided, or adjacent to punctuation on the outer
///   side.
fn compute_flanking(
    text: &str,
    pos: usize,
    count: usize,
    ch: u8,
    dialect: crate::options::Dialect,
) -> (bool, bool) {
    // Characters immediately before and after the run (`None` at the text edges).
    let neighbours = || {
        let before = text[..pos].chars().next_back();
        let after = text.get(pos + count..).and_then(|rest| rest.chars().next());
        (before, after)
    };

    if dialect == crate::options::Dialect::Pandoc {
        let (before, after) = neighbours();
        let opens = after.is_some_and(|c| !c.is_whitespace());
        if ch != b'_' {
            return (opens, true);
        }
        let is_alnum = |c: Option<char>| c.is_some_and(char::is_alphanumeric);
        return (opens && !is_alnum(before), !is_alnum(after));
    }

    let left = is_left_flanking(text, pos, count);
    let right = is_right_flanking(text, pos, count);
    if ch == b'*' {
        return (left, right);
    }

    let (before, after) = neighbours();
    let is_punct = |c: Option<char>| c.is_some_and(is_unicode_punct_or_symbol);
    (
        left && (!right || is_punct(before)),
        right && (!left || is_punct(after)),
    )
}
/// Tries every enabled math syntax at `pos` and returns the byte length of the
/// first one that parses and ends at or before `end`.
///
/// At `$`: display math, then inline math (`tex_math_dollars`), then GFM inline
/// math (`tex_math_gfm`). At a backslash: double-backslash display/inline math,
/// then single-backslash display/inline math. Any other byte yields `None`.
fn try_pandoc_math_opaque(
    text: &str,
    pos: usize,
    end: usize,
    config: &ParserOptions,
) -> Option<usize> {
    let exts = &config.extensions;
    // Accept a parsed length only when the span stays inside the scanned range.
    let within = |len: usize| (pos + len <= end).then_some(len);

    match text.as_bytes()[pos] {
        b'$' => {
            let rest = &text[pos..];
            if exts.tex_math_dollars {
                let hit = try_parse_display_math(rest)
                    .and_then(|(len, _)| within(len))
                    .or_else(|| try_parse_inline_math(rest).and_then(|(len, _)| within(len)));
                if hit.is_some() {
                    return hit;
                }
            }
            if exts.tex_math_gfm {
                return try_parse_gfm_inline_math(rest).and_then(|(len, _)| within(len));
            }
            None
        }
        b'\\' => {
            let rest = &text[pos..];
            if exts.tex_math_double_backslash {
                let hit = try_parse_double_backslash_display_math(rest)
                    .and_then(|(len, _)| within(len))
                    .or_else(|| {
                        try_parse_double_backslash_inline_math(rest)
                            .and_then(|(len, _)| within(len))
                    });
                if hit.is_some() {
                    return hit;
                }
            }
            if exts.tex_math_single_backslash {
                return try_parse_single_backslash_display_math(rest)
                    .and_then(|(len, _)| within(len))
                    .or_else(|| {
                        try_parse_single_backslash_inline_math(rest)
                            .and_then(|(len, _)| within(len))
                    });
            }
            None
        }
        _ => None,
    }
}
/// Returns the byte length of a complete link or image starting at `pos`, if
/// one parses there and ends at or before `end`.
///
/// At `!` the byte must be followed by `[` (inside the range); inline images
/// are tried before reference images. Otherwise inline links are tried before
/// reference links. Each form is attempted only when its extension is enabled.
fn try_pandoc_bracket_link_extent(
    text: &str,
    pos: usize,
    end: usize,
    config: &ParserOptions,
) -> Option<usize> {
    let exts = &config.extensions;
    let ctx = LinkScanContext::from_options(config);
    let shortcut_ok = exts.shortcut_reference_links;
    let bytes = text.as_bytes();
    // Accept a parsed length only when the span stays inside the scanned range.
    let within = |len: usize| (pos + len <= end).then_some(len);

    if bytes[pos] == b'!' {
        let bracket_follows = pos + 1 < end && bytes[pos + 1] == b'[';
        if !bracket_follows {
            return None;
        }
        if exts.inline_images {
            let inline = try_parse_inline_image(&text[pos..], ctx)
                .and_then(|(len, _, _, _)| within(len));
            if inline.is_some() {
                return inline;
            }
        }
        if !exts.reference_links {
            return None;
        }
        return try_parse_reference_image(&text[pos..], shortcut_ok)
            .and_then(|(len, _, _, _)| within(len));
    }

    if exts.inline_links {
        let inline = try_parse_inline_link(&text[pos..], false, ctx)
            .and_then(|(len, _, _, _)| within(len));
        if inline.is_some() {
            return inline;
        }
    }
    if !exts.reference_links {
        return None;
    }
    try_parse_reference_link(&text[pos..], shortcut_ok, exts.inline_links, ctx)
        .and_then(|(len, _, _, _)| within(len))
}
/// Approximates "punctuation or symbol" for flanking checks.
///
/// ASCII characters use `char::is_ascii_punctuation`; any non-ASCII character
/// counts when it is neither alphanumeric nor whitespace.
fn is_unicode_punct_or_symbol(c: char) -> bool {
    match c {
        ascii if ascii.is_ascii() => ascii.is_ascii_punctuation(),
        other => !(other.is_alphanumeric() || other.is_whitespace()),
    }
}
/// Returns whether the delimiter run `text[run_start..run_start + run_len]`
/// is left-flanking.
///
/// The run must not be followed by whitespace or the end of the text. If it is
/// followed by punctuation, it must also be preceded by whitespace,
/// punctuation, or the start of the text.
fn is_left_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
    let following = text
        .get(run_start + run_len..)
        .and_then(|rest| rest.chars().next());
    let preceding = text[..run_start].chars().next_back();

    match following {
        None => false,
        Some(next) if next.is_whitespace() => false,
        Some(next) if !is_unicode_punct_or_symbol(next) => true,
        Some(_) => match preceding {
            None => true,
            Some(prev) => prev.is_whitespace() || is_unicode_punct_or_symbol(prev),
        },
    }
}
/// Returns whether the delimiter run `text[run_start..run_start + run_len]`
/// is right-flanking.
///
/// The run must not be preceded by whitespace or the start of the text. If it
/// is preceded by punctuation, it must also be followed by whitespace,
/// punctuation, or the end of the text.
fn is_right_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
    let following = text
        .get(run_start + run_len..)
        .and_then(|rest| rest.chars().next());
    let preceding = text[..run_start].chars().next_back();

    match preceding {
        None => false,
        Some(prev) if prev.is_whitespace() => false,
        Some(prev) if !is_unicode_punct_or_symbol(prev) => true,
        Some(_) => match following {
            None => true,
            Some(next) => next.is_whitespace() || is_unicode_punct_or_symbol(next),
        },
    }
}
/// Resolves emphasis over the whole event list for the given dialect.
///
/// Matches are recorded in place on the `DelimRun` events.
pub fn process_emphasis(events: &mut [IrEvent], dialect: crate::options::Dialect) {
    let event_count = events.len();
    process_emphasis_in_range(events, 0, event_count, dialect);
}
/// Resolves emphasis among the events with indices in `lo..hi`, with no
/// events excluded from consideration.
pub fn process_emphasis_in_range(
    events: &mut [IrEvent],
    lo: usize,
    hi: usize,
    dialect: crate::options::Dialect,
) {
    let no_exclusions: Option<&[bool]> = None;
    process_emphasis_in_range_filtered(events, lo, hi, no_exclusions, dialect);
}
/// Resolves emphasis among the events in `lo..hi`, skipping events flagged in
/// `excluded` (indexed by event position).
///
/// CommonMark runs a single matching pass. Other dialects repeat the pass,
/// feeding back the pairs rejected by `pandoc_cascade_invalidate` and running
/// every pass after the first in strict mode, until no pair is invalidated or
/// the pass budget (`events.len() + 2`) is used up. They finish with
/// `pandoc_inner_strong_recovery`.
fn process_emphasis_in_range_filtered(
    events: &mut [IrEvent],
    lo: usize,
    hi: usize,
    excluded: Option<&[bool]>,
    dialect: crate::options::Dialect,
) {
    if dialect == crate::options::Dialect::CommonMark {
        run_emphasis_pass(events, lo, hi, excluded, dialect, &[], false);
        return;
    }

    let pass_budget = events.len().saturating_add(2);
    let mut vetoed: Vec<(usize, usize)> = Vec::new();
    for round in 0..pass_budget {
        let strict = round > 0;
        run_emphasis_pass(events, lo, hi, excluded, dialect, &vetoed, strict);
        let newly_vetoed = pandoc_cascade_invalidate(events, excluded);
        if newly_vetoed.is_empty() {
            break;
        }
        vetoed.extend(newly_vetoed);
    }
    pandoc_inner_strong_recovery(events);
}
/// Runs one delimiter-matching pass over the events with indices in `lo..hi`.
///
/// Only `DelimRun` events that have no matches yet and are not flagged in
/// `excluded` take part. Candidate closers are visited left to right; for each
/// one the pass walks backwards over the still-active runs looking for an
/// opener of the same character that is not rejected by any rule. On success a
/// `DelimMatch` is pushed on both events, the consumed bytes are subtracted
/// from both runs, and every run between the two is dropped from this pass.
///
/// * `rejected_pairs` — `(opener_event, closer_event)` index pairs that must
///   not be matched (consulted for non-CommonMark only).
/// * `strict_pandoc` — additionally refuses a pair when a same-character run
///   between opener and closer still has more unconsumed bytes than the pair
///   would consume.
fn run_emphasis_pass(
events: &mut [IrEvent],
lo: usize,
hi: usize,
excluded: Option<&[bool]>,
dialect: crate::options::Dialect,
rejected_pairs: &[(usize, usize)],
strict_pandoc: bool,
) {
let is_commonmark = dialect == crate::options::Dialect::CommonMark;
let hi = hi.min(events.len());
if lo >= hi {
return;
}
// Event indices of the participating delimiter runs, in source order.
let mut delim_idxs: Vec<usize> = events[lo..hi]
.iter()
.enumerate()
.filter_map(|(i, e)| {
let abs = lo + i;
match e {
IrEvent::DelimRun { matches, .. }
if matches.is_empty()
&& excluded.is_none_or(|ex| ex.get(abs).copied() != Some(true)) =>
{
Some(abs)
}
_ => None,
}
})
.collect();
if delim_idxs.is_empty() {
return;
}
// Per-run working state, indexed like `delim_idxs`:
// `count` = bytes still unconsumed, `source_start` = source offset of the first
// unconsumed byte, `removed` = run no longer takes part in this pass.
let mut count: Vec<usize> = Vec::with_capacity(delim_idxs.len());
let mut source_start: Vec<usize> = Vec::with_capacity(delim_idxs.len());
let mut removed: Vec<bool> = vec![false; delim_idxs.len()];
for &ev_idx in &delim_idxs {
if let IrEvent::DelimRun { start, end, .. } = &events[ev_idx] {
count.push(end - start);
source_start.push(*start);
}
}
// Lower bound for the backward opener search, indexed by
// [delimiter char: 0 = `*`, 1 = `_`][closer count % 3][closer can_open].
let mut openers_bottom: [[[Option<usize>; 2]; 3]; 2] = [[[None; 2]; 3]; 2];
// Helpers to step through the runs that are still active.
let first_active =
|removed: &[bool]| -> Option<usize> { (0..removed.len()).find(|&i| !removed[i]) };
let next_active = |removed: &[bool], from: usize| -> Option<usize> {
(from + 1..removed.len()).find(|&i| !removed[i])
};
let prev_active =
|removed: &[bool], from: usize| -> Option<usize> { (0..from).rev().find(|&i| !removed[i]) };
let min_closer_count = 1usize;
let mut closer_local = first_active(&removed);
while let Some(c) = closer_local {
let ev_c_idx = delim_idxs[c];
let (ch_c, can_open_c, can_close_c) = match &events[ev_c_idx] {
IrEvent::DelimRun {
ch,
can_open,
can_close,
..
} => (*ch, *can_open, *can_close),
_ => unreachable!(),
};
// Skip runs that cannot close or have nothing left to give.
if !can_close_c || removed[c] || count[c] < min_closer_count {
closer_local = next_active(&removed, c);
continue;
}
let ch_idx = if ch_c == b'*' { 0 } else { 1 };
let closer_mod = count[c] % 3;
let closer_open_bucket = can_open_c as usize;
let bottom = openers_bottom[ch_idx][closer_mod][closer_open_bucket];
let mut found_opener: Option<usize> = None;
// Walk backwards from the closer looking for an acceptable opener.
let mut walk = prev_active(&removed, c);
while let Some(o) = walk {
if Some(o) == bottom {
break;
}
let ev_o_idx = delim_idxs[o];
let (ch_o, can_open_o, can_close_o) = match &events[ev_o_idx] {
IrEvent::DelimRun {
ch,
can_open,
can_close,
..
} => (*ch, *can_open, *can_close),
_ => unreachable!(),
};
if !removed[o] && ch_o == ch_c && can_open_o {
let oc_sum = count[o] + count[c];
let opener_both = can_open_o && can_close_o;
let closer_both = can_open_c && can_close_c;
// CommonMark only: when either side can both open and close, a combined
// length that is a multiple of 3 is refused unless both lengths are.
let mod3_reject = is_commonmark
&& (opener_both || closer_both)
&& oc_sum.is_multiple_of(3)
&& !(count[o].is_multiple_of(3) && count[c].is_multiple_of(3));
// Non-CommonMark only: refuse 1-vs-2 and 2-vs-1 pairings and any opener
// with four or more remaining bytes.
let pandoc_reject = !is_commonmark
&& ((count[o] == 1 && count[c] == 2)
|| (count[o] == 2 && count[c] == 1)
|| count[o] >= 4);
// Non-CommonMark only: refuse pairs the caller has explicitly rejected.
let pair_rejected = !is_commonmark && {
let oe = delim_idxs[o];
let ce = delim_idxs[c];
rejected_pairs.iter().any(|&(ro, rc)| ro == oe && rc == ce)
};
// Strict mode: refuse the pair when any same-character run strictly
// between the two events has more unconsumed bytes than this pair
// would consume.
let strict_block = strict_pandoc && {
let tentative_consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1
} else if count[o] >= 2 && count[c] >= 2 {
2
} else {
1
};
let lo_evt = delim_idxs[o] + 1;
let hi_evt = delim_idxs[c];
(lo_evt..hi_evt).any(|k| match &events[k] {
IrEvent::DelimRun {
ch: ch_k,
start,
end,
matches,
..
} => {
*ch_k == ch_c && {
let total = end - start;
let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
total.saturating_sub(consumed) > tentative_consume
}
}
_ => false,
})
};
if !mod3_reject && !pandoc_reject && !pair_rejected && !strict_block {
found_opener = Some(o);
break;
}
}
if o == 0 {
break;
}
walk = prev_active(&removed, o);
}
if let Some(o) = found_opener {
// Bytes consumed per side: non-CommonMark with both sides >= 3 takes one;
// otherwise two when both sides have at least two, else one.
let consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1
} else if count[o] >= 2 && count[c] >= 2 {
2
} else {
1
};
let kind = if consume == 2 {
EmphasisKind::Strong
} else {
EmphasisKind::Emph
};
// The opener gives up its rightmost unconsumed bytes, the closer its
// leftmost; both offsets are relative to the start of their event.
let opener_match_offset =
source_start[o] + count[o] - consume - source_start_event(&events[delim_idxs[o]]);
let closer_match_offset = source_start[c] - source_start_event(&events[delim_idxs[c]]);
// NOTE(review): the `as u8` / `as u32` casts below truncate silently for runs
// longer than 255 bytes or event indices above `u32::MAX`.
if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[o]] {
matches.push(DelimMatch {
offset_in_run: opener_match_offset as u8,
len: consume as u8,
is_opener: true,
partner_event: delim_idxs[c] as u32,
partner_offset: closer_match_offset as u8,
kind,
});
}
if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[c]] {
matches.push(DelimMatch {
offset_in_run: closer_match_offset as u8,
len: consume as u8,
is_opener: false,
partner_event: delim_idxs[o] as u32,
partner_offset: opener_match_offset as u8,
kind,
});
}
count[o] -= consume;
source_start[c] += consume;
count[c] -= consume;
// Runs strictly between opener and closer can no longer match in this pass.
let mut between = next_active(&removed, o);
while let Some(idx) = between {
if idx == c {
break;
}
removed[idx] = true;
between = next_active(&removed, idx);
}
if count[o] == 0 {
removed[o] = true;
}
// A closer with bytes left is retried (the cursor is not advanced).
if count[c] == 0 {
removed[c] = true;
closer_local = next_active(&removed, c);
}
} else {
// No opener found: remember how far back the search went for this
// bucket, and drop the run if it can never act as an opener either.
openers_bottom[ch_idx][closer_mod][closer_open_bucket] = prev_active(&removed, c);
if !can_open_c {
removed[c] = true;
}
closer_local = next_active(&removed, c);
}
}
// Only references the bindings; has no runtime effect (presumably kept to
// silence unused-`mut` / unused-variable warnings).
let _ = (&mut delim_idxs, &mut openers_bottom, min_closer_count);
}
/// Removes emphasis pairs that enclose an unfinished delimiter run and returns
/// the `(opener_event, closer_event)` index pairs that were removed.
///
/// A pair is invalidated when some non-excluded `DelimRun` strictly between
/// its opener and closer uses the same delimiter character, can both open and
/// close, and still has unconsumed bytes. Pairs are removed one at a time
/// (first opener event, first opener match) and the scan restarts after each
/// removal, until no such pair remains.
///
/// * `excluded` — optional per-event flags; flagged events are ignored as
///   blockers.
///
/// Only the closer-side match that belongs to the invalidated pair is removed:
/// it is identified by both the opener's event index and the opener's offset,
/// so a closer that is paired with several openers keeps its other matches.
fn pandoc_cascade_invalidate(
    events: &mut [IrEvent],
    excluded: Option<&[bool]>,
) -> Vec<(usize, usize)> {
    let mut invalidated_pairs: Vec<(usize, usize)> = Vec::new();
    if !events.iter().any(|e| matches!(e, IrEvent::DelimRun { .. })) {
        return invalidated_pairs;
    }
    let is_excluded = |k: usize| excluded.is_some_and(|ex| ex.get(k).copied() == Some(true));

    // Scratch buffers reused across iterations: run length and consumed bytes
    // per event (0 for events that are not delimiter runs).
    let mut total: Vec<usize> = Vec::with_capacity(events.len());
    let mut consumed: Vec<usize> = Vec::with_capacity(events.len());

    loop {
        total.clear();
        consumed.clear();
        total.extend(events.iter().map(|e| match e {
            IrEvent::DelimRun { start, end, .. } => end - start,
            _ => 0,
        }));
        consumed.extend(events.iter().map(|e| match e {
            IrEvent::DelimRun { matches, .. } => matches.iter().map(|m| m.len as usize).sum(),
            _ => 0,
        }));

        // Find the first opener match (event index, match index) that is blocked.
        let mut to_invalidate: Option<(usize, usize)> = None;
        'outer: for opener_idx in 0..events.len() {
            let IrEvent::DelimRun {
                ch: ch_o, matches, ..
            } = &events[opener_idx]
            else {
                continue;
            };
            for (mi, m) in matches.iter().enumerate() {
                if !m.is_opener {
                    continue;
                }
                let closer_idx = m.partner_event as usize;
                if closer_idx <= opener_idx || closer_idx >= events.len() {
                    continue;
                }
                let blocked = ((opener_idx + 1)..closer_idx).any(|k| {
                    !is_excluded(k)
                        && consumed[k] < total[k]
                        && matches!(
                            &events[k],
                            IrEvent::DelimRun {
                                ch,
                                can_open: true,
                                can_close: true,
                                ..
                            } if *ch == *ch_o
                        )
                });
                if blocked {
                    to_invalidate = Some((opener_idx, mi));
                    break 'outer;
                }
            }
        }

        let Some((opener_idx, mi)) = to_invalidate else {
            break;
        };
        let (closer_idx, opener_offset) = match &events[opener_idx] {
            IrEvent::DelimRun { matches, .. } => {
                let m = matches[mi];
                (m.partner_event as usize, m.offset_in_run)
            }
            _ => break,
        };
        if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
            matches.remove(mi);
        }
        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
            // Drop only the closer half that mirrors the removed opener half.
            matches.retain(|m| {
                m.is_opener
                    || m.partner_event as usize != opener_idx
                    || m.partner_offset != opener_offset
            });
        }
        invalidated_pairs.push((opener_idx, closer_idx));
    }
    invalidated_pairs
}
/// Post-pass that adds a strong-emphasis pair inside an existing emphasis pair
/// when the closer still has at least two unused delimiter bytes.
///
/// For every `Emph` opener match whose opener run and closer run are both at
/// least three bytes long and whose closer has two or more unconsumed bytes,
/// the function looks (scanning backwards from the closer) for the nearest run
/// strictly between them that uses the same character, has `can_close` set and
/// has at least two unconsumed bytes. When one is found, the closer's `Emph`
/// match is shifted right by two bytes and a two-byte `Strong` pair is recorded
/// between that inner run (at offset 0) and the closer (at the `Emph` match's
/// former offset).
fn pandoc_inner_strong_recovery(events: &mut [IrEvent]) {
let n = events.len();
// Phase 1: collect candidates as (between_idx, opener_idx, closer_idx, len)
// without mutating `events`.
let mut to_apply: Vec<(usize, usize, usize, u8)> = Vec::new();
for opener_idx in 0..n {
let (open_total, open_matches_clone, ch_o) = match &events[opener_idx] {
IrEvent::DelimRun {
start,
end,
matches,
ch,
..
} => (*end - *start, matches.clone(), *ch),
_ => continue,
};
// Only opener runs of three or more bytes are considered.
if open_total < 3 {
continue;
}
for m in open_matches_clone.iter() {
if !m.is_opener || m.kind != EmphasisKind::Emph {
continue;
}
let closer_idx = m.partner_event as usize;
if closer_idx <= opener_idx || closer_idx >= n {
continue;
}
let (close_total, close_consumed) = match &events[closer_idx] {
IrEvent::DelimRun {
start,
end,
matches,
..
} => {
let total = end - start;
let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
(total, consumed)
}
_ => continue,
};
if close_total < 3 {
continue;
}
// The closer must still have two unconsumed bytes to serve as a strong closer.
let leftover = close_total.saturating_sub(close_consumed);
if leftover < 2 {
continue;
}
// Nearest suitable run before the closer; at most one candidate per Emph match.
for k in ((opener_idx + 1)..closer_idx).rev() {
if let IrEvent::DelimRun {
ch,
start,
end,
matches,
can_close,
..
} = &events[k]
{
if *ch != ch_o || !*can_close {
continue;
}
let total = end - start;
let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
let remaining = total.saturating_sub(consumed);
if remaining < 2 {
continue;
}
to_apply.push((k, opener_idx, closer_idx, 2));
break;
}
}
}
}
// Phase 2: apply the collected rewrites.
for (between_idx, opener_idx, closer_idx, len) in to_apply {
// Locate the closer's half of the Emph pair (its index in `matches` and its offset).
let (closer_emph_match_idx, closer_emph_offset) = {
let mut found: Option<(usize, u8)> = None;
if let IrEvent::DelimRun { matches, .. } = &events[closer_idx] {
for (mi, m) in matches.iter().enumerate() {
if !m.is_opener
&& m.partner_event as usize == opener_idx
&& m.kind == EmphasisKind::Emph
{
found = Some((mi, m.offset_in_run));
break;
}
}
}
match found {
Some(x) => x,
None => continue,
}
};
// Locate the opener's half of the same Emph pair.
let opener_emph_match_idx = {
let mut found: Option<usize> = None;
if let IrEvent::DelimRun { matches, .. } = &events[opener_idx] {
for (mi, m) in matches.iter().enumerate() {
if m.is_opener
&& m.partner_event as usize == closer_idx
&& m.kind == EmphasisKind::Emph
{
found = Some(mi);
break;
}
}
}
match found {
Some(x) => x,
None => continue,
}
};
// Shift the Emph closer right by `len` bytes to make room for the Strong
// closer, and keep the opener's `partner_offset` in sync.
let new_closer_emph_offset = closer_emph_offset + len;
if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
matches[closer_emph_match_idx].offset_in_run = new_closer_emph_offset;
}
if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
matches[opener_emph_match_idx].partner_offset = new_closer_emph_offset;
}
// Record the Strong pair: the inner run opens at offset 0, the closer closes
// at the offset the Emph match used to occupy.
if let IrEvent::DelimRun { matches, .. } = &mut events[between_idx] {
matches.push(DelimMatch {
offset_in_run: 0,
len,
is_opener: true,
partner_event: closer_idx as u32,
partner_offset: closer_emph_offset,
kind: EmphasisKind::Strong,
});
}
if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
matches.push(DelimMatch {
offset_in_run: closer_emph_offset,
len,
is_opener: false,
partner_event: between_idx as u32,
partner_offset: 0,
kind: EmphasisKind::Strong,
});
}
}
}
/// Returns the source offset at which a `DelimRun` event starts.
///
/// # Panics
///
/// Panics when `event` is not a `DelimRun`; callers only pass delimiter runs.
fn source_start_event(event: &IrEvent) -> usize {
    let IrEvent::DelimRun { start, .. } = event else {
        unreachable!("source_start_event called on non-DelimRun");
    };
    *start
}
pub fn process_brackets(
events: &mut [IrEvent],
text: &str,
refdefs: Option<&RefdefMap>,
dialect: crate::options::Dialect,
) {
let empty: HashSet<String> = HashSet::new();
let labels: &HashSet<String> = match refdefs {
Some(map) => map.as_ref(),
None => &empty,
};
let is_commonmark = dialect == crate::options::Dialect::CommonMark;
let label_resolves =
|key_norm: &str| -> bool { !key_norm.is_empty() && labels.contains(key_norm) };
let mut i = 0;
while i < events.len() {
let close_pos = match &events[i] {
IrEvent::CloseBracket { pos, .. } => *pos,
_ => {
i += 1;
continue;
}
};
let mut o = match find_active_opener(events, i) {
Some(o) => o,
None => {
i += 1;
continue;
}
};
let (open_end, is_image) = match &events[o] {
IrEvent::OpenBracket { end, is_image, .. } => (*end, *is_image),
_ => unreachable!(),
};
let text_start = open_end;
let text_end = close_pos;
let after_close = close_pos + 1;
if let Some((suffix_end, dest, title)) = try_inline_suffix(text, after_close) {
if !is_image && is_commonmark {
deactivate_earlier_link_openers(events, o);
}
commit_resolution(
events,
o,
i,
text_start,
text_end,
after_close,
suffix_end,
LinkKind::Inline { dest, title },
);
mark_opener_resolved(events, o);
i += 1;
continue;
}
let full_ref_suffix = try_full_reference_suffix(text, after_close);
if let Some((suffix_end, label_raw)) = &full_ref_suffix {
let label_norm = normalize_label(label_raw);
if label_resolves(&label_norm) {
if !is_image && is_commonmark {
deactivate_earlier_link_openers(events, o);
}
commit_resolution(
events,
o,
i,
text_start,
text_end,
after_close,
*suffix_end,
LinkKind::FullReference {
label: label_raw.clone(),
},
);
mark_opener_resolved(events, o);
i += 1;
continue;
}
}
let link_text = &text[text_start..text_end];
let link_text_norm = normalize_label(link_text);
let is_collapsed = is_collapsed_marker(text, after_close);
let collapsed_suffix_end = after_close + 2;
if is_collapsed && label_resolves(&link_text_norm) {
if !is_image && is_commonmark {
deactivate_earlier_link_openers(events, o);
}
commit_resolution(
events,
o,
i,
text_start,
text_end,
after_close,
collapsed_suffix_end,
LinkKind::CollapsedReference,
);
mark_opener_resolved(events, o);
i += 1;
continue;
}
let shortcut_suppressed = full_ref_suffix.is_some() || is_collapsed;
if !shortcut_suppressed && label_resolves(&link_text_norm) {
if !is_image && is_commonmark {
deactivate_earlier_link_openers(events, o);
}
commit_resolution(
events,
o,
i,
text_start,
text_end,
after_close,
after_close,
LinkKind::ShortcutReference,
);
mark_opener_resolved(events, o);
i += 1;
continue;
}
let unresolved_shape = if !is_commonmark {
let (end, has_substantive_label) =
if let Some((suffix_end, label_raw)) = &full_ref_suffix {
(*suffix_end, !normalize_label(label_raw).is_empty())
} else if is_collapsed {
(collapsed_suffix_end, !link_text_norm.is_empty())
} else {
(after_close, !link_text_norm.is_empty())
};
if has_substantive_label {
Some(UnresolvedRefShape {
close_event: i as u32,
text_end,
end,
})
} else {
None
}
} else {
None
};
if let IrEvent::OpenBracket {
active,
unresolved_ref,
..
} = &mut events[o]
{
*active = false;
*unresolved_ref = unresolved_shape;
}
if unresolved_shape.is_some()
&& let IrEvent::CloseBracket { matched, .. } = &mut events[i]
{
*matched = true;
}
let _ = &mut o;
i += 1;
}
}
/// Returns the index of the nearest `OpenBracket` before `close_idx` that is
/// still active and has no resolution, or `None` when there is none.
fn find_active_opener(events: &[IrEvent], close_idx: usize) -> Option<usize> {
    events[..close_idx].iter().rposition(|event| {
        matches!(
            event,
            IrEvent::OpenBracket {
                active: true,
                resolution: None,
                ..
            }
        )
    })
}
/// Deactivates every unresolved link opener (not image opener) located before
/// `open_idx`. Image openers and already-resolved openers are left untouched.
fn deactivate_earlier_link_openers(events: &mut [IrEvent], open_idx: usize) {
    let earlier = &mut events[..open_idx];
    earlier.iter_mut().for_each(|event| {
        if let IrEvent::OpenBracket {
            active,
            is_image: false,
            resolution: None,
            ..
        } = event
        {
            *active = false;
        }
    });
}
/// Clears the `active` flag of the opener at `open_idx`; does nothing when
/// that event is not an `OpenBracket`.
fn mark_opener_resolved(events: &mut [IrEvent], open_idx: usize) {
    let opener = &mut events[open_idx];
    if let IrEvent::OpenBracket { active, .. } = opener {
        *active = false;
    }
}
/// Records a successful link/image resolution on a bracket pair.
///
/// Stores a `BracketResolution` built from the arguments on the opener at
/// `open_idx` and sets `matched` on the closer at `close_idx`. Events of any
/// other variant at those indices are left unchanged.
// The arguments mirror the fields of `BracketResolution` one to one.
#[allow(clippy::too_many_arguments)]
fn commit_resolution(
    events: &mut [IrEvent],
    open_idx: usize,
    close_idx: usize,
    text_start: usize,
    text_end: usize,
    suffix_start: usize,
    suffix_end: usize,
    kind: LinkKind,
) {
    let resolved = BracketResolution {
        close_event: close_idx as u32,
        text_start,
        text_end,
        suffix_start,
        suffix_end,
        kind,
    };
    if let IrEvent::OpenBracket { resolution: slot, .. } = &mut events[open_idx] {
        *slot = Some(resolved);
    }
    if let IrEvent::CloseBracket { matched: flag, .. } = &mut events[close_idx] {
        *flag = true;
    }
}
/// Parses an inline link suffix `(dest "title")` starting at byte `pos`.
///
/// Returns `(end, dest, title)` where `end` is the offset just past the
/// closing `)`. Spaces, tabs and `\n` are skipped after `(`, between the
/// destination and the title, and before `)`. An empty `()` yields an empty
/// destination and no title. Returns `None` when `pos` is not at `(`, when the
/// destination or title fails to parse, or when the closing `)` is missing.
fn try_inline_suffix(text: &str, pos: usize) -> Option<(usize, String, Option<String>)> {
    let bytes = text.as_bytes();
    // Advances past spaces, tabs and newlines starting at `at`.
    let skip_blank = |mut at: usize| {
        while at < bytes.len() && matches!(bytes[at], b' ' | b'\t' | b'\n') {
            at += 1;
        }
        at
    };

    if bytes.get(pos) != Some(&b'(') {
        return None;
    }
    let mut cursor = skip_blank(pos + 1);
    if bytes.get(cursor) == Some(&b')') {
        return Some((cursor + 1, String::new(), None));
    }

    let (dest, after_dest) = parse_link_destination(text, cursor)?;
    cursor = skip_blank(after_dest);

    // A title may start with `"`, `'` or `(`.
    let title = match bytes.get(cursor) {
        Some(b'"' | b'\'' | b'(') => {
            let (value, after_title) = parse_link_title(text, cursor)?;
            cursor = skip_blank(after_title);
            Some(value)
        }
        _ => None,
    };

    (bytes.get(cursor) == Some(&b')')).then(|| (cursor + 1, dest, title))
}
/// Parses a link destination starting at byte offset `start` of `text`.
///
/// Two forms are accepted:
/// * `<...>`: everything up to the closing `>`; an unescaped newline or `<`
///   aborts the parse. The returned string excludes the angle brackets.
/// * bare: runs until whitespace, an ASCII control byte, or a `)` at paren
///   depth zero. Must be non-empty with balanced parentheses.
///
/// In both forms a backslash followed by another byte skips that byte.
/// Returns the raw destination text and the offset just past it.
fn parse_link_destination(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    let len = bytes.len();
    let first = *bytes.get(start)?;

    if first == b'<' {
        let begin = start + 1;
        let mut p = begin;
        while p < len {
            match bytes[p] {
                b'>' | b'\n' | b'<' => break,
                b'\\' if p + 1 < len => p += 2,
                _ => p += 1,
            }
        }
        if bytes.get(p) != Some(&b'>') {
            return None;
        }
        return Some((text[begin..p].to_string(), p + 1));
    }

    let mut depth: i32 = 0;
    let mut p = start;
    while p < len {
        match bytes[p] {
            b'\\' if p + 1 < len => p += 2,
            b'(' => {
                depth += 1;
                p += 1;
            }
            b')' => {
                if depth == 0 {
                    break;
                }
                depth -= 1;
                p += 1;
            }
            // Whitespace and ASCII control bytes end a bare destination.
            b' ' | b'\t' | b'\n' | 0x7f => break,
            ctrl if ctrl < 0x20 => break,
            _ => p += 1,
        }
    }
    if p == start || depth != 0 {
        return None;
    }
    Some((text[start..p].to_string(), p))
}
/// Parses a link title starting at byte offset `start` of `text`.
///
/// The title must open with `"`, `'` or `(` and runs to the matching
/// unescaped `"`, `'` or `)` respectively. A backslash followed by another
/// byte skips that byte, so an escaped closer does not end the title.
///
/// Returns the raw text between the delimiters and the offset just past the
/// closing delimiter. Returns `None` when `start` is out of bounds, when the
/// byte at `start` is not a title opener, or when no closer is found.
fn parse_link_title(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    // Checked read: an out-of-range `start` yields `None` instead of a panic,
    // matching the behaviour of `parse_link_destination`.
    let close = match *bytes.get(start)? {
        b'"' => b'"',
        b'\'' => b'\'',
        b'(' => b')',
        _ => return None,
    };
    let begin = start + 1;
    let mut p = begin;
    while p < bytes.len() {
        let b = bytes[p];
        if b == b'\\' && p + 1 < bytes.len() {
            p += 2;
            continue;
        }
        if b == close {
            return Some((text[begin..p].to_string(), p + 1));
        }
        p += 1;
    }
    None
}
/// Attempts to parse a full-reference label `[label]` beginning at byte
/// offset `pos` of `text`.
///
/// Returns the offset just past the closing `]` together with the raw label
/// text. Returns `None` when `pos` is not a `[`, when an unescaped `[`
/// appears inside the label, when the label is unterminated, or when it is
/// empty. A backslash causes the following byte to be skipped.
fn try_full_reference_suffix(text: &str, pos: usize) -> Option<(usize, String)> {
    let bytes = text.as_bytes();
    if bytes.get(pos) != Some(&b'[') {
        return None;
    }
    let label_start = pos + 1;
    let mut cursor = label_start;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b']' => break,
            b'[' => return None,
            // Skip the backslash together with the byte it escapes.
            b'\\' => cursor += 2,
            _ => cursor += 1,
        }
    }
    // A trailing backslash can push `cursor` past the end; `get` covers that.
    if bytes.get(cursor) != Some(&b']') {
        return None;
    }
    let label = &text[label_start..cursor];
    if label.is_empty() {
        None
    } else {
        Some((cursor + 1, label.to_string()))
    }
}
/// Reports whether the two bytes of `text` starting at `pos` are exactly `[]`.
fn is_collapsed_marker(text: &str, pos: usize) -> bool {
    matches!(text.as_bytes().get(pos..), Some(tail) if tail.starts_with(b"[]"))
}
/// Disposition of a bracket byte position, as stored in a [`BracketPlan`].
///
/// Values are produced by `build_bracket_plan` from the bracket events.
#[derive(Debug, Clone)]
pub enum BracketDispo {
/// Opener whose event carries a resolution; the offsets and `kind` are
/// copied from that resolution.
Open {
is_image: bool,
text_start: usize,
text_end: usize,
suffix_start: usize,
suffix_end: usize,
kind: LinkKind,
},
/// Opener with no resolution but a recorded unresolved-reference shape.
/// `text_start` is the opener event's `end`; `text_end` and `end` come
/// from the shape.
UnresolvedReference {
is_image: bool,
text_start: usize,
text_end: usize,
end: usize,
},
/// Bracket byte with neither a resolution nor an unresolved-reference
/// shape, or an unmatched closing bracket.
Literal,
}
/// Map from position to the [`BracketDispo`] decided for the bracket there.
#[derive(Debug, Default, Clone)]
pub struct BracketPlan {
// Keyed by the position the disposition was inserted under.
by_pos: BTreeMap<usize, BracketDispo>,
}
impl BracketPlan {
/// Returns the disposition stored for `pos`, if any.
pub fn lookup(&self, pos: usize) -> Option<&BracketDispo> {
self.by_pos.get(&pos)
}
/// Returns `true` when the plan holds no dispositions.
pub fn is_empty(&self) -> bool {
self.by_pos.is_empty()
}
}
/// Disposition of a recognised inline construct, as stored in a
/// [`ConstructPlan`].
///
/// `build_construct_plan` maps each `ConstructKind` of the same name to the
/// matching variant; `end` is the `end` of the originating
/// `IrEvent::Construct`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructDispo {
InlineFootnote { end: usize },
NativeSpan { end: usize },
FootnoteReference { end: usize },
BracketedCitation { end: usize },
BareCitation { end: usize },
BracketedSpan { end: usize },
}
/// Map from a construct's start position to its [`ConstructDispo`].
#[derive(Debug, Default, Clone)]
pub struct ConstructPlan {
// Keyed by the `start` of the originating construct event.
by_pos: BTreeMap<usize, ConstructDispo>,
}
impl ConstructPlan {
/// Returns the disposition stored for `pos`, if any.
pub fn lookup(&self, pos: usize) -> Option<&ConstructDispo> {
self.by_pos.get(&pos)
}
/// Returns `true` when the plan holds no dispositions.
pub fn is_empty(&self) -> bool {
self.by_pos.is_empty()
}
}
/// Builds a [`ConstructPlan`] from the `Construct` events in `events`.
///
/// Each footnote, native-span, citation or bracketed-span construct is
/// recorded under its `start` position with its `end`. Construct kinds
/// without a matching [`ConstructDispo`] variant, and all non-construct
/// events, are ignored. A later event with the same `start` replaces an
/// earlier one.
pub fn build_construct_plan(events: &[IrEvent]) -> ConstructPlan {
    let mut plan: BTreeMap<usize, ConstructDispo> = BTreeMap::new();
    for ev in events {
        let IrEvent::Construct { start, end, kind } = ev else {
            continue;
        };
        let end = *end;
        let dispo = match kind {
            ConstructKind::InlineFootnote => ConstructDispo::InlineFootnote { end },
            ConstructKind::NativeSpan => ConstructDispo::NativeSpan { end },
            ConstructKind::FootnoteReference => ConstructDispo::FootnoteReference { end },
            ConstructKind::BracketedCitation => ConstructDispo::BracketedCitation { end },
            ConstructKind::BareCitation => ConstructDispo::BareCitation { end },
            ConstructKind::BracketedSpan => ConstructDispo::BracketedSpan { end },
            _ => continue,
        };
        plan.insert(*start, dispo);
    }
    ConstructPlan { by_pos: plan }
}
pub fn build_bracket_plan(events: &[IrEvent]) -> BracketPlan {
let mut by_pos: BTreeMap<usize, BracketDispo> = BTreeMap::new();
for ev in events {
match ev {
IrEvent::OpenBracket {
start,
is_image,
resolution: Some(res),
..
} => {
by_pos.insert(
*start,
BracketDispo::Open {
is_image: *is_image,
text_start: res.text_start,
text_end: res.text_end,
suffix_start: res.suffix_start,
suffix_end: res.suffix_end,
kind: res.kind.clone(),
},
);
}
IrEvent::OpenBracket {
start,
end,
is_image,
resolution: None,
unresolved_ref: Some(shape),
..
} => {
by_pos.insert(
*start,
BracketDispo::UnresolvedReference {
is_image: *is_image,
text_start: *end,
text_end: shape.text_end,
end: shape.end,
},
);
}
IrEvent::OpenBracket {
start,
is_image,
resolution: None,
unresolved_ref: None,
..
} => {
let len = if *is_image { 2 } else { 1 };
for off in 0..len {
by_pos.insert(*start + off, BracketDispo::Literal);
}
}
IrEvent::CloseBracket {
pos,
matched: false,
} => {
by_pos.insert(*pos, BracketDispo::Literal);
}
_ => {}
}
}
BracketPlan { by_pos }
}
/// Runs the full inline planning pipeline and returns the emphasis, bracket
/// and construct plans.
///
/// `text`, `start` and `end` are forwarded to `build_ir_into` (presumably a
/// byte range of `text` — confirm against that function). `config` supplies
/// the dialect and the optional reference-definition labels.
///
/// Passes, in order: build the IR events, resolve brackets, run emphasis
/// inside each bracket pair, degrade unresolved-reference pairs whose
/// interior still has unmatched delimiter bytes, run top-level emphasis
/// outside the pairs, then build the three plans.
pub fn build_full_plans(
text: &str,
start: usize,
end: usize,
config: &ParserOptions,
) -> InlinePlans {
// Borrow reusable buffers from the thread-local pool; they are returned
// when `scratch` is dropped.
let mut scratch = ScratchEvents::checkout();
// `checkout` always sets `inner` to `Some`, so this unwrap cannot fail.
let bundle = scratch.inner.as_mut().unwrap();
bundle.events.clear();
bundle.bracket_pairs.clear();
bundle.excluded.clear();
build_ir_into(text, start, end, config, &mut bundle.events);
process_brackets(
&mut bundle.events,
text,
config.refdef_labels.as_ref(),
config.dialect,
);
// Collect (opener index, closer index) for every opener that is either
// resolved or carries an unresolved-reference shape.
bundle.bracket_pairs.extend(
bundle
.events
.iter()
.enumerate()
.filter_map(|(i, ev)| match ev {
IrEvent::OpenBracket {
resolution: Some(res),
..
} => Some((i, res.close_event as usize)),
IrEvent::OpenBracket {
resolution: None,
unresolved_ref: Some(shape),
..
} => Some((i, shape.close_event as usize)),
_ => None,
}),
);
// Order by closer index ascending; ties go to the larger opener index.
bundle
.bracket_pairs
.sort_by(|a, b| a.1.cmp(&b.1).then(b.0.cmp(&a.0)));
// Pair emphasis delimiters strictly inside each bracket pair.
for i in 0..bundle.bracket_pairs.len() {
let (open_idx, close_idx) = bundle.bracket_pairs[i];
process_emphasis_in_range(&mut bundle.events, open_idx + 1, close_idx, config.dialect);
}
// Degrade unresolved-reference pairs whose interior still contains
// unmatched delimiter bytes: drop the shape and un-match the closer.
for i in 0..bundle.bracket_pairs.len() {
let (open_idx, close_idx) = bundle.bracket_pairs[i];
let is_unresolved = matches!(
&bundle.events[open_idx],
IrEvent::OpenBracket {
resolution: None,
unresolved_ref: Some(_),
..
}
);
if !is_unresolved {
continue;
}
if !range_has_unmatched_delim_bytes(&bundle.events, open_idx + 1, close_idx) {
continue;
}
if let IrEvent::OpenBracket { unresolved_ref, .. } = &mut bundle.events[open_idx] {
*unresolved_ref = None;
}
if let IrEvent::CloseBracket { matched, .. } = &mut bundle.events[close_idx] {
*matched = false;
}
}
// Top-level emphasis pass over all events, skipping those flagged in
// `excluded` (the events strictly between each opener and its closer).
// NOTE(review): pairs degraded above stay in `bracket_pairs`, so their
// interiors are still excluded here — confirm this is intended.
let len = bundle.events.len();
if bundle.bracket_pairs.is_empty() {
process_emphasis_in_range_filtered(&mut bundle.events, 0, len, None, config.dialect);
} else {
bundle.excluded.resize(len, false);
for &(open_idx, close_idx) in &bundle.bracket_pairs {
for slot in bundle
.excluded
.iter_mut()
.take(close_idx)
.skip(open_idx + 1)
{
*slot = true;
}
}
process_emphasis_in_range_filtered(
&mut bundle.events,
0,
len,
Some(&bundle.excluded),
config.dialect,
);
}
InlinePlans {
emphasis: build_emphasis_plan(&bundle.events),
brackets: build_bracket_plan(&bundle.events),
constructs: build_construct_plan(&bundle.events),
}
}
/// Reports whether any delimiter run in `events[lo..hi]` that can open or
/// close still has bytes not covered by its recorded matches.
///
/// `hi` is clamped to `events.len()`. Runs that can neither open nor close
/// are ignored.
fn range_has_unmatched_delim_bytes(events: &[IrEvent], lo: usize, hi: usize) -> bool {
    let upper = hi.min(events.len());
    events[lo..upper].iter().any(|ev| {
        let IrEvent::DelimRun {
            start,
            end,
            matches,
            can_open,
            can_close,
            ..
        } = ev
        else {
            return false;
        };
        if !(*can_open || *can_close) {
            return false;
        }
        let run_len = end - start;
        let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
        consumed < run_len
    })
}
/// Guard owning a [`ScratchBundle`] taken from `IR_EVENT_POOL`; its `Drop`
/// impl may hand the bundle back to the pool.
struct ScratchEvents {
// `Option` so that `Drop` can move the bundle out with `take()`.
inner: Option<ScratchBundle>,
}
/// Reusable working buffers for `build_full_plans`.
#[derive(Default)]
struct ScratchBundle {
// IR events for the text being planned.
events: Vec<IrEvent>,
// (opener event index, closer event index) pairs.
bracket_pairs: Vec<(usize, usize)>,
// Per-event flags passed to the filtered top-level emphasis pass.
excluded: Vec<bool>,
}
// Per-thread stack of idle `ScratchBundle`s: `ScratchEvents::checkout` pops
// from it and the `Drop` impl pushes back (at most 8 bundles are kept).
thread_local! {
static IR_EVENT_POOL: std::cell::RefCell<Vec<ScratchBundle>> =
const { std::cell::RefCell::new(Vec::new()) };
}
impl ScratchEvents {
    /// Takes a bundle from the thread-local pool, or creates a fresh default
    /// one when the pool is empty.
    fn checkout() -> Self {
        let recycled = IR_EVENT_POOL.with(|pool| pool.borrow_mut().pop());
        Self {
            inner: Some(recycled.unwrap_or_default()),
        }
    }
}
impl Drop for ScratchEvents {
    /// Clears the bundle's buffers and returns it to the thread-local pool.
    ///
    /// The bundle is discarded instead when its event buffer has grown past
    /// 8192 slots of capacity or when the pool already holds 8 bundles.
    fn drop(&mut self) {
        let Some(mut bundle) = self.inner.take() else {
            return;
        };
        bundle.events.clear();
        bundle.bracket_pairs.clear();
        bundle.excluded.clear();
        // Oversized buffers are dropped rather than kept alive in the pool.
        if bundle.events.capacity() > 8192 {
            return;
        }
        IR_EVENT_POOL.with(|cell| {
            let mut idle = cell.borrow_mut();
            if idle.len() < 8 {
                idle.push(bundle);
            }
        });
    }
}
/// The three plans returned together by `build_full_plans`.
#[derive(Debug, Default, Clone)]
pub struct InlinePlans {
/// Per-position dispositions for emphasis delimiter bytes.
pub emphasis: EmphasisPlan,
/// Per-position dispositions for bracket bytes.
pub brackets: BracketPlan,
/// Dispositions for recognised constructs, keyed by start position.
pub constructs: ConstructPlan,
}
/// Builds an [`EmphasisPlan`] from the delimiter runs in `events`.
///
/// Every match recorded on a run is entered at `start + offset_in_run`:
/// openers as [`DelimChar::Open`] pointing at the partner position, closers
/// as [`DelimChar::Close`]. A match whose partner event is not a delimiter
/// run is skipped. Every remaining position in `start..end` of the run is
/// filled with [`DelimChar::Literal`].
pub fn build_emphasis_plan(events: &[IrEvent]) -> EmphasisPlan {
    let mut plan: BTreeMap<usize, DelimChar> = BTreeMap::new();
    for ev in events {
        let IrEvent::DelimRun {
            start,
            end,
            matches,
            ..
        } = ev
        else {
            continue;
        };
        for m in matches {
            let here = *start + m.offset_in_run as usize;
            let IrEvent::DelimRun {
                start: partner_start,
                ..
            } = &events[m.partner_event as usize]
            else {
                continue;
            };
            let partner_pos = *partner_start + m.partner_offset as usize;
            let disposition = if m.is_opener {
                DelimChar::Open {
                    len: m.len,
                    partner: partner_pos,
                    partner_len: m.len,
                    kind: m.kind,
                }
            } else {
                DelimChar::Close
            };
            plan.insert(here, disposition);
        }
        // Positions not claimed by a match stay literal.
        for pos in *start..*end {
            plan.entry(pos).or_insert(DelimChar::Literal);
        }
    }
    EmphasisPlan::from_dispositions(plan)
}
// Unit tests for IR scanning, bracket resolution, emphasis pairing and the
// combined plans produced by `build_full_plans`.
#[cfg(test)]
mod tests {
use super::*;
use crate::options::Flavor;
use crate::parser::inlines::inline_ir::DelimChar;
use std::sync::Arc;
// Parser options for the CommonMark flavor with no reference definitions.
fn cm_opts() -> ParserOptions {
let flavor = Flavor::CommonMark;
ParserOptions {
flavor,
dialect: crate::options::Dialect::for_flavor(flavor),
extensions: crate::options::Extensions::for_flavor(flavor),
pandoc_compat: crate::options::PandocCompat::default(),
refdef_labels: None,
}
}
// Builds a `RefdefMap` containing the given labels.
fn refdefs<I: IntoIterator<Item = &'static str>>(labels: I) -> RefdefMap {
Arc::new(labels.into_iter().map(|s| s.to_string()).collect())
}
// `range()` returns the expected span for Text, CloseBracket and
// OpenBracket events.
#[test]
fn ir_event_range_covers_all_variants() {
let txt = IrEvent::Text { start: 0, end: 5 };
assert_eq!(txt.range(), (0, 5));
let close = IrEvent::CloseBracket {
pos: 7,
matched: false,
};
assert_eq!(close.range(), (7, 8));
let open = IrEvent::OpenBracket {
start: 1,
end: 3,
is_image: true,
active: true,
resolution: None,
unresolved_ref: None,
};
assert_eq!(open.range(), (1, 3));
}
// "foo *bar*" scans into alternating Text and `*` DelimRun events.
#[test]
fn scan_records_text_and_delim_run() {
let opts = cm_opts();
let ir = build_ir("foo *bar*", 0, 9, &opts);
assert!(matches!(ir[0], IrEvent::Text { start: 0, end: 4 }));
assert!(matches!(
ir[1],
IrEvent::DelimRun {
ch: b'*',
start: 4,
end: 5,
..
}
));
assert!(matches!(ir[2], IrEvent::Text { start: 5, end: 8 }));
assert!(matches!(
ir[3],
IrEvent::DelimRun {
ch: b'*',
start: 8,
end: 9,
..
}
));
}
// "[foo]" scans into OpenBracket, Text, and an unmatched CloseBracket.
#[test]
fn scan_records_brackets() {
let opts = cm_opts();
let ir = build_ir("[foo]", 0, 5, &opts);
assert!(matches!(
ir[0],
IrEvent::OpenBracket {
start: 0,
end: 1,
is_image: false,
..
}
));
assert!(matches!(ir[1], IrEvent::Text { start: 1, end: 4 }));
assert!(matches!(
ir[2],
IrEvent::CloseBracket {
pos: 4,
matched: false
}
));
}
// "![" is scanned as a single two-byte image opener.
#[test]
fn scan_records_image_bracket() {
let opts = cm_opts();
let ir = build_ir("![alt]", 0, 6, &opts);
assert!(matches!(
ir[0],
IrEvent::OpenBracket {
start: 0,
end: 2,
is_image: true,
..
}
));
}
// Delimiters inside a code span produce no DelimRun events; the span is
// recorded as a CodeSpan construct.
#[test]
fn scan_handles_code_span_opacity() {
let opts = cm_opts();
let ir = build_ir("a `*x*` b", 0, 9, &opts);
let has_delim_run = ir.iter().any(|e| matches!(e, IrEvent::DelimRun { .. }));
assert!(
!has_delim_run,
"code span content should not produce delim runs"
);
assert!(ir.iter().any(|e| matches!(
e,
IrEvent::Construct {
kind: ConstructKind::CodeSpan,
..
}
)));
}
// "*foo*" yields exactly one Emph opener match on the first run.
#[test]
fn process_emphasis_simple_pair() {
let opts = cm_opts();
let mut ir = build_ir("*foo*", 0, 5, &opts);
process_emphasis(&mut ir, opts.dialect);
let opener = ir
.iter()
.find(|e| matches!(e, IrEvent::DelimRun { start: 0, .. }))
.unwrap();
if let IrEvent::DelimRun { matches, .. } = opener {
assert_eq!(matches.len(), 1);
assert!(matches[0].is_opener);
assert_eq!(matches[0].kind, EmphasisKind::Emph);
}
}
// "[foo](/url)" resolves as an inline link with destination "/url".
#[test]
fn brackets_resolve_inline_link() {
let opts = cm_opts();
let mut ir = build_ir("[foo](/url)", 0, 11, &opts);
process_brackets(&mut ir, "[foo](/url)", None, opts.dialect);
let open = ir
.iter()
.find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
.unwrap();
if let IrEvent::OpenBracket { resolution, .. } = open {
let r = resolution.as_ref().expect("inline link resolved");
assert!(matches!(r.kind, LinkKind::Inline { .. }));
if let LinkKind::Inline { dest, .. } = &r.kind {
assert_eq!(dest, "/url");
}
}
}
// With a matching refdef label, "[foo]" resolves as a shortcut reference.
#[test]
fn brackets_shortcut_resolves_only_with_refdef() {
let opts = cm_opts();
let text = "[foo]";
let map = refdefs(["foo"]);
let mut ir = build_ir(text, 0, text.len(), &opts);
process_brackets(&mut ir, text, Some(&map), opts.dialect);
let open = ir
.iter()
.find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
.unwrap();
if let IrEvent::OpenBracket { resolution, .. } = open {
assert!(matches!(
resolution.as_ref().unwrap().kind,
LinkKind::ShortcutReference
));
}
}
// Without any refdef map, a shortcut-shaped bracket gets no resolution.
#[test]
fn brackets_shortcut_falls_through_without_refdef() {
let opts = cm_opts();
let text = "[bar* baz]";
let mut ir = build_ir(text, 0, text.len(), &opts);
process_brackets(&mut ir, text, None, opts.dialect);
let open = ir
.iter()
.find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
.unwrap();
if let IrEvent::OpenBracket { resolution, .. } = open {
assert!(resolution.is_none(), "no refdef → bracket stays literal");
}
}
// A `*` outside a resolved link must not pair with a `*` inside it.
#[test]
fn full_plans_emphasis_does_not_cross_resolved_link_boundary() {
let opts = cm_opts();
let text = "*[bar*](/url)";
let plans = build_full_plans(text, 0, text.len(), &opts);
assert!(
matches!(plans.emphasis.lookup(0), Some(DelimChar::Literal) | None),
"outer `*` at byte 0 must not pair across link boundary, got {:?}",
plans.emphasis.lookup(0)
);
assert!(
matches!(plans.brackets.lookup(1), Some(BracketDispo::Open { .. })),
"link [bar*](/url) must resolve at byte 1"
);
}
// Parser options for the Pandoc flavor with no reference definitions.
fn pandoc_opts() -> ParserOptions {
let flavor = Flavor::Pandoc;
ParserOptions {
flavor,
dialect: crate::options::Dialect::for_flavor(flavor),
extensions: crate::options::Extensions::for_flavor(flavor),
pandoc_compat: crate::options::PandocCompat::default(),
refdef_labels: None,
}
}
// Pandoc: an unresolved bracket whose interior has an unmatched `*` is
// degraded to literal, and the outer `*` at byte 0 opens emphasis.
#[test]
fn full_plans_unresolved_bracket_degrades_when_inner_delim_unmatched() {
let opts = pandoc_opts();
let text = "*foo [bar*] baz*";
let plans = build_full_plans(text, 0, text.len(), &opts);
assert!(
matches!(plans.brackets.lookup(5), Some(BracketDispo::Literal) | None),
"degraded `[` at byte 5 must be Literal/None, got {:?}",
plans.brackets.lookup(5)
);
assert!(
matches!(plans.emphasis.lookup(0), Some(DelimChar::Open { .. })),
"outer `*` at byte 0 must open Emph after degrade, got {:?}",
plans.emphasis.lookup(0)
);
}
// Pandoc: an intraword `_` inside the bracket does not degrade the
// unresolved-reference wrapper.
#[test]
fn full_plans_unresolved_bracket_keeps_wrapper_with_intraword_underscore() {
let opts = pandoc_opts();
let text = "[foo_bar more]";
let plans = build_full_plans(text, 0, text.len(), &opts);
assert!(
matches!(
plans.brackets.lookup(0),
Some(BracketDispo::UnresolvedReference { .. })
),
"wrapper must be preserved across intraword `_`, got {:?}",
plans.brackets.lookup(0)
);
}
// Pandoc: fully paired inner emphasis keeps the unresolved-reference
// wrapper.
#[test]
fn full_plans_unresolved_bracket_keeps_wrapper_when_inner_paired() {
let opts = pandoc_opts();
let text = "[foo *bar*]";
let plans = build_full_plans(text, 0, text.len(), &opts);
assert!(
matches!(
plans.brackets.lookup(0),
Some(BracketDispo::UnresolvedReference { .. })
),
"wrapper must be preserved when inner emph pairs, got {:?}",
plans.brackets.lookup(0)
);
}
// CommonMark: once the inner reference link resolves, the enclosing
// bracket is literal, the trailing `[ref]` resolves on its own, and the
// emphasis opened at byte 5 still pairs.
#[test]
fn full_plans_link_in_link_suppression_for_reference_links() {
let opts = cm_opts();
let text = "[foo *bar [baz][ref]*][ref]";
let mut opts_with_refs = opts.clone();
let labels: HashSet<String> = ["ref".to_string()].into_iter().collect();
opts_with_refs.refdef_labels = Some(std::sync::Arc::new(labels));
let plans = build_full_plans(text, 0, text.len(), &opts_with_refs);
assert!(
matches!(plans.brackets.lookup(10), Some(BracketDispo::Open { .. })),
"inner [baz][ref] must resolve at byte 10, got {:?}",
plans.brackets.lookup(10)
);
assert!(
matches!(plans.brackets.lookup(0), Some(BracketDispo::Literal) | None),
"outer [foo ...][ref] must fall through to literal at byte 0, got {:?}",
plans.brackets.lookup(0)
);
assert!(
matches!(plans.brackets.lookup(22), Some(BracketDispo::Open { .. })),
"trailing [ref] must resolve at byte 22, got {:?}",
plans.brackets.lookup(22)
);
assert!(
matches!(plans.emphasis.lookup(5), Some(DelimChar::Open { .. })),
"emphasis opener at byte 5 must pair, got {:?}",
plans.emphasis.lookup(5)
);
}
}