panache_parser/parser/inlines/inline_ir.rs
1//! Inline IR for both CommonMark and Pandoc dialects.
2//!
3//! The inline parsing pipeline runs in three passes over an intermediate
4//! representation (IR):
5//!
6//! 1. **Scan** ([`build_ir`]): walk the source bytes once, producing a flat
7//! [`Vec<IrEvent>`]. Opaque higher-precedence constructs (escapes, code
8//! spans, autolinks, raw HTML, plus Pandoc math / native spans / inline
9//! footnotes / footnote references / citations / bracketed spans) are
10//! skipped past as a single [`IrEvent::Construct`] event whose source
11//! range is preserved for losslessness. Delimiter runs (`*`/`_`),
12//! bracket markers (`[`, `![`, `]`), soft line breaks, and plain text
13//! spans become distinct events.
14//!
15//! 2. **Process brackets** ([`process_brackets`]) — CommonMark §6.3: the
16//! bracket-stack algorithm walks `]` markers left-to-right. For each
17//! `]`, the algorithm finds the nearest active opener and tries to
18//! resolve the pair as a link or image: inline `[text](dest)`, full
19//! reference `[text][label]`, collapsed `[text][]`, or shortcut
20//! `[text]`. Under CommonMark, reference forms are validated against
21//! the document refdef map and a successful match deactivates all
22//! earlier active openers (§6.3 "links may not contain other links").
23//! Under Pandoc, reference forms resolve shape-only (any non-empty
24//! label) and the deactivation pass is skipped; outer-wins nested-link
25//! semantics are enforced by the emission walk's `suppress_inner_links`
26//! flag instead.
27//!
28//! 3. **Process emphasis** ([`process_emphasis_in_range`]): the classic
29//! delimiter-stack algorithm runs over the [`IrEvent::DelimRun`]
30//! events, pairing openers with closers and recording matches on the
31//! runs. Runs first scoped per resolved bracket pair (innermost
32//! first), then a top-level pass over the residual events. Each match
33//! consumes 1 or 2 inner-edge bytes from each side; leftover bytes
34//! fall through to literal text. Dialect gates (Pandoc flanking rules,
35//! mod-3 rejection, asymmetric (1,2)/(2,1) rejection, opener-count >= 4
36//! rejection, triple-emph nesting flip, cascade-then-rerun) branch on
37//! the `dialect` parameter.
38//!
39//! The emission walk in [`super::core::parse_inline_range_impl`] consumes
40//! three byte-keyed plans built by [`build_full_plans`]: an
41//! [`EmphasisPlan`] for delim-run dispositions, a [`BracketPlan`] for
42//! resolved link/image bracket pairs, and a [`ConstructPlan`] for
43//! standalone Pandoc constructs (inline footnotes, native spans, footnote
44//! references, citations, bracketed spans). Matched delim runs become
45//! `EMPHASIS` / `STRONG` nodes; matched bracket pairs become `LINK` /
46//! `IMAGE` nodes via the dispatcher's `try_parse_*` recognizers (called
47//! to *parse* a matched range, not to *resolve* it). Unmatched delims and
48//! brackets fall through to plain text.
49
50use crate::options::ParserOptions;
51use crate::parser::inlines::refdef_map::{RefdefMap, normalize_label};
52use std::collections::{BTreeMap, HashSet};
53
/// Kind of emphasis a matched delimiter pair denotes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmphasisKind {
    /// Regular emphasis: the match consumes one delimiter byte per side.
    Emph,
    /// Strong emphasis: the match consumes two delimiter bytes per side.
    Strong,
}
59
/// Disposition of a single delimiter byte after emphasis resolution.
///
/// Values are stored in an [`EmphasisPlan`], keyed by source byte position.
#[derive(Debug, Clone, Copy)]
pub enum DelimChar {
    /// Start of an opening marker. The marker spans `len` bytes from this
    /// position; the matching closer starts at `partner` and spans
    /// `partner_len` bytes.
    Open {
        /// Number of bytes in the opening marker.
        len: u8,
        /// Byte position at which the matching closing marker starts.
        partner: usize,
        /// Number of bytes in the matching closing marker.
        partner_len: u8,
        /// Emphasis kind of the matched pair.
        kind: EmphasisKind,
    },
    /// Start of a closing marker. This variant carries no data: the
    /// closer's position and length are recorded on the matching `Open`
    /// entry (`partner` / `partner_len`). Emission jumps past close
    /// markers via that `Open` entry, so this variant is only consulted
    /// defensively.
    Close,
    /// Unmatched delimiter byte; emit as literal text.
    Literal,
}
79
/// Byte-keyed disposition map for `*` / `_` delimiter chars produced by
/// the IR's emphasis pass and consumed by the inline emission walk.
#[derive(Debug, Default, Clone)]
pub struct EmphasisPlan {
    /// Disposition of each recorded delimiter byte, keyed by its source
    /// byte position. Positions without an entry have no disposition.
    by_pos: BTreeMap<usize, DelimChar>,
}
86
87impl EmphasisPlan {
88 pub fn lookup(&self, pos: usize) -> Option<DelimChar> {
89 self.by_pos.get(&pos).copied()
90 }
91
92 pub fn is_empty(&self) -> bool {
93 self.by_pos.is_empty()
94 }
95
96 /// Construct an `EmphasisPlan` from a byte-keyed disposition map.
97 pub fn from_dispositions(by_pos: BTreeMap<usize, DelimChar>) -> Self {
98 Self { by_pos }
99 }
100}
101
102use super::bracketed_spans::try_parse_bracketed_span;
103use super::citations::{try_parse_bare_citation, try_parse_bracketed_citation};
104use super::code_spans::try_parse_code_span;
105use super::escapes::{EscapeType, try_parse_escape};
106use super::inline_footnotes::{try_parse_footnote_reference, try_parse_inline_footnote};
107use super::inline_html::try_parse_inline_html;
108use super::links::{
109 LinkScanContext, try_parse_autolink, try_parse_inline_image, try_parse_inline_link,
110 try_parse_reference_image, try_parse_reference_link,
111};
112use super::math::{
113 try_parse_display_math, try_parse_double_backslash_display_math,
114 try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
115 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
116};
117use super::native_spans::try_parse_native_span;
118
/// One event in the inline IR.
///
/// Events partition the source byte range covered by the IR exactly: their
/// `range()` values are contiguous and non-overlapping, so concatenating
/// them reproduces the original input. This is the losslessness invariant
/// the emission pass relies on.
#[derive(Debug, Clone)]
pub enum IrEvent {
    /// Plain text byte span. Emitted as a single `TEXT` token, possibly
    /// merged with adjacent literal-disposition delim/bracket bytes.
    Text { start: usize, end: usize },

    /// An opaque higher-precedence construct (escape, code span, autolink,
    /// raw HTML, and — under the Pandoc dialect — math, native spans,
    /// footnotes, citations and bracketed spans). The emission pass
    /// re-parses these from the source byte range using the existing
    /// per-construct emitters; we don't store a pre-built `GreenNode`
    /// because `rowan::GreenNodeBuilder` doesn't support inserting
    /// subtrees directly. The byte range is what makes emission
    /// well-defined; `kind` records which construct the scanner
    /// recognised at `start`.
    Construct {
        /// Byte position of the construct's first byte.
        start: usize,
        /// One past the construct's last byte.
        end: usize,
        /// Which construct the scanner recognised here.
        kind: ConstructKind,
    },

    /// A `*` or `_` delimiter run. The `matches` vec is filled in by the
    /// emphasis pass; before that pass it is empty.
    DelimRun {
        /// The delimiter byte (`b'*'` or `b'_'`); every byte of the run
        /// equals it.
        ch: u8,
        /// Byte position of the first delimiter byte.
        start: usize,
        /// One past the last delimiter byte.
        end: usize,
        /// Whether this run may open emphasis (dialect-specific flanking).
        can_open: bool,
        /// Whether this run may close emphasis (dialect-specific flanking).
        can_close: bool,
        /// Matched fragments produced by the emphasis pass, one
        /// [`DelimMatch`] per fragment. Empty until the pass runs;
        /// possibly multiple entries when a single run matches at
        /// multiple positions (e.g. a 4-run that closes 2+2 pairs).
        matches: Vec<DelimMatch>,
    },

    /// `[` or `![` bracket marker. Resolved by [`process_brackets`].
    OpenBracket {
        /// Byte position of the `[` (or of the `!` for `![`).
        start: usize,
        /// `start + 1` for `[`, `start + 2` for `![`.
        end: usize,
        /// `true` for the image form `![`.
        is_image: bool,
        /// True until a later resolution rule deactivates this opener.
        active: bool,
        /// Filled in when the matching `CloseBracket` resolves the pair
        /// to a link / image.
        resolution: Option<BracketResolution>,
        /// Pandoc-only: extents of an unresolved bracket-shape pattern
        /// (full reference / collapsed / shortcut whose label doesn't
        /// match a refdef). Mutually exclusive with `resolution:
        /// Some(...)`. When `Some`, emission wraps `[start, end)` in
        /// an `UNRESOLVED_REFERENCE` node so downstream tools can
        /// attach behavior to the bracket-shape pattern. Always
        /// `None` under `Dialect::CommonMark`.
        unresolved_ref: Option<UnresolvedRefShape>,
    },

    /// `]` bracket marker. Resolved by [`process_brackets`].
    CloseBracket {
        /// Byte position of the `]`; the event covers `pos..pos + 1`.
        pos: usize,
        /// True if this `]` was paired with an opener and the pair was
        /// turned into a link / image.
        matched: bool,
    },

    /// A soft line break (a `\n` or `\r\n` ending a paragraph-internal
    /// line). Includes the line-ending bytes verbatim.
    SoftBreak { start: usize, end: usize },

    /// A hard line break: either two or more trailing spaces followed by
    /// a line ending, or a backslash-escaped line ending. Includes any
    /// trailing-space bytes plus the line ending.
    HardBreak { start: usize, end: usize },
}
197
198impl IrEvent {
199 /// The source byte range this event covers.
200 pub fn range(&self) -> (usize, usize) {
201 match self {
202 IrEvent::Text { start, end }
203 | IrEvent::Construct { start, end, .. }
204 | IrEvent::DelimRun { start, end, .. }
205 | IrEvent::OpenBracket { start, end, .. }
206 | IrEvent::SoftBreak { start, end }
207 | IrEvent::HardBreak { start, end } => (*start, *end),
208 IrEvent::CloseBracket { pos, .. } => (*pos, *pos + 1),
209 }
210 }
211}
212
/// Categorical tag for a [`IrEvent::Construct`] event so emission knows
/// which parser to call to rebuild the CST subtree.
///
/// The scanner assigns the tag at the moment it recognises the construct;
/// the Pandoc-specific kinds are only produced when the dialect is not
/// CommonMark.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructKind {
    /// `\X` literal-character escape (CommonMark §2.4). The scanner also
    /// uses this kind for nonbreaking-space escapes; escaped line endings
    /// become [`IrEvent::HardBreak`] events instead.
    Escape,
    /// `` `code` `` span (§6.1).
    CodeSpan,
    /// `<scheme://...>` or `<email@host>` (§6.5).
    Autolink,
    /// `<tag ...>` and friends (§6.6).
    InlineHtml,
    /// Pandoc opaque construct that doesn't have a dedicated kind yet
    /// (currently: math spans). Pre-recognised in `build_ir` under
    /// `Dialect::Pandoc` solely so the emphasis pass treats the entire
    /// construct as opaque and delim runs inside don't cross its
    /// boundary. Emission re-parses the construct via the dispatcher's
    /// existing `try_parse_*` chain.
    PandocOpaque,
    /// Pandoc inline footnote `^[note text]`. Recognised in `build_ir`
    /// under `Dialect::Pandoc` and consumed by the emission walk via
    /// the IR's `ConstructPlan`. The dispatcher's legacy `^[` branch
    /// is gated to CommonMark dialect only.
    InlineFootnote,
    /// Pandoc native span `<span ...>...</span>`. Recognised in
    /// `build_ir` under `Dialect::Pandoc` and consumed by the emission
    /// walk via the IR's `ConstructPlan`. The dispatcher's legacy
    /// `<span>` branch is gated to CommonMark dialect only.
    NativeSpan,
    /// Pandoc footnote reference `[^id]`. Recognised in `build_ir`
    /// under `Dialect::Pandoc` and consumed by the emission walk via
    /// the IR's `ConstructPlan`. The dispatcher's legacy `[^id]`
    /// branch is gated to CommonMark dialect only.
    FootnoteReference,
    /// Pandoc bracketed citation `[@key]`, `[see @key, p. 1]`,
    /// `[@a; @b]`. Recognised in `build_ir` under `Dialect::Pandoc`
    /// and consumed by the emission walk via the IR's `ConstructPlan`.
    /// The dispatcher's legacy `[@cite]` branch is gated to CommonMark
    /// dialect only.
    BracketedCitation,
    /// Pandoc bare citation `@key` or `-@key` (author-in-text /
    /// suppress-author). Recognised in `build_ir` under
    /// `Dialect::Pandoc` and consumed by the emission walk via the
    /// IR's `ConstructPlan`. The dispatcher's legacy `@` and `-@`
    /// branches are gated to CommonMark dialect only.
    BareCitation,
    /// Pandoc bracketed span `[content]{attrs}`. Recognised in
    /// `build_ir` under `Dialect::Pandoc` and consumed by the emission
    /// walk via the IR's `ConstructPlan`. The dispatcher's legacy
    /// `[text]{attrs}` branch is gated to CommonMark dialect only.
    BracketedSpan,
}
265
/// One matched fragment within a [`IrEvent::DelimRun`].
///
/// A run may carry several of these when it pairs at more than one
/// position.
// NOTE(review): the offsets are `u8`, so a fragment more than 255 bytes
// into a run cannot be represented — confirm the emphasis pass bounds or
// rejects longer runs before narrowing.
#[derive(Debug, Clone, Copy)]
pub struct DelimMatch {
    /// Byte offset of this fragment relative to the run's `start`.
    pub offset_in_run: u8,
    /// Number of bytes in this fragment (1 or 2).
    pub len: u8,
    /// Whether this fragment is the opener (`true`) or closer of the pair.
    pub is_opener: bool,
    /// IR event index of the partner run.
    pub partner_event: u32,
    /// Byte offset within the partner run of the partner fragment.
    pub partner_offset: u8,
    /// Emphasis kind (Emph for `len == 1`, Strong for `len == 2`).
    pub kind: EmphasisKind,
}
282
/// Pandoc-only: extents of an unresolved bracket-shape reference
/// pattern. Recorded on `IrEvent::OpenBracket.unresolved_ref` when the
/// no-resolution fall-through fires under `Dialect::Pandoc`.
///
/// The pattern starts at the owning opener's `start`, so only the closing
/// side and the overall end are stored here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnresolvedRefShape {
    /// IR event index of the matching `CloseBracket`. Used by the
    /// scoped-emphasis pass to treat the wrapper as a tree boundary.
    pub close_event: u32,
    /// One past the end of the inner text (the byte position of the
    /// outer `]`). Combined with the opener's `end` field, this is the
    /// inner text range that goes through normal inline parsing.
    pub text_end: usize,
    /// One past the end of the full bracket-shape pattern. For
    /// shortcut form `[text]`: `close_pos + 1`. For collapsed
    /// `[text][]`: `close_pos + 3`. For full `[text][label]`: the byte
    /// after the closing `]` of `[label]`.
    pub end: usize,
}
301
/// Successful bracket resolution: the `[`...`]` pair is a link or image.
///
/// Stored on the opener (`IrEvent::OpenBracket.resolution`); whether the
/// pair is a link or an image is given by the opener's `is_image` flag.
#[derive(Debug, Clone)]
pub struct BracketResolution {
    /// IR event index of the matching `CloseBracket`.
    pub close_event: u32,
    /// Start of the link text: the first byte after `[` / `![`.
    pub text_start: usize,
    /// End of the link text (exclusive): the position of the closing `]`.
    pub text_end: usize,
    /// Start of the link suffix (`(...)`, `[label]`, `[]`, or empty for
    /// shortcut). When `kind == ShortcutReference`,
    /// `suffix_start == suffix_end == close_pos + 1`.
    pub suffix_start: usize,
    /// End of the link suffix (exclusive).
    pub suffix_end: usize,
    /// Which link form the pair resolved to.
    pub kind: LinkKind,
}
317
/// What kind of link/image we resolved a bracket pair to.
///
/// Only the inline and full-reference forms carry data; the collapsed and
/// shortcut forms take their label from the link text itself.
#[derive(Debug, Clone)]
pub enum LinkKind {
    /// `[text](dest)` or `[text](dest "title")`. `title` is `None` when
    /// the suffix has no title part.
    Inline { dest: String, title: Option<String> },
    /// `[text][label]` — explicit reference.
    FullReference { label: String },
    /// `[text][]` — collapsed reference. Label is the link text.
    CollapsedReference,
    /// `[text]` — shortcut reference. Label is the link text.
    ShortcutReference,
}
330
331// ============================================================================
332// Pass 1: Scan
333// ============================================================================
334
335/// Scan `text[start..end]` once, producing a flat IR of events.
336///
337/// The scan is forward-only and never backtracks: each iteration either
338/// consumes a known construct (escape, code span, autolink, raw HTML),
339/// records a delim run / bracket marker / line break, or steps past a
340/// single UTF-8 boundary as plain text. Adjacent text bytes are coalesced
341/// into a single [`IrEvent::Text`] event by the run-flush step.
342pub fn build_ir(text: &str, start: usize, end: usize, config: &ParserOptions) -> Vec<IrEvent> {
343 let mut events = Vec::new();
344 build_ir_into(text, start, end, config, &mut events);
345 events
346}
347
/// Like [`build_ir`] but writes into a caller-provided `Vec<IrEvent>`,
/// clearing it first. Used by [`build_full_plans`] to amortise the
/// per-call allocation through a thread-local scratch pool.
///
/// # Parameters
///
/// - `text`: the source string; construct recognisers are handed
///   `&text[pos..]`, so they may look beyond `end`, but a construct is
///   only accepted when it finishes at or before `end`.
/// - `start` / `end`: byte bounds of the region to scan. Presumably
///   `start <= end <= text.len()` with both on UTF-8 boundaries — TODO
///   confirm against callers; the function indexes `bytes[pos]` and
///   slices `text[pos..]` without checking.
/// - `config`: supplies the dialect and the extension flags that gate
///   each branch below.
/// - `events`: output buffer; any previous contents are discarded.
///
/// The order of the branches inside the loop is significant: the first
/// branch that recognises something at `pos` wins.
pub(super) fn build_ir_into(
    text: &str,
    start: usize,
    end: usize,
    config: &ParserOptions,
    events: &mut Vec<IrEvent>,
) {
    events.clear();
    let bytes = text.as_bytes();
    let exts = &config.extensions;
    let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;

    // `pos` is the scan cursor; `text_run_start` marks the first byte of
    // the pending plain-text run that has not been emitted yet.
    let mut pos = start;
    let mut text_run_start = start;
    // Pandoc-only: extent of the current bracket-shape link/image's
    // opaque range. While `pos < pandoc_bracket_extent`, autolinks /
    // raw HTML / native spans are NOT recognised — pandoc-native
    // treats `[link text]` as opaque to those constructs (CommonMark
    // spec example #526 / #538). The lookahead at `[`/`![` sets this
    // when a bracket-shape forms a valid link/image; once `pos`
    // passes the extent, normal scanning resumes. CommonMark
    // dialect's link-text-vs-autolink ordering is handled by the
    // dispatcher's `try_parse_inline_link` rejecting outer matches
    // when the link text contains a valid autolink (a different
    // mechanism, see `LinkScanContext.skip_autolinks`).
    let mut pandoc_bracket_extent: usize = 0;

    // Pre-computed byte mask: `mask[b]` is `true` iff byte `b` could
    // start any IR-recognised construct under the current dialect /
    // extensions. Used to bulk-skip plain bytes between structural
    // bytes — the per-byte branch chain below only runs at positions
    // where a construct is actually possible. Non-ASCII bytes
    // (>= 0x80) are never structural and are skipped together with
    // ASCII plain text.
    let mask = build_ir_byte_mask(config);

    // Emit the pending plain-text run `[text_run_start, pos)` if it is
    // non-empty. Callers reset `text_run_start` themselves after
    // advancing `pos` past the structural event.
    macro_rules! flush_text {
        () => {
            if pos > text_run_start {
                events.push(IrEvent::Text {
                    start: text_run_start,
                    end: pos,
                });
            }
        };
    }

    while pos < end {
        // Fast-skip plain bytes. `text_run_start` is preserved across
        // the skip so the next structural-event flush picks them up.
        while pos < end && !mask[bytes[pos] as usize] {
            pos += 1;
        }
        if pos >= end {
            break;
        }
        let b = bytes[pos];

        // Pandoc-only: at `[` or `![`, look ahead to see if this
        // bracket-shape forms a valid link/image. If so, suppress
        // autolink / raw HTML / native span recognition until `pos`
        // passes the bracket-shape's end. Skipped if we're already
        // inside an enclosing bracket-shape's opaque range.
        if !is_commonmark
            && pos >= pandoc_bracket_extent
            && (b == b'[' || (b == b'!' && pos + 1 < end && bytes[pos + 1] == b'['))
            && let Some(len) = try_pandoc_bracket_link_extent(text, pos, end, config)
        {
            pandoc_bracket_extent = pos + len;
        }
        let in_pandoc_bracket = !is_commonmark && pos < pandoc_bracket_extent;

        // Backslash escape (§2.4) — including `\\\n` hard line break.
        // NOTE(review): this branch runs before the math branch below, so
        // whenever the escape is enabled a leading `\` is consumed here
        // first — confirm this ordering is what the backslash-math
        // extensions expect.
        if b == b'\\'
            && let Some((len, _ch, escape_type)) = try_parse_escape(&text[pos..])
            && pos + len <= end
        {
            // Each escape flavour is gated on its own extension; literal
            // escapes are always on under the CommonMark dialect.
            let enabled = match escape_type {
                EscapeType::Literal => is_commonmark || exts.all_symbols_escapable,
                EscapeType::HardLineBreak => exts.escaped_line_breaks,
                EscapeType::NonbreakingSpace => exts.all_symbols_escapable,
            };
            if enabled {
                flush_text!();
                let kind = match escape_type {
                    // An escaped line ending is a hard break event, not a
                    // construct; emit it and move on.
                    EscapeType::HardLineBreak => {
                        events.push(IrEvent::HardBreak {
                            start: pos,
                            end: pos + len,
                        });
                        pos += len;
                        text_run_start = pos;
                        continue;
                    }
                    EscapeType::Literal | EscapeType::NonbreakingSpace => ConstructKind::Escape,
                };
                events.push(IrEvent::Construct {
                    start: pos,
                    end: pos + len,
                    kind,
                });
                pos += len;
                text_run_start = pos;
                continue;
            }
            // A disabled escape falls through: the `\` is tried against
            // the remaining branches and otherwise stays plain text.
        }

        // Code span (§6.1) — opaque to emphasis and brackets.
        if b == b'`'
            && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::CodeSpan,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: math spans are opaque to emphasis. The legacy
        // `parse_until_closer_with_nested_*` skip-list includes inline
        // math; without recognising it here, delim runs inside `$math$`
        // would be picked up by the emphasis pass and break losslessness
        // (the dispatcher's math parser would later re-claim the bytes,
        // duplicating content).
        if !is_commonmark && let Some(len) = try_pandoc_math_opaque(text, pos, end, config) {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::PandocOpaque,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: native span `<span ...>...</span>`. Must come
        // before the generic autolink/raw-html branches so the open tag
        // doesn't get claimed as inline HTML. Span content is opaque to
        // the emphasis pass; emission consumes the event via the IR's
        // `ConstructPlan`. Suppressed inside Pandoc bracket-shape
        // link/image text.
        if !is_commonmark
            && !in_pandoc_bracket
            && b == b'<'
            && exts.native_spans
            && let Some((len, _, _)) = try_parse_native_span(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::NativeSpan,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Autolink (§6.5) before raw HTML — autolinks are the more
        // specific shape inside `<...>`. Both are suppressed inside
        // Pandoc bracket-shape link/image text (pandoc-native treats
        // link text as opaque to autolinks and raw HTML).
        if b == b'<' && !in_pandoc_bracket {
            if exts.autolinks
                && let Some((len, _)) = try_parse_autolink(&text[pos..], is_commonmark)
                && pos + len <= end
            {
                flush_text!();
                events.push(IrEvent::Construct {
                    start: pos,
                    end: pos + len,
                    kind: ConstructKind::Autolink,
                });
                pos += len;
                text_run_start = pos;
                continue;
            }
            if exts.raw_html
                && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
                && pos + len <= end
            {
                flush_text!();
                events.push(IrEvent::Construct {
                    start: pos,
                    end: pos + len,
                    kind: ConstructKind::InlineHtml,
                });
                pos += len;
                text_run_start = pos;
                continue;
            }
        }

        // Pandoc-only: inline footnote `^[note]`. Recognized at scan
        // time so the emphasis pass treats it as opaque (delim runs
        // inside the footnote can't pair with delim runs outside).
        if !is_commonmark
            && b == b'^'
            && exts.inline_footnotes
            && let Some((len, _)) = try_parse_inline_footnote(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::InlineFootnote,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: footnote reference `[^id]`. Recognised at scan
        // time so the emphasis pass treats it as opaque (delim runs
        // inside the label can't pair with delim runs outside) and the
        // emission walk dispatches it directly via the IR's
        // `ConstructPlan`. Must come before the generic bracket-opaque
        // scan so the dedicated kind wins.
        if !is_commonmark
            && b == b'['
            && pos + 1 < end
            && bytes[pos + 1] == b'^'
            && exts.footnotes
            && let Some((len, _)) = try_parse_footnote_reference(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::FootnoteReference,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: bracketed citation `[@cite]`. Recognised at
        // scan time so the emphasis pass treats it as opaque (delim
        // runs inside the citation can't pair with delim runs outside)
        // and the emission walk dispatches it directly via the IR's
        // `ConstructPlan`. Must come before the generic bracket-opaque
        // scan so the dedicated kind wins.
        if !is_commonmark
            && b == b'['
            && exts.citations
            && let Some((len, _)) = try_parse_bracketed_citation(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::BracketedCitation,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: bare citation `@key` or `-@key`. Recognised at
        // scan time so the emission walk dispatches it directly via
        // the IR's `ConstructPlan`. Bare citations don't contain
        // emphasis-eligible content, so opacity is moot here — IR
        // participation is only for dispatch consolidation.
        if !is_commonmark
            && (b == b'@' || (b == b'-' && pos + 1 < end && bytes[pos + 1] == b'@'))
            && (exts.citations || exts.quarto_crossrefs)
            && let Some((len, _, _)) = try_parse_bare_citation(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::BareCitation,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // Pandoc-only: bracketed span `[content]{attrs}`. Recognised
        // at scan time so the emphasis pass treats it as opaque (delim
        // runs inside the span content can't pair with delim runs
        // outside) and the emission walk dispatches it directly via
        // the IR's `ConstructPlan`. Must come before the generic
        // bracket-opaque scan so the dedicated kind wins.
        // `try_parse_bracketed_span` requires `]` to be immediately
        // followed by `{`, so this never shadows inline links
        // (`[text](url)`) or reference links (`[label][refdef]`) —
        // those don't have the `{attrs}` suffix.
        if !is_commonmark
            && b == b'['
            && exts.bracketed_spans
            && let Some((len, _, _)) = try_parse_bracketed_span(&text[pos..])
            && pos + len <= end
        {
            flush_text!();
            events.push(IrEvent::Construct {
                start: pos,
                end: pos + len,
                kind: ConstructKind::BracketedSpan,
            });
            pos += len;
            text_run_start = pos;
            continue;
        }

        // `![` opens an image bracket. Recognised whenever any
        // image-producing extension is on — `inline_images` for the
        // `![alt](url)` form, or `reference_links` for the
        // `![alt][label]` reference-image form (e.g. MultiMarkdown
        // disables `inline_images` but uses reference images).
        if b == b'!'
            && pos + 1 < end
            && bytes[pos + 1] == b'['
            && (exts.inline_images || exts.reference_links)
        {
            flush_text!();
            events.push(IrEvent::OpenBracket {
                start: pos,
                end: pos + 2,
                is_image: true,
                active: true,
                resolution: None,
                unresolved_ref: None,
            });
            pos += 2;
            text_run_start = pos;
            continue;
        }

        // `[` opens a link bracket. Recognised whenever any
        // link-producing extension is on — `inline_links` for
        // `[text](url)`, or `reference_links` for `[text][label]` /
        // `[text]` shortcut form.
        if b == b'[' && (exts.inline_links || exts.reference_links) {
            flush_text!();
            events.push(IrEvent::OpenBracket {
                start: pos,
                end: pos + 1,
                is_image: false,
                active: true,
                resolution: None,
                unresolved_ref: None,
            });
            pos += 1;
            text_run_start = pos;
            continue;
        }

        // `]` closes a link/image bracket.
        if b == b']' {
            flush_text!();
            events.push(IrEvent::CloseBracket {
                pos,
                matched: false,
            });
            pos += 1;
            text_run_start = pos;
            continue;
        }

        // `*` or `_` delimiter run.
        if b == b'*' || b == b'_' {
            flush_text!();
            // Extend the run over every consecutive copy of the same
            // delimiter byte, without crossing `end`.
            let mut run_end = pos;
            while run_end < end && bytes[run_end] == b {
                run_end += 1;
            }
            let count = run_end - pos;
            let (can_open, can_close) = compute_flanking(text, pos, count, b, config.dialect);
            events.push(IrEvent::DelimRun {
                ch: b,
                start: pos,
                end: run_end,
                can_open,
                can_close,
                matches: Vec::new(),
            });
            pos = run_end;
            text_run_start = pos;
            continue;
        }

        // Hard line break: 2+ trailing spaces before newline. We detect
        // this when we're sitting on a `\n` (or `\r\n`) and the preceding
        // bytes within the current text run are spaces.
        if b == b'\n' || (b == b'\r' && pos + 1 < end && bytes[pos + 1] == b'\n') {
            // Count trailing spaces in the text accumulated so far.
            let nl_len = if b == b'\r' { 2 } else { 1 };
            let mut trailing_spaces = 0;
            // `s` walks backwards from the line ending and stops at the
            // first non-space byte or at the start of the pending run.
            let mut s = pos;
            while s > text_run_start && bytes[s - 1] == b' ' {
                trailing_spaces += 1;
                s -= 1;
            }
            if trailing_spaces >= 2 {
                // Flush text *before* the trailing spaces.
                if s > text_run_start {
                    events.push(IrEvent::Text {
                        start: text_run_start,
                        end: s,
                    });
                }
                events.push(IrEvent::HardBreak {
                    start: s,
                    end: pos + nl_len,
                });
                pos += nl_len;
                text_run_start = pos;
                continue;
            }

            // Soft line break: flush preceding text, emit the line ending
            // as its own event so the emitter can render `NEWLINE` tokens
            // verbatim.
            flush_text!();
            events.push(IrEvent::SoftBreak {
                start: pos,
                end: pos + nl_len,
            });
            pos += nl_len;
            text_run_start = pos;
            continue;
        }

        // Plain byte — advance one UTF-8 char. The byte was flagged by the
        // mask but no branch claimed it, so it stays in the pending text
        // run (`text_run_start` is deliberately left untouched).
        let ch_len = text[pos..]
            .chars()
            .next()
            .map_or(1, std::primitive::char::len_utf8);
        pos += ch_len.max(1);
    }

    // Emit whatever plain text is still pending at the end of the range.
    flush_text!();
}
797
798/// Build a 256-entry mask: `mask[b]` is `true` iff byte `b` could start
799/// any IR-recognised construct under the current dialect / extensions.
800///
801/// This is the build-IR-specific superset of "is this byte interesting".
802/// Plain bytes between structural bytes are bulk-skipped via this mask
803/// in the [`build_ir`] hot loop; missing a byte here is a correctness
804/// bug (we'd skip past a real construct), but having extras only costs
805/// us a wasted branch round-trip.
806fn build_ir_byte_mask(config: &ParserOptions) -> [bool; 256] {
807 let mut mask = [false; 256];
808 let exts = &config.extensions;
809 let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
810
811 // Always structural for IR scanning:
812 // `\n` / `\r` — soft / hard breaks
813 // `\\` — escape, hard line break, backslash math
814 // `` ` `` — code span (IR construct)
815 // `*` / `_` — emphasis delim runs (IR core)
816 mask[b'\n' as usize] = true;
817 mask[b'\r' as usize] = true;
818 mask[b'\\' as usize] = true;
819 mask[b'`' as usize] = true;
820 mask[b'*' as usize] = true;
821 mask[b'_' as usize] = true;
822
823 // Brackets: scanned whenever any bracket-shaped construct is
824 // reachable. `]` is structural unconditionally if `[` is — the IR
825 // emits a CloseBracket event regardless of which opener variant
826 // matches. `!` is gated on image-producing extensions; the leading
827 // `!` of `![alt]` is the only image entry point.
828 if exts.inline_links
829 || exts.reference_links
830 || exts.inline_images
831 || exts.bracketed_spans
832 || exts.footnotes
833 || exts.citations
834 {
835 mask[b'[' as usize] = true;
836 mask[b']' as usize] = true;
837 }
838 if exts.inline_images || exts.reference_links {
839 mask[b'!' as usize] = true;
840 }
841
842 // `<` covers autolinks, raw HTML, and Pandoc native spans.
843 if exts.autolinks || exts.raw_html || (!is_commonmark && exts.native_spans) {
844 mask[b'<' as usize] = true;
845 }
846
847 // `^` covers Pandoc inline footnotes (`^[...]` recognised in IR
848 // under Pandoc dialect). CM dialect inline footnotes go through
849 // the dispatcher, not the IR.
850 if !is_commonmark && exts.inline_footnotes {
851 mask[b'^' as usize] = true;
852 }
853
854 // `@` covers Pandoc bare citation `@key` and `[@cite]`. The leading
855 // `[` of `[@cite]` is already in the mask via the bracket gate;
856 // gating `@` here also covers the bare-citation form.
857 if !is_commonmark && (exts.citations || exts.quarto_crossrefs) {
858 mask[b'@' as usize] = true;
859 // `-` only matters as the first byte of `-@cite`. Tracking it
860 // here avoids missing the suppress-author bare citation form.
861 mask[b'-' as usize] = true;
862 }
863
864 // `$` covers Pandoc dollar / GFM math. CM doesn't recognise math
865 // in `build_ir`.
866 if !is_commonmark
867 && (exts.tex_math_dollars
868 || exts.tex_math_gfm
869 || exts.tex_math_single_backslash
870 || exts.tex_math_double_backslash)
871 {
872 mask[b'$' as usize] = true;
873 }
874
875 mask
876}
877
878// ============================================================================
879// Flanking (CommonMark §6.2)
880// ============================================================================
881
882fn compute_flanking(
883 text: &str,
884 pos: usize,
885 count: usize,
886 ch: u8,
887 dialect: crate::options::Dialect,
888) -> (bool, bool) {
889 if dialect == crate::options::Dialect::Pandoc {
890 // Pandoc-markdown's recursive-descent emphasis parser does NOT
891 // apply CommonMark §6.2 flanking rules. Instead it gates on:
892 // - opener: must not be followed by whitespace (Pandoc
893 // `try_parse_emphasis` line 247 in legacy core.rs).
894 // - closer: no flanking gate at all (Pandoc-markdown's
895 // `ender` parser only counts characters; see Markdown.hs
896 // in pandoc/src/Text/Pandoc/Readers/Markdown.hs).
897 // - underscore intraword hard rule: `_` adjacent to an
898 // alphanumeric on either side cannot open / close
899 // (Pandoc's `intraword_underscores` extension default).
900 let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
901 let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
902 let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
903
904 let mut can_open = !followed_by_ws;
905 // Pandoc-markdown's `ender` (in pandoc/Readers/Markdown.hs)
906 // has no flanking restriction on closers — just a count match.
907 // Set can_close unconditionally; the per-pair match logic in
908 // `process_emphasis_in_range_filtered` constrains pairing via
909 // the equal-count rule.
910 let mut can_close = true;
911
912 if ch == b'_' {
913 let prev_is_alnum = prev_char.is_some_and(|c| c.is_alphanumeric());
914 let next_is_alnum = next_char.is_some_and(|c| c.is_alphanumeric());
915 if prev_is_alnum {
916 can_open = false;
917 }
918 if next_is_alnum {
919 can_close = false;
920 }
921 }
922
923 return (can_open, can_close);
924 }
925
926 // CommonMark §6.2 flanking.
927 let lf = is_left_flanking(text, pos, count);
928 let rf = is_right_flanking(text, pos, count);
929 if ch == b'*' {
930 (lf, rf)
931 } else {
932 let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
933 let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
934 let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
935 let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
936 let can_open = lf && (!rf || preceded_by_punct);
937 let can_close = rf && (!lf || followed_by_punct);
938 (can_open, can_close)
939 }
940}
941
942/// Pandoc-only: identify a math span starting at `pos` and return its
943/// byte length. Tries `$math$` and `$$display$$` (gated on
944/// `tex_math_dollars`), GFM `$math$` (gated on `tex_math_gfm`), and the
945/// `\(math\)` / `\[math\]` / `\\(math\\)` / `\\[math\\]` backslash
946/// forms (gated on `tex_math_single_backslash` / `_double_backslash`).
947/// Math content is opaque to emphasis: `$a * b$` must not produce an
948/// emphasis closer at the inner `*`.
949fn try_pandoc_math_opaque(
950 text: &str,
951 pos: usize,
952 end: usize,
953 config: &ParserOptions,
954) -> Option<usize> {
955 let bytes = text.as_bytes();
956 let exts = &config.extensions;
957 let b = bytes[pos];
958
959 if exts.tex_math_dollars && b == b'$' {
960 if let Some((len, _)) = try_parse_display_math(&text[pos..])
961 && pos + len <= end
962 {
963 return Some(len);
964 }
965 if let Some((len, _)) = try_parse_inline_math(&text[pos..])
966 && pos + len <= end
967 {
968 return Some(len);
969 }
970 }
971 if exts.tex_math_gfm
972 && b == b'$'
973 && let Some((len, _)) = try_parse_gfm_inline_math(&text[pos..])
974 && pos + len <= end
975 {
976 return Some(len);
977 }
978 if exts.tex_math_double_backslash && b == b'\\' {
979 if let Some((len, _)) = try_parse_double_backslash_display_math(&text[pos..])
980 && pos + len <= end
981 {
982 return Some(len);
983 }
984 if let Some((len, _)) = try_parse_double_backslash_inline_math(&text[pos..])
985 && pos + len <= end
986 {
987 return Some(len);
988 }
989 }
990 if exts.tex_math_single_backslash && b == b'\\' {
991 if let Some((len, _)) = try_parse_single_backslash_display_math(&text[pos..])
992 && pos + len <= end
993 {
994 return Some(len);
995 }
996 if let Some((len, _)) = try_parse_single_backslash_inline_math(&text[pos..])
997 && pos + len <= end
998 {
999 return Some(len);
1000 }
1001 }
1002 None
1003}
1004
1005/// Pandoc-only: identify a bracket-shaped opaque construct starting at
1006/// `pos` and return its byte length. Tries the dispatcher's precedence
1007/// order:
1008/// 1. `` inline image
1009/// 2. `![alt][ref]` / `![alt]` reference image (shape-only opacity)
1010/// 3. `[^id]` footnote reference
1011/// 4. `[text](dest)` inline link
1012/// 5. `[text][ref]` / `[text]` reference link (shape-only opacity)
1013/// 6. `[@cite]` bracketed citation
1014/// 7. `[text]{attrs}` bracketed span
1015///
1016/// Returns `None` if the bytes at `pos` don't open any recognised Pandoc
1017/// bracket-shaped construct. In that case the scanner falls through to
1018/// the generic `OpenBracket`/`CloseBracket` emission and the dispatcher
1019/// emits the bracket bytes as literal text (or as plain emphasis if the
1020/// pattern matches an opener).
1021/// Lookahead helper: at a `[` or `![` byte under Pandoc dialect, return
1022/// the total byte length of the bracket-shape link/image if it forms a
1023/// valid one, else `None`. Used by `build_ir` to suppress autolink /
1024/// raw HTML / native span recognition inside Pandoc link text —
1025/// pandoc-native treats link text as opaque to those constructs
1026/// (CommonMark spec example #526 / #538 differs). Mirrors the
1027/// dispatcher's `try_parse_*` precedence so the lookahead, the IR's
1028/// `process_brackets` resolution, and the dispatcher's emission agree
1029/// on the bracket-shape's byte boundaries.
1030fn try_pandoc_bracket_link_extent(
1031 text: &str,
1032 pos: usize,
1033 end: usize,
1034 config: &ParserOptions,
1035) -> Option<usize> {
1036 let bytes = text.as_bytes();
1037 let exts = &config.extensions;
1038 let ctx = LinkScanContext::from_options(config);
1039 let allow_shortcut = exts.shortcut_reference_links;
1040
1041 // `![...]` images.
1042 if bytes[pos] == b'!' {
1043 if pos + 1 >= end || bytes[pos + 1] != b'[' {
1044 return None;
1045 }
1046 if exts.inline_images
1047 && let Some((len, _, _, _)) = try_parse_inline_image(&text[pos..], ctx)
1048 && pos + len <= end
1049 {
1050 return Some(len);
1051 }
1052 if exts.reference_links
1053 && let Some((len, _, _, _)) = try_parse_reference_image(&text[pos..], allow_shortcut)
1054 && pos + len <= end
1055 {
1056 return Some(len);
1057 }
1058 return None;
1059 }
1060
1061 // `[...]` openers — try in dispatcher order. Footnote refs
1062 // (`[^id]`), bracketed citations (`[@cite]`), and bracketed spans
1063 // (`[text]{attrs}`) are recognised by their own dedicated branches
1064 // in `build_ir` and don't need this lookahead.
1065 if exts.inline_links
1066 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..], false, ctx)
1067 && pos + len <= end
1068 {
1069 return Some(len);
1070 }
1071 if exts.reference_links
1072 && let Some((len, _, _, _)) =
1073 try_parse_reference_link(&text[pos..], allow_shortcut, exts.inline_links, ctx)
1074 && pos + len <= end
1075 {
1076 return Some(len);
1077 }
1078
1079 None
1080}
1081
/// Approximate the CommonMark "Unicode punctuation character" test.
///
/// ASCII characters are classified with `is_ascii_punctuation`. Any
/// non-ASCII character counts as punctuation/symbol when it is neither
/// alphanumeric nor whitespace.
fn is_unicode_punct_or_symbol(c: char) -> bool {
    match c {
        _ if c.is_ascii() => c.is_ascii_punctuation(),
        _ => !(c.is_alphanumeric() || c.is_whitespace()),
    }
}
1089
1090fn is_left_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1091 let after = run_start + run_len;
1092 let next_char = text.get(after..).and_then(|s| s.chars().next());
1093 let prev_char = (run_start > 0)
1094 .then(|| text[..run_start].chars().last())
1095 .flatten();
1096
1097 let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
1098 if followed_by_ws {
1099 return false;
1100 }
1101 let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
1102 if !followed_by_punct {
1103 return true;
1104 }
1105 prev_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1106}
1107
1108fn is_right_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1109 let after = run_start + run_len;
1110 let next_char = text.get(after..).and_then(|s| s.chars().next());
1111 let prev_char = (run_start > 0)
1112 .then(|| text[..run_start].chars().last())
1113 .flatten();
1114
1115 let preceded_by_ws = prev_char.is_none_or(|c| c.is_whitespace());
1116 if preceded_by_ws {
1117 return false;
1118 }
1119 let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
1120 if !preceded_by_punct {
1121 return true;
1122 }
1123 next_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1124}
1125
1126// ============================================================================
1127// Pass 2: Process emphasis (CommonMark §6.2)
1128// ============================================================================
1129
1130/// Run the CommonMark §6.3 `process_emphasis` algorithm over the IR's
1131/// delim runs. Mutates the IR in place: matched runs gain entries in their
1132/// `matches` vec, unmatched bytes stay implicit (the emission pass treats
1133/// any byte not covered by a match as literal text).
1134///
1135/// The algorithm tracks a per-bucket `openers_bottom` exclusive lower
1136/// bound to keep walk-back bounded; consume rules and the §6.2 mod-3
1137/// rejection match the reference implementation.
1138pub fn process_emphasis(events: &mut [IrEvent], dialect: crate::options::Dialect) {
1139 process_emphasis_in_range(events, 0, events.len(), dialect);
1140}
1141
1142/// Range-scoped variant of [`process_emphasis`].
1143///
1144/// Only delim runs whose IR event index lies in `[lo, hi)` are considered.
1145/// Used by [`build_full_plans`] to run emphasis pairing inside each
1146/// resolved bracket pair *before* the global top-level pass, so emphasis
1147/// can never form across a link's bracket boundary (CommonMark §6.3
1148/// requires bracket resolution to happen first when at a `]`, with
1149/// emphasis processed on the link's inner range).
1150///
1151/// The function additionally skips delim runs that already carry a
1152/// recorded match in their `matches` vec — this lets the second
1153/// (top-level) pass reuse the same algorithm without re-pairing bytes
1154/// already consumed by inner-range passes.
1155pub fn process_emphasis_in_range(
1156 events: &mut [IrEvent],
1157 lo: usize,
1158 hi: usize,
1159 dialect: crate::options::Dialect,
1160) {
1161 process_emphasis_in_range_filtered(events, lo, hi, None, dialect);
1162}
1163
1164/// Internal variant of [`process_emphasis_in_range`] with an optional
1165/// exclusion bitmap. Event indices for which `excluded[i] == true` are
1166/// treated as if their delim run were already fully consumed — used by
1167/// [`build_full_plans`] to keep the top-level emphasis pass from pairing
1168/// across a resolved bracket pair's boundary (the inner delim runs of
1169/// such a pair belong to the link's inner range and were already paired
1170/// by the scoped pass).
1171fn process_emphasis_in_range_filtered(
1172 events: &mut [IrEvent],
1173 lo: usize,
1174 hi: usize,
1175 excluded: Option<&[bool]>,
1176 dialect: crate::options::Dialect,
1177) {
1178 let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1179 if is_commonmark {
1180 run_emphasis_pass(events, lo, hi, excluded, dialect, &[], false);
1181 return;
1182 }
1183 // Pandoc dialect: cascade-then-rerun. Run the standard pass, then
1184 // invalidate Emph/Strong pairs whose inner range contains an
1185 // unmatched same-char run with both can_open && can_close (Pandoc's
1186 // recursive descent would have failed those outer pairs because the
1187 // inner content has a stray, ambiguous delimiter the recursive
1188 // parser cannot pair). The invalidated pairs go into a "rejected
1189 // list" that the next iteration of the standard pass consults to
1190 // pick a different opener for the same closer (or reject the
1191 // closer altogether). Iterate to a fixed point.
1192 //
1193 // The rerun (iter 2+) runs in `strict` mode: a candidate pair is
1194 // rejected if its inner range contains an unmatched same-char run
1195 // with count > pair.count. This mirrors pandoc-markdown's
1196 // recursive-descent semantics where, e.g. inside a failed outer
1197 // `**...**` Strong, the inner `one c` parser's `option2`
1198 // (`string [c,c] >> two c mempty`) greedily consumes a stray `**`
1199 // and prevents subsequent `*` runs from pairing as Emph. Without
1200 // this gate, `**foo *bar** baz*` would produce Emph[bar** baz]
1201 // after the outer Strong invalidation, but pandoc treats it as
1202 // all-literal because the inner `**` blocks the Emph match.
1203 let mut rejected: Vec<(usize, usize)> = Vec::new();
1204 let max_iters = events.len().saturating_add(2);
1205 let mut iter = 0;
1206 loop {
1207 let strict = iter > 0;
1208 run_emphasis_pass(events, lo, hi, excluded, dialect, &rejected, strict);
1209 let invalidations = pandoc_cascade_invalidate(events, excluded);
1210 if invalidations.is_empty() {
1211 break;
1212 }
1213 rejected.extend(invalidations);
1214 iter += 1;
1215 if iter >= max_iters {
1216 break;
1217 }
1218 }
1219 // Recovery for `***A **B** C***` patterns: synthesise the inner
1220 // Strong match the standard delim-stack algorithm can't reach.
1221 pandoc_inner_strong_recovery(events);
1222}
1223
1224/// One pass of the CommonMark §6.2 emphasis pairing algorithm over the
1225/// IR's [`DelimRun`](IrEvent::DelimRun) events in `[lo, hi)`. Pandoc
1226/// dialect gates apply when `dialect == Dialect::Pandoc`. The
1227/// `rejected_pairs` list (Pandoc only) excludes specific
1228/// (opener_event_idx, closer_event_idx) pairs from matching — used by
1229/// the cascade-then-rerun loop to prevent invalidated pairs from
1230/// re-forming on the next iteration.
1231fn run_emphasis_pass(
1232 events: &mut [IrEvent],
1233 lo: usize,
1234 hi: usize,
1235 excluded: Option<&[bool]>,
1236 dialect: crate::options::Dialect,
1237 rejected_pairs: &[(usize, usize)],
1238 strict_pandoc: bool,
1239) {
1240 let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1241 let hi = hi.min(events.len());
1242 if lo >= hi {
1243 return;
1244 }
1245 // Indices of DelimRun events within [lo, hi), in order, that have
1246 // not already been fully consumed by an earlier scoped pass and that
1247 // are not in the optional exclusion bitmap.
1248 let mut delim_idxs: Vec<usize> = events[lo..hi]
1249 .iter()
1250 .enumerate()
1251 .filter_map(|(i, e)| {
1252 let abs = lo + i;
1253 match e {
1254 IrEvent::DelimRun { matches, .. }
1255 if matches.is_empty()
1256 && excluded.is_none_or(|ex| ex.get(abs).copied() != Some(true)) =>
1257 {
1258 Some(abs)
1259 }
1260 _ => None,
1261 }
1262 })
1263 .collect();
1264 if delim_idxs.is_empty() {
1265 return;
1266 }
1267
1268 // Working state: count (remaining unmatched chars) and source_start
1269 // (first remaining char) per delim run. Indexed by position in
1270 // `delim_idxs`.
1271 let mut count: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1272 let mut source_start: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1273 let mut removed: Vec<bool> = vec![false; delim_idxs.len()];
1274
1275 for &ev_idx in &delim_idxs {
1276 if let IrEvent::DelimRun { start, end, .. } = &events[ev_idx] {
1277 count.push(end - start);
1278 source_start.push(*start);
1279 }
1280 }
1281
1282 // openers_bottom[ch_idx][len%3][can_open] → exclusive lower bound
1283 // (an index into `delim_idxs`, or None meaning "no bottom yet").
1284 let mut openers_bottom: [[[Option<usize>; 2]; 3]; 2] = [[[None; 2]; 3]; 2];
1285
1286 // First active index, scanning forward.
1287 let first_active =
1288 |removed: &[bool]| -> Option<usize> { (0..removed.len()).find(|&i| !removed[i]) };
1289 let next_active = |removed: &[bool], from: usize| -> Option<usize> {
1290 (from + 1..removed.len()).find(|&i| !removed[i])
1291 };
1292 let prev_active =
1293 |removed: &[bool], from: usize| -> Option<usize> { (0..from).rev().find(|&i| !removed[i]) };
1294
1295 let min_closer_count = 1usize;
1296 let mut closer_local = first_active(&removed);
1297 while let Some(c) = closer_local {
1298 let ev_c_idx = delim_idxs[c];
1299 let (ch_c, can_open_c, can_close_c) = match &events[ev_c_idx] {
1300 IrEvent::DelimRun {
1301 ch,
1302 can_open,
1303 can_close,
1304 ..
1305 } => (*ch, *can_open, *can_close),
1306 _ => unreachable!(),
1307 };
1308 if !can_close_c || removed[c] || count[c] < min_closer_count {
1309 closer_local = next_active(&removed, c);
1310 continue;
1311 }
1312
1313 let ch_idx = if ch_c == b'*' { 0 } else { 1 };
1314 let closer_mod = count[c] % 3;
1315 let closer_open_bucket = can_open_c as usize;
1316 let bottom = openers_bottom[ch_idx][closer_mod][closer_open_bucket];
1317
1318 // Walk back to find a compatible opener.
1319 let mut found_opener: Option<usize> = None;
1320 let mut walk = prev_active(&removed, c);
1321 while let Some(o) = walk {
1322 if Some(o) == bottom {
1323 break;
1324 }
1325 let ev_o_idx = delim_idxs[o];
1326 let (ch_o, can_open_o, can_close_o) = match &events[ev_o_idx] {
1327 IrEvent::DelimRun {
1328 ch,
1329 can_open,
1330 can_close,
1331 ..
1332 } => (*ch, *can_open, *can_close),
1333 _ => unreachable!(),
1334 };
1335 if !removed[o] && ch_o == ch_c && can_open_o {
1336 let oc_sum = count[o] + count[c];
1337 let opener_both = can_open_o && can_close_o;
1338 let closer_both = can_open_c && can_close_c;
1339 let mod3_reject = is_commonmark
1340 && (opener_both || closer_both)
1341 && oc_sum.is_multiple_of(3)
1342 && !(count[o].is_multiple_of(3) && count[c].is_multiple_of(3));
1343 // Pandoc-markdown rejects emph/strong pairs whose counts
1344 // disagree in the exactly-(1,2) / (2,1) shape:
1345 // - `**foo*` (2,1): `try_parse_two` looks only for a
1346 // `**` closer; the lone `*` doesn't satisfy that.
1347 // - `*foo**` (1,2): `try_parse_one` encountering `**`
1348 // tries `try_parse_two`; absence of an inner `**`
1349 // closer cascades the outer parse to fail.
1350 // Other count combinations DO match (verified against
1351 // `pandoc -f markdown`):
1352 // - (1,3) / (3,1) → emph match, opposite-side
1353 // leftover `**` literal.
1354 // - (2,3) / (3,2) → strong match, single `*` literal.
1355 // - (3,3) → STRONG(EM(...)) nested.
1356 // - (1..3, 4+) → match (Pandoc's ender walks the
1357 // closer run for a valid position; algorithm
1358 // consumes leftmost via leftover-as-literal).
1359 // Opener count >= 4 is rejected (Pandoc's
1360 // `try_parse_emphasis` has no count-4+ dispatch).
1361 let pandoc_reject = !is_commonmark
1362 && ((count[o] == 1 && count[c] == 2)
1363 || (count[o] == 2 && count[c] == 1)
1364 || count[o] >= 4);
1365 let pair_rejected = !is_commonmark && {
1366 let oe = delim_idxs[o];
1367 let ce = delim_idxs[c];
1368 rejected_pairs.iter().any(|&(ro, rc)| ro == oe && rc == ce)
1369 };
1370 // Pandoc strict-rerun gate (iter 2+ only): block a
1371 // candidate pair if any unmatched same-char run between
1372 // its opener and closer has remaining count strictly
1373 // greater than the consume rule for this pair.
1374 // Mirrors pandoc-markdown's recursive descent where
1375 // `one c`'s `option2` (`string [c,c] >> two c`) would
1376 // greedily consume a stray higher-count run, blocking
1377 // the outer `one c` from finding its `ender c 1` —
1378 // e.g. `**foo *bar** baz*` after the outer Strong
1379 // invalidates: a naïve rerun pairs ev1 (`*`) ↔ ev3
1380 // (`*`) as Emph (consume=1), but pandoc treats the
1381 // `**` between as having "consumed" any further
1382 // matching, leaving everything literal.
1383 let strict_block = strict_pandoc && {
1384 let tentative_consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1385 1
1386 } else if count[o] >= 2 && count[c] >= 2 {
1387 2
1388 } else {
1389 1
1390 };
1391 let lo_evt = delim_idxs[o] + 1;
1392 let hi_evt = delim_idxs[c];
1393 (lo_evt..hi_evt).any(|k| match &events[k] {
1394 IrEvent::DelimRun {
1395 ch: ch_k,
1396 start,
1397 end,
1398 matches,
1399 ..
1400 } => {
1401 *ch_k == ch_c && {
1402 let total = end - start;
1403 let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1404 total.saturating_sub(consumed) > tentative_consume
1405 }
1406 }
1407 _ => false,
1408 })
1409 };
1410 if !mod3_reject && !pandoc_reject && !pair_rejected && !strict_block {
1411 found_opener = Some(o);
1412 break;
1413 }
1414 }
1415 if o == 0 {
1416 break;
1417 }
1418 walk = prev_active(&removed, o);
1419 }
1420
1421 if let Some(o) = found_opener {
1422 // Consume rule:
1423 // CommonMark — consume 2 (Strong) when both sides have
1424 // >= 2 chars, else 1 (Emph). For `***x***` (3,3) this
1425 // produces EM(STRONG(...)) because the first match
1426 // consumes 2 from each side (Strong outermost).
1427 // Pandoc — when both sides have >= 3, consume 1 first
1428 // (Emph innermost) leaving 2 + 2 to pair as Strong on
1429 // the second pass. This produces STRONG(EM(...)) for
1430 // `***x***`, matching Pandoc-markdown's recursive
1431 // `try_parse_three` algorithm.
1432 let consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1433 1
1434 } else if count[o] >= 2 && count[c] >= 2 {
1435 2
1436 } else {
1437 1
1438 };
1439 let kind = if consume == 2 {
1440 EmphasisKind::Strong
1441 } else {
1442 EmphasisKind::Emph
1443 };
1444
1445 // Opener consumes inner-edge (rightmost) chars.
1446 let opener_match_offset =
1447 source_start[o] + count[o] - consume - source_start_event(&events[delim_idxs[o]]);
1448 // Closer consumes inner-edge (leftmost) chars.
1449 let closer_match_offset = source_start[c] - source_start_event(&events[delim_idxs[c]]);
1450
1451 // Record match on opener.
1452 if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[o]] {
1453 matches.push(DelimMatch {
1454 offset_in_run: opener_match_offset as u8,
1455 len: consume as u8,
1456 is_opener: true,
1457 partner_event: delim_idxs[c] as u32,
1458 partner_offset: closer_match_offset as u8,
1459 kind,
1460 });
1461 }
1462 // Record match on closer.
1463 if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[c]] {
1464 matches.push(DelimMatch {
1465 offset_in_run: closer_match_offset as u8,
1466 len: consume as u8,
1467 is_opener: false,
1468 partner_event: delim_idxs[o] as u32,
1469 partner_offset: opener_match_offset as u8,
1470 kind,
1471 });
1472 }
1473
1474 count[o] -= consume;
1475 source_start[c] += consume;
1476 count[c] -= consume;
1477
1478 // Remove all openers strictly between o and c.
1479 let mut between = next_active(&removed, o);
1480 while let Some(idx) = between {
1481 if idx == c {
1482 break;
1483 }
1484 removed[idx] = true;
1485 between = next_active(&removed, idx);
1486 }
1487
1488 if count[o] == 0 {
1489 removed[o] = true;
1490 }
1491 if count[c] == 0 {
1492 removed[c] = true;
1493 closer_local = next_active(&removed, c);
1494 }
1495 // Else re-process the same closer with reduced count.
1496 } else {
1497 openers_bottom[ch_idx][closer_mod][closer_open_bucket] = prev_active(&removed, c);
1498 if !can_open_c {
1499 removed[c] = true;
1500 }
1501 closer_local = next_active(&removed, c);
1502 }
1503 }
1504
1505 // No further mutation needed: matches are recorded; remaining bytes
1506 // stay implicit literal. Pandoc cascade is invoked by the caller
1507 // (`process_emphasis_in_range_filtered`) once per pass so it can
1508 // accumulate invalidations into a rejected-pairs list and re-run.
1509 let _ = (&mut delim_idxs, &mut openers_bottom, min_closer_count);
1510}
1511
1512/// Pandoc-only post-processing pass over [`process_emphasis_in_range_filtered`]
1513/// matches: invalidate any matched delim pair that contains an unmatched
1514/// same-character run between its opener and closer. Returns the list
1515/// of (opener_event_idx, closer_event_idx) pairs that were invalidated
1516/// in this call, so the caller can seed a rejected-pairs list and
1517/// re-run the standard pass — this lets Pandoc re-pair the inner runs
1518/// that the invalidated outer match would have stolen via
1519/// between-removal (e.g. `*foo **bar* baz**` → after the outer
1520/// `ev0..ev2` Emph is invalidated, `ev1..ev3` matches as Strong on the
1521/// next iteration).
1522fn pandoc_cascade_invalidate(
1523 events: &mut [IrEvent],
1524 excluded: Option<&[bool]>,
1525) -> Vec<(usize, usize)> {
1526 let mut invalidated_pairs: Vec<(usize, usize)> = Vec::new();
1527 // Early-exit: if there are no `DelimRun` events at all, the cascade
1528 // pass is a no-op. Avoids allocating the two scratch vecs below for
1529 // every range with no `*`/`_` runs (which is the common case for
1530 // ranges that contain only standalone constructs / brackets).
1531 if !events.iter().any(|e| matches!(e, IrEvent::DelimRun { .. })) {
1532 return invalidated_pairs;
1533 }
1534 let is_excluded = |k: usize| excluded.is_some_and(|ex| ex.get(k).copied() == Some(true));
1535 // Reuse two scratch vecs across the inner loop iterations instead
1536 // of `.collect()` each time. These are tiny per-paragraph
1537 // allocations but the function is called for every Pandoc inline
1538 // emphasis pass and shows up in malloc traffic.
1539 let mut total: Vec<usize> = Vec::with_capacity(events.len());
1540 let mut consumed: Vec<usize> = Vec::with_capacity(events.len());
1541 loop {
1542 total.clear();
1543 consumed.clear();
1544 // Compute total bytes (run length) and consumed bytes (sum of
1545 // match lens) per DelimRun event index.
1546 total.extend(events.iter().map(|e| match e {
1547 IrEvent::DelimRun { start, end, .. } => end - start,
1548 _ => 0,
1549 }));
1550 consumed.extend(events.iter().map(|e| match e {
1551 IrEvent::DelimRun { matches, .. } => matches.iter().map(|m| m.len as usize).sum(),
1552 _ => 0,
1553 }));
1554
1555 // Find a pair to invalidate. We invalidate one and restart so
1556 // the cascade can re-evaluate dependent pairs.
1557 let mut to_invalidate: Option<(usize, u8)> = None;
1558 'outer: for opener_idx in 0..events.len() {
1559 let IrEvent::DelimRun {
1560 ch: ch_o, matches, ..
1561 } = &events[opener_idx]
1562 else {
1563 continue;
1564 };
1565 for (mi, m) in matches.iter().enumerate() {
1566 if !m.is_opener {
1567 continue;
1568 }
1569 let closer_idx = m.partner_event as usize;
1570 if closer_idx <= opener_idx || closer_idx >= events.len() {
1571 continue;
1572 }
1573 // Scan events strictly between opener and closer for any
1574 // DelimRun with the same `ch`, unmatched bytes, AND
1575 // both `can_open` and `can_close` (i.e., the run could
1576 // have participated in pairing on both sides). A
1577 // can_open-only or can_close-only run is a one-sided
1578 // fragment (e.g. an isolated `*` after a backslash
1579 // escape) that the Pandoc recursive-descent path would
1580 // never have tried as a nested-strong opener — those
1581 // shouldn't cascade-invalidate the surrounding pair.
1582 for k in (opener_idx + 1)..closer_idx {
1583 if is_excluded(k) {
1584 continue;
1585 }
1586 if let IrEvent::DelimRun {
1587 ch: ch_k,
1588 can_open: co_k,
1589 can_close: cc_k,
1590 ..
1591 } = &events[k]
1592 && *ch_k == *ch_o
1593 && consumed[k] < total[k]
1594 && *co_k
1595 && *cc_k
1596 {
1597 to_invalidate = Some((opener_idx, mi as u8));
1598 break 'outer;
1599 }
1600 }
1601 }
1602 }
1603
1604 let Some((opener_idx, mi)) = to_invalidate else {
1605 break;
1606 };
1607
1608 // Look up the partner event/offset before mutating.
1609 let (closer_idx, opener_offset) = match &events[opener_idx] {
1610 IrEvent::DelimRun { matches, .. } => {
1611 let m = matches[mi as usize];
1612 (m.partner_event as usize, m.offset_in_run)
1613 }
1614 _ => break,
1615 };
1616
1617 // Remove the opener match.
1618 if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1619 matches.remove(mi as usize);
1620 }
1621 // Remove the corresponding closer match (closer's match has
1622 // is_opener=false and partner_offset == opener's offset_in_run).
1623 if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1624 matches.retain(|m| m.is_opener || m.partner_offset != opener_offset);
1625 }
1626 invalidated_pairs.push((opener_idx, closer_idx));
1627 }
1628 invalidated_pairs
1629}
1630
1631/// Pandoc-only post-pass: recover the inner Strong match in
1632/// `***A **B** C***` patterns where the IR's standard pass produced
1633/// `Emph[Strong[A], "B**...** C"]` (matching the outer triple as
1634/// Strong+Emph but losing the inner `**...**`-as-Strong-of-`C` pair).
1635///
1636/// Pandoc's recursive descent here goes
1637/// `three c → ender c 2 → one c → option2 → two c`, producing
1638/// `Emph[Strong[A], "B", Strong[C]]` — two Strong nodes inside an outer
1639/// Emph. The standard delim-stack algorithm can't reach this pairing
1640/// because between-removal during the outer Emph match removes the
1641/// inner closer-side `**` (e.g. `bar**`) from the candidate pool.
1642///
1643/// This recovery scans Emph matches whose opener and closer originally
1644/// had count >= 3, and whose closer has unmatched bytes >= 2 after the
1645/// standard pass; for each, we look for an unmatched same-char
1646/// between-run with count >= 2 and `can_close = true` (the would-be
1647/// inner-Strong opener) and synthesise a Strong match that consumes
1648/// the leftmost 2 bytes of the closer (where the existing Emph match
1649/// shifts to the rightmost 1 byte). The byte-position rewrite lets
1650/// the CST emission produce well-nested `Emph[..., Strong[...]]` —
1651/// outer Emph close at the rightmost outer-triple byte, inner Strong
1652/// close at the leftmost two.
1653fn pandoc_inner_strong_recovery(events: &mut [IrEvent]) {
1654 let n = events.len();
1655 // (between_idx, opener_idx, closer_idx, len)
1656 let mut to_apply: Vec<(usize, usize, usize, u8)> = Vec::new();
1657
1658 for opener_idx in 0..n {
1659 let (open_total, open_matches_clone, ch_o) = match &events[opener_idx] {
1660 IrEvent::DelimRun {
1661 start,
1662 end,
1663 matches,
1664 ch,
1665 ..
1666 } => (*end - *start, matches.clone(), *ch),
1667 _ => continue,
1668 };
1669 if open_total < 3 {
1670 continue;
1671 }
1672
1673 for m in open_matches_clone.iter() {
1674 if !m.is_opener || m.kind != EmphasisKind::Emph {
1675 continue;
1676 }
1677 let closer_idx = m.partner_event as usize;
1678 if closer_idx <= opener_idx || closer_idx >= n {
1679 continue;
1680 }
1681
1682 let (close_total, close_consumed) = match &events[closer_idx] {
1683 IrEvent::DelimRun {
1684 start,
1685 end,
1686 matches,
1687 ..
1688 } => {
1689 let total = end - start;
1690 let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1691 (total, consumed)
1692 }
1693 _ => continue,
1694 };
1695 if close_total < 3 {
1696 continue;
1697 }
1698 let leftover = close_total.saturating_sub(close_consumed);
1699 if leftover < 2 {
1700 continue;
1701 }
1702
1703 // Walk backward from closer-1 looking for the rightmost
1704 // unmatched same-char run with count >= 2 and
1705 // can_close=true.
1706 for k in ((opener_idx + 1)..closer_idx).rev() {
1707 if let IrEvent::DelimRun {
1708 ch,
1709 start,
1710 end,
1711 matches,
1712 can_close,
1713 ..
1714 } = &events[k]
1715 {
1716 if *ch != ch_o || !*can_close {
1717 continue;
1718 }
1719 let total = end - start;
1720 let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1721 let remaining = total.saturating_sub(consumed);
1722 if remaining < 2 {
1723 continue;
1724 }
1725 to_apply.push((k, opener_idx, closer_idx, 2));
1726 break;
1727 }
1728 }
1729 }
1730 }
1731
1732 for (between_idx, opener_idx, closer_idx, len) in to_apply {
1733 // Find the existing Emph match on the closer side.
1734 let (closer_emph_match_idx, closer_emph_offset) = {
1735 let mut found: Option<(usize, u8)> = None;
1736 if let IrEvent::DelimRun { matches, .. } = &events[closer_idx] {
1737 for (mi, m) in matches.iter().enumerate() {
1738 if !m.is_opener
1739 && m.partner_event as usize == opener_idx
1740 && m.kind == EmphasisKind::Emph
1741 {
1742 found = Some((mi, m.offset_in_run));
1743 break;
1744 }
1745 }
1746 }
1747 match found {
1748 Some(x) => x,
1749 None => continue,
1750 }
1751 };
1752
1753 // Find the corresponding Emph match on the opener side.
1754 let opener_emph_match_idx = {
1755 let mut found: Option<usize> = None;
1756 if let IrEvent::DelimRun { matches, .. } = &events[opener_idx] {
1757 for (mi, m) in matches.iter().enumerate() {
1758 if m.is_opener
1759 && m.partner_event as usize == closer_idx
1760 && m.kind == EmphasisKind::Emph
1761 {
1762 found = Some(mi);
1763 break;
1764 }
1765 }
1766 }
1767 match found {
1768 Some(x) => x,
1769 None => continue,
1770 }
1771 };
1772
1773 // Shift the Emph closer's offset to the right of the new
1774 // Strong closer's bytes (Strong takes leftmost `len` bytes,
1775 // Emph takes the next byte).
1776 let new_closer_emph_offset = closer_emph_offset + len;
1777
1778 // Update closer's Emph offset_in_run.
1779 if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1780 matches[closer_emph_match_idx].offset_in_run = new_closer_emph_offset;
1781 }
1782 // Update opener's Emph partner_offset to point at the shifted
1783 // Emph closer position.
1784 if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1785 matches[opener_emph_match_idx].partner_offset = new_closer_emph_offset;
1786 }
1787
1788 // Add Strong opener match on the between-run.
1789 if let IrEvent::DelimRun { matches, .. } = &mut events[between_idx] {
1790 matches.push(DelimMatch {
1791 offset_in_run: 0,
1792 len,
1793 is_opener: true,
1794 partner_event: closer_idx as u32,
1795 partner_offset: closer_emph_offset,
1796 kind: EmphasisKind::Strong,
1797 });
1798 }
1799 // Add Strong closer match on the closer (at the original
1800 // pre-shift Emph-closer position; the bytes that were the
1801 // single Emph closer now become the leftmost 2 bytes of the
1802 // Strong closer).
1803 if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1804 matches.push(DelimMatch {
1805 offset_in_run: closer_emph_offset,
1806 len,
1807 is_opener: false,
1808 partner_event: between_idx as u32,
1809 partner_offset: 0,
1810 kind: EmphasisKind::Strong,
1811 });
1812 }
1813 }
1814}
1815
1816fn source_start_event(event: &IrEvent) -> usize {
1817 match event {
1818 IrEvent::DelimRun { start, .. } => *start,
1819 _ => unreachable!("source_start_event called on non-DelimRun"),
1820 }
1821}
1822
1823// ============================================================================
1824// Pass 3: Process brackets (CommonMark §6.3)
1825// ============================================================================
1826
1827/// Resolve `[`/`![`/`]` markers into link/image nodes per CommonMark §6.3
1828/// (with Pandoc-aware variations under `Dialect::Pandoc`).
1829///
1830/// Walks the IR forward looking for `]` markers. For each one, finds the
1831/// nearest active matching `[`/`` or `[text](dest "title")`.
1835/// 2. Full reference: `[text][label]`, where `label` is in `refdefs`.
1836/// 3. Collapsed reference: `[text][]`, where `text` (normalised) is in
1837/// `refdefs`.
1838/// 4. Shortcut reference: `[text]` not followed by `(` or `[`, where
1839/// `text` (normalised) is in `refdefs`.
1840///
1841/// On a match, the opener gets a `BracketResolution` and the closer is
1842/// flagged `matched`. Under `Dialect::CommonMark`, all earlier active link
1843/// openers are deactivated to implement the §6.3 "links may not contain
1844/// other links" rule (image brackets do not deactivate earlier link
1845/// openers — only links do). Under `Dialect::Pandoc`, the deactivate-pass
1846/// is skipped: pandoc-native is outer-wins for nested links (the inner
1847/// `[inner](u2)` of `[link [inner](u2)](u1)` is literal text inside the
1848/// outer link), and the dispatcher enforces this via a `suppress_inner_links`
1849/// flag during LINK-text recursion. So under Pandoc the IR can leave both
1850/// outer and inner resolved and trust the dispatcher to suppress inner
1851/// LINK emission.
1852///
1853/// On a miss the bracket pair stays opaque-as-literal and the closer is
1854/// dropped from the bracket stack so the next `]` can re-pair.
1855///
1856/// Reference-form resolution consults the refdef map under both
1857/// dialects (CommonMark §6.3 and Pandoc-markdown agree on the
1858/// document-scoped lookup rule). Under Pandoc, when a bracket-shape
1859/// pattern (`[text][label]`, `[text][]`, `[text]`) doesn't resolve to
1860/// a refdef, the opener is tagged with `unresolved_ref = Some(...)`
1861/// and the closer's `matched` is set to `true` so that
1862/// [`build_bracket_plan`] emits a [`BracketDispo::UnresolvedReference`]
1863/// keyed at the opener. Emission then wraps `[start, end)` in an
1864/// `UNRESOLVED_REFERENCE` node — distinct from `LINK` — so downstream
1865/// tools (linter, LSP) can attach behavior to the bracket-shape
1866/// pattern without the parser having to lie about resolution.
1867///
1868/// Under CommonMark, no `unresolved_ref` is recorded; the
1869/// no-resolution fall-through behaves as today (opener deactivated,
1870/// brackets emit as literal text).
1871pub fn process_brackets(
1872 events: &mut [IrEvent],
1873 text: &str,
1874 refdefs: Option<&RefdefMap>,
1875 dialect: crate::options::Dialect,
1876) {
1877 let empty: HashSet<String> = HashSet::new();
1878 let labels: &HashSet<String> = match refdefs {
1879 Some(map) => map.as_ref(),
1880 None => &empty,
1881 };
1882 let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1883 // Refdef-aware label resolution under both dialects.
1884 let label_resolves =
1885 |key_norm: &str| -> bool { !key_norm.is_empty() && labels.contains(key_norm) };
1886
1887 // Walk forward through events, treating it as a linear scan for `]`.
1888 let mut i = 0;
1889 while i < events.len() {
1890 let close_pos = match &events[i] {
1891 IrEvent::CloseBracket { pos, .. } => *pos,
1892 _ => {
1893 i += 1;
1894 continue;
1895 }
1896 };
1897
1898 // Find the nearest active OpenBracket before `i`.
1899 let mut o = match find_active_opener(events, i) {
1900 Some(o) => o,
1901 None => {
1902 i += 1;
1903 continue;
1904 }
1905 };
1906
1907 let (open_end, is_image) = match &events[o] {
1908 IrEvent::OpenBracket { end, is_image, .. } => (*end, *is_image),
1909 _ => unreachable!(),
1910 };
1911 let text_start = open_end;
1912 let text_end = close_pos;
1913 let after_close = close_pos + 1;
1914
1915 // 1. Inline link / image.
1916 if let Some((suffix_end, dest, title)) = try_inline_suffix(text, after_close) {
1917 // §6.3 link-in-link rule (CommonMark): if this is a *link*
1918 // (not an image), and any earlier active link opener exists,
1919 // deactivate them. We also deactivate openers strictly before
1920 // `o` here because matching means the inner link wins; the
1921 // spec applies this *after* matching. Pandoc skips this —
1922 // outer-wins is enforced by the dispatcher's
1923 // `suppress_inner_links` flag during LINK-text recursion.
1924 if !is_image && is_commonmark {
1925 deactivate_earlier_link_openers(events, o);
1926 }
1927 commit_resolution(
1928 events,
1929 o,
1930 i,
1931 text_start,
1932 text_end,
1933 after_close,
1934 suffix_end,
1935 LinkKind::Inline { dest, title },
1936 );
1937 // Remove the opener from the bracket stack: it has been
1938 // matched (active=false will fall out automatically since
1939 // resolution is Some).
1940 mark_opener_resolved(events, o);
1941 i += 1;
1942 continue;
1943 }
1944
1945 // 2. Full reference link: `[text][label]`.
1946 let full_ref_suffix = try_full_reference_suffix(text, after_close);
1947 if let Some((suffix_end, label_raw)) = &full_ref_suffix {
1948 let label_norm = normalize_label(label_raw);
1949 if label_resolves(&label_norm) {
1950 if !is_image && is_commonmark {
1951 deactivate_earlier_link_openers(events, o);
1952 }
1953 commit_resolution(
1954 events,
1955 o,
1956 i,
1957 text_start,
1958 text_end,
1959 after_close,
1960 *suffix_end,
1961 LinkKind::FullReference {
1962 label: label_raw.clone(),
1963 },
1964 );
1965 mark_opener_resolved(events, o);
1966 i += 1;
1967 continue;
1968 }
1969 // Bracketed but unresolved label: §6.3 says we still treat
1970 // `[text][label]` as not-a-link, but the brackets get
1971 // consumed as literal text AND the shortcut form is
1972 // suppressed (since the `]` is followed by a link label).
1973 }
1974
1975 // 3. Collapsed `[]`.
1976 let link_text = &text[text_start..text_end];
1977 let link_text_norm = normalize_label(link_text);
1978 let is_collapsed = is_collapsed_marker(text, after_close);
1979 let collapsed_suffix_end = after_close + 2;
1980
1981 if is_collapsed && label_resolves(&link_text_norm) {
1982 if !is_image && is_commonmark {
1983 deactivate_earlier_link_openers(events, o);
1984 }
1985 commit_resolution(
1986 events,
1987 o,
1988 i,
1989 text_start,
1990 text_end,
1991 after_close,
1992 collapsed_suffix_end,
1993 LinkKind::CollapsedReference,
1994 );
1995 mark_opener_resolved(events, o);
1996 i += 1;
1997 continue;
1998 }
1999 // `[text][]` with text not in refdefs — falls through to
2000 // literal text; shortcut is suppressed (followed by `[]`).
2001
2002 // 4. Shortcut form: `[text]` not followed by `[]` or `[label]`.
2003 // Per CommonMark §6.3: "A shortcut reference link consists of a
2004 // link label that matches a link reference definition elsewhere
2005 // in the document and is not followed by [] or a link label."
2006 // The full-ref / collapsed shape attempts above suppress the
2007 // shortcut even when their labels don't resolve — the bracket
2008 // bytes still get consumed as literal text.
2009 let shortcut_suppressed = full_ref_suffix.is_some() || is_collapsed;
2010 if !shortcut_suppressed && label_resolves(&link_text_norm) {
2011 if !is_image && is_commonmark {
2012 deactivate_earlier_link_openers(events, o);
2013 }
2014 commit_resolution(
2015 events,
2016 o,
2017 i,
2018 text_start,
2019 text_end,
2020 after_close,
2021 after_close,
2022 LinkKind::ShortcutReference,
2023 );
2024 mark_opener_resolved(events, o);
2025 i += 1;
2026 continue;
2027 }
2028
2029 // No resolution. Under Pandoc, the bracket pair is still a
2030 // recognisable reference shape (full / collapsed / shortcut) —
2031 // tag the opener with `unresolved_ref` so emission wraps it
2032 // in an `UNRESOLVED_REFERENCE` node, and mark the closer
2033 // matched so it doesn't fall through to a literal `]` token.
2034 // Under CommonMark, behavior unchanged: deactivate the opener,
2035 // brackets emit as literal text.
2036 //
2037 // Empty-component shapes (`[]`, `[][]`) aren't reference
2038 // patterns even in spirit — pandoc-native treats them as
2039 // literal text — so skip wrapping.
2040 let unresolved_shape = if !is_commonmark {
2041 let (end, has_substantive_label) =
2042 if let Some((suffix_end, label_raw)) = &full_ref_suffix {
2043 (*suffix_end, !normalize_label(label_raw).is_empty())
2044 } else if is_collapsed {
2045 (collapsed_suffix_end, !link_text_norm.is_empty())
2046 } else {
2047 (after_close, !link_text_norm.is_empty())
2048 };
2049 if has_substantive_label {
2050 Some(UnresolvedRefShape {
2051 close_event: i as u32,
2052 text_end,
2053 end,
2054 })
2055 } else {
2056 None
2057 }
2058 } else {
2059 None
2060 };
2061 if let IrEvent::OpenBracket {
2062 active,
2063 unresolved_ref,
2064 ..
2065 } = &mut events[o]
2066 {
2067 *active = false;
2068 *unresolved_ref = unresolved_shape;
2069 }
2070 if unresolved_shape.is_some()
2071 && let IrEvent::CloseBracket { matched, .. } = &mut events[i]
2072 {
2073 *matched = true;
2074 }
2075 let _ = &mut o;
2076 i += 1;
2077 }
2078}
2079
2080fn find_active_opener(events: &[IrEvent], close_idx: usize) -> Option<usize> {
2081 (0..close_idx).rev().find(|&i| {
2082 matches!(
2083 &events[i],
2084 IrEvent::OpenBracket {
2085 active: true,
2086 resolution: None,
2087 ..
2088 }
2089 )
2090 })
2091}
2092
2093fn deactivate_earlier_link_openers(events: &mut [IrEvent], open_idx: usize) {
2094 for ev in &mut events[..open_idx] {
2095 if let IrEvent::OpenBracket {
2096 is_image: false,
2097 active,
2098 resolution: None,
2099 ..
2100 } = ev
2101 {
2102 *active = false;
2103 }
2104 }
2105}
2106
2107fn mark_opener_resolved(events: &mut [IrEvent], open_idx: usize) {
2108 if let IrEvent::OpenBracket { active, .. } = &mut events[open_idx] {
2109 *active = false;
2110 }
2111}
2112
2113#[allow(clippy::too_many_arguments)]
2114fn commit_resolution(
2115 events: &mut [IrEvent],
2116 open_idx: usize,
2117 close_idx: usize,
2118 text_start: usize,
2119 text_end: usize,
2120 suffix_start: usize,
2121 suffix_end: usize,
2122 kind: LinkKind,
2123) {
2124 if let IrEvent::OpenBracket { resolution, .. } = &mut events[open_idx] {
2125 *resolution = Some(BracketResolution {
2126 close_event: close_idx as u32,
2127 text_start,
2128 text_end,
2129 suffix_start,
2130 suffix_end,
2131 kind,
2132 });
2133 }
2134 if let IrEvent::CloseBracket { matched, .. } = &mut events[close_idx] {
2135 *matched = true;
2136 }
2137}
2138
2139/// Try to parse `(dest)` or `(dest "title")` inline link suffix starting
2140/// at `text[pos]`. Returns `(end_pos_exclusive, dest, title)`.
2141fn try_inline_suffix(text: &str, pos: usize) -> Option<(usize, String, Option<String>)> {
2142 let bytes = text.as_bytes();
2143 if pos >= bytes.len() || bytes[pos] != b'(' {
2144 return None;
2145 }
2146 let mut p = pos + 1;
2147 // Skip leading whitespace.
2148 while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2149 p += 1;
2150 }
2151 // Empty `()` — link with empty destination.
2152 if p < bytes.len() && bytes[p] == b')' {
2153 return Some((p + 1, String::new(), None));
2154 }
2155
2156 // Parse destination.
2157 let (dest, dest_end) = parse_link_destination(text, p)?;
2158 p = dest_end;
2159
2160 // Skip whitespace.
2161 while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2162 p += 1;
2163 }
2164
2165 // Optional title.
2166 let mut title = None;
2167 if p < bytes.len() && matches!(bytes[p], b'"' | b'\'' | b'(') {
2168 let (t, t_end) = parse_link_title(text, p)?;
2169 title = Some(t);
2170 p = t_end;
2171 while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2172 p += 1;
2173 }
2174 }
2175
2176 if p >= bytes.len() || bytes[p] != b')' {
2177 return None;
2178 }
2179 Some((p + 1, dest, title))
2180}
2181
/// Parses a link destination beginning at byte offset `start` of `text`.
///
/// Two forms are recognised:
///
/// * `<bracketed>` — everything up to the closing `>`; an unescaped `<` or
///   a newline before the `>` rejects the destination. The returned string
///   excludes the angle brackets.
/// * unbracketed — a non-empty run ending at the first space, ASCII control
///   byte, DEL, or unbalanced `)`. Parentheses must balance.
///
/// In both forms a backslash skips the byte that follows it. Returns the
/// raw (still escaped) destination and the offset just past it.
fn parse_link_destination(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    let first = *bytes.get(start)?;

    if first == b'<' {
        let begin = start + 1;
        let mut cursor = begin;
        while let Some(&byte) = bytes.get(cursor) {
            match byte {
                b'>' | b'\n' | b'<' => break,
                b'\\' if cursor + 1 < bytes.len() => cursor += 2,
                _ => cursor += 1,
            }
        }
        // Only a `>` terminator makes a valid bracketed destination.
        if bytes.get(cursor) != Some(&b'>') {
            return None;
        }
        return Some((text[begin..cursor].to_string(), cursor + 1));
    }

    let mut cursor = start;
    let mut open_parens: i32 = 0;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'\\' if cursor + 1 < bytes.len() => cursor += 2,
            b'(' => {
                open_parens += 1;
                cursor += 1;
            }
            // An unbalanced `)` belongs to the enclosing `(...)` suffix.
            b')' if open_parens == 0 => break,
            b')' => {
                open_parens -= 1;
                cursor += 1;
            }
            // Space, any ASCII control byte, or DEL ends the destination.
            0x00..=0x20 | 0x7f => break,
            _ => cursor += 1,
        }
    }
    if cursor == start || open_parens != 0 {
        return None;
    }
    Some((text[start..cursor].to_string(), cursor))
}
2237
/// Parses a link title beginning at byte offset `start` of `text`.
///
/// The title is delimited by `"…"`, `'…'`, or `(…)`. A backslash skips the
/// byte that follows it, so an escaped delimiter does not end the title.
///
/// Returns the raw (still escaped) title without its delimiters and the
/// offset just past the closing delimiter. Returns `None` when `start` is
/// out of range, does not point at an opening delimiter, or the closing
/// delimiter is never found.
fn parse_link_title(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    // Checked lookup: an out-of-range `start` is "no title", not a panic.
    let close = match *bytes.get(start)? {
        b'"' => b'"',
        b'\'' => b'\'',
        b'(' => b')',
        _ => return None,
    };
    let begin = start + 1;
    let mut p = begin;
    while p < bytes.len() {
        match bytes[p] {
            b'\\' if p + 1 < bytes.len() => p += 2,
            b if b == close => return Some((text[begin..p].to_string(), p + 1)),
            _ => p += 1,
        }
    }
    None
}
2263
/// Parses a `[label]` suffix that must begin with `[` at byte offset `pos`.
///
/// The label runs to the first unescaped `]`; a backslash skips the byte
/// after it, newlines are allowed, and an unescaped `[` inside the label
/// rejects the suffix. Returns `(suffix_end, label_raw)` where `label_raw`
/// is the text between the brackets, escapes intact.
///
/// An empty label (`[]`, the collapsed form) returns `None` here; it is
/// handled separately by `is_collapsed_marker`.
fn try_full_reference_suffix(text: &str, pos: usize) -> Option<(usize, String)> {
    let bytes = text.as_bytes();
    if bytes.get(pos) != Some(&b'[') {
        return None;
    }
    let label_start = pos + 1;
    let mut cursor = label_start;
    // Running off the end of the input means the label never closed.
    loop {
        match *bytes.get(cursor)? {
            b']' => break,
            b'[' => return None,
            // Skip the backslash together with the byte it escapes.
            b'\\' => cursor += 2,
            _ => cursor += 1,
        }
    }
    if cursor == label_start {
        return None;
    }
    Some((cursor + 1, text[label_start..cursor].to_string()))
}
2303
/// Reports whether the two bytes of `text` starting at offset `pos` are
/// exactly `[]` (the collapsed-reference marker). Out-of-range positions
/// yield `false`.
fn is_collapsed_marker(text: &str, pos: usize) -> bool {
    matches!(text.as_bytes().get(pos..), Some([b'[', b']', ..]))
}
2307
2308// ============================================================================
2309// Bracket plan — byte-position-keyed view of resolved brackets, consumed by
2310// the existing emission walk in `core::parse_inline_range_impl`.
2311// ============================================================================
2312
/// Disposition of a single bracket byte after [`process_brackets`].
///
/// Values are produced by [`build_bracket_plan`] and looked up by byte
/// position through [`BracketPlan::lookup`].
#[derive(Debug, Clone)]
pub enum BracketDispo {
    /// `[` or `![` of a resolved link/image. Emission emits the LINK/IMAGE
    /// node and skips past `suffix_end`.
    ///
    /// `text_start..text_end` is the range between the opener and its `]`;
    /// `suffix_start..suffix_end` is the range following the `]` that the
    /// matched form consumed (empty for the shortcut form). `kind` records
    /// which link form matched.
    Open {
        is_image: bool,
        text_start: usize,
        text_end: usize,
        suffix_start: usize,
        suffix_end: usize,
        kind: LinkKind,
    },
    /// Pandoc-only: `[` or `![` of a bracket-shape reference pattern
    /// whose label didn't resolve. Emission wraps `[start, end)` in an
    /// `UNRESOLVED_REFERENCE` node so downstream tools can attach
    /// behavior to the bracket-shape pattern. `text_start..text_end` is
    /// the inner text range (between the outer `[`/`![` and `]`); `end`
    /// is the offset just past the whole bracket shape.
    UnresolvedReference {
        is_image: bool,
        text_start: usize,
        text_end: usize,
        end: usize,
    },
    /// Bracket byte (one of `[`, `]`, or `!`) that fell through to literal
    /// text. Emission accumulates into the surrounding text run.
    Literal,
}
2341
/// A byte-keyed view of the IR's bracket resolutions.
///
/// Built by [`build_bracket_plan`]; each key is the byte position the
/// disposition was recorded at (an opener's start byte, the individual
/// bytes of a literal opener, or an unmatched closer's position).
#[derive(Debug, Default, Clone)]
pub struct BracketPlan {
    // Ordered map from byte position to that byte's disposition.
    by_pos: BTreeMap<usize, BracketDispo>,
}

impl BracketPlan {
    /// Returns the disposition recorded at exactly byte position `pos`,
    /// or `None` when nothing was recorded there.
    pub fn lookup(&self, pos: usize) -> Option<&BracketDispo> {
        self.by_pos.get(&pos)
    }

    /// Returns `true` when the plan holds no entries at all.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2357
/// A standalone Pandoc inline construct recognised by `build_ir` and
/// dispatched directly from the emission walk. Carries the construct's
/// full source range so the emission walk can slice the content for the
/// existing `emit_*` helpers without re-running the recognition.
///
/// Only the `end` offset is stored on each variant; the start is the key
/// the entry is stored under in [`ConstructPlan`]. Both come straight from
/// the originating `IrEvent::Construct` (see [`build_construct_plan`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructDispo {
    /// `^[note text]` — emit via `emit_inline_footnote` after slicing
    /// the inner content.
    InlineFootnote { end: usize },
    /// `<span ...>...</span>` — emit via `emit_native_span` after
    /// re-parsing the open-tag attributes from the source range.
    NativeSpan { end: usize },
    /// `[^id]` — emit via `emit_footnote_reference` after extracting
    /// the label id from the source range.
    FootnoteReference { end: usize },
    /// `[@cite]` — emit via `emit_bracketed_citation` after slicing
    /// the inner content.
    BracketedCitation { end: usize },
    /// `@key` or `-@key` — emit via `emit_bare_citation` (or
    /// `emit_crossref` when `is_quarto_crossref_key` matches and
    /// `extensions.quarto_crossrefs` is enabled).
    BareCitation { end: usize },
    /// `[content]{attrs}` — emit via `emit_bracketed_span` after
    /// slicing the inner content and attribute string.
    BracketedSpan { end: usize },
}
2384
/// A byte-keyed view of the IR's standalone Pandoc constructs that the
/// emission walk consumes directly: inline footnotes, native spans,
/// footnote references, bracketed citations, bare citations, and
/// bracketed spans. Recognition is authoritative in `build_ir` under
/// `Dialect::Pandoc`; the dispatcher's legacy branches for these
/// constructs (`^[`, `<span>`, `[^id]`, `[@cite]`, `@cite` / `-@cite`,
/// `[text]{attrs}`) are gated to `Dialect::CommonMark` only and only
/// fire when the relevant extension is explicitly enabled.
///
/// Built by [`build_construct_plan`]; each key is a construct's start byte.
#[derive(Debug, Default, Clone)]
pub struct ConstructPlan {
    // Ordered map from a construct's start byte to its disposition.
    by_pos: BTreeMap<usize, ConstructDispo>,
}

impl ConstructPlan {
    /// Returns the construct recorded as starting at exactly byte
    /// position `pos`, or `None` when no construct starts there.
    pub fn lookup(&self, pos: usize) -> Option<&ConstructDispo> {
        self.by_pos.get(&pos)
    }

    /// Returns `true` when the plan holds no constructs.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2407
2408/// Build a [`ConstructPlan`] from the resolved IR. Each
2409/// `Construct { kind: InlineFootnote | NativeSpan, .. }` becomes one
2410/// entry keyed at its start byte.
2411pub fn build_construct_plan(events: &[IrEvent]) -> ConstructPlan {
2412 let mut by_pos: BTreeMap<usize, ConstructDispo> = BTreeMap::new();
2413 for ev in events {
2414 if let IrEvent::Construct { start, end, kind } = ev {
2415 match kind {
2416 ConstructKind::InlineFootnote => {
2417 by_pos.insert(*start, ConstructDispo::InlineFootnote { end: *end });
2418 }
2419 ConstructKind::NativeSpan => {
2420 by_pos.insert(*start, ConstructDispo::NativeSpan { end: *end });
2421 }
2422 ConstructKind::FootnoteReference => {
2423 by_pos.insert(*start, ConstructDispo::FootnoteReference { end: *end });
2424 }
2425 ConstructKind::BracketedCitation => {
2426 by_pos.insert(*start, ConstructDispo::BracketedCitation { end: *end });
2427 }
2428 ConstructKind::BareCitation => {
2429 by_pos.insert(*start, ConstructDispo::BareCitation { end: *end });
2430 }
2431 ConstructKind::BracketedSpan => {
2432 by_pos.insert(*start, ConstructDispo::BracketedSpan { end: *end });
2433 }
2434 _ => {}
2435 }
2436 }
2437 }
2438 ConstructPlan { by_pos }
2439}
2440
2441/// Build a [`BracketPlan`] from the resolved IR. Each `OpenBracket`
2442/// resolution becomes an [`BracketDispo::Open`] keyed at the opener's
2443/// start byte. Unresolved openers and unmatched closers become
2444/// `BracketDispo::Literal` so the emission path can recognise them
2445/// without re-parsing.
2446pub fn build_bracket_plan(events: &[IrEvent]) -> BracketPlan {
2447 let mut by_pos: BTreeMap<usize, BracketDispo> = BTreeMap::new();
2448 for ev in events {
2449 match ev {
2450 IrEvent::OpenBracket {
2451 start,
2452 is_image,
2453 resolution: Some(res),
2454 ..
2455 } => {
2456 by_pos.insert(
2457 *start,
2458 BracketDispo::Open {
2459 is_image: *is_image,
2460 text_start: res.text_start,
2461 text_end: res.text_end,
2462 suffix_start: res.suffix_start,
2463 suffix_end: res.suffix_end,
2464 kind: res.kind.clone(),
2465 },
2466 );
2467 }
2468 IrEvent::OpenBracket {
2469 start,
2470 end,
2471 is_image,
2472 resolution: None,
2473 unresolved_ref: Some(shape),
2474 ..
2475 } => {
2476 by_pos.insert(
2477 *start,
2478 BracketDispo::UnresolvedReference {
2479 is_image: *is_image,
2480 text_start: *end,
2481 text_end: shape.text_end,
2482 end: shape.end,
2483 },
2484 );
2485 }
2486 IrEvent::OpenBracket {
2487 start,
2488 is_image,
2489 resolution: None,
2490 unresolved_ref: None,
2491 ..
2492 } => {
2493 let len = if *is_image { 2 } else { 1 };
2494 for off in 0..len {
2495 by_pos.insert(*start + off, BracketDispo::Literal);
2496 }
2497 }
2498 IrEvent::CloseBracket {
2499 pos,
2500 matched: false,
2501 } => {
2502 by_pos.insert(*pos, BracketDispo::Literal);
2503 }
2504 _ => {}
2505 }
2506 }
2507 BracketPlan { by_pos }
2508}
2509
2510/// One-shot helper: build the IR, run all passes, and return the
2511/// bundled [`InlinePlans`] (emphasis dispositions, bracket resolutions,
2512/// and standalone Pandoc constructs) — packaged together so the inline
2513/// emission path can consume them in one go for either dialect.
2514///
2515/// Pass ordering follows the CommonMark §6.3 reference impl: bracket
2516/// resolution runs first, then emphasis is processed *scoped per resolved
2517/// bracket pair's inner event range*, then once more on the residual
2518/// top-level events. This prevents emphasis pairs from forming across a
2519/// link's bracket boundary, which the previous "all-emphasis-then-all-
2520/// brackets" order got wrong (e.g. spec example #473).
2521pub fn build_full_plans(
2522 text: &str,
2523 start: usize,
2524 end: usize,
2525 config: &ParserOptions,
2526) -> InlinePlans {
2527 let mut scratch = ScratchEvents::checkout();
2528 let bundle = scratch.inner.as_mut().unwrap();
2529 bundle.events.clear();
2530 bundle.bracket_pairs.clear();
2531 bundle.excluded.clear();
2532
2533 build_ir_into(text, start, end, config, &mut bundle.events);
2534 // §6.3 bracket resolution runs for both dialects. Under CommonMark
2535 // it enforces refdef-aware shortcut/collapsed/full-ref resolution
2536 // and the §6.3 link-in-link deactivation rule. Under Pandoc it
2537 // performs shape-only resolution (any non-empty label resolves) and
2538 // skips the deactivation pass — pandoc-native is outer-wins for
2539 // nested links and the dispatcher's `suppress_inner_links` flag
2540 // suppresses inner LINK emission during LINK-text recursion.
2541 process_brackets(
2542 &mut bundle.events,
2543 text,
2544 config.refdef_labels.as_ref(),
2545 config.dialect,
2546 );
2547
2548 // Scoped emphasis pass per resolved bracket pair, innermost first.
2549 // We collect (open_idx, close_idx) pairs of resolved brackets and run
2550 // emphasis only over the events strictly between them. Innermost-first
2551 // ordering matters: an outer link wraps emphasis that wraps an inner
2552 // link, and the inner link's inner range must be paired before the
2553 // outer's inner range so the top-level pass sees consistent state.
2554 // Include both resolved-link bracket pairs and Pandoc unresolved-
2555 // reference bracket pairs in the scoping set. The latter wrap into
2556 // an `UNRESOLVED_REFERENCE` CST node, which is just as much a tree
2557 // boundary for emphasis as a resolved `LINK` — emphasis must not
2558 // pair across the wrapper's brackets, otherwise the emission walk
2559 // produces a non-tree-shaped CST.
2560 bundle.bracket_pairs.extend(
2561 bundle
2562 .events
2563 .iter()
2564 .enumerate()
2565 .filter_map(|(i, ev)| match ev {
2566 IrEvent::OpenBracket {
2567 resolution: Some(res),
2568 ..
2569 } => Some((i, res.close_event as usize)),
2570 IrEvent::OpenBracket {
2571 resolution: None,
2572 unresolved_ref: Some(shape),
2573 ..
2574 } => Some((i, shape.close_event as usize)),
2575 _ => None,
2576 }),
2577 );
2578 // Innermost-first: sort by close_idx ascending, then open_idx descending.
2579 bundle
2580 .bracket_pairs
2581 .sort_by(|a, b| a.1.cmp(&b.1).then(b.0.cmp(&a.0)));
2582 // Iterate pairs by index so we can hold &mut bundle.events while
2583 // reading bundle.bracket_pairs (split borrow on disjoint fields).
2584 for i in 0..bundle.bracket_pairs.len() {
2585 let (open_idx, close_idx) = bundle.bracket_pairs[i];
2586 process_emphasis_in_range(&mut bundle.events, open_idx + 1, close_idx, config.dialect);
2587 }
2588
2589 // Pandoc-only degrade pass for unresolved bracket-shape patterns
2590 // whose interior left any delim-run byte unmatched after the scoped
2591 // emphasis pass. Pandoc-native degrades such brackets to literal `[`
2592 // / `]` text — the user's intent was clearly not a reference. The
2593 // bracket_pairs entry stays so the inner delims remain in the
2594 // top-level exclusion mask (otherwise they'd re-enter pairing and
2595 // could form Emph spans with delims outside, which pandoc never
2596 // does — see the bug_2_emphasis_crosses_brackets_pandoc fixture).
2597 // Flipping `unresolved_ref` to `None` makes `build_bracket_plan`
2598 // emit `BracketDispo::Literal` for the bracket bytes; flipping
2599 // `CloseBracket.matched` to `false` does the same for the `]`.
2600 for i in 0..bundle.bracket_pairs.len() {
2601 let (open_idx, close_idx) = bundle.bracket_pairs[i];
2602 let is_unresolved = matches!(
2603 &bundle.events[open_idx],
2604 IrEvent::OpenBracket {
2605 resolution: None,
2606 unresolved_ref: Some(_),
2607 ..
2608 }
2609 );
2610 if !is_unresolved {
2611 continue;
2612 }
2613 if !range_has_unmatched_delim_bytes(&bundle.events, open_idx + 1, close_idx) {
2614 continue;
2615 }
2616 if let IrEvent::OpenBracket { unresolved_ref, .. } = &mut bundle.events[open_idx] {
2617 *unresolved_ref = None;
2618 }
2619 if let IrEvent::CloseBracket { matched, .. } = &mut bundle.events[close_idx] {
2620 *matched = false;
2621 }
2622 }
2623
2624 // Top-level emphasis pass: handles delim runs that fall outside any
2625 // resolved bracket pair.
2626 let len = bundle.events.len();
2627 if bundle.bracket_pairs.is_empty() {
2628 // Fast path: no resolved brackets means no exclusion mask needed —
2629 // skip the resize-and-fill pass entirely. Common for prose
2630 // paragraphs without inline links.
2631 process_emphasis_in_range_filtered(&mut bundle.events, 0, len, None, config.dialect);
2632 } else {
2633 // Build exclusion bitmap: any delim run whose event index lies
2634 // inside a resolved bracket pair is excluded from the top-level
2635 // pass. Implements the §6.3 boundary rule: emphasis at the top
2636 // level must not pair across a link's brackets.
2637 bundle.excluded.resize(len, false);
2638 for &(open_idx, close_idx) in &bundle.bracket_pairs {
2639 for slot in bundle
2640 .excluded
2641 .iter_mut()
2642 .take(close_idx)
2643 .skip(open_idx + 1)
2644 {
2645 *slot = true;
2646 }
2647 }
2648 process_emphasis_in_range_filtered(
2649 &mut bundle.events,
2650 0,
2651 len,
2652 Some(&bundle.excluded),
2653 config.dialect,
2654 );
2655 }
2656
2657 InlinePlans {
2658 emphasis: build_emphasis_plan(&bundle.events),
2659 brackets: build_bracket_plan(&bundle.events),
2660 constructs: build_construct_plan(&bundle.events),
2661 }
2662}
2663
2664/// Returns true if any [`IrEvent::DelimRun`] in the event range
2665/// `[lo, hi)` has byte coverage from its `matches` vec that is less
2666/// than the run length — i.e. at least one byte of the run failed to
2667/// pair as emphasis. Used by the Pandoc unresolved-reference degrade
2668/// pass in [`build_full_plans`].
2669///
2670/// Delim runs whose flanking rules forbid both opening *and* closing
2671/// (e.g. intraword `_` inside `foo_bar`) are skipped: those bytes were
2672/// never a pairing candidate, so an "unmatched" count for them isn't
2673/// evidence of a failed emphasis attempt. Without this exclusion every
2674/// URL or identifier with an underscore inside an unresolved bracket
2675/// pair would spuriously degrade the bracket-shape to literal text.
2676fn range_has_unmatched_delim_bytes(events: &[IrEvent], lo: usize, hi: usize) -> bool {
2677 let hi = hi.min(events.len());
2678 for ev in &events[lo..hi] {
2679 if let IrEvent::DelimRun {
2680 start,
2681 end,
2682 matches,
2683 can_open,
2684 can_close,
2685 ..
2686 } = ev
2687 {
2688 if !can_open && !can_close {
2689 continue;
2690 }
2691 let total = end - start;
2692 let matched: usize = matches.iter().map(|m| m.len as usize).sum();
2693 if matched < total {
2694 return true;
2695 }
2696 }
2697 }
2698 false
2699}
2700
2701/// Thread-local pool of scratch buffers used by [`build_full_plans`].
2702///
2703/// `build_full_plans` checks out one bundle for the duration of the call
2704/// and returns it on drop so the next call (or a recursive nested call
2705/// from an inline emitter) reuses the allocations. The pool is
2706/// per-thread — the parser is single-threaded — and bounded so a
2707/// long-running editor session can't accumulate stale capacity.
2708struct ScratchEvents {
2709 inner: Option<ScratchBundle>,
2710}
2711
2712#[derive(Default)]
2713struct ScratchBundle {
2714 events: Vec<IrEvent>,
2715 bracket_pairs: Vec<(usize, usize)>,
2716 excluded: Vec<bool>,
2717}
2718
2719thread_local! {
2720 static IR_EVENT_POOL: std::cell::RefCell<Vec<ScratchBundle>> =
2721 const { std::cell::RefCell::new(Vec::new()) };
2722}
2723
2724impl ScratchEvents {
2725 fn checkout() -> Self {
2726 let bundle = IR_EVENT_POOL
2727 .with(|p| p.borrow_mut().pop())
2728 .unwrap_or_default();
2729 Self {
2730 inner: Some(bundle),
2731 }
2732 }
2733}
2734
2735impl Drop for ScratchEvents {
2736 fn drop(&mut self) {
2737 if let Some(mut bundle) = self.inner.take() {
2738 bundle.events.clear();
2739 bundle.bracket_pairs.clear();
2740 bundle.excluded.clear();
2741 // Cap pool depth at 8 (deepest realistic nested-link recursion)
2742 // and drop any bundle whose `events` grew past 8K (a single
2743 // pathological paragraph shouldn't pin a huge allocation
2744 // forever).
2745 if bundle.events.capacity() <= 8192 {
2746 IR_EVENT_POOL.with(|p| {
2747 let mut pool = p.borrow_mut();
2748 if pool.len() < 8 {
2749 pool.push(bundle);
2750 }
2751 });
2752 }
2753 }
2754 }
2755}
2756
2757/// Bundle of plans produced by [`build_full_plans`] and consumed by the
2758/// inline emission walk.
2759#[derive(Debug, Default, Clone)]
2760pub struct InlinePlans {
2761 pub emphasis: EmphasisPlan,
2762 pub brackets: BracketPlan,
2763 pub constructs: ConstructPlan,
2764}
2765
2766/// Convert the IR's delim-run match decisions into an [`EmphasisPlan`],
2767/// preserving the byte-keyed disposition shape the existing emission walk
2768/// consumes.
2769///
2770/// Each match on a [`DelimRun`](IrEvent::DelimRun) produces one entry in
2771/// the plan: the opener side records `Open` with the partner's source
2772/// byte and length; the closer side records `Close`. Bytes within a run
2773/// that are *not* covered by any match get a `Literal` entry, which the
2774/// emission walk uses to coalesce unmatched delimiter bytes with
2775/// surrounding plain text.
2776pub fn build_emphasis_plan(events: &[IrEvent]) -> EmphasisPlan {
2777 let mut by_pos: BTreeMap<usize, DelimChar> = BTreeMap::new();
2778 for ev in events {
2779 if let IrEvent::DelimRun {
2780 start,
2781 end,
2782 matches,
2783 ..
2784 } = ev
2785 {
2786 for m in matches {
2787 let pos = *start + m.offset_in_run as usize;
2788 let partner_run_start = match &events[m.partner_event as usize] {
2789 IrEvent::DelimRun { start: ps, .. } => *ps,
2790 _ => continue,
2791 };
2792 let partner_pos = partner_run_start + m.partner_offset as usize;
2793 if m.is_opener {
2794 by_pos.insert(
2795 pos,
2796 DelimChar::Open {
2797 len: m.len,
2798 partner: partner_pos,
2799 partner_len: m.len,
2800 kind: m.kind,
2801 },
2802 );
2803 } else {
2804 by_pos.insert(pos, DelimChar::Close);
2805 }
2806 }
2807 // Any remaining bytes (not covered by a match) are literal.
2808 for pos in *start..*end {
2809 by_pos.entry(pos).or_insert(DelimChar::Literal);
2810 }
2811 }
2812 }
2813 EmphasisPlan::from_dispositions(by_pos)
2814}
2815
2816#[cfg(test)]
2817mod tests {
2818 use super::*;
2819 use crate::options::Flavor;
2820 use crate::parser::inlines::inline_ir::DelimChar;
2821 use std::sync::Arc;
2822
2823 fn cm_opts() -> ParserOptions {
2824 let flavor = Flavor::CommonMark;
2825 ParserOptions {
2826 flavor,
2827 dialect: crate::options::Dialect::for_flavor(flavor),
2828 extensions: crate::options::Extensions::for_flavor(flavor),
2829 pandoc_compat: crate::options::PandocCompat::default(),
2830 refdef_labels: None,
2831 }
2832 }
2833
2834 fn refdefs<I: IntoIterator<Item = &'static str>>(labels: I) -> RefdefMap {
2835 Arc::new(labels.into_iter().map(|s| s.to_string()).collect())
2836 }
2837
2838 #[test]
2839 fn ir_event_range_covers_all_variants() {
2840 let txt = IrEvent::Text { start: 0, end: 5 };
2841 assert_eq!(txt.range(), (0, 5));
2842
2843 let close = IrEvent::CloseBracket {
2844 pos: 7,
2845 matched: false,
2846 };
2847 assert_eq!(close.range(), (7, 8));
2848
2849 let open = IrEvent::OpenBracket {
2850 start: 1,
2851 end: 3,
2852 is_image: true,
2853 active: true,
2854 resolution: None,
2855 unresolved_ref: None,
2856 };
2857 assert_eq!(open.range(), (1, 3));
2858 }
2859
2860 #[test]
2861 fn scan_records_text_and_delim_run() {
2862 let opts = cm_opts();
2863 let ir = build_ir("foo *bar*", 0, 9, &opts);
2864 // Expect: Text "foo ", DelimRun "*", Text "bar", DelimRun "*"
2865 assert!(matches!(ir[0], IrEvent::Text { start: 0, end: 4 }));
2866 assert!(matches!(
2867 ir[1],
2868 IrEvent::DelimRun {
2869 ch: b'*',
2870 start: 4,
2871 end: 5,
2872 ..
2873 }
2874 ));
2875 assert!(matches!(ir[2], IrEvent::Text { start: 5, end: 8 }));
2876 assert!(matches!(
2877 ir[3],
2878 IrEvent::DelimRun {
2879 ch: b'*',
2880 start: 8,
2881 end: 9,
2882 ..
2883 }
2884 ));
2885 }
2886
2887 #[test]
2888 fn scan_records_brackets() {
2889 let opts = cm_opts();
2890 let ir = build_ir("[foo]", 0, 5, &opts);
2891 assert!(matches!(
2892 ir[0],
2893 IrEvent::OpenBracket {
2894 start: 0,
2895 end: 1,
2896 is_image: false,
2897 ..
2898 }
2899 ));
2900 assert!(matches!(ir[1], IrEvent::Text { start: 1, end: 4 }));
2901 assert!(matches!(
2902 ir[2],
2903 IrEvent::CloseBracket {
2904 pos: 4,
2905 matched: false
2906 }
2907 ));
2908 }
2909
2910 #[test]
2911 fn scan_records_image_bracket() {
2912 let opts = cm_opts();
2913 let ir = build_ir("![alt]", 0, 6, &opts);
2914 assert!(matches!(
2915 ir[0],
2916 IrEvent::OpenBracket {
2917 start: 0,
2918 end: 2,
2919 is_image: true,
2920 ..
2921 }
2922 ));
2923 }
2924
2925 #[test]
2926 fn scan_handles_code_span_opacity() {
2927 let opts = cm_opts();
2928 let ir = build_ir("a `*x*` b", 0, 9, &opts);
2929 // Code span `*x*` should be a Construct, NOT delim runs.
2930 let has_delim_run = ir.iter().any(|e| matches!(e, IrEvent::DelimRun { .. }));
2931 assert!(
2932 !has_delim_run,
2933 "code span content should not produce delim runs"
2934 );
2935 assert!(ir.iter().any(|e| matches!(
2936 e,
2937 IrEvent::Construct {
2938 kind: ConstructKind::CodeSpan,
2939 ..
2940 }
2941 )));
2942 }
2943
2944 #[test]
2945 fn process_emphasis_simple_pair() {
2946 let opts = cm_opts();
2947 let mut ir = build_ir("*foo*", 0, 5, &opts);
2948 process_emphasis(&mut ir, opts.dialect);
2949 // First DelimRun (open) gets a match.
2950 let opener = ir
2951 .iter()
2952 .find(|e| matches!(e, IrEvent::DelimRun { start: 0, .. }))
2953 .unwrap();
2954 if let IrEvent::DelimRun { matches, .. } = opener {
2955 assert_eq!(matches.len(), 1);
2956 assert!(matches[0].is_opener);
2957 assert_eq!(matches[0].kind, EmphasisKind::Emph);
2958 }
2959 }
2960
2961 #[test]
2962 fn brackets_resolve_inline_link() {
2963 let opts = cm_opts();
2964 let mut ir = build_ir("[foo](/url)", 0, 11, &opts);
2965 process_brackets(&mut ir, "[foo](/url)", None, opts.dialect);
2966 let open = ir
2967 .iter()
2968 .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
2969 .unwrap();
2970 if let IrEvent::OpenBracket { resolution, .. } = open {
2971 let r = resolution.as_ref().expect("inline link resolved");
2972 assert!(matches!(r.kind, LinkKind::Inline { .. }));
2973 if let LinkKind::Inline { dest, .. } = &r.kind {
2974 assert_eq!(dest, "/url");
2975 }
2976 }
2977 }
2978
2979 #[test]
2980 fn brackets_shortcut_resolves_only_with_refdef() {
2981 let opts = cm_opts();
2982 let text = "[foo]";
2983 let map = refdefs(["foo"]);
2984 let mut ir = build_ir(text, 0, text.len(), &opts);
2985 process_brackets(&mut ir, text, Some(&map), opts.dialect);
2986 let open = ir
2987 .iter()
2988 .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
2989 .unwrap();
2990 if let IrEvent::OpenBracket { resolution, .. } = open {
2991 assert!(matches!(
2992 resolution.as_ref().unwrap().kind,
2993 LinkKind::ShortcutReference
2994 ));
2995 }
2996 }
2997
2998 #[test]
2999 fn brackets_shortcut_falls_through_without_refdef() {
3000 // CMark example #523 mechanic: `[bar* baz]` is not a refdef, so
3001 // it must NOT resolve as a link — the brackets stay literal so
3002 // the inner `*` becomes available to the outer emphasis scanner.
3003 let opts = cm_opts();
3004 let text = "[bar* baz]";
3005 let mut ir = build_ir(text, 0, text.len(), &opts);
3006 process_brackets(&mut ir, text, None, opts.dialect);
3007 let open = ir
3008 .iter()
3009 .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
3010 .unwrap();
3011 if let IrEvent::OpenBracket { resolution, .. } = open {
3012 assert!(resolution.is_none(), "no refdef → bracket stays literal");
3013 }
3014 }
3015
3016 /// Spec #473: `*[bar*](/url)`. The link `[bar*](/url)` resolves; the
3017 /// outer `*...*` MUST NOT pair across the link's bracket boundary,
3018 /// because the inner `*` belongs to the link text.
3019 #[test]
3020 fn full_plans_emphasis_does_not_cross_resolved_link_boundary() {
3021 let opts = cm_opts();
3022 let text = "*[bar*](/url)";
3023 let plans = build_full_plans(text, 0, text.len(), &opts);
3024 // The leading `*` (at byte 0) must NOT be matched as an emphasis
3025 // opener — there's no closer outside the link, and the inner `*`
3026 // (at byte 5) is inside the resolved link's text range so it must
3027 // not be paired with byte 0.
3028 assert!(
3029 matches!(plans.emphasis.lookup(0), Some(DelimChar::Literal) | None),
3030 "outer `*` at byte 0 must not pair across link boundary, got {:?}",
3031 plans.emphasis.lookup(0)
3032 );
3033 // The link `[bar*](/url)` must resolve (opener at byte 1).
3034 assert!(
3035 matches!(plans.brackets.lookup(1), Some(BracketDispo::Open { .. })),
3036 "link [bar*](/url) must resolve at byte 1"
3037 );
3038 }
3039
3040 fn pandoc_opts() -> ParserOptions {
3041 let flavor = Flavor::Pandoc;
3042 ParserOptions {
3043 flavor,
3044 dialect: crate::options::Dialect::for_flavor(flavor),
3045 extensions: crate::options::Extensions::for_flavor(flavor),
3046 pandoc_compat: crate::options::PandocCompat::default(),
3047 refdef_labels: None,
3048 }
3049 }
3050
3051 /// Bug #2 (a): unresolved Pandoc bracket-shape with unmatched delim
3052 /// inside its text degrades to literal `[`/`]`. Outer emphasis pair
3053 /// across the (now-literal) brackets must form.
3054 #[test]
3055 fn full_plans_unresolved_bracket_degrades_when_inner_delim_unmatched() {
3056 let opts = pandoc_opts();
3057 let text = "*foo [bar*] baz*";
3058 let plans = build_full_plans(text, 0, text.len(), &opts);
3059 assert!(
3060 matches!(plans.brackets.lookup(5), Some(BracketDispo::Literal) | None),
3061 "degraded `[` at byte 5 must be Literal/None, got {:?}",
3062 plans.brackets.lookup(5)
3063 );
3064 assert!(
3065 matches!(plans.emphasis.lookup(0), Some(DelimChar::Open { .. })),
3066 "outer `*` at byte 0 must open Emph after degrade, got {:?}",
3067 plans.emphasis.lookup(0)
3068 );
3069 }
3070
3071 /// Intraword `_` (e.g. inside a URL like
3072 /// `hyperparameter_optimization`) is not flanking — `can_open` and
3073 /// `can_close` are both false — so it can never pair as emphasis.
3074 /// The degrade pass must not treat such delim runs as "failed
3075 /// emphasis attempts" and demote the surrounding bracket-shape to
3076 /// literal text, otherwise every URL/identifier inside an
3077 /// unresolved reference round-trips through `\[` / `\]` escapes
3078 /// under `tex_math_single_backslash` and reparses as display math.
3079 #[test]
3080 fn full_plans_unresolved_bracket_keeps_wrapper_with_intraword_underscore() {
3081 let opts = pandoc_opts();
3082 let text = "[foo_bar more]";
3083 let plans = build_full_plans(text, 0, text.len(), &opts);
3084 assert!(
3085 matches!(
3086 plans.brackets.lookup(0),
3087 Some(BracketDispo::UnresolvedReference { .. })
3088 ),
3089 "wrapper must be preserved across intraword `_`, got {:?}",
3090 plans.brackets.lookup(0)
3091 );
3092 }
3093
3094 /// Bug #2 (b): unresolved Pandoc bracket whose interior emphasis
3095 /// pairs cleanly keeps the wrapper (linter/LSP hook).
3096 #[test]
3097 fn full_plans_unresolved_bracket_keeps_wrapper_when_inner_paired() {
3098 let opts = pandoc_opts();
3099 let text = "[foo *bar*]";
3100 let plans = build_full_plans(text, 0, text.len(), &opts);
3101 assert!(
3102 matches!(
3103 plans.brackets.lookup(0),
3104 Some(BracketDispo::UnresolvedReference { .. })
3105 ),
3106 "wrapper must be preserved when inner emph pairs, got {:?}",
3107 plans.brackets.lookup(0)
3108 );
3109 }
3110
3111 /// Spec #533: `[foo *bar [baz][ref]*][ref]` with `[ref]: /uri`.
3112 /// Inner `[baz][ref]` resolves as a link; §6.3 link-in-link rule
3113 /// deactivates the outer `[foo ...][ref]` so it falls through to
3114 /// literal brackets. Emphasis `*bar [baz][ref]*` wraps the inner link.
3115 #[test]
3116 fn full_plans_link_in_link_suppression_for_reference_links() {
3117 let opts = cm_opts();
3118 let text = "[foo *bar [baz][ref]*][ref]";
3119 let mut opts_with_refs = opts.clone();
3120 let labels: HashSet<String> = ["ref".to_string()].into_iter().collect();
3121 opts_with_refs.refdef_labels = Some(std::sync::Arc::new(labels));
3122 let plans = build_full_plans(text, 0, text.len(), &opts_with_refs);
3123
3124 // Inner `[baz][ref]` opener is at byte 10 — must resolve.
3125 assert!(
3126 matches!(plans.brackets.lookup(10), Some(BracketDispo::Open { .. })),
3127 "inner [baz][ref] must resolve at byte 10, got {:?}",
3128 plans.brackets.lookup(10)
3129 );
3130 // Outer `[foo ...][ref]` opener is at byte 0 — must NOT resolve
3131 // (link-in-link suppression).
3132 assert!(
3133 matches!(plans.brackets.lookup(0), Some(BracketDispo::Literal) | None),
3134 "outer [foo ...][ref] must fall through to literal at byte 0, got {:?}",
3135 plans.brackets.lookup(0)
3136 );
3137 // Trailing `[ref]` after the outer `]` is at byte 22 — it's a
3138 // standalone shortcut reference and must resolve.
3139 assert!(
3140 matches!(plans.brackets.lookup(22), Some(BracketDispo::Open { .. })),
3141 "trailing [ref] must resolve at byte 22, got {:?}",
3142 plans.brackets.lookup(22)
3143 );
3144 // Emphasis `*...*` at bytes 5 and 20 must pair — the scoped
3145 // emphasis pass over the (deactivated) outer bracket's inner
3146 // event range pairs these.
3147 assert!(
3148 matches!(plans.emphasis.lookup(5), Some(DelimChar::Open { .. })),
3149 "emphasis opener at byte 5 must pair, got {:?}",
3150 plans.emphasis.lookup(5)
3151 );
3152 }
3153}