Skip to main content

panache_parser/parser/inlines/
inline_ir.rs

1//! Inline IR for both CommonMark and Pandoc dialects.
2//!
3//! The inline parsing pipeline runs in three passes over an intermediate
4//! representation (IR):
5//!
6//! 1. **Scan** ([`build_ir`]): walk the source bytes once, producing a flat
7//!    [`Vec<IrEvent>`]. Opaque higher-precedence constructs (escapes, code
8//!    spans, autolinks, raw HTML, plus Pandoc math / native spans / inline
9//!    footnotes / footnote references / citations / bracketed spans) are
10//!    skipped past as a single [`IrEvent::Construct`] event whose source
11//!    range is preserved for losslessness. Delimiter runs (`*`/`_`),
12//!    bracket markers (`[`, `![`, `]`), soft line breaks, and plain text
13//!    spans become distinct events.
14//!
15//! 2. **Process brackets** ([`process_brackets`]) — CommonMark §6.3: the
16//!    bracket-stack algorithm walks `]` markers left-to-right. For each
17//!    `]`, the algorithm finds the nearest active opener and tries to
18//!    resolve the pair as a link or image: inline `[text](dest)`, full
19//!    reference `[text][label]`, collapsed `[text][]`, or shortcut
20//!    `[text]`. Under CommonMark, reference forms are validated against
21//!    the document refdef map and a successful match deactivates all
22//!    earlier active openers (§6.3 "links may not contain other links").
23//!    Under Pandoc, reference forms resolve shape-only (any non-empty
24//!    label) and the deactivation pass is skipped; outer-wins nested-link
25//!    semantics are enforced by the emission walk's `suppress_inner_links`
26//!    flag instead.
27//!
//! 3. **Process emphasis** ([`process_emphasis_in_range`]): the classic
//!    delimiter-stack algorithm runs over the [`IrEvent::DelimRun`]
//!    events, pairing openers with closers and recording matches on the
//!    runs. The pass is applied first to each resolved bracket pair in
//!    isolation (innermost first), then once at top level over the
//!    residual events. Each match consumes 1 or 2 inner-edge bytes from
//!    each side; leftover bytes fall through to literal text. Dialect
//!    gates (Pandoc flanking rules, mod-3 rejection, asymmetric
//!    (1,2)/(2,1) rejection, opener-count >= 4 rejection, triple-emph
//!    nesting flip, cascade-then-rerun) branch on the `dialect` parameter.
38//!
39//! The emission walk in [`super::core::parse_inline_range_impl`] consumes
40//! three byte-keyed plans built by [`build_full_plans`]: an
41//! [`EmphasisPlan`] for delim-run dispositions, a [`BracketPlan`] for
42//! resolved link/image bracket pairs, and a [`ConstructPlan`] for
43//! standalone Pandoc constructs (inline footnotes, native spans, footnote
44//! references, citations, bracketed spans). Matched delim runs become
45//! `EMPHASIS` / `STRONG` nodes; matched bracket pairs become `LINK` /
46//! `IMAGE` nodes via the dispatcher's `try_parse_*` recognizers (called
47//! to *parse* a matched range, not to *resolve* it). Unmatched delims and
48//! brackets fall through to plain text.
49
50use crate::options::ParserOptions;
51use crate::parser::inlines::refdef_map::{RefdefMap, normalize_label};
52use std::collections::{BTreeMap, HashSet};
53
/// Which emphasis node a matched delimiter pair produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmphasisKind {
    /// Regular emphasis (a one-byte marker on each side).
    Emph,
    /// Strong emphasis (a two-byte marker on each side).
    Strong,
}
59
60/// Disposition of a single delimiter byte after emphasis resolution.
61#[derive(Debug, Clone, Copy)]
62pub enum DelimChar {
63    /// Start of an opening marker. The marker spans `len` bytes from this
64    /// position; the matching closer starts at `partner` and spans
65    /// `partner_len` bytes.
66    Open {
67        len: u8,
68        partner: usize,
69        partner_len: u8,
70        kind: EmphasisKind,
71    },
72    /// Start of a closing marker. The matching opener starts at `partner`.
73    /// Emission jumps past close markers via the matching `Open` entry, so
74    /// this variant is only consulted defensively.
75    Close,
76    /// Unmatched delimiter byte; emit as literal text.
77    Literal,
78}
79
80/// Byte-keyed disposition map for `*` / `_` delimiter chars produced by
81/// the IR's emphasis pass and consumed by the inline emission walk.
82#[derive(Debug, Default, Clone)]
83pub struct EmphasisPlan {
84    by_pos: BTreeMap<usize, DelimChar>,
85}
86
87impl EmphasisPlan {
88    pub fn lookup(&self, pos: usize) -> Option<DelimChar> {
89        self.by_pos.get(&pos).copied()
90    }
91
92    pub fn is_empty(&self) -> bool {
93        self.by_pos.is_empty()
94    }
95
96    /// Construct an `EmphasisPlan` from a byte-keyed disposition map.
97    pub fn from_dispositions(by_pos: BTreeMap<usize, DelimChar>) -> Self {
98        Self { by_pos }
99    }
100}
101
102use super::bracketed_spans::try_parse_bracketed_span;
103use super::citations::{try_parse_bare_citation, try_parse_bracketed_citation};
104use super::code_spans::try_parse_code_span;
105use super::escapes::{EscapeType, try_parse_escape};
106use super::inline_footnotes::{try_parse_footnote_reference, try_parse_inline_footnote};
107use super::inline_html::try_parse_inline_html;
108use super::links::{
109    LinkScanContext, try_parse_autolink, try_parse_inline_image, try_parse_inline_link,
110    try_parse_reference_image, try_parse_reference_link,
111};
112use super::math::{
113    try_parse_display_math, try_parse_double_backslash_display_math,
114    try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
115    try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
116};
117use super::native_spans::try_parse_native_span;
118
119/// One event in the inline IR.
120///
121/// Events partition the source byte range covered by the IR exactly: their
122/// `range()` values are contiguous and non-overlapping, so concatenating
123/// them reproduces the original input. This is the losslessness invariant
124/// the emission pass relies on.
125#[derive(Debug, Clone)]
126pub enum IrEvent {
127    /// Plain text byte span. Emitted as a single `TEXT` token, possibly
128    /// merged with adjacent literal-disposition delim/bracket bytes.
129    Text { start: usize, end: usize },
130
131    /// An opaque higher-precedence construct (escape, code span, autolink,
132    /// raw HTML). The emission pass re-parses these from the source byte
133    /// range using the existing per-construct emitters; we don't store a
134    /// pre-built `GreenNode` because `rowan::GreenNodeBuilder` doesn't
135    /// support inserting subtrees directly. The byte range is what makes
136    /// emission well-defined — the construct kind is recovered by the
137    /// emitter dispatching on the leading byte.
138    Construct {
139        start: usize,
140        end: usize,
141        kind: ConstructKind,
142    },
143
144    /// A `*` or `_` delimiter run. The `matches` vec is filled in by
145    /// [`process_emphasis`]; before that pass it is empty.
146    DelimRun {
147        ch: u8,
148        start: usize,
149        end: usize,
150        can_open: bool,
151        can_close: bool,
152        /// Matched fragments produced by `process_emphasis`. Each entry
153        /// is one `(byte_offset_within_run, len, partner_event_idx,
154        /// partner_byte_offset, kind, is_opener)` tuple. Empty until the
155        /// pass runs; possibly multiple entries when a single run matches
156        /// at multiple positions (e.g. a 4-run that closes 2+2 pairs).
157        matches: Vec<DelimMatch>,
158    },
159
160    /// `[` or `![` bracket marker. Resolved by [`process_brackets`].
161    OpenBracket {
162        start: usize,
163        /// `start + 1` for `[`, `start + 2` for `![`.
164        end: usize,
165        is_image: bool,
166        /// True until a later resolution rule deactivates this opener.
167        active: bool,
168        /// Filled in when the matching `CloseBracket` resolves the pair
169        /// to a link / image.
170        resolution: Option<BracketResolution>,
171        /// Pandoc-only: extents of an unresolved bracket-shape pattern
172        /// (full reference / collapsed / shortcut whose label doesn't
173        /// match a refdef). Mutually exclusive with `resolution:
174        /// Some(...)`. When `Some`, emission wraps `[start, end)` in
175        /// an `UNRESOLVED_REFERENCE` node so downstream tools can
176        /// attach behavior to the bracket-shape pattern. Always
177        /// `None` under `Dialect::CommonMark`.
178        unresolved_ref: Option<UnresolvedRefShape>,
179    },
180
181    /// `]` bracket marker. Resolved by [`process_brackets`].
182    CloseBracket {
183        pos: usize,
184        /// True if this `]` was paired with an opener and the pair was
185        /// turned into a link / image.
186        matched: bool,
187    },
188
189    /// A soft line break (a `\n` or `\r\n` ending a paragraph-internal
190    /// line). Includes the line-ending bytes verbatim.
191    SoftBreak { start: usize, end: usize },
192
193    /// A hard line break (`  \n` / `\\\n` / `   \n` etc.). Includes any
194    /// trailing-space bytes plus the line ending.
195    HardBreak { start: usize, end: usize },
196}
197
198impl IrEvent {
199    /// The source byte range this event covers.
200    pub fn range(&self) -> (usize, usize) {
201        match self {
202            IrEvent::Text { start, end }
203            | IrEvent::Construct { start, end, .. }
204            | IrEvent::DelimRun { start, end, .. }
205            | IrEvent::OpenBracket { start, end, .. }
206            | IrEvent::SoftBreak { start, end }
207            | IrEvent::HardBreak { start, end } => (*start, *end),
208            IrEvent::CloseBracket { pos, .. } => (*pos, *pos + 1),
209        }
210    }
211}
212
/// Categorical tag for a [`IrEvent::Construct`] event so emission knows
/// which parser to call to rebuild the CST subtree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructKind {
    /// `\X` literal-character escape (CommonMark §2.4).
    Escape,
    /// `` `code` `` span (§6.1).
    CodeSpan,
    /// `<scheme://...>` or `<email@host>` (§6.5).
    Autolink,
    /// `<tag ...>` and friends (§6.6).
    InlineHtml,
    /// Pandoc opaque construct that doesn't have a dedicated kind yet
    /// (currently: math spans). Pre-recognised in `build_ir` under
    /// `Dialect::Pandoc` solely so the emphasis pass treats the entire
    /// construct as opaque and delim runs inside don't cross its
    /// boundary. Emission re-parses the construct via the dispatcher's
    /// existing `try_parse_*` chain.
    PandocOpaque,
    /// Pandoc inline footnote `^[note text]`. Recognised in `build_ir`
    /// under `Dialect::Pandoc` and consumed by the emission walk via
    /// the IR's `ConstructPlan`. The dispatcher's legacy `^[` branch
    /// is gated to CommonMark dialect only.
    InlineFootnote,
    /// Pandoc native span `<span ...>...</span>`. Recognised in
    /// `build_ir` under `Dialect::Pandoc` and consumed by the emission
    /// walk via the IR's `ConstructPlan`. The dispatcher's legacy
    /// `<span>` branch is gated to CommonMark dialect only.
    NativeSpan,
    /// Pandoc footnote reference `[^id]`. Recognised in `build_ir`
    /// under `Dialect::Pandoc` and consumed by the emission walk via
    /// the IR's `ConstructPlan`. The dispatcher's legacy `[^id]`
    /// branch is gated to CommonMark dialect only.
    FootnoteReference,
    /// Pandoc bracketed citation `[@key]`, `[see @key, p. 1]`,
    /// `[@a; @b]`. Recognised in `build_ir` under `Dialect::Pandoc`
    /// and consumed by the emission walk via the IR's `ConstructPlan`.
    /// The dispatcher's legacy `[@cite]` branch is gated to CommonMark
    /// dialect only.
    BracketedCitation,
    /// Pandoc bare citation `@key` or `-@key` (author-in-text /
    /// suppress-author). Recognised in `build_ir` under
    /// `Dialect::Pandoc` and consumed by the emission walk via the
    /// IR's `ConstructPlan`. The dispatcher's legacy `@` and `-@`
    /// branches are gated to CommonMark dialect only.
    BareCitation,
    /// Pandoc bracketed span `[content]{attrs}`. Recognised in
    /// `build_ir` under `Dialect::Pandoc` and consumed by the emission
    /// walk via the IR's `ConstructPlan`. The dispatcher's legacy
    /// `[text]{attrs}` branch is gated to CommonMark dialect only.
    BracketedSpan,
    /// Pandoc wikilink `[[url]]` / `[[url|title]]` / `![[url]]` /
    /// `![[url|title]]`. Recognised in `build_ir` when either
    /// `wikilinks_title_after_pipe` or `wikilinks_title_before_pipe` is
    /// enabled. Dialect-agnostic (pandoc accepts the extension on both
    /// `markdown+` and `commonmark+`). The emission walk dispatches via
    /// the IR's `ConstructPlan`; the `is_image` variant is recovered by
    /// peeking the leading byte of the source range.
    WikiLink,
}
273
274/// One matched fragment within a [`IrEvent::DelimRun`].
275#[derive(Debug, Clone, Copy)]
276pub struct DelimMatch {
277    /// Byte offset of this fragment relative to the run's `start`.
278    pub offset_in_run: u8,
279    /// Number of bytes in this fragment (1 or 2).
280    pub len: u8,
281    /// Whether this fragment is the opener (`true`) or closer of the pair.
282    pub is_opener: bool,
283    /// IR event index of the partner run.
284    pub partner_event: u32,
285    /// Byte offset within the partner run of the partner fragment.
286    pub partner_offset: u8,
287    /// Emphasis kind (Emph for `len == 1`, Strong for `len == 2`).
288    pub kind: EmphasisKind,
289}
290
/// Pandoc-only: extents of an unresolved bracket-shape reference
/// pattern. Recorded on `IrEvent::OpenBracket.unresolved_ref` when the
/// no-resolution fall-through fires under `Dialect::Pandoc`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnresolvedRefShape {
    /// IR event index of the matching `CloseBracket`. Used by the
    /// scoped-emphasis pass to treat the wrapper as a tree boundary.
    pub close_event: u32,
    /// One past the end of the inner text (the byte position of the
    /// outer `]`). Combined with the opener's `end` field, this is the
    /// inner text range that goes through normal inline parsing.
    pub text_end: usize,
    /// One past the end of the full bracket-shape pattern. For
    /// shortcut form `[text]`: `close_pos + 1`. For collapsed
    /// `[text][]`: `close_pos + 3`. For full `[text][label]`: the byte
    /// after the closing `]` of `[label]`.
    pub end: usize,
}
309
310/// Successful bracket resolution: the `[`...`]` pair is a link or image.
311#[derive(Debug, Clone)]
312pub struct BracketResolution {
313    /// IR event index of the matching `CloseBracket`.
314    pub close_event: u32,
315    /// Source range of the link text (between `[`/`![` and `]`).
316    pub text_start: usize,
317    pub text_end: usize,
318    /// Source range of the link suffix (`(...)`, `[label]`, `[]`, or
319    /// empty for shortcut). When `kind == ShortcutReference`,
320    /// `suffix_start == suffix_end == close_pos + 1`.
321    pub suffix_start: usize,
322    pub suffix_end: usize,
323    pub kind: LinkKind,
324}
325
/// What kind of link/image we resolved a bracket pair to.
#[derive(Debug, Clone)]
pub enum LinkKind {
    /// `[text](dest)` or `[text](dest "title")`.
    Inline {
        /// Link destination as written in the `(...)` suffix.
        dest: String,
        /// Optional link title; `None` when the suffix has no title.
        title: Option<String>,
    },
    /// `[text][label]` — explicit reference.
    FullReference {
        /// The label taken from the `[label]` suffix.
        label: String,
    },
    /// `[text][]` — collapsed reference. Label is the link text.
    CollapsedReference,
    /// `[text]` — shortcut reference. Label is the link text.
    ShortcutReference,
}
338
339// ============================================================================
340// Pass 1: Scan
341// ============================================================================
342
343/// Scan `text[start..end]` once, producing a flat IR of events.
344///
345/// The scan is forward-only and never backtracks: each iteration either
346/// consumes a known construct (escape, code span, autolink, raw HTML),
347/// records a delim run / bracket marker / line break, or steps past a
348/// single UTF-8 boundary as plain text. Adjacent text bytes are coalesced
349/// into a single [`IrEvent::Text`] event by the run-flush step.
350pub fn build_ir(text: &str, start: usize, end: usize, config: &ParserOptions) -> Vec<IrEvent> {
351    let mut events = Vec::new();
352    build_ir_into(text, start, end, config, &mut events);
353    events
354}
355
356/// Like [`build_ir`] but writes into a caller-provided `Vec<IrEvent>`,
357/// clearing it first. Used by [`build_full_plans`] to amortise the
358/// per-call allocation through a thread-local scratch pool.
359pub(super) fn build_ir_into(
360    text: &str,
361    start: usize,
362    end: usize,
363    config: &ParserOptions,
364    events: &mut Vec<IrEvent>,
365) {
366    events.clear();
367    let bytes = text.as_bytes();
368    let exts = &config.extensions;
369    let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
370
371    let mut pos = start;
372    let mut text_run_start = start;
373    // Pandoc-only: extent of the current bracket-shape link/image's
374    // opaque range. While `pos < pandoc_bracket_extent`, autolinks /
375    // raw HTML / native spans are NOT recognised — pandoc-native
376    // treats `[link text]` as opaque to those constructs (CommonMark
377    // spec example #526 / #538). The lookahead at `[`/`![` sets this
378    // when a bracket-shape forms a valid link/image; once `pos`
379    // passes the extent, normal scanning resumes. CommonMark
380    // dialect's link-text-vs-autolink ordering is handled by the
381    // dispatcher's `try_parse_inline_link` rejecting outer matches
382    // when the link text contains a valid autolink (a different
383    // mechanism, see `LinkScanContext.skip_autolinks`).
384    let mut pandoc_bracket_extent: usize = 0;
385
386    // Pre-computed byte mask: `mask[b]` is `true` iff byte `b` could
387    // start any IR-recognised construct under the current dialect /
388    // extensions. Used to bulk-skip plain bytes between structural
389    // bytes — the per-byte branch chain below only runs at positions
390    // where a construct is actually possible. Non-ASCII bytes
391    // (>= 0x80) are never structural and are skipped together with
392    // ASCII plain text.
393    let mask = build_ir_byte_mask(config);
394
395    macro_rules! flush_text {
396        () => {
397            if pos > text_run_start {
398                events.push(IrEvent::Text {
399                    start: text_run_start,
400                    end: pos,
401                });
402            }
403        };
404    }
405
406    while pos < end {
407        // Fast-skip plain bytes. `text_run_start` is preserved across
408        // the skip so the next structural-event flush picks them up.
409        while pos < end && !mask[bytes[pos] as usize] {
410            pos += 1;
411        }
412        if pos >= end {
413            break;
414        }
415        let b = bytes[pos];
416
417        // Pandoc-only: at `[` or `![`, look ahead to see if this
418        // bracket-shape forms a valid link/image. If so, suppress
419        // autolink / raw HTML / native span recognition until `pos`
420        // passes the bracket-shape's end. Skipped if we're already
421        // inside an enclosing bracket-shape's opaque range.
422        if !is_commonmark
423            && pos >= pandoc_bracket_extent
424            && (b == b'[' || (b == b'!' && pos + 1 < end && bytes[pos + 1] == b'['))
425            && let Some(len) = try_pandoc_bracket_link_extent(text, pos, end, config)
426        {
427            pandoc_bracket_extent = pos + len;
428        }
429        let in_pandoc_bracket = !is_commonmark && pos < pandoc_bracket_extent;
430
431        // Backslash escape (§2.4) — including `\\\n` hard line break.
432        if b == b'\\'
433            && let Some((len, _ch, escape_type)) = try_parse_escape(&text[pos..])
434            && pos + len <= end
435        {
436            let enabled = match escape_type {
437                EscapeType::Literal => is_commonmark || exts.all_symbols_escapable,
438                EscapeType::HardLineBreak => exts.escaped_line_breaks,
439                EscapeType::NonbreakingSpace => exts.all_symbols_escapable,
440            };
441            if enabled {
442                flush_text!();
443                let kind = match escape_type {
444                    EscapeType::HardLineBreak => {
445                        events.push(IrEvent::HardBreak {
446                            start: pos,
447                            end: pos + len,
448                        });
449                        pos += len;
450                        text_run_start = pos;
451                        continue;
452                    }
453                    EscapeType::Literal | EscapeType::NonbreakingSpace => ConstructKind::Escape,
454                };
455                events.push(IrEvent::Construct {
456                    start: pos,
457                    end: pos + len,
458                    kind,
459                });
460                pos += len;
461                text_run_start = pos;
462                continue;
463            }
464        }
465
466        // Code span (§6.1) — opaque to emphasis and brackets.
467        if b == b'`'
468            && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
469            && pos + len <= end
470        {
471            flush_text!();
472            events.push(IrEvent::Construct {
473                start: pos,
474                end: pos + len,
475                kind: ConstructKind::CodeSpan,
476            });
477            pos += len;
478            text_run_start = pos;
479            continue;
480        }
481
482        // Pandoc-only: math spans are opaque to emphasis. The legacy
483        // `parse_until_closer_with_nested_*` skip-list includes inline
484        // math; without recognising it here, delim runs inside `$math$`
485        // would be picked up by the emphasis pass and break losslessness
486        // (the dispatcher's math parser would later re-claim the bytes,
487        // duplicating content).
488        if !is_commonmark && let Some(len) = try_pandoc_math_opaque(text, pos, end, config) {
489            flush_text!();
490            events.push(IrEvent::Construct {
491                start: pos,
492                end: pos + len,
493                kind: ConstructKind::PandocOpaque,
494            });
495            pos += len;
496            text_run_start = pos;
497            continue;
498        }
499
500        // Pandoc-only: native span `<span ...>...</span>`. Must come
501        // before the generic autolink/raw-html branches so the open tag
502        // doesn't get claimed as inline HTML. Span content is opaque to
503        // the emphasis pass; emission consumes the event via the IR's
504        // `ConstructPlan`. Suppressed inside Pandoc bracket-shape
505        // link/image text.
506        if !is_commonmark
507            && !in_pandoc_bracket
508            && b == b'<'
509            && exts.native_spans
510            && let Some((len, _, _)) = try_parse_native_span(&text[pos..])
511            && pos + len <= end
512        {
513            flush_text!();
514            events.push(IrEvent::Construct {
515                start: pos,
516                end: pos + len,
517                kind: ConstructKind::NativeSpan,
518            });
519            pos += len;
520            text_run_start = pos;
521            continue;
522        }
523
524        // Autolink (§6.5) before raw HTML — autolinks are the more
525        // specific shape inside `<...>`. Both are suppressed inside
526        // Pandoc bracket-shape link/image text (pandoc-native treats
527        // link text as opaque to autolinks and raw HTML).
528        if b == b'<' && !in_pandoc_bracket {
529            if exts.autolinks
530                && let Some((len, _)) = try_parse_autolink(&text[pos..], is_commonmark)
531                && pos + len <= end
532            {
533                flush_text!();
534                events.push(IrEvent::Construct {
535                    start: pos,
536                    end: pos + len,
537                    kind: ConstructKind::Autolink,
538                });
539                pos += len;
540                text_run_start = pos;
541                continue;
542            }
543            if exts.raw_html
544                && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
545                && pos + len <= end
546            {
547                flush_text!();
548                events.push(IrEvent::Construct {
549                    start: pos,
550                    end: pos + len,
551                    kind: ConstructKind::InlineHtml,
552                });
553                pos += len;
554                text_run_start = pos;
555                continue;
556            }
557        }
558
559        // Pandoc-only: inline footnote `^[note]`. Recognized at scan
560        // time so the emphasis pass treats it as opaque (delim runs
561        // inside the footnote can't pair with delim runs outside).
562        if !is_commonmark
563            && b == b'^'
564            && exts.inline_footnotes
565            && let Some((len, _)) = try_parse_inline_footnote(&text[pos..])
566            && pos + len <= end
567        {
568            flush_text!();
569            events.push(IrEvent::Construct {
570                start: pos,
571                end: pos + len,
572                kind: ConstructKind::InlineFootnote,
573            });
574            pos += len;
575            text_run_start = pos;
576            continue;
577        }
578
579        // Pandoc-only: footnote reference `[^id]`. Recognised at scan
580        // time so the emphasis pass treats it as opaque (delim runs
581        // inside the label can't pair with delim runs outside) and the
582        // emission walk dispatches it directly via the IR's
583        // `ConstructPlan`. Must come before the generic bracket-opaque
584        // scan so the dedicated kind wins.
585        if !is_commonmark
586            && b == b'['
587            && pos + 1 < end
588            && bytes[pos + 1] == b'^'
589            && exts.footnotes
590            && let Some((len, _)) = try_parse_footnote_reference(&text[pos..])
591            && pos + len <= end
592        {
593            flush_text!();
594            events.push(IrEvent::Construct {
595                start: pos,
596                end: pos + len,
597                kind: ConstructKind::FootnoteReference,
598            });
599            pos += len;
600            text_run_start = pos;
601            continue;
602        }
603
604        // Pandoc-only: bracketed citation `[@cite]`. Recognised at
605        // scan time so the emphasis pass treats it as opaque (delim
606        // runs inside the citation can't pair with delim runs outside)
607        // and the emission walk dispatches it directly via the IR's
608        // `ConstructPlan`. Must come before the generic bracket-opaque
609        // scan so the dedicated kind wins.
610        if !is_commonmark
611            && b == b'['
612            && exts.citations
613            && let Some((len, _)) = try_parse_bracketed_citation(&text[pos..])
614            && pos + len <= end
615        {
616            flush_text!();
617            events.push(IrEvent::Construct {
618                start: pos,
619                end: pos + len,
620                kind: ConstructKind::BracketedCitation,
621            });
622            pos += len;
623            text_run_start = pos;
624            continue;
625        }
626
627        // Pandoc-only: bare citation `@key` or `-@key`. Recognised at
628        // scan time so the emission walk dispatches it directly via
629        // the IR's `ConstructPlan`. Bare citations don't contain
630        // emphasis-eligible content, so opacity is moot here — IR
631        // participation is only for dispatch consolidation.
632        if !is_commonmark
633            && (b == b'@' || (b == b'-' && pos + 1 < end && bytes[pos + 1] == b'@'))
634            && (exts.citations || exts.quarto_crossrefs)
635            && let Some((len, _, _)) = try_parse_bare_citation(&text[pos..])
636            && pos + len <= end
637        {
638            flush_text!();
639            events.push(IrEvent::Construct {
640                start: pos,
641                end: pos + len,
642                kind: ConstructKind::BareCitation,
643            });
644            pos += len;
645            text_run_start = pos;
646            continue;
647        }
648
649        // Pandoc-only: bracketed span `[content]{attrs}`. Recognised
650        // at scan time so the emphasis pass treats it as opaque (delim
651        // runs inside the span content can't pair with delim runs
652        // outside) and the emission walk dispatches it directly via
653        // the IR's `ConstructPlan`. Must come before the generic
654        // bracket-opaque scan so the dedicated kind wins.
655        // `try_parse_bracketed_span` requires `]` to be immediately
656        // followed by `{`, so this never shadows inline links
657        // (`[text](url)`) or reference links (`[label][refdef]`) —
658        // those don't have the `{attrs}` suffix.
659        if !is_commonmark
660            && b == b'['
661            && exts.bracketed_spans
662            && let Some((len, _, _)) = try_parse_bracketed_span(&text[pos..])
663            && pos + len <= end
664        {
665            flush_text!();
666            events.push(IrEvent::Construct {
667                start: pos,
668                end: pos + len,
669                kind: ConstructKind::BracketedSpan,
670            });
671            pos += len;
672            text_run_start = pos;
673            continue;
674        }
675
676        // Wikilinks `[[url]]`, `[[url|title]]`, `![[url]]`,
677        // `![[url|title]]`. Recognised on either pipe-order extension
678        // and on both dialects (pandoc accepts the extension under both
679        // `markdown+` and `commonmark+`). Must precede the `![` and `[`
680        // bracket scans below so the wikilink shape wins over an
681        // image-bracket or link-bracket open.
682        if (b == b'[' || b == b'!')
683            && super::wikilinks::any_enabled(config)
684            && let Some(span) = super::wikilinks::try_parse_wikilink(text, pos, config)
685            && span.end <= end
686        {
687            flush_text!();
688            events.push(IrEvent::Construct {
689                start: span.start,
690                end: span.end,
691                kind: ConstructKind::WikiLink,
692            });
693            pos = span.end;
694            text_run_start = pos;
695            continue;
696        }
697
698        // `![` opens an image bracket. Recognised whenever any
699        // image-producing extension is on — `inline_images` for the
700        // `![alt](url)` form, or `reference_links` for the
701        // `![alt][label]` reference-image form (e.g. MultiMarkdown
702        // disables `inline_images` but uses reference images).
703        if b == b'!'
704            && pos + 1 < end
705            && bytes[pos + 1] == b'['
706            && (exts.inline_images || exts.reference_links)
707        {
708            flush_text!();
709            events.push(IrEvent::OpenBracket {
710                start: pos,
711                end: pos + 2,
712                is_image: true,
713                active: true,
714                resolution: None,
715                unresolved_ref: None,
716            });
717            pos += 2;
718            text_run_start = pos;
719            continue;
720        }
721
722        // `[` opens a link bracket. Recognised whenever any
723        // link-producing extension is on — `inline_links` for
724        // `[text](url)`, or `reference_links` for `[text][label]` /
725        // `[text]` shortcut form.
726        if b == b'[' && (exts.inline_links || exts.reference_links) {
727            flush_text!();
728            events.push(IrEvent::OpenBracket {
729                start: pos,
730                end: pos + 1,
731                is_image: false,
732                active: true,
733                resolution: None,
734                unresolved_ref: None,
735            });
736            pos += 1;
737            text_run_start = pos;
738            continue;
739        }
740
741        // `]` closes a link/image bracket.
742        if b == b']' {
743            flush_text!();
744            events.push(IrEvent::CloseBracket {
745                pos,
746                matched: false,
747            });
748            pos += 1;
749            text_run_start = pos;
750            continue;
751        }
752
753        // `*` or `_` delimiter run.
754        if b == b'*' || b == b'_' {
755            flush_text!();
756            let mut run_end = pos;
757            while run_end < end && bytes[run_end] == b {
758                run_end += 1;
759            }
760            let count = run_end - pos;
761            let (can_open, can_close) = compute_flanking(text, pos, count, b, config.dialect);
762            events.push(IrEvent::DelimRun {
763                ch: b,
764                start: pos,
765                end: run_end,
766                can_open,
767                can_close,
768                matches: Vec::new(),
769            });
770            pos = run_end;
771            text_run_start = pos;
772            continue;
773        }
774
775        // Hard line break: 2+ trailing spaces before newline. We detect
776        // this when we're sitting on a `\n` (or `\r\n`) and the preceding
777        // bytes within the current text run are spaces.
778        if b == b'\n' || (b == b'\r' && pos + 1 < end && bytes[pos + 1] == b'\n') {
779            // Count trailing spaces in the text accumulated so far.
780            let nl_len = if b == b'\r' { 2 } else { 1 };
781            let mut trailing_spaces = 0;
782            let mut s = pos;
783            while s > text_run_start && bytes[s - 1] == b' ' {
784                trailing_spaces += 1;
785                s -= 1;
786            }
787            if trailing_spaces >= 2 {
788                // Flush text *before* the trailing spaces.
789                if s > text_run_start {
790                    events.push(IrEvent::Text {
791                        start: text_run_start,
792                        end: s,
793                    });
794                }
795                events.push(IrEvent::HardBreak {
796                    start: s,
797                    end: pos + nl_len,
798                });
799                pos += nl_len;
800                text_run_start = pos;
801                continue;
802            }
803
804            // Soft line break: flush preceding text, emit the line ending
805            // as its own event so the emitter can render `NEWLINE` tokens
806            // verbatim.
807            flush_text!();
808            events.push(IrEvent::SoftBreak {
809                start: pos,
810                end: pos + nl_len,
811            });
812            pos += nl_len;
813            text_run_start = pos;
814            continue;
815        }
816
817        // Plain byte — advance one UTF-8 char.
818        let ch_len = text[pos..]
819            .chars()
820            .next()
821            .map_or(1, std::primitive::char::len_utf8);
822        pos += ch_len.max(1);
823    }
824
825    flush_text!();
826}
827
828/// Build a 256-entry mask: `mask[b]` is `true` iff byte `b` could start
829/// any IR-recognised construct under the current dialect / extensions.
830///
831/// This is the build-IR-specific superset of "is this byte interesting".
832/// Plain bytes between structural bytes are bulk-skipped via this mask
833/// in the [`build_ir`] hot loop; missing a byte here is a correctness
834/// bug (we'd skip past a real construct), but having extras only costs
835/// us a wasted branch round-trip.
836fn build_ir_byte_mask(config: &ParserOptions) -> [bool; 256] {
837    let mut mask = [false; 256];
838    let exts = &config.extensions;
839    let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
840
841    // Always structural for IR scanning:
842    //   `\n` / `\r` — soft / hard breaks
843    //   `\\`        — escape, hard line break, backslash math
844    //   `` ` ``     — code span (IR construct)
845    //   `*` / `_`   — emphasis delim runs (IR core)
846    mask[b'\n' as usize] = true;
847    mask[b'\r' as usize] = true;
848    mask[b'\\' as usize] = true;
849    mask[b'`' as usize] = true;
850    mask[b'*' as usize] = true;
851    mask[b'_' as usize] = true;
852
853    // Brackets: scanned whenever any bracket-shaped construct is
854    // reachable. `]` is structural unconditionally if `[` is — the IR
855    // emits a CloseBracket event regardless of which opener variant
856    // matches. `!` is gated on image-producing extensions; the leading
857    // `!` of `![alt]` is the only image entry point.
858    if exts.inline_links
859        || exts.reference_links
860        || exts.inline_images
861        || exts.bracketed_spans
862        || exts.footnotes
863        || exts.citations
864    {
865        mask[b'[' as usize] = true;
866        mask[b']' as usize] = true;
867    }
868    if exts.inline_images || exts.reference_links {
869        mask[b'!' as usize] = true;
870    }
871
872    // `<` covers autolinks, raw HTML, and Pandoc native spans.
873    if exts.autolinks || exts.raw_html || (!is_commonmark && exts.native_spans) {
874        mask[b'<' as usize] = true;
875    }
876
877    // `^` covers Pandoc inline footnotes (`^[...]` recognised in IR
878    // under Pandoc dialect). CM dialect inline footnotes go through
879    // the dispatcher, not the IR.
880    if !is_commonmark && exts.inline_footnotes {
881        mask[b'^' as usize] = true;
882    }
883
884    // `@` covers Pandoc bare citation `@key` and `[@cite]`. The leading
885    // `[` of `[@cite]` is already in the mask via the bracket gate;
886    // gating `@` here also covers the bare-citation form.
887    if !is_commonmark && (exts.citations || exts.quarto_crossrefs) {
888        mask[b'@' as usize] = true;
889        // `-` only matters as the first byte of `-@cite`. Tracking it
890        // here avoids missing the suppress-author bare citation form.
891        mask[b'-' as usize] = true;
892    }
893
894    // `$` covers Pandoc dollar / GFM math. CM doesn't recognise math
895    // in `build_ir`.
896    if !is_commonmark
897        && (exts.tex_math_dollars
898            || exts.tex_math_gfm
899            || exts.tex_math_single_backslash
900            || exts.tex_math_double_backslash)
901    {
902        mask[b'$' as usize] = true;
903    }
904
905    mask
906}
907
908// ============================================================================
909// Flanking (CommonMark §6.2)
910// ============================================================================
911
912fn compute_flanking(
913    text: &str,
914    pos: usize,
915    count: usize,
916    ch: u8,
917    dialect: crate::options::Dialect,
918) -> (bool, bool) {
919    if dialect == crate::options::Dialect::Pandoc {
920        // Pandoc-markdown's recursive-descent emphasis parser does NOT
921        // apply CommonMark §6.2 flanking rules. Instead it gates on:
922        //   - opener: must not be followed by whitespace (Pandoc
923        //     `try_parse_emphasis` line 247 in legacy core.rs).
924        //   - closer: no flanking gate at all (Pandoc-markdown's
925        //     `ender` parser only counts characters; see Markdown.hs
926        //     in pandoc/src/Text/Pandoc/Readers/Markdown.hs).
927        //   - underscore intraword hard rule: `_` adjacent to an
928        //     alphanumeric on either side cannot open / close
929        //     (Pandoc's `intraword_underscores` extension default).
930        let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
931        let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
932        let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
933
934        let mut can_open = !followed_by_ws;
935        // Pandoc-markdown's `ender` (in pandoc/Readers/Markdown.hs)
936        // has no flanking restriction on closers — just a count match.
937        // Set can_close unconditionally; the per-pair match logic in
938        // `process_emphasis_in_range_filtered` constrains pairing via
939        // the equal-count rule.
940        let mut can_close = true;
941
942        if ch == b'_' {
943            let prev_is_alnum = prev_char.is_some_and(|c| c.is_alphanumeric());
944            let next_is_alnum = next_char.is_some_and(|c| c.is_alphanumeric());
945            if prev_is_alnum {
946                can_open = false;
947            }
948            if next_is_alnum {
949                can_close = false;
950            }
951        }
952
953        return (can_open, can_close);
954    }
955
956    // CommonMark §6.2 flanking.
957    let lf = is_left_flanking(text, pos, count);
958    let rf = is_right_flanking(text, pos, count);
959    if ch == b'*' {
960        (lf, rf)
961    } else {
962        let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
963        let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
964        let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
965        let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
966        let can_open = lf && (!rf || preceded_by_punct);
967        let can_close = rf && (!lf || followed_by_punct);
968        (can_open, can_close)
969    }
970}
971
972/// Pandoc-only: identify a math span starting at `pos` and return its
973/// byte length. Tries `$math$` and `$$display$$` (gated on
974/// `tex_math_dollars`), GFM `$math$` (gated on `tex_math_gfm`), and the
975/// `\(math\)` / `\[math\]` / `\\(math\\)` / `\\[math\\]` backslash
976/// forms (gated on `tex_math_single_backslash` / `_double_backslash`).
977/// Math content is opaque to emphasis: `$a * b$` must not produce an
978/// emphasis closer at the inner `*`.
979fn try_pandoc_math_opaque(
980    text: &str,
981    pos: usize,
982    end: usize,
983    config: &ParserOptions,
984) -> Option<usize> {
985    let bytes = text.as_bytes();
986    let exts = &config.extensions;
987    let b = bytes[pos];
988
989    if exts.tex_math_dollars && b == b'$' {
990        if let Some((len, _)) = try_parse_display_math(&text[pos..])
991            && pos + len <= end
992        {
993            return Some(len);
994        }
995        if let Some((len, _)) = try_parse_inline_math(&text[pos..])
996            && pos + len <= end
997        {
998            return Some(len);
999        }
1000    }
1001    if exts.tex_math_gfm
1002        && b == b'$'
1003        && let Some((len, _)) = try_parse_gfm_inline_math(&text[pos..])
1004        && pos + len <= end
1005    {
1006        return Some(len);
1007    }
1008    if exts.tex_math_double_backslash && b == b'\\' {
1009        if let Some((len, _)) = try_parse_double_backslash_display_math(&text[pos..])
1010            && pos + len <= end
1011        {
1012            return Some(len);
1013        }
1014        if let Some((len, _)) = try_parse_double_backslash_inline_math(&text[pos..])
1015            && pos + len <= end
1016        {
1017            return Some(len);
1018        }
1019    }
1020    if exts.tex_math_single_backslash && b == b'\\' {
1021        if let Some((len, _)) = try_parse_single_backslash_display_math(&text[pos..])
1022            && pos + len <= end
1023        {
1024            return Some(len);
1025        }
1026        if let Some((len, _)) = try_parse_single_backslash_inline_math(&text[pos..])
1027            && pos + len <= end
1028        {
1029            return Some(len);
1030        }
1031    }
1032    None
1033}
1034
1035/// Pandoc-only: identify a bracket-shaped opaque construct starting at
1036/// `pos` and return its byte length. Tries the dispatcher's precedence
1037/// order:
1038///   1. `![alt](dest)` inline image
1039///   2. `![alt][ref]` / `![alt]` reference image (shape-only opacity)
1040///   3. `[^id]` footnote reference
1041///   4. `[text](dest)` inline link
1042///   5. `[text][ref]` / `[text]` reference link (shape-only opacity)
1043///   6. `[@cite]` bracketed citation
1044///   7. `[text]{attrs}` bracketed span
1045///
1046/// Returns `None` if the bytes at `pos` don't open any recognised Pandoc
1047/// bracket-shaped construct. In that case the scanner falls through to
1048/// the generic `OpenBracket`/`CloseBracket` emission and the dispatcher
1049/// emits the bracket bytes as literal text (or as plain emphasis if the
1050/// pattern matches an opener).
1051/// Lookahead helper: at a `[` or `![` byte under Pandoc dialect, return
1052/// the total byte length of the bracket-shape link/image if it forms a
1053/// valid one, else `None`. Used by `build_ir` to suppress autolink /
1054/// raw HTML / native span recognition inside Pandoc link text —
1055/// pandoc-native treats link text as opaque to those constructs
1056/// (CommonMark spec example #526 / #538 differs). Mirrors the
1057/// dispatcher's `try_parse_*` precedence so the lookahead, the IR's
1058/// `process_brackets` resolution, and the dispatcher's emission agree
1059/// on the bracket-shape's byte boundaries.
1060fn try_pandoc_bracket_link_extent(
1061    text: &str,
1062    pos: usize,
1063    end: usize,
1064    config: &ParserOptions,
1065) -> Option<usize> {
1066    let bytes = text.as_bytes();
1067    let exts = &config.extensions;
1068    let ctx = LinkScanContext::from_options(config);
1069    let allow_shortcut = exts.shortcut_reference_links;
1070
1071    // `![...]` images.
1072    if bytes[pos] == b'!' {
1073        if pos + 1 >= end || bytes[pos + 1] != b'[' {
1074            return None;
1075        }
1076        if exts.inline_images
1077            && let Some((len, _, _, _)) = try_parse_inline_image(&text[pos..], ctx)
1078            && pos + len <= end
1079        {
1080            return Some(len);
1081        }
1082        if exts.reference_links
1083            && let Some((len, _, _, _, _)) =
1084                try_parse_reference_image(&text[pos..], allow_shortcut, exts.spaced_reference_links)
1085            && pos + len <= end
1086        {
1087            return Some(len);
1088        }
1089        return None;
1090    }
1091
1092    // `[...]` openers — try in dispatcher order. Footnote refs
1093    // (`[^id]`), bracketed citations (`[@cite]`), and bracketed spans
1094    // (`[text]{attrs}`) are recognised by their own dedicated branches
1095    // in `build_ir` and don't need this lookahead.
1096    if exts.inline_links
1097        && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..], false, ctx)
1098        && pos + len <= end
1099    {
1100        return Some(len);
1101    }
1102    if exts.reference_links
1103        && let Some((len, _, _, _, _)) = try_parse_reference_link(
1104            &text[pos..],
1105            allow_shortcut,
1106            exts.inline_links,
1107            exts.spaced_reference_links,
1108            ctx,
1109        )
1110        && pos + len <= end
1111    {
1112        return Some(len);
1113    }
1114
1115    None
1116}
1117
/// Report whether `c` counts as punctuation or a symbol for the flanking
/// rules.
///
/// ASCII characters use the exact ASCII punctuation set. Any non-ASCII
/// character is treated as punctuation/symbol when it is neither
/// alphanumeric nor whitespace.
fn is_unicode_punct_or_symbol(c: char) -> bool {
    match c {
        ascii if ascii.is_ascii() => ascii.is_ascii_punctuation(),
        other => !(other.is_alphanumeric() || other.is_whitespace()),
    }
}
1125
1126fn is_left_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1127    let after = run_start + run_len;
1128    let next_char = text.get(after..).and_then(|s| s.chars().next());
1129    let prev_char = (run_start > 0)
1130        .then(|| text[..run_start].chars().last())
1131        .flatten();
1132
1133    let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
1134    if followed_by_ws {
1135        return false;
1136    }
1137    let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
1138    if !followed_by_punct {
1139        return true;
1140    }
1141    prev_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1142}
1143
1144fn is_right_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1145    let after = run_start + run_len;
1146    let next_char = text.get(after..).and_then(|s| s.chars().next());
1147    let prev_char = (run_start > 0)
1148        .then(|| text[..run_start].chars().last())
1149        .flatten();
1150
1151    let preceded_by_ws = prev_char.is_none_or(|c| c.is_whitespace());
1152    if preceded_by_ws {
1153        return false;
1154    }
1155    let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
1156    if !preceded_by_punct {
1157        return true;
1158    }
1159    next_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1160}
1161
1162// ============================================================================
// Pass 3: Process emphasis (CommonMark §6.2)
1164// ============================================================================
1165
1166/// Run the CommonMark §6.3 `process_emphasis` algorithm over the IR's
1167/// delim runs. Mutates the IR in place: matched runs gain entries in their
1168/// `matches` vec, unmatched bytes stay implicit (the emission pass treats
1169/// any byte not covered by a match as literal text).
1170///
1171/// The algorithm tracks a per-bucket `openers_bottom` exclusive lower
1172/// bound to keep walk-back bounded; consume rules and the §6.2 mod-3
1173/// rejection match the reference implementation.
1174pub fn process_emphasis(events: &mut [IrEvent], dialect: crate::options::Dialect) {
1175    process_emphasis_in_range(events, 0, events.len(), dialect);
1176}
1177
1178/// Range-scoped variant of [`process_emphasis`].
1179///
1180/// Only delim runs whose IR event index lies in `[lo, hi)` are considered.
1181/// Used by [`build_full_plans`] to run emphasis pairing inside each
1182/// resolved bracket pair *before* the global top-level pass, so emphasis
1183/// can never form across a link's bracket boundary (CommonMark §6.3
1184/// requires bracket resolution to happen first when at a `]`, with
1185/// emphasis processed on the link's inner range).
1186///
1187/// The function additionally skips delim runs that already carry a
1188/// recorded match in their `matches` vec — this lets the second
1189/// (top-level) pass reuse the same algorithm without re-pairing bytes
1190/// already consumed by inner-range passes.
1191pub fn process_emphasis_in_range(
1192    events: &mut [IrEvent],
1193    lo: usize,
1194    hi: usize,
1195    dialect: crate::options::Dialect,
1196) {
1197    process_emphasis_in_range_filtered(events, lo, hi, None, dialect);
1198}
1199
1200/// Internal variant of [`process_emphasis_in_range`] with an optional
1201/// exclusion bitmap. Event indices for which `excluded[i] == true` are
1202/// treated as if their delim run were already fully consumed — used by
1203/// [`build_full_plans`] to keep the top-level emphasis pass from pairing
1204/// across a resolved bracket pair's boundary (the inner delim runs of
1205/// such a pair belong to the link's inner range and were already paired
1206/// by the scoped pass).
1207fn process_emphasis_in_range_filtered(
1208    events: &mut [IrEvent],
1209    lo: usize,
1210    hi: usize,
1211    excluded: Option<&[bool]>,
1212    dialect: crate::options::Dialect,
1213) {
1214    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1215    if is_commonmark {
1216        run_emphasis_pass(events, lo, hi, excluded, dialect, &[], false);
1217        return;
1218    }
1219    // Pandoc dialect: cascade-then-rerun. Run the standard pass, then
1220    // invalidate Emph/Strong pairs whose inner range contains an
1221    // unmatched same-char run with both can_open && can_close (Pandoc's
1222    // recursive descent would have failed those outer pairs because the
1223    // inner content has a stray, ambiguous delimiter the recursive
1224    // parser cannot pair). The invalidated pairs go into a "rejected
1225    // list" that the next iteration of the standard pass consults to
1226    // pick a different opener for the same closer (or reject the
1227    // closer altogether). Iterate to a fixed point.
1228    //
1229    // The rerun (iter 2+) runs in `strict` mode: a candidate pair is
1230    // rejected if its inner range contains an unmatched same-char run
1231    // with count > pair.count. This mirrors pandoc-markdown's
1232    // recursive-descent semantics where, e.g. inside a failed outer
1233    // `**...**` Strong, the inner `one c` parser's `option2`
1234    // (`string [c,c] >> two c mempty`) greedily consumes a stray `**`
1235    // and prevents subsequent `*` runs from pairing as Emph. Without
1236    // this gate, `**foo *bar** baz*` would produce Emph[bar** baz]
1237    // after the outer Strong invalidation, but pandoc treats it as
1238    // all-literal because the inner `**` blocks the Emph match.
1239    let mut rejected: Vec<(usize, usize)> = Vec::new();
1240    let max_iters = events.len().saturating_add(2);
1241    let mut iter = 0;
1242    loop {
1243        let strict = iter > 0;
1244        run_emphasis_pass(events, lo, hi, excluded, dialect, &rejected, strict);
1245        let invalidations = pandoc_cascade_invalidate(events, excluded);
1246        if invalidations.is_empty() {
1247            break;
1248        }
1249        rejected.extend(invalidations);
1250        iter += 1;
1251        if iter >= max_iters {
1252            break;
1253        }
1254    }
1255    // Recovery for `***A **B** C***` patterns: synthesise the inner
1256    // Strong match the standard delim-stack algorithm can't reach.
1257    pandoc_inner_strong_recovery(events);
1258}
1259
1260/// One pass of the CommonMark §6.2 emphasis pairing algorithm over the
1261/// IR's [`DelimRun`](IrEvent::DelimRun) events in `[lo, hi)`. Pandoc
1262/// dialect gates apply when `dialect == Dialect::Pandoc`. The
1263/// `rejected_pairs` list (Pandoc only) excludes specific
1264/// (opener_event_idx, closer_event_idx) pairs from matching — used by
1265/// the cascade-then-rerun loop to prevent invalidated pairs from
1266/// re-forming on the next iteration.
1267fn run_emphasis_pass(
1268    events: &mut [IrEvent],
1269    lo: usize,
1270    hi: usize,
1271    excluded: Option<&[bool]>,
1272    dialect: crate::options::Dialect,
1273    rejected_pairs: &[(usize, usize)],
1274    strict_pandoc: bool,
1275) {
1276    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1277    let hi = hi.min(events.len());
1278    if lo >= hi {
1279        return;
1280    }
1281    // Indices of DelimRun events within [lo, hi), in order, that have
1282    // not already been fully consumed by an earlier scoped pass and that
1283    // are not in the optional exclusion bitmap.
1284    let mut delim_idxs: Vec<usize> = events[lo..hi]
1285        .iter()
1286        .enumerate()
1287        .filter_map(|(i, e)| {
1288            let abs = lo + i;
1289            match e {
1290                IrEvent::DelimRun { matches, .. }
1291                    if matches.is_empty()
1292                        && excluded.is_none_or(|ex| ex.get(abs).copied() != Some(true)) =>
1293                {
1294                    Some(abs)
1295                }
1296                _ => None,
1297            }
1298        })
1299        .collect();
1300    if delim_idxs.is_empty() {
1301        return;
1302    }
1303
1304    // Working state: count (remaining unmatched chars) and source_start
1305    // (first remaining char) per delim run. Indexed by position in
1306    // `delim_idxs`.
1307    let mut count: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1308    let mut source_start: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1309    let mut removed: Vec<bool> = vec![false; delim_idxs.len()];
1310
1311    for &ev_idx in &delim_idxs {
1312        if let IrEvent::DelimRun { start, end, .. } = &events[ev_idx] {
1313            count.push(end - start);
1314            source_start.push(*start);
1315        }
1316    }
1317
1318    // openers_bottom[ch_idx][len%3][can_open] → exclusive lower bound
1319    // (an index into `delim_idxs`, or None meaning "no bottom yet").
1320    let mut openers_bottom: [[[Option<usize>; 2]; 3]; 2] = [[[None; 2]; 3]; 2];
1321
1322    // First active index, scanning forward.
1323    let first_active =
1324        |removed: &[bool]| -> Option<usize> { (0..removed.len()).find(|&i| !removed[i]) };
1325    let next_active = |removed: &[bool], from: usize| -> Option<usize> {
1326        (from + 1..removed.len()).find(|&i| !removed[i])
1327    };
1328    let prev_active =
1329        |removed: &[bool], from: usize| -> Option<usize> { (0..from).rev().find(|&i| !removed[i]) };
1330
1331    let min_closer_count = 1usize;
1332    let mut closer_local = first_active(&removed);
1333    while let Some(c) = closer_local {
1334        let ev_c_idx = delim_idxs[c];
1335        let (ch_c, can_open_c, can_close_c) = match &events[ev_c_idx] {
1336            IrEvent::DelimRun {
1337                ch,
1338                can_open,
1339                can_close,
1340                ..
1341            } => (*ch, *can_open, *can_close),
1342            _ => unreachable!(),
1343        };
1344        if !can_close_c || removed[c] || count[c] < min_closer_count {
1345            closer_local = next_active(&removed, c);
1346            continue;
1347        }
1348
1349        let ch_idx = if ch_c == b'*' { 0 } else { 1 };
1350        let closer_mod = count[c] % 3;
1351        let closer_open_bucket = can_open_c as usize;
1352        let bottom = openers_bottom[ch_idx][closer_mod][closer_open_bucket];
1353
1354        // Walk back to find a compatible opener.
1355        let mut found_opener: Option<usize> = None;
1356        let mut walk = prev_active(&removed, c);
1357        while let Some(o) = walk {
1358            if Some(o) == bottom {
1359                break;
1360            }
1361            let ev_o_idx = delim_idxs[o];
1362            let (ch_o, can_open_o, can_close_o) = match &events[ev_o_idx] {
1363                IrEvent::DelimRun {
1364                    ch,
1365                    can_open,
1366                    can_close,
1367                    ..
1368                } => (*ch, *can_open, *can_close),
1369                _ => unreachable!(),
1370            };
1371            if !removed[o] && ch_o == ch_c && can_open_o {
1372                let oc_sum = count[o] + count[c];
1373                let opener_both = can_open_o && can_close_o;
1374                let closer_both = can_open_c && can_close_c;
1375                let mod3_reject = is_commonmark
1376                    && (opener_both || closer_both)
1377                    && oc_sum.is_multiple_of(3)
1378                    && !(count[o].is_multiple_of(3) && count[c].is_multiple_of(3));
1379                // Pandoc-markdown rejects emph/strong pairs whose counts
1380                // disagree in the exactly-(1,2) / (2,1) shape:
1381                //   - `**foo*` (2,1): `try_parse_two` looks only for a
1382                //     `**` closer; the lone `*` doesn't satisfy that.
1383                //   - `*foo**` (1,2): `try_parse_one` encountering `**`
1384                //     tries `try_parse_two`; absence of an inner `**`
1385                //     closer cascades the outer parse to fail.
1386                // Other count combinations DO match (verified against
1387                // `pandoc -f markdown`):
1388                //   - (1,3) / (3,1) → emph match, opposite-side
1389                //     leftover `**` literal.
1390                //   - (2,3) / (3,2) → strong match, single `*` literal.
1391                //   - (3,3) → STRONG(EM(...)) nested.
1392                //   - (1..3, 4+) → match (Pandoc's ender walks the
1393                //     closer run for a valid position; algorithm
1394                //     consumes leftmost via leftover-as-literal).
1395                // Opener count >= 4 is rejected (Pandoc's
1396                // `try_parse_emphasis` has no count-4+ dispatch).
1397                let pandoc_reject = !is_commonmark
1398                    && ((count[o] == 1 && count[c] == 2)
1399                        || (count[o] == 2 && count[c] == 1)
1400                        || count[o] >= 4);
1401                let pair_rejected = !is_commonmark && {
1402                    let oe = delim_idxs[o];
1403                    let ce = delim_idxs[c];
1404                    rejected_pairs.iter().any(|&(ro, rc)| ro == oe && rc == ce)
1405                };
1406                // Pandoc strict-rerun gate (iter 2+ only): block a
1407                // candidate pair if any unmatched same-char run between
1408                // its opener and closer has remaining count strictly
1409                // greater than the consume rule for this pair.
1410                // Mirrors pandoc-markdown's recursive descent where
1411                // `one c`'s `option2` (`string [c,c] >> two c`) would
1412                // greedily consume a stray higher-count run, blocking
1413                // the outer `one c` from finding its `ender c 1` —
1414                // e.g. `**foo *bar** baz*` after the outer Strong
1415                // invalidates: a naïve rerun pairs ev1 (`*`) ↔ ev3
1416                // (`*`) as Emph (consume=1), but pandoc treats the
1417                // `**` between as having "consumed" any further
1418                // matching, leaving everything literal.
1419                let strict_block = strict_pandoc && {
1420                    let tentative_consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1421                        1
1422                    } else if count[o] >= 2 && count[c] >= 2 {
1423                        2
1424                    } else {
1425                        1
1426                    };
1427                    let lo_evt = delim_idxs[o] + 1;
1428                    let hi_evt = delim_idxs[c];
1429                    (lo_evt..hi_evt).any(|k| match &events[k] {
1430                        IrEvent::DelimRun {
1431                            ch: ch_k,
1432                            start,
1433                            end,
1434                            matches,
1435                            ..
1436                        } => {
1437                            *ch_k == ch_c && {
1438                                let total = end - start;
1439                                let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1440                                total.saturating_sub(consumed) > tentative_consume
1441                            }
1442                        }
1443                        _ => false,
1444                    })
1445                };
1446                if !mod3_reject && !pandoc_reject && !pair_rejected && !strict_block {
1447                    found_opener = Some(o);
1448                    break;
1449                }
1450            }
1451            if o == 0 {
1452                break;
1453            }
1454            walk = prev_active(&removed, o);
1455        }
1456
1457        if let Some(o) = found_opener {
1458            // Consume rule:
1459            //   CommonMark — consume 2 (Strong) when both sides have
1460            //     >= 2 chars, else 1 (Emph). For `***x***` (3,3) this
1461            //     produces EM(STRONG(...)) because the first match
1462            //     consumes 2 from each side (Strong outermost).
1463            //   Pandoc — when both sides have >= 3, consume 1 first
1464            //     (Emph innermost) leaving 2 + 2 to pair as Strong on
1465            //     the second pass. This produces STRONG(EM(...)) for
1466            //     `***x***`, matching Pandoc-markdown's recursive
1467            //     `try_parse_three` algorithm.
1468            let consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1469                1
1470            } else if count[o] >= 2 && count[c] >= 2 {
1471                2
1472            } else {
1473                1
1474            };
1475            let kind = if consume == 2 {
1476                EmphasisKind::Strong
1477            } else {
1478                EmphasisKind::Emph
1479            };
1480
1481            // Opener consumes inner-edge (rightmost) chars.
1482            let opener_match_offset =
1483                source_start[o] + count[o] - consume - source_start_event(&events[delim_idxs[o]]);
1484            // Closer consumes inner-edge (leftmost) chars.
1485            let closer_match_offset = source_start[c] - source_start_event(&events[delim_idxs[c]]);
1486
1487            // Record match on opener.
1488            if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[o]] {
1489                matches.push(DelimMatch {
1490                    offset_in_run: opener_match_offset as u8,
1491                    len: consume as u8,
1492                    is_opener: true,
1493                    partner_event: delim_idxs[c] as u32,
1494                    partner_offset: closer_match_offset as u8,
1495                    kind,
1496                });
1497            }
1498            // Record match on closer.
1499            if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[c]] {
1500                matches.push(DelimMatch {
1501                    offset_in_run: closer_match_offset as u8,
1502                    len: consume as u8,
1503                    is_opener: false,
1504                    partner_event: delim_idxs[o] as u32,
1505                    partner_offset: opener_match_offset as u8,
1506                    kind,
1507                });
1508            }
1509
1510            count[o] -= consume;
1511            source_start[c] += consume;
1512            count[c] -= consume;
1513
1514            // Remove all openers strictly between o and c.
1515            let mut between = next_active(&removed, o);
1516            while let Some(idx) = between {
1517                if idx == c {
1518                    break;
1519                }
1520                removed[idx] = true;
1521                between = next_active(&removed, idx);
1522            }
1523
1524            if count[o] == 0 {
1525                removed[o] = true;
1526            }
1527            if count[c] == 0 {
1528                removed[c] = true;
1529                closer_local = next_active(&removed, c);
1530            }
1531            // Else re-process the same closer with reduced count.
1532        } else {
1533            openers_bottom[ch_idx][closer_mod][closer_open_bucket] = prev_active(&removed, c);
1534            if !can_open_c {
1535                removed[c] = true;
1536            }
1537            closer_local = next_active(&removed, c);
1538        }
1539    }
1540
1541    // No further mutation needed: matches are recorded; remaining bytes
1542    // stay implicit literal. Pandoc cascade is invoked by the caller
1543    // (`process_emphasis_in_range_filtered`) once per pass so it can
1544    // accumulate invalidations into a rejected-pairs list and re-run.
1545    let _ = (&mut delim_idxs, &mut openers_bottom, min_closer_count);
1546}
1547
1548/// Pandoc-only post-processing pass over [`process_emphasis_in_range_filtered`]
1549/// matches: invalidate any matched delim pair that contains an unmatched
1550/// same-character run between its opener and closer. Returns the list
1551/// of (opener_event_idx, closer_event_idx) pairs that were invalidated
1552/// in this call, so the caller can seed a rejected-pairs list and
1553/// re-run the standard pass — this lets Pandoc re-pair the inner runs
1554/// that the invalidated outer match would have stolen via
1555/// between-removal (e.g. `*foo **bar* baz**` → after the outer
1556/// `ev0..ev2` Emph is invalidated, `ev1..ev3` matches as Strong on the
1557/// next iteration).
1558fn pandoc_cascade_invalidate(
1559    events: &mut [IrEvent],
1560    excluded: Option<&[bool]>,
1561) -> Vec<(usize, usize)> {
1562    let mut invalidated_pairs: Vec<(usize, usize)> = Vec::new();
1563    // Early-exit: if there are no `DelimRun` events at all, the cascade
1564    // pass is a no-op. Avoids allocating the two scratch vecs below for
1565    // every range with no `*`/`_` runs (which is the common case for
1566    // ranges that contain only standalone constructs / brackets).
1567    if !events.iter().any(|e| matches!(e, IrEvent::DelimRun { .. })) {
1568        return invalidated_pairs;
1569    }
1570    let is_excluded = |k: usize| excluded.is_some_and(|ex| ex.get(k).copied() == Some(true));
1571    // Reuse two scratch vecs across the inner loop iterations instead
1572    // of `.collect()` each time. These are tiny per-paragraph
1573    // allocations but the function is called for every Pandoc inline
1574    // emphasis pass and shows up in malloc traffic.
1575    let mut total: Vec<usize> = Vec::with_capacity(events.len());
1576    let mut consumed: Vec<usize> = Vec::with_capacity(events.len());
1577    loop {
1578        total.clear();
1579        consumed.clear();
1580        // Compute total bytes (run length) and consumed bytes (sum of
1581        // match lens) per DelimRun event index.
1582        total.extend(events.iter().map(|e| match e {
1583            IrEvent::DelimRun { start, end, .. } => end - start,
1584            _ => 0,
1585        }));
1586        consumed.extend(events.iter().map(|e| match e {
1587            IrEvent::DelimRun { matches, .. } => matches.iter().map(|m| m.len as usize).sum(),
1588            _ => 0,
1589        }));
1590
1591        // Find a pair to invalidate. We invalidate one and restart so
1592        // the cascade can re-evaluate dependent pairs.
1593        let mut to_invalidate: Option<(usize, u8)> = None;
1594        'outer: for opener_idx in 0..events.len() {
1595            let IrEvent::DelimRun {
1596                ch: ch_o, matches, ..
1597            } = &events[opener_idx]
1598            else {
1599                continue;
1600            };
1601            for (mi, m) in matches.iter().enumerate() {
1602                if !m.is_opener {
1603                    continue;
1604                }
1605                let closer_idx = m.partner_event as usize;
1606                if closer_idx <= opener_idx || closer_idx >= events.len() {
1607                    continue;
1608                }
1609                // Scan events strictly between opener and closer for any
1610                // DelimRun with the same `ch`, unmatched bytes, AND
1611                // both `can_open` and `can_close` (i.e., the run could
1612                // have participated in pairing on both sides). A
1613                // can_open-only or can_close-only run is a one-sided
1614                // fragment (e.g. an isolated `*` after a backslash
1615                // escape) that the Pandoc recursive-descent path would
1616                // never have tried as a nested-strong opener — those
1617                // shouldn't cascade-invalidate the surrounding pair.
1618                for k in (opener_idx + 1)..closer_idx {
1619                    if is_excluded(k) {
1620                        continue;
1621                    }
1622                    if let IrEvent::DelimRun {
1623                        ch: ch_k,
1624                        can_open: co_k,
1625                        can_close: cc_k,
1626                        ..
1627                    } = &events[k]
1628                        && *ch_k == *ch_o
1629                        && consumed[k] < total[k]
1630                        && *co_k
1631                        && *cc_k
1632                    {
1633                        to_invalidate = Some((opener_idx, mi as u8));
1634                        break 'outer;
1635                    }
1636                }
1637            }
1638        }
1639
1640        let Some((opener_idx, mi)) = to_invalidate else {
1641            break;
1642        };
1643
1644        // Look up the partner event/offset before mutating.
1645        let (closer_idx, opener_offset) = match &events[opener_idx] {
1646            IrEvent::DelimRun { matches, .. } => {
1647                let m = matches[mi as usize];
1648                (m.partner_event as usize, m.offset_in_run)
1649            }
1650            _ => break,
1651        };
1652
1653        // Remove the opener match.
1654        if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1655            matches.remove(mi as usize);
1656        }
1657        // Remove the corresponding closer match (closer's match has
1658        // is_opener=false and partner_offset == opener's offset_in_run).
1659        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1660            matches.retain(|m| m.is_opener || m.partner_offset != opener_offset);
1661        }
1662        invalidated_pairs.push((opener_idx, closer_idx));
1663    }
1664    invalidated_pairs
1665}
1666
1667/// Pandoc-only post-pass: recover the inner Strong match in
1668/// `***A **B** C***` patterns where the IR's standard pass produced
1669/// `Emph[Strong[A], "B**...** C"]` (matching the outer triple as
1670/// Strong+Emph but losing the inner `**...**`-as-Strong-of-`C` pair).
1671///
1672/// Pandoc's recursive descent here goes
1673/// `three c → ender c 2 → one c → option2 → two c`, producing
1674/// `Emph[Strong[A], "B", Strong[C]]` — two Strong nodes inside an outer
1675/// Emph. The standard delim-stack algorithm can't reach this pairing
1676/// because between-removal during the outer Emph match removes the
1677/// inner closer-side `**` (e.g. `bar**`) from the candidate pool.
1678///
1679/// This recovery scans Emph matches whose opener and closer originally
1680/// had count >= 3, and whose closer has unmatched bytes >= 2 after the
1681/// standard pass; for each, we look for an unmatched same-char
1682/// between-run with count >= 2 and `can_close = true` (the would-be
1683/// inner-Strong opener) and synthesise a Strong match that consumes
1684/// the leftmost 2 bytes of the closer (where the existing Emph match
1685/// shifts to the rightmost 1 byte). The byte-position rewrite lets
1686/// the CST emission produce well-nested `Emph[..., Strong[...]]` —
1687/// outer Emph close at the rightmost outer-triple byte, inner Strong
1688/// close at the leftmost two.
1689fn pandoc_inner_strong_recovery(events: &mut [IrEvent]) {
1690    let n = events.len();
1691    // (between_idx, opener_idx, closer_idx, len)
1692    let mut to_apply: Vec<(usize, usize, usize, u8)> = Vec::new();
1693
1694    for opener_idx in 0..n {
1695        let (open_total, open_matches_clone, ch_o) = match &events[opener_idx] {
1696            IrEvent::DelimRun {
1697                start,
1698                end,
1699                matches,
1700                ch,
1701                ..
1702            } => (*end - *start, matches.clone(), *ch),
1703            _ => continue,
1704        };
1705        if open_total < 3 {
1706            continue;
1707        }
1708
1709        for m in open_matches_clone.iter() {
1710            if !m.is_opener || m.kind != EmphasisKind::Emph {
1711                continue;
1712            }
1713            let closer_idx = m.partner_event as usize;
1714            if closer_idx <= opener_idx || closer_idx >= n {
1715                continue;
1716            }
1717
1718            let (close_total, close_consumed) = match &events[closer_idx] {
1719                IrEvent::DelimRun {
1720                    start,
1721                    end,
1722                    matches,
1723                    ..
1724                } => {
1725                    let total = end - start;
1726                    let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1727                    (total, consumed)
1728                }
1729                _ => continue,
1730            };
1731            if close_total < 3 {
1732                continue;
1733            }
1734            let leftover = close_total.saturating_sub(close_consumed);
1735            if leftover < 2 {
1736                continue;
1737            }
1738
1739            // Walk backward from closer-1 looking for the rightmost
1740            // unmatched same-char run with count >= 2 and
1741            // can_close=true.
1742            for k in ((opener_idx + 1)..closer_idx).rev() {
1743                if let IrEvent::DelimRun {
1744                    ch,
1745                    start,
1746                    end,
1747                    matches,
1748                    can_close,
1749                    ..
1750                } = &events[k]
1751                {
1752                    if *ch != ch_o || !*can_close {
1753                        continue;
1754                    }
1755                    let total = end - start;
1756                    let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1757                    let remaining = total.saturating_sub(consumed);
1758                    if remaining < 2 {
1759                        continue;
1760                    }
1761                    to_apply.push((k, opener_idx, closer_idx, 2));
1762                    break;
1763                }
1764            }
1765        }
1766    }
1767
1768    for (between_idx, opener_idx, closer_idx, len) in to_apply {
1769        // Find the existing Emph match on the closer side.
1770        let (closer_emph_match_idx, closer_emph_offset) = {
1771            let mut found: Option<(usize, u8)> = None;
1772            if let IrEvent::DelimRun { matches, .. } = &events[closer_idx] {
1773                for (mi, m) in matches.iter().enumerate() {
1774                    if !m.is_opener
1775                        && m.partner_event as usize == opener_idx
1776                        && m.kind == EmphasisKind::Emph
1777                    {
1778                        found = Some((mi, m.offset_in_run));
1779                        break;
1780                    }
1781                }
1782            }
1783            match found {
1784                Some(x) => x,
1785                None => continue,
1786            }
1787        };
1788
1789        // Find the corresponding Emph match on the opener side.
1790        let opener_emph_match_idx = {
1791            let mut found: Option<usize> = None;
1792            if let IrEvent::DelimRun { matches, .. } = &events[opener_idx] {
1793                for (mi, m) in matches.iter().enumerate() {
1794                    if m.is_opener
1795                        && m.partner_event as usize == closer_idx
1796                        && m.kind == EmphasisKind::Emph
1797                    {
1798                        found = Some(mi);
1799                        break;
1800                    }
1801                }
1802            }
1803            match found {
1804                Some(x) => x,
1805                None => continue,
1806            }
1807        };
1808
1809        // Shift the Emph closer's offset to the right of the new
1810        // Strong closer's bytes (Strong takes leftmost `len` bytes,
1811        // Emph takes the next byte).
1812        let new_closer_emph_offset = closer_emph_offset + len;
1813
1814        // Update closer's Emph offset_in_run.
1815        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1816            matches[closer_emph_match_idx].offset_in_run = new_closer_emph_offset;
1817        }
1818        // Update opener's Emph partner_offset to point at the shifted
1819        // Emph closer position.
1820        if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1821            matches[opener_emph_match_idx].partner_offset = new_closer_emph_offset;
1822        }
1823
1824        // Add Strong opener match on the between-run.
1825        if let IrEvent::DelimRun { matches, .. } = &mut events[between_idx] {
1826            matches.push(DelimMatch {
1827                offset_in_run: 0,
1828                len,
1829                is_opener: true,
1830                partner_event: closer_idx as u32,
1831                partner_offset: closer_emph_offset,
1832                kind: EmphasisKind::Strong,
1833            });
1834        }
1835        // Add Strong closer match on the closer (at the original
1836        // pre-shift Emph-closer position; the bytes that were the
1837        // single Emph closer now become the leftmost 2 bytes of the
1838        // Strong closer).
1839        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1840            matches.push(DelimMatch {
1841                offset_in_run: closer_emph_offset,
1842                len,
1843                is_opener: false,
1844                partner_event: between_idx as u32,
1845                partner_offset: 0,
1846                kind: EmphasisKind::Strong,
1847            });
1848        }
1849    }
1850}
1851
1852fn source_start_event(event: &IrEvent) -> usize {
1853    match event {
1854        IrEvent::DelimRun { start, .. } => *start,
1855        _ => unreachable!("source_start_event called on non-DelimRun"),
1856    }
1857}
1858
1859// ============================================================================
1860// Pass 3: Process brackets (CommonMark §6.3)
1861// ============================================================================
1862
1863/// Resolve `[`/`![`/`]` markers into link/image nodes per CommonMark §6.3
1864/// (with Pandoc-aware variations under `Dialect::Pandoc`).
1865///
1866/// Walks the IR forward looking for `]` markers. For each one, finds the
1867/// nearest active matching `[`/`![` and tries to resolve the bracket pair
1868/// as a link or image. Resolution is tried in spec order:
1869///
1870/// 1. Inline link / image: `[text](dest)` or `[text](dest "title")`.
1871/// 2. Full reference: `[text][label]`, where `label` is in `refdefs`.
1872/// 3. Collapsed reference: `[text][]`, where `text` (normalised) is in
1873///    `refdefs`.
1874/// 4. Shortcut reference: `[text]` not followed by `(` or `[`, where
1875///    `text` (normalised) is in `refdefs`.
1876///
1877/// On a match, the opener gets a `BracketResolution` and the closer is
1878/// flagged `matched`. Under `Dialect::CommonMark`, all earlier active link
1879/// openers are deactivated to implement the §6.3 "links may not contain
1880/// other links" rule (image brackets do not deactivate earlier link
1881/// openers — only links do). Under `Dialect::Pandoc`, the deactivate-pass
1882/// is skipped: pandoc-native is outer-wins for nested links (the inner
1883/// `[inner](u2)` of `[link [inner](u2)](u1)` is literal text inside the
1884/// outer link), and the dispatcher enforces this via a `suppress_inner_links`
1885/// flag during LINK-text recursion. So under Pandoc the IR can leave both
1886/// outer and inner resolved and trust the dispatcher to suppress inner
1887/// LINK emission.
1888///
1889/// On a miss the bracket pair stays opaque-as-literal and the closer is
1890/// dropped from the bracket stack so the next `]` can re-pair.
1891///
1892/// Reference-form resolution consults the refdef map under both
1893/// dialects (CommonMark §6.3 and Pandoc-markdown agree on the
1894/// document-scoped lookup rule). Under Pandoc, when a bracket-shape
1895/// pattern (`[text][label]`, `[text][]`, `[text]`) doesn't resolve to
1896/// a refdef, the opener is tagged with `unresolved_ref = Some(...)`
1897/// and the closer's `matched` is set to `true` so that
1898/// [`build_bracket_plan`] emits a [`BracketDispo::UnresolvedReference`]
1899/// keyed at the opener. Emission then wraps `[start, end)` in an
1900/// `UNRESOLVED_REFERENCE` node — distinct from `LINK` — so downstream
1901/// tools (linter, LSP) can attach behavior to the bracket-shape
1902/// pattern without the parser having to lie about resolution.
1903///
1904/// Under CommonMark, no `unresolved_ref` is recorded; the
1905/// no-resolution fall-through behaves as today (opener deactivated,
1906/// brackets emit as literal text).
1907pub fn process_brackets(
1908    events: &mut [IrEvent],
1909    text: &str,
1910    refdefs: Option<&RefdefMap>,
1911    dialect: crate::options::Dialect,
1912    allow_spaced: bool,
1913) {
1914    let empty: HashSet<String> = HashSet::new();
1915    let labels: &HashSet<String> = match refdefs {
1916        Some(map) => map.as_ref(),
1917        None => &empty,
1918    };
1919    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1920    // Refdef-aware label resolution under both dialects.
1921    let label_resolves =
1922        |key_norm: &str| -> bool { !key_norm.is_empty() && labels.contains(key_norm) };
1923
1924    // Walk forward through events, treating it as a linear scan for `]`.
1925    let mut i = 0;
1926    while i < events.len() {
1927        let close_pos = match &events[i] {
1928            IrEvent::CloseBracket { pos, .. } => *pos,
1929            _ => {
1930                i += 1;
1931                continue;
1932            }
1933        };
1934
1935        // Find the nearest active OpenBracket before `i`.
1936        let mut o = match find_active_opener(events, i) {
1937            Some(o) => o,
1938            None => {
1939                i += 1;
1940                continue;
1941            }
1942        };
1943
1944        let (open_end, is_image) = match &events[o] {
1945            IrEvent::OpenBracket { end, is_image, .. } => (*end, *is_image),
1946            _ => unreachable!(),
1947        };
1948        let text_start = open_end;
1949        let text_end = close_pos;
1950        let after_close = close_pos + 1;
1951
1952        // 1. Inline link / image.
1953        if let Some((suffix_end, dest, title)) = try_inline_suffix(text, after_close) {
1954            // §6.3 link-in-link rule (CommonMark): if this is a *link*
1955            // (not an image), and any earlier active link opener exists,
1956            // deactivate them. We also deactivate openers strictly before
1957            // `o` here because matching means the inner link wins; the
1958            // spec applies this *after* matching. Pandoc skips this —
1959            // outer-wins is enforced by the dispatcher's
1960            // `suppress_inner_links` flag during LINK-text recursion.
1961            if !is_image && is_commonmark {
1962                deactivate_earlier_link_openers(events, o);
1963            }
1964            commit_resolution(
1965                events,
1966                o,
1967                i,
1968                text_start,
1969                text_end,
1970                after_close,
1971                suffix_end,
1972                LinkKind::Inline { dest, title },
1973            );
1974            // Remove the opener from the bracket stack: it has been
1975            // matched (active=false will fall out automatically since
1976            // resolution is Some).
1977            mark_opener_resolved(events, o);
1978            i += 1;
1979            continue;
1980        }
1981
1982        // 2. Full reference link: `[text][label]`.
1983        let full_ref_suffix = try_full_reference_suffix(text, after_close, allow_spaced);
1984        if let Some((suffix_end, label_raw)) = &full_ref_suffix {
1985            let label_norm = normalize_label(label_raw);
1986            if label_resolves(&label_norm) {
1987                if !is_image && is_commonmark {
1988                    deactivate_earlier_link_openers(events, o);
1989                }
1990                commit_resolution(
1991                    events,
1992                    o,
1993                    i,
1994                    text_start,
1995                    text_end,
1996                    after_close,
1997                    *suffix_end,
1998                    LinkKind::FullReference {
1999                        label: label_raw.clone(),
2000                    },
2001                );
2002                mark_opener_resolved(events, o);
2003                i += 1;
2004                continue;
2005            }
2006            // Bracketed but unresolved label: §6.3 says we still treat
2007            // `[text][label]` as not-a-link, but the brackets get
2008            // consumed as literal text AND the shortcut form is
2009            // suppressed (since the `]` is followed by a link label).
2010        }
2011
2012        // 3. Collapsed `[]`.
2013        let link_text = &text[text_start..text_end];
2014        let link_text_norm = normalize_label(link_text);
2015        let (is_collapsed, collapsed_suffix_end) =
2016            collapsed_marker_span(text, after_close, allow_spaced)
2017                .map_or((false, after_close + 2), |end| (true, end));
2018
2019        if is_collapsed && label_resolves(&link_text_norm) {
2020            if !is_image && is_commonmark {
2021                deactivate_earlier_link_openers(events, o);
2022            }
2023            commit_resolution(
2024                events,
2025                o,
2026                i,
2027                text_start,
2028                text_end,
2029                after_close,
2030                collapsed_suffix_end,
2031                LinkKind::CollapsedReference,
2032            );
2033            mark_opener_resolved(events, o);
2034            i += 1;
2035            continue;
2036        }
2037        // `[text][]` with text not in refdefs — falls through to
2038        // literal text; shortcut is suppressed (followed by `[]`).
2039
2040        // 4. Shortcut form: `[text]` not followed by `[]` or `[label]`.
2041        // Per CommonMark §6.3: "A shortcut reference link consists of a
2042        // link label that matches a link reference definition elsewhere
2043        // in the document and is not followed by [] or a link label."
2044        // The full-ref / collapsed shape attempts above suppress the
2045        // shortcut even when their labels don't resolve — the bracket
2046        // bytes still get consumed as literal text.
2047        let shortcut_suppressed = full_ref_suffix.is_some() || is_collapsed;
2048        if !shortcut_suppressed && label_resolves(&link_text_norm) {
2049            if !is_image && is_commonmark {
2050                deactivate_earlier_link_openers(events, o);
2051            }
2052            commit_resolution(
2053                events,
2054                o,
2055                i,
2056                text_start,
2057                text_end,
2058                after_close,
2059                after_close,
2060                LinkKind::ShortcutReference,
2061            );
2062            mark_opener_resolved(events, o);
2063            i += 1;
2064            continue;
2065        }
2066
2067        // No resolution. Under Pandoc, the bracket pair is still a
2068        // recognisable reference shape (full / collapsed / shortcut) —
2069        // tag the opener with `unresolved_ref` so emission wraps it
2070        // in an `UNRESOLVED_REFERENCE` node, and mark the closer
2071        // matched so it doesn't fall through to a literal `]` token.
2072        // Under CommonMark, behavior unchanged: deactivate the opener,
2073        // brackets emit as literal text.
2074        //
2075        // Empty-component shapes (`[]`, `[][]`) aren't reference
2076        // patterns even in spirit — pandoc-native treats them as
2077        // literal text — so skip wrapping.
2078        let unresolved_shape = if !is_commonmark {
2079            let (end, has_substantive_label) =
2080                if let Some((suffix_end, label_raw)) = &full_ref_suffix {
2081                    (*suffix_end, !normalize_label(label_raw).is_empty())
2082                } else if is_collapsed {
2083                    (collapsed_suffix_end, !link_text_norm.is_empty())
2084                } else {
2085                    (after_close, !link_text_norm.is_empty())
2086                };
2087            if has_substantive_label {
2088                Some(UnresolvedRefShape {
2089                    close_event: i as u32,
2090                    text_end,
2091                    end,
2092                })
2093            } else {
2094                None
2095            }
2096        } else {
2097            None
2098        };
2099        if let IrEvent::OpenBracket {
2100            active,
2101            unresolved_ref,
2102            ..
2103        } = &mut events[o]
2104        {
2105            *active = false;
2106            *unresolved_ref = unresolved_shape;
2107        }
2108        if unresolved_shape.is_some()
2109            && let IrEvent::CloseBracket { matched, .. } = &mut events[i]
2110        {
2111            *matched = true;
2112        }
2113        let _ = &mut o;
2114        i += 1;
2115    }
2116}
2117
2118fn find_active_opener(events: &[IrEvent], close_idx: usize) -> Option<usize> {
2119    (0..close_idx).rev().find(|&i| {
2120        matches!(
2121            &events[i],
2122            IrEvent::OpenBracket {
2123                active: true,
2124                resolution: None,
2125                ..
2126            }
2127        )
2128    })
2129}
2130
2131fn deactivate_earlier_link_openers(events: &mut [IrEvent], open_idx: usize) {
2132    for ev in &mut events[..open_idx] {
2133        if let IrEvent::OpenBracket {
2134            is_image: false,
2135            active,
2136            resolution: None,
2137            ..
2138        } = ev
2139        {
2140            *active = false;
2141        }
2142    }
2143}
2144
2145fn mark_opener_resolved(events: &mut [IrEvent], open_idx: usize) {
2146    if let IrEvent::OpenBracket { active, .. } = &mut events[open_idx] {
2147        *active = false;
2148    }
2149}
2150
2151#[allow(clippy::too_many_arguments)]
2152fn commit_resolution(
2153    events: &mut [IrEvent],
2154    open_idx: usize,
2155    close_idx: usize,
2156    text_start: usize,
2157    text_end: usize,
2158    suffix_start: usize,
2159    suffix_end: usize,
2160    kind: LinkKind,
2161) {
2162    if let IrEvent::OpenBracket { resolution, .. } = &mut events[open_idx] {
2163        *resolution = Some(BracketResolution {
2164            close_event: close_idx as u32,
2165            text_start,
2166            text_end,
2167            suffix_start,
2168            suffix_end,
2169            kind,
2170        });
2171    }
2172    if let IrEvent::CloseBracket { matched, .. } = &mut events[close_idx] {
2173        *matched = true;
2174    }
2175}
2176
2177/// Try to parse `(dest)` or `(dest "title")` inline link suffix starting
2178/// at `text[pos]`. Returns `(end_pos_exclusive, dest, title)`.
2179fn try_inline_suffix(text: &str, pos: usize) -> Option<(usize, String, Option<String>)> {
2180    let bytes = text.as_bytes();
2181    if pos >= bytes.len() || bytes[pos] != b'(' {
2182        return None;
2183    }
2184    let mut p = pos + 1;
2185    // Skip leading whitespace.
2186    while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2187        p += 1;
2188    }
2189    // Empty `()` — link with empty destination.
2190    if p < bytes.len() && bytes[p] == b')' {
2191        return Some((p + 1, String::new(), None));
2192    }
2193
2194    // Parse destination.
2195    let (dest, dest_end) = parse_link_destination(text, p)?;
2196    p = dest_end;
2197
2198    // Skip whitespace.
2199    while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2200        p += 1;
2201    }
2202
2203    // Optional title.
2204    let mut title = None;
2205    if p < bytes.len() && matches!(bytes[p], b'"' | b'\'' | b'(') {
2206        let (t, t_end) = parse_link_title(text, p)?;
2207        title = Some(t);
2208        p = t_end;
2209        while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2210            p += 1;
2211        }
2212    }
2213
2214    if p >= bytes.len() || bytes[p] != b')' {
2215        return None;
2216    }
2217    Some((p + 1, dest, title))
2218}
2219
/// Parse a link destination beginning at byte `start` of `text`.
///
/// Two shapes are recognised:
///
/// * `<...>` — everything up to the closing `>`; a line feed or a
///   nested unescaped `<` rejects the destination. The returned string
///   excludes the angle brackets.
/// * bare — runs until whitespace, an ASCII control byte, or a `)` that
///   is not balanced by an earlier `(`. Must be non-empty with all
///   parentheses balanced.
///
/// A backslash followed by another byte skips both bytes in either
/// form. Returns the raw (still escaped) destination and the byte
/// position just past it, or `None` when nothing valid was found.
fn parse_link_destination(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    let first = *bytes.get(start)?;

    if first == b'<' {
        let begin = start + 1;
        let mut cursor = begin;
        loop {
            match bytes.get(cursor).copied() {
                Some(b'>') => return Some((text[begin..cursor].to_string(), cursor + 1)),
                // Ran off the end, hit a line ending, or found a nested `<`.
                None | Some(b'\n') | Some(b'<') => return None,
                Some(b'\\') if cursor + 1 < bytes.len() => cursor += 2,
                Some(_) => cursor += 1,
            }
        }
    }

    let mut cursor = start;
    let mut open_parens: i32 = 0;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'\\' if cursor + 1 < bytes.len() => cursor += 2,
            b'(' => {
                open_parens += 1;
                cursor += 1;
            }
            // An unbalanced `)` belongs to the enclosing suffix.
            b')' if open_parens == 0 => break,
            b')' => {
                open_parens -= 1;
                cursor += 1;
            }
            // Space, tab, line feed, any other C0 control, or DEL.
            0x00..=0x20 | 0x7f => break,
            _ => cursor += 1,
        }
    }

    if cursor == start || open_parens != 0 {
        None
    } else {
        Some((text[start..cursor].to_string(), cursor))
    }
}
2275
/// Parse a link title whose opening delimiter sits at byte `start` of
/// `text`.
///
/// The delimiter may be `"`, `'` or `(`; the title runs to the first
/// unescaped matching closer (`"`, `'` or `)` respectively). A
/// backslash followed by another byte skips both bytes, so an escaped
/// closer does not terminate the title.
///
/// Returns the raw title text (delimiters excluded, escapes left as
/// written) together with the byte position just past the closing
/// delimiter. Returns `None` when `start` is out of range, when the
/// byte at `start` is not a title delimiter, or when no closer is
/// found.
fn parse_link_title(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    // Checked access: an out-of-range `start` yields `None` instead of
    // panicking, matching `parse_link_destination`.
    let close = match *bytes.get(start)? {
        b'"' => b'"',
        b'\'' => b'\'',
        b'(' => b')',
        _ => return None,
    };
    let begin = start + 1;
    let mut p = begin;
    while p < bytes.len() {
        let b = bytes[p];
        if b == b'\\' && p + 1 < bytes.len() {
            // Skip the escaped byte so an escaped closer is not a terminator.
            p += 2;
            continue;
        }
        if b == close {
            return Some((text[begin..p].to_string(), p + 1));
        }
        p += 1;
    }
    None
}
2301
2302/// Try to parse `[label]` after a `]`. Returns `(suffix_end, label_raw)`.
2303/// For the collapsed form `[]`, returns `None` here (handled separately
2304/// by `collapsed_marker_span`).
2305fn try_full_reference_suffix(
2306    text: &str,
2307    pos: usize,
2308    allow_spaced: bool,
2309) -> Option<(usize, String)> {
2310    let bytes = text.as_bytes();
2311    let bracket_pos = if allow_spaced {
2312        skip_spaced_ref_gap(bytes, pos)
2313    } else {
2314        pos
2315    };
2316    if bracket_pos >= bytes.len() || bytes[bracket_pos] != b'[' {
2317        return None;
2318    }
2319    let label_start = bracket_pos + 1;
2320    let mut p = label_start;
2321    let mut escape_next = false;
2322    while p < bytes.len() {
2323        if escape_next {
2324            escape_next = false;
2325            p += 1;
2326            continue;
2327        }
2328        match bytes[p] {
2329            b'\\' => {
2330                escape_next = true;
2331                p += 1;
2332            }
2333            b']' => break,
2334            b'[' => return None,
2335            b'\n' => {
2336                p += 1;
2337            }
2338            _ => p += 1,
2339        }
2340    }
2341    if p >= bytes.len() || bytes[p] != b']' {
2342        return None;
2343    }
2344    let label = text[label_start..p].to_string();
2345    if label.is_empty() {
2346        return None;
2347    }
2348    Some((p + 1, label))
2349}
2350
2351/// True when `text[pos..]` opens with the collapsed `[]` marker. Under
2352/// `spaced_reference_links`, whitespace before the `[]` is permitted; the
2353/// returned `Some(end)` reports the byte position past the closing `]`.
2354fn collapsed_marker_span(text: &str, pos: usize, allow_spaced: bool) -> Option<usize> {
2355    let bytes = text.as_bytes();
2356    let bracket_pos = if allow_spaced {
2357        skip_spaced_ref_gap(bytes, pos)
2358    } else {
2359        pos
2360    };
2361    if bytes.get(bracket_pos) == Some(&b'[') && bytes.get(bracket_pos + 1) == Some(&b']') {
2362        Some(bracket_pos + 2)
2363    } else {
2364        None
2365    }
2366}
2367
/// Advance past the whitespace that `spaced_reference_links` tolerates
/// between a closing `]` and the following `[` / `[]`.
///
/// Spaces and tabs are skipped freely; a line feed is skipped only
/// once. Block parsing already guarantees that no blank line appears
/// inside a single inline-parse range, so one line feed is the upper
/// bound. Returns the position of the first byte that is not part of
/// the gap (or `pos` unchanged when there is no gap).
fn skip_spaced_ref_gap(bytes: &[u8], pos: usize) -> usize {
    let mut cursor = pos;
    let mut newline_budget = 1u8;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b' ' | b'\t' => {}
            b'\n' if newline_budget > 0 => newline_budget -= 1,
            _ => break,
        }
        cursor += 1;
    }
    cursor
}
2387
2388// ============================================================================
2389// Bracket plan — byte-position-keyed view of resolved brackets, consumed by
2390// the existing emission walk in `core::parse_inline_range_impl`.
2391// ============================================================================
2392
/// Disposition of a single bracket byte after [`process_brackets`].
///
/// Values are produced by [`build_bracket_plan`] and stored in a
/// [`BracketPlan`] keyed by byte position.
#[derive(Debug, Clone)]
pub enum BracketDispo {
    /// `[` or `![` of a resolved link/image. Emission emits the LINK/IMAGE
    /// node and skips past `suffix_end`.
    ///
    /// All range fields are copied verbatim from the opener's
    /// `BracketResolution`.
    Open {
        is_image: bool,
        text_start: usize,
        text_end: usize,
        suffix_start: usize,
        suffix_end: usize,
        kind: LinkKind,
    },
    /// Pandoc-only: `[` or `![` of a bracket-shape reference pattern
    /// whose label didn't resolve. Emission wraps `[start, end)` in an
    /// `UNRESOLVED_REFERENCE` node so downstream tools can attach
    /// behavior to the bracket-shape pattern. `text_start..text_end` is
    /// the inner text range (between the outer `[`/`![` and `]`).
    ///
    /// `text_start` is the opener event's `end`; `text_end` and `end`
    /// come from the opener's recorded unresolved-reference shape.
    UnresolvedReference {
        is_image: bool,
        text_start: usize,
        text_end: usize,
        end: usize,
    },
    /// Bracket byte (one of `[`, `]`, or `!`) that fell through to literal
    /// text. Emission accumulates into the surrounding text run.
    Literal,
}
2421
/// A byte-keyed view of the IR's bracket resolutions.
///
/// Built by [`build_bracket_plan`]; the map key is the byte position of
/// the bracket character the disposition applies to.
#[derive(Debug, Default, Clone)]
pub struct BracketPlan {
    /// Disposition per bracket byte position.
    by_pos: BTreeMap<usize, BracketDispo>,
}

impl BracketPlan {
    /// Return the disposition recorded for the byte at `pos`, or `None`
    /// when the plan has no entry for that position.
    pub fn lookup(&self, pos: usize) -> Option<&BracketDispo> {
        self.by_pos.get(&pos)
    }

    /// True when the plan contains no entries at all.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2437
/// A standalone Pandoc inline construct recognised by `build_ir` and
/// dispatched directly from the emission walk.
///
/// Each variant stores only the construct's `end` position (copied from
/// the IR event); the start position is the key under which the value
/// is stored in [`ConstructPlan`]. Together they give the emission walk
/// the construct's source range, so it can slice the content for the
/// existing `emit_*` helpers without re-running the recognition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructDispo {
    /// `^[note text]` — emit via `emit_inline_footnote` after slicing
    /// the inner content.
    InlineFootnote { end: usize },
    /// `<span ...>...</span>` — emit via `emit_native_span` after
    /// re-parsing the open-tag attributes from the source range.
    NativeSpan { end: usize },
    /// `[^id]` — emit via `emit_footnote_reference` after extracting
    /// the label id from the source range.
    FootnoteReference { end: usize },
    /// `[@cite]` — emit via `emit_bracketed_citation` after slicing
    /// the inner content.
    BracketedCitation { end: usize },
    /// `@key` or `-@key` — emit via `emit_bare_citation` (or
    /// `emit_crossref` when `is_quarto_crossref_key` matches and
    /// `extensions.quarto_crossrefs` is enabled).
    BareCitation { end: usize },
    /// `[content]{attrs}` — emit via `emit_bracketed_span` after
    /// slicing the inner content and attribute string.
    BracketedSpan { end: usize },
    /// `[[url]]` / `[[url|title]]` (or image variant `![[...]]`) —
    /// emit via `emit_wikilink` after re-locating the pipe within the
    /// source range.
    WikiLink { end: usize },
}
2468
/// A byte-keyed view of the IR's standalone Pandoc constructs that the
/// emission walk consumes directly: inline footnotes, native spans,
/// footnote references, bracketed citations, bare citations,
/// bracketed spans, and wikilinks. Recognition is authoritative in
/// `build_ir` under `Dialect::Pandoc`; the dispatcher's legacy branches
/// for these constructs (`^[`, `<span>`, `[^id]`, `[@cite]`, `@cite` /
/// `-@cite`, `[text]{attrs}`) are gated to `Dialect::CommonMark` only
/// and only fire when the relevant extension is explicitly enabled.
///
/// Built by [`build_construct_plan`]; the map key is the construct's
/// start byte.
#[derive(Debug, Default, Clone)]
pub struct ConstructPlan {
    /// Construct disposition keyed by the construct's start byte.
    by_pos: BTreeMap<usize, ConstructDispo>,
}

impl ConstructPlan {
    /// Return the construct starting at byte `pos`, or `None` when no
    /// construct is recorded at that position.
    pub fn lookup(&self, pos: usize) -> Option<&ConstructDispo> {
        self.by_pos.get(&pos)
    }

    /// True when the plan contains no constructs at all.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2491
2492/// Build a [`ConstructPlan`] from the resolved IR. Each
2493/// `Construct { kind: InlineFootnote | NativeSpan, .. }` becomes one
2494/// entry keyed at its start byte.
2495pub fn build_construct_plan(events: &[IrEvent]) -> ConstructPlan {
2496    let mut by_pos: BTreeMap<usize, ConstructDispo> = BTreeMap::new();
2497    for ev in events {
2498        if let IrEvent::Construct { start, end, kind } = ev {
2499            match kind {
2500                ConstructKind::InlineFootnote => {
2501                    by_pos.insert(*start, ConstructDispo::InlineFootnote { end: *end });
2502                }
2503                ConstructKind::NativeSpan => {
2504                    by_pos.insert(*start, ConstructDispo::NativeSpan { end: *end });
2505                }
2506                ConstructKind::FootnoteReference => {
2507                    by_pos.insert(*start, ConstructDispo::FootnoteReference { end: *end });
2508                }
2509                ConstructKind::BracketedCitation => {
2510                    by_pos.insert(*start, ConstructDispo::BracketedCitation { end: *end });
2511                }
2512                ConstructKind::BareCitation => {
2513                    by_pos.insert(*start, ConstructDispo::BareCitation { end: *end });
2514                }
2515                ConstructKind::BracketedSpan => {
2516                    by_pos.insert(*start, ConstructDispo::BracketedSpan { end: *end });
2517                }
2518                ConstructKind::WikiLink => {
2519                    by_pos.insert(*start, ConstructDispo::WikiLink { end: *end });
2520                }
2521                _ => {}
2522            }
2523        }
2524    }
2525    ConstructPlan { by_pos }
2526}
2527
2528/// Build a [`BracketPlan`] from the resolved IR. Each `OpenBracket`
2529/// resolution becomes an [`BracketDispo::Open`] keyed at the opener's
2530/// start byte. Unresolved openers and unmatched closers become
2531/// `BracketDispo::Literal` so the emission path can recognise them
2532/// without re-parsing.
2533pub fn build_bracket_plan(events: &[IrEvent]) -> BracketPlan {
2534    let mut by_pos: BTreeMap<usize, BracketDispo> = BTreeMap::new();
2535    for ev in events {
2536        match ev {
2537            IrEvent::OpenBracket {
2538                start,
2539                is_image,
2540                resolution: Some(res),
2541                ..
2542            } => {
2543                by_pos.insert(
2544                    *start,
2545                    BracketDispo::Open {
2546                        is_image: *is_image,
2547                        text_start: res.text_start,
2548                        text_end: res.text_end,
2549                        suffix_start: res.suffix_start,
2550                        suffix_end: res.suffix_end,
2551                        kind: res.kind.clone(),
2552                    },
2553                );
2554            }
2555            IrEvent::OpenBracket {
2556                start,
2557                end,
2558                is_image,
2559                resolution: None,
2560                unresolved_ref: Some(shape),
2561                ..
2562            } => {
2563                by_pos.insert(
2564                    *start,
2565                    BracketDispo::UnresolvedReference {
2566                        is_image: *is_image,
2567                        text_start: *end,
2568                        text_end: shape.text_end,
2569                        end: shape.end,
2570                    },
2571                );
2572            }
2573            IrEvent::OpenBracket {
2574                start,
2575                is_image,
2576                resolution: None,
2577                unresolved_ref: None,
2578                ..
2579            } => {
2580                let len = if *is_image { 2 } else { 1 };
2581                for off in 0..len {
2582                    by_pos.insert(*start + off, BracketDispo::Literal);
2583                }
2584            }
2585            IrEvent::CloseBracket {
2586                pos,
2587                matched: false,
2588            } => {
2589                by_pos.insert(*pos, BracketDispo::Literal);
2590            }
2591            _ => {}
2592        }
2593    }
2594    BracketPlan { by_pos }
2595}
2596
/// One-shot helper: build the IR, run all passes, and return the
/// bundled [`InlinePlans`] (emphasis dispositions, bracket resolutions,
/// and standalone Pandoc constructs) — packaged together so the inline
/// emission path can consume them in one go for either dialect.
///
/// Pass ordering follows the CommonMark §6.3 reference impl: bracket
/// resolution runs first, then emphasis is processed *scoped per resolved
/// bracket pair's inner event range*, then once more on the residual
/// top-level events. This prevents emphasis pairs from forming across a
/// link's bracket boundary, which the previous "all-emphasis-then-all-
/// brackets" order got wrong (e.g. spec example #473).
///
/// `text`, `start` and `end` are forwarded unchanged to `build_ir_into`
/// (the source text and the range of it to scan). `config` supplies the
/// dialect, the refdef label map, and the `spaced_reference_links`
/// extension flag used by the passes below.
///
/// Working buffers come from the thread-local scratch pool via
/// [`ScratchEvents::checkout`] and go back to it when `scratch` is
/// dropped at the end of the call; the returned plans are built from
/// the events before that happens.
pub fn build_full_plans(
    text: &str,
    start: usize,
    end: usize,
    config: &ParserOptions,
) -> InlinePlans {
    let mut scratch = ScratchEvents::checkout();
    // `checkout` always populates `inner`, so this unwrap cannot fail.
    let bundle = scratch.inner.as_mut().unwrap();
    bundle.events.clear();
    bundle.bracket_pairs.clear();
    bundle.excluded.clear();

    build_ir_into(text, start, end, config, &mut bundle.events);
    // §6.3 bracket resolution runs for both dialects. Under CommonMark
    // it enforces refdef-aware shortcut/collapsed/full-ref resolution
    // and the §6.3 link-in-link deactivation rule. Under Pandoc it
    // performs shape-only resolution (any non-empty label resolves) and
    // skips the deactivation pass — pandoc-native is outer-wins for
    // nested links and the dispatcher's `suppress_inner_links` flag
    // suppresses inner LINK emission during LINK-text recursion.
    process_brackets(
        &mut bundle.events,
        text,
        config.refdef_labels.as_ref(),
        config.dialect,
        config.extensions.spaced_reference_links,
    );

    // Scoped emphasis pass per resolved bracket pair, innermost first.
    // We collect (open_idx, close_idx) pairs of resolved brackets and run
    // emphasis only over the events strictly between them. Innermost-first
    // ordering matters: an outer link wraps emphasis that wraps an inner
    // link, and the inner link's inner range must be paired before the
    // outer's inner range so the top-level pass sees consistent state.
    // Include both resolved-link bracket pairs and Pandoc unresolved-
    // reference bracket pairs in the scoping set. The latter wrap into
    // an `UNRESOLVED_REFERENCE` CST node, which is just as much a tree
    // boundary for emphasis as a resolved `LINK` — emphasis must not
    // pair across the wrapper's brackets, otherwise the emission walk
    // produces a non-tree-shaped CST.
    bundle.bracket_pairs.extend(
        bundle
            .events
            .iter()
            .enumerate()
            .filter_map(|(i, ev)| match ev {
                IrEvent::OpenBracket {
                    resolution: Some(res),
                    ..
                } => Some((i, res.close_event as usize)),
                IrEvent::OpenBracket {
                    resolution: None,
                    unresolved_ref: Some(shape),
                    ..
                } => Some((i, shape.close_event as usize)),
                _ => None,
            }),
    );
    // Innermost-first: sort by close_idx ascending, then open_idx descending.
    bundle
        .bracket_pairs
        .sort_by(|a, b| a.1.cmp(&b.1).then(b.0.cmp(&a.0)));
    // Iterate pairs by index so we can hold &mut bundle.events while
    // reading bundle.bracket_pairs (split borrow on disjoint fields).
    for i in 0..bundle.bracket_pairs.len() {
        let (open_idx, close_idx) = bundle.bracket_pairs[i];
        // Range is the events strictly between the opener and its closer.
        process_emphasis_in_range(&mut bundle.events, open_idx + 1, close_idx, config.dialect);
    }

    // Pandoc-only degrade pass for unresolved bracket-shape patterns
    // whose interior left any delim-run byte unmatched after the scoped
    // emphasis pass. Pandoc-native degrades such brackets to literal `[`
    // / `]` text — the user's intent was clearly not a reference. The
    // bracket_pairs entry stays so the inner delims remain in the
    // top-level exclusion mask (otherwise they'd re-enter pairing and
    // could form Emph spans with delims outside, which pandoc never
    // does — see the bug_2_emphasis_crosses_brackets_pandoc fixture).
    // Flipping `unresolved_ref` to `None` makes `build_bracket_plan`
    // emit `BracketDispo::Literal` for the bracket bytes; flipping
    // `CloseBracket.matched` to `false` does the same for the `]`.
    for i in 0..bundle.bracket_pairs.len() {
        let (open_idx, close_idx) = bundle.bracket_pairs[i];
        let is_unresolved = matches!(
            &bundle.events[open_idx],
            IrEvent::OpenBracket {
                resolution: None,
                unresolved_ref: Some(_),
                ..
            }
        );
        // Resolved links/images are never degraded.
        if !is_unresolved {
            continue;
        }
        // Interior paired cleanly (or had no pairing candidates): keep
        // the unresolved-reference wrapper.
        if !range_has_unmatched_delim_bytes(&bundle.events, open_idx + 1, close_idx) {
            continue;
        }
        if let IrEvent::OpenBracket { unresolved_ref, .. } = &mut bundle.events[open_idx] {
            *unresolved_ref = None;
        }
        if let IrEvent::CloseBracket { matched, .. } = &mut bundle.events[close_idx] {
            *matched = false;
        }
    }

    // Top-level emphasis pass: handles delim runs that fall outside any
    // collected bracket pair.
    let len = bundle.events.len();
    if bundle.bracket_pairs.is_empty() {
        // Fast path: no resolved brackets means no exclusion mask needed —
        // skip the resize-and-fill pass entirely. Common for prose
        // paragraphs without inline links.
        process_emphasis_in_range_filtered(&mut bundle.events, 0, len, None, config.dialect);
    } else {
        // Build exclusion bitmap: any delim run whose event index lies
        // inside a collected bracket pair (resolved, unresolved-reference,
        // or degraded by the pass above) is excluded from the top-level
        // pass. Implements the §6.3 boundary rule: emphasis at the top
        // level must not pair across a link's brackets.
        // `excluded` was cleared above, so every slot starts out `false`.
        bundle.excluded.resize(len, false);
        for &(open_idx, close_idx) in &bundle.bracket_pairs {
            // Marks event indices open_idx + 1 .. close_idx (exclusive).
            for slot in bundle
                .excluded
                .iter_mut()
                .take(close_idx)
                .skip(open_idx + 1)
            {
                *slot = true;
            }
        }
        process_emphasis_in_range_filtered(
            &mut bundle.events,
            0,
            len,
            Some(&bundle.excluded),
            config.dialect,
        );
    }

    // Snapshot the final event state into the three byte-keyed plans
    // before `scratch` is dropped and its buffers return to the pool.
    InlinePlans {
        emphasis: build_emphasis_plan(&bundle.events),
        brackets: build_bracket_plan(&bundle.events),
        constructs: build_construct_plan(&bundle.events),
    }
}
2751
2752/// Returns true if any [`IrEvent::DelimRun`] in the event range
2753/// `[lo, hi)` has byte coverage from its `matches` vec that is less
2754/// than the run length — i.e. at least one byte of the run failed to
2755/// pair as emphasis. Used by the Pandoc unresolved-reference degrade
2756/// pass in [`build_full_plans`].
2757///
2758/// Delim runs whose flanking rules forbid both opening *and* closing
2759/// (e.g. intraword `_` inside `foo_bar`) are skipped: those bytes were
2760/// never a pairing candidate, so an "unmatched" count for them isn't
2761/// evidence of a failed emphasis attempt. Without this exclusion every
2762/// URL or identifier with an underscore inside an unresolved bracket
2763/// pair would spuriously degrade the bracket-shape to literal text.
2764fn range_has_unmatched_delim_bytes(events: &[IrEvent], lo: usize, hi: usize) -> bool {
2765    let hi = hi.min(events.len());
2766    for ev in &events[lo..hi] {
2767        if let IrEvent::DelimRun {
2768            start,
2769            end,
2770            matches,
2771            can_open,
2772            can_close,
2773            ..
2774        } = ev
2775        {
2776            if !can_open && !can_close {
2777                continue;
2778            }
2779            let total = end - start;
2780            let matched: usize = matches.iter().map(|m| m.len as usize).sum();
2781            if matched < total {
2782                return true;
2783            }
2784        }
2785    }
2786    false
2787}
2788
/// Thread-local pool of scratch buffers used by [`build_full_plans`].
///
/// `build_full_plans` checks out one bundle for the duration of the call
/// and returns it on drop so the next call (or a recursive nested call
/// from an inline emitter) reuses the allocations. The pool is
/// per-thread — the parser is single-threaded — and bounded so a
/// long-running editor session can't accumulate stale capacity.
///
/// This type is the checkout guard: it owns one bundle between
/// `checkout` and its `Drop` impl.
struct ScratchEvents {
    /// The checked-out bundle. Set to `Some` by `checkout`; taken (and
    /// left `None`) by `Drop`.
    inner: Option<ScratchBundle>,
}
2799
/// The reusable buffers one [`build_full_plans`] call works in.
#[derive(Default)]
struct ScratchBundle {
    /// IR event list filled by `build_ir_into` and mutated by the
    /// bracket and emphasis passes.
    events: Vec<IrEvent>,
    /// `(open_idx, close_idx)` event-index pairs of the bracket pairs
    /// that scope emphasis processing.
    bracket_pairs: Vec<(usize, usize)>,
    /// Per-event exclusion mask for the top-level emphasis pass; `true`
    /// marks an event lying inside a collected bracket pair.
    excluded: Vec<bool>,
}
2806
// Per-thread stack of idle scratch bundles. `ScratchEvents::checkout`
// pops from it (falling back to a fresh default bundle) and the `Drop`
// impl of `ScratchEvents` pushes bundles back, subject to its size caps.
thread_local! {
    static IR_EVENT_POOL: std::cell::RefCell<Vec<ScratchBundle>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
2811
2812impl ScratchEvents {
2813    fn checkout() -> Self {
2814        let bundle = IR_EVENT_POOL
2815            .with(|p| p.borrow_mut().pop())
2816            .unwrap_or_default();
2817        Self {
2818            inner: Some(bundle),
2819        }
2820    }
2821}
2822
2823impl Drop for ScratchEvents {
2824    fn drop(&mut self) {
2825        if let Some(mut bundle) = self.inner.take() {
2826            bundle.events.clear();
2827            bundle.bracket_pairs.clear();
2828            bundle.excluded.clear();
2829            // Cap pool depth at 8 (deepest realistic nested-link recursion)
2830            // and drop any bundle whose `events` grew past 8K (a single
2831            // pathological paragraph shouldn't pin a huge allocation
2832            // forever).
2833            if bundle.events.capacity() <= 8192 {
2834                IR_EVENT_POOL.with(|p| {
2835                    let mut pool = p.borrow_mut();
2836                    if pool.len() < 8 {
2837                        pool.push(bundle);
2838                    }
2839                });
2840            }
2841        }
2842    }
2843}
2844
/// Bundle of plans produced by [`build_full_plans`] and consumed by the
/// inline emission walk.
#[derive(Debug, Default, Clone)]
pub struct InlinePlans {
    /// Delimiter-run dispositions, from [`build_emphasis_plan`].
    pub emphasis: EmphasisPlan,
    /// Bracket dispositions, from [`build_bracket_plan`].
    pub brackets: BracketPlan,
    /// Standalone construct dispositions, from [`build_construct_plan`].
    pub constructs: ConstructPlan,
}
2853
2854/// Convert the IR's delim-run match decisions into an [`EmphasisPlan`],
2855/// preserving the byte-keyed disposition shape the existing emission walk
2856/// consumes.
2857///
2858/// Each match on a [`DelimRun`](IrEvent::DelimRun) produces one entry in
2859/// the plan: the opener side records `Open` with the partner's source
2860/// byte and length; the closer side records `Close`. Bytes within a run
2861/// that are *not* covered by any match get a `Literal` entry, which the
2862/// emission walk uses to coalesce unmatched delimiter bytes with
2863/// surrounding plain text.
2864pub fn build_emphasis_plan(events: &[IrEvent]) -> EmphasisPlan {
2865    let mut by_pos: BTreeMap<usize, DelimChar> = BTreeMap::new();
2866    for ev in events {
2867        if let IrEvent::DelimRun {
2868            start,
2869            end,
2870            matches,
2871            ..
2872        } = ev
2873        {
2874            for m in matches {
2875                let pos = *start + m.offset_in_run as usize;
2876                let partner_run_start = match &events[m.partner_event as usize] {
2877                    IrEvent::DelimRun { start: ps, .. } => *ps,
2878                    _ => continue,
2879                };
2880                let partner_pos = partner_run_start + m.partner_offset as usize;
2881                if m.is_opener {
2882                    by_pos.insert(
2883                        pos,
2884                        DelimChar::Open {
2885                            len: m.len,
2886                            partner: partner_pos,
2887                            partner_len: m.len,
2888                            kind: m.kind,
2889                        },
2890                    );
2891                } else {
2892                    by_pos.insert(pos, DelimChar::Close);
2893                }
2894            }
2895            // Any remaining bytes (not covered by a match) are literal.
2896            for pos in *start..*end {
2897                by_pos.entry(pos).or_insert(DelimChar::Literal);
2898            }
2899        }
2900    }
2901    EmphasisPlan::from_dispositions(by_pos)
2902}
2903
2904#[cfg(test)]
2905mod tests {
2906    use super::*;
2907    use crate::options::Flavor;
2908    use crate::parser::inlines::inline_ir::DelimChar;
2909    use std::sync::Arc;
2910
2911    fn cm_opts() -> ParserOptions {
2912        let flavor = Flavor::CommonMark;
2913        ParserOptions {
2914            flavor,
2915            dialect: crate::options::Dialect::for_flavor(flavor),
2916            extensions: crate::options::Extensions::for_flavor(flavor),
2917            pandoc_compat: crate::options::PandocCompat::default(),
2918            crossref_prefixes: Vec::new(),
2919            refdef_labels: None,
2920        }
2921    }
2922
2923    fn refdefs<I: IntoIterator<Item = &'static str>>(labels: I) -> RefdefMap {
2924        Arc::new(labels.into_iter().map(|s| s.to_string()).collect())
2925    }
2926
2927    #[test]
2928    fn ir_event_range_covers_all_variants() {
2929        let txt = IrEvent::Text { start: 0, end: 5 };
2930        assert_eq!(txt.range(), (0, 5));
2931
2932        let close = IrEvent::CloseBracket {
2933            pos: 7,
2934            matched: false,
2935        };
2936        assert_eq!(close.range(), (7, 8));
2937
2938        let open = IrEvent::OpenBracket {
2939            start: 1,
2940            end: 3,
2941            is_image: true,
2942            active: true,
2943            resolution: None,
2944            unresolved_ref: None,
2945        };
2946        assert_eq!(open.range(), (1, 3));
2947    }
2948
2949    #[test]
2950    fn scan_records_text_and_delim_run() {
2951        let opts = cm_opts();
2952        let ir = build_ir("foo *bar*", 0, 9, &opts);
2953        // Expect: Text "foo ", DelimRun "*", Text "bar", DelimRun "*"
2954        assert!(matches!(ir[0], IrEvent::Text { start: 0, end: 4 }));
2955        assert!(matches!(
2956            ir[1],
2957            IrEvent::DelimRun {
2958                ch: b'*',
2959                start: 4,
2960                end: 5,
2961                ..
2962            }
2963        ));
2964        assert!(matches!(ir[2], IrEvent::Text { start: 5, end: 8 }));
2965        assert!(matches!(
2966            ir[3],
2967            IrEvent::DelimRun {
2968                ch: b'*',
2969                start: 8,
2970                end: 9,
2971                ..
2972            }
2973        ));
2974    }
2975
2976    #[test]
2977    fn scan_records_brackets() {
2978        let opts = cm_opts();
2979        let ir = build_ir("[foo]", 0, 5, &opts);
2980        assert!(matches!(
2981            ir[0],
2982            IrEvent::OpenBracket {
2983                start: 0,
2984                end: 1,
2985                is_image: false,
2986                ..
2987            }
2988        ));
2989        assert!(matches!(ir[1], IrEvent::Text { start: 1, end: 4 }));
2990        assert!(matches!(
2991            ir[2],
2992            IrEvent::CloseBracket {
2993                pos: 4,
2994                matched: false
2995            }
2996        ));
2997    }
2998
2999    #[test]
3000    fn scan_records_image_bracket() {
3001        let opts = cm_opts();
3002        let ir = build_ir("![alt]", 0, 6, &opts);
3003        assert!(matches!(
3004            ir[0],
3005            IrEvent::OpenBracket {
3006                start: 0,
3007                end: 2,
3008                is_image: true,
3009                ..
3010            }
3011        ));
3012    }
3013
3014    #[test]
3015    fn scan_handles_code_span_opacity() {
3016        let opts = cm_opts();
3017        let ir = build_ir("a `*x*` b", 0, 9, &opts);
3018        // Code span `*x*` should be a Construct, NOT delim runs.
3019        let has_delim_run = ir.iter().any(|e| matches!(e, IrEvent::DelimRun { .. }));
3020        assert!(
3021            !has_delim_run,
3022            "code span content should not produce delim runs"
3023        );
3024        assert!(ir.iter().any(|e| matches!(
3025            e,
3026            IrEvent::Construct {
3027                kind: ConstructKind::CodeSpan,
3028                ..
3029            }
3030        )));
3031    }
3032
3033    #[test]
3034    fn process_emphasis_simple_pair() {
3035        let opts = cm_opts();
3036        let mut ir = build_ir("*foo*", 0, 5, &opts);
3037        process_emphasis(&mut ir, opts.dialect);
3038        // First DelimRun (open) gets a match.
3039        let opener = ir
3040            .iter()
3041            .find(|e| matches!(e, IrEvent::DelimRun { start: 0, .. }))
3042            .unwrap();
3043        if let IrEvent::DelimRun { matches, .. } = opener {
3044            assert_eq!(matches.len(), 1);
3045            assert!(matches[0].is_opener);
3046            assert_eq!(matches[0].kind, EmphasisKind::Emph);
3047        }
3048    }
3049
3050    #[test]
3051    fn brackets_resolve_inline_link() {
3052        let opts = cm_opts();
3053        let mut ir = build_ir("[foo](/url)", 0, 11, &opts);
3054        process_brackets(&mut ir, "[foo](/url)", None, opts.dialect, false);
3055        let open = ir
3056            .iter()
3057            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
3058            .unwrap();
3059        if let IrEvent::OpenBracket { resolution, .. } = open {
3060            let r = resolution.as_ref().expect("inline link resolved");
3061            assert!(matches!(r.kind, LinkKind::Inline { .. }));
3062            if let LinkKind::Inline { dest, .. } = &r.kind {
3063                assert_eq!(dest, "/url");
3064            }
3065        }
3066    }
3067
3068    #[test]
3069    fn brackets_shortcut_resolves_only_with_refdef() {
3070        let opts = cm_opts();
3071        let text = "[foo]";
3072        let map = refdefs(["foo"]);
3073        let mut ir = build_ir(text, 0, text.len(), &opts);
3074        process_brackets(&mut ir, text, Some(&map), opts.dialect, false);
3075        let open = ir
3076            .iter()
3077            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
3078            .unwrap();
3079        if let IrEvent::OpenBracket { resolution, .. } = open {
3080            assert!(matches!(
3081                resolution.as_ref().unwrap().kind,
3082                LinkKind::ShortcutReference
3083            ));
3084        }
3085    }
3086
3087    #[test]
3088    fn brackets_shortcut_falls_through_without_refdef() {
3089        // CMark example #523 mechanic: `[bar* baz]` is not a refdef, so
3090        // it must NOT resolve as a link — the brackets stay literal so
3091        // the inner `*` becomes available to the outer emphasis scanner.
3092        let opts = cm_opts();
3093        let text = "[bar* baz]";
3094        let mut ir = build_ir(text, 0, text.len(), &opts);
3095        process_brackets(&mut ir, text, None, opts.dialect, false);
3096        let open = ir
3097            .iter()
3098            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
3099            .unwrap();
3100        if let IrEvent::OpenBracket { resolution, .. } = open {
3101            assert!(resolution.is_none(), "no refdef → bracket stays literal");
3102        }
3103    }
3104
3105    /// Spec #473: `*[bar*](/url)`. The link `[bar*](/url)` resolves; the
3106    /// outer `*...*` MUST NOT pair across the link's bracket boundary,
3107    /// because the inner `*` belongs to the link text.
3108    #[test]
3109    fn full_plans_emphasis_does_not_cross_resolved_link_boundary() {
3110        let opts = cm_opts();
3111        let text = "*[bar*](/url)";
3112        let plans = build_full_plans(text, 0, text.len(), &opts);
3113        // The leading `*` (at byte 0) must NOT be matched as an emphasis
3114        // opener — there's no closer outside the link, and the inner `*`
3115        // (at byte 5) is inside the resolved link's text range so it must
3116        // not be paired with byte 0.
3117        assert!(
3118            matches!(plans.emphasis.lookup(0), Some(DelimChar::Literal) | None),
3119            "outer `*` at byte 0 must not pair across link boundary, got {:?}",
3120            plans.emphasis.lookup(0)
3121        );
3122        // The link `[bar*](/url)` must resolve (opener at byte 1).
3123        assert!(
3124            matches!(plans.brackets.lookup(1), Some(BracketDispo::Open { .. })),
3125            "link [bar*](/url) must resolve at byte 1"
3126        );
3127    }
3128
3129    fn pandoc_opts() -> ParserOptions {
3130        let flavor = Flavor::Pandoc;
3131        ParserOptions {
3132            flavor,
3133            dialect: crate::options::Dialect::for_flavor(flavor),
3134            extensions: crate::options::Extensions::for_flavor(flavor),
3135            pandoc_compat: crate::options::PandocCompat::default(),
3136            crossref_prefixes: Vec::new(),
3137            refdef_labels: None,
3138        }
3139    }
3140
3141    /// Bug #2 (a): unresolved Pandoc bracket-shape with unmatched delim
3142    /// inside its text degrades to literal `[`/`]`. Outer emphasis pair
3143    /// across the (now-literal) brackets must form.
3144    #[test]
3145    fn full_plans_unresolved_bracket_degrades_when_inner_delim_unmatched() {
3146        let opts = pandoc_opts();
3147        let text = "*foo [bar*] baz*";
3148        let plans = build_full_plans(text, 0, text.len(), &opts);
3149        assert!(
3150            matches!(plans.brackets.lookup(5), Some(BracketDispo::Literal) | None),
3151            "degraded `[` at byte 5 must be Literal/None, got {:?}",
3152            plans.brackets.lookup(5)
3153        );
3154        assert!(
3155            matches!(plans.emphasis.lookup(0), Some(DelimChar::Open { .. })),
3156            "outer `*` at byte 0 must open Emph after degrade, got {:?}",
3157            plans.emphasis.lookup(0)
3158        );
3159    }
3160
3161    /// Intraword `_` (e.g. inside a URL like
3162    /// `hyperparameter_optimization`) is not flanking — `can_open` and
3163    /// `can_close` are both false — so it can never pair as emphasis.
3164    /// The degrade pass must not treat such delim runs as "failed
3165    /// emphasis attempts" and demote the surrounding bracket-shape to
3166    /// literal text, otherwise every URL/identifier inside an
3167    /// unresolved reference round-trips through `\[` / `\]` escapes
3168    /// under `tex_math_single_backslash` and reparses as display math.
3169    #[test]
3170    fn full_plans_unresolved_bracket_keeps_wrapper_with_intraword_underscore() {
3171        let opts = pandoc_opts();
3172        let text = "[foo_bar more]";
3173        let plans = build_full_plans(text, 0, text.len(), &opts);
3174        assert!(
3175            matches!(
3176                plans.brackets.lookup(0),
3177                Some(BracketDispo::UnresolvedReference { .. })
3178            ),
3179            "wrapper must be preserved across intraword `_`, got {:?}",
3180            plans.brackets.lookup(0)
3181        );
3182    }
3183
3184    /// Bug #2 (b): unresolved Pandoc bracket whose interior emphasis
3185    /// pairs cleanly keeps the wrapper (linter/LSP hook).
3186    #[test]
3187    fn full_plans_unresolved_bracket_keeps_wrapper_when_inner_paired() {
3188        let opts = pandoc_opts();
3189        let text = "[foo *bar*]";
3190        let plans = build_full_plans(text, 0, text.len(), &opts);
3191        assert!(
3192            matches!(
3193                plans.brackets.lookup(0),
3194                Some(BracketDispo::UnresolvedReference { .. })
3195            ),
3196            "wrapper must be preserved when inner emph pairs, got {:?}",
3197            plans.brackets.lookup(0)
3198        );
3199    }
3200
3201    /// Spec #533: `[foo *bar [baz][ref]*][ref]` with `[ref]: /uri`.
3202    /// Inner `[baz][ref]` resolves as a link; §6.3 link-in-link rule
3203    /// deactivates the outer `[foo ...][ref]` so it falls through to
3204    /// literal brackets. Emphasis `*bar [baz][ref]*` wraps the inner link.
3205    #[test]
3206    fn full_plans_link_in_link_suppression_for_reference_links() {
3207        let opts = cm_opts();
3208        let text = "[foo *bar [baz][ref]*][ref]";
3209        let mut opts_with_refs = opts.clone();
3210        let labels: HashSet<String> = ["ref".to_string()].into_iter().collect();
3211        opts_with_refs.refdef_labels = Some(std::sync::Arc::new(labels));
3212        let plans = build_full_plans(text, 0, text.len(), &opts_with_refs);
3213
3214        // Inner `[baz][ref]` opener is at byte 10 — must resolve.
3215        assert!(
3216            matches!(plans.brackets.lookup(10), Some(BracketDispo::Open { .. })),
3217            "inner [baz][ref] must resolve at byte 10, got {:?}",
3218            plans.brackets.lookup(10)
3219        );
3220        // Outer `[foo ...][ref]` opener is at byte 0 — must NOT resolve
3221        // (link-in-link suppression).
3222        assert!(
3223            matches!(plans.brackets.lookup(0), Some(BracketDispo::Literal) | None),
3224            "outer [foo ...][ref] must fall through to literal at byte 0, got {:?}",
3225            plans.brackets.lookup(0)
3226        );
3227        // Trailing `[ref]` after the outer `]` is at byte 22 — it's a
3228        // standalone shortcut reference and must resolve.
3229        assert!(
3230            matches!(plans.brackets.lookup(22), Some(BracketDispo::Open { .. })),
3231            "trailing [ref] must resolve at byte 22, got {:?}",
3232            plans.brackets.lookup(22)
3233        );
3234        // Emphasis `*...*` at bytes 5 and 20 must pair — the scoped
3235        // emphasis pass over the (deactivated) outer bracket's inner
3236        // event range pairs these.
3237        assert!(
3238            matches!(plans.emphasis.lookup(5), Some(DelimChar::Open { .. })),
3239            "emphasis opener at byte 5 must pair, got {:?}",
3240            plans.emphasis.lookup(5)
3241        );
3242    }
3243}