Skip to main content

panache_parser/parser/inlines/
inline_ir.rs

1//! Inline IR for both CommonMark and Pandoc dialects.
2//!
3//! The inline parsing pipeline runs in three passes over an intermediate
4//! representation (IR):
5//!
6//! 1. **Scan** ([`build_ir`]): walk the source bytes once, producing a flat
7//!    [`Vec<IrEvent>`]. Opaque higher-precedence constructs (escapes, code
8//!    spans, autolinks, raw HTML, plus Pandoc math / native spans / inline
9//!    footnotes / footnote references / citations / bracketed spans) are
10//!    skipped past as a single [`IrEvent::Construct`] event whose source
11//!    range is preserved for losslessness. Delimiter runs (`*`/`_`),
12//!    bracket markers (`[`, `![`, `]`), soft line breaks, and plain text
13//!    spans become distinct events.
14//!
15//! 2. **Process brackets** ([`process_brackets`]) — CommonMark §6.3: the
16//!    bracket-stack algorithm walks `]` markers left-to-right. For each
17//!    `]`, the algorithm finds the nearest active opener and tries to
18//!    resolve the pair as a link or image: inline `[text](dest)`, full
19//!    reference `[text][label]`, collapsed `[text][]`, or shortcut
20//!    `[text]`. Under CommonMark, reference forms are validated against
21//!    the document refdef map and a successful match deactivates all
22//!    earlier active openers (§6.3 "links may not contain other links").
23//!    Under Pandoc, reference forms resolve shape-only (any non-empty
24//!    label) and the deactivation pass is skipped; outer-wins nested-link
25//!    semantics are enforced by the emission walk's `suppress_inner_links`
26//!    flag instead.
27//!
28//! 3. **Process emphasis** ([`process_emphasis_in_range`]): the classic
29//!    delimiter-stack algorithm runs over the [`IrEvent::DelimRun`]
30//!    events, pairing openers with closers and recording matches on the
31//!    runs. It runs first scoped to each resolved bracket pair (innermost
32//!    first), then as a top-level pass over the residual events. Each match
33//!    consumes 1 or 2 inner-edge bytes from each side; leftover bytes
34//!    fall through to literal text. Dialect gates (Pandoc flanking rules,
35//!    mod-3 rejection, asymmetric (1,2)/(2,1) rejection, opener-count >= 4
36//!    rejection, triple-emph nesting flip, cascade-then-rerun) branch on
37//!    the `dialect` parameter.
38//!
39//! The emission walk in [`super::core::parse_inline_range_impl`] consumes
40//! three byte-keyed plans built by [`build_full_plans`]: an
41//! [`EmphasisPlan`] for delim-run dispositions, a [`BracketPlan`] for
42//! resolved link/image bracket pairs, and a [`ConstructPlan`] for
43//! standalone Pandoc constructs (inline footnotes, native spans, footnote
44//! references, citations, bracketed spans). Matched delim runs become
45//! `EMPHASIS` / `STRONG` nodes; matched bracket pairs become `LINK` /
46//! `IMAGE` nodes via the dispatcher's `try_parse_*` recognizers (called
47//! to *parse* a matched range, not to *resolve* it). Unmatched delims and
48//! brackets fall through to plain text.
49
50use crate::options::ParserOptions;
51use crate::parser::inlines::refdef_map::{RefdefMap, normalize_label};
52use std::collections::{BTreeMap, HashSet};
53
/// Which kind of emphasis a matched delimiter pair produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmphasisKind {
    /// Regular emphasis.
    Emph,
    /// Strong emphasis.
    Strong,
}

/// Disposition of a single delimiter byte after emphasis resolution.
///
/// Comparable with `==` so that the result of [`EmphasisPlan::lookup`]
/// can be checked directly against an expected disposition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DelimChar {
    /// Start of an opening marker. The marker spans `len` bytes from this
    /// position; the matching closer starts at `partner` and spans
    /// `partner_len` bytes.
    Open {
        len: u8,
        partner: usize,
        partner_len: u8,
        kind: EmphasisKind,
    },
    /// Start of a closing marker. This variant carries no data: the
    /// opener/closer link is stored only on the matching `Open` entry.
    /// Emission jumps past close markers via that `Open` entry, so this
    /// variant is only consulted defensively.
    Close,
    /// Unmatched delimiter byte; emit as literal text.
    Literal,
}

/// Byte-keyed disposition map for `*` / `_` delimiter chars produced by
/// the IR's emphasis pass and consumed by the inline emission walk.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct EmphasisPlan {
    /// Disposition of each delimiter byte, keyed by source byte position.
    by_pos: BTreeMap<usize, DelimChar>,
}

impl EmphasisPlan {
    /// Return the disposition recorded for the byte at `pos`, or `None`
    /// when the plan has no entry for that position.
    pub fn lookup(&self, pos: usize) -> Option<DelimChar> {
        self.by_pos.get(&pos).copied()
    }

    /// `true` when the plan holds no dispositions at all.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }

    /// Construct an `EmphasisPlan` from a byte-keyed disposition map.
    pub fn from_dispositions(by_pos: BTreeMap<usize, DelimChar>) -> Self {
        Self { by_pos }
    }
}
101
102use super::bracketed_spans::try_parse_bracketed_span;
103use super::citations::{try_parse_bare_citation, try_parse_bracketed_citation};
104use super::code_spans::try_parse_code_span;
105use super::escapes::{EscapeType, try_parse_escape};
106use super::inline_footnotes::{try_parse_footnote_reference, try_parse_inline_footnote};
107use super::inline_html::try_parse_inline_html;
108use super::links::{
109    LinkScanContext, try_parse_autolink, try_parse_inline_image, try_parse_inline_link,
110    try_parse_reference_image, try_parse_reference_link,
111};
112use super::math::{
113    try_parse_display_math, try_parse_double_backslash_display_math,
114    try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
115    try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
116};
117use super::native_spans::try_parse_native_span;
118
119/// One event in the inline IR.
120///
121/// Events partition the source byte range covered by the IR exactly: their
122/// `range()` values are contiguous and non-overlapping, so concatenating
123/// them reproduces the original input. This is the losslessness invariant
124/// the emission pass relies on.
125#[derive(Debug, Clone)]
126pub enum IrEvent {
127    /// Plain text byte span. Emitted as a single `TEXT` token, possibly
128    /// merged with adjacent literal-disposition delim/bracket bytes.
129    Text { start: usize, end: usize },
130
131    /// An opaque higher-precedence construct (escape, code span, autolink,
132    /// raw HTML). The emission pass re-parses these from the source byte
133    /// range using the existing per-construct emitters; we don't store a
134    /// pre-built `GreenNode` because `rowan::GreenNodeBuilder` doesn't
135    /// support inserting subtrees directly. The byte range is what makes
136    /// emission well-defined — the construct kind is recovered by the
137    /// emitter dispatching on the leading byte.
138    Construct {
139        start: usize,
140        end: usize,
141        kind: ConstructKind,
142    },
143
144    /// A `*` or `_` delimiter run. The `matches` vec is filled in by
145    /// [`process_emphasis`]; before that pass it is empty.
146    DelimRun {
147        ch: u8,
148        start: usize,
149        end: usize,
150        can_open: bool,
151        can_close: bool,
152        /// Matched fragments produced by `process_emphasis`. Each entry
153        /// is one `(byte_offset_within_run, len, partner_event_idx,
154        /// partner_byte_offset, kind, is_opener)` tuple. Empty until the
155        /// pass runs; possibly multiple entries when a single run matches
156        /// at multiple positions (e.g. a 4-run that closes 2+2 pairs).
157        matches: Vec<DelimMatch>,
158    },
159
160    /// `[` or `![` bracket marker. Resolved by [`process_brackets`].
161    OpenBracket {
162        start: usize,
163        /// `start + 1` for `[`, `start + 2` for `![`.
164        end: usize,
165        is_image: bool,
166        /// True until a later resolution rule deactivates this opener.
167        active: bool,
168        /// Filled in when the matching `CloseBracket` resolves the pair
169        /// to a link / image.
170        resolution: Option<BracketResolution>,
171        /// Pandoc-only: extents of an unresolved bracket-shape pattern
172        /// (full reference / collapsed / shortcut whose label doesn't
173        /// match a refdef). Mutually exclusive with `resolution:
174        /// Some(...)`. When `Some`, emission wraps `[start, end)` in
175        /// an `UNRESOLVED_REFERENCE` node so downstream tools can
176        /// attach behavior to the bracket-shape pattern. Always
177        /// `None` under `Dialect::CommonMark`.
178        unresolved_ref: Option<UnresolvedRefShape>,
179    },
180
181    /// `]` bracket marker. Resolved by [`process_brackets`].
182    CloseBracket {
183        pos: usize,
184        /// True if this `]` was paired with an opener and the pair was
185        /// turned into a link / image.
186        matched: bool,
187    },
188
189    /// A soft line break (a `\n` or `\r\n` ending a paragraph-internal
190    /// line). Includes the line-ending bytes verbatim.
191    SoftBreak { start: usize, end: usize },
192
193    /// A hard line break (`  \n` / `\\\n` / `   \n` etc.). Includes any
194    /// trailing-space bytes plus the line ending.
195    HardBreak { start: usize, end: usize },
196}
197
198impl IrEvent {
199    /// The source byte range this event covers.
200    pub fn range(&self) -> (usize, usize) {
201        match self {
202            IrEvent::Text { start, end }
203            | IrEvent::Construct { start, end, .. }
204            | IrEvent::DelimRun { start, end, .. }
205            | IrEvent::OpenBracket { start, end, .. }
206            | IrEvent::SoftBreak { start, end }
207            | IrEvent::HardBreak { start, end } => (*start, *end),
208            IrEvent::CloseBracket { pos, .. } => (*pos, *pos + 1),
209        }
210    }
211}
212
/// Tag carried by an [`IrEvent::Construct`] event telling emission which
/// parser to invoke when rebuilding the CST subtree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructKind {
    /// Literal-character escape `\X` (CommonMark §2.4).
    Escape,
    /// Code span `` `code` `` (§6.1).
    CodeSpan,
    /// Autolink: `<scheme://...>` or `<email@host>` (§6.5).
    Autolink,
    /// Raw inline HTML: `<tag ...>` and friends (§6.6).
    InlineHtml,
    /// A Pandoc opaque construct with no dedicated kind of its own yet
    /// (currently: math spans). `build_ir` pre-recognises it under
    /// `Dialect::Pandoc` only so that the emphasis pass sees the whole
    /// construct as opaque and delim runs inside it cannot reach across
    /// its boundary. Emission re-parses it through the dispatcher's
    /// existing `try_parse_*` chain.
    PandocOpaque,
    /// Pandoc inline footnote `^[note text]`. `build_ir` recognises it
    /// under `Dialect::Pandoc`; the emission walk consumes it through
    /// the IR's `ConstructPlan`. The dispatcher's legacy `^[` branch is
    /// gated to CommonMark dialect only.
    InlineFootnote,
    /// Pandoc native span `<span ...>...</span>`. `build_ir` recognises
    /// it under `Dialect::Pandoc`; the emission walk consumes it through
    /// the IR's `ConstructPlan`. The dispatcher's legacy `<span>` branch
    /// is gated to CommonMark dialect only.
    NativeSpan,
    /// Pandoc footnote reference `[^id]`. `build_ir` recognises it under
    /// `Dialect::Pandoc`; the emission walk consumes it through the IR's
    /// `ConstructPlan`. The dispatcher's legacy `[^id]` branch is gated
    /// to CommonMark dialect only.
    FootnoteReference,
    /// Pandoc bracketed citation: `[@key]`, `[see @key, p. 1]`,
    /// `[@a; @b]`. `build_ir` recognises it under `Dialect::Pandoc`; the
    /// emission walk consumes it through the IR's `ConstructPlan`. The
    /// dispatcher's legacy `[@cite]` branch is gated to CommonMark
    /// dialect only.
    BracketedCitation,
    /// Pandoc bare citation `@key` or `-@key` (author-in-text /
    /// suppress-author). `build_ir` recognises it under
    /// `Dialect::Pandoc`; the emission walk consumes it through the IR's
    /// `ConstructPlan`. The dispatcher's legacy `@` and `-@` branches
    /// are gated to CommonMark dialect only.
    BareCitation,
    /// Pandoc bracketed span `[content]{attrs}`. `build_ir` recognises
    /// it under `Dialect::Pandoc`; the emission walk consumes it through
    /// the IR's `ConstructPlan`. The dispatcher's legacy `[text]{attrs}`
    /// branch is gated to CommonMark dialect only.
    BracketedSpan,
}
265
266/// One matched fragment within a [`IrEvent::DelimRun`].
267#[derive(Debug, Clone, Copy)]
268pub struct DelimMatch {
269    /// Byte offset of this fragment relative to the run's `start`.
270    pub offset_in_run: u8,
271    /// Number of bytes in this fragment (1 or 2).
272    pub len: u8,
273    /// Whether this fragment is the opener (`true`) or closer of the pair.
274    pub is_opener: bool,
275    /// IR event index of the partner run.
276    pub partner_event: u32,
277    /// Byte offset within the partner run of the partner fragment.
278    pub partner_offset: u8,
279    /// Emphasis kind (Emph for `len == 1`, Strong for `len == 2`).
280    pub kind: EmphasisKind,
281}
282
/// Pandoc-only: the extents of a bracket-shape reference pattern that
/// did not resolve. Stored in `IrEvent::OpenBracket.unresolved_ref` when
/// the no-resolution fall-through fires under `Dialect::Pandoc`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnresolvedRefShape {
    /// Index, in the IR event list, of the matching `CloseBracket`. The
    /// scoped-emphasis pass uses it to treat the wrapper as a tree
    /// boundary.
    pub close_event: u32,
    /// One past the last byte of the inner text, i.e. the byte position
    /// of the outer `]`. Together with the opener's `end` field this
    /// delimits the inner text range that goes through normal inline
    /// parsing.
    pub text_end: usize,
    /// One past the last byte of the whole bracket-shape pattern:
    /// `close_pos + 1` for the shortcut form `[text]`, `close_pos + 3`
    /// for the collapsed form `[text][]`, and the byte after the closing
    /// `]` of `[label]` for the full form `[text][label]`.
    pub end: usize,
}
301
302/// Successful bracket resolution: the `[`...`]` pair is a link or image.
303#[derive(Debug, Clone)]
304pub struct BracketResolution {
305    /// IR event index of the matching `CloseBracket`.
306    pub close_event: u32,
307    /// Source range of the link text (between `[`/`![` and `]`).
308    pub text_start: usize,
309    pub text_end: usize,
310    /// Source range of the link suffix (`(...)`, `[label]`, `[]`, or
311    /// empty for shortcut). When `kind == ShortcutReference`,
312    /// `suffix_start == suffix_end == close_pos + 1`.
313    pub suffix_start: usize,
314    pub suffix_end: usize,
315    pub kind: LinkKind,
316}
317
/// The link / image form a bracket pair was resolved to.
#[derive(Debug, Clone)]
pub enum LinkKind {
    /// Inline form: `[text](dest)` or `[text](dest "title")`.
    Inline { dest: String, title: Option<String> },
    /// Full (explicit) reference: `[text][label]`.
    FullReference { label: String },
    /// Collapsed reference `[text][]`; the link text doubles as the
    /// label.
    CollapsedReference,
    /// Shortcut reference `[text]`; the link text doubles as the label.
    ShortcutReference,
}
330
331// ============================================================================
332// Pass 1: Scan
333// ============================================================================
334
335/// Scan `text[start..end]` once, producing a flat IR of events.
336///
337/// The scan is forward-only and never backtracks: each iteration either
338/// consumes a known construct (escape, code span, autolink, raw HTML),
339/// records a delim run / bracket marker / line break, or steps past a
340/// single UTF-8 boundary as plain text. Adjacent text bytes are coalesced
341/// into a single [`IrEvent::Text`] event by the run-flush step.
342pub fn build_ir(text: &str, start: usize, end: usize, config: &ParserOptions) -> Vec<IrEvent> {
343    let mut events = Vec::new();
344    build_ir_into(text, start, end, config, &mut events);
345    events
346}
347
348/// Like [`build_ir`] but writes into a caller-provided `Vec<IrEvent>`,
349/// clearing it first. Used by [`build_full_plans`] to amortise the
350/// per-call allocation through a thread-local scratch pool.
351pub(super) fn build_ir_into(
352    text: &str,
353    start: usize,
354    end: usize,
355    config: &ParserOptions,
356    events: &mut Vec<IrEvent>,
357) {
358    events.clear();
359    let bytes = text.as_bytes();
360    let exts = &config.extensions;
361    let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
362
363    let mut pos = start;
364    let mut text_run_start = start;
365    // Pandoc-only: extent of the current bracket-shape link/image's
366    // opaque range. While `pos < pandoc_bracket_extent`, autolinks /
367    // raw HTML / native spans are NOT recognised — pandoc-native
368    // treats `[link text]` as opaque to those constructs (CommonMark
369    // spec example #526 / #538). The lookahead at `[`/`![` sets this
370    // when a bracket-shape forms a valid link/image; once `pos`
371    // passes the extent, normal scanning resumes. CommonMark
372    // dialect's link-text-vs-autolink ordering is handled by the
373    // dispatcher's `try_parse_inline_link` rejecting outer matches
374    // when the link text contains a valid autolink (a different
375    // mechanism, see `LinkScanContext.skip_autolinks`).
376    let mut pandoc_bracket_extent: usize = 0;
377
378    // Pre-computed byte mask: `mask[b]` is `true` iff byte `b` could
379    // start any IR-recognised construct under the current dialect /
380    // extensions. Used to bulk-skip plain bytes between structural
381    // bytes — the per-byte branch chain below only runs at positions
382    // where a construct is actually possible. Non-ASCII bytes
383    // (>= 0x80) are never structural and are skipped together with
384    // ASCII plain text.
385    let mask = build_ir_byte_mask(config);
386
387    macro_rules! flush_text {
388        () => {
389            if pos > text_run_start {
390                events.push(IrEvent::Text {
391                    start: text_run_start,
392                    end: pos,
393                });
394            }
395        };
396    }
397
398    while pos < end {
399        // Fast-skip plain bytes. `text_run_start` is preserved across
400        // the skip so the next structural-event flush picks them up.
401        while pos < end && !mask[bytes[pos] as usize] {
402            pos += 1;
403        }
404        if pos >= end {
405            break;
406        }
407        let b = bytes[pos];
408
409        // Pandoc-only: at `[` or `![`, look ahead to see if this
410        // bracket-shape forms a valid link/image. If so, suppress
411        // autolink / raw HTML / native span recognition until `pos`
412        // passes the bracket-shape's end. Skipped if we're already
413        // inside an enclosing bracket-shape's opaque range.
414        if !is_commonmark
415            && pos >= pandoc_bracket_extent
416            && (b == b'[' || (b == b'!' && pos + 1 < end && bytes[pos + 1] == b'['))
417            && let Some(len) = try_pandoc_bracket_link_extent(text, pos, end, config)
418        {
419            pandoc_bracket_extent = pos + len;
420        }
421        let in_pandoc_bracket = !is_commonmark && pos < pandoc_bracket_extent;
422
423        // Backslash escape (§2.4) — including `\\\n` hard line break.
424        if b == b'\\'
425            && let Some((len, _ch, escape_type)) = try_parse_escape(&text[pos..])
426            && pos + len <= end
427        {
428            let enabled = match escape_type {
429                EscapeType::Literal => is_commonmark || exts.all_symbols_escapable,
430                EscapeType::HardLineBreak => exts.escaped_line_breaks,
431                EscapeType::NonbreakingSpace => exts.all_symbols_escapable,
432            };
433            if enabled {
434                flush_text!();
435                let kind = match escape_type {
436                    EscapeType::HardLineBreak => {
437                        events.push(IrEvent::HardBreak {
438                            start: pos,
439                            end: pos + len,
440                        });
441                        pos += len;
442                        text_run_start = pos;
443                        continue;
444                    }
445                    EscapeType::Literal | EscapeType::NonbreakingSpace => ConstructKind::Escape,
446                };
447                events.push(IrEvent::Construct {
448                    start: pos,
449                    end: pos + len,
450                    kind,
451                });
452                pos += len;
453                text_run_start = pos;
454                continue;
455            }
456        }
457
458        // Code span (§6.1) — opaque to emphasis and brackets.
459        if b == b'`'
460            && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
461            && pos + len <= end
462        {
463            flush_text!();
464            events.push(IrEvent::Construct {
465                start: pos,
466                end: pos + len,
467                kind: ConstructKind::CodeSpan,
468            });
469            pos += len;
470            text_run_start = pos;
471            continue;
472        }
473
474        // Pandoc-only: math spans are opaque to emphasis. The legacy
475        // `parse_until_closer_with_nested_*` skip-list includes inline
476        // math; without recognising it here, delim runs inside `$math$`
477        // would be picked up by the emphasis pass and break losslessness
478        // (the dispatcher's math parser would later re-claim the bytes,
479        // duplicating content).
480        if !is_commonmark && let Some(len) = try_pandoc_math_opaque(text, pos, end, config) {
481            flush_text!();
482            events.push(IrEvent::Construct {
483                start: pos,
484                end: pos + len,
485                kind: ConstructKind::PandocOpaque,
486            });
487            pos += len;
488            text_run_start = pos;
489            continue;
490        }
491
492        // Pandoc-only: native span `<span ...>...</span>`. Must come
493        // before the generic autolink/raw-html branches so the open tag
494        // doesn't get claimed as inline HTML. Span content is opaque to
495        // the emphasis pass; emission consumes the event via the IR's
496        // `ConstructPlan`. Suppressed inside Pandoc bracket-shape
497        // link/image text.
498        if !is_commonmark
499            && !in_pandoc_bracket
500            && b == b'<'
501            && exts.native_spans
502            && let Some((len, _, _)) = try_parse_native_span(&text[pos..])
503            && pos + len <= end
504        {
505            flush_text!();
506            events.push(IrEvent::Construct {
507                start: pos,
508                end: pos + len,
509                kind: ConstructKind::NativeSpan,
510            });
511            pos += len;
512            text_run_start = pos;
513            continue;
514        }
515
516        // Autolink (§6.5) before raw HTML — autolinks are the more
517        // specific shape inside `<...>`. Both are suppressed inside
518        // Pandoc bracket-shape link/image text (pandoc-native treats
519        // link text as opaque to autolinks and raw HTML).
520        if b == b'<' && !in_pandoc_bracket {
521            if exts.autolinks
522                && let Some((len, _)) = try_parse_autolink(&text[pos..], is_commonmark)
523                && pos + len <= end
524            {
525                flush_text!();
526                events.push(IrEvent::Construct {
527                    start: pos,
528                    end: pos + len,
529                    kind: ConstructKind::Autolink,
530                });
531                pos += len;
532                text_run_start = pos;
533                continue;
534            }
535            if exts.raw_html
536                && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
537                && pos + len <= end
538            {
539                flush_text!();
540                events.push(IrEvent::Construct {
541                    start: pos,
542                    end: pos + len,
543                    kind: ConstructKind::InlineHtml,
544                });
545                pos += len;
546                text_run_start = pos;
547                continue;
548            }
549        }
550
551        // Pandoc-only: inline footnote `^[note]`. Recognized at scan
552        // time so the emphasis pass treats it as opaque (delim runs
553        // inside the footnote can't pair with delim runs outside).
554        if !is_commonmark
555            && b == b'^'
556            && exts.inline_footnotes
557            && let Some((len, _)) = try_parse_inline_footnote(&text[pos..])
558            && pos + len <= end
559        {
560            flush_text!();
561            events.push(IrEvent::Construct {
562                start: pos,
563                end: pos + len,
564                kind: ConstructKind::InlineFootnote,
565            });
566            pos += len;
567            text_run_start = pos;
568            continue;
569        }
570
571        // Pandoc-only: footnote reference `[^id]`. Recognised at scan
572        // time so the emphasis pass treats it as opaque (delim runs
573        // inside the label can't pair with delim runs outside) and the
574        // emission walk dispatches it directly via the IR's
575        // `ConstructPlan`. Must come before the generic bracket-opaque
576        // scan so the dedicated kind wins.
577        if !is_commonmark
578            && b == b'['
579            && pos + 1 < end
580            && bytes[pos + 1] == b'^'
581            && exts.footnotes
582            && let Some((len, _)) = try_parse_footnote_reference(&text[pos..])
583            && pos + len <= end
584        {
585            flush_text!();
586            events.push(IrEvent::Construct {
587                start: pos,
588                end: pos + len,
589                kind: ConstructKind::FootnoteReference,
590            });
591            pos += len;
592            text_run_start = pos;
593            continue;
594        }
595
596        // Pandoc-only: bracketed citation `[@cite]`. Recognised at
597        // scan time so the emphasis pass treats it as opaque (delim
598        // runs inside the citation can't pair with delim runs outside)
599        // and the emission walk dispatches it directly via the IR's
600        // `ConstructPlan`. Must come before the generic bracket-opaque
601        // scan so the dedicated kind wins.
602        if !is_commonmark
603            && b == b'['
604            && exts.citations
605            && let Some((len, _)) = try_parse_bracketed_citation(&text[pos..])
606            && pos + len <= end
607        {
608            flush_text!();
609            events.push(IrEvent::Construct {
610                start: pos,
611                end: pos + len,
612                kind: ConstructKind::BracketedCitation,
613            });
614            pos += len;
615            text_run_start = pos;
616            continue;
617        }
618
619        // Pandoc-only: bare citation `@key` or `-@key`. Recognised at
620        // scan time so the emission walk dispatches it directly via
621        // the IR's `ConstructPlan`. Bare citations don't contain
622        // emphasis-eligible content, so opacity is moot here — IR
623        // participation is only for dispatch consolidation.
624        if !is_commonmark
625            && (b == b'@' || (b == b'-' && pos + 1 < end && bytes[pos + 1] == b'@'))
626            && (exts.citations || exts.quarto_crossrefs)
627            && let Some((len, _, _)) = try_parse_bare_citation(&text[pos..])
628            && pos + len <= end
629        {
630            flush_text!();
631            events.push(IrEvent::Construct {
632                start: pos,
633                end: pos + len,
634                kind: ConstructKind::BareCitation,
635            });
636            pos += len;
637            text_run_start = pos;
638            continue;
639        }
640
641        // Pandoc-only: bracketed span `[content]{attrs}`. Recognised
642        // at scan time so the emphasis pass treats it as opaque (delim
643        // runs inside the span content can't pair with delim runs
644        // outside) and the emission walk dispatches it directly via
645        // the IR's `ConstructPlan`. Must come before the generic
646        // bracket-opaque scan so the dedicated kind wins.
647        // `try_parse_bracketed_span` requires `]` to be immediately
648        // followed by `{`, so this never shadows inline links
649        // (`[text](url)`) or reference links (`[label][refdef]`) —
650        // those don't have the `{attrs}` suffix.
651        if !is_commonmark
652            && b == b'['
653            && exts.bracketed_spans
654            && let Some((len, _, _)) = try_parse_bracketed_span(&text[pos..])
655            && pos + len <= end
656        {
657            flush_text!();
658            events.push(IrEvent::Construct {
659                start: pos,
660                end: pos + len,
661                kind: ConstructKind::BracketedSpan,
662            });
663            pos += len;
664            text_run_start = pos;
665            continue;
666        }
667
668        // `![` opens an image bracket. Recognised whenever any
669        // image-producing extension is on — `inline_images` for the
670        // `![alt](url)` form, or `reference_links` for the
671        // `![alt][label]` reference-image form (e.g. MultiMarkdown
672        // disables `inline_images` but uses reference images).
673        if b == b'!'
674            && pos + 1 < end
675            && bytes[pos + 1] == b'['
676            && (exts.inline_images || exts.reference_links)
677        {
678            flush_text!();
679            events.push(IrEvent::OpenBracket {
680                start: pos,
681                end: pos + 2,
682                is_image: true,
683                active: true,
684                resolution: None,
685                unresolved_ref: None,
686            });
687            pos += 2;
688            text_run_start = pos;
689            continue;
690        }
691
692        // `[` opens a link bracket. Recognised whenever any
693        // link-producing extension is on — `inline_links` for
694        // `[text](url)`, or `reference_links` for `[text][label]` /
695        // `[text]` shortcut form.
696        if b == b'[' && (exts.inline_links || exts.reference_links) {
697            flush_text!();
698            events.push(IrEvent::OpenBracket {
699                start: pos,
700                end: pos + 1,
701                is_image: false,
702                active: true,
703                resolution: None,
704                unresolved_ref: None,
705            });
706            pos += 1;
707            text_run_start = pos;
708            continue;
709        }
710
711        // `]` closes a link/image bracket.
712        if b == b']' {
713            flush_text!();
714            events.push(IrEvent::CloseBracket {
715                pos,
716                matched: false,
717            });
718            pos += 1;
719            text_run_start = pos;
720            continue;
721        }
722
723        // `*` or `_` delimiter run.
724        if b == b'*' || b == b'_' {
725            flush_text!();
726            let mut run_end = pos;
727            while run_end < end && bytes[run_end] == b {
728                run_end += 1;
729            }
730            let count = run_end - pos;
731            let (can_open, can_close) = compute_flanking(text, pos, count, b, config.dialect);
732            events.push(IrEvent::DelimRun {
733                ch: b,
734                start: pos,
735                end: run_end,
736                can_open,
737                can_close,
738                matches: Vec::new(),
739            });
740            pos = run_end;
741            text_run_start = pos;
742            continue;
743        }
744
745        // Hard line break: 2+ trailing spaces before newline. We detect
746        // this when we're sitting on a `\n` (or `\r\n`) and the preceding
747        // bytes within the current text run are spaces.
748        if b == b'\n' || (b == b'\r' && pos + 1 < end && bytes[pos + 1] == b'\n') {
749            // Count trailing spaces in the text accumulated so far.
750            let nl_len = if b == b'\r' { 2 } else { 1 };
751            let mut trailing_spaces = 0;
752            let mut s = pos;
753            while s > text_run_start && bytes[s - 1] == b' ' {
754                trailing_spaces += 1;
755                s -= 1;
756            }
757            if trailing_spaces >= 2 {
758                // Flush text *before* the trailing spaces.
759                if s > text_run_start {
760                    events.push(IrEvent::Text {
761                        start: text_run_start,
762                        end: s,
763                    });
764                }
765                events.push(IrEvent::HardBreak {
766                    start: s,
767                    end: pos + nl_len,
768                });
769                pos += nl_len;
770                text_run_start = pos;
771                continue;
772            }
773
774            // Soft line break: flush preceding text, emit the line ending
775            // as its own event so the emitter can render `NEWLINE` tokens
776            // verbatim.
777            flush_text!();
778            events.push(IrEvent::SoftBreak {
779                start: pos,
780                end: pos + nl_len,
781            });
782            pos += nl_len;
783            text_run_start = pos;
784            continue;
785        }
786
787        // Plain byte — advance one UTF-8 char.
788        let ch_len = text[pos..]
789            .chars()
790            .next()
791            .map_or(1, std::primitive::char::len_utf8);
792        pos += ch_len.max(1);
793    }
794
795    flush_text!();
796}
797
798/// Build a 256-entry mask: `mask[b]` is `true` iff byte `b` could start
799/// any IR-recognised construct under the current dialect / extensions.
800///
801/// This is the build-IR-specific superset of "is this byte interesting".
802/// Plain bytes between structural bytes are bulk-skipped via this mask
803/// in the [`build_ir`] hot loop; missing a byte here is a correctness
804/// bug (we'd skip past a real construct), but having extras only costs
805/// us a wasted branch round-trip.
806fn build_ir_byte_mask(config: &ParserOptions) -> [bool; 256] {
807    let mut mask = [false; 256];
808    let exts = &config.extensions;
809    let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
810
811    // Always structural for IR scanning:
812    //   `\n` / `\r` — soft / hard breaks
813    //   `\\`        — escape, hard line break, backslash math
814    //   `` ` ``     — code span (IR construct)
815    //   `*` / `_`   — emphasis delim runs (IR core)
816    mask[b'\n' as usize] = true;
817    mask[b'\r' as usize] = true;
818    mask[b'\\' as usize] = true;
819    mask[b'`' as usize] = true;
820    mask[b'*' as usize] = true;
821    mask[b'_' as usize] = true;
822
823    // Brackets: scanned whenever any bracket-shaped construct is
824    // reachable. `]` is structural unconditionally if `[` is — the IR
825    // emits a CloseBracket event regardless of which opener variant
826    // matches. `!` is gated on image-producing extensions; the leading
827    // `!` of `![alt]` is the only image entry point.
828    if exts.inline_links
829        || exts.reference_links
830        || exts.inline_images
831        || exts.bracketed_spans
832        || exts.footnotes
833        || exts.citations
834    {
835        mask[b'[' as usize] = true;
836        mask[b']' as usize] = true;
837    }
838    if exts.inline_images || exts.reference_links {
839        mask[b'!' as usize] = true;
840    }
841
842    // `<` covers autolinks, raw HTML, and Pandoc native spans.
843    if exts.autolinks || exts.raw_html || (!is_commonmark && exts.native_spans) {
844        mask[b'<' as usize] = true;
845    }
846
847    // `^` covers Pandoc inline footnotes (`^[...]` recognised in IR
848    // under Pandoc dialect). CM dialect inline footnotes go through
849    // the dispatcher, not the IR.
850    if !is_commonmark && exts.inline_footnotes {
851        mask[b'^' as usize] = true;
852    }
853
854    // `@` covers Pandoc bare citation `@key` and `[@cite]`. The leading
855    // `[` of `[@cite]` is already in the mask via the bracket gate;
856    // gating `@` here also covers the bare-citation form.
857    if !is_commonmark && (exts.citations || exts.quarto_crossrefs) {
858        mask[b'@' as usize] = true;
859        // `-` only matters as the first byte of `-@cite`. Tracking it
860        // here avoids missing the suppress-author bare citation form.
861        mask[b'-' as usize] = true;
862    }
863
864    // `$` covers Pandoc dollar / GFM math. CM doesn't recognise math
865    // in `build_ir`.
866    if !is_commonmark
867        && (exts.tex_math_dollars
868            || exts.tex_math_gfm
869            || exts.tex_math_single_backslash
870            || exts.tex_math_double_backslash)
871    {
872        mask[b'$' as usize] = true;
873    }
874
875    mask
876}
877
878// ============================================================================
879// Flanking (CommonMark §6.2)
880// ============================================================================
881
882fn compute_flanking(
883    text: &str,
884    pos: usize,
885    count: usize,
886    ch: u8,
887    dialect: crate::options::Dialect,
888) -> (bool, bool) {
889    if dialect == crate::options::Dialect::Pandoc {
890        // Pandoc-markdown's recursive-descent emphasis parser does NOT
891        // apply CommonMark §6.2 flanking rules. Instead it gates on:
892        //   - opener: must not be followed by whitespace (Pandoc
893        //     `try_parse_emphasis` line 247 in legacy core.rs).
894        //   - closer: no flanking gate at all (Pandoc-markdown's
895        //     `ender` parser only counts characters; see Markdown.hs
896        //     in pandoc/src/Text/Pandoc/Readers/Markdown.hs).
897        //   - underscore intraword hard rule: `_` adjacent to an
898        //     alphanumeric on either side cannot open / close
899        //     (Pandoc's `intraword_underscores` extension default).
900        let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
901        let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
902        let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
903
904        let mut can_open = !followed_by_ws;
905        // Pandoc-markdown's `ender` (in pandoc/Readers/Markdown.hs)
906        // has no flanking restriction on closers — just a count match.
907        // Set can_close unconditionally; the per-pair match logic in
908        // `process_emphasis_in_range_filtered` constrains pairing via
909        // the equal-count rule.
910        let mut can_close = true;
911
912        if ch == b'_' {
913            let prev_is_alnum = prev_char.is_some_and(|c| c.is_alphanumeric());
914            let next_is_alnum = next_char.is_some_and(|c| c.is_alphanumeric());
915            if prev_is_alnum {
916                can_open = false;
917            }
918            if next_is_alnum {
919                can_close = false;
920            }
921        }
922
923        return (can_open, can_close);
924    }
925
926    // CommonMark §6.2 flanking.
927    let lf = is_left_flanking(text, pos, count);
928    let rf = is_right_flanking(text, pos, count);
929    if ch == b'*' {
930        (lf, rf)
931    } else {
932        let prev_char = (pos > 0).then(|| text[..pos].chars().last()).flatten();
933        let next_char = text.get(pos + count..).and_then(|s| s.chars().next());
934        let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
935        let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
936        let can_open = lf && (!rf || preceded_by_punct);
937        let can_close = rf && (!lf || followed_by_punct);
938        (can_open, can_close)
939    }
940}
941
942/// Pandoc-only: identify a math span starting at `pos` and return its
943/// byte length. Tries `$math$` and `$$display$$` (gated on
944/// `tex_math_dollars`), GFM `$math$` (gated on `tex_math_gfm`), and the
945/// `\(math\)` / `\[math\]` / `\\(math\\)` / `\\[math\\]` backslash
946/// forms (gated on `tex_math_single_backslash` / `_double_backslash`).
947/// Math content is opaque to emphasis: `$a * b$` must not produce an
948/// emphasis closer at the inner `*`.
949fn try_pandoc_math_opaque(
950    text: &str,
951    pos: usize,
952    end: usize,
953    config: &ParserOptions,
954) -> Option<usize> {
955    let bytes = text.as_bytes();
956    let exts = &config.extensions;
957    let b = bytes[pos];
958
959    if exts.tex_math_dollars && b == b'$' {
960        if let Some((len, _)) = try_parse_display_math(&text[pos..])
961            && pos + len <= end
962        {
963            return Some(len);
964        }
965        if let Some((len, _)) = try_parse_inline_math(&text[pos..])
966            && pos + len <= end
967        {
968            return Some(len);
969        }
970    }
971    if exts.tex_math_gfm
972        && b == b'$'
973        && let Some((len, _)) = try_parse_gfm_inline_math(&text[pos..])
974        && pos + len <= end
975    {
976        return Some(len);
977    }
978    if exts.tex_math_double_backslash && b == b'\\' {
979        if let Some((len, _)) = try_parse_double_backslash_display_math(&text[pos..])
980            && pos + len <= end
981        {
982            return Some(len);
983        }
984        if let Some((len, _)) = try_parse_double_backslash_inline_math(&text[pos..])
985            && pos + len <= end
986        {
987            return Some(len);
988        }
989    }
990    if exts.tex_math_single_backslash && b == b'\\' {
991        if let Some((len, _)) = try_parse_single_backslash_display_math(&text[pos..])
992            && pos + len <= end
993        {
994            return Some(len);
995        }
996        if let Some((len, _)) = try_parse_single_backslash_inline_math(&text[pos..])
997            && pos + len <= end
998        {
999            return Some(len);
1000        }
1001    }
1002    None
1003}
1004
1005/// Pandoc-only: identify a bracket-shaped opaque construct starting at
1006/// `pos` and return its byte length. Tries the dispatcher's precedence
1007/// order:
1008///   1. `![alt](dest)` inline image
1009///   2. `![alt][ref]` / `![alt]` reference image (shape-only opacity)
1010///   3. `[^id]` footnote reference
1011///   4. `[text](dest)` inline link
1012///   5. `[text][ref]` / `[text]` reference link (shape-only opacity)
1013///   6. `[@cite]` bracketed citation
1014///   7. `[text]{attrs}` bracketed span
1015///
1016/// Returns `None` if the bytes at `pos` don't open any recognised Pandoc
1017/// bracket-shaped construct. In that case the scanner falls through to
1018/// the generic `OpenBracket`/`CloseBracket` emission and the dispatcher
1019/// emits the bracket bytes as literal text (or as plain emphasis if the
1020/// pattern matches an opener).
1021/// Lookahead helper: at a `[` or `![` byte under Pandoc dialect, return
1022/// the total byte length of the bracket-shape link/image if it forms a
1023/// valid one, else `None`. Used by `build_ir` to suppress autolink /
1024/// raw HTML / native span recognition inside Pandoc link text —
1025/// pandoc-native treats link text as opaque to those constructs
1026/// (CommonMark spec example #526 / #538 differs). Mirrors the
1027/// dispatcher's `try_parse_*` precedence so the lookahead, the IR's
1028/// `process_brackets` resolution, and the dispatcher's emission agree
1029/// on the bracket-shape's byte boundaries.
1030fn try_pandoc_bracket_link_extent(
1031    text: &str,
1032    pos: usize,
1033    end: usize,
1034    config: &ParserOptions,
1035) -> Option<usize> {
1036    let bytes = text.as_bytes();
1037    let exts = &config.extensions;
1038    let ctx = LinkScanContext::from_options(config);
1039    let allow_shortcut = exts.shortcut_reference_links;
1040
1041    // `![...]` images.
1042    if bytes[pos] == b'!' {
1043        if pos + 1 >= end || bytes[pos + 1] != b'[' {
1044            return None;
1045        }
1046        if exts.inline_images
1047            && let Some((len, _, _, _)) = try_parse_inline_image(&text[pos..], ctx)
1048            && pos + len <= end
1049        {
1050            return Some(len);
1051        }
1052        if exts.reference_links
1053            && let Some((len, _, _, _)) = try_parse_reference_image(&text[pos..], allow_shortcut)
1054            && pos + len <= end
1055        {
1056            return Some(len);
1057        }
1058        return None;
1059    }
1060
1061    // `[...]` openers — try in dispatcher order. Footnote refs
1062    // (`[^id]`), bracketed citations (`[@cite]`), and bracketed spans
1063    // (`[text]{attrs}`) are recognised by their own dedicated branches
1064    // in `build_ir` and don't need this lookahead.
1065    if exts.inline_links
1066        && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..], false, ctx)
1067        && pos + len <= end
1068    {
1069        return Some(len);
1070    }
1071    if exts.reference_links
1072        && let Some((len, _, _, _)) =
1073            try_parse_reference_link(&text[pos..], allow_shortcut, exts.inline_links, ctx)
1074        && pos + len <= end
1075    {
1076        return Some(len);
1077    }
1078
1079    None
1080}
1081
/// Report whether `c` counts as punctuation (or a symbol) for the flanking
/// rules.
///
/// ASCII characters defer to `char::is_ascii_punctuation`. Any non-ASCII
/// character qualifies when it is neither alphanumeric nor whitespace.
fn is_unicode_punct_or_symbol(c: char) -> bool {
    match c {
        ascii if ascii.is_ascii() => ascii.is_ascii_punctuation(),
        other => !(other.is_alphanumeric() || other.is_whitespace()),
    }
}
1089
1090fn is_left_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1091    let after = run_start + run_len;
1092    let next_char = text.get(after..).and_then(|s| s.chars().next());
1093    let prev_char = (run_start > 0)
1094        .then(|| text[..run_start].chars().last())
1095        .flatten();
1096
1097    let followed_by_ws = next_char.is_none_or(|c| c.is_whitespace());
1098    if followed_by_ws {
1099        return false;
1100    }
1101    let followed_by_punct = next_char.is_some_and(is_unicode_punct_or_symbol);
1102    if !followed_by_punct {
1103        return true;
1104    }
1105    prev_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1106}
1107
1108fn is_right_flanking(text: &str, run_start: usize, run_len: usize) -> bool {
1109    let after = run_start + run_len;
1110    let next_char = text.get(after..).and_then(|s| s.chars().next());
1111    let prev_char = (run_start > 0)
1112        .then(|| text[..run_start].chars().last())
1113        .flatten();
1114
1115    let preceded_by_ws = prev_char.is_none_or(|c| c.is_whitespace());
1116    if preceded_by_ws {
1117        return false;
1118    }
1119    let preceded_by_punct = prev_char.is_some_and(is_unicode_punct_or_symbol);
1120    if !preceded_by_punct {
1121        return true;
1122    }
1123    next_char.is_none_or(|c| c.is_whitespace() || is_unicode_punct_or_symbol(c))
1124}
1125
// ============================================================================
// Pass 3: Process emphasis (CommonMark §6.2)
// ============================================================================
1129
1130/// Run the CommonMark §6.3 `process_emphasis` algorithm over the IR's
1131/// delim runs. Mutates the IR in place: matched runs gain entries in their
1132/// `matches` vec, unmatched bytes stay implicit (the emission pass treats
1133/// any byte not covered by a match as literal text).
1134///
1135/// The algorithm tracks a per-bucket `openers_bottom` exclusive lower
1136/// bound to keep walk-back bounded; consume rules and the §6.2 mod-3
1137/// rejection match the reference implementation.
1138pub fn process_emphasis(events: &mut [IrEvent], dialect: crate::options::Dialect) {
1139    process_emphasis_in_range(events, 0, events.len(), dialect);
1140}
1141
1142/// Range-scoped variant of [`process_emphasis`].
1143///
1144/// Only delim runs whose IR event index lies in `[lo, hi)` are considered.
1145/// Used by [`build_full_plans`] to run emphasis pairing inside each
1146/// resolved bracket pair *before* the global top-level pass, so emphasis
1147/// can never form across a link's bracket boundary (CommonMark §6.3
1148/// requires bracket resolution to happen first when at a `]`, with
1149/// emphasis processed on the link's inner range).
1150///
1151/// The function additionally skips delim runs that already carry a
1152/// recorded match in their `matches` vec — this lets the second
1153/// (top-level) pass reuse the same algorithm without re-pairing bytes
1154/// already consumed by inner-range passes.
1155pub fn process_emphasis_in_range(
1156    events: &mut [IrEvent],
1157    lo: usize,
1158    hi: usize,
1159    dialect: crate::options::Dialect,
1160) {
1161    process_emphasis_in_range_filtered(events, lo, hi, None, dialect);
1162}
1163
1164/// Internal variant of [`process_emphasis_in_range`] with an optional
1165/// exclusion bitmap. Event indices for which `excluded[i] == true` are
1166/// treated as if their delim run were already fully consumed — used by
1167/// [`build_full_plans`] to keep the top-level emphasis pass from pairing
1168/// across a resolved bracket pair's boundary (the inner delim runs of
1169/// such a pair belong to the link's inner range and were already paired
1170/// by the scoped pass).
1171fn process_emphasis_in_range_filtered(
1172    events: &mut [IrEvent],
1173    lo: usize,
1174    hi: usize,
1175    excluded: Option<&[bool]>,
1176    dialect: crate::options::Dialect,
1177) {
1178    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1179    if is_commonmark {
1180        run_emphasis_pass(events, lo, hi, excluded, dialect, &[], false);
1181        return;
1182    }
1183    // Pandoc dialect: cascade-then-rerun. Run the standard pass, then
1184    // invalidate Emph/Strong pairs whose inner range contains an
1185    // unmatched same-char run with both can_open && can_close (Pandoc's
1186    // recursive descent would have failed those outer pairs because the
1187    // inner content has a stray, ambiguous delimiter the recursive
1188    // parser cannot pair). The invalidated pairs go into a "rejected
1189    // list" that the next iteration of the standard pass consults to
1190    // pick a different opener for the same closer (or reject the
1191    // closer altogether). Iterate to a fixed point.
1192    //
1193    // The rerun (iter 2+) runs in `strict` mode: a candidate pair is
1194    // rejected if its inner range contains an unmatched same-char run
1195    // with count > pair.count. This mirrors pandoc-markdown's
1196    // recursive-descent semantics where, e.g. inside a failed outer
1197    // `**...**` Strong, the inner `one c` parser's `option2`
1198    // (`string [c,c] >> two c mempty`) greedily consumes a stray `**`
1199    // and prevents subsequent `*` runs from pairing as Emph. Without
1200    // this gate, `**foo *bar** baz*` would produce Emph[bar** baz]
1201    // after the outer Strong invalidation, but pandoc treats it as
1202    // all-literal because the inner `**` blocks the Emph match.
1203    let mut rejected: Vec<(usize, usize)> = Vec::new();
1204    let max_iters = events.len().saturating_add(2);
1205    let mut iter = 0;
1206    loop {
1207        let strict = iter > 0;
1208        run_emphasis_pass(events, lo, hi, excluded, dialect, &rejected, strict);
1209        let invalidations = pandoc_cascade_invalidate(events, excluded);
1210        if invalidations.is_empty() {
1211            break;
1212        }
1213        rejected.extend(invalidations);
1214        iter += 1;
1215        if iter >= max_iters {
1216            break;
1217        }
1218    }
1219    // Recovery for `***A **B** C***` patterns: synthesise the inner
1220    // Strong match the standard delim-stack algorithm can't reach.
1221    pandoc_inner_strong_recovery(events);
1222}
1223
1224/// One pass of the CommonMark §6.2 emphasis pairing algorithm over the
1225/// IR's [`DelimRun`](IrEvent::DelimRun) events in `[lo, hi)`. Pandoc
1226/// dialect gates apply when `dialect == Dialect::Pandoc`. The
1227/// `rejected_pairs` list (Pandoc only) excludes specific
1228/// (opener_event_idx, closer_event_idx) pairs from matching — used by
1229/// the cascade-then-rerun loop to prevent invalidated pairs from
1230/// re-forming on the next iteration.
1231fn run_emphasis_pass(
1232    events: &mut [IrEvent],
1233    lo: usize,
1234    hi: usize,
1235    excluded: Option<&[bool]>,
1236    dialect: crate::options::Dialect,
1237    rejected_pairs: &[(usize, usize)],
1238    strict_pandoc: bool,
1239) {
1240    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1241    let hi = hi.min(events.len());
1242    if lo >= hi {
1243        return;
1244    }
1245    // Indices of DelimRun events within [lo, hi), in order, that have
1246    // not already been fully consumed by an earlier scoped pass and that
1247    // are not in the optional exclusion bitmap.
1248    let mut delim_idxs: Vec<usize> = events[lo..hi]
1249        .iter()
1250        .enumerate()
1251        .filter_map(|(i, e)| {
1252            let abs = lo + i;
1253            match e {
1254                IrEvent::DelimRun { matches, .. }
1255                    if matches.is_empty()
1256                        && excluded.is_none_or(|ex| ex.get(abs).copied() != Some(true)) =>
1257                {
1258                    Some(abs)
1259                }
1260                _ => None,
1261            }
1262        })
1263        .collect();
1264    if delim_idxs.is_empty() {
1265        return;
1266    }
1267
1268    // Working state: count (remaining unmatched chars) and source_start
1269    // (first remaining char) per delim run. Indexed by position in
1270    // `delim_idxs`.
1271    let mut count: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1272    let mut source_start: Vec<usize> = Vec::with_capacity(delim_idxs.len());
1273    let mut removed: Vec<bool> = vec![false; delim_idxs.len()];
1274
1275    for &ev_idx in &delim_idxs {
1276        if let IrEvent::DelimRun { start, end, .. } = &events[ev_idx] {
1277            count.push(end - start);
1278            source_start.push(*start);
1279        }
1280    }
1281
1282    // openers_bottom[ch_idx][len%3][can_open] → exclusive lower bound
1283    // (an index into `delim_idxs`, or None meaning "no bottom yet").
1284    let mut openers_bottom: [[[Option<usize>; 2]; 3]; 2] = [[[None; 2]; 3]; 2];
1285
1286    // First active index, scanning forward.
1287    let first_active =
1288        |removed: &[bool]| -> Option<usize> { (0..removed.len()).find(|&i| !removed[i]) };
1289    let next_active = |removed: &[bool], from: usize| -> Option<usize> {
1290        (from + 1..removed.len()).find(|&i| !removed[i])
1291    };
1292    let prev_active =
1293        |removed: &[bool], from: usize| -> Option<usize> { (0..from).rev().find(|&i| !removed[i]) };
1294
1295    let min_closer_count = 1usize;
1296    let mut closer_local = first_active(&removed);
1297    while let Some(c) = closer_local {
1298        let ev_c_idx = delim_idxs[c];
1299        let (ch_c, can_open_c, can_close_c) = match &events[ev_c_idx] {
1300            IrEvent::DelimRun {
1301                ch,
1302                can_open,
1303                can_close,
1304                ..
1305            } => (*ch, *can_open, *can_close),
1306            _ => unreachable!(),
1307        };
1308        if !can_close_c || removed[c] || count[c] < min_closer_count {
1309            closer_local = next_active(&removed, c);
1310            continue;
1311        }
1312
1313        let ch_idx = if ch_c == b'*' { 0 } else { 1 };
1314        let closer_mod = count[c] % 3;
1315        let closer_open_bucket = can_open_c as usize;
1316        let bottom = openers_bottom[ch_idx][closer_mod][closer_open_bucket];
1317
1318        // Walk back to find a compatible opener.
1319        let mut found_opener: Option<usize> = None;
1320        let mut walk = prev_active(&removed, c);
1321        while let Some(o) = walk {
1322            if Some(o) == bottom {
1323                break;
1324            }
1325            let ev_o_idx = delim_idxs[o];
1326            let (ch_o, can_open_o, can_close_o) = match &events[ev_o_idx] {
1327                IrEvent::DelimRun {
1328                    ch,
1329                    can_open,
1330                    can_close,
1331                    ..
1332                } => (*ch, *can_open, *can_close),
1333                _ => unreachable!(),
1334            };
1335            if !removed[o] && ch_o == ch_c && can_open_o {
1336                let oc_sum = count[o] + count[c];
1337                let opener_both = can_open_o && can_close_o;
1338                let closer_both = can_open_c && can_close_c;
1339                let mod3_reject = is_commonmark
1340                    && (opener_both || closer_both)
1341                    && oc_sum.is_multiple_of(3)
1342                    && !(count[o].is_multiple_of(3) && count[c].is_multiple_of(3));
1343                // Pandoc-markdown rejects emph/strong pairs whose counts
1344                // disagree in the exactly-(1,2) / (2,1) shape:
1345                //   - `**foo*` (2,1): `try_parse_two` looks only for a
1346                //     `**` closer; the lone `*` doesn't satisfy that.
1347                //   - `*foo**` (1,2): `try_parse_one` encountering `**`
1348                //     tries `try_parse_two`; absence of an inner `**`
1349                //     closer cascades the outer parse to fail.
1350                // Other count combinations DO match (verified against
1351                // `pandoc -f markdown`):
1352                //   - (1,3) / (3,1) → emph match, opposite-side
1353                //     leftover `**` literal.
1354                //   - (2,3) / (3,2) → strong match, single `*` literal.
1355                //   - (3,3) → STRONG(EM(...)) nested.
1356                //   - (1..3, 4+) → match (Pandoc's ender walks the
1357                //     closer run for a valid position; algorithm
1358                //     consumes leftmost via leftover-as-literal).
1359                // Opener count >= 4 is rejected (Pandoc's
1360                // `try_parse_emphasis` has no count-4+ dispatch).
1361                let pandoc_reject = !is_commonmark
1362                    && ((count[o] == 1 && count[c] == 2)
1363                        || (count[o] == 2 && count[c] == 1)
1364                        || count[o] >= 4);
1365                let pair_rejected = !is_commonmark && {
1366                    let oe = delim_idxs[o];
1367                    let ce = delim_idxs[c];
1368                    rejected_pairs.iter().any(|&(ro, rc)| ro == oe && rc == ce)
1369                };
1370                // Pandoc strict-rerun gate (iter 2+ only): block a
1371                // candidate pair if any unmatched same-char run between
1372                // its opener and closer has remaining count strictly
1373                // greater than the consume rule for this pair.
1374                // Mirrors pandoc-markdown's recursive descent where
1375                // `one c`'s `option2` (`string [c,c] >> two c`) would
1376                // greedily consume a stray higher-count run, blocking
1377                // the outer `one c` from finding its `ender c 1` —
1378                // e.g. `**foo *bar** baz*` after the outer Strong
1379                // invalidates: a naïve rerun pairs ev1 (`*`) ↔ ev3
1380                // (`*`) as Emph (consume=1), but pandoc treats the
1381                // `**` between as having "consumed" any further
1382                // matching, leaving everything literal.
1383                let strict_block = strict_pandoc && {
1384                    let tentative_consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1385                        1
1386                    } else if count[o] >= 2 && count[c] >= 2 {
1387                        2
1388                    } else {
1389                        1
1390                    };
1391                    let lo_evt = delim_idxs[o] + 1;
1392                    let hi_evt = delim_idxs[c];
1393                    (lo_evt..hi_evt).any(|k| match &events[k] {
1394                        IrEvent::DelimRun {
1395                            ch: ch_k,
1396                            start,
1397                            end,
1398                            matches,
1399                            ..
1400                        } => {
1401                            *ch_k == ch_c && {
1402                                let total = end - start;
1403                                let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1404                                total.saturating_sub(consumed) > tentative_consume
1405                            }
1406                        }
1407                        _ => false,
1408                    })
1409                };
1410                if !mod3_reject && !pandoc_reject && !pair_rejected && !strict_block {
1411                    found_opener = Some(o);
1412                    break;
1413                }
1414            }
1415            if o == 0 {
1416                break;
1417            }
1418            walk = prev_active(&removed, o);
1419        }
1420
1421        if let Some(o) = found_opener {
1422            // Consume rule:
1423            //   CommonMark — consume 2 (Strong) when both sides have
1424            //     >= 2 chars, else 1 (Emph). For `***x***` (3,3) this
1425            //     produces EM(STRONG(...)) because the first match
1426            //     consumes 2 from each side (Strong outermost).
1427            //   Pandoc — when both sides have >= 3, consume 1 first
1428            //     (Emph innermost) leaving 2 + 2 to pair as Strong on
1429            //     the second pass. This produces STRONG(EM(...)) for
1430            //     `***x***`, matching Pandoc-markdown's recursive
1431            //     `try_parse_three` algorithm.
1432            let consume = if !is_commonmark && count[o] >= 3 && count[c] >= 3 {
1433                1
1434            } else if count[o] >= 2 && count[c] >= 2 {
1435                2
1436            } else {
1437                1
1438            };
1439            let kind = if consume == 2 {
1440                EmphasisKind::Strong
1441            } else {
1442                EmphasisKind::Emph
1443            };
1444
1445            // Opener consumes inner-edge (rightmost) chars.
1446            let opener_match_offset =
1447                source_start[o] + count[o] - consume - source_start_event(&events[delim_idxs[o]]);
1448            // Closer consumes inner-edge (leftmost) chars.
1449            let closer_match_offset = source_start[c] - source_start_event(&events[delim_idxs[c]]);
1450
1451            // Record match on opener.
1452            if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[o]] {
1453                matches.push(DelimMatch {
1454                    offset_in_run: opener_match_offset as u8,
1455                    len: consume as u8,
1456                    is_opener: true,
1457                    partner_event: delim_idxs[c] as u32,
1458                    partner_offset: closer_match_offset as u8,
1459                    kind,
1460                });
1461            }
1462            // Record match on closer.
1463            if let IrEvent::DelimRun { matches, .. } = &mut events[delim_idxs[c]] {
1464                matches.push(DelimMatch {
1465                    offset_in_run: closer_match_offset as u8,
1466                    len: consume as u8,
1467                    is_opener: false,
1468                    partner_event: delim_idxs[o] as u32,
1469                    partner_offset: opener_match_offset as u8,
1470                    kind,
1471                });
1472            }
1473
1474            count[o] -= consume;
1475            source_start[c] += consume;
1476            count[c] -= consume;
1477
1478            // Remove all openers strictly between o and c.
1479            let mut between = next_active(&removed, o);
1480            while let Some(idx) = between {
1481                if idx == c {
1482                    break;
1483                }
1484                removed[idx] = true;
1485                between = next_active(&removed, idx);
1486            }
1487
1488            if count[o] == 0 {
1489                removed[o] = true;
1490            }
1491            if count[c] == 0 {
1492                removed[c] = true;
1493                closer_local = next_active(&removed, c);
1494            }
1495            // Else re-process the same closer with reduced count.
1496        } else {
1497            openers_bottom[ch_idx][closer_mod][closer_open_bucket] = prev_active(&removed, c);
1498            if !can_open_c {
1499                removed[c] = true;
1500            }
1501            closer_local = next_active(&removed, c);
1502        }
1503    }
1504
1505    // No further mutation needed: matches are recorded; remaining bytes
1506    // stay implicit literal. Pandoc cascade is invoked by the caller
1507    // (`process_emphasis_in_range_filtered`) once per pass so it can
1508    // accumulate invalidations into a rejected-pairs list and re-run.
1509    let _ = (&mut delim_idxs, &mut openers_bottom, min_closer_count);
1510}
1511
1512/// Pandoc-only post-processing pass over [`process_emphasis_in_range_filtered`]
1513/// matches: invalidate any matched delim pair that contains an unmatched
1514/// same-character run between its opener and closer. Returns the list
1515/// of (opener_event_idx, closer_event_idx) pairs that were invalidated
1516/// in this call, so the caller can seed a rejected-pairs list and
1517/// re-run the standard pass — this lets Pandoc re-pair the inner runs
1518/// that the invalidated outer match would have stolen via
1519/// between-removal (e.g. `*foo **bar* baz**` → after the outer
1520/// `ev0..ev2` Emph is invalidated, `ev1..ev3` matches as Strong on the
1521/// next iteration).
1522fn pandoc_cascade_invalidate(
1523    events: &mut [IrEvent],
1524    excluded: Option<&[bool]>,
1525) -> Vec<(usize, usize)> {
1526    let mut invalidated_pairs: Vec<(usize, usize)> = Vec::new();
1527    // Early-exit: if there are no `DelimRun` events at all, the cascade
1528    // pass is a no-op. Avoids allocating the two scratch vecs below for
1529    // every range with no `*`/`_` runs (which is the common case for
1530    // ranges that contain only standalone constructs / brackets).
1531    if !events.iter().any(|e| matches!(e, IrEvent::DelimRun { .. })) {
1532        return invalidated_pairs;
1533    }
1534    let is_excluded = |k: usize| excluded.is_some_and(|ex| ex.get(k).copied() == Some(true));
1535    // Reuse two scratch vecs across the inner loop iterations instead
1536    // of `.collect()` each time. These are tiny per-paragraph
1537    // allocations but the function is called for every Pandoc inline
1538    // emphasis pass and shows up in malloc traffic.
1539    let mut total: Vec<usize> = Vec::with_capacity(events.len());
1540    let mut consumed: Vec<usize> = Vec::with_capacity(events.len());
1541    loop {
1542        total.clear();
1543        consumed.clear();
1544        // Compute total bytes (run length) and consumed bytes (sum of
1545        // match lens) per DelimRun event index.
1546        total.extend(events.iter().map(|e| match e {
1547            IrEvent::DelimRun { start, end, .. } => end - start,
1548            _ => 0,
1549        }));
1550        consumed.extend(events.iter().map(|e| match e {
1551            IrEvent::DelimRun { matches, .. } => matches.iter().map(|m| m.len as usize).sum(),
1552            _ => 0,
1553        }));
1554
1555        // Find a pair to invalidate. We invalidate one and restart so
1556        // the cascade can re-evaluate dependent pairs.
1557        let mut to_invalidate: Option<(usize, u8)> = None;
1558        'outer: for opener_idx in 0..events.len() {
1559            let IrEvent::DelimRun {
1560                ch: ch_o, matches, ..
1561            } = &events[opener_idx]
1562            else {
1563                continue;
1564            };
1565            for (mi, m) in matches.iter().enumerate() {
1566                if !m.is_opener {
1567                    continue;
1568                }
1569                let closer_idx = m.partner_event as usize;
1570                if closer_idx <= opener_idx || closer_idx >= events.len() {
1571                    continue;
1572                }
1573                // Scan events strictly between opener and closer for any
1574                // DelimRun with the same `ch`, unmatched bytes, AND
1575                // both `can_open` and `can_close` (i.e., the run could
1576                // have participated in pairing on both sides). A
1577                // can_open-only or can_close-only run is a one-sided
1578                // fragment (e.g. an isolated `*` after a backslash
1579                // escape) that the Pandoc recursive-descent path would
1580                // never have tried as a nested-strong opener — those
1581                // shouldn't cascade-invalidate the surrounding pair.
1582                for k in (opener_idx + 1)..closer_idx {
1583                    if is_excluded(k) {
1584                        continue;
1585                    }
1586                    if let IrEvent::DelimRun {
1587                        ch: ch_k,
1588                        can_open: co_k,
1589                        can_close: cc_k,
1590                        ..
1591                    } = &events[k]
1592                        && *ch_k == *ch_o
1593                        && consumed[k] < total[k]
1594                        && *co_k
1595                        && *cc_k
1596                    {
1597                        to_invalidate = Some((opener_idx, mi as u8));
1598                        break 'outer;
1599                    }
1600                }
1601            }
1602        }
1603
1604        let Some((opener_idx, mi)) = to_invalidate else {
1605            break;
1606        };
1607
1608        // Look up the partner event/offset before mutating.
1609        let (closer_idx, opener_offset) = match &events[opener_idx] {
1610            IrEvent::DelimRun { matches, .. } => {
1611                let m = matches[mi as usize];
1612                (m.partner_event as usize, m.offset_in_run)
1613            }
1614            _ => break,
1615        };
1616
1617        // Remove the opener match.
1618        if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1619            matches.remove(mi as usize);
1620        }
1621        // Remove the corresponding closer match (closer's match has
1622        // is_opener=false and partner_offset == opener's offset_in_run).
1623        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1624            matches.retain(|m| m.is_opener || m.partner_offset != opener_offset);
1625        }
1626        invalidated_pairs.push((opener_idx, closer_idx));
1627    }
1628    invalidated_pairs
1629}
1630
1631/// Pandoc-only post-pass: recover the inner Strong match in
1632/// `***A **B** C***` patterns where the IR's standard pass produced
1633/// `Emph[Strong[A], "B**...** C"]` (matching the outer triple as
1634/// Strong+Emph but losing the inner `**...**`-as-Strong-of-`C` pair).
1635///
1636/// Pandoc's recursive descent here goes
1637/// `three c → ender c 2 → one c → option2 → two c`, producing
1638/// `Emph[Strong[A], "B", Strong[C]]` — two Strong nodes inside an outer
1639/// Emph. The standard delim-stack algorithm can't reach this pairing
1640/// because between-removal during the outer Emph match removes the
1641/// inner closer-side `**` (e.g. `bar**`) from the candidate pool.
1642///
1643/// This recovery scans Emph matches whose opener and closer originally
1644/// had count >= 3, and whose closer has unmatched bytes >= 2 after the
1645/// standard pass; for each, we look for an unmatched same-char
1646/// between-run with count >= 2 and `can_close = true` (the would-be
1647/// inner-Strong opener) and synthesise a Strong match that consumes
1648/// the leftmost 2 bytes of the closer (where the existing Emph match
1649/// shifts to the rightmost 1 byte). The byte-position rewrite lets
1650/// the CST emission produce well-nested `Emph[..., Strong[...]]` —
1651/// outer Emph close at the rightmost outer-triple byte, inner Strong
1652/// close at the leftmost two.
1653fn pandoc_inner_strong_recovery(events: &mut [IrEvent]) {
1654    let n = events.len();
1655    // (between_idx, opener_idx, closer_idx, len)
1656    let mut to_apply: Vec<(usize, usize, usize, u8)> = Vec::new();
1657
1658    for opener_idx in 0..n {
1659        let (open_total, open_matches_clone, ch_o) = match &events[opener_idx] {
1660            IrEvent::DelimRun {
1661                start,
1662                end,
1663                matches,
1664                ch,
1665                ..
1666            } => (*end - *start, matches.clone(), *ch),
1667            _ => continue,
1668        };
1669        if open_total < 3 {
1670            continue;
1671        }
1672
1673        for m in open_matches_clone.iter() {
1674            if !m.is_opener || m.kind != EmphasisKind::Emph {
1675                continue;
1676            }
1677            let closer_idx = m.partner_event as usize;
1678            if closer_idx <= opener_idx || closer_idx >= n {
1679                continue;
1680            }
1681
1682            let (close_total, close_consumed) = match &events[closer_idx] {
1683                IrEvent::DelimRun {
1684                    start,
1685                    end,
1686                    matches,
1687                    ..
1688                } => {
1689                    let total = end - start;
1690                    let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1691                    (total, consumed)
1692                }
1693                _ => continue,
1694            };
1695            if close_total < 3 {
1696                continue;
1697            }
1698            let leftover = close_total.saturating_sub(close_consumed);
1699            if leftover < 2 {
1700                continue;
1701            }
1702
1703            // Walk backward from closer-1 looking for the rightmost
1704            // unmatched same-char run with count >= 2 and
1705            // can_close=true.
1706            for k in ((opener_idx + 1)..closer_idx).rev() {
1707                if let IrEvent::DelimRun {
1708                    ch,
1709                    start,
1710                    end,
1711                    matches,
1712                    can_close,
1713                    ..
1714                } = &events[k]
1715                {
1716                    if *ch != ch_o || !*can_close {
1717                        continue;
1718                    }
1719                    let total = end - start;
1720                    let consumed: usize = matches.iter().map(|m| m.len as usize).sum();
1721                    let remaining = total.saturating_sub(consumed);
1722                    if remaining < 2 {
1723                        continue;
1724                    }
1725                    to_apply.push((k, opener_idx, closer_idx, 2));
1726                    break;
1727                }
1728            }
1729        }
1730    }
1731
1732    for (between_idx, opener_idx, closer_idx, len) in to_apply {
1733        // Find the existing Emph match on the closer side.
1734        let (closer_emph_match_idx, closer_emph_offset) = {
1735            let mut found: Option<(usize, u8)> = None;
1736            if let IrEvent::DelimRun { matches, .. } = &events[closer_idx] {
1737                for (mi, m) in matches.iter().enumerate() {
1738                    if !m.is_opener
1739                        && m.partner_event as usize == opener_idx
1740                        && m.kind == EmphasisKind::Emph
1741                    {
1742                        found = Some((mi, m.offset_in_run));
1743                        break;
1744                    }
1745                }
1746            }
1747            match found {
1748                Some(x) => x,
1749                None => continue,
1750            }
1751        };
1752
1753        // Find the corresponding Emph match on the opener side.
1754        let opener_emph_match_idx = {
1755            let mut found: Option<usize> = None;
1756            if let IrEvent::DelimRun { matches, .. } = &events[opener_idx] {
1757                for (mi, m) in matches.iter().enumerate() {
1758                    if m.is_opener
1759                        && m.partner_event as usize == closer_idx
1760                        && m.kind == EmphasisKind::Emph
1761                    {
1762                        found = Some(mi);
1763                        break;
1764                    }
1765                }
1766            }
1767            match found {
1768                Some(x) => x,
1769                None => continue,
1770            }
1771        };
1772
1773        // Shift the Emph closer's offset to the right of the new
1774        // Strong closer's bytes (Strong takes leftmost `len` bytes,
1775        // Emph takes the next byte).
1776        let new_closer_emph_offset = closer_emph_offset + len;
1777
1778        // Update closer's Emph offset_in_run.
1779        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1780            matches[closer_emph_match_idx].offset_in_run = new_closer_emph_offset;
1781        }
1782        // Update opener's Emph partner_offset to point at the shifted
1783        // Emph closer position.
1784        if let IrEvent::DelimRun { matches, .. } = &mut events[opener_idx] {
1785            matches[opener_emph_match_idx].partner_offset = new_closer_emph_offset;
1786        }
1787
1788        // Add Strong opener match on the between-run.
1789        if let IrEvent::DelimRun { matches, .. } = &mut events[between_idx] {
1790            matches.push(DelimMatch {
1791                offset_in_run: 0,
1792                len,
1793                is_opener: true,
1794                partner_event: closer_idx as u32,
1795                partner_offset: closer_emph_offset,
1796                kind: EmphasisKind::Strong,
1797            });
1798        }
1799        // Add Strong closer match on the closer (at the original
1800        // pre-shift Emph-closer position; the bytes that were the
1801        // single Emph closer now become the leftmost 2 bytes of the
1802        // Strong closer).
1803        if let IrEvent::DelimRun { matches, .. } = &mut events[closer_idx] {
1804            matches.push(DelimMatch {
1805                offset_in_run: closer_emph_offset,
1806                len,
1807                is_opener: false,
1808                partner_event: between_idx as u32,
1809                partner_offset: 0,
1810                kind: EmphasisKind::Strong,
1811            });
1812        }
1813    }
1814}
1815
1816fn source_start_event(event: &IrEvent) -> usize {
1817    match event {
1818        IrEvent::DelimRun { start, .. } => *start,
1819        _ => unreachable!("source_start_event called on non-DelimRun"),
1820    }
1821}
1822
1823// ============================================================================
1824// Pass 3: Process brackets (CommonMark §6.3)
1825// ============================================================================
1826
1827/// Resolve `[`/`![`/`]` markers into link/image nodes per CommonMark §6.3
1828/// (with Pandoc-aware variations under `Dialect::Pandoc`).
1829///
1830/// Walks the IR forward looking for `]` markers. For each one, finds the
1831/// nearest active matching `[`/`![` and tries to resolve the bracket pair
1832/// as a link or image. Resolution is tried in spec order:
1833///
1834/// 1. Inline link / image: `[text](dest)` or `[text](dest "title")`.
1835/// 2. Full reference: `[text][label]`, where `label` is in `refdefs`.
1836/// 3. Collapsed reference: `[text][]`, where `text` (normalised) is in
1837///    `refdefs`.
1838/// 4. Shortcut reference: `[text]` not followed by `(` or `[`, where
1839///    `text` (normalised) is in `refdefs`.
1840///
1841/// On a match, the opener gets a `BracketResolution` and the closer is
1842/// flagged `matched`. Under `Dialect::CommonMark`, all earlier active link
1843/// openers are deactivated to implement the §6.3 "links may not contain
1844/// other links" rule (image brackets do not deactivate earlier link
1845/// openers — only links do). Under `Dialect::Pandoc`, the deactivate-pass
1846/// is skipped: pandoc-native is outer-wins for nested links (the inner
1847/// `[inner](u2)` of `[link [inner](u2)](u1)` is literal text inside the
1848/// outer link), and the dispatcher enforces this via a `suppress_inner_links`
1849/// flag during LINK-text recursion. So under Pandoc the IR can leave both
1850/// outer and inner resolved and trust the dispatcher to suppress inner
1851/// LINK emission.
1852///
1853/// On a miss the bracket pair stays opaque-as-literal and the closer is
1854/// dropped from the bracket stack so the next `]` can re-pair.
1855///
1856/// Reference-form resolution consults the refdef map under both
1857/// dialects (CommonMark §6.3 and Pandoc-markdown agree on the
1858/// document-scoped lookup rule). Under Pandoc, when a bracket-shape
1859/// pattern (`[text][label]`, `[text][]`, `[text]`) doesn't resolve to
1860/// a refdef, the opener is tagged with `unresolved_ref = Some(...)`
1861/// and the closer's `matched` is set to `true` so that
1862/// [`build_bracket_plan`] emits a [`BracketDispo::UnresolvedReference`]
1863/// keyed at the opener. Emission then wraps `[start, end)` in an
1864/// `UNRESOLVED_REFERENCE` node — distinct from `LINK` — so downstream
1865/// tools (linter, LSP) can attach behavior to the bracket-shape
1866/// pattern without the parser having to lie about resolution.
1867///
1868/// Under CommonMark, no `unresolved_ref` is recorded; the
1869/// no-resolution fall-through behaves as today (opener deactivated,
1870/// brackets emit as literal text).
1871pub fn process_brackets(
1872    events: &mut [IrEvent],
1873    text: &str,
1874    refdefs: Option<&RefdefMap>,
1875    dialect: crate::options::Dialect,
1876) {
1877    let empty: HashSet<String> = HashSet::new();
1878    let labels: &HashSet<String> = match refdefs {
1879        Some(map) => map.as_ref(),
1880        None => &empty,
1881    };
1882    let is_commonmark = dialect == crate::options::Dialect::CommonMark;
1883    // Refdef-aware label resolution under both dialects.
1884    let label_resolves =
1885        |key_norm: &str| -> bool { !key_norm.is_empty() && labels.contains(key_norm) };
1886
1887    // Walk forward through events, treating it as a linear scan for `]`.
1888    let mut i = 0;
1889    while i < events.len() {
1890        let close_pos = match &events[i] {
1891            IrEvent::CloseBracket { pos, .. } => *pos,
1892            _ => {
1893                i += 1;
1894                continue;
1895            }
1896        };
1897
1898        // Find the nearest active OpenBracket before `i`.
1899        let mut o = match find_active_opener(events, i) {
1900            Some(o) => o,
1901            None => {
1902                i += 1;
1903                continue;
1904            }
1905        };
1906
1907        let (open_end, is_image) = match &events[o] {
1908            IrEvent::OpenBracket { end, is_image, .. } => (*end, *is_image),
1909            _ => unreachable!(),
1910        };
1911        let text_start = open_end;
1912        let text_end = close_pos;
1913        let after_close = close_pos + 1;
1914
1915        // 1. Inline link / image.
1916        if let Some((suffix_end, dest, title)) = try_inline_suffix(text, after_close) {
1917            // §6.3 link-in-link rule (CommonMark): if this is a *link*
1918            // (not an image), and any earlier active link opener exists,
1919            // deactivate them. We also deactivate openers strictly before
1920            // `o` here because matching means the inner link wins; the
1921            // spec applies this *after* matching. Pandoc skips this —
1922            // outer-wins is enforced by the dispatcher's
1923            // `suppress_inner_links` flag during LINK-text recursion.
1924            if !is_image && is_commonmark {
1925                deactivate_earlier_link_openers(events, o);
1926            }
1927            commit_resolution(
1928                events,
1929                o,
1930                i,
1931                text_start,
1932                text_end,
1933                after_close,
1934                suffix_end,
1935                LinkKind::Inline { dest, title },
1936            );
1937            // Remove the opener from the bracket stack: it has been
1938            // matched (active=false will fall out automatically since
1939            // resolution is Some).
1940            mark_opener_resolved(events, o);
1941            i += 1;
1942            continue;
1943        }
1944
1945        // 2. Full reference link: `[text][label]`.
1946        let full_ref_suffix = try_full_reference_suffix(text, after_close);
1947        if let Some((suffix_end, label_raw)) = &full_ref_suffix {
1948            let label_norm = normalize_label(label_raw);
1949            if label_resolves(&label_norm) {
1950                if !is_image && is_commonmark {
1951                    deactivate_earlier_link_openers(events, o);
1952                }
1953                commit_resolution(
1954                    events,
1955                    o,
1956                    i,
1957                    text_start,
1958                    text_end,
1959                    after_close,
1960                    *suffix_end,
1961                    LinkKind::FullReference {
1962                        label: label_raw.clone(),
1963                    },
1964                );
1965                mark_opener_resolved(events, o);
1966                i += 1;
1967                continue;
1968            }
1969            // Bracketed but unresolved label: §6.3 says we still treat
1970            // `[text][label]` as not-a-link, but the brackets get
1971            // consumed as literal text AND the shortcut form is
1972            // suppressed (since the `]` is followed by a link label).
1973        }
1974
1975        // 3. Collapsed `[]`.
1976        let link_text = &text[text_start..text_end];
1977        let link_text_norm = normalize_label(link_text);
1978        let is_collapsed = is_collapsed_marker(text, after_close);
1979        let collapsed_suffix_end = after_close + 2;
1980
1981        if is_collapsed && label_resolves(&link_text_norm) {
1982            if !is_image && is_commonmark {
1983                deactivate_earlier_link_openers(events, o);
1984            }
1985            commit_resolution(
1986                events,
1987                o,
1988                i,
1989                text_start,
1990                text_end,
1991                after_close,
1992                collapsed_suffix_end,
1993                LinkKind::CollapsedReference,
1994            );
1995            mark_opener_resolved(events, o);
1996            i += 1;
1997            continue;
1998        }
1999        // `[text][]` with text not in refdefs — falls through to
2000        // literal text; shortcut is suppressed (followed by `[]`).
2001
2002        // 4. Shortcut form: `[text]` not followed by `[]` or `[label]`.
2003        // Per CommonMark §6.3: "A shortcut reference link consists of a
2004        // link label that matches a link reference definition elsewhere
2005        // in the document and is not followed by [] or a link label."
2006        // The full-ref / collapsed shape attempts above suppress the
2007        // shortcut even when their labels don't resolve — the bracket
2008        // bytes still get consumed as literal text.
2009        let shortcut_suppressed = full_ref_suffix.is_some() || is_collapsed;
2010        if !shortcut_suppressed && label_resolves(&link_text_norm) {
2011            if !is_image && is_commonmark {
2012                deactivate_earlier_link_openers(events, o);
2013            }
2014            commit_resolution(
2015                events,
2016                o,
2017                i,
2018                text_start,
2019                text_end,
2020                after_close,
2021                after_close,
2022                LinkKind::ShortcutReference,
2023            );
2024            mark_opener_resolved(events, o);
2025            i += 1;
2026            continue;
2027        }
2028
2029        // No resolution. Under Pandoc, the bracket pair is still a
2030        // recognisable reference shape (full / collapsed / shortcut) —
2031        // tag the opener with `unresolved_ref` so emission wraps it
2032        // in an `UNRESOLVED_REFERENCE` node, and mark the closer
2033        // matched so it doesn't fall through to a literal `]` token.
2034        // Under CommonMark, behavior unchanged: deactivate the opener,
2035        // brackets emit as literal text.
2036        //
2037        // Empty-component shapes (`[]`, `[][]`) aren't reference
2038        // patterns even in spirit — pandoc-native treats them as
2039        // literal text — so skip wrapping.
2040        let unresolved_shape = if !is_commonmark {
2041            let (end, has_substantive_label) =
2042                if let Some((suffix_end, label_raw)) = &full_ref_suffix {
2043                    (*suffix_end, !normalize_label(label_raw).is_empty())
2044                } else if is_collapsed {
2045                    (collapsed_suffix_end, !link_text_norm.is_empty())
2046                } else {
2047                    (after_close, !link_text_norm.is_empty())
2048                };
2049            if has_substantive_label {
2050                Some(UnresolvedRefShape {
2051                    close_event: i as u32,
2052                    text_end,
2053                    end,
2054                })
2055            } else {
2056                None
2057            }
2058        } else {
2059            None
2060        };
2061        if let IrEvent::OpenBracket {
2062            active,
2063            unresolved_ref,
2064            ..
2065        } = &mut events[o]
2066        {
2067            *active = false;
2068            *unresolved_ref = unresolved_shape;
2069        }
2070        if unresolved_shape.is_some()
2071            && let IrEvent::CloseBracket { matched, .. } = &mut events[i]
2072        {
2073            *matched = true;
2074        }
2075        let _ = &mut o;
2076        i += 1;
2077    }
2078}
2079
2080fn find_active_opener(events: &[IrEvent], close_idx: usize) -> Option<usize> {
2081    (0..close_idx).rev().find(|&i| {
2082        matches!(
2083            &events[i],
2084            IrEvent::OpenBracket {
2085                active: true,
2086                resolution: None,
2087                ..
2088            }
2089        )
2090    })
2091}
2092
2093fn deactivate_earlier_link_openers(events: &mut [IrEvent], open_idx: usize) {
2094    for ev in &mut events[..open_idx] {
2095        if let IrEvent::OpenBracket {
2096            is_image: false,
2097            active,
2098            resolution: None,
2099            ..
2100        } = ev
2101        {
2102            *active = false;
2103        }
2104    }
2105}
2106
2107fn mark_opener_resolved(events: &mut [IrEvent], open_idx: usize) {
2108    if let IrEvent::OpenBracket { active, .. } = &mut events[open_idx] {
2109        *active = false;
2110    }
2111}
2112
2113#[allow(clippy::too_many_arguments)]
2114fn commit_resolution(
2115    events: &mut [IrEvent],
2116    open_idx: usize,
2117    close_idx: usize,
2118    text_start: usize,
2119    text_end: usize,
2120    suffix_start: usize,
2121    suffix_end: usize,
2122    kind: LinkKind,
2123) {
2124    if let IrEvent::OpenBracket { resolution, .. } = &mut events[open_idx] {
2125        *resolution = Some(BracketResolution {
2126            close_event: close_idx as u32,
2127            text_start,
2128            text_end,
2129            suffix_start,
2130            suffix_end,
2131            kind,
2132        });
2133    }
2134    if let IrEvent::CloseBracket { matched, .. } = &mut events[close_idx] {
2135        *matched = true;
2136    }
2137}
2138
2139/// Try to parse `(dest)` or `(dest "title")` inline link suffix starting
2140/// at `text[pos]`. Returns `(end_pos_exclusive, dest, title)`.
2141fn try_inline_suffix(text: &str, pos: usize) -> Option<(usize, String, Option<String>)> {
2142    let bytes = text.as_bytes();
2143    if pos >= bytes.len() || bytes[pos] != b'(' {
2144        return None;
2145    }
2146    let mut p = pos + 1;
2147    // Skip leading whitespace.
2148    while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2149        p += 1;
2150    }
2151    // Empty `()` — link with empty destination.
2152    if p < bytes.len() && bytes[p] == b')' {
2153        return Some((p + 1, String::new(), None));
2154    }
2155
2156    // Parse destination.
2157    let (dest, dest_end) = parse_link_destination(text, p)?;
2158    p = dest_end;
2159
2160    // Skip whitespace.
2161    while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2162        p += 1;
2163    }
2164
2165    // Optional title.
2166    let mut title = None;
2167    if p < bytes.len() && matches!(bytes[p], b'"' | b'\'' | b'(') {
2168        let (t, t_end) = parse_link_title(text, p)?;
2169        title = Some(t);
2170        p = t_end;
2171        while p < bytes.len() && matches!(bytes[p], b' ' | b'\t' | b'\n') {
2172            p += 1;
2173        }
2174    }
2175
2176    if p >= bytes.len() || bytes[p] != b')' {
2177        return None;
2178    }
2179    Some((p + 1, dest, title))
2180}
2181
/// Parses a link destination starting at byte offset `start` of `text`.
///
/// Two forms are recognised:
///
/// * **Bracketed** — `<...>`: everything up to the first unescaped `>`.
///   A newline or an unescaped `<` inside the brackets rejects the
///   destination. A backslash protects the following byte, except that it
///   never hides a newline.
/// * **Unbracketed** — a non-empty run that ends at the first space, tab,
///   newline, other control byte (`< 0x20` or `0x7f`), or at a `)` that
///   has no matching `(` inside the run. A backslash skips the byte that
///   follows it. Parentheses must be balanced by the end of the run.
///
/// Returns `(destination, end_pos_exclusive)`. For the bracketed form the
/// destination excludes the angle brackets and the end position is just
/// past `>`. Escapes are returned verbatim, not decoded. Returns `None`
/// when `start` is out of range or no valid destination is found.
fn parse_link_destination(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    let first = *bytes.get(start)?;

    if first == b'<' {
        let begin = start + 1;
        let mut p = begin;
        while let Some(&b) = bytes.get(p) {
            match b {
                b'>' => return Some((text[begin..p].to_string(), p + 1)),
                b'\n' | b'<' => return None,
                // Skip the escaped byte, but let a newline be seen by the
                // arm above: line endings are never allowed in this form.
                b'\\' if bytes.get(p + 1).is_some_and(|&next| next != b'\n') => p += 2,
                _ => p += 1,
            }
        }
        // Ran off the end without a closing `>`.
        return None;
    }

    let mut p = start;
    let mut paren_depth: usize = 0;
    while let Some(&b) = bytes.get(p) {
        match b {
            b'\\' if p + 1 < bytes.len() => p += 2,
            b'(' => {
                paren_depth += 1;
                p += 1;
            }
            // An unmatched `)` belongs to the enclosing link syntax.
            b')' if paren_depth == 0 => break,
            b')' => {
                paren_depth -= 1;
                p += 1;
            }
            // Space (0x20), tab, newline and every other control byte.
            _ if b <= 0x20 || b == 0x7f => break,
            _ => p += 1,
        }
    }
    if p == start || paren_depth != 0 {
        return None;
    }
    Some((text[start..p].to_string(), p))
}
2237
/// Parse a link title whose opening delimiter sits at byte offset `start`
/// of `text`.
///
/// The opening byte selects the closing delimiter: `"` closes with `"`,
/// `'` with `'`, and `(` with `)`. The scan stops at the first closing
/// delimiter that is not preceded by a backslash; a backslash followed by
/// another byte is skipped as a two-byte unit.
///
/// Returns `(title, end)` where `title` is the text between the
/// delimiters (escapes left as written) and `end` is the byte offset just
/// past the closing delimiter. Returns `None` when `start` is out of
/// range, when the byte at `start` is not a title opener, or when the
/// title is never closed.
fn parse_link_title(text: &str, start: usize) -> Option<(String, usize)> {
    let bytes = text.as_bytes();
    // Checked lookup: an out-of-range `start` yields `None` instead of an
    // index panic, matching how `parse_link_destination` treats its input.
    let close = match *bytes.get(start)? {
        b'"' => b'"',
        b'\'' => b'\'',
        b'(' => b')',
        _ => return None,
    };
    let begin = start + 1;
    let mut p = begin;
    while p < bytes.len() {
        let b = bytes[p];
        if b == b'\\' && p + 1 < bytes.len() {
            // Escaped byte: never treated as the closing delimiter.
            p += 2;
            continue;
        }
        if b == close {
            return Some((text[begin..p].to_string(), p + 1));
        }
        p += 1;
    }
    None
}
2263
/// Attempt to read a full-reference suffix `[label]` whose `[` sits at
/// byte offset `pos`.
///
/// On success returns `(suffix_end, label_raw)`: the byte offset just past
/// the closing `]` and the raw label text between the brackets. A
/// backslash makes the following byte inert, so `\]` and `\[` stay inside
/// the label. An unescaped `[` inside the label, a missing closing `]`,
/// or an empty label all produce `None` — the empty (collapsed) form `[]`
/// is recognised separately by `is_collapsed_marker`.
fn try_full_reference_suffix(text: &str, pos: usize) -> Option<(usize, String)> {
    let bytes = text.as_bytes();
    if bytes.get(pos) != Some(&b'[') {
        return None;
    }

    let label_start = pos + 1;
    let mut cursor = label_start;
    loop {
        // Running off the end of the input means the label never closed.
        match *bytes.get(cursor)? {
            b']' => break,
            b'[' => return None,
            // Skip the backslash together with the byte it escapes.
            b'\\' => cursor += 2,
            _ => cursor += 1,
        }
    }

    let label = &text[label_start..cursor];
    if label.is_empty() {
        None
    } else {
        Some((cursor + 1, label.to_string()))
    }
}
2303
/// Returns `true` when the two bytes at `pos` are exactly `[]` — the
/// collapsed-reference marker. Any `pos` at or past the end of `text`
/// yields `false`.
fn is_collapsed_marker(text: &str, pos: usize) -> bool {
    match text.as_bytes().get(pos..) {
        Some(rest) => rest.starts_with(b"[]"),
        None => false,
    }
}
2307
2308// ============================================================================
2309// Bracket plan — byte-position-keyed view of resolved brackets, consumed by
2310// the existing emission walk in `core::parse_inline_range_impl`.
2311// ============================================================================
2312
/// Disposition of a single bracket byte after [`process_brackets`].
///
/// Values are stored in a [`BracketPlan`] keyed by source byte offset;
/// `build_bracket_plan` files `Open` and `UnresolvedReference` under the
/// opener's start byte and `Literal` under each literal bracket byte.
#[derive(Debug, Clone)]
pub enum BracketDispo {
    /// `[` or `![` of a resolved link/image. Emission emits the LINK/IMAGE
    /// node and skips past `suffix_end`.
    Open {
        /// Copied from the opener event's `is_image` flag (set for `![`).
        is_image: bool,
        /// Copied from the resolution's `text_start`.
        // NOTE(review): presumably the first byte of the link text — confirm.
        text_start: usize,
        /// Copied from the resolution's `text_end`.
        // NOTE(review): presumably the byte of the closing `]` — confirm.
        text_end: usize,
        /// Copied from the resolution's `suffix_start`.
        // NOTE(review): presumably where `(dest)` / `[label]` begins — confirm.
        suffix_start: usize,
        /// Copied from the resolution's `suffix_end`; emission resumes
        /// after this offset.
        suffix_end: usize,
        /// Which link form resolved (cloned from the resolution).
        kind: LinkKind,
    },
    /// Pandoc-only: `[` or `![` of a bracket-shape reference pattern
    /// whose label didn't resolve. Emission wraps `[start, end)` in an
    /// `UNRESOLVED_REFERENCE` node so downstream tools can attach
    /// behavior to the bracket-shape pattern. `text_start..text_end` is
    /// the inner text range (between the outer `[`/`![` and `]`).
    UnresolvedReference {
        /// Copied from the opener event's `is_image` flag (set for `![`).
        is_image: bool,
        /// Set by `build_bracket_plan` to the opener event's `end`, i.e.
        /// the byte just past the `[` / `![` marker.
        text_start: usize,
        /// Copied from the unresolved-reference shape's `text_end`.
        text_end: usize,
        /// Copied from the unresolved-reference shape's `end`; the wrapped
        /// range stops here.
        end: usize,
    },
    /// Bracket byte (one of `[`, `]`, or `!`) that fell through to literal
    /// text. Emission accumulates into the surrounding text run.
    Literal,
}
2341
/// A byte-keyed view of the IR's bracket resolutions.
///
/// Produced by `build_bracket_plan`; the map key is a source byte offset
/// and the value is the [`BracketDispo`] recorded for that byte.
#[derive(Debug, Default, Clone)]
pub struct BracketPlan {
    // Source byte offset → disposition of the bracket byte at that offset.
    by_pos: BTreeMap<usize, BracketDispo>,
}

impl BracketPlan {
    /// Returns the disposition recorded at exactly byte offset `pos`, or
    /// `None` when no bracket entry is keyed at that offset.
    pub fn lookup(&self, pos: usize) -> Option<&BracketDispo> {
        self.by_pos.get(&pos)
    }

    /// Returns `true` when the plan holds no entries at all.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2357
/// A standalone Pandoc inline construct recognised by `build_ir` and
/// dispatched directly from the emission walk. Carries the construct's
/// full source range so the emission walk can slice the content for the
/// existing `emit_*` helpers without re-running the recognition.
///
/// Every variant's `end` is copied by `build_construct_plan` from the
/// `end` of the originating `IrEvent::Construct`. The matching start byte
/// is not stored here: it is the key under which the entry is filed in
/// [`ConstructPlan`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstructDispo {
    /// `^[note text]` — emit via `emit_inline_footnote` after slicing
    /// the inner content.
    InlineFootnote { end: usize },
    /// `<span ...>...</span>` — emit via `emit_native_span` after
    /// re-parsing the open-tag attributes from the source range.
    NativeSpan { end: usize },
    /// `[^id]` — emit via `emit_footnote_reference` after extracting
    /// the label id from the source range.
    FootnoteReference { end: usize },
    /// `[@cite]` — emit via `emit_bracketed_citation` after slicing
    /// the inner content.
    BracketedCitation { end: usize },
    /// `@key` or `-@key` — emit via `emit_bare_citation` (or
    /// `emit_crossref` when `is_quarto_crossref_key` matches and
    /// `extensions.quarto_crossrefs` is enabled).
    BareCitation { end: usize },
    /// `[content]{attrs}` — emit via `emit_bracketed_span` after
    /// slicing the inner content and attribute string.
    BracketedSpan { end: usize },
}
2384
/// A byte-keyed view of the IR's standalone Pandoc constructs that the
/// emission walk consumes directly: inline footnotes, native spans,
/// footnote references, bracketed citations, bare citations, and
/// bracketed spans. Recognition is authoritative in `build_ir` under
/// `Dialect::Pandoc`; the dispatcher's legacy branches for these
/// constructs (`^[`, `<span>`, `[^id]`, `[@cite]`, `@cite` / `-@cite`,
/// `[text]{attrs}`) are gated to `Dialect::CommonMark` only and only
/// fire when the relevant extension is explicitly enabled.
#[derive(Debug, Default, Clone)]
pub struct ConstructPlan {
    // Construct start byte → disposition carrying the construct's end.
    by_pos: BTreeMap<usize, ConstructDispo>,
}

impl ConstructPlan {
    /// Returns the construct recorded as starting at exactly byte offset
    /// `pos`, or `None` when no construct is keyed at that offset.
    pub fn lookup(&self, pos: usize) -> Option<&ConstructDispo> {
        self.by_pos.get(&pos)
    }

    /// Returns `true` when the plan holds no constructs.
    pub fn is_empty(&self) -> bool {
        self.by_pos.is_empty()
    }
}
2407
2408/// Build a [`ConstructPlan`] from the resolved IR. Each
2409/// `Construct { kind: InlineFootnote | NativeSpan, .. }` becomes one
2410/// entry keyed at its start byte.
2411pub fn build_construct_plan(events: &[IrEvent]) -> ConstructPlan {
2412    let mut by_pos: BTreeMap<usize, ConstructDispo> = BTreeMap::new();
2413    for ev in events {
2414        if let IrEvent::Construct { start, end, kind } = ev {
2415            match kind {
2416                ConstructKind::InlineFootnote => {
2417                    by_pos.insert(*start, ConstructDispo::InlineFootnote { end: *end });
2418                }
2419                ConstructKind::NativeSpan => {
2420                    by_pos.insert(*start, ConstructDispo::NativeSpan { end: *end });
2421                }
2422                ConstructKind::FootnoteReference => {
2423                    by_pos.insert(*start, ConstructDispo::FootnoteReference { end: *end });
2424                }
2425                ConstructKind::BracketedCitation => {
2426                    by_pos.insert(*start, ConstructDispo::BracketedCitation { end: *end });
2427                }
2428                ConstructKind::BareCitation => {
2429                    by_pos.insert(*start, ConstructDispo::BareCitation { end: *end });
2430                }
2431                ConstructKind::BracketedSpan => {
2432                    by_pos.insert(*start, ConstructDispo::BracketedSpan { end: *end });
2433                }
2434                _ => {}
2435            }
2436        }
2437    }
2438    ConstructPlan { by_pos }
2439}
2440
2441/// Build a [`BracketPlan`] from the resolved IR. Each `OpenBracket`
2442/// resolution becomes an [`BracketDispo::Open`] keyed at the opener's
2443/// start byte. Unresolved openers and unmatched closers become
2444/// `BracketDispo::Literal` so the emission path can recognise them
2445/// without re-parsing.
2446pub fn build_bracket_plan(events: &[IrEvent]) -> BracketPlan {
2447    let mut by_pos: BTreeMap<usize, BracketDispo> = BTreeMap::new();
2448    for ev in events {
2449        match ev {
2450            IrEvent::OpenBracket {
2451                start,
2452                is_image,
2453                resolution: Some(res),
2454                ..
2455            } => {
2456                by_pos.insert(
2457                    *start,
2458                    BracketDispo::Open {
2459                        is_image: *is_image,
2460                        text_start: res.text_start,
2461                        text_end: res.text_end,
2462                        suffix_start: res.suffix_start,
2463                        suffix_end: res.suffix_end,
2464                        kind: res.kind.clone(),
2465                    },
2466                );
2467            }
2468            IrEvent::OpenBracket {
2469                start,
2470                end,
2471                is_image,
2472                resolution: None,
2473                unresolved_ref: Some(shape),
2474                ..
2475            } => {
2476                by_pos.insert(
2477                    *start,
2478                    BracketDispo::UnresolvedReference {
2479                        is_image: *is_image,
2480                        text_start: *end,
2481                        text_end: shape.text_end,
2482                        end: shape.end,
2483                    },
2484                );
2485            }
2486            IrEvent::OpenBracket {
2487                start,
2488                is_image,
2489                resolution: None,
2490                unresolved_ref: None,
2491                ..
2492            } => {
2493                let len = if *is_image { 2 } else { 1 };
2494                for off in 0..len {
2495                    by_pos.insert(*start + off, BracketDispo::Literal);
2496                }
2497            }
2498            IrEvent::CloseBracket {
2499                pos,
2500                matched: false,
2501            } => {
2502                by_pos.insert(*pos, BracketDispo::Literal);
2503            }
2504            _ => {}
2505        }
2506    }
2507    BracketPlan { by_pos }
2508}
2509
/// One-shot helper: build the IR, run all passes, and return the
/// bundled [`InlinePlans`] (emphasis dispositions, bracket resolutions,
/// and standalone Pandoc constructs) — packaged together so the inline
/// emission path can consume them in one go for either dialect.
///
/// Pass ordering follows the CommonMark §6.3 reference impl: bracket
/// resolution runs first, then emphasis is processed *scoped per resolved
/// bracket pair's inner event range*, then once more on the residual
/// top-level events. This prevents emphasis pairs from forming across a
/// link's bracket boundary, which the previous "all-emphasis-then-all-
/// brackets" order got wrong (e.g. spec example #473).
///
/// `text`, `start`, `end`, and `config` are forwarded unchanged to
/// `build_ir_into`; `config.refdef_labels` and `config.dialect` also
/// drive bracket resolution, and `config.dialect` drives every emphasis
/// pass.
// NOTE(review): `start..end` is presumably the byte range of `text` to
// scan (the tests pass `0` and `text.len()`) — confirm against
// `build_ir_into`.
///
/// The working buffers come from the thread-local scratch pool and are
/// handed back when `scratch` drops at the end of the call; the returned
/// plans are built from the events before that happens.
pub fn build_full_plans(
    text: &str,
    start: usize,
    end: usize,
    config: &ParserOptions,
) -> InlinePlans {
    let mut scratch = ScratchEvents::checkout();
    // `checkout` always stores `Some(bundle)`, so this unwrap cannot fail.
    let bundle = scratch.inner.as_mut().unwrap();
    // Start from empty buffers regardless of what the pool handed back.
    bundle.events.clear();
    bundle.bracket_pairs.clear();
    bundle.excluded.clear();

    build_ir_into(text, start, end, config, &mut bundle.events);
    // §6.3 bracket resolution runs for both dialects. Under CommonMark
    // it enforces refdef-aware shortcut/collapsed/full-ref resolution
    // and the §6.3 link-in-link deactivation rule. Under Pandoc it
    // performs shape-only resolution (any non-empty label resolves) and
    // skips the deactivation pass — pandoc-native is outer-wins for
    // nested links and the dispatcher's `suppress_inner_links` flag
    // suppresses inner LINK emission during LINK-text recursion.
    process_brackets(
        &mut bundle.events,
        text,
        config.refdef_labels.as_ref(),
        config.dialect,
    );

    // Scoped emphasis pass per resolved bracket pair, innermost first.
    // We collect (open_idx, close_idx) pairs of resolved brackets and run
    // emphasis only over the events strictly between them. Innermost-first
    // ordering matters: an outer link wraps emphasis that wraps an inner
    // link, and the inner link's inner range must be paired before the
    // outer's inner range so the top-level pass sees consistent state.
    // Include both resolved-link bracket pairs and Pandoc unresolved-
    // reference bracket pairs in the scoping set. The latter wrap into
    // an `UNRESOLVED_REFERENCE` CST node, which is just as much a tree
    // boundary for emphasis as a resolved `LINK` — emphasis must not
    // pair across the wrapper's brackets, otherwise the emission walk
    // produces a non-tree-shaped CST.
    bundle.bracket_pairs.extend(
        bundle
            .events
            .iter()
            .enumerate()
            .filter_map(|(i, ev)| match ev {
                IrEvent::OpenBracket {
                    resolution: Some(res),
                    ..
                } => Some((i, res.close_event as usize)),
                IrEvent::OpenBracket {
                    resolution: None,
                    unresolved_ref: Some(shape),
                    ..
                } => Some((i, shape.close_event as usize)),
                _ => None,
            }),
    );
    // Innermost-first: sort by close_idx ascending, then open_idx descending.
    bundle
        .bracket_pairs
        .sort_by(|a, b| a.1.cmp(&b.1).then(b.0.cmp(&a.0)));
    // Iterate pairs by index so we can hold &mut bundle.events while
    // reading bundle.bracket_pairs (split borrow on disjoint fields).
    for i in 0..bundle.bracket_pairs.len() {
        let (open_idx, close_idx) = bundle.bracket_pairs[i];
        // Half-open event range: everything after the opener, up to but
        // not including the closer.
        process_emphasis_in_range(&mut bundle.events, open_idx + 1, close_idx, config.dialect);
    }

    // Pandoc-only degrade pass for unresolved bracket-shape patterns
    // whose interior left any delim-run byte unmatched after the scoped
    // emphasis pass. Pandoc-native degrades such brackets to literal `[`
    // / `]` text — the user's intent was clearly not a reference. The
    // bracket_pairs entry stays so the inner delims remain in the
    // top-level exclusion mask (otherwise they'd re-enter pairing and
    // could form Emph spans with delims outside, which pandoc never
    // does — see the bug_2_emphasis_crosses_brackets_pandoc fixture).
    // Flipping `unresolved_ref` to `None` makes `build_bracket_plan`
    // emit `BracketDispo::Literal` for the bracket bytes; flipping
    // `CloseBracket.matched` to `false` does the same for the `]`.
    for i in 0..bundle.bracket_pairs.len() {
        let (open_idx, close_idx) = bundle.bracket_pairs[i];
        let is_unresolved = matches!(
            &bundle.events[open_idx],
            IrEvent::OpenBracket {
                resolution: None,
                unresolved_ref: Some(_),
                ..
            }
        );
        // Resolved links are never degraded.
        if !is_unresolved {
            continue;
        }
        // Interior emphasis paired cleanly: keep the unresolved wrapper.
        if !range_has_unmatched_delim_bytes(&bundle.events, open_idx + 1, close_idx) {
            continue;
        }
        if let IrEvent::OpenBracket { unresolved_ref, .. } = &mut bundle.events[open_idx] {
            *unresolved_ref = None;
        }
        if let IrEvent::CloseBracket { matched, .. } = &mut bundle.events[close_idx] {
            *matched = false;
        }
    }

    // Top-level emphasis pass: handles delim runs that fall outside any
    // resolved bracket pair.
    let len = bundle.events.len();
    if bundle.bracket_pairs.is_empty() {
        // Fast path: no resolved brackets means no exclusion mask needed —
        // skip the resize-and-fill pass entirely. Common for prose
        // paragraphs without inline links.
        process_emphasis_in_range_filtered(&mut bundle.events, 0, len, None, config.dialect);
    } else {
        // Build exclusion bitmap: any delim run whose event index lies
        // inside a resolved bracket pair is excluded from the top-level
        // pass. Implements the §6.3 boundary rule: emphasis at the top
        // level must not pair across a link's brackets.
        // `excluded` was cleared above, so every slot starts out `false`.
        bundle.excluded.resize(len, false);
        for &(open_idx, close_idx) in &bundle.bracket_pairs {
            // Marks event indices `open_idx + 1 .. close_idx`.
            for slot in bundle
                .excluded
                .iter_mut()
                .take(close_idx)
                .skip(open_idx + 1)
            {
                *slot = true;
            }
        }
        process_emphasis_in_range_filtered(
            &mut bundle.events,
            0,
            len,
            Some(&bundle.excluded),
            config.dialect,
        );
    }

    // All three plans are derived from the same, now fully processed,
    // event list.
    InlinePlans {
        emphasis: build_emphasis_plan(&bundle.events),
        brackets: build_bracket_plan(&bundle.events),
        constructs: build_construct_plan(&bundle.events),
    }
}
2663
2664/// Returns true if any [`IrEvent::DelimRun`] in the event range
2665/// `[lo, hi)` has byte coverage from its `matches` vec that is less
2666/// than the run length — i.e. at least one byte of the run failed to
2667/// pair as emphasis. Used by the Pandoc unresolved-reference degrade
2668/// pass in [`build_full_plans`].
2669///
2670/// Delim runs whose flanking rules forbid both opening *and* closing
2671/// (e.g. intraword `_` inside `foo_bar`) are skipped: those bytes were
2672/// never a pairing candidate, so an "unmatched" count for them isn't
2673/// evidence of a failed emphasis attempt. Without this exclusion every
2674/// URL or identifier with an underscore inside an unresolved bracket
2675/// pair would spuriously degrade the bracket-shape to literal text.
2676fn range_has_unmatched_delim_bytes(events: &[IrEvent], lo: usize, hi: usize) -> bool {
2677    let hi = hi.min(events.len());
2678    for ev in &events[lo..hi] {
2679        if let IrEvent::DelimRun {
2680            start,
2681            end,
2682            matches,
2683            can_open,
2684            can_close,
2685            ..
2686        } = ev
2687        {
2688            if !can_open && !can_close {
2689                continue;
2690            }
2691            let total = end - start;
2692            let matched: usize = matches.iter().map(|m| m.len as usize).sum();
2693            if matched < total {
2694                return true;
2695            }
2696        }
2697    }
2698    false
2699}
2700
/// Thread-local pool of scratch buffers used by [`build_full_plans`].
///
/// `build_full_plans` checks out one bundle for the duration of the call
/// and returns it on drop so the next call (or a recursive nested call
/// from an inline emitter) reuses the allocations. The pool is
/// per-thread — the parser is single-threaded — and bounded so a
/// long-running editor session can't accumulate stale capacity.
struct ScratchEvents {
    // The checked-out bundle. `checkout` always sets this to `Some`; the
    // `Drop` impl `take`s it to hand the bundle back to the pool.
    inner: Option<ScratchBundle>,
}
2711
/// The reusable buffers one [`build_full_plans`] call works in. All three
/// are cleared at the start of that call and again before the bundle is
/// returned to the pool, so only their capacity is carried over.
#[derive(Default)]
struct ScratchBundle {
    // IR event list filled by `build_ir_into` and mutated by the bracket
    // and emphasis passes.
    events: Vec<IrEvent>,
    // `(open_idx, close_idx)` event-index pairs of bracket pairs that
    // scope the emphasis passes.
    bracket_pairs: Vec<(usize, usize)>,
    // Per-event mask for the top-level emphasis pass: `true` marks an
    // event index lying inside a bracket pair.
    excluded: Vec<bool>,
}
2718
// Per-thread stack of idle scratch bundles. `ScratchEvents::checkout`
// pops from it (falling back to a fresh default bundle when it is empty)
// and the `Drop` impl of `ScratchEvents` pushes bundles back.
// `const { .. }` gives the thread-local a constant initializer.
thread_local! {
    static IR_EVENT_POOL: std::cell::RefCell<Vec<ScratchBundle>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
2723
2724impl ScratchEvents {
2725    fn checkout() -> Self {
2726        let bundle = IR_EVENT_POOL
2727            .with(|p| p.borrow_mut().pop())
2728            .unwrap_or_default();
2729        Self {
2730            inner: Some(bundle),
2731        }
2732    }
2733}
2734
2735impl Drop for ScratchEvents {
2736    fn drop(&mut self) {
2737        if let Some(mut bundle) = self.inner.take() {
2738            bundle.events.clear();
2739            bundle.bracket_pairs.clear();
2740            bundle.excluded.clear();
2741            // Cap pool depth at 8 (deepest realistic nested-link recursion)
2742            // and drop any bundle whose `events` grew past 8K (a single
2743            // pathological paragraph shouldn't pin a huge allocation
2744            // forever).
2745            if bundle.events.capacity() <= 8192 {
2746                IR_EVENT_POOL.with(|p| {
2747                    let mut pool = p.borrow_mut();
2748                    if pool.len() < 8 {
2749                        pool.push(bundle);
2750                    }
2751                });
2752            }
2753        }
2754    }
2755}
2756
/// Bundle of plans produced by [`build_full_plans`] and consumed by the
/// inline emission walk.
#[derive(Debug, Default, Clone)]
pub struct InlinePlans {
    /// Delimiter dispositions, as produced by `build_emphasis_plan`.
    pub emphasis: EmphasisPlan,
    /// Bracket dispositions, as produced by `build_bracket_plan`.
    pub brackets: BracketPlan,
    /// Standalone construct dispositions, as produced by
    /// `build_construct_plan`.
    pub constructs: ConstructPlan,
}
2765
2766/// Convert the IR's delim-run match decisions into an [`EmphasisPlan`],
2767/// preserving the byte-keyed disposition shape the existing emission walk
2768/// consumes.
2769///
2770/// Each match on a [`DelimRun`](IrEvent::DelimRun) produces one entry in
2771/// the plan: the opener side records `Open` with the partner's source
2772/// byte and length; the closer side records `Close`. Bytes within a run
2773/// that are *not* covered by any match get a `Literal` entry, which the
2774/// emission walk uses to coalesce unmatched delimiter bytes with
2775/// surrounding plain text.
2776pub fn build_emphasis_plan(events: &[IrEvent]) -> EmphasisPlan {
2777    let mut by_pos: BTreeMap<usize, DelimChar> = BTreeMap::new();
2778    for ev in events {
2779        if let IrEvent::DelimRun {
2780            start,
2781            end,
2782            matches,
2783            ..
2784        } = ev
2785        {
2786            for m in matches {
2787                let pos = *start + m.offset_in_run as usize;
2788                let partner_run_start = match &events[m.partner_event as usize] {
2789                    IrEvent::DelimRun { start: ps, .. } => *ps,
2790                    _ => continue,
2791                };
2792                let partner_pos = partner_run_start + m.partner_offset as usize;
2793                if m.is_opener {
2794                    by_pos.insert(
2795                        pos,
2796                        DelimChar::Open {
2797                            len: m.len,
2798                            partner: partner_pos,
2799                            partner_len: m.len,
2800                            kind: m.kind,
2801                        },
2802                    );
2803                } else {
2804                    by_pos.insert(pos, DelimChar::Close);
2805                }
2806            }
2807            // Any remaining bytes (not covered by a match) are literal.
2808            for pos in *start..*end {
2809                by_pos.entry(pos).or_insert(DelimChar::Literal);
2810            }
2811        }
2812    }
2813    EmphasisPlan::from_dispositions(by_pos)
2814}
2815
2816#[cfg(test)]
2817mod tests {
2818    use super::*;
2819    use crate::options::Flavor;
2820    use crate::parser::inlines::inline_ir::DelimChar;
2821    use std::sync::Arc;
2822
2823    fn cm_opts() -> ParserOptions {
2824        let flavor = Flavor::CommonMark;
2825        ParserOptions {
2826            flavor,
2827            dialect: crate::options::Dialect::for_flavor(flavor),
2828            extensions: crate::options::Extensions::for_flavor(flavor),
2829            pandoc_compat: crate::options::PandocCompat::default(),
2830            refdef_labels: None,
2831        }
2832    }
2833
2834    fn refdefs<I: IntoIterator<Item = &'static str>>(labels: I) -> RefdefMap {
2835        Arc::new(labels.into_iter().map(|s| s.to_string()).collect())
2836    }
2837
2838    #[test]
2839    fn ir_event_range_covers_all_variants() {
2840        let txt = IrEvent::Text { start: 0, end: 5 };
2841        assert_eq!(txt.range(), (0, 5));
2842
2843        let close = IrEvent::CloseBracket {
2844            pos: 7,
2845            matched: false,
2846        };
2847        assert_eq!(close.range(), (7, 8));
2848
2849        let open = IrEvent::OpenBracket {
2850            start: 1,
2851            end: 3,
2852            is_image: true,
2853            active: true,
2854            resolution: None,
2855            unresolved_ref: None,
2856        };
2857        assert_eq!(open.range(), (1, 3));
2858    }
2859
2860    #[test]
2861    fn scan_records_text_and_delim_run() {
2862        let opts = cm_opts();
2863        let ir = build_ir("foo *bar*", 0, 9, &opts);
2864        // Expect: Text "foo ", DelimRun "*", Text "bar", DelimRun "*"
2865        assert!(matches!(ir[0], IrEvent::Text { start: 0, end: 4 }));
2866        assert!(matches!(
2867            ir[1],
2868            IrEvent::DelimRun {
2869                ch: b'*',
2870                start: 4,
2871                end: 5,
2872                ..
2873            }
2874        ));
2875        assert!(matches!(ir[2], IrEvent::Text { start: 5, end: 8 }));
2876        assert!(matches!(
2877            ir[3],
2878            IrEvent::DelimRun {
2879                ch: b'*',
2880                start: 8,
2881                end: 9,
2882                ..
2883            }
2884        ));
2885    }
2886
2887    #[test]
2888    fn scan_records_brackets() {
2889        let opts = cm_opts();
2890        let ir = build_ir("[foo]", 0, 5, &opts);
2891        assert!(matches!(
2892            ir[0],
2893            IrEvent::OpenBracket {
2894                start: 0,
2895                end: 1,
2896                is_image: false,
2897                ..
2898            }
2899        ));
2900        assert!(matches!(ir[1], IrEvent::Text { start: 1, end: 4 }));
2901        assert!(matches!(
2902            ir[2],
2903            IrEvent::CloseBracket {
2904                pos: 4,
2905                matched: false
2906            }
2907        ));
2908    }
2909
2910    #[test]
2911    fn scan_records_image_bracket() {
2912        let opts = cm_opts();
2913        let ir = build_ir("![alt]", 0, 6, &opts);
2914        assert!(matches!(
2915            ir[0],
2916            IrEvent::OpenBracket {
2917                start: 0,
2918                end: 2,
2919                is_image: true,
2920                ..
2921            }
2922        ));
2923    }
2924
2925    #[test]
2926    fn scan_handles_code_span_opacity() {
2927        let opts = cm_opts();
2928        let ir = build_ir("a `*x*` b", 0, 9, &opts);
2929        // Code span `*x*` should be a Construct, NOT delim runs.
2930        let has_delim_run = ir.iter().any(|e| matches!(e, IrEvent::DelimRun { .. }));
2931        assert!(
2932            !has_delim_run,
2933            "code span content should not produce delim runs"
2934        );
2935        assert!(ir.iter().any(|e| matches!(
2936            e,
2937            IrEvent::Construct {
2938                kind: ConstructKind::CodeSpan,
2939                ..
2940            }
2941        )));
2942    }
2943
2944    #[test]
2945    fn process_emphasis_simple_pair() {
2946        let opts = cm_opts();
2947        let mut ir = build_ir("*foo*", 0, 5, &opts);
2948        process_emphasis(&mut ir, opts.dialect);
2949        // First DelimRun (open) gets a match.
2950        let opener = ir
2951            .iter()
2952            .find(|e| matches!(e, IrEvent::DelimRun { start: 0, .. }))
2953            .unwrap();
2954        if let IrEvent::DelimRun { matches, .. } = opener {
2955            assert_eq!(matches.len(), 1);
2956            assert!(matches[0].is_opener);
2957            assert_eq!(matches[0].kind, EmphasisKind::Emph);
2958        }
2959    }
2960
2961    #[test]
2962    fn brackets_resolve_inline_link() {
2963        let opts = cm_opts();
2964        let mut ir = build_ir("[foo](/url)", 0, 11, &opts);
2965        process_brackets(&mut ir, "[foo](/url)", None, opts.dialect);
2966        let open = ir
2967            .iter()
2968            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
2969            .unwrap();
2970        if let IrEvent::OpenBracket { resolution, .. } = open {
2971            let r = resolution.as_ref().expect("inline link resolved");
2972            assert!(matches!(r.kind, LinkKind::Inline { .. }));
2973            if let LinkKind::Inline { dest, .. } = &r.kind {
2974                assert_eq!(dest, "/url");
2975            }
2976        }
2977    }
2978
2979    #[test]
2980    fn brackets_shortcut_resolves_only_with_refdef() {
2981        let opts = cm_opts();
2982        let text = "[foo]";
2983        let map = refdefs(["foo"]);
2984        let mut ir = build_ir(text, 0, text.len(), &opts);
2985        process_brackets(&mut ir, text, Some(&map), opts.dialect);
2986        let open = ir
2987            .iter()
2988            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
2989            .unwrap();
2990        if let IrEvent::OpenBracket { resolution, .. } = open {
2991            assert!(matches!(
2992                resolution.as_ref().unwrap().kind,
2993                LinkKind::ShortcutReference
2994            ));
2995        }
2996    }
2997
2998    #[test]
2999    fn brackets_shortcut_falls_through_without_refdef() {
3000        // CMark example #523 mechanic: `[bar* baz]` is not a refdef, so
3001        // it must NOT resolve as a link — the brackets stay literal so
3002        // the inner `*` becomes available to the outer emphasis scanner.
3003        let opts = cm_opts();
3004        let text = "[bar* baz]";
3005        let mut ir = build_ir(text, 0, text.len(), &opts);
3006        process_brackets(&mut ir, text, None, opts.dialect);
3007        let open = ir
3008            .iter()
3009            .find(|e| matches!(e, IrEvent::OpenBracket { start: 0, .. }))
3010            .unwrap();
3011        if let IrEvent::OpenBracket { resolution, .. } = open {
3012            assert!(resolution.is_none(), "no refdef → bracket stays literal");
3013        }
3014    }
3015
3016    /// Spec #473: `*[bar*](/url)`. The link `[bar*](/url)` resolves; the
3017    /// outer `*...*` MUST NOT pair across the link's bracket boundary,
3018    /// because the inner `*` belongs to the link text.
3019    #[test]
3020    fn full_plans_emphasis_does_not_cross_resolved_link_boundary() {
3021        let opts = cm_opts();
3022        let text = "*[bar*](/url)";
3023        let plans = build_full_plans(text, 0, text.len(), &opts);
3024        // The leading `*` (at byte 0) must NOT be matched as an emphasis
3025        // opener — there's no closer outside the link, and the inner `*`
3026        // (at byte 5) is inside the resolved link's text range so it must
3027        // not be paired with byte 0.
3028        assert!(
3029            matches!(plans.emphasis.lookup(0), Some(DelimChar::Literal) | None),
3030            "outer `*` at byte 0 must not pair across link boundary, got {:?}",
3031            plans.emphasis.lookup(0)
3032        );
3033        // The link `[bar*](/url)` must resolve (opener at byte 1).
3034        assert!(
3035            matches!(plans.brackets.lookup(1), Some(BracketDispo::Open { .. })),
3036            "link [bar*](/url) must resolve at byte 1"
3037        );
3038    }
3039
3040    fn pandoc_opts() -> ParserOptions {
3041        let flavor = Flavor::Pandoc;
3042        ParserOptions {
3043            flavor,
3044            dialect: crate::options::Dialect::for_flavor(flavor),
3045            extensions: crate::options::Extensions::for_flavor(flavor),
3046            pandoc_compat: crate::options::PandocCompat::default(),
3047            refdef_labels: None,
3048        }
3049    }
3050
3051    /// Bug #2 (a): unresolved Pandoc bracket-shape with unmatched delim
3052    /// inside its text degrades to literal `[`/`]`. Outer emphasis pair
3053    /// across the (now-literal) brackets must form.
3054    #[test]
3055    fn full_plans_unresolved_bracket_degrades_when_inner_delim_unmatched() {
3056        let opts = pandoc_opts();
3057        let text = "*foo [bar*] baz*";
3058        let plans = build_full_plans(text, 0, text.len(), &opts);
3059        assert!(
3060            matches!(plans.brackets.lookup(5), Some(BracketDispo::Literal) | None),
3061            "degraded `[` at byte 5 must be Literal/None, got {:?}",
3062            plans.brackets.lookup(5)
3063        );
3064        assert!(
3065            matches!(plans.emphasis.lookup(0), Some(DelimChar::Open { .. })),
3066            "outer `*` at byte 0 must open Emph after degrade, got {:?}",
3067            plans.emphasis.lookup(0)
3068        );
3069    }
3070
3071    /// Intraword `_` (e.g. inside a URL like
3072    /// `hyperparameter_optimization`) is not flanking — `can_open` and
3073    /// `can_close` are both false — so it can never pair as emphasis.
3074    /// The degrade pass must not treat such delim runs as "failed
3075    /// emphasis attempts" and demote the surrounding bracket-shape to
3076    /// literal text, otherwise every URL/identifier inside an
3077    /// unresolved reference round-trips through `\[` / `\]` escapes
3078    /// under `tex_math_single_backslash` and reparses as display math.
3079    #[test]
3080    fn full_plans_unresolved_bracket_keeps_wrapper_with_intraword_underscore() {
3081        let opts = pandoc_opts();
3082        let text = "[foo_bar more]";
3083        let plans = build_full_plans(text, 0, text.len(), &opts);
3084        assert!(
3085            matches!(
3086                plans.brackets.lookup(0),
3087                Some(BracketDispo::UnresolvedReference { .. })
3088            ),
3089            "wrapper must be preserved across intraword `_`, got {:?}",
3090            plans.brackets.lookup(0)
3091        );
3092    }
3093
3094    /// Bug #2 (b): unresolved Pandoc bracket whose interior emphasis
3095    /// pairs cleanly keeps the wrapper (linter/LSP hook).
3096    #[test]
3097    fn full_plans_unresolved_bracket_keeps_wrapper_when_inner_paired() {
3098        let opts = pandoc_opts();
3099        let text = "[foo *bar*]";
3100        let plans = build_full_plans(text, 0, text.len(), &opts);
3101        assert!(
3102            matches!(
3103                plans.brackets.lookup(0),
3104                Some(BracketDispo::UnresolvedReference { .. })
3105            ),
3106            "wrapper must be preserved when inner emph pairs, got {:?}",
3107            plans.brackets.lookup(0)
3108        );
3109    }
3110
3111    /// Spec #533: `[foo *bar [baz][ref]*][ref]` with `[ref]: /uri`.
3112    /// Inner `[baz][ref]` resolves as a link; §6.3 link-in-link rule
3113    /// deactivates the outer `[foo ...][ref]` so it falls through to
3114    /// literal brackets. Emphasis `*bar [baz][ref]*` wraps the inner link.
3115    #[test]
3116    fn full_plans_link_in_link_suppression_for_reference_links() {
3117        let opts = cm_opts();
3118        let text = "[foo *bar [baz][ref]*][ref]";
3119        let mut opts_with_refs = opts.clone();
3120        let labels: HashSet<String> = ["ref".to_string()].into_iter().collect();
3121        opts_with_refs.refdef_labels = Some(std::sync::Arc::new(labels));
3122        let plans = build_full_plans(text, 0, text.len(), &opts_with_refs);
3123
3124        // Inner `[baz][ref]` opener is at byte 10 — must resolve.
3125        assert!(
3126            matches!(plans.brackets.lookup(10), Some(BracketDispo::Open { .. })),
3127            "inner [baz][ref] must resolve at byte 10, got {:?}",
3128            plans.brackets.lookup(10)
3129        );
3130        // Outer `[foo ...][ref]` opener is at byte 0 — must NOT resolve
3131        // (link-in-link suppression).
3132        assert!(
3133            matches!(plans.brackets.lookup(0), Some(BracketDispo::Literal) | None),
3134            "outer [foo ...][ref] must fall through to literal at byte 0, got {:?}",
3135            plans.brackets.lookup(0)
3136        );
3137        // Trailing `[ref]` after the outer `]` is at byte 22 — it's a
3138        // standalone shortcut reference and must resolve.
3139        assert!(
3140            matches!(plans.brackets.lookup(22), Some(BracketDispo::Open { .. })),
3141            "trailing [ref] must resolve at byte 22, got {:?}",
3142            plans.brackets.lookup(22)
3143        );
3144        // Emphasis `*...*` at bytes 5 and 20 must pair — the scoped
3145        // emphasis pass over the (deactivated) outer bracket's inner
3146        // event range pairs these.
3147        assert!(
3148            matches!(plans.emphasis.lookup(5), Some(DelimChar::Open { .. })),
3149            "emphasis opener at byte 5 must pair, got {:?}",
3150            plans.emphasis.lookup(5)
3151        );
3152    }
3153}