Skip to main content

satteri_pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25    cmp::{max, min},
26    iter::FusedIterator,
27    num::NonZeroUsize,
28    ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33use crate::{
34    firstpass::run_first_pass,
35    linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
36    mdx::*,
37    scanners::*,
38    strings::CowStr,
39    tree::{Tree, TreeIndex},
40    Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
41    MetadataBlockKind, Options, Tag, TagEnd,
42};
43
/// Maximum nesting depth of parentheses accepted inside a link destination.
///
/// Allowing arbitrary depth nested parentheses inside link destinations
/// can create denial of service vulnerabilities if we're not careful.
/// The simplest countermeasure is to limit their depth, which is
/// explicitly allowed by the spec as long as the limit is at least 3:
/// <https://spec.commonmark.org/0.29/#link-destination>
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
50
/// A single node payload stored in the parse tree (`Tree<Item>`).
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    /// Byte offset into the source text where this item starts.
    pub start: usize,
    /// Byte offset into the source text where this item ends; the code in
    /// this file uses it as an exclusive slice bound.
    pub end: usize,
    /// What kind of node this is, plus any per-kind data.
    pub body: ItemBody,
}
57
/// The kind of an [`Item`], together with any small per-kind payload.
///
/// Variants are grouped into unresolved ("maybe") inline items, resolved
/// inline items, block items, and MDX items. The default value is
/// [`ItemBody::Root`].
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // These are possible inline items, need to be resolved in second pass.

    // repeats, can_open, can_close
    MaybeEmphasis(usize, bool, bool),
    // preceded_by_backslash, brace context
    MaybeMath(bool, u8),
    // quote byte, can_open, can_close
    MaybeSmartQuote(u8, bool, bool),
    MaybeCode(usize, bool), // number of backticks, preceded by backslash
    MaybeHtml,
    MaybeLinkOpen,
    // bool indicates whether or not the preceding section could be a reference
    MaybeLinkClose(bool),
    MaybeImage,

    // These are inline items after resolution.
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    Math(CowIndex, bool), // true for display math
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    FootnoteReference(CowIndex),
    TaskListMarker(bool), // true for checked

    // These are also inline items.
    InlineHtml,
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    SynthesizeChar(char),
    Html,
    Text {
        backslash_escaped: bool,
    },
    SoftBreak,
    // true = is backslash
    HardBreak(bool),

    // Dummy node at the top of the tree - should not be used otherwise!
    #[default]
    Root,

    // These are block items.
    Paragraph,
    TightParagraph,
    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
    FencedCodeBlock(CowIndex),
    MathBlock(CowIndex), // meta string (info after $$)
    // bool: true = lazy/no-extend (block was opened as a single-line
    // synthetic split, e.g. after an empty list item closed via blank
    // line); arena_build's trailing-indent extension must skip it.
    IndentCodeBlock(bool),
    // bool: true = trim trailing newline from value (type 6/7 always;
    // type 1-5 only when their closer pattern was found, not when the
    // block ran out of input at EOF)
    HtmlBlock(bool),
    BlockQuote(Option<BlockQuoteKind>),
    ContainerDirective(u8, DirectiveIndex), // (fence length, directive data)
    LeafDirective(DirectiveIndex),
    TextDirective(DirectiveIndex),
    List(bool, u8, u64),   // is_tight, list character, list start index
    ListItem(usize, bool), // indent level, spread (loose item)
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    // Definition lists
    DefinitionList(bool), // is_tight
    // gets turned into either a paragraph or a definition list title,
    // depending on whether there's a definition after it
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    DefinitionListDefinition(usize),

    // Tables
    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,

    // MDX
    MdxJsxFlowElement(JsxElementIndex),
    MdxJsxTextElement(JsxElementIndex),
    MdxFlowExpression(CowIndex),
    MdxTextExpression(CowIndex),
    MdxEsm(CowIndex),
}
149
150impl ItemBody {
151    pub(crate) fn is_maybe_inline(&self) -> bool {
152        use ItemBody::*;
153        matches!(
154            *self,
155            MaybeEmphasis(..)
156                | MaybeMath(..)
157                | MaybeSmartQuote(..)
158                | MaybeCode(..)
159                | MaybeHtml
160                | MaybeLinkOpen
161                | MaybeLinkClose(..)
162                | MaybeImage
163        )
164    }
165    pub(crate) fn is_block_level(&self) -> bool {
166        !self.is_inline() && !matches!(self, ItemBody::Root)
167    }
168    fn is_inline(&self) -> bool {
169        use ItemBody::*;
170        matches!(
171            *self,
172            MaybeEmphasis(..)
173                | MaybeMath(..)
174                | MaybeSmartQuote(..)
175                | MaybeCode(..)
176                | MaybeHtml
177                | MaybeLinkOpen
178                | MaybeLinkClose(..)
179                | MaybeImage
180                | Emphasis
181                | Strong
182                | Strikethrough
183                | Math(..)
184                | Code(..)
185                | Link(..)
186                | Image(..)
187                | FootnoteReference(..)
188                | TaskListMarker(..)
189                | InlineHtml
190                | OwnedInlineHtml(..)
191                | SynthesizeText(..)
192                | SynthesizeChar(..)
193                | Html
194                | Text { .. }
195                | SoftBreak
196                | HardBreak(..)
197        )
198    }
199}
200
/// Description of a reference-style link or image whose label has no
/// matching reference definition; handed to
/// `ParserCallbacks::handle_broken_link`.
#[derive(Debug)]
pub struct BrokenLink<'a> {
    /// Range associated with the broken link.
    // NOTE(review): presumably byte offsets into the markdown source — confirm.
    pub span: core::ops::Range<usize>,
    /// The link type at the usage site.
    pub link_type: LinkType,
    /// The link label that failed to resolve.
    pub reference: CowStr<'a>,
}
207
/// Markdown event iterator.
///
/// `CB` is the callbacks type; it defaults to `DefaultParserCallbacks`.
pub struct Parser<'input, CB = DefaultParserCallbacks> {
    // User-supplied callbacks (e.g. broken-link handling).
    callbacks: CB,
    // All callback-independent parser state.
    inner: ParserInner<'input>,
}
213
// Inner state for `Parser`, extracted so that it can remain generic over the callback without
// re-compiling complex logic for each instantiation of the generic type.
pub(crate) struct ParserInner<'input> {
    // The markdown source being parsed.
    pub(crate) text: &'input str,
    // Enabled parser extensions.
    pub(crate) options: Options,
    // Item tree produced by the first pass; inline items are resolved in place.
    pub(crate) tree: Tree<Item>,
    // Side tables produced by the first pass (reference definitions, JSX
    // element data, ...), referenced from items by index.
    pub(crate) allocs: Allocations<'input>,
    // NOTE(review): presumably bookkeeping that bounds repeated inline-HTML
    // scans — confirm against `HtmlScanGuard`.
    html_scan_guard: HtmlScanGuard,

    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
    // Consider this example:
    //
    //     [x]: xxx...
    //     [x]
    //     [x]
    //     [x]
    //
    // Which expands to this HTML:
    //
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //
    // This is quadratic growth, because it's filling in the area of a square.
    // To prevent this, track how much it's expanded and limit it.
    link_ref_expansion_limit: usize,

    /// MDX validation errors collected during parsing, as
    /// `(offset, message)` pairs. `ParserInner::new` seeds this with the
    /// errors reported by the first pass; inline parsing appends to it.
    pub(crate) mdx_errors: Vec<(usize, String)>,

    // used by inline passes. store them here for reuse
    inline_stack: InlineStack,
    link_stack: LinkStack,
    wikilink_stack: LinkStack,
    code_delims: CodeDelims,
    math_delims: MathDelims,
}
251
252impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
253    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
254        // Only print the fields that have public types.
255        f.debug_struct("Parser")
256            .field("text", &self.inner.text)
257            .field("options", &self.inner.options)
258            .field("callbacks", &..)
259            .finish()
260    }
261}
262
263impl<'a> BrokenLink<'a> {
264    /// Moves the link into version with a static lifetime.
265    ///
266    /// The `reference` member is cloned to a Boxed or Inline version.
267    pub fn into_static(self) -> BrokenLink<'static> {
268        BrokenLink {
269            span: self.span.clone(),
270            link_type: self.link_type,
271            reference: self.reference.into_string().into(),
272        }
273    }
274}
275
276impl<'input> Parser<'input, DefaultParserCallbacks> {
277    /// Creates a new event iterator for a markdown string without any options enabled.
278    pub fn new(text: &'input str) -> Self {
279        Self::new_ext(text, Options::empty())
280    }
281
282    /// Creates a new event iterator for a markdown string with given options.
283    pub fn new_ext(text: &'input str, options: Options) -> Self {
284        Self::new_with_callbacks(text, options, DefaultParserCallbacks)
285    }
286}
287
288impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
289    /// Creates a new event iterator for markdown text with given options and callbacks.
290    ///
291    /// ```
292    /// # use satteri_pulldown_cmark::{BrokenLink, CowStr, Event, Options, Parser, ParserCallbacks, Tag};
293    /// struct CustomCallbacks;
294    /// impl<'input> ParserCallbacks<'input> for CustomCallbacks {
295    ///     fn handle_broken_link(
296    ///         &mut self,
297    ///         link: BrokenLink<'input>,
298    ///     ) -> Option<(CowStr<'input>, CowStr<'input>)> {
299    ///         Some(("https://target".into(), link.reference))
300    ///     }
301    /// }
302    ///
303    /// let mut parser =
304    ///     Parser::new_with_callbacks("[broken]", Options::empty(), CustomCallbacks);
305    ///
306    /// assert!(matches!(
307    ///     parser.nth(1),
308    ///     Some(Event::Start(Tag::Link { .. }))
309    /// ));
310    /// ```
311    ///
312    /// See the [`ParserCallbacks`] trait for a list of callbacks that can be overridden.
313    pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
314        let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
315        tree.reset();
316        let inline_stack = Default::default();
317        let link_stack = Default::default();
318        let wikilink_stack = Default::default();
319        let html_scan_guard = Default::default();
320        Parser {
321            callbacks,
322
323            inner: ParserInner {
324                text,
325                options,
326                tree,
327                allocs,
328                inline_stack,
329                link_stack,
330                wikilink_stack,
331                html_scan_guard,
332                // always allow 100KiB
333                link_ref_expansion_limit: text.len().max(100_000),
334                mdx_errors: Vec::new(),
335                code_delims: CodeDelims::new(),
336                math_delims: MathDelims::new(),
337            },
338        }
339    }
340
341    /// Returns a reference to the internal `RefDefs` object, which provides access
342    /// to the internal map of reference definitions.
343    pub fn reference_definitions(&self) -> &RefDefs<'_> {
344        &self.inner.allocs.refdefs
345    }
346
347    /// Returns MDX validation errors collected during parsing.
348    /// Only populated when [`Options::ENABLE_MDX`] is active.
349    pub fn mdx_errors(&self) -> &[(usize, String)] {
350        &self.inner.mdx_errors
351    }
352
353    /// Consumes the event iterator and produces an iterator that produces
354    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
355    /// range in the markdown source.
356    pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
357        OffsetIter { parser: self }
358    }
359}
360
361impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
362    /// In case the parser encounters any potential links that have a broken
363    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
364    /// the provided callback will be called with the reference name,
365    /// and the returned pair will be used as the link URL and title if it is not
366    /// `None`.
367    ///
368    /// This constructor is provided for backwards compatibility.
369    /// This and other callbacks can also be customized with [`Parser::new_with_callbacks`].
370    pub fn new_with_broken_link_callback(
371        text: &'input str,
372        options: Options,
373        broken_link_callback: Option<F>,
374    ) -> Self
375    where
376        F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
377    {
378        Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
379    }
380}
381
382impl<'input> ParserInner<'input> {
383    pub(crate) fn new(text: &'input str, options: Options) -> Self {
384        let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
385        tree.reset();
386        ParserInner {
387            text,
388            options,
389            tree,
390            allocs,
391            inline_stack: Default::default(),
392            link_stack: Default::default(),
393            wikilink_stack: Default::default(),
394            html_scan_guard: Default::default(),
395            link_ref_expansion_limit: text.len().max(100_000),
396            mdx_errors: firstpass_mdx_errors,
397            code_delims: CodeDelims::new(),
398            math_delims: MathDelims::new(),
399        }
400    }
401
402    /// Use a link label to fetch a type, url, and title.
403    ///
404    /// This function enforces the [`link_ref_expansion_limit`].
405    /// If it returns Some, it also consumes some of the fuel.
406    /// If we're out of fuel, it immediately returns None.
407    ///
408    /// The URL and title are found in the [`RefDefs`] map.
409    /// If they're not there, and a callback was provided by the user,
410    /// `handle_broken_link` will be invoked and given the opportunity
411    /// to provide a fallback.
412    ///
413    /// The link type (that's "link" or "image") depends on the usage site, and
414    /// is provided by the caller of this function.
415    /// This function returns a new one because, if it has to invoke a callback
416    /// to find the information, the link type is [mapped to an unknown type].
417    ///
418    /// [mapped to an unknown type]: crate::LinkType::to_unknown
419    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
420    fn fetch_link_type_url_title(
421        &mut self,
422        link_label: CowStr<'input>,
423        span: Range<usize>,
424        link_type: LinkType,
425        callbacks: &mut dyn ParserCallbacks<'input>,
426    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
427        if self.link_ref_expansion_limit == 0 {
428            return None;
429        }
430
431        let (link_type, url, title) = self
432            .allocs
433            .refdefs
434            .get(link_label.as_ref())
435            .map(|matching_def| {
436                // found a matching definition!
437                let title = matching_def
438                    .title
439                    .as_ref()
440                    .cloned()
441                    .unwrap_or_else(|| "".into());
442                let url = matching_def.dest.clone();
443                (link_type, url, title)
444            })
445            .or_else(|| {
446                // Construct a BrokenLink struct, which will be passed to the callback
447                let broken_link = BrokenLink {
448                    span,
449                    link_type,
450                    reference: link_label,
451                };
452
453                callbacks
454                    .handle_broken_link(broken_link)
455                    .map(|(url, title)| (link_type.to_unknown(), url, title))
456            })?;
457
458        // Limit expansion from link references.
459        // This isn't a problem for footnotes, because multiple references to the same one
460        // reuse the same node, but links/images get their HREF/SRC copied.
461        self.link_ref_expansion_limit = self
462            .link_ref_expansion_limit
463            .saturating_sub(url.len() + title.len());
464
465        Some((link_type, url, title))
466    }
467
468    /// Handle inline markup.
469    ///
470    /// When the parser encounters any item indicating potential inline markup, all
471    /// inline markup passes are run on the remainder of the chain.
472    ///
473    /// Note: there's some potential for optimization here, but that's future work.
474    pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
475        self.handle_inline_pass1(callbacks);
476        // Resolve attention (emphasis/strong) and strikethrough/sub/sup.
477        // micromark runs each construct's `resolveAll` in the order each
478        // construct first fires; whichever marker appears first in the
479        // block decides whether emphasis or strikethrough resolves
480        // first. This matters when their would-be spans cross:
481        //   * `*~bar~*`  – first marker `*` → emphasis first, then
482        //     strikethrough inside the emphasis.
483        //   * `~_~:_<`   – first marker `~` → strikethrough first,
484        //     capturing `_` as content; `_` at offset 4 is then alone.
485        //   * `_/~z)*~*nf` – first marker `_`, no `_` closer → emphasis
486        //     first (pairs `*..*`); `~..~` would cross the emphasis so
487        //     it can't form in the second pass.
488        // Each pass is recursive: after pairing at root, it descends
489        // into already-formed spans so that inner markers (e.g.
490        // `~_a_~` → `_a_` inside the strikethrough) still resolve.
491        let st_enabled = self.options.contains(Options::ENABLE_STRIKETHROUGH)
492            || self.options.contains(Options::ENABLE_SUBSCRIPT)
493            || self.options.contains(Options::ENABLE_SUPERSCRIPT);
494        if !st_enabled {
495            self.handle_emphasis_pass();
496            return;
497        }
498        let strikethrough_first = matches!(
499            self.first_inline_marker_char(self.tree.cur()),
500            Some(b'~') | Some(b'^')
501        );
502        if strikethrough_first {
503            self.handle_tildes_carets_pass();
504            self.handle_emphasis_pass();
505        } else {
506            self.handle_emphasis_pass();
507            self.handle_tildes_carets_pass();
508        }
509    }
510
511    /// Find the first MaybeEmphasis token in `start..` whose character
512    /// is one of `*` `_` `~` `^`. Used to pick the resolve order.
513    fn first_inline_marker_char(&self, start: Option<TreeIndex>) -> Option<u8> {
514        let mut cur = start;
515        while let Some(cur_ix) = cur {
516            if let ItemBody::MaybeEmphasis(_, _, _) = self.tree[cur_ix].item.body {
517                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
518                if matches!(c, b'*' | b'_' | b'~' | b'^') {
519                    return Some(c);
520                }
521            }
522            cur = self.tree[cur_ix].next;
523        }
524        None
525    }
526
527    /// Recursive emphasis pass. Processes `*`/`_` MaybeEmphasis at this
528    /// scope, then descends into any inline containers (Emphasis,
529    /// Strong, Strikethrough, Link, Image, etc.) to do the same in
530    /// their children.
531    fn handle_emphasis_pass(&mut self) {
532        let start = self.tree.cur();
533        self.resolve_emphasis_recursive(start);
534    }
535
536    fn resolve_emphasis_recursive(&mut self, start: Option<TreeIndex>) {
537        // Save and reset the shared inline_stack so each scope works
538        // with a fresh one. Smart-quote state is local to
539        // `handle_emphasis_in_scope`, no save needed.
540        let saved = core::mem::take(&mut self.inline_stack);
541        self.handle_emphasis_in_scope(start);
542        self.inline_stack = saved;
543
544        let mut cur = start;
545        while let Some(cur_ix) = cur {
546            let next = self.tree[cur_ix].next;
547            match self.tree[cur_ix].item.body {
548                ItemBody::Emphasis
549                | ItemBody::Strong
550                | ItemBody::Strikethrough
551                | ItemBody::Subscript
552                | ItemBody::Superscript
553                | ItemBody::Link(_)
554                | ItemBody::Image(_) => {
555                    let child = self.tree[cur_ix].child;
556                    self.resolve_emphasis_recursive(child);
557                }
558                _ => {}
559            }
560            cur = next;
561        }
562    }
563
564    /// Handle inline HTML, code spans, and links.
565    ///
566    /// This function handles both inline HTML and code spans, because they have
567    /// the same precedence. It also handles links, even though they have lower
568    /// precedence, because the URL of links must not be processed.
569    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
570        let mut cur = self.tree.cur();
571        let mut prev = None;
572
573        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
574        let block_text = &self.text[..block_end];
575
576        while let Some(mut cur_ix) = cur {
577            match self.tree[cur_ix].item.body {
578                ItemBody::MaybeHtml => {
579                    // MDX inline JSX: check before HTML
580                    if self.options.contains(Options::ENABLE_MDX) {
581                        let start = self.tree[cur_ix].item.start;
582                        let next_byte = block_text.as_bytes().get(start + 1).copied();
583
584                        // In MDX, `<!` is not valid (no HTML comments).
585                        if next_byte == Some(b'!') {
586                            self.mdx_errors.push((
587                                start,
588                                "Unexpected character `!` (U+0021) before name, expected a \
589                                 character that can start a name, such as a letter, `$`, or `_` \
590                                 (note: to create a comment in MDX, use `{/* text */}`)"
591                                    .to_string(),
592                            ));
593                            self.tree[cur_ix].item.body = ItemBody::Text {
594                                backslash_escaped: false,
595                            };
596                            prev = cur;
597                            cur = self.tree[cur_ix].next;
598                            continue;
599                        }
600
601                        if let Some(total_len) =
602                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
603                        {
604                            let end = start + total_len;
605                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
606                            let raw = &block_text[start..end];
607                            let col = crate::mdx::column_at(block_text.as_bytes(), start);
608                            let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col, 0);
609                            let mut allocator = oxc_allocator::Allocator::default();
610                            crate::mdx::validate_jsx_expressions(
611                                &jsx_data.attrs,
612                                start,
613                                &mut allocator,
614                                &mut self.mdx_errors,
615                            );
616                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
617                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
618                            self.tree[cur_ix].item.end = end;
619                            self.tree[cur_ix].next = node;
620                            prev = cur;
621                            cur = node;
622                            if let Some(node_ix) = cur {
623                                self.tree[node_ix].item.start =
624                                    max(self.tree[node_ix].item.start, end);
625                            }
626                            continue;
627                        }
628
629                        // mdx-js fallback rule:
630                        //   `<` + space/tab → always literal `<` (text).
631                        //   `<` + newline   → JSX tag may span lines; treat
632                        //                      as text only if the next
633                        //                      non-whitespace byte is benign
634                        //                      (not `>`, not EOF/blank-line)
635                        //                      AND the line containing it
636                        //                      isn't a setext underline
637                        //                      (`-`+ or `=`+), which would
638                        //                      promote the `<` into a heading
639                        //                      whose JSX validation fails.
640                        //   `<` + anything else (incl. EOF) → parse error
641                        //                      (`<\`, `<,`, `<{`, `<<`, `<.`,
642                        //                       …).
643                        let bytes_block = block_text.as_bytes();
644                        let is_text_fallback = match next_byte {
645                            Some(b' ' | b'\t') => true,
646                            Some(b'\n' | b'\r') => {
647                                // Skip whitespace + container prefixes when
648                                // probing for the first significant byte
649                                // after `\n`. A `>` at line start inside a
650                                // blockquote is the container marker, not a
651                                // JSX-like delimiter.
652                                let bq_depth = self
653                                    .tree
654                                    .walk_spine()
655                                    .filter(|&&ix| {
656                                        matches!(self.tree[ix].item.body, ItemBody::BlockQuote(..))
657                                    })
658                                    .count();
659                                let mut probe = start + 1;
660                                loop {
661                                    while probe < bytes_block.len()
662                                        && matches!(
663                                            bytes_block[probe],
664                                            b' ' | b'\t' | b'\n' | b'\r'
665                                        )
666                                    {
667                                        probe += 1;
668                                    }
669                                    if bq_depth == 0
670                                        || probe >= bytes_block.len()
671                                        || bytes_block[probe] != b'>'
672                                    {
673                                        break;
674                                    }
675                                    let mut consumed = 0;
676                                    while consumed < bq_depth
677                                        && probe < bytes_block.len()
678                                        && bytes_block[probe] == b'>'
679                                    {
680                                        probe += 1;
681                                        if probe < bytes_block.len() && bytes_block[probe] == b' ' {
682                                            probe += 1;
683                                        }
684                                        consumed += 1;
685                                    }
686                                }
687                                if probe >= bytes_block.len() || bytes_block[probe] == b'>' {
688                                    false
689                                } else {
690                                    // Reject if `probe`'s line is a setext
691                                    // underline (only `-` or only `=`, then
692                                    // optional whitespace to EOL/EOF) AND
693                                    // would actually promote the `<`-line
694                                    // to a heading. Inside a blockquote
695                                    // container the underline line is
696                                    // typically a lazy continuation (no
697                                    // `>` prefix) and doesn't promote, so
698                                    // skip the rejection.
699                                    let underline_char = bytes_block[probe];
700                                    if !matches!(underline_char, b'-' | b'=') {
701                                        true
702                                    } else {
703                                        let mut q = probe;
704                                        while q < bytes_block.len()
705                                            && bytes_block[q] == underline_char
706                                        {
707                                            q += 1;
708                                        }
709                                        while q < bytes_block.len()
710                                            && matches!(bytes_block[q], b' ' | b'\t')
711                                        {
712                                            q += 1;
713                                        }
714                                        let at_eol = q >= bytes_block.len()
715                                            || matches!(bytes_block[q], b'\n' | b'\r');
716                                        if !at_eol {
717                                            true
718                                        } else {
719                                            // Container check: a blockquote
720                                            // `>` (possibly after up to 3
721                                            // spaces) on the line opening
722                                            // the `<` means the underline
723                                            // line would need the same
724                                            // prefix to actually promote a
725                                            // setext heading. Without it,
726                                            // the underline is lazy
727                                            // paragraph continuation, so
728                                            // accept as text.
729                                            //
730                                            // Same for listitems: if the
731                                            // spine has a ListItem and the
732                                            // underline line starts at a
733                                            // column less than the listitem
734                                            // content column, it's lazy
735                                            // continuation and doesn't
736                                            // promote — accept as text.
737                                            let mut ls = start;
738                                            while ls > 0
739                                                && !matches!(bytes_block[ls - 1], b'\n' | b'\r')
740                                            {
741                                                ls -= 1;
742                                            }
743                                            let mut k = ls;
744                                            let mut sp = 0;
745                                            while k < start && bytes_block[k] == b' ' && sp < 3 {
746                                                k += 1;
747                                                sp += 1;
748                                            }
749                                            if k < start && bytes_block[k] == b'>' {
750                                                true
751                                            } else {
752                                                // Underline line start.
753                                                let mut us = probe;
754                                                while us > 0
755                                                    && !matches!(bytes_block[us - 1], b'\n' | b'\r')
756                                                {
757                                                    us -= 1;
758                                                }
759                                                let mut underline_col = 0;
760                                                let mut uk = us;
761                                                while uk < probe && bytes_block[uk] == b' ' {
762                                                    uk += 1;
763                                                    underline_col += 1;
764                                                }
765                                                let listitem_indent = self
766                                                    .tree
767                                                    .walk_spine()
768                                                    .filter_map(|&ix| {
769                                                        match self.tree[ix].item.body {
770                                                            ItemBody::ListItem(indent, _) => {
771                                                                Some(indent)
772                                                            }
773                                                            _ => None,
774                                                        }
775                                                    })
776                                                    .next();
777                                                let in_blockquote =
778                                                    self.tree.walk_spine().any(|&ix| {
779                                                        matches!(
780                                                            self.tree[ix].item.body,
781                                                            ItemBody::BlockQuote(..)
782                                                        )
783                                                    });
784                                                // BlockQuote container: an
785                                                // underline line missing the
786                                                // `>` prefix is lazy
787                                                // continuation and doesn't
788                                                // promote. Detect by checking
789                                                // the underline line's source
790                                                // (not block_text, which has
791                                                // already stripped the
792                                                // prefix).
793                                                let bq_lazy = if in_blockquote {
794                                                    underline_col < 1
795                                                        || !bytes_block[us..probe].contains(&b'>')
796                                                } else {
797                                                    false
798                                                };
799                                                matches!(listitem_indent, Some(i) if underline_col < i)
800                                                    || bq_lazy
801                                            }
802                                        }
803                                    }
804                                }
805                            }
806                            _ => false,
807                        };
808                        if !is_text_fallback {
809                            self.mdx_errors.push((
810                                start,
811                                "Unexpected character after `<`, expected a valid JSX tag \
812                                 (note: to create a link in MDX, use `[text](url)`)"
813                                    .to_string(),
814                            ));
815                        }
816
817                        self.tree[cur_ix].item.body = ItemBody::Text {
818                            backslash_escaped: false,
819                        };
820                        prev = cur;
821                        cur = self.tree[cur_ix].next;
822                        continue;
823                    }
824
825                    let next = self.tree[cur_ix].next;
826                    let autolink = if let Some(next_ix) = next {
827                        scan_autolink(block_text, self.tree[next_ix].item.start)
828                    } else {
829                        None
830                    };
831
832                    if let Some((ix, uri, link_type)) = autolink {
833                        let node = scan_nodes_to_ix(&self.tree, next, ix);
834                        let text_node = self.tree.create_node(Item {
835                            start: self.tree[cur_ix].item.start + 1,
836                            end: ix - 1,
837                            body: ItemBody::Text {
838                                backslash_escaped: false,
839                            },
840                        });
841                        let link_ix =
842                            self.allocs
843                                .allocate_link(link_type, uri, "".into(), "".into());
844                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
845                        self.tree[cur_ix].item.end = ix;
846                        self.tree[cur_ix].next = node;
847                        self.tree[cur_ix].child = Some(text_node);
848                        prev = cur;
849                        cur = node;
850                        if let Some(node_ix) = cur {
851                            let orig_start = self.tree[node_ix].item.start;
852                            let new_start = max(orig_start, ix);
853                            self.tree[node_ix].item.start = new_start;
854                            // When the autolink's closing `>` consumed the byte
855                            // that was the target of a preceding `\` escape,
856                            // the trailing text's `backslash_escaped` flag is
857                            // stale — clear it so arena_build doesn't extend
858                            // the text node's source span back over bytes the
859                            // link now owns. Mirrors the inline-link fix.
860                            if new_start > orig_start {
861                                if let ItemBody::Text { backslash_escaped } =
862                                    &mut self.tree[node_ix].item.body
863                                {
864                                    *backslash_escaped = false;
865                                }
866                            }
867                        }
868                        continue;
869                    } else {
870                        let inline_html = next.and_then(|next_ix| {
871                            self.scan_inline_html(
872                                block_text.as_bytes(),
873                                self.tree[next_ix].item.start,
874                            )
875                        });
876                        if let Some((span, ix)) = inline_html {
877                            let node = scan_nodes_to_ix(&self.tree, next, ix);
878                            self.tree[cur_ix].item.body = if !span.is_empty() {
879                                let converted_string =
880                                    String::from_utf8(span).expect("invalid utf8");
881                                ItemBody::OwnedInlineHtml(
882                                    self.allocs.allocate_cow(converted_string.into()),
883                                )
884                            } else {
885                                ItemBody::InlineHtml
886                            };
887                            self.tree[cur_ix].item.end = ix;
888                            self.tree[cur_ix].next = node;
889                            prev = cur;
890                            cur = node;
891                            if let Some(node_ix) = cur {
892                                let orig_start = self.tree[node_ix].item.start;
893                                let new_start = max(orig_start, ix);
894                                self.tree[node_ix].item.start = new_start;
895                                // Inline HTML may consume bytes that a `\X`
896                                // escape was attached to (e.g. `\*` inside
897                                // an attribute value). Clear the stale flag
898                                // so arena_build doesn't extend the trail
899                                // back over bytes the HTML now owns.
900                                if new_start > orig_start {
901                                    if let ItemBody::Text { backslash_escaped } =
902                                        &mut self.tree[node_ix].item.body
903                                    {
904                                        *backslash_escaped = false;
905                                    }
906                                }
907                            }
908                            continue;
909                        }
910                    }
911                    self.tree[cur_ix].item.body = ItemBody::Text {
912                        backslash_escaped: false,
913                    };
914                }
915                ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
916                    if preceded_by_backslash {
917                        self.tree[cur_ix].item.body = ItemBody::Text {
918                            backslash_escaped: true,
919                        };
920                        prev = cur;
921                        cur = self.tree[cur_ix].next;
922                        continue;
923                    }
924                    // Count consecutive $ from the opening position
925                    let mut open_count = 1usize;
926                    let mut open_end = cur_ix;
927                    {
928                        let mut peek = self.tree[cur_ix].next;
929                        while let Some(peek_ix) = peek {
930                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
931                                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
932                            {
933                                open_count += 1;
934                                open_end = peek_ix;
935                                peek = self.tree[peek_ix].next;
936                            } else {
937                                break;
938                            }
939                        }
940                    }
941
942                    // Scan forward for a matching run of the same count
943                    let mut scan = self.tree[open_end].next;
944                    let mut close_ix = None;
945                    while let Some(scan_ix) = scan {
946                        if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
947                            let mut run = 1usize;
948                            let mut run_end = scan_ix;
949                            let mut peek = self.tree[scan_ix].next;
950                            while let Some(peek_ix) = peek {
951                                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
952                                    && self.tree[peek_ix].item.start == self.tree[run_end].item.end
953                                {
954                                    run += 1;
955                                    run_end = peek_ix;
956                                    peek = self.tree[peek_ix].next;
957                                } else {
958                                    break;
959                                }
960                            }
961                            if run == open_count {
962                                close_ix = Some(scan_ix);
963                                break;
964                            }
965                            // Skip past this non-matching run
966                            scan = self.tree[run_end].next;
967                            continue;
968                        }
969                        scan = self.tree[scan_ix].next;
970                    }
971
972                    if let Some(scan_ix) = close_ix {
973                        self.make_math_span(cur_ix, scan_ix);
974                    } else {
975                        let mut fail_ix = cur_ix;
976                        loop {
977                            self.tree[fail_ix].item.body = ItemBody::Text {
978                                backslash_escaped: false,
979                            };
980                            if fail_ix == open_end {
981                                break;
982                            }
983                            if let Some(next) = self.tree[fail_ix].next {
984                                fail_ix = next;
985                            } else {
986                                break;
987                            }
988                        }
989                    }
990                }
991                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
992                    if preceded_by_backslash {
993                        search_count -= 1;
994                        if search_count == 0 {
995                            self.tree[cur_ix].item.body = ItemBody::Text {
996                                backslash_escaped: true,
997                            };
998                            prev = cur;
999                            cur = self.tree[cur_ix].next;
1000                            continue;
1001                        }
1002                    }
1003
1004                    if self.code_delims.is_populated() {
1005                        // we have previously scanned all codeblock delimiters,
1006                        // so we can reuse that work
1007                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
1008                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1009                        } else {
1010                            self.tree[cur_ix].item.body = ItemBody::Text {
1011                                backslash_escaped: preceded_by_backslash,
1012                            };
1013                        }
1014                    } else {
1015                        // we haven't previously scanned all codeblock delimiters,
1016                        // so walk the AST
1017                        let mut scan = if search_count > 0 {
1018                            self.tree[cur_ix].next
1019                        } else {
1020                            None
1021                        };
1022                        while let Some(scan_ix) = scan {
1023                            if let ItemBody::MaybeCode(delim_count, _) =
1024                                self.tree[scan_ix].item.body
1025                            {
1026                                if search_count == delim_count {
1027                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1028                                    self.code_delims.clear();
1029                                    break;
1030                                } else {
1031                                    self.code_delims.insert(delim_count, scan_ix);
1032                                }
1033                            }
1034                            scan = self.tree[scan_ix].next;
1035                        }
1036                        if scan.is_none() {
1037                            self.tree[cur_ix].item.body = ItemBody::Text {
1038                                backslash_escaped: preceded_by_backslash,
1039                            };
1040                        }
1041                    }
1042                }
1043                ItemBody::MaybeLinkOpen => {
1044                    self.tree[cur_ix].item.body = ItemBody::Text {
1045                        backslash_escaped: false,
1046                    };
1047                    let link_open_doubled = self.tree[cur_ix]
1048                        .next
1049                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
1050                        .unwrap_or(false);
1051                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
1052                        self.wikilink_stack.push(LinkStackEl {
1053                            node: cur_ix,
1054                            ty: LinkStackTy::Link,
1055                        });
1056                    }
1057                    self.link_stack.push(LinkStackEl {
1058                        node: cur_ix,
1059                        ty: LinkStackTy::Link,
1060                    });
1061                }
1062                ItemBody::MaybeImage => {
1063                    self.tree[cur_ix].item.body = ItemBody::Text {
1064                        backslash_escaped: false,
1065                    };
1066                    let link_open_doubled = self.tree[cur_ix]
1067                        .next
1068                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
1069                        .unwrap_or(false);
1070                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
1071                        self.wikilink_stack.push(LinkStackEl {
1072                            node: cur_ix,
1073                            ty: LinkStackTy::Image,
1074                        });
1075                    }
1076                    self.link_stack.push(LinkStackEl {
1077                        node: cur_ix,
1078                        ty: LinkStackTy::Image,
1079                    });
1080                }
1081                ItemBody::MaybeLinkClose(could_be_ref) => {
1082                    self.tree[cur_ix].item.body = ItemBody::Text {
1083                        backslash_escaped: false,
1084                    };
1085                    let tos_link = self.link_stack.pop();
1086                    if self.options.contains(Options::ENABLE_WIKILINKS)
1087                        && self.tree[cur_ix]
1088                            .next
1089                            .map(|ix| {
1090                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
1091                            })
1092                            .unwrap_or(false)
1093                    {
1094                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
1095                            cur = self.tree[node].next;
1096                            continue;
1097                        }
1098                    }
1099                    if let Some(tos) = tos_link {
1100                        // skip rendering if already in a link, unless it's an
1101                        // image
1102                        if tos.ty != LinkStackTy::Image
1103                            && matches!(
1104                                self.tree[self.tree.peek_up().unwrap()].item.body,
1105                                ItemBody::Link(..)
1106                            )
1107                        {
1108                            continue;
1109                        }
1110                        if tos.ty == LinkStackTy::Disabled {
1111                            continue;
1112                        }
1113                        let next = self.tree[cur_ix].next;
1114                        if let Some((next_ix, url, title)) =
1115                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
1116                        {
1117                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
1118                            if let Some(prev_ix) = prev {
1119                                self.tree[prev_ix].next = None;
1120                            }
1121                            cur = Some(tos.node);
1122                            cur_ix = tos.node;
1123                            let link_ix =
1124                                self.allocs
1125                                    .allocate_link(LinkType::Inline, url, title, "".into());
1126                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
1127                                ItemBody::Image(link_ix)
1128                            } else {
1129                                ItemBody::Link(link_ix)
1130                            };
1131                            self.tree[cur_ix].child = self.tree[cur_ix].next;
1132                            self.tree[cur_ix].next = next_node;
1133                            self.tree[cur_ix].item.end = next_ix;
1134                            if let Some(next_node_ix) = next_node {
1135                                let orig_start = self.tree[next_node_ix].item.start;
1136                                let new_start = max(orig_start, next_ix);
1137                                self.tree[next_node_ix].item.start = new_start;
1138                                // If the text node's start was advanced past
1139                                // its original position (the link's URL or
1140                                // title consumed the bytes the escape was
1141                                // attached to), the `backslash_escaped`
1142                                // flag no longer applies — clear it so the
1143                                // arena-build position fixup doesn't extend
1144                                // the text node's source span back over
1145                                // bytes already owned by the link.
1146                                if new_start > orig_start {
1147                                    if let ItemBody::Text { backslash_escaped } =
1148                                        &mut self.tree[next_node_ix].item.body
1149                                    {
1150                                        *backslash_escaped = false;
1151                                    }
1152                                }
1153                            }
1154
1155                            if tos.ty == LinkStackTy::Link {
1156                                self.disable_all_links();
1157                            }
1158                        } else {
1159                            // Footnote-first check: if the first bracket content is
1160                            // `[^X]` where `X` has a matching footnote definition,
1161                            // emit a FootnoteReference regardless of what follows.
1162                            // Otherwise `[^X][Y]` would be resolved as a link whose
1163                            // text happens to start with `^`, which diverges from
1164                            // remark-gfm's two-node parse (footnote + trailing ref).
1165                            let first_bracket_start = self.tree[tos.node].item.start;
1166                            let first_bracket_end = self.tree[cur_ix].item.end;
1167                            let first_bracket_text =
1168                                &self.text[first_bracket_start..first_bracket_end];
1169                            if let Some((_, ReferenceLabel::Footnote(footlabel))) =
1170                                scan_link_label(&self.tree, first_bracket_text, self.options)
1171                            {
1172                                if self.allocs.footdefs.contains(&footlabel) {
1173                                    let footref = self.allocs.allocate_cow(footlabel);
1174                                    if let Some(def) = self
1175                                        .allocs
1176                                        .footdefs
1177                                        .get_mut(self.allocs.cows[footref.0].to_owned())
1178                                    {
1179                                        def.use_count += 1;
1180                                    }
1181                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
1182                                        self.tree[tos.node].next = Some(cur_ix);
1183                                        self.tree[tos.node].child = None;
1184                                        self.tree[tos.node].item.body =
1185                                            ItemBody::SynthesizeChar('!');
1186                                        self.tree[cur_ix].item.start =
1187                                            self.tree[tos.node].item.start + 1;
1188                                        self.tree[tos.node].item.end =
1189                                            self.tree[tos.node].item.start + 1;
1190                                        cur_ix
1191                                    } else {
1192                                        tos.node
1193                                    };
1194                                    self.tree[footnote_ix].next = next;
1195                                    self.tree[footnote_ix].child = None;
1196                                    self.tree[footnote_ix].item.body =
1197                                        ItemBody::FootnoteReference(footref);
1198                                    self.tree[footnote_ix].item.end = first_bracket_end;
1199                                    prev = Some(footnote_ix);
1200                                    cur = next;
1201                                    self.link_stack.clear();
1202                                    continue;
1203                                }
1204                            }
1205                            // ok, so it's not an inline link. maybe it is a reference
1206                            // to a defined link?
1207                            let scan_result =
1208                                scan_reference(&self.tree, block_text, next, self.options);
1209                            let (node_after_link, link_type) = match scan_result {
1210                                // [label][reference]
1211                                RefScan::LinkLabel(_, end_ix) => {
1212                                    // Toggle reference viability of the last closing bracket,
1213                                    // so that we can skip it on future iterations in case
1214                                    // it fails in this one. In particular, we won't call
1215                                    // the broken link callback twice on one reference.
1216                                    let reference_close_node = if let Some(node) =
1217                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
1218                                    {
1219                                        node
1220                                    } else {
1221                                        continue;
1222                                    };
1223                                    self.tree[reference_close_node].item.body =
1224                                        ItemBody::MaybeLinkClose(false);
1225                                    let next_node = self.tree[reference_close_node].next;
1226
1227                                    (next_node, LinkType::Reference)
1228                                }
1229                                // [reference][]
1230                                RefScan::Collapsed(next_node) => {
1231                                    // This reference has already been tried, and it's not
1232                                    // valid. Skip it.
1233                                    if !could_be_ref {
1234                                        continue;
1235                                    }
1236                                    (next_node, LinkType::Collapsed)
1237                                }
1238                                // [X][^Y] — full-reference form with a footnote-shaped
1239                                // second label. Per CommonMark the full-ref has to
1240                                // resolve to a link definition, which `^Y` never will;
1241                                // shortcut fallback is NOT tried. Leave both brackets
1242                                // literal and let `[^Y]` be parsed as a footnote on
1243                                // its own MaybeLinkClose iteration.
1244                                RefScan::UnexpectedFootnote => continue,
1245                                // `[text][invalid_label]` — the `[` after `[text]`
1246                                // started a label slot but it wasn't a valid label
1247                                // (e.g. unescaped `[` inside). Spec: a shortcut link
1248                                // can't be followed by `[`, so don't fall back to
1249                                // shortcut. Leave both brackets literal.
1250                                RefScan::FailedInvalidLabel => continue,
1251                                // [shortcut]
1252                                //
1253                                // [shortcut]: /blah
1254                                RefScan::Failed => {
1255                                    if !could_be_ref {
1256                                        continue;
1257                                    }
1258                                    (next, LinkType::Shortcut)
1259                                }
1260                            };
1261
1262                            // FIXME: references and labels are mixed in the naming of variables
1263                            // below. Disambiguate!
1264
1265                            // (label, source_ix end)
1266                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
1267                                RefScan::LinkLabel(l, end_ix) => {
1268                                    Some((ReferenceLabel::Link(l), end_ix))
1269                                }
1270                                RefScan::Collapsed(..)
1271                                | RefScan::Failed
1272                                | RefScan::FailedInvalidLabel
1273                                | RefScan::UnexpectedFootnote => {
1274                                    // No label? maybe it is a shortcut reference
1275                                    let label_start = self.tree[tos.node].item.end - 1;
1276                                    let label_end = self.tree[cur_ix].item.end;
1277                                    scan_link_label(
1278                                        &self.tree,
1279                                        &self.text[label_start..label_end],
1280                                        self.options,
1281                                    )
1282                                    .map(|(ix, label)| (label, label_start + ix))
1283                                    .filter(|(_, end)| *end == label_end)
1284                                }
1285                            };
1286
1287                            let id = match &label {
1288                                Some(
1289                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
1290                                ) => l.clone(),
1291                                None => "".into(),
1292                            };
1293
1294                            // see if it's a footnote reference
1295                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
1296                                let footref = self.allocs.allocate_cow(l);
1297                                if let Some(def) = self
1298                                    .allocs
1299                                    .footdefs
1300                                    .get_mut(self.allocs.cows[footref.0].to_owned())
1301                                {
1302                                    def.use_count += 1;
1303                                }
1304                                if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
1305                                    // If this came from a MaybeImage, then the `!` prefix
1306                                    // isn't part of the footnote reference.
1307                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
1308                                        self.tree[tos.node].next = Some(cur_ix);
1309                                        self.tree[tos.node].child = None;
1310                                        self.tree[tos.node].item.body =
1311                                            ItemBody::SynthesizeChar('!');
1312                                        self.tree[cur_ix].item.start =
1313                                            self.tree[tos.node].item.start + 1;
1314                                        self.tree[tos.node].item.end =
1315                                            self.tree[tos.node].item.start + 1;
1316                                        cur_ix
1317                                    } else {
1318                                        tos.node
1319                                    };
1320                                    // use `next` instead of `node_after_link` because
1321                                    // node_after_link is calculated for a [collapsed][] link,
1322                                    // which footnotes don't support.
1323                                    self.tree[footnote_ix].next = next;
1324                                    self.tree[footnote_ix].child = None;
1325                                    self.tree[footnote_ix].item.body =
1326                                        ItemBody::FootnoteReference(footref);
1327                                    self.tree[footnote_ix].item.end = end;
1328                                    prev = Some(footnote_ix);
1329                                    cur = next;
1330                                    self.link_stack.clear();
1331                                    continue;
1332                                }
1333                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
1334                                if let Some((def_link_type, url, title)) = self
1335                                    .fetch_link_type_url_title(
1336                                        link_label,
1337                                        (self.tree[tos.node].item.start)..end,
1338                                        link_type,
1339                                        callbacks,
1340                                    )
1341                                {
1342                                    let link_ix =
1343                                        self.allocs.allocate_link(def_link_type, url, title, id);
1344                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
1345                                    {
1346                                        ItemBody::Image(link_ix)
1347                                    } else {
1348                                        ItemBody::Link(link_ix)
1349                                    };
1350                                    let label_node = self.tree[tos.node].next;
1351
1352                                    // lets do some tree surgery to add the link to the tree
1353                                    // 1st: skip the label node and close node
1354                                    self.tree[tos.node].next = node_after_link;
1355
1356                                    // then, if it exists, add the label node as a child to the link node
1357                                    if label_node != cur {
1358                                        self.tree[tos.node].child = label_node;
1359
1360                                        // finally: disconnect list of children
1361                                        if let Some(prev_ix) = prev {
1362                                            self.tree[prev_ix].next = None;
1363                                        }
1364                                    }
1365
1366                                    self.tree[tos.node].item.end = end;
1367
1368                                    // set up cur so next node will be node_after_link
1369                                    cur = Some(tos.node);
1370                                    cur_ix = tos.node;
1371
1372                                    if tos.ty == LinkStackTy::Link {
1373                                        self.disable_all_links();
1374                                    }
1375                                }
1376                            }
1377                        }
1378                    }
1379                }
1380                _ => {}
1381            }
1382            prev = cur;
1383            cur = self.tree[cur_ix].next;
1384        }
1385        self.link_stack.clear();
1386        self.wikilink_stack.clear();
1387        self.code_delims.clear();
1388        self.math_delims.clear();
1389    }
1390
1391    /// Handles a wikilink.
1392    ///
1393    /// This function may bail early in case the link is malformed, so this
1394    /// acts as a control flow guard. Returns the link node if a wikilink was
1395    /// found and created.
1396    fn handle_wikilink(
1397        &mut self,
1398        block_text: &'input str,
1399        cur_ix: TreeIndex,
1400        prev: Option<TreeIndex>,
1401    ) -> Option<TreeIndex> {
1402        let next_ix = self.tree[cur_ix].next.unwrap();
1403        // this is a wikilink closing delim, try popping from
1404        // the wikilink stack
1405        if let Some(tos) = self.wikilink_stack.pop() {
1406            if tos.ty == LinkStackTy::Disabled {
1407                return None;
1408            }
1409            // fetches the beginning of the wikilink body
1410            let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1411                // skip if no next node exists, like at end of input
1412                return None;
1413            };
1414            let start_ix = self.tree[body_node].item.start;
1415            let end_ix = self.tree[cur_ix].item.start;
1416            let wikilink = match scan_wikilink_pipe(
1417                block_text,
1418                start_ix, // bounded by closing tag
1419                end_ix - start_ix,
1420            ) {
1421                Some((rest, wikitext)) => {
1422                    // bail early if the wikiname would be empty
1423                    if wikitext.is_empty() {
1424                        return None;
1425                    }
1426                    // [[WikiName|rest]]
1427                    let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1428                    if let Some(body_node) = body_node {
1429                        // break node so passes can actually format
1430                        // the display text
1431                        self.tree[body_node].item.start = rest;
1432                        Some((true, body_node, wikitext))
1433                    } else {
1434                        None
1435                    }
1436                }
1437                None => {
1438                    let wikitext = &block_text[start_ix..end_ix];
1439                    // bail early if the wikiname would be empty
1440                    if wikitext.is_empty() {
1441                        return None;
1442                    }
1443                    let body_node = self.tree.create_node(Item {
1444                        start: start_ix,
1445                        end: end_ix,
1446                        body: ItemBody::Text {
1447                            backslash_escaped: false,
1448                        },
1449                    });
1450                    Some((false, body_node, wikitext))
1451                }
1452            };
1453
1454            if let Some((has_pothole, body_node, wikiname)) = wikilink {
1455                let link_ix = self.allocs.allocate_link(
1456                    LinkType::WikiLink { has_pothole },
1457                    wikiname.into(),
1458                    "".into(),
1459                    "".into(),
1460                );
1461                if let Some(prev_ix) = prev {
1462                    self.tree[prev_ix].next = None;
1463                }
1464                if tos.ty == LinkStackTy::Image {
1465                    self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1466                } else {
1467                    self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1468                }
1469                self.tree[tos.node].child = Some(body_node);
1470                self.tree[tos.node].next = self.tree[next_ix].next;
1471                self.tree[tos.node].item.end = end_ix + 2;
1472                self.disable_all_links();
1473                return Some(tos.node);
1474            }
1475        }
1476
1477        None
1478    }
1479
1480    fn handle_emphasis_in_scope(&mut self, start: Option<TreeIndex>) {
1481        let mut prev = None;
1482        let mut prev_ix: TreeIndex;
1483        let mut cur = start;
1484
1485        let mut single_quote_open: Option<TreeIndex> = None;
1486        let mut double_quote_open: bool = false;
1487
1488        while let Some(mut cur_ix) = cur {
1489            match self.tree[cur_ix].item.body {
1490                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
1491                    let run_length = count;
1492                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1493                    let both = can_open && can_close;
1494                    // Defer `~`/`^` resolution to the post-pass.
1495                    // Without lookahead, the single-pass can't tell whether an
1496                    // earlier `*`/`_` opener will pair (in which case the
1497                    // `~`/`^` should match inside the future emphasis) or
1498                    // remain unmatched (in which case `~`/`^` would cross the
1499                    // boundary). micromark handles this with a separate
1500                    // strikethrough resolve phase that runs after emphasis.
1501                    if c == b'~' || c == b'^' {
1502                        prev_ix = cur_ix + count - 1;
1503                        prev = Some(prev_ix);
1504                        cur = self.tree[prev_ix].next;
1505                        continue;
1506                    }
1507                    if can_close {
1508                        while let Some(el) =
1509                            self.inline_stack
1510                                .find_match(&mut self.tree, c, run_length, count, both)
1511                        {
1512                            // have a match!
1513                            if let Some(prev_ix) = prev {
1514                                self.tree[prev_ix].next = None;
1515                            }
1516                            // Consume at most two markers per inner-loop pass
1517                            // (one `<strong>`/`<em>` per match), matching
1518                            // micromark's `use = open>1 && close>1 ? 2 : 1`.
1519                            // The outer `while let` then drives nesting by
1520                            // re-running `find_match` with the leftover
1521                            // counts, which is how `***foo***` becomes
1522                            // `<em><strong>foo</strong></em>` instead of one
1523                            // flat match.
1524                            let match_count = min(2, min(count, el.count));
1525                            // start, end are tree node indices
1526                            let mut end = cur_ix - 1;
1527                            let mut start = el.start + el.count;
1528
1529                            // work from the inside out
1530                            while start > el.start + el.count - match_count {
1531                                let inc = if start > el.start + el.count - match_count + 1 {
1532                                    2
1533                                } else {
1534                                    1
1535                                };
1536                                let ty = if c == b'~' {
1537                                    if inc == 2 {
1538                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1539                                            ItemBody::Strikethrough
1540                                        } else {
1541                                            ItemBody::Text {
1542                                                backslash_escaped: false,
1543                                            }
1544                                        }
1545                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1546                                        ItemBody::Subscript
1547                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1548                                        ItemBody::Strikethrough
1549                                    } else {
1550                                        ItemBody::Text {
1551                                            backslash_escaped: false,
1552                                        }
1553                                    }
1554                                } else if c == b'^' {
1555                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1556                                        ItemBody::Superscript
1557                                    } else {
1558                                        ItemBody::Text {
1559                                            backslash_escaped: false,
1560                                        }
1561                                    }
1562                                } else if inc == 2 {
1563                                    ItemBody::Strong
1564                                } else {
1565                                    ItemBody::Emphasis
1566                                };
1567
1568                                let root = start - inc;
1569                                end = end + inc;
1570                                self.tree[root].item.body = ty;
1571                                self.tree[root].item.end = self.tree[end].item.end;
1572                                self.tree[root].child = Some(start);
1573                                self.tree[root].next = None;
1574                                start = root;
1575                            }
1576
1577                            // set next for top most emph level
1578                            prev_ix = el.start + el.count - match_count;
1579                            prev = Some(prev_ix);
1580                            cur = self.tree[cur_ix + match_count - 1].next;
1581                            self.tree[prev_ix].next = cur;
1582
1583                            if el.count > match_count {
1584                                self.inline_stack.push(InlineEl {
1585                                    start: el.start,
1586                                    count: el.count - match_count,
1587                                    run_length: el.run_length,
1588                                    c: el.c,
1589                                    both: el.both,
1590                                })
1591                            }
1592                            count -= match_count;
1593                            if count > 0 {
1594                                cur_ix = cur.unwrap();
1595                            } else {
1596                                break;
1597                            }
1598                        }
1599                    }
1600                    if count > 0 {
1601                        if can_open {
1602                            self.inline_stack.push(InlineEl {
1603                                start: cur_ix,
1604                                run_length,
1605                                count,
1606                                c,
1607                                both,
1608                            });
1609                        } else {
1610                            for i in 0..count {
1611                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1612                                    backslash_escaped: false,
1613                                };
1614                            }
1615                        }
1616                        prev_ix = cur_ix + count - 1;
1617                        prev = Some(prev_ix);
1618                        cur = self.tree[prev_ix].next;
1619                    }
1620                }
1621                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1622                    self.tree[cur_ix].item.body = match c {
1623                        b'\'' => {
1624                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
1625                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1626                                single_quote_open = None;
1627                            } else if can_open {
1628                                single_quote_open = Some(cur_ix);
1629                            }
1630                            ItemBody::SynthesizeChar('’')
1631                        }
1632                        _ /* double quote */ => {
1633                            if can_close && double_quote_open {
1634                                double_quote_open = false;
1635                                ItemBody::SynthesizeChar('”')
1636                            } else {
1637                                if can_open && !double_quote_open {
1638                                    double_quote_open = true;
1639                                }
1640                                ItemBody::SynthesizeChar('“')
1641                            }
1642                        }
1643                    };
1644                    prev = cur;
1645                    cur = self.tree[cur_ix].next;
1646                }
1647                ItemBody::HardBreak(true) => {
1648                    if self.tree[cur_ix].next.is_none() {
1649                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1650                    }
1651                    prev = cur;
1652                    cur = self.tree[cur_ix].next;
1653                }
1654                _ => {
1655                    prev = cur;
1656                    cur = self.tree[cur_ix].next;
1657                }
1658            }
1659        }
1660        self.inline_stack.pop_all(&mut self.tree);
1661    }
1662
1663    /// Second-pass strikethrough/sub/sup resolution. Walks the tree
1664    /// hierarchically and resolves `~`/`^` MaybeEmphasis tokens within
1665    /// each inline scope independently. This matches micromark's
1666    /// post-emphasis resolve phase: a `~..~` pair only forms when both
1667    /// ends lie within the same enclosing scope (root, emphasis, link,
1668    /// etc.). Multi-char `~~` strikethrough was already resolved in
1669    /// the main pass.
1670    fn handle_tildes_carets_pass(&mut self) {
1671        let start = self.tree.cur();
1672        self.resolve_tildes_carets_in_scope(start);
1673    }
1674    fn resolve_tildes_carets_in_scope(&mut self, start: Option<TreeIndex>) {
1675        let mut stack: Vec<InlineEl> = Vec::new();
1676        let mut cur = start;
1677        let mut prev: Option<TreeIndex> = None;
1678        while let Some(mut cur_ix) = cur {
1679            match self.tree[cur_ix].item.body {
1680                ItemBody::MaybeEmphasis(count, can_open, can_close) => {
1681                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1682                    if c != b'~' && c != b'^' {
1683                        prev = Some(cur_ix);
1684                        cur = self.tree[cur_ix].next;
1685                        continue;
1686                    }
1687                    let run_length = count;
1688                    let mut remaining = count;
1689                    if can_close {
1690                        while remaining > 0 {
1691                            let res = stack
1692                                .iter()
1693                                .enumerate()
1694                                .rfind(|(_, el)| el.c == c && el.run_length == run_length);
1695                            let Some((matching_ix, matching_el)) = res else {
1696                                break;
1697                            };
1698                            let matching_el = *matching_el;
1699                            if let Some(prev_ix) = prev {
1700                                self.tree[prev_ix].next = None;
1701                            }
1702                            // Convert intermediate `~`/`^` openers above the
1703                            // match to text — they failed to find a pair.
1704                            for el in &stack[(matching_ix + 1)..] {
1705                                for i in 0..el.count {
1706                                    self.tree[el.start + i].item.body = ItemBody::Text {
1707                                        backslash_escaped: false,
1708                                    };
1709                                }
1710                            }
1711                            stack.truncate(matching_ix);
1712                            let match_count =
1713                                core::cmp::min(2, core::cmp::min(remaining, matching_el.count));
1714                            let mut end = cur_ix - 1;
1715                            let mut sub_start = matching_el.start + matching_el.count;
1716                            while sub_start > matching_el.start + matching_el.count - match_count {
1717                                let inc = if sub_start
1718                                    > matching_el.start + matching_el.count - match_count + 1
1719                                {
1720                                    2
1721                                } else {
1722                                    1
1723                                };
1724                                let ty = if c == b'~' {
1725                                    if inc == 2 {
1726                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1727                                            ItemBody::Strikethrough
1728                                        } else {
1729                                            ItemBody::Text {
1730                                                backslash_escaped: false,
1731                                            }
1732                                        }
1733                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1734                                        ItemBody::Subscript
1735                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1736                                        ItemBody::Strikethrough
1737                                    } else {
1738                                        ItemBody::Text {
1739                                            backslash_escaped: false,
1740                                        }
1741                                    }
1742                                } else if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1743                                    ItemBody::Superscript
1744                                } else {
1745                                    ItemBody::Text {
1746                                        backslash_escaped: false,
1747                                    }
1748                                };
1749                                let root = sub_start - inc;
1750                                end = end + inc;
1751                                self.tree[root].item.body = ty;
1752                                self.tree[root].item.end = self.tree[end].item.end;
1753                                self.tree[root].child = Some(sub_start);
1754                                self.tree[root].next = None;
1755                                sub_start = root;
1756                            }
1757                            let new_prev_ix = matching_el.start + matching_el.count - match_count;
1758                            let new_cur = self.tree[cur_ix + match_count - 1].next;
1759                            self.tree[new_prev_ix].next = new_cur;
1760                            prev = Some(new_prev_ix);
1761                            if matching_el.count > match_count {
1762                                stack.push(InlineEl {
1763                                    start: matching_el.start,
1764                                    count: matching_el.count - match_count,
1765                                    run_length: matching_el.run_length,
1766                                    c: matching_el.c,
1767                                    both: matching_el.both,
1768                                });
1769                            }
1770                            remaining -= match_count;
1771                            if remaining > 0 {
1772                                let Some(next_cur) = new_cur else { break };
1773                                cur_ix = next_cur;
1774                            } else {
1775                                break;
1776                            }
1777                        }
1778                    }
1779                    if remaining > 0 {
1780                        if can_open {
1781                            stack.push(InlineEl {
1782                                start: cur_ix,
1783                                count: remaining,
1784                                run_length,
1785                                c,
1786                                both: can_open && can_close,
1787                            });
1788                        } else {
1789                            for i in 0..remaining {
1790                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1791                                    backslash_escaped: false,
1792                                };
1793                            }
1794                        }
1795                        let prev_ix = cur_ix + remaining - 1;
1796                        prev = Some(prev_ix);
1797                        cur = self.tree[prev_ix].next;
1798                    } else {
1799                        cur = self.tree[prev.unwrap()].next;
1800                    }
1801                    continue;
1802                }
1803                ItemBody::Emphasis
1804                | ItemBody::Strong
1805                | ItemBody::Strikethrough
1806                | ItemBody::Subscript
1807                | ItemBody::Superscript
1808                | ItemBody::Link(_)
1809                | ItemBody::Image(_) => {
1810                    let child = self.tree[cur_ix].child;
1811                    self.resolve_tildes_carets_in_scope(child);
1812                }
1813                _ => {}
1814            }
1815            prev = Some(cur_ix);
1816            cur = self.tree[cur_ix].next;
1817        }
1818        // End of scope: any remaining openers couldn't find a closer.
1819        for el in stack {
1820            for i in 0..el.count {
1821                self.tree[el.start + i].item.body = ItemBody::Text {
1822                    backslash_escaped: false,
1823                };
1824            }
1825        }
1826    }
1827
1828    fn disable_all_links(&mut self) {
1829        self.link_stack.disable_all_links();
1830        self.wikilink_stack.disable_all_links();
1831    }
1832
1833    /// Returns next byte index, url and title.
1834    fn scan_inline_link(
1835        &self,
1836        underlying: &'input str,
1837        mut ix: usize,
1838        node: Option<TreeIndex>,
1839    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1840        if underlying.as_bytes().get(ix) != Some(&b'(') {
1841            return None;
1842        }
1843        ix += 1;
1844
1845        let scan_separator = |ix: &mut usize| {
1846            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1847            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1848                *ix += bl;
1849                *ix += skip_container_prefixes(
1850                    &self.tree,
1851                    &underlying.as_bytes()[*ix..],
1852                    self.options,
1853                );
1854            }
1855            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1856        };
1857
1858        scan_separator(&mut ix);
1859
1860        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1861        let dest = unescape(dest, self.tree.is_in_table());
1862        ix += dest_length;
1863
1864        scan_separator(&mut ix);
1865
1866        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1867            ix += bytes_scanned;
1868            scan_separator(&mut ix);
1869            t
1870        } else {
1871            "".into()
1872        };
1873        if underlying.as_bytes().get(ix) != Some(&b')') {
1874            return None;
1875        }
1876        ix += 1;
1877
1878        Some((ix, dest, title))
1879    }
1880
1881    // returns (bytes scanned, title cow)
1882    fn scan_link_title(
1883        &self,
1884        text: &'input str,
1885        start_ix: usize,
1886        node: Option<TreeIndex>,
1887    ) -> Option<(usize, CowStr<'input>)> {
1888        let bytes = text.as_bytes();
1889        let open = match bytes.get(start_ix) {
1890            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1891            _ => return None,
1892        };
1893        let close = if open == b'(' { b')' } else { open };
1894
1895        let mut title = String::new();
1896        let mut mark = start_ix + 1;
1897        let mut i = start_ix + 1;
1898
1899        while i < bytes.len() {
1900            let c = bytes[i];
1901
1902            if c == close {
1903                let cow = if title.is_empty() {
1904                    (i - start_ix + 1, text[mark..i].into())
1905                } else {
1906                    title.push_str(&text[mark..i]);
1907                    (i - start_ix + 1, title.into())
1908                };
1909
1910                return Some(cow);
1911            }
1912            if c == open {
1913                return None;
1914            }
1915
1916            if c == b'\n' || c == b'\r' {
1917                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1918                    if self.tree[node_ix].item.start > i {
1919                        title.push_str(&text[mark..i]);
1920                        title.push('\n');
1921                        i = self.tree[node_ix].item.start;
1922                        mark = i;
1923                        continue;
1924                    }
1925                }
1926            }
1927            if c == b'&' {
1928                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1929                    title.push_str(&text[mark..i]);
1930                    title.push_str(&value);
1931                    i += n;
1932                    mark = i;
1933                    continue;
1934                }
1935            }
1936            if self.tree.is_in_table()
1937                && c == b'\\'
1938                && i + 2 < bytes.len()
1939                && bytes[i + 1] == b'\\'
1940                && bytes[i + 2] == b'|'
1941            {
1942                // this runs if there are an even number of pipes in a table
1943                // if it's odd, then it gets parsed as normal
1944                title.push_str(&text[mark..i]);
1945                i += 2;
1946                mark = i;
1947            }
1948            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1949                title.push_str(&text[mark..i]);
1950                i += 1;
1951                mark = i;
1952            }
1953
1954            i += 1;
1955        }
1956
1957        None
1958    }
1959
    /// Collapses the nodes from `open` through the closing delimiter run into
    /// a single `ItemBody::Math` node stored at `open`.
    ///
    /// `open` and `close` are the first nodes of the opening and closing
    /// delimiter runs. Adjacent `MaybeMath` nodes following each of them are
    /// treated as part of the same run. The text between the two runs becomes
    /// the math content after:
    ///
    /// * skipping container prefixes at the start of every continuation line
    ///   (line-ending bytes themselves are copied unchanged),
    /// * replacing `\|` with `|` when inside a table,
    /// * stripping one leading and one trailing space/newline when both are
    ///   present and the content is not made up solely of spaces/newlines.
    ///
    /// If the opening run ends after the start of `close`, `open` is demoted
    /// to plain text and nothing else is changed.
    fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
        // Find the end of the opening run of consecutive $ tokens
        let mut open_end = open;
        {
            let mut peek = self.tree[open].next;
            while let Some(peek_ix) = peek {
                // Only directly adjacent MaybeMath nodes extend the run, and
                // the run never swallows the `close` node itself.
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                    && peek_ix != close
                {
                    open_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }
        // Find the end of the closing run
        let mut close_end = close;
        {
            let mut peek = self.tree[close].next;
            while let Some(peek_ix) = peek {
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[close_end].item.end
                {
                    close_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }

        // Content lies between the end of the opening run and the start of
        // the closing run.
        let span_start = self.tree[open_end].item.end;
        let span_end = self.tree[close].item.start;

        if span_start > span_end {
            // Inverted range: give up and leave `open` as literal text.
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: false,
            };
            return;
        }

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // Allocated lazily: stays `None` while the content can be borrowed
        // verbatim from the source.
        let mut buf: Option<String> = None;

        // `start_ix` marks the beginning of the source slice not yet copied
        // into `buf`; `ix` is the scan position. Both are relative to
        // `spanned_text`.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                // The line-ending byte is copied as-is along with the text
                // before it.
                ix += 1;
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                // Use the full source bytes from this position (not just
                // the span slice) so scan_containers can see the real
                // line content past the closing delimiter. With only the
                // span slice, a partial-indent line followed by buffer
                // end (e.g. `    ` + closing) was misread as EOL by
                // is_at_eol — letting the ListItem container "match" the
                // 4 spaces of a 5-indent item and over-strip the
                // span's trailing whitespace.
                let from = span_start + ix;
                let (scanned, leftover) = skip_container_prefixes_with_remaining(
                    &self.tree,
                    &self.text.as_bytes()[from..],
                    self.options,
                );
                // Never skip past the end of the span itself.
                let scanned = scanned.min(spanned_bytes.len() - ix);
                ix += scanned;
                start_ix = ix;
                // Preserve leftover virtual columns from a tab the
                // container only partially consumed (e.g. `\t` in a 2-col
                // listitem leaves 2 spaces of content).
                for _ in 0..leftover {
                    buf.push(' ');
                }
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // Inside a table `\|` stands for a literal pipe.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Flush the pending tail into `buf` (if one exists) and inspect the
        // resulting content for leading/trailing space or newline.
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        // Strip exactly one byte from each end when the content is padded on
        // both sides and is not whitespace-only.
        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        // NOTE(review): the second `Math` field is always `false` here; its
        // meaning is defined on `ItemBody::Math` outside this section.
        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
        // Extend `open` over the whole span and unlink everything up to and
        // including the closing run.
        self.tree[open].item.end = self.tree[close_end].item.end;
        self.tree[open].next = self.tree[close_end].next;
    }
2086
    /// Make a code span.
    ///
    /// Both `open` and `close` are matching MaybeCode items.
    ///
    /// The text between them becomes the code content after:
    ///
    /// * normalising every line ending (`\r`, `\n` or `\r\n`) to a single
    ///   `\n` and skipping container prefixes on the following line,
    /// * replacing `\|` with `|` when inside a table,
    /// * stripping one leading and one trailing space/newline when both are
    ///   present and the content is not made up solely of spaces/newlines.
    ///
    /// When `preceding_backslash` is true, `open` is turned into a one-byte
    /// backslash-escaped text node and the code span is stored in `close`,
    /// starting one byte after `open`'s start. Otherwise `open` becomes the
    /// code node and is extended to cover `close`.
    ///
    /// Any entry in `mdx_errors` whose offset lies inside the span content is
    /// removed.
    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
        let span_start = self.tree[open].item.end;
        let span_end = self.tree[close].item.start;
        // Allocated lazily: stays `None` while the content can be borrowed
        // verbatim from the source.
        let mut buf: Option<String> = None;

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // `start_ix` marks the beginning of the source slice not yet copied
        // into `buf`; `ix` is the scan position. Both are relative to
        // `spanned_text`.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                // Whatever the source line ending was, emit a single `\n`.
                buf.push('\n');
                ix += 1;
                if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
                    ix += 1;
                }
                // Use the full source bytes from this position (not just
                // the span slice) so scan_containers can see the real
                // line content past the closing backtick. With only the
                // span slice, a partial-indent line followed by buffer
                // end (e.g. `    ` + closing) was misread as EOL by
                // is_at_eol — letting the ListItem container "match" the
                // 4 spaces of a 5-indent item and over-strip the code
                // span's trailing whitespace.
                let from = span_start + ix;
                let (scanned, leftover) = skip_container_prefixes_with_remaining(
                    &self.tree,
                    &self.text.as_bytes()[from..],
                    self.options,
                );
                // Never skip past the end of the span itself.
                let scanned = scanned.min(spanned_bytes.len() - ix);
                ix += scanned;
                start_ix = ix;
                // Preserve leftover virtual columns from a tab the
                // container only partially consumed (e.g. `\t` in a 2-col
                // listitem leaves 2 spaces of content).
                for _ in 0..leftover {
                    buf.push(' ');
                }
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // Inside a table `\|` stands for a literal pipe.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Flush the pending tail into `buf` (if one exists) and inspect the
        // resulting content for leading/trailing space or newline.
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        // Strip exactly one byte from each end when the content is padded on
        // both sides and is not whitespace-only.
        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        if preceding_backslash {
            // `open` shrinks to a single backslash-escaped text byte; the
            // code span itself lives in `close`, starting right after it.
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: true,
            };
            self.tree[open].item.end = self.tree[open].item.start + 1;
            self.tree[open].next = Some(close);
            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[close].item.start = self.tree[open].item.start + 1;
        } else {
            // `open` becomes the code node and absorbs everything through
            // `close`.
            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[open].item.end = self.tree[close].item.end;
            self.tree[open].next = self.tree[close].next;
        }

        // MDX: errors recorded in pass 1 for `{` inside what turned out to be a
        // code span are false positives — the `{` is literal text.
        if !self.mdx_errors.is_empty() {
            self.mdx_errors
                .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
        }
    }
2197
2198    /// On success, returns a buffer containing the inline html and byte offset.
2199    /// When no bytes were skipped, the buffer will be empty and the html can be
2200    /// represented as a subslice of the input string.
2201    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
2202        let c = *bytes.get(ix)?;
2203        if c == b'!' {
2204            Some((
2205                vec![],
2206                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
2207            ))
2208        } else if c == b'?' {
2209            Some((
2210                vec![],
2211                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
2212            ))
2213        } else {
2214            let (span, i) = scan_html_block_inner(
2215                // Subtract 1 to include the < character
2216                &bytes[(ix - 1)..],
2217                Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
2218            )?;
2219            Some((span, i + ix - 1))
2220        }
2221    }
2222}
2223
2224/// Returns number of containers scanned.
2225pub(crate) fn scan_containers(
2226    tree: &Tree<Item>,
2227    line_start: &mut LineStart<'_>,
2228    options: Options,
2229) -> usize {
2230    let mut i = 0;
2231    for &node_ix in tree.walk_spine() {
2232        match tree[node_ix].item.body {
2233            ItemBody::BlockQuote(..) => {
2234                let save = line_start.clone();
2235                // In MDX mode indented code blocks are disabled, so the
2236                // ≤3-space cap on blockquote prefix indent doesn't apply —
2237                // tab- or 4+-space-indented `>` should still continue the
2238                // blockquote (matches micromark + remark-mdx).
2239                if options.contains(Options::ENABLE_MDX) {
2240                    line_start.scan_all_space();
2241                } else {
2242                    let _ = line_start.scan_space(3);
2243                }
2244                if !line_start.scan_blockquote_marker() {
2245                    *line_start = save;
2246                    break;
2247                }
2248            }
2249            ItemBody::ListItem(indent, _) => {
2250                let save = line_start.clone();
2251                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2252                    *line_start = save;
2253                    break;
2254                }
2255            }
2256            ItemBody::DefinitionListDefinition(indent) => {
2257                let save = line_start.clone();
2258                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2259                    *line_start = save;
2260                    break;
2261                }
2262            }
2263            ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
2264                let save = line_start.clone();
2265                if !line_start.scan_space(4) && !line_start.is_at_eol() {
2266                    *line_start = save;
2267                    break;
2268                }
2269            }
2270            _ => (),
2271        }
2272        i += 1;
2273    }
2274    i
2275}
2276
2277pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
2278    let mut line_start = LineStart::new(bytes);
2279    let _ = scan_containers(tree, &mut line_start, options);
2280    line_start.bytes_scanned()
2281}
2282
2283/// Like `skip_container_prefixes`, but also returns the leftover virtual
2284/// space columns from tab-stop expansion past the last consumed container
2285/// prefix. Used by math-span content extraction to faithfully reproduce
2286/// indentation that the container "ate" only partially — e.g. a single
2287/// `\t` (4 cols) in a list item with 2-col content indent leaves 2
2288/// trailing spaces of content.
2289fn skip_container_prefixes_with_remaining(
2290    tree: &Tree<Item>,
2291    bytes: &[u8],
2292    options: Options,
2293) -> (usize, usize) {
2294    let mut line_start = LineStart::new(bytes);
2295    let _ = scan_containers(tree, &mut line_start, options);
2296    (line_start.bytes_scanned(), line_start.remaining_space())
2297}
2298
2299impl Tree<Item> {
2300    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
2301        if end > start {
2302            if let Some(ix) = self.cur() {
2303                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
2304                {
2305                    self[ix].item.end = end;
2306                    return;
2307                }
2308            }
2309            self.append(Item {
2310                start,
2311                end,
2312                body: ItemBody::Text { backslash_escaped },
2313            });
2314        }
2315    }
2316    /// Returns true if the current node is inside a table.
2317    ///
2318    /// If `cur` is an ItemBody::Table, it would return false,
2319    /// but since the `TableRow` and `TableHead` and `TableCell`
2320    /// are children of the table, anything doing inline parsing
2321    /// doesn't need to care about that.
2322    pub(crate) fn is_in_table(&self) -> bool {
2323        fn might_be_in_table(item: &Item) -> bool {
2324            item.body.is_inline()
2325                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
2326                    | ItemBody::TableCell)
2327        }
2328        for &ix in self.walk_spine().rev() {
2329            if matches!(self[ix].item.body, ItemBody::Table(_)) {
2330                return true;
2331            }
2332            if !might_be_in_table(&self[ix].item) {
2333                return false;
2334            }
2335        }
2336        false
2337    }
2338}
2339
/// One run of inline delimiters kept on the [`InlineStack`] while it waits
/// for a matching closer.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// offset of tree node
    start: TreeIndex,
    /// number of delimiters available for matching
    count: usize,
    /// length of the run that these delimiters came from
    run_length: usize,
    /// b'*', b'_', b'~', or b'^'
    c: u8,
    /// can both open and close
    both: bool,
}
2353
/// Stack of delimiter runs that may still act as openers, plus per-category
/// lower bounds that limit how far down the stack a closer has to search.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// Candidate openers, oldest first.
    stack: Vec<InlineEl>,
    // Lower bounds for matching indices in the stack. For example
    // a strikethrough delimiter will never match with any element
    // in the stack with index smaller than
    // `lower_bounds[InlineStack::TILDES]`.
    lower_bounds: [usize; 10],
}
2363
2364impl InlineStack {
2365    /// These are indices into the lower bounds array.
2366    /// Not both refers to the property that the delimiter can not both
2367    /// be opener as a closer.
2368    const UNDERSCORE_NOT_BOTH: usize = 0;
2369    const ASTERISK_NOT_BOTH: usize = 1;
2370    const ASTERISK_BASE: usize = 2;
2371    const TILDES: usize = 5;
2372    const UNDERSCORE_BASE: usize = 6;
2373    const CIRCUMFLEXES: usize = 9;
2374
2375    fn pop_all(&mut self, tree: &mut Tree<Item>) {
2376        for el in self.stack.drain(..) {
2377            for i in 0..el.count {
2378                tree[el.start + i].item.body = ItemBody::Text {
2379                    backslash_escaped: false,
2380                };
2381            }
2382        }
2383        self.lower_bounds = [0; 10];
2384    }
2385
2386    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
2387        if c == b'_' {
2388            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
2389            if both {
2390                mod3_lower
2391            } else {
2392                min(
2393                    mod3_lower,
2394                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
2395                )
2396            }
2397        } else if c == b'*' {
2398            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
2399            if both {
2400                mod3_lower
2401            } else {
2402                min(
2403                    mod3_lower,
2404                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
2405                )
2406            }
2407        } else if c == b'^' {
2408            self.lower_bounds[InlineStack::CIRCUMFLEXES]
2409        } else {
2410            self.lower_bounds[InlineStack::TILDES]
2411        }
2412    }
2413
2414    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
2415        if c == b'_' {
2416            if both {
2417                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
2418            } else {
2419                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
2420            }
2421        } else if c == b'*' {
2422            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
2423            if !both {
2424                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
2425            }
2426        } else if c == b'^' {
2427            self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
2428        } else {
2429            self.lower_bounds[InlineStack::TILDES] = new_bound;
2430        }
2431    }
2432
2433    fn truncate(&mut self, new_bound: usize) {
2434        self.stack.truncate(new_bound);
2435        for lower_bound in &mut self.lower_bounds {
2436            if *lower_bound > new_bound {
2437                *lower_bound = new_bound;
2438            }
2439        }
2440    }
2441
2442    /// Find an opener that can match `c` of original `run_length`.
2443    ///
2444    /// `current_count` is the **remaining** length of the closer being
2445    /// processed (chars not yet consumed by earlier inner-loop matches).
2446    /// We use it for CommonMark rule 9 (the "mod 3" both-side rule) so
2447    /// that after a partial consumption like `3*foo *bar**` the outer `*`
2448    /// can pair with what's left of the `**` — micromark re-evaluates the
2449    /// rule using only the *current* run lengths on each side.
2450    ///
2451    /// `run_length` is the original closer length; it stays stable across
2452    /// inner-loop iterations and is what the lower-bounds optimisation and
2453    /// the strict tilde/caret length check key off.
2454    fn find_match(
2455        &mut self,
2456        tree: &mut Tree<Item>,
2457        c: u8,
2458        run_length: usize,
2459        current_count: usize,
2460        both: bool,
2461    ) -> Option<InlineEl> {
2462        // Use current_count (the post-partial-consumption remaining length)
2463        // for the rule-9 mod-3 lowerbound key, not run_length. After an
2464        // inner-loop pass consumes part of the closer, the remaining
2465        // length sits in a different mod-3 bucket and may now satisfy
2466        // rule 9 with openers the earlier (longer) attempt failed
2467        // against. Keying on run_length would carry over the earlier
2468        // failure into the new bucket and block valid matches like the
2469        // outer `*` in `cz*x` `*foo***bar***baz` (closer `***` partial
2470        // remainder 1 should still reach the opener at offset 2).
2471        let lowerbound = min(
2472            self.stack.len(),
2473            self.get_lowerbound(c, current_count, both),
2474        );
2475        let res = self.stack[lowerbound..]
2476            .iter()
2477            .cloned()
2478            .enumerate()
2479            .rfind(|(_, el)| {
2480                if (c == b'~' || c == b'^') && run_length != el.run_length {
2481                    return false;
2482                }
2483                // Rule 9 (mod-3): for `*`/`_`, the openers on the stack are
2484                // checked against the *current* lengths — `el.count` reflects
2485                // remaining-after-partial-consumption when an opener has been
2486                // re-pushed, and `current_count` is the remaining closer.
2487                el.c == c
2488                    && (!both && !el.both
2489                        || !(current_count + el.count).is_multiple_of(3)
2490                        || current_count.is_multiple_of(3))
2491            });
2492
2493        if let Some((matching_ix, matching_el)) = res {
2494            let matching_ix = matching_ix + lowerbound;
2495            for el in &self.stack[(matching_ix + 1)..] {
2496                for i in 0..el.count {
2497                    tree[el.start + i].item.body = ItemBody::Text {
2498                        backslash_escaped: false,
2499                    };
2500                }
2501            }
2502            self.truncate(matching_ix);
2503            Some(matching_el)
2504        } else {
2505            // For `*`/`_`, the lower-bound optimisation is safe because their
2506            // matching rule (CM "rule of three") is monotonic across future
2507            // closers with the same count. Tildes/carets match strictly by
2508            // equal run-length, so a failure at run-length 2 must not close
2509            // the door on a later run-length 1 closer matching an earlier
2510            // run-length 1 opener still on the stack. Key the bound by
2511            // `current_count` (the post-partial-consumption length) so it
2512            // applies only to closers whose remaining bucket actually
2513            // shares this failure mode.
2514            if c != b'~' && c != b'^' {
2515                self.set_lowerbound(c, current_count, both, self.stack.len());
2516            }
2517            None
2518        }
2519    }
2520
2521    fn trim_lower_bound(&mut self, ix: usize) {
2522        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
2523    }
2524
2525    fn push(&mut self, el: InlineEl) {
2526        if el.c == b'~' {
2527            self.trim_lower_bound(InlineStack::TILDES);
2528        } else if el.c == b'^' {
2529            self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
2530        }
2531        self.stack.push(el)
2532    }
2533}
2534
/// Outcome of `scan_reference`: what follows a bracketed link text.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // contains next node index
    // (produced for a `[]` tail: the node after the closing `]`)
    Collapsed(Option<TreeIndex>),
    // the scanned label was a footnote label rather than a link label
    UnexpectedFootnote,
    // no usable reference label was found
    Failed,
    // `[text][...]` where `[...]` started but is an invalid label
    // (e.g. contains unescaped `[`). The shortcut form for `[text]` is
    // suppressed because the spec says a shortcut link must NOT be
    // followed by `[` — even if that `[` doesn't form a valid label.
    FailedInvalidLabel,
}
2549
2550/// Skips forward within a block to a node which spans (ends inclusive) the given
2551/// index into the source.
2552fn scan_nodes_to_ix(
2553    tree: &Tree<Item>,
2554    mut node: Option<TreeIndex>,
2555    ix: usize,
2556) -> Option<TreeIndex> {
2557    while let Some(node_ix) = node {
2558        if tree[node_ix].item.end <= ix {
2559            node = tree[node_ix].next;
2560        } else {
2561            break;
2562        }
2563    }
2564    node
2565}
2566
2567/// Scans an inline link label, which cannot be interrupted.
2568/// Returns number of bytes (including brackets) and label on success.
2569fn scan_link_label<'text>(
2570    tree: &Tree<Item>,
2571    text: &'text str,
2572    options: Options,
2573) -> Option<(usize, ReferenceLabel<'text>)> {
2574    let bytes = text.as_bytes();
2575    if bytes.len() < 2 || bytes[0] != b'[' {
2576        return None;
2577    }
2578    let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
2579    if options.contains(Options::ENABLE_FOOTNOTES)
2580        && b'^' == bytes[1]
2581        && bytes.get(2) != Some(&b']')
2582    {
2583        // GFM footnote labels don't wrap across line breaks.
2584        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
2585        if let Some((byte_index, cow)) =
2586            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
2587        {
2588            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
2589        }
2590    }
2591    let (byte_index, cow) =
2592        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
2593    Some((byte_index + 1, ReferenceLabel::Link(cow)))
2594}
2595
2596fn scan_reference<'b>(
2597    tree: &Tree<Item>,
2598    text: &'b str,
2599    cur: Option<TreeIndex>,
2600    options: Options,
2601) -> RefScan<'b> {
2602    let cur_ix = match cur {
2603        None => return RefScan::Failed,
2604        Some(cur_ix) => cur_ix,
2605    };
2606    let start = tree[cur_ix].item.start;
2607    let tail = &text.as_bytes()[start..];
2608
2609    // If the `[` opening the candidate label was escaped in source
2610    // (preceded by an odd run of backslashes), it's a literal `[` and
2611    // can't start a reference label. Without this check the label
2612    // scanner walks raw source, which doesn't know that pulldown-cmark
2613    // already absorbed the `\` into a backslash-escape token, and it
2614    // would falsely consume `\[foo]` as `[foo]`.
2615    if tail.first() == Some(&b'[') && start > 0 {
2616        let src = text.as_bytes();
2617        let mut backslashes = 0usize;
2618        let mut j = start;
2619        while j > 0 && src[j - 1] == b'\\' {
2620            backslashes += 1;
2621            j -= 1;
2622        }
2623        if backslashes % 2 == 1 {
2624            return RefScan::Failed;
2625        }
2626    }
2627
2628    if tail.starts_with(b"[]") {
2629        // The trailing `]` of the collapsed reference must already exist as a
2630        // tree node — pulldown-cmark emits each bracket as its own item, and
2631        // we only reach here when `tail` already contains `]`. Defensive
2632        // fallback to `Failed` if that invariant is somehow broken.
2633        let Some(closing_node) = tree[cur_ix].next else {
2634            return RefScan::Failed;
2635        };
2636        RefScan::Collapsed(tree[closing_node].next)
2637    } else {
2638        let label = scan_link_label(tree, &text[start..], options);
2639        match label {
2640            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2641            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2642            None => {
2643                // If `[text]` is followed by `[` that looked like a label
2644                // opener, the shortcut form is suppressed even though the
2645                // label parse failed (CommonMark requires shortcut links
2646                // not be followed by `[`).
2647                if tail.starts_with(b"[") {
2648                    RefScan::FailedInvalidLabel
2649                } else {
2650                    RefScan::Failed
2651                }
2652            }
2653        }
2654    }
2655}
2656
2657#[derive(Clone, Default)]
2658struct LinkStack {
2659    inner: Vec<LinkStackEl>,
2660    disabled_ix: usize,
2661}
2662
2663impl LinkStack {
2664    fn push(&mut self, el: LinkStackEl) {
2665        self.inner.push(el);
2666    }
2667
2668    fn pop(&mut self) -> Option<LinkStackEl> {
2669        let el = self.inner.pop();
2670        self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2671        el
2672    }
2673
2674    fn clear(&mut self) {
2675        self.inner.clear();
2676        self.disabled_ix = 0;
2677    }
2678
2679    fn disable_all_links(&mut self) {
2680        for el in &mut self.inner[self.disabled_ix..] {
2681            if el.ty == LinkStackTy::Link {
2682                el.ty = LinkStackTy::Disabled;
2683            }
2684        }
2685        self.disabled_ix = self.inner.len();
2686    }
2687}
2688
2689#[derive(Clone, Debug)]
2690struct LinkStackEl {
2691    node: TreeIndex,
2692    ty: LinkStackTy,
2693}
2694
/// Kind of a link stack entry.
#[derive(Debug, Clone, PartialEq)]
enum LinkStackTy {
    /// Entry for a link.
    Link,
    /// Entry for an image.
    Image,
    /// A former `Link` entry that has been disabled.
    Disabled,
}
2701
2702/// Contains the destination URL, title and source span of a reference definition.
2703#[derive(Clone, Debug)]
2704pub struct LinkDef<'a> {
2705    pub dest: CowStr<'a>,
2706    pub title: Option<CowStr<'a>>,
2707    pub span: Range<usize>,
2708}
2709
2710impl<'a> LinkDef<'a> {
2711    pub fn into_static(self) -> LinkDef<'static> {
2712        LinkDef {
2713            dest: self.dest.into_static(),
2714            title: self.title.map(|s| s.into_static()),
2715            span: self.span,
2716        }
2717    }
2718}
2719
/// Bookkeeping stored for each footnote definition.
///
/// Unlike [`LinkDef`], this carries no destination, title or span; it only
/// holds a usage counter.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter for this footnote definition.
    // NOTE(review): presumably the number of references to the footnote seen
    // so far; confirm against the code that increments it.
    pub use_count: usize,
}
2725
2726/// Tracks tree indices of code span delimiters of each length. It should prevent
2727/// quadratic scanning behaviours by providing (amortized) constant time lookups.
2728struct CodeDelims {
2729    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
2730    seen_first: bool,
2731}
2732
2733impl CodeDelims {
2734    fn new() -> Self {
2735        Self {
2736            inner: Default::default(),
2737            seen_first: false,
2738        }
2739    }
2740
2741    fn insert(&mut self, count: usize, ix: TreeIndex) {
2742        if self.seen_first {
2743            self.inner.entry(count).or_default().push_back(ix);
2744        } else {
2745            // Skip the first insert, since that delimiter will always
2746            // be an opener and not a closer.
2747            self.seen_first = true;
2748        }
2749    }
2750
2751    fn is_populated(&self) -> bool {
2752        !self.inner.is_empty()
2753    }
2754
2755    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2756        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2757            if ix > open_ix {
2758                return Some(ix);
2759            }
2760        }
2761        None
2762    }
2763
2764    fn clear(&mut self) {
2765        self.inner.clear();
2766        self.seen_first = false;
2767    }
2768}
2769
2770/// Tracks brace contexts and delimiter length for math delimiters.
2771/// Provides amortized constant-time lookups.
2772struct MathDelims {
2773    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
2774}
2775
2776impl MathDelims {
2777    fn new() -> Self {
2778        Self {
2779            inner: Default::default(),
2780        }
2781    }
2782
2783    fn clear(&mut self) {
2784        self.inner.clear();
2785    }
2786}
2787
/// Typed index of a link entry held by `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LinkIndex(usize);

/// Typed index of a string held by `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct CowIndex(usize);

/// Typed index of a table alignment list held by `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct AlignmentIndex(usize);

/// Typed index of a heading attribute set held by `Allocations`.
///
/// Wraps a `NonZeroUsize` rather than a plain `usize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct HeadingIndex(NonZeroUsize);

/// Typed index of a JSX element entry held by `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct JsxElementIndex(usize);

/// Typed index of a directive entry held by `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct DirectiveIndex(usize);
2805
2806/// A parsed JSX attribute.
2807#[derive(Debug, Clone)]
2808pub(crate) enum JsxAttr<'a> {
2809    Boolean(CowStr<'a>),
2810    Literal(CowStr<'a>, CowStr<'a>),
2811    Expression(CowStr<'a>, CowStr<'a>),
2812    Spread(CowStr<'a>),
2813}
2814
2815impl<'a> JsxAttr<'a> {
2816    pub fn into_static(self) -> JsxAttr<'static> {
2817        match self {
2818            JsxAttr::Boolean(n) => JsxAttr::Boolean(n.into_static()),
2819            JsxAttr::Literal(n, v) => JsxAttr::Literal(n.into_static(), v.into_static()),
2820            JsxAttr::Expression(n, v) => JsxAttr::Expression(n.into_static(), v.into_static()),
2821            JsxAttr::Spread(v) => JsxAttr::Spread(v.into_static()),
2822        }
2823    }
2824}
2825
2826/// Pre-parsed JSX element data (name + attributes + tag classification).
2827#[derive(Debug, Clone)]
2828pub(crate) struct JsxElementData<'a> {
2829    pub name: CowStr<'a>,
2830    pub attrs: Vec<JsxAttr<'a>>,
2831    pub raw: CowStr<'a>,
2832    pub is_closing: bool,
2833    pub is_self_closing: bool,
2834}
2835
2836impl<'a> JsxElementData<'a> {
2837    pub fn into_static(self) -> JsxElementData<'static> {
2838        JsxElementData {
2839            name: self.name.into_static(),
2840            attrs: self.attrs.into_iter().map(|a| a.into_static()).collect(),
2841            raw: self.raw.into_static(),
2842            is_closing: self.is_closing,
2843            is_self_closing: self.is_self_closing,
2844        }
2845    }
2846}
2847
2848#[derive(Debug, Clone)]
2849pub(crate) struct DirectiveAttrData<'a> {
2850    pub name: CowStr<'a>,
2851    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
2852    pub label_start: usize,
2853    pub label_end: usize,
2854    /// Cols of leading whitespace before `:::` on the opening line, after
2855    /// outer-container prefix stripping. Mirrors micromark-extension-directive's
2856    /// `initialSize`, which controls how much the directive body's per-line
2857    /// linePrefix is stripped (up to `initialSize + 1` cols). Only meaningful
2858    /// for container directives — leaf/text directives leave this 0.
2859    pub initial_size: u8,
2860}
2861
2862#[derive(Clone)]
2863pub(crate) struct Allocations<'a> {
2864    pub refdefs: RefDefs<'a>,
2865    /// Every refdef occurrence in source order, including duplicates that
2866    /// `refdefs` drops (it's a map and only keeps the first per label, since
2867    /// resolution picks the first match per CommonMark). Used to emit every
2868    /// definition as its own mdast `definition` node.
2869    pub refdefs_all: Vec<(LinkLabel<'a>, LinkDef<'a>)>,
2870    pub footdefs: FootnoteDefs<'a>,
2871    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
2872    cows: Vec<CowStr<'a>>,
2873    alignments: Vec<Vec<Alignment>>,
2874    headings: Vec<HeadingAttributes<'a>>,
2875    jsx_elements: Vec<JsxElementData<'a>>,
2876    directives: Vec<DirectiveAttrData<'a>>,
2877}
2878
2879/// Used by the heading attributes extension.
2880#[derive(Clone)]
2881pub(crate) struct HeadingAttributes<'a> {
2882    pub id: Option<CowStr<'a>>,
2883    pub classes: Vec<CowStr<'a>>,
2884    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
2885}
2886
2887/// Keeps track of the reference definitions defined in the document.
2888#[derive(Clone, Default, Debug)]
2889pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2890
2891/// Keeps track of the footnote definitions defined in the document.
2892#[derive(Clone, Default, Debug)]
2893pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2894
2895impl<'input, 'b, 's> RefDefs<'input>
2896where
2897    's: 'b,
2898{
2899    /// Performs a lookup on reference label using unicode case folding.
2900    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2901        self.0.get(&UniCase::new(key.into()))
2902    }
2903
2904    /// Provides an iterator over all the document's reference definitions.
2905    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2906        self.0.iter().map(|(k, v)| (k.as_ref(), v))
2907    }
2908}
2909
2910impl<'input, 'b, 's> FootnoteDefs<'input>
2911where
2912    's: 'b,
2913{
2914    /// Performs a lookup on reference label using unicode case folding.
2915    pub fn contains(&'s self, key: &'b str) -> bool {
2916        self.0.contains_key(&UniCase::new(key.into()))
2917    }
2918    /// Performs a lookup on reference label using unicode case folding.
2919    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2920        self.0.get_mut(&UniCase::new(key))
2921    }
2922}
2923
2924impl<'a> Allocations<'a> {
2925    pub fn new() -> Self {
2926        Self {
2927            refdefs: RefDefs::default(),
2928            refdefs_all: Vec::new(),
2929            footdefs: FootnoteDefs::default(),
2930            links: Vec::with_capacity(128),
2931            cows: Vec::new(),
2932            alignments: Vec::new(),
2933            headings: Vec::new(),
2934            jsx_elements: Vec::new(),
2935            directives: Vec::new(),
2936        }
2937    }
2938
2939    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2940        let ix = self.cows.len();
2941        self.cows.push(cow);
2942        CowIndex(ix)
2943    }
2944
2945    pub fn allocate_link(
2946        &mut self,
2947        ty: LinkType,
2948        url: CowStr<'a>,
2949        title: CowStr<'a>,
2950        id: CowStr<'a>,
2951    ) -> LinkIndex {
2952        let ix = self.links.len();
2953        self.links.push((ty, url, title, id));
2954        LinkIndex(ix)
2955    }
2956
2957    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2958        let ix = self.alignments.len();
2959        self.alignments.push(alignment);
2960        AlignmentIndex(ix)
2961    }
2962
2963    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2964        let ix = self.headings.len();
2965        self.headings.push(attrs);
2966        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
2967        // such a long Vec cannot fit in memory.
2968        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2969        HeadingIndex(ix_nonzero)
2970    }
2971
2972    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2973        core::mem::replace(&mut self.cows[ix.0], "".into())
2974    }
2975
2976    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2977        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2978        core::mem::replace(&mut self.links[ix.0], default_link)
2979    }
2980
2981    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2982        core::mem::take(&mut self.alignments[ix.0])
2983    }
2984
2985    pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
2986        let ix = self.jsx_elements.len();
2987        self.jsx_elements.push(data);
2988        JsxElementIndex(ix)
2989    }
2990
2991    pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
2992        let ix = self.directives.len();
2993        self.directives.push(data);
2994        DirectiveIndex(ix)
2995    }
2996
2997    pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
2998        core::mem::replace(
2999            &mut self.directives[ix.0],
3000            DirectiveAttrData {
3001                name: "".into(),
3002                attributes: Vec::new(),
3003                label_start: 0,
3004                label_end: 0,
3005                initial_size: 0,
3006            },
3007        )
3008    }
3009
3010    pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
3011        &self.directives[ix.0]
3012    }
3013
3014    pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
3015        core::mem::replace(
3016            &mut self.jsx_elements[ix.0],
3017            JsxElementData {
3018                name: "".into(),
3019                attrs: Vec::new(),
3020                raw: "".into(),
3021                is_closing: false,
3022                is_self_closing: false,
3023            },
3024        )
3025    }
3026}
3027
3028impl<'a> Index<CowIndex> for Allocations<'a> {
3029    type Output = CowStr<'a>;
3030
3031    fn index(&self, ix: CowIndex) -> &Self::Output {
3032        self.cows.index(ix.0)
3033    }
3034}
3035
3036impl<'a> Index<LinkIndex> for Allocations<'a> {
3037    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
3038
3039    fn index(&self, ix: LinkIndex) -> &Self::Output {
3040        self.links.index(ix.0)
3041    }
3042}
3043
3044impl<'a> Index<AlignmentIndex> for Allocations<'a> {
3045    type Output = Vec<Alignment>;
3046
3047    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
3048        self.alignments.index(ix.0)
3049    }
3050}
3051
3052impl<'a> Index<HeadingIndex> for Allocations<'a> {
3053    type Output = HeadingAttributes<'a>;
3054
3055    fn index(&self, ix: HeadingIndex) -> &Self::Output {
3056        self.headings.index(ix.0.get() - 1)
3057    }
3058}
3059
/// Reachability information for certain inline HTML elements: cdata elements
/// (`<![CDATA[`), processing elements (`<?`) and declarations
/// (`<!DECLARATION`), plus a fourth entry named `comment`.
///
/// Each `usize` is the index before which a scan for that kind of element
/// will always fail and can hence be skipped.
#[derive(Default, Clone)]
pub(crate) struct HtmlScanGuard {
    /// Bound for cdata scans.
    pub cdata: usize,
    /// Bound for processing-element scans.
    pub processing: usize,
    /// Bound for declaration scans.
    pub declaration: usize,
    // NOTE(review): presumably the same kind of bound for HTML comments
    // (`<!--`); confirm against the scanner that updates it.
    pub comment: usize,
}
3072
3073/// Trait to customize [`Parser`] behavior with callbacks. See [`Parser::new_with_callbacks`].
3074///
3075/// All methods have a default implementation, so you can choose which ones to override.
3076pub trait ParserCallbacks<'input> {
3077    /// Potentially provide a custom definition for a broken link.
3078    ///
3079    /// In case the parser encounters any potential links that have a broken
3080    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
3081    /// this callback will be called with information about the reference,
3082    /// and the returned pair will be used as the link URL and title if it is not
3083    /// `None`.
3084    fn handle_broken_link(
3085        &mut self,
3086        #[allow(unused_variables)] link: BrokenLink<'input>,
3087    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3088        None
3089    }
3090}
3091
3092/// Wrapper to implement [`ParserCallbacks::handle_broken_link`] with a closure.
3093///
3094/// Used internally by [`Parser::new_with_broken_link_callback`].
3095#[allow(missing_debug_implementations)]
3096pub struct BrokenLinkCallback<F>(Option<F>);
3097
3098impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
3099where
3100    F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
3101{
3102    fn handle_broken_link(
3103        &mut self,
3104        link: BrokenLink<'input>,
3105    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3106        self.0.as_mut().and_then(|cb| cb(link))
3107    }
3108}
3109
3110impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
3111    fn handle_broken_link(
3112        &mut self,
3113        link: BrokenLink<'input>,
3114    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3115        (**self).handle_broken_link(link)
3116    }
3117}
3118
3119/// [Parser] callbacks that do nothing.
3120///
3121/// Used when no custom callbacks are provided.
3122#[allow(missing_debug_implementations)]
3123pub struct DefaultParserCallbacks;
3124
3125impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
3126
3127/// Markdown event and source range iterator.
3128///
3129/// Generates tuples where the first element is the markdown event and the second
3130/// is a the corresponding range in the source string.
3131///
3132/// Constructed from a `Parser` using its
3133/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
3134#[derive(Debug)]
3135pub struct OffsetIter<'a, CB> {
3136    parser: Parser<'a, CB>,
3137}
3138
3139impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
3140    /// Returns a reference to the internal reference definition tracker.
3141    pub fn reference_definitions(&self) -> &RefDefs<'_> {
3142        self.parser.reference_definitions()
3143    }
3144
3145    /// Returns MDX validation errors collected during parsing.
3146    pub fn mdx_errors(&self) -> &[(usize, String)] {
3147        self.parser.mdx_errors()
3148    }
3149}
3150
3151impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
3152    type Item = (Event<'a>, Range<usize>);
3153
3154    fn next(&mut self) -> Option<Self::Item> {
3155        self.parser
3156            .inner
3157            .next_event_range(&mut self.parser.callbacks)
3158    }
3159}
3160
3161impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
3162    type Item = Event<'a>;
3163
3164    fn next(&mut self) -> Option<Event<'a>> {
3165        self.inner
3166            .next_event_range(&mut self.callbacks)
3167            .map(|(event, _range)| event)
3168    }
3169}
3170
3171impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
3172
3173impl<'input> ParserInner<'input> {
3174    fn next_event_range(
3175        &mut self,
3176        callbacks: &mut dyn ParserCallbacks<'input>,
3177    ) -> Option<(Event<'input>, Range<usize>)> {
3178        match self.tree.cur() {
3179            None => {
3180                let ix = self.tree.pop()?;
3181                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
3182                    // tight paragraphs emit nothing
3183                    self.tree.next_sibling(ix);
3184                    return self.next_event_range(callbacks);
3185                } else {
3186                    ix
3187                };
3188                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
3189                self.tree.next_sibling(ix);
3190                let span = self.tree[ix].item.start..self.tree[ix].item.end;
3191                debug_assert!(span.start <= span.end);
3192                Some((Event::End(tag_end), span))
3193            }
3194            Some(cur_ix) => {
3195                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
3196                    // tight paragraphs emit nothing
3197                    self.tree.push();
3198                    self.tree.cur().unwrap()
3199                } else {
3200                    cur_ix
3201                };
3202                if self.tree[cur_ix].item.body.is_maybe_inline() {
3203                    self.handle_inline(callbacks);
3204                }
3205
3206                let node = self.tree[cur_ix];
3207                let item = node.item;
3208                let event = item_to_event(item, self.text, &mut self.allocs);
3209                if let Event::Start(..) = event {
3210                    self.tree.push();
3211                } else {
3212                    self.tree.next_sibling(cur_ix);
3213                }
3214                debug_assert!(item.start <= item.end);
3215                Some((event, item.start..item.end))
3216            }
3217        }
3218    }
3219}
3220
3221fn body_to_tag_end(body: &ItemBody) -> TagEnd {
3222    match *body {
3223        ItemBody::Paragraph => TagEnd::Paragraph,
3224        ItemBody::Emphasis => TagEnd::Emphasis,
3225        ItemBody::Superscript => TagEnd::Superscript,
3226        ItemBody::Subscript => TagEnd::Subscript,
3227        ItemBody::Strong => TagEnd::Strong,
3228        ItemBody::Strikethrough => TagEnd::Strikethrough,
3229        ItemBody::Link(..) => TagEnd::Link,
3230        ItemBody::Image(..) => TagEnd::Image,
3231        ItemBody::Heading(level, _) => TagEnd::Heading(level),
3232        ItemBody::IndentCodeBlock(..) | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
3233            TagEnd::CodeBlock
3234        }
3235        ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
3236        ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
3237        ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
3238        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
3239        ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
3240        ItemBody::List(_, c, _) => {
3241            let is_ordered = c == b'.' || c == b')';
3242            TagEnd::List(is_ordered)
3243        }
3244        ItemBody::ListItem(_, _) => TagEnd::Item,
3245        ItemBody::TableHead => TagEnd::TableHead,
3246        ItemBody::TableCell => TagEnd::TableCell,
3247        ItemBody::TableRow => TagEnd::TableRow,
3248        ItemBody::Table(..) => TagEnd::Table,
3249        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
3250        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
3251        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
3252        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
3253        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
3254        ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
3255        ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
3256        _ => panic!("unexpected item body {:?}", body),
3257    }
3258}
3259
3260fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
3261    let tag = match item.body {
3262        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
3263        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
3264        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
3265        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
3266        ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
3267        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
3268        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
3269        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
3270        ItemBody::SoftBreak => return Event::SoftBreak,
3271        ItemBody::HardBreak(_) => return Event::HardBreak,
3272        ItemBody::FootnoteReference(cow_ix) => {
3273            return Event::FootnoteReference(allocs.take_cow(cow_ix))
3274        }
3275        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
3276        ItemBody::Rule => return Event::Rule,
3277        ItemBody::Paragraph => Tag::Paragraph,
3278        ItemBody::Emphasis => Tag::Emphasis,
3279        ItemBody::Superscript => Tag::Superscript,
3280        ItemBody::Subscript => Tag::Subscript,
3281        ItemBody::Strong => Tag::Strong,
3282        ItemBody::Strikethrough => Tag::Strikethrough,
3283        ItemBody::Link(link_ix) => {
3284            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3285            Tag::Link {
3286                link_type,
3287                dest_url,
3288                title,
3289                id,
3290            }
3291        }
3292        ItemBody::Image(link_ix) => {
3293            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3294            Tag::Image {
3295                link_type,
3296                dest_url,
3297                title,
3298                id,
3299            }
3300        }
3301        ItemBody::Heading(level, Some(heading_ix)) => {
3302            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
3303            Tag::Heading {
3304                level,
3305                id: id.clone(),
3306                classes: classes.clone(),
3307                attrs: attrs.clone(),
3308            }
3309        }
3310        ItemBody::Heading(level, None) => Tag::Heading {
3311            level,
3312            id: None,
3313            classes: Vec::new(),
3314            attrs: Vec::new(),
3315        },
3316        ItemBody::MathBlock(cow_ix) => {
3317            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3318        }
3319        ItemBody::FencedCodeBlock(cow_ix) => {
3320            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3321        }
3322        ItemBody::IndentCodeBlock(..) => Tag::CodeBlock(CodeBlockKind::Indented),
3323        ItemBody::ContainerDirective(_, dir_ix)
3324        | ItemBody::LeafDirective(dir_ix)
3325        | ItemBody::TextDirective(dir_ix) => {
3326            let kind = match item.body {
3327                ItemBody::ContainerDirective(..) => DirectiveKind::Container,
3328                ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
3329                _ => DirectiveKind::Text,
3330            };
3331            let dir = allocs.take_directive(dir_ix);
3332            Tag::Directive {
3333                kind,
3334                name: dir.name,
3335                attributes: dir.attributes,
3336            }
3337        }
3338        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
3339        ItemBody::List(is_tight, c, listitem_start) => {
3340            if c == b'.' || c == b')' {
3341                Tag::List(Some(listitem_start), is_tight)
3342            } else {
3343                Tag::List(None, is_tight)
3344            }
3345        }
3346        ItemBody::ListItem(_, _) => Tag::Item,
3347        ItemBody::TableHead => Tag::TableHead,
3348        ItemBody::TableCell => Tag::TableCell,
3349        ItemBody::TableRow => Tag::TableRow,
3350        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
3351        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
3352        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
3353        ItemBody::Math(cow_ix, is_display) => {
3354            return if is_display {
3355                Event::DisplayMath(allocs.take_cow(cow_ix))
3356            } else {
3357                Event::InlineMath(allocs.take_cow(cow_ix))
3358            }
3359        }
3360        ItemBody::DefinitionList(_) => Tag::DefinitionList,
3361        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
3362        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
3363        ItemBody::MdxJsxFlowElement(jsx_ix) => {
3364            let jsx = allocs.take_jsx_element(jsx_ix);
3365            Tag::MdxJsxFlowElement(jsx.raw)
3366        }
3367        ItemBody::MdxJsxTextElement(jsx_ix) => {
3368            let jsx = allocs.take_jsx_element(jsx_ix);
3369            Tag::MdxJsxTextElement(jsx.raw)
3370        }
3371        ItemBody::MdxFlowExpression(cow_ix) => {
3372            return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
3373        }
3374        ItemBody::MdxTextExpression(cow_ix) => {
3375            return Event::MdxTextExpression(allocs.take_cow(cow_ix))
3376        }
3377        ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
3378        _ => panic!("unexpected item body {:?}", item.body),
3379    };
3380
3381    Event::Start(tag)
3382}
3383
3384#[cfg(test)]
3385mod test {
3386    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};
3387
3388    use super::*;
3389    use crate::tree::Node;
3390
3391    // TODO: move these tests to tests/html.rs?
3392
3393    fn parser_with_extensions(text: &str) -> Parser<'_> {
3394        let mut opts = Options::empty();
3395        opts.insert(Options::ENABLE_TABLES);
3396        opts.insert(Options::ENABLE_FOOTNOTES);
3397        opts.insert(Options::ENABLE_STRIKETHROUGH);
3398        opts.insert(Options::ENABLE_SUPERSCRIPT);
3399        opts.insert(Options::ENABLE_SUBSCRIPT);
3400        opts.insert(Options::ENABLE_TASKLISTS);
3401
3402        Parser::new_ext(text, opts)
3403    }
3404
3405    #[test]
3406    #[cfg(target_pointer_width = "64")]
3407    fn node_size() {
3408        let node_size = core::mem::size_of::<Node<Item>>();
3409        assert_eq!(48, node_size);
3410    }
3411
3412    #[test]
3413    #[cfg(target_pointer_width = "64")]
3414    fn body_size() {
3415        let body_size = core::mem::size_of::<ItemBody>();
3416        assert_eq!(16, body_size);
3417    }
3418
3419    #[test]
3420    fn single_open_fish_bracket() {
3421        // dont crash
3422        assert_eq!(3, Parser::new("<").count());
3423    }
3424
3425    #[test]
3426    fn lone_hashtag() {
3427        // dont crash
3428        assert_eq!(2, Parser::new("#").count());
3429    }
3430
3431    #[test]
3432    fn lots_of_backslashes() {
3433        // dont crash
3434        Parser::new("\\\\\r\r").count();
3435        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
3436    }
3437
3438    #[test]
3439    fn issue_1030() {
3440        let mut opts = Options::empty();
3441        opts.insert(Options::ENABLE_WIKILINKS);
3442
3443        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);
3444
3445        let offsets = parser
3446            .into_offset_iter()
3447            .map(|(_ev, range)| range)
3448            .collect::<Vec<_>>();
3449        let expected_offsets = vec![
3450            (0..44),  // Paragraph START
3451            (0..19),  // `For a new ferrari, `
3452            (19..43), // Wikilink START
3453            (31..41), // `click here`
3454            (19..43), // Wikilink END
3455            (43..44), // `!`
3456            (0..44),  // Paragraph END
3457        ];
3458        assert_eq!(offsets, expected_offsets);
3459    }
3460
3461    #[test]
3462    fn issue_320() {
3463        // dont crash
3464        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
3465    }
3466
3467    #[test]
3468    fn issue_319() {
3469        // dont crash
3470        parser_with_extensions("|\r-]([^|\r-]([^").count();
3471        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
3472    }
3473
3474    #[test]
3475    fn issue_303() {
3476        // dont crash
3477        parser_with_extensions("[^\r\ra]").count();
3478        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
3479    }
3480
3481    #[test]
3482    fn issue_313() {
3483        // dont crash
3484        parser_with_extensions("*]0[^\r\r*]0[^").count();
3485        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
3486    }
3487
3488    #[test]
3489    fn issue_311() {
3490        // dont crash
3491        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
3492    }
3493
3494    #[test]
3495    fn issue_283() {
3496        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
3497        // dont crash
3498        parser_with_extensions(input).count();
3499    }
3500
3501    #[test]
3502    fn issue_289() {
3503        // dont crash
3504        parser_with_extensions("> - \\\n> - ").count();
3505        parser_with_extensions("- \n\n").count();
3506    }
3507
3508    #[test]
3509    fn issue_306() {
3510        // dont crash
3511        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
3512    }
3513
3514    #[test]
3515    fn issue_305() {
3516        // dont crash
3517        parser_with_extensions("_6**6*_*").count();
3518    }
3519
3520    #[test]
3521    fn another_emphasis_panic() {
3522        parser_with_extensions("*__#_#__*").count();
3523    }
3524
3525    #[test]
3526    fn offset_iter() {
3527        let event_offsets: Vec<_> = Parser::new("*hello* world")
3528            .into_offset_iter()
3529            .map(|(_ev, range)| range)
3530            .collect();
3531        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
3532        assert_eq!(expected_offsets, event_offsets);
3533    }
3534
3535    #[test]
3536    fn reference_link_offsets() {
3537        let range =
3538            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
3539                .into_offset_iter()
3540                .filter_map(|(ev, range)| match ev {
3541                    Event::Start(
3542                        Tag::Link {
3543                            link_type: LinkType::Reference,
3544                            ..
3545                        },
3546                        ..,
3547                    ) => Some(range),
3548                    _ => None,
3549                })
3550                .next()
3551                .unwrap();
3552        assert_eq!(5..30, range);
3553    }
3554
3555    #[test]
3556    fn footnote_offsets() {
3557        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
3558            .into_offset_iter()
3559            .filter_map(|(ev, range)| match ev {
3560                Event::FootnoteReference(..) => Some(range),
3561                _ => None,
3562            })
3563            .next()
3564            .unwrap();
3565        assert_eq!(12..16, range);
3566    }
3567
3568    #[test]
3569    fn footnote_offsets_exclamation() {
3570        let mut immediately_before_footnote = None;
3571        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
3572            .into_offset_iter()
3573            .filter_map(|(ev, range)| match ev {
3574                Event::FootnoteReference(..) => Some(range),
3575                _ => {
3576                    immediately_before_footnote = Some((ev, range));
3577                    None
3578                }
3579            })
3580            .next()
3581            .unwrap();
3582        assert_eq!(13..17, range);
3583        if let (Event::Text(exclamation), range_exclamation) =
3584            immediately_before_footnote.as_ref().unwrap()
3585        {
3586            assert_eq!("!", &exclamation[..]);
3587            assert_eq!(&(12..13), range_exclamation);
3588        } else {
3589            panic!("what came first, then? {immediately_before_footnote:?}");
3590        }
3591    }
3592
3593    #[test]
3594    fn table_offset() {
3595        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
3596        let event_offset = parser_with_extensions(markdown)
3597            .into_offset_iter()
3598            .map(|(_ev, range)| range)
3599            .nth(3)
3600            .unwrap();
3601        let expected_offset = 3..59;
3602        assert_eq!(expected_offset, event_offset);
3603    }
3604
3605    #[test]
3606    fn table_cell_span() {
3607        let markdown = "a|b|c\n--|--|--\na|  |c";
3608        let event_offset = parser_with_extensions(markdown)
3609            .into_offset_iter()
3610            .filter_map(|(ev, span)| match ev {
3611                Event::Start(Tag::TableCell) => Some(span),
3612                _ => None,
3613            })
3614            .nth(4)
3615            .unwrap();
3616        // Cell span includes the leading `|` delimiter (matching remark).
3617        let expected_offset_start = "a|b|c\n--|--|--\na".len();
3618        assert_eq!(
3619            expected_offset_start..(expected_offset_start + 3),
3620            event_offset
3621        );
3622    }
3623
3624    #[test]
3625    fn offset_iter_issue_378() {
3626        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
3627            .into_offset_iter()
3628            .map(|(_ev, range)| range)
3629            .collect();
3630        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
3631        assert_eq!(expected_offsets, event_offsets);
3632    }
3633
3634    #[test]
3635    fn offset_iter_issue_404() {
3636        let event_offsets: Vec<_> = Parser::new("###\n")
3637            .into_offset_iter()
3638            .map(|(_ev, range)| range)
3639            .collect();
3640        let expected_offsets = vec![(0..4), (0..4)];
3641        assert_eq!(expected_offsets, event_offsets);
3642    }
3643
3644    #[test]
3645    fn broken_links_called_only_once() {
3646        for &(markdown, expected) in &[
3647            ("See also [`g()`][crate::g].", 1),
3648            ("See also [`g()`][crate::g][].", 1),
3649            ("[brokenlink1] some other node [brokenlink2]", 2),
3650        ] {
3651            let mut times_called = 0;
3652            let callback = &mut |_broken_link: BrokenLink| {
3653                times_called += 1;
3654                None
3655            };
3656            let parser =
3657                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
3658            for _ in parser {}
3659            assert_eq!(times_called, expected);
3660        }
3661    }
3662
3663    #[test]
3664    fn simple_broken_link_callback() {
3665        let test_str = "This is a link w/o def: [hello][world]";
3666        let mut callback = |broken_link: BrokenLink| {
3667            assert_eq!("world", broken_link.reference.as_ref());
3668            assert_eq!(&test_str[broken_link.span], "[hello][world]");
3669            let url = "YOLO".into();
3670            let title = "SWAG".to_owned().into();
3671            Some((url, title))
3672        };
3673        let parser =
3674            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
3675        let mut link_tag_count = 0;
3676        for (typ, url, title, id) in parser.filter_map(|event| match event {
3677            Event::Start(Tag::Link {
3678                link_type,
3679                dest_url,
3680                title,
3681                id,
3682            }) => Some((link_type, dest_url, title, id)),
3683            _ => None,
3684        }) {
3685            link_tag_count += 1;
3686            assert_eq!(typ, LinkType::ReferenceUnknown);
3687            assert_eq!(url.as_ref(), "YOLO");
3688            assert_eq!(title.as_ref(), "SWAG");
3689            assert_eq!(id.as_ref(), "world");
3690        }
3691        assert!(link_tag_count > 0);
3692    }
3693
3694    #[test]
3695    fn code_block_kind_check_fenced() {
3696        let parser = Parser::new("hello\n```test\ntadam\n```");
3697        let mut found = 0;
3698        for (ev, _range) in parser.into_offset_iter() {
3699            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
3700                assert_eq!(syntax.as_ref(), "test");
3701                found += 1;
3702            }
3703        }
3704        assert_eq!(found, 1);
3705    }
3706
3707    #[test]
3708    fn code_block_kind_check_indented() {
3709        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
3710        let mut found = 0;
3711        for (ev, _range) in parser.into_offset_iter() {
3712            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
3713                found += 1;
3714            }
3715        }
3716        assert_eq!(found, 1);
3717    }
3718
3719    #[test]
3720    fn ref_defs() {
3721        let input = r###"[a B c]: http://example.com
3722[another]: https://google.com
3723
3724text
3725
3726[final ONE]: http://wikipedia.org
3727"###;
3728        let mut parser = Parser::new(input);
3729
3730        assert!(parser.reference_definitions().get("a b c").is_some());
3731        assert!(parser.reference_definitions().get("nope").is_none());
3732
3733        if let Some(_event) = parser.next() {
3734            // testing keys with shorter lifetimes than parser and its input
3735            let s = "final one".to_owned();
3736            let link_def = parser.reference_definitions().get(&s).unwrap();
3737            let span = &input[link_def.span.clone()];
3738            assert_eq!(span, "[final ONE]: http://wikipedia.org");
3739        }
3740    }
3741
3742    #[test]
3743    #[allow(clippy::extra_unused_lifetimes)]
3744    fn common_lifetime_patterns_allowed<'b>() {
3745        let temporary_str = String::from("xyz");
3746
3747        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
3748        // Hack it by attaching the lifetime to the test function instead.
3749        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
3750        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));
3751
3752        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
3753            Some(("#".into(), link.reference))
3754        }
3755
3756        for _ in Parser::new_with_broken_link_callback(
3757            "static lifetime",
3758            Options::empty(),
3759            Some(&mut closure),
3760        ) {}
3761        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
3762         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
3763         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
3764         * fix is simple: move it out to a function that allows annotating the lifetimes.
3765         */
3766        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
3767        //}
3768
3769        for _ in Parser::new_with_broken_link_callback(
3770            "static lifetime",
3771            Options::empty(),
3772            Some(&mut function),
3773        ) {}
3774        for _ in Parser::new_with_broken_link_callback(
3775            &temporary_str,
3776            Options::empty(),
3777            Some(&mut function),
3778        ) {}
3779    }
3780
3781    #[test]
3782    fn inline_html_inside_blockquote() {
3783        // Regression for #960
3784        let input = "> <foo\n> bar>";
3785        let events: Vec<_> = Parser::new(input).collect();
3786        let expected = [
3787            Event::Start(Tag::BlockQuote(None)),
3788            Event::Start(Tag::Paragraph),
3789            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
3790            Event::End(TagEnd::Paragraph),
3791            Event::End(TagEnd::BlockQuote(None)),
3792        ];
3793        assert_eq!(&events, &expected);
3794    }
3795
3796    #[test]
3797    fn wikilink_has_pothole() {
3798        let input = "[[foo]] [[bar|baz]]";
3799        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
3800        let expected = [
3801            Event::Start(Tag::Paragraph),
3802            Event::Start(Tag::Link {
3803                link_type: LinkType::WikiLink { has_pothole: false },
3804                dest_url: CowStr::Borrowed("foo"),
3805                title: CowStr::Borrowed(""),
3806                id: CowStr::Borrowed(""),
3807            }),
3808            Event::Text(CowStr::Borrowed("foo")),
3809            Event::End(TagEnd::Link),
3810            Event::Text(CowStr::Borrowed(" ")),
3811            Event::Start(Tag::Link {
3812                link_type: LinkType::WikiLink { has_pothole: true },
3813                dest_url: CowStr::Borrowed("bar"),
3814                title: CowStr::Borrowed(""),
3815                id: CowStr::Borrowed(""),
3816            }),
3817            Event::Text(CowStr::Borrowed("baz")),
3818            Event::End(TagEnd::Link),
3819            Event::End(TagEnd::Paragraph),
3820        ];
3821        assert_eq!(&events, &expected);
3822    }
3823
3824    fn mdx_parser(text: &str) -> Parser<'_> {
3825        Parser::new_ext(text, Options::ENABLE_MDX)
3826    }
3827
3828    #[test]
3829    fn mdx_esm_import() {
3830        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
3831        assert_eq!(events.len(), 1);
3832        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
3833    }
3834
3835    #[test]
3836    fn mdx_esm_export() {
3837        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
3838        assert_eq!(events.len(), 1);
3839        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
3840    }
3841
3842    #[test]
3843    fn mdx_flow_expression() {
3844        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
3845        assert_eq!(events.len(), 1);
3846        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
3847    }
3848
3849    #[test]
3850    fn mdx_jsx_flow_self_closing() {
3851        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
3852        assert!(!events.is_empty());
3853        assert!(
3854            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
3855        );
3856    }
3857
3858    #[test]
3859    fn mdx_jsx_flow_fragment() {
3860        let events: Vec<_> = mdx_parser("<>\n").collect();
3861        assert!(!events.is_empty());
3862        assert!(matches!(
3863            &events[0],
3864            Event::Start(Tag::MdxJsxFlowElement(_))
3865        ));
3866    }
3867
3868    #[test]
3869    fn mdx_inline_expression() {
3870        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
3871        let has_expr = events
3872            .iter()
3873            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3874        assert!(
3875            has_expr,
3876            "Expected inline MDX expression, got: {:?}",
3877            events
3878        );
3879    }
3880
3881    #[test]
3882    fn mdx_inline_jsx() {
3883        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
3884        let has_jsx = events
3885            .iter()
3886            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
3887        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
3888    }
3889
3890    #[test]
3891    fn mdx_all_tags_are_jsx() {
3892        // In MDX mode, all tags (including lowercase) are JSX, not HTML.
3893        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
3894        let has_jsx = events
3895            .iter()
3896            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
3897        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
3898    }
3899
3900    #[test]
3901    fn mdx_does_not_interfere_without_flag() {
3902        // Without ENABLE_MDX, none of this should be parsed as MDX.
3903        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
3904        // Should be a regular paragraph.
3905        assert!(events
3906            .iter()
3907            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
3908    }
3909
3910    #[test]
3911    fn mdx_expression_in_heading() {
3912        let events: Vec<_> = mdx_parser("# {title}\n").collect();
3913        let has_heading = events
3914            .iter()
3915            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
3916        assert!(has_heading, "Should have a heading");
3917        let has_expr = events
3918            .iter()
3919            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
3920        assert!(
3921            has_expr,
3922            "Heading should contain MdxTextExpression, got: {:?}",
3923            events
3924        );
3925    }
3926
3927    #[test]
3928    fn mdx_expression_mixed_text_in_heading() {
3929        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
3930        let has_text = events
3931            .iter()
3932            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
3933        let has_expr = events
3934            .iter()
3935            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3936        assert!(has_text, "Should have text, got: {:?}", events);
3937        assert!(has_expr, "Should have expression, got: {:?}", events);
3938    }
3939}