Skip to main content

satteri_pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
//! Tree-based two-pass parser.
22
23use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25    cmp::{max, min},
26    iter::FusedIterator,
27    num::NonZeroUsize,
28    ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33#[cfg(feature = "mdx")]
34use crate::mdx::*;
35use crate::{
36    firstpass::run_first_pass,
37    linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
38    scanners::*,
39    strings::CowStr,
40    tree::{Tree, TreeIndex},
41    Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
42    MetadataBlockKind, Options, Tag, TagEnd,
43};
44
/// Maximum nesting depth of parentheses accepted inside a link destination.
///
/// Allowing arbitrarily deep nesting of parentheses inside link destinations
/// can create denial-of-service vulnerabilities if we're not careful.
/// The simplest countermeasure is to limit the depth, which is explicitly
/// allowed by the spec as long as the limit is at least 3:
/// <https://spec.commonmark.org/0.29/#link-destination>
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
51
/// Payload stored in each node of the parse tree: a span plus what kind of
/// construct occupies that span.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    // Offset into the input text where the item starts (the parser indexes
    // `text.as_bytes()` with it, so it is a byte offset).
    pub start: usize,
    // Offset into the input text where the item ends (used as the upper bound
    // when slicing the input text).
    pub end: usize,
    // What this item is; defaults to `ItemBody::Root`.
    pub body: ItemBody,
}
58
/// The kind of construct a tree [`Item`] represents.
///
/// Variants fall into three groups: *possible* inline items that still need
/// to be resolved in the second pass (`Maybe*`), inline items after
/// resolution, and block items. `Root` is the default and marks the dummy
/// node at the top of the tree.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // These are possible inline items, need to be resolved in second pass.

    // repeats, can_open, can_close
    MaybeEmphasis(usize, bool, bool),
    // preceded_by_backslash, brace context
    MaybeMath(bool, u8),
    // quote byte, can_open, can_close
    MaybeSmartQuote(u8, bool, bool),
    MaybeCode(usize, bool), // number of backticks, preceded by backslash
    MaybeHtml,
    MaybeLinkOpen,
    // bool indicates whether or not the preceding section could be a reference
    MaybeLinkClose(bool),
    MaybeImage,

    // These are inline items after resolution.
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    Math(CowIndex, bool), // true for display math
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    FootnoteReference(CowIndex),
    TaskListMarker(bool), // true for checked

    // These are also inline items.
    InlineHtml,
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    SynthesizeChar(char),
    Html,
    Text {
        backslash_escaped: bool,
    },
    SoftBreak,
    // true = is backslash
    HardBreak(bool),

    // Dummy node at the top of the tree - should not be used otherwise!
    #[default]
    Root,

    // These are block items.
    Paragraph,
    TightParagraph,
    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
    FencedCodeBlock(CowIndex),
    MathBlock(CowIndex), // meta string (info after $$)
    // bool: true = lazy/no-extend (block was opened as a single-line
    // synthetic split, e.g. after an empty list item closed via blank
    // line); arena_build's trailing-indent extension must skip it.
    IndentCodeBlock(bool),
    // bool: true = trim trailing newline from value (type 6/7 always;
    // type 1-5 only when their closer pattern was found, not when the
    // block ran out of input at EOF)
    HtmlBlock(bool),
    BlockQuote(Option<BlockQuoteKind>),
    ContainerDirective(u8, DirectiveIndex), // (fence length, directive data)
    LeafDirective(DirectiveIndex),
    TextDirective(DirectiveIndex),
    // A container directive's `[label]`, holding inline content. Emitted as a
    // `paragraph` with `data.directiveLabel = true`. Its children are tokenized
    // by the normal inline pass, so emphasis/strong/links resolve naturally.
    DirectiveLabel,
    List(bool, u8, u64),   // is_tight, list character, list start index
    ListItem(usize, bool), // indent level, spread (loose item)
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    // Definition lists
    DefinitionList(bool), // is_tight
    // gets turned into either a paragraph or a definition list title,
    // depending on whether there's a definition after it
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    DefinitionListDefinition(usize),

    // Tables
    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,

    // MDX (only compiled in with the `mdx` feature)
    #[cfg(feature = "mdx")]
    MdxJsxFlowElement(JsxElementIndex),
    #[cfg(feature = "mdx")]
    MdxJsxTextElement(JsxElementIndex),
    #[cfg(feature = "mdx")]
    MdxFlowExpression(CowIndex),
    #[cfg(feature = "mdx")]
    MdxTextExpression(CowIndex),
    #[cfg(feature = "mdx")]
    MdxEsm(CowIndex),
}
159
160impl ItemBody {
161    pub(crate) fn is_maybe_inline(&self) -> bool {
162        use ItemBody::*;
163        matches!(
164            *self,
165            MaybeEmphasis(..)
166                | MaybeMath(..)
167                | MaybeSmartQuote(..)
168                | MaybeCode(..)
169                | MaybeHtml
170                | MaybeLinkOpen
171                | MaybeLinkClose(..)
172                | MaybeImage
173        )
174    }
175    pub(crate) fn is_block_level(&self) -> bool {
176        !self.is_inline() && !matches!(self, ItemBody::Root)
177    }
178    fn is_inline(&self) -> bool {
179        use ItemBody::*;
180        matches!(
181            *self,
182            MaybeEmphasis(..)
183                | MaybeMath(..)
184                | MaybeSmartQuote(..)
185                | MaybeCode(..)
186                | MaybeHtml
187                | MaybeLinkOpen
188                | MaybeLinkClose(..)
189                | MaybeImage
190                | Emphasis
191                | Strong
192                | Strikethrough
193                | Math(..)
194                | Code(..)
195                | Link(..)
196                | Image(..)
197                | FootnoteReference(..)
198                | TaskListMarker(..)
199                | InlineHtml
200                | OwnedInlineHtml(..)
201                | SynthesizeText(..)
202                | SynthesizeChar(..)
203                | Html
204                | Text { .. }
205                | SoftBreak
206                | HardBreak(..)
207        )
208    }
209}
210
211#[derive(Debug)]
212pub struct BrokenLink<'a> {
213    pub span: core::ops::Range<usize>,
214    pub link_type: LinkType,
215    pub reference: CowStr<'a>,
216}
217
/// Markdown event iterator.
///
/// Generic over the callback type `CB`; everything that does not depend on
/// the callback lives in the non-generic [`ParserInner`].
pub struct Parser<'input, CB = DefaultParserCallbacks> {
    // User-supplied hooks (e.g. broken-link handling).
    callbacks: CB,
    // Callback-independent parser state.
    inner: ParserInner<'input>,
}
223
// Inner state for `Parser`, extracted so that it can remain generic over the callback without
// re-compiling complex logic for each instantiation of the generic type.
pub(crate) struct ParserInner<'input> {
    // The markdown source being parsed.
    pub(crate) text: &'input str,
    // Options the parser was created with.
    pub(crate) options: Options,
    // Tree produced by the first pass; the inline passes rewrite it in place.
    pub(crate) tree: Tree<Item>,
    // Side tables produced by the first pass (includes the reference definitions).
    pub(crate) allocs: Allocations<'input>,
    html_scan_guard: HtmlScanGuard,

    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
    // Consider this example:
    //
    //     [x]: xxx...
    //     [x]
    //     [x]
    //     [x]
    //
    // Which expands to this HTML:
    //
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //
    // This is quadratic growth, because it's filling in the area of a square.
    // To prevent this, track how much it's expanded and limit it.
    //
    // The value is the remaining budget in bytes of URL + title text; it
    // starts at `max(text.len(), 100_000)` and only ever decreases.
    link_ref_expansion_limit: usize,

    /// MDX validation errors as `(offset, message)` pairs.
    ///
    /// Inline parsing appends to this list; `ParserInner::new` additionally
    /// seeds it with the errors reported by the first pass.
    pub(crate) mdx_errors: Vec<(usize, String)>,

    // used by inline passes. store them here for reuse
    inline_stack: InlineStack,
    link_stack: LinkStack,
    wikilink_stack: LinkStack,
    code_delims: CodeDelims,
    math_delims: MathDelims,
}
261
262impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
263    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
264        // Only print the fields that have public types.
265        f.debug_struct("Parser")
266            .field("text", &self.inner.text)
267            .field("options", &self.inner.options)
268            .field("callbacks", &..)
269            .finish()
270    }
271}
272
273impl<'a> BrokenLink<'a> {
274    /// Moves the link into version with a static lifetime.
275    ///
276    /// The `reference` member is cloned to a Boxed or Inline version.
277    pub fn into_static(self) -> BrokenLink<'static> {
278        BrokenLink {
279            span: self.span.clone(),
280            link_type: self.link_type,
281            reference: self.reference.into_string().into(),
282        }
283    }
284}
285
286impl<'input> Parser<'input, DefaultParserCallbacks> {
287    /// Creates a new event iterator for a markdown string without any options enabled.
288    pub fn new(text: &'input str) -> Self {
289        Self::new_ext(text, Options::empty())
290    }
291
292    /// Creates a new event iterator for a markdown string with given options.
293    pub fn new_ext(text: &'input str, options: Options) -> Self {
294        Self::new_with_callbacks(text, options, DefaultParserCallbacks)
295    }
296}
297
298impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
299    /// Creates a new event iterator for markdown text with given options and callbacks.
300    ///
301    /// ```
302    /// # use satteri_pulldown_cmark::{BrokenLink, CowStr, Event, Options, Parser, ParserCallbacks, Tag};
303    /// struct CustomCallbacks;
304    /// impl<'input> ParserCallbacks<'input> for CustomCallbacks {
305    ///     fn handle_broken_link(
306    ///         &mut self,
307    ///         link: BrokenLink<'input>,
308    ///     ) -> Option<(CowStr<'input>, CowStr<'input>)> {
309    ///         Some(("https://target".into(), link.reference))
310    ///     }
311    /// }
312    ///
313    /// let mut parser =
314    ///     Parser::new_with_callbacks("[broken]", Options::empty(), CustomCallbacks);
315    ///
316    /// assert!(matches!(
317    ///     parser.nth(1),
318    ///     Some(Event::Start(Tag::Link { .. }))
319    /// ));
320    /// ```
321    ///
322    /// See the [`ParserCallbacks`] trait for a list of callbacks that can be overridden.
323    pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
324        let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
325        tree.reset();
326        let inline_stack = Default::default();
327        let link_stack = Default::default();
328        let wikilink_stack = Default::default();
329        let html_scan_guard = Default::default();
330        Parser {
331            callbacks,
332
333            inner: ParserInner {
334                text,
335                options,
336                tree,
337                allocs,
338                inline_stack,
339                link_stack,
340                wikilink_stack,
341                html_scan_guard,
342                // always allow 100KiB
343                link_ref_expansion_limit: text.len().max(100_000),
344                mdx_errors: Vec::new(),
345                code_delims: CodeDelims::new(),
346                math_delims: MathDelims::new(),
347            },
348        }
349    }
350
351    /// Returns a reference to the internal `RefDefs` object, which provides access
352    /// to the internal map of reference definitions.
353    pub fn reference_definitions(&self) -> &RefDefs<'_> {
354        &self.inner.allocs.refdefs
355    }
356
357    /// Returns MDX validation errors collected during parsing.
358    /// Only populated when [`Options::ENABLE_MDX`] is active.
359    pub fn mdx_errors(&self) -> &[(usize, String)] {
360        &self.inner.mdx_errors
361    }
362
363    /// Consumes the event iterator and produces an iterator that produces
364    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
365    /// range in the markdown source.
366    pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
367        OffsetIter { parser: self }
368    }
369}
370
371impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
372    /// In case the parser encounters any potential links that have a broken
373    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
374    /// the provided callback will be called with the reference name,
375    /// and the returned pair will be used as the link URL and title if it is not
376    /// `None`.
377    ///
378    /// This constructor is provided for backwards compatibility.
379    /// This and other callbacks can also be customized with [`Parser::new_with_callbacks`].
380    pub fn new_with_broken_link_callback(
381        text: &'input str,
382        options: Options,
383        broken_link_callback: Option<F>,
384    ) -> Self
385    where
386        F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
387    {
388        Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
389    }
390}
391
392impl<'input> ParserInner<'input> {
393    pub(crate) fn new(text: &'input str, options: Options) -> Self {
394        let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
395        tree.reset();
396        ParserInner {
397            text,
398            options,
399            tree,
400            allocs,
401            inline_stack: Default::default(),
402            link_stack: Default::default(),
403            wikilink_stack: Default::default(),
404            html_scan_guard: Default::default(),
405            link_ref_expansion_limit: text.len().max(100_000),
406            mdx_errors: firstpass_mdx_errors,
407            code_delims: CodeDelims::new(),
408            math_delims: MathDelims::new(),
409        }
410    }
411
412    /// Use a link label to fetch a type, url, and title.
413    ///
414    /// This function enforces the [`link_ref_expansion_limit`].
415    /// If it returns Some, it also consumes some of the fuel.
416    /// If we're out of fuel, it immediately returns None.
417    ///
418    /// The URL and title are found in the [`RefDefs`] map.
419    /// If they're not there, and a callback was provided by the user,
420    /// `handle_broken_link` will be invoked and given the opportunity
421    /// to provide a fallback.
422    ///
423    /// The link type (that's "link" or "image") depends on the usage site, and
424    /// is provided by the caller of this function.
425    /// This function returns a new one because, if it has to invoke a callback
426    /// to find the information, the link type is [mapped to an unknown type].
427    ///
428    /// [mapped to an unknown type]: crate::LinkType::to_unknown
429    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
430    fn fetch_link_type_url_title(
431        &mut self,
432        link_label: CowStr<'input>,
433        span: Range<usize>,
434        link_type: LinkType,
435        callbacks: &mut dyn ParserCallbacks<'input>,
436    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
437        if self.link_ref_expansion_limit == 0 {
438            return None;
439        }
440
441        let (link_type, url, title) = self
442            .allocs
443            .refdefs
444            .get(link_label.as_ref())
445            .map(|matching_def| {
446                // found a matching definition!
447                let title = matching_def
448                    .title
449                    .as_ref()
450                    .cloned()
451                    .unwrap_or_else(|| "".into());
452                let url = matching_def.dest.clone();
453                (link_type, url, title)
454            })
455            .or_else(|| {
456                // Construct a BrokenLink struct, which will be passed to the callback
457                let broken_link = BrokenLink {
458                    span,
459                    link_type,
460                    reference: link_label,
461                };
462
463                callbacks
464                    .handle_broken_link(broken_link)
465                    .map(|(url, title)| (link_type.to_unknown(), url, title))
466            })?;
467
468        // Limit expansion from link references.
469        // This isn't a problem for footnotes, because multiple references to the same one
470        // reuse the same node, but links/images get their HREF/SRC copied.
471        self.link_ref_expansion_limit = self
472            .link_ref_expansion_limit
473            .saturating_sub(url.len() + title.len());
474
475        Some((link_type, url, title))
476    }
477
478    /// Handle inline markup.
479    ///
480    /// When the parser encounters any item indicating potential inline markup, all
481    /// inline markup passes are run on the remainder of the chain.
482    ///
483    /// Note: there's some potential for optimization here, but that's future work.
484    pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
485        self.handle_inline_pass1(callbacks);
486        // Resolve attention (emphasis/strong) and strikethrough/sub/sup.
487        // micromark runs each construct's `resolveAll` in the order each
488        // construct first fires; whichever marker appears first in the
489        // block decides whether emphasis or strikethrough resolves
490        // first. This matters when their would-be spans cross:
491        //   * `*~bar~*`  – first marker `*` → emphasis first, then
492        //     strikethrough inside the emphasis.
493        //   * `~_~:_<`   – first marker `~` → strikethrough first,
494        //     capturing `_` as content; `_` at offset 4 is then alone.
495        //   * `_/~z)*~*nf` – first marker `_`, no `_` closer → emphasis
496        //     first (pairs `*..*`); `~..~` would cross the emphasis so
497        //     it can't form in the second pass.
498        // Each pass is recursive: after pairing at root, it descends
499        // into already-formed spans so that inner markers (e.g.
500        // `~_a_~` → `_a_` inside the strikethrough) still resolve.
501        let st_enabled = self.options.contains(Options::ENABLE_STRIKETHROUGH)
502            || self.options.contains(Options::ENABLE_SUBSCRIPT)
503            || self.options.contains(Options::ENABLE_SUPERSCRIPT);
504        if !st_enabled {
505            self.handle_emphasis_pass();
506            return;
507        }
508        let strikethrough_first = matches!(
509            self.first_inline_marker_char(self.tree.cur()),
510            Some(b'~') | Some(b'^')
511        );
512        if strikethrough_first {
513            self.handle_tildes_carets_pass();
514            self.handle_emphasis_pass();
515        } else {
516            self.handle_emphasis_pass();
517            self.handle_tildes_carets_pass();
518        }
519    }
520
521    /// Find the first MaybeEmphasis token in `start..` whose character
522    /// is one of `*` `_` `~` `^`. Used to pick the resolve order.
523    fn first_inline_marker_char(&self, start: Option<TreeIndex>) -> Option<u8> {
524        let mut cur = start;
525        while let Some(cur_ix) = cur {
526            if let ItemBody::MaybeEmphasis(_, _, _) = self.tree[cur_ix].item.body {
527                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
528                if matches!(c, b'*' | b'_' | b'~' | b'^') {
529                    return Some(c);
530                }
531            }
532            cur = self.tree[cur_ix].next;
533        }
534        None
535    }
536
537    /// Recursive emphasis pass. Processes `*`/`_` MaybeEmphasis at this
538    /// scope, then descends into any inline containers (Emphasis,
539    /// Strong, Strikethrough, Link, Image, etc.) to do the same in
540    /// their children.
541    fn handle_emphasis_pass(&mut self) {
542        let start = self.tree.cur();
543        self.resolve_emphasis_recursive(start);
544    }
545
546    fn resolve_emphasis_recursive(&mut self, start: Option<TreeIndex>) {
547        // Save and reset the shared inline_stack so each scope works
548        // with a fresh one. Smart-quote state is local to
549        // `handle_emphasis_in_scope`, no save needed.
550        let saved = core::mem::take(&mut self.inline_stack);
551        self.handle_emphasis_in_scope(start);
552        self.inline_stack = saved;
553
554        let mut cur = start;
555        while let Some(cur_ix) = cur {
556            let next = self.tree[cur_ix].next;
557            match self.tree[cur_ix].item.body {
558                ItemBody::Emphasis
559                | ItemBody::Strong
560                | ItemBody::Strikethrough
561                | ItemBody::Subscript
562                | ItemBody::Superscript
563                | ItemBody::Link(_)
564                | ItemBody::Image(_) => {
565                    let child = self.tree[cur_ix].child;
566                    self.resolve_emphasis_recursive(child);
567                }
568                _ => {}
569            }
570            cur = next;
571        }
572    }
573
574    /// Handle inline HTML, code spans, and links.
575    ///
576    /// This function handles both inline HTML and code spans, because they have
577    /// the same precedence. It also handles links, even though they have lower
578    /// precedence, because the URL of links must not be processed.
579    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
580        let mut cur = self.tree.cur();
581        let mut prev = None;
582
583        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
584        let block_text = &self.text[..block_end];
585
586        while let Some(mut cur_ix) = cur {
587            match self.tree[cur_ix].item.body {
588                ItemBody::MaybeHtml => {
589                    // MDX inline JSX: check before HTML
590                    #[cfg(feature = "mdx")]
591                    if self.options.contains(Options::ENABLE_MDX) {
592                        let start = self.tree[cur_ix].item.start;
593                        let next_byte = block_text.as_bytes().get(start + 1).copied();
594
595                        // In MDX, `<!` is not valid (no HTML comments).
596                        if next_byte == Some(b'!') {
597                            self.mdx_errors.push((
598                                start,
599                                "Unexpected character `!` (U+0021) before name, expected a \
600                                 character that can start a name, such as a letter, `$`, or `_` \
601                                 (note: to create a comment in MDX, use `{/* text */}`)"
602                                    .to_string(),
603                            ));
604                            self.tree[cur_ix].item.body = ItemBody::Text {
605                                backslash_escaped: false,
606                            };
607                            prev = cur;
608                            cur = self.tree[cur_ix].next;
609                            continue;
610                        }
611
612                        if let Some(total_len) =
613                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
614                        {
615                            let end = start + total_len;
616                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
617                            let raw = &block_text[start..end];
618                            let col = crate::mdx::column_at(block_text.as_bytes(), start);
619                            let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col, 0);
620                            let mut allocator = oxc_allocator::Allocator::default();
621                            crate::mdx::validate_jsx_expressions(
622                                &jsx_data.attrs,
623                                start,
624                                &mut allocator,
625                                &mut self.mdx_errors,
626                            );
627                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
628                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
629                            self.tree[cur_ix].item.end = end;
630                            self.tree[cur_ix].next = node;
631                            prev = cur;
632                            cur = node;
633                            if let Some(node_ix) = cur {
634                                self.tree[node_ix].item.start =
635                                    max(self.tree[node_ix].item.start, end);
636                            }
637                            continue;
638                        }
639
640                        // mdx-js fallback rule:
641                        //   `<` + space/tab → always literal `<` (text).
642                        //   `<` + newline   → JSX tag may span lines; treat
643                        //                      as text only if the next
644                        //                      non-whitespace byte is benign
645                        //                      (not `>`, not EOF/blank-line)
646                        //                      AND the line containing it
647                        //                      isn't a setext underline
648                        //                      (`-`+ or `=`+), which would
649                        //                      promote the `<` into a heading
650                        //                      whose JSX validation fails.
651                        //   `<` + anything else (incl. EOF) → parse error
652                        //                      (`<\`, `<,`, `<{`, `<<`, `<.`,
653                        //                       …).
654                        let bytes_block = block_text.as_bytes();
655                        let is_text_fallback = match next_byte {
656                            Some(b' ' | b'\t') => true,
657                            Some(b'\n' | b'\r') => {
658                                // Skip whitespace + container prefixes when
659                                // probing for the first significant byte
660                                // after `\n`. A `>` at line start inside a
661                                // blockquote is the container marker, not a
662                                // JSX-like delimiter.
663                                let bq_depth = self
664                                    .tree
665                                    .walk_spine()
666                                    .filter(|&&ix| {
667                                        matches!(self.tree[ix].item.body, ItemBody::BlockQuote(..))
668                                    })
669                                    .count();
670                                let mut probe = start + 1;
671                                loop {
672                                    while probe < bytes_block.len()
673                                        && matches!(
674                                            bytes_block[probe],
675                                            b' ' | b'\t' | b'\n' | b'\r'
676                                        )
677                                    {
678                                        probe += 1;
679                                    }
680                                    if bq_depth == 0
681                                        || probe >= bytes_block.len()
682                                        || bytes_block[probe] != b'>'
683                                    {
684                                        break;
685                                    }
686                                    let mut consumed = 0;
687                                    while consumed < bq_depth
688                                        && probe < bytes_block.len()
689                                        && bytes_block[probe] == b'>'
690                                    {
691                                        probe += 1;
692                                        if probe < bytes_block.len() && bytes_block[probe] == b' ' {
693                                            probe += 1;
694                                        }
695                                        consumed += 1;
696                                    }
697                                }
698                                if probe >= bytes_block.len() || bytes_block[probe] == b'>' {
699                                    false
700                                } else {
701                                    // Reject if `probe`'s line is a setext
702                                    // underline (only `-` or only `=`, then
703                                    // optional whitespace to EOL/EOF) AND
704                                    // would actually promote the `<`-line
705                                    // to a heading. Inside a blockquote
706                                    // container the underline line is
707                                    // typically a lazy continuation (no
708                                    // `>` prefix) and doesn't promote, so
709                                    // skip the rejection.
710                                    let underline_char = bytes_block[probe];
711                                    if !matches!(underline_char, b'-' | b'=') {
712                                        true
713                                    } else {
714                                        let mut q = probe;
715                                        while q < bytes_block.len()
716                                            && bytes_block[q] == underline_char
717                                        {
718                                            q += 1;
719                                        }
720                                        while q < bytes_block.len()
721                                            && matches!(bytes_block[q], b' ' | b'\t')
722                                        {
723                                            q += 1;
724                                        }
725                                        let at_eol = q >= bytes_block.len()
726                                            || matches!(bytes_block[q], b'\n' | b'\r');
727                                        if !at_eol {
728                                            true
729                                        } else {
730                                            // Container check: a blockquote
731                                            // `>` (possibly after up to 3
732                                            // spaces) on the line opening
733                                            // the `<` means the underline
734                                            // line would need the same
735                                            // prefix to actually promote a
736                                            // setext heading. Without it,
737                                            // the underline is lazy
738                                            // paragraph continuation, so
739                                            // accept as text.
740                                            //
741                                            // Same for listitems: if the
742                                            // spine has a ListItem and the
743                                            // underline line starts at a
744                                            // column less than the listitem
745                                            // content column, it's lazy
746                                            // continuation and doesn't
747                                            // promote — accept as text.
748                                            let mut ls = start;
749                                            while ls > 0
750                                                && !matches!(bytes_block[ls - 1], b'\n' | b'\r')
751                                            {
752                                                ls -= 1;
753                                            }
754                                            let mut k = ls;
755                                            let mut sp = 0;
756                                            while k < start && bytes_block[k] == b' ' && sp < 3 {
757                                                k += 1;
758                                                sp += 1;
759                                            }
760                                            if k < start && bytes_block[k] == b'>' {
761                                                true
762                                            } else {
763                                                // Underline line start.
764                                                let mut us = probe;
765                                                while us > 0
766                                                    && !matches!(bytes_block[us - 1], b'\n' | b'\r')
767                                                {
768                                                    us -= 1;
769                                                }
770                                                let mut underline_col = 0;
771                                                let mut uk = us;
772                                                while uk < probe && bytes_block[uk] == b' ' {
773                                                    uk += 1;
774                                                    underline_col += 1;
775                                                }
776                                                let listitem_indent = self
777                                                    .tree
778                                                    .walk_spine()
779                                                    .filter_map(|&ix| {
780                                                        match self.tree[ix].item.body {
781                                                            ItemBody::ListItem(indent, _) => {
782                                                                Some(indent)
783                                                            }
784                                                            _ => None,
785                                                        }
786                                                    })
787                                                    .next();
788                                                let in_blockquote =
789                                                    self.tree.walk_spine().any(|&ix| {
790                                                        matches!(
791                                                            self.tree[ix].item.body,
792                                                            ItemBody::BlockQuote(..)
793                                                        )
794                                                    });
795                                                // BlockQuote container: an
796                                                // underline line missing the
797                                                // `>` prefix is lazy
798                                                // continuation and doesn't
799                                                // promote. Detect by checking
800                                                // the underline line's source
801                                                // (not block_text, which has
802                                                // already stripped the
803                                                // prefix).
804                                                let bq_lazy = if in_blockquote {
805                                                    underline_col < 1
806                                                        || !bytes_block[us..probe].contains(&b'>')
807                                                } else {
808                                                    false
809                                                };
810                                                matches!(listitem_indent, Some(i) if underline_col < i)
811                                                    || bq_lazy
812                                            }
813                                        }
814                                    }
815                                }
816                            }
817                            _ => false,
818                        };
819                        if !is_text_fallback {
820                            self.mdx_errors.push((
821                                start,
822                                "Unexpected character after `<`, expected a valid JSX tag \
823                                 (note: to create a link in MDX, use `[text](url)`)"
824                                    .to_string(),
825                            ));
826                        }
827
828                        self.tree[cur_ix].item.body = ItemBody::Text {
829                            backslash_escaped: false,
830                        };
831                        prev = cur;
832                        cur = self.tree[cur_ix].next;
833                        continue;
834                    }
835
836                    let next = self.tree[cur_ix].next;
837                    let autolink = if let Some(next_ix) = next {
838                        scan_autolink(block_text, self.tree[next_ix].item.start)
839                    } else {
840                        None
841                    };
842
843                    if let Some((ix, uri, link_type)) = autolink {
844                        let node = scan_nodes_to_ix(&self.tree, next, ix);
845                        let text_node = self.tree.create_node(Item {
846                            start: self.tree[cur_ix].item.start + 1,
847                            end: ix - 1,
848                            body: ItemBody::Text {
849                                backslash_escaped: false,
850                            },
851                        });
852                        let link_ix =
853                            self.allocs
854                                .allocate_link(link_type, uri, "".into(), "".into());
855                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
856                        self.tree[cur_ix].item.end = ix;
857                        self.tree[cur_ix].next = node;
858                        self.tree[cur_ix].child = Some(text_node);
859                        prev = cur;
860                        cur = node;
861                        if let Some(node_ix) = cur {
862                            let orig_start = self.tree[node_ix].item.start;
863                            let new_start = max(orig_start, ix);
864                            self.tree[node_ix].item.start = new_start;
865                            // When the autolink's closing `>` consumed the byte
866                            // that was the target of a preceding `\` escape,
867                            // the trailing text's `backslash_escaped` flag is
868                            // stale — clear it so arena_build doesn't extend
869                            // the text node's source span back over bytes the
870                            // link now owns. Mirrors the inline-link fix.
871                            if new_start > orig_start {
872                                if let ItemBody::Text { backslash_escaped } =
873                                    &mut self.tree[node_ix].item.body
874                                {
875                                    *backslash_escaped = false;
876                                }
877                            }
878                        }
879                        continue;
880                    } else {
881                        let inline_html = next.and_then(|next_ix| {
882                            self.scan_inline_html(
883                                block_text.as_bytes(),
884                                self.tree[next_ix].item.start,
885                            )
886                        });
887                        if let Some((span, ix)) = inline_html {
888                            let node = scan_nodes_to_ix(&self.tree, next, ix);
889                            self.tree[cur_ix].item.body = if !span.is_empty() {
890                                let converted_string =
891                                    String::from_utf8(span).expect("invalid utf8");
892                                ItemBody::OwnedInlineHtml(
893                                    self.allocs.allocate_cow(converted_string.into()),
894                                )
895                            } else {
896                                ItemBody::InlineHtml
897                            };
898                            self.tree[cur_ix].item.end = ix;
899                            self.tree[cur_ix].next = node;
900                            prev = cur;
901                            cur = node;
902                            if let Some(node_ix) = cur {
903                                let orig_start = self.tree[node_ix].item.start;
904                                let new_start = max(orig_start, ix);
905                                self.tree[node_ix].item.start = new_start;
906                                // Inline HTML may consume bytes that a `\X`
907                                // escape was attached to (e.g. `\*` inside
908                                // an attribute value). Clear the stale flag
909                                // so arena_build doesn't extend the trail
910                                // back over bytes the HTML now owns.
911                                if new_start > orig_start {
912                                    if let ItemBody::Text { backslash_escaped } =
913                                        &mut self.tree[node_ix].item.body
914                                    {
915                                        *backslash_escaped = false;
916                                    }
917                                }
918                            }
919                            continue;
920                        }
921                    }
922                    self.tree[cur_ix].item.body = ItemBody::Text {
923                        backslash_escaped: false,
924                    };
925                }
926                ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
927                    if preceded_by_backslash {
928                        self.tree[cur_ix].item.body = ItemBody::Text {
929                            backslash_escaped: true,
930                        };
931                        prev = cur;
932                        cur = self.tree[cur_ix].next;
933                        continue;
934                    }
935                    // Count consecutive $ from the opening position
936                    let mut open_count = 1usize;
937                    let mut open_end = cur_ix;
938                    {
939                        let mut peek = self.tree[cur_ix].next;
940                        while let Some(peek_ix) = peek {
941                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
942                                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
943                            {
944                                open_count += 1;
945                                open_end = peek_ix;
946                                peek = self.tree[peek_ix].next;
947                            } else {
948                                break;
949                            }
950                        }
951                    }
952
953                    // Single- and multi-dollar math can be toggled
954                    // independently (mirroring remark-math's
955                    // `singleDollarTextMath`). When this run's length isn't
956                    // an enabled delimiter, the `$` is literal text — so
957                    // prose like `$50 to $100` never becomes a math span.
958                    let count_enabled = if open_count == 1 {
959                        self.options.contains(Options::ENABLE_MATH_SINGLE_DOLLAR)
960                    } else {
961                        self.options.contains(Options::ENABLE_MATH_MULTI_DOLLAR)
962                    };
963                    if !count_enabled {
964                        let mut text_ix = cur_ix;
965                        loop {
966                            self.tree[text_ix].item.body = ItemBody::Text {
967                                backslash_escaped: false,
968                            };
969                            if text_ix == open_end {
970                                break;
971                            }
972                            match self.tree[text_ix].next {
973                                Some(next) => text_ix = next,
974                                None => break,
975                            }
976                        }
977                        prev = cur;
978                        cur = self.tree[cur_ix].next;
979                        continue;
980                    }
981
982                    // Scan forward for a matching run of the same count
983                    let mut scan = self.tree[open_end].next;
984                    let mut close_ix = None;
985                    while let Some(scan_ix) = scan {
986                        if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
987                            let mut run = 1usize;
988                            let mut run_end = scan_ix;
989                            let mut peek = self.tree[scan_ix].next;
990                            while let Some(peek_ix) = peek {
991                                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
992                                    && self.tree[peek_ix].item.start == self.tree[run_end].item.end
993                                {
994                                    run += 1;
995                                    run_end = peek_ix;
996                                    peek = self.tree[peek_ix].next;
997                                } else {
998                                    break;
999                                }
1000                            }
1001                            if run == open_count {
1002                                close_ix = Some(scan_ix);
1003                                break;
1004                            }
1005                            // Skip past this non-matching run
1006                            scan = self.tree[run_end].next;
1007                            continue;
1008                        }
1009                        scan = self.tree[scan_ix].next;
1010                    }
1011
1012                    if let Some(scan_ix) = close_ix {
1013                        self.make_math_span(cur_ix, scan_ix);
1014                    } else {
1015                        let mut fail_ix = cur_ix;
1016                        loop {
1017                            self.tree[fail_ix].item.body = ItemBody::Text {
1018                                backslash_escaped: false,
1019                            };
1020                            if fail_ix == open_end {
1021                                break;
1022                            }
1023                            if let Some(next) = self.tree[fail_ix].next {
1024                                fail_ix = next;
1025                            } else {
1026                                break;
1027                            }
1028                        }
1029                    }
1030                }
1031                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
1032                    if preceded_by_backslash {
1033                        search_count -= 1;
1034                        if search_count == 0 {
1035                            self.tree[cur_ix].item.body = ItemBody::Text {
1036                                backslash_escaped: true,
1037                            };
1038                            prev = cur;
1039                            cur = self.tree[cur_ix].next;
1040                            continue;
1041                        }
1042                    }
1043
1044                    if self.code_delims.is_populated() {
1045                        // we have previously scanned all codeblock delimiters,
1046                        // so we can reuse that work
1047                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
1048                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1049                        } else {
1050                            self.tree[cur_ix].item.body = ItemBody::Text {
1051                                backslash_escaped: preceded_by_backslash,
1052                            };
1053                        }
1054                    } else {
1055                        // we haven't previously scanned all codeblock delimiters,
1056                        // so walk the AST
1057                        let mut scan = if search_count > 0 {
1058                            self.tree[cur_ix].next
1059                        } else {
1060                            None
1061                        };
1062                        while let Some(scan_ix) = scan {
1063                            if let ItemBody::MaybeCode(delim_count, _) =
1064                                self.tree[scan_ix].item.body
1065                            {
1066                                if search_count == delim_count {
1067                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1068                                    self.code_delims.clear();
1069                                    break;
1070                                } else {
1071                                    self.code_delims.insert(delim_count, scan_ix);
1072                                }
1073                            }
1074                            scan = self.tree[scan_ix].next;
1075                        }
1076                        if scan.is_none() {
1077                            self.tree[cur_ix].item.body = ItemBody::Text {
1078                                backslash_escaped: preceded_by_backslash,
1079                            };
1080                        }
1081                    }
1082                }
1083                ItemBody::MaybeLinkOpen => {
1084                    self.tree[cur_ix].item.body = ItemBody::Text {
1085                        backslash_escaped: false,
1086                    };
1087                    let link_open_doubled = self.tree[cur_ix]
1088                        .next
1089                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
1090                        .unwrap_or(false);
1091                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
1092                        self.wikilink_stack.push(LinkStackEl {
1093                            node: cur_ix,
1094                            ty: LinkStackTy::Link,
1095                        });
1096                    }
1097                    self.link_stack.push(LinkStackEl {
1098                        node: cur_ix,
1099                        ty: LinkStackTy::Link,
1100                    });
1101                }
1102                ItemBody::MaybeImage => {
1103                    self.tree[cur_ix].item.body = ItemBody::Text {
1104                        backslash_escaped: false,
1105                    };
1106                    let link_open_doubled = self.tree[cur_ix]
1107                        .next
1108                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
1109                        .unwrap_or(false);
1110                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
1111                        self.wikilink_stack.push(LinkStackEl {
1112                            node: cur_ix,
1113                            ty: LinkStackTy::Image,
1114                        });
1115                    }
1116                    self.link_stack.push(LinkStackEl {
1117                        node: cur_ix,
1118                        ty: LinkStackTy::Image,
1119                    });
1120                }
1121                ItemBody::MaybeLinkClose(could_be_ref) => {
1122                    self.tree[cur_ix].item.body = ItemBody::Text {
1123                        backslash_escaped: false,
1124                    };
1125                    let tos_link = self.link_stack.pop();
1126                    if self.options.contains(Options::ENABLE_WIKILINKS)
1127                        && self.tree[cur_ix]
1128                            .next
1129                            .map(|ix| {
1130                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
1131                            })
1132                            .unwrap_or(false)
1133                    {
1134                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
1135                            cur = self.tree[node].next;
1136                            continue;
1137                        }
1138                    }
1139                    if let Some(tos) = tos_link {
1140                        // skip rendering if already in a link, unless its an
1141                        // image
1142                        if tos.ty != LinkStackTy::Image
1143                            && matches!(
1144                                self.tree[self.tree.peek_up().unwrap()].item.body,
1145                                ItemBody::Link(..)
1146                            )
1147                        {
1148                            continue;
1149                        }
1150                        if tos.ty == LinkStackTy::Disabled {
1151                            continue;
1152                        }
1153                        let next = self.tree[cur_ix].next;
1154                        if let Some((next_ix, url, title)) =
1155                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
1156                        {
1157                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
1158                            if let Some(prev_ix) = prev {
1159                                self.tree[prev_ix].next = None;
1160                            }
1161                            cur = Some(tos.node);
1162                            cur_ix = tos.node;
1163                            let link_ix =
1164                                self.allocs
1165                                    .allocate_link(LinkType::Inline, url, title, "".into());
1166                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
1167                                ItemBody::Image(link_ix)
1168                            } else {
1169                                ItemBody::Link(link_ix)
1170                            };
1171                            self.tree[cur_ix].child = self.tree[cur_ix].next;
1172                            self.tree[cur_ix].next = next_node;
1173                            self.tree[cur_ix].item.end = next_ix;
1174                            if let Some(next_node_ix) = next_node {
1175                                let orig_start = self.tree[next_node_ix].item.start;
1176                                let new_start = max(orig_start, next_ix);
1177                                self.tree[next_node_ix].item.start = new_start;
1178                                // If the text node's start was advanced past
1179                                // its original position (the link's URL or
1180                                // title consumed the bytes the escape was
1181                                // attached to), the `backslash_escaped`
1182                                // flag no longer applies — clear it so the
1183                                // arena-build position fixup doesn't extend
1184                                // the text node's source span back over
1185                                // bytes already owned by the link.
1186                                if new_start > orig_start {
1187                                    if let ItemBody::Text { backslash_escaped } =
1188                                        &mut self.tree[next_node_ix].item.body
1189                                    {
1190                                        *backslash_escaped = false;
1191                                    }
1192                                }
1193                            }
1194
1195                            if tos.ty == LinkStackTy::Link {
1196                                self.disable_all_links();
1197                            }
1198                        } else {
1199                            // Footnote-first check: if the first bracket content is
1200                            // `[^X]` where `X` has a matching footnote definition,
1201                            // emit a FootnoteReference regardless of what follows.
1202                            // Otherwise `[^X][Y]` would be resolved as a link whose
1203                            // text happens to start with `^`, which diverges from
1204                            // remark-gfm's two-node parse (footnote + trailing ref).
1205                            let first_bracket_start = self.tree[tos.node].item.start;
1206                            let first_bracket_end = self.tree[cur_ix].item.end;
1207                            let first_bracket_text =
1208                                &self.text[first_bracket_start..first_bracket_end];
1209                            if let Some((_, ReferenceLabel::Footnote(footlabel))) =
1210                                scan_link_label(&self.tree, first_bracket_text, self.options)
1211                            {
1212                                if self.allocs.footdefs.contains(&footlabel) {
1213                                    let footref = self.allocs.allocate_cow(footlabel);
1214                                    if let Some(def) = self
1215                                        .allocs
1216                                        .footdefs
1217                                        .get_mut(self.allocs.cows[footref.0].to_owned())
1218                                    {
1219                                        def.use_count += 1;
1220                                    }
1221                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
1222                                        self.tree[tos.node].next = Some(cur_ix);
1223                                        self.tree[tos.node].child = None;
1224                                        self.tree[tos.node].item.body =
1225                                            ItemBody::SynthesizeChar('!');
1226                                        self.tree[cur_ix].item.start =
1227                                            self.tree[tos.node].item.start + 1;
1228                                        self.tree[tos.node].item.end =
1229                                            self.tree[tos.node].item.start + 1;
1230                                        cur_ix
1231                                    } else {
1232                                        tos.node
1233                                    };
1234                                    self.tree[footnote_ix].next = next;
1235                                    self.tree[footnote_ix].child = None;
1236                                    self.tree[footnote_ix].item.body =
1237                                        ItemBody::FootnoteReference(footref);
1238                                    self.tree[footnote_ix].item.end = first_bracket_end;
1239                                    prev = Some(footnote_ix);
1240                                    cur = next;
1241                                    self.link_stack.clear();
1242                                    continue;
1243                                }
1244                            }
1245                            // ok, so its not an inline link. maybe it is a reference
1246                            // to a defined link?
1247                            let scan_result =
1248                                scan_reference(&self.tree, block_text, next, self.options);
1249                            let (node_after_link, link_type) = match scan_result {
1250                                // [label][reference]
1251                                RefScan::LinkLabel(_, end_ix) => {
1252                                    // Toggle reference viability of the last closing bracket,
1253                                    // so that we can skip it on future iterations in case
1254                                    // it fails in this one. In particular, we won't call
1255                                    // the broken link callback twice on one reference.
1256                                    let reference_close_node = if let Some(node) =
1257                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
1258                                    {
1259                                        node
1260                                    } else {
1261                                        continue;
1262                                    };
1263                                    self.tree[reference_close_node].item.body =
1264                                        ItemBody::MaybeLinkClose(false);
1265                                    let next_node = self.tree[reference_close_node].next;
1266
1267                                    (next_node, LinkType::Reference)
1268                                }
1269                                // [reference][]
1270                                RefScan::Collapsed(next_node) => {
1271                                    // This reference has already been tried, and it's not
1272                                    // valid. Skip it.
1273                                    if !could_be_ref {
1274                                        continue;
1275                                    }
1276                                    (next_node, LinkType::Collapsed)
1277                                }
1278                                // [X][^Y] — full-reference form with a footnote-shaped
1279                                // second label. Per CommonMark the full-ref has to
1280                                // resolve to a link definition, which `^Y` never will;
1281                                // shortcut fallback is NOT tried. Leave both brackets
1282                                // literal and let `[^Y]` be parsed as a footnote on
1283                                // its own MaybeLinkClose iteration.
1284                                RefScan::UnexpectedFootnote => continue,
1285                                // `[text][invalid_label]` — the `[` after `[text]`
1286                                // started a label slot but it wasn't a valid label
1287                                // (e.g. unescaped `[` inside). Spec: a shortcut link
1288                                // can't be followed by `[`, so don't fall back to
1289                                // shortcut. Leave both brackets literal.
1290                                RefScan::FailedInvalidLabel => continue,
1291                                // [shortcut]
1292                                //
1293                                // [shortcut]: /blah
1294                                RefScan::Failed => {
1295                                    if !could_be_ref {
1296                                        continue;
1297                                    }
1298                                    (next, LinkType::Shortcut)
1299                                }
1300                            };
1301
1302                            // FIXME: references and labels are mixed in the naming of variables
1303                            // below. Disambiguate!
1304
1305                            // (label, source_ix end)
1306                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
1307                                RefScan::LinkLabel(l, end_ix) => {
1308                                    Some((ReferenceLabel::Link(l), end_ix))
1309                                }
1310                                RefScan::Collapsed(..)
1311                                | RefScan::Failed
1312                                | RefScan::FailedInvalidLabel
1313                                | RefScan::UnexpectedFootnote => {
1314                                    // No label? maybe it is a shortcut reference
1315                                    let label_start = self.tree[tos.node].item.end - 1;
1316                                    let label_end = self.tree[cur_ix].item.end;
1317                                    scan_link_label(
1318                                        &self.tree,
1319                                        &self.text[label_start..label_end],
1320                                        self.options,
1321                                    )
1322                                    .map(|(ix, label)| (label, label_start + ix))
1323                                    .filter(|(_, end)| *end == label_end)
1324                                }
1325                            };
1326
1327                            let id = match &label {
1328                                Some(
1329                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
1330                                ) => l.clone(),
1331                                None => "".into(),
1332                            };
1333
1334                            // see if it's a footnote reference
1335                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
1336                                let footref = self.allocs.allocate_cow(l);
1337                                if let Some(def) = self
1338                                    .allocs
1339                                    .footdefs
1340                                    .get_mut(self.allocs.cows[footref.0].to_owned())
1341                                {
1342                                    def.use_count += 1;
1343                                }
1344                                if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
1345                                    // If this came from a MaybeImage, then the `!` prefix
1346                                    // isn't part of the footnote reference.
1347                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
1348                                        self.tree[tos.node].next = Some(cur_ix);
1349                                        self.tree[tos.node].child = None;
1350                                        self.tree[tos.node].item.body =
1351                                            ItemBody::SynthesizeChar('!');
1352                                        self.tree[cur_ix].item.start =
1353                                            self.tree[tos.node].item.start + 1;
1354                                        self.tree[tos.node].item.end =
1355                                            self.tree[tos.node].item.start + 1;
1356                                        cur_ix
1357                                    } else {
1358                                        tos.node
1359                                    };
1360                                    // use `next` instead of `node_after_link` because
1361                                    // node_after_link is calculated for a [collapsed][] link,
1362                                    // which footnotes don't support.
1363                                    self.tree[footnote_ix].next = next;
1364                                    self.tree[footnote_ix].child = None;
1365                                    self.tree[footnote_ix].item.body =
1366                                        ItemBody::FootnoteReference(footref);
1367                                    self.tree[footnote_ix].item.end = end;
1368                                    prev = Some(footnote_ix);
1369                                    cur = next;
1370                                    self.link_stack.clear();
1371                                    continue;
1372                                }
1373                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
1374                                if let Some((def_link_type, url, title)) = self
1375                                    .fetch_link_type_url_title(
1376                                        link_label,
1377                                        (self.tree[tos.node].item.start)..end,
1378                                        link_type,
1379                                        callbacks,
1380                                    )
1381                                {
1382                                    let link_ix =
1383                                        self.allocs.allocate_link(def_link_type, url, title, id);
1384                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
1385                                    {
1386                                        ItemBody::Image(link_ix)
1387                                    } else {
1388                                        ItemBody::Link(link_ix)
1389                                    };
1390                                    let label_node = self.tree[tos.node].next;
1391
1392                                    // lets do some tree surgery to add the link to the tree
1393                                    // 1st: skip the label node and close node
1394                                    self.tree[tos.node].next = node_after_link;
1395
1396                                    // then, if it exists, add the label node as a child to the link node
1397                                    if label_node != cur {
1398                                        self.tree[tos.node].child = label_node;
1399
1400                                        // finally: disconnect list of children
1401                                        if let Some(prev_ix) = prev {
1402                                            self.tree[prev_ix].next = None;
1403                                        }
1404                                    }
1405
1406                                    self.tree[tos.node].item.end = end;
1407
1408                                    // set up cur so next node will be node_after_link
1409                                    cur = Some(tos.node);
1410                                    cur_ix = tos.node;
1411
1412                                    if tos.ty == LinkStackTy::Link {
1413                                        self.disable_all_links();
1414                                    }
1415                                }
1416                            }
1417                        }
1418                    }
1419                }
1420                _ => {}
1421            }
1422            prev = cur;
1423            cur = self.tree[cur_ix].next;
1424        }
1425        self.link_stack.clear();
1426        self.wikilink_stack.clear();
1427        self.code_delims.clear();
1428        self.math_delims.clear();
1429    }
1430
1431    /// Handles a wikilink.
1432    ///
1433    /// This function may bail early in case the link is malformed, so this
1434    /// acts as a control flow guard. Returns the link node if a wikilink was
1435    /// found and created.
1436    fn handle_wikilink(
1437        &mut self,
1438        block_text: &'input str,
1439        cur_ix: TreeIndex,
1440        prev: Option<TreeIndex>,
1441    ) -> Option<TreeIndex> {
1442        let next_ix = self.tree[cur_ix].next.unwrap();
1443        // this is a wikilink closing delim, try popping from
1444        // the wikilink stack
1445        if let Some(tos) = self.wikilink_stack.pop() {
1446            if tos.ty == LinkStackTy::Disabled {
1447                return None;
1448            }
1449            // fetches the beginning of the wikilink body
1450            let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1451                // skip if no next node exists, like at end of input
1452                return None;
1453            };
1454            let start_ix = self.tree[body_node].item.start;
1455            let end_ix = self.tree[cur_ix].item.start;
1456            let wikilink = match scan_wikilink_pipe(
1457                block_text,
1458                start_ix, // bounded by closing tag
1459                end_ix - start_ix,
1460            ) {
1461                Some((rest, wikitext)) => {
1462                    // bail early if the wikiname would be empty
1463                    if wikitext.is_empty() {
1464                        return None;
1465                    }
1466                    // [[WikiName|rest]]
1467                    let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1468                    if let Some(body_node) = body_node {
1469                        // break node so passes can actually format
1470                        // the display text
1471                        self.tree[body_node].item.start = rest;
1472                        Some((true, body_node, wikitext))
1473                    } else {
1474                        None
1475                    }
1476                }
1477                None => {
1478                    let wikitext = &block_text[start_ix..end_ix];
1479                    // bail early if the wikiname would be empty
1480                    if wikitext.is_empty() {
1481                        return None;
1482                    }
1483                    let body_node = self.tree.create_node(Item {
1484                        start: start_ix,
1485                        end: end_ix,
1486                        body: ItemBody::Text {
1487                            backslash_escaped: false,
1488                        },
1489                    });
1490                    Some((false, body_node, wikitext))
1491                }
1492            };
1493
1494            if let Some((has_pothole, body_node, wikiname)) = wikilink {
1495                let link_ix = self.allocs.allocate_link(
1496                    LinkType::WikiLink { has_pothole },
1497                    wikiname.into(),
1498                    "".into(),
1499                    "".into(),
1500                );
1501                if let Some(prev_ix) = prev {
1502                    self.tree[prev_ix].next = None;
1503                }
1504                if tos.ty == LinkStackTy::Image {
1505                    self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1506                } else {
1507                    self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1508                }
1509                self.tree[tos.node].child = Some(body_node);
1510                self.tree[tos.node].next = self.tree[next_ix].next;
1511                self.tree[tos.node].item.end = end_ix + 2;
1512                self.disable_all_links();
1513                return Some(tos.node);
1514            }
1515        }
1516
1517        None
1518    }
1519
1520    fn handle_emphasis_in_scope(&mut self, start: Option<TreeIndex>) {
1521        let mut prev = None;
1522        let mut prev_ix: TreeIndex;
1523        let mut cur = start;
1524
1525        let mut single_quote_open: Option<TreeIndex> = None;
1526        let mut double_quote_open: bool = false;
1527
1528        while let Some(mut cur_ix) = cur {
1529            match self.tree[cur_ix].item.body {
1530                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
1531                    let run_length = count;
1532                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1533                    let both = can_open && can_close;
1534                    // Defer `~`/`^` resolution to the post-pass.
1535                    // Without lookahead, the single-pass can't tell whether an
1536                    // earlier `*`/`_` opener will pair (in which case the
1537                    // `~`/`^` should match inside the future emphasis) or
1538                    // remain unmatched (in which case `~`/`^` would cross the
1539                    // boundary). micromark handles this with a separate
1540                    // strikethrough resolve phase that runs after emphasis.
1541                    if c == b'~' || c == b'^' {
1542                        prev_ix = cur_ix + count - 1;
1543                        prev = Some(prev_ix);
1544                        cur = self.tree[prev_ix].next;
1545                        continue;
1546                    }
1547                    if can_close {
1548                        while let Some(el) =
1549                            self.inline_stack
1550                                .find_match(&mut self.tree, c, run_length, count, both)
1551                        {
1552                            // have a match!
1553                            if let Some(prev_ix) = prev {
1554                                self.tree[prev_ix].next = None;
1555                            }
1556                            // Consume at most two markers per inner-loop pass
1557                            // (one `<strong>`/`<em>` per match), matching
1558                            // micromark's `use = open>1 && close>1 ? 2 : 1`.
1559                            // The outer `while let` then drives nesting by
1560                            // re-running `find_match` with the leftover
1561                            // counts, which is how `***foo***` becomes
1562                            // `<em><strong>foo</strong></em>` instead of one
1563                            // flat match.
1564                            let match_count = min(2, min(count, el.count));
1565                            // start, end are tree node indices
1566                            let mut end = cur_ix - 1;
1567                            let mut start = el.start + el.count;
1568
1569                            // work from the inside out
1570                            while start > el.start + el.count - match_count {
1571                                let inc = if start > el.start + el.count - match_count + 1 {
1572                                    2
1573                                } else {
1574                                    1
1575                                };
1576                                let ty = if c == b'~' {
1577                                    if inc == 2 {
1578                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1579                                            ItemBody::Strikethrough
1580                                        } else {
1581                                            ItemBody::Text {
1582                                                backslash_escaped: false,
1583                                            }
1584                                        }
1585                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1586                                        ItemBody::Subscript
1587                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1588                                        ItemBody::Strikethrough
1589                                    } else {
1590                                        ItemBody::Text {
1591                                            backslash_escaped: false,
1592                                        }
1593                                    }
1594                                } else if c == b'^' {
1595                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1596                                        ItemBody::Superscript
1597                                    } else {
1598                                        ItemBody::Text {
1599                                            backslash_escaped: false,
1600                                        }
1601                                    }
1602                                } else if inc == 2 {
1603                                    ItemBody::Strong
1604                                } else {
1605                                    ItemBody::Emphasis
1606                                };
1607
1608                                let root = start - inc;
1609                                end = end + inc;
1610                                self.tree[root].item.body = ty;
1611                                self.tree[root].item.end = self.tree[end].item.end;
1612                                self.tree[root].child = Some(start);
1613                                self.tree[root].next = None;
1614                                start = root;
1615                            }
1616
1617                            // set next for top most emph level
1618                            prev_ix = el.start + el.count - match_count;
1619                            prev = Some(prev_ix);
1620                            cur = self.tree[cur_ix + match_count - 1].next;
1621                            self.tree[prev_ix].next = cur;
1622
1623                            if el.count > match_count {
1624                                self.inline_stack.push(InlineEl {
1625                                    start: el.start,
1626                                    count: el.count - match_count,
1627                                    run_length: el.run_length,
1628                                    c: el.c,
1629                                    both: el.both,
1630                                })
1631                            }
1632                            count -= match_count;
1633                            if count > 0 {
1634                                cur_ix = cur.unwrap();
1635                            } else {
1636                                break;
1637                            }
1638                        }
1639                    }
1640                    if count > 0 {
1641                        if can_open {
1642                            self.inline_stack.push(InlineEl {
1643                                start: cur_ix,
1644                                run_length,
1645                                count,
1646                                c,
1647                                both,
1648                            });
1649                        } else {
1650                            for i in 0..count {
1651                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1652                                    backslash_escaped: false,
1653                                };
1654                            }
1655                        }
1656                        prev_ix = cur_ix + count - 1;
1657                        prev = Some(prev_ix);
1658                        cur = self.tree[prev_ix].next;
1659                    }
1660                }
1661                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1662                    self.tree[cur_ix].item.body = match c {
1663                        b'\'' => {
1664                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
1665                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1666                                single_quote_open = None;
1667                            } else if can_open {
1668                                single_quote_open = Some(cur_ix);
1669                            }
1670                            ItemBody::SynthesizeChar('’')
1671                        }
1672                        _ /* double quote */ => {
1673                            if can_close && double_quote_open {
1674                                double_quote_open = false;
1675                                ItemBody::SynthesizeChar('”')
1676                            } else {
1677                                if can_open && !double_quote_open {
1678                                    double_quote_open = true;
1679                                }
1680                                ItemBody::SynthesizeChar('“')
1681                            }
1682                        }
1683                    };
1684                    prev = cur;
1685                    cur = self.tree[cur_ix].next;
1686                }
1687                ItemBody::HardBreak(true) => {
1688                    if self.tree[cur_ix].next.is_none() {
1689                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1690                    }
1691                    prev = cur;
1692                    cur = self.tree[cur_ix].next;
1693                }
1694                _ => {
1695                    prev = cur;
1696                    cur = self.tree[cur_ix].next;
1697                }
1698            }
1699        }
1700        self.inline_stack.pop_all(&mut self.tree);
1701    }
1702
1703    /// Second-pass strikethrough/sub/sup resolution. Walks the tree
1704    /// hierarchically and resolves `~`/`^` MaybeEmphasis tokens within
1705    /// each inline scope independently. This matches micromark's
1706    /// post-emphasis resolve phase: a `~..~` pair only forms when both
1707    /// ends lie within the same enclosing scope (root, emphasis, link,
1708    /// etc.). Multi-char `~~` strikethrough was already resolved in
1709    /// the main pass.
1710    fn handle_tildes_carets_pass(&mut self) {
1711        let start = self.tree.cur();
1712        self.resolve_tildes_carets_in_scope(start);
1713    }
1714    fn resolve_tildes_carets_in_scope(&mut self, start: Option<TreeIndex>) {
1715        let mut stack: Vec<InlineEl> = Vec::new();
1716        let mut cur = start;
1717        let mut prev: Option<TreeIndex> = None;
1718        while let Some(mut cur_ix) = cur {
1719            match self.tree[cur_ix].item.body {
1720                ItemBody::MaybeEmphasis(count, can_open, can_close) => {
1721                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1722                    if c != b'~' && c != b'^' {
1723                        prev = Some(cur_ix);
1724                        cur = self.tree[cur_ix].next;
1725                        continue;
1726                    }
1727                    let run_length = count;
1728                    let mut remaining = count;
1729                    if can_close {
1730                        while remaining > 0 {
1731                            let res = stack
1732                                .iter()
1733                                .enumerate()
1734                                .rfind(|(_, el)| el.c == c && el.run_length == run_length);
1735                            let Some((matching_ix, matching_el)) = res else {
1736                                break;
1737                            };
1738                            let matching_el = *matching_el;
1739                            if let Some(prev_ix) = prev {
1740                                self.tree[prev_ix].next = None;
1741                            }
1742                            // Convert intermediate `~`/`^` openers above the
1743                            // match to text — they failed to find a pair.
1744                            for el in &stack[(matching_ix + 1)..] {
1745                                for i in 0..el.count {
1746                                    self.tree[el.start + i].item.body = ItemBody::Text {
1747                                        backslash_escaped: false,
1748                                    };
1749                                }
1750                            }
1751                            stack.truncate(matching_ix);
1752                            let match_count =
1753                                core::cmp::min(2, core::cmp::min(remaining, matching_el.count));
1754                            let mut end = cur_ix - 1;
1755                            let mut sub_start = matching_el.start + matching_el.count;
1756                            while sub_start > matching_el.start + matching_el.count - match_count {
1757                                let inc = if sub_start
1758                                    > matching_el.start + matching_el.count - match_count + 1
1759                                {
1760                                    2
1761                                } else {
1762                                    1
1763                                };
1764                                let ty = if c == b'~' {
1765                                    if inc == 2 {
1766                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1767                                            ItemBody::Strikethrough
1768                                        } else {
1769                                            ItemBody::Text {
1770                                                backslash_escaped: false,
1771                                            }
1772                                        }
1773                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1774                                        ItemBody::Subscript
1775                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1776                                        ItemBody::Strikethrough
1777                                    } else {
1778                                        ItemBody::Text {
1779                                            backslash_escaped: false,
1780                                        }
1781                                    }
1782                                } else if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1783                                    ItemBody::Superscript
1784                                } else {
1785                                    ItemBody::Text {
1786                                        backslash_escaped: false,
1787                                    }
1788                                };
1789                                let root = sub_start - inc;
1790                                end = end + inc;
1791                                self.tree[root].item.body = ty;
1792                                self.tree[root].item.end = self.tree[end].item.end;
1793                                self.tree[root].child = Some(sub_start);
1794                                self.tree[root].next = None;
1795                                sub_start = root;
1796                            }
1797                            let new_prev_ix = matching_el.start + matching_el.count - match_count;
1798                            let new_cur = self.tree[cur_ix + match_count - 1].next;
1799                            self.tree[new_prev_ix].next = new_cur;
1800                            prev = Some(new_prev_ix);
1801                            if matching_el.count > match_count {
1802                                stack.push(InlineEl {
1803                                    start: matching_el.start,
1804                                    count: matching_el.count - match_count,
1805                                    run_length: matching_el.run_length,
1806                                    c: matching_el.c,
1807                                    both: matching_el.both,
1808                                });
1809                            }
1810                            remaining -= match_count;
1811                            if remaining > 0 {
1812                                let Some(next_cur) = new_cur else { break };
1813                                cur_ix = next_cur;
1814                            } else {
1815                                break;
1816                            }
1817                        }
1818                    }
1819                    if remaining > 0 {
1820                        if can_open {
1821                            stack.push(InlineEl {
1822                                start: cur_ix,
1823                                count: remaining,
1824                                run_length,
1825                                c,
1826                                both: can_open && can_close,
1827                            });
1828                        } else {
1829                            for i in 0..remaining {
1830                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1831                                    backslash_escaped: false,
1832                                };
1833                            }
1834                        }
1835                        let prev_ix = cur_ix + remaining - 1;
1836                        prev = Some(prev_ix);
1837                        cur = self.tree[prev_ix].next;
1838                    } else {
1839                        cur = self.tree[prev.unwrap()].next;
1840                    }
1841                    continue;
1842                }
1843                ItemBody::Emphasis
1844                | ItemBody::Strong
1845                | ItemBody::Strikethrough
1846                | ItemBody::Subscript
1847                | ItemBody::Superscript
1848                | ItemBody::Link(_)
1849                | ItemBody::Image(_) => {
1850                    let child = self.tree[cur_ix].child;
1851                    self.resolve_tildes_carets_in_scope(child);
1852                }
1853                _ => {}
1854            }
1855            prev = Some(cur_ix);
1856            cur = self.tree[cur_ix].next;
1857        }
1858        // End of scope: any remaining openers couldn't find a closer.
1859        for el in stack {
1860            for i in 0..el.count {
1861                self.tree[el.start + i].item.body = ItemBody::Text {
1862                    backslash_escaped: false,
1863                };
1864            }
1865        }
1866    }
1867
    /// Forwards `disable_all_links` to both the regular link stack and the
    /// wikilink stack, so the two stacks are always disabled together.
    fn disable_all_links(&mut self) {
        self.link_stack.disable_all_links();
        self.wikilink_stack.disable_all_links();
    }
1872
1873    /// Returns next byte index, url and title.
1874    fn scan_inline_link(
1875        &self,
1876        underlying: &'input str,
1877        mut ix: usize,
1878        node: Option<TreeIndex>,
1879    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1880        if underlying.as_bytes().get(ix) != Some(&b'(') {
1881            return None;
1882        }
1883        ix += 1;
1884
1885        let scan_separator = |ix: &mut usize| {
1886            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1887            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1888                *ix += bl;
1889                *ix += skip_container_prefixes(
1890                    &self.tree,
1891                    &underlying.as_bytes()[*ix..],
1892                    self.options,
1893                );
1894            }
1895            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1896        };
1897
1898        scan_separator(&mut ix);
1899
1900        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1901        let dest = unescape(dest, self.tree.is_in_table());
1902        ix += dest_length;
1903
1904        scan_separator(&mut ix);
1905
1906        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1907            ix += bytes_scanned;
1908            scan_separator(&mut ix);
1909            t
1910        } else {
1911            "".into()
1912        };
1913        if underlying.as_bytes().get(ix) != Some(&b')') {
1914            return None;
1915        }
1916        ix += 1;
1917
1918        Some((ix, dest, title))
1919    }
1920
1921    // returns (bytes scanned, title cow)
1922    fn scan_link_title(
1923        &self,
1924        text: &'input str,
1925        start_ix: usize,
1926        node: Option<TreeIndex>,
1927    ) -> Option<(usize, CowStr<'input>)> {
1928        let bytes = text.as_bytes();
1929        let open = match bytes.get(start_ix) {
1930            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1931            _ => return None,
1932        };
1933        let close = if open == b'(' { b')' } else { open };
1934
1935        let mut title = String::new();
1936        let mut mark = start_ix + 1;
1937        let mut i = start_ix + 1;
1938
1939        while i < bytes.len() {
1940            let c = bytes[i];
1941
1942            if c == close {
1943                let cow = if title.is_empty() {
1944                    (i - start_ix + 1, text[mark..i].into())
1945                } else {
1946                    title.push_str(&text[mark..i]);
1947                    (i - start_ix + 1, title.into())
1948                };
1949
1950                return Some(cow);
1951            }
1952            if c == open {
1953                return None;
1954            }
1955
1956            if c == b'\n' || c == b'\r' {
1957                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1958                    if self.tree[node_ix].item.start > i {
1959                        title.push_str(&text[mark..i]);
1960                        title.push('\n');
1961                        i = self.tree[node_ix].item.start;
1962                        mark = i;
1963                        continue;
1964                    }
1965                }
1966            }
1967            if c == b'&' {
1968                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1969                    title.push_str(&text[mark..i]);
1970                    title.push_str(&value);
1971                    i += n;
1972                    mark = i;
1973                    continue;
1974                }
1975            }
1976            if self.tree.is_in_table()
1977                && c == b'\\'
1978                && i + 2 < bytes.len()
1979                && bytes[i + 1] == b'\\'
1980                && bytes[i + 2] == b'|'
1981            {
1982                // this runs if there are an even number of pipes in a table
1983                // if it's odd, then it gets parsed as normal
1984                title.push_str(&text[mark..i]);
1985                i += 2;
1986                mark = i;
1987            }
1988            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1989                title.push_str(&text[mark..i]);
1990                i += 1;
1991                mark = i;
1992            }
1993
1994            i += 1;
1995        }
1996
1997        None
1998    }
1999
    /// Make a math span.
    ///
    /// `open` and `close` are the first nodes of the opening and closing `$`
    /// runs. The runs are extended over directly adjacent `MaybeMath` nodes,
    /// the text between the end of the opening run and the start of `close`
    /// becomes the math content, and `open` is rewritten into a single
    /// `ItemBody::Math` node that spans through the end of the closing run.
    ///
    /// Content rewriting:
    /// - line-ending bytes are copied as they appear in the source, and the
    ///   container prefixes at the start of each continuation line are
    ///   skipped;
    /// - inside a table, `\|` becomes `|`;
    /// - if the content is not made only of spaces/newlines and both starts
    ///   and ends with a space or newline, one byte is stripped from each end.
    ///
    /// If the opening run ends after `close` starts, `open` is turned into
    /// plain text and nothing else is changed.
    fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
        // Find the end of the opening run of consecutive $ tokens
        // (adjacent in the source, and never running into `close`).
        let mut open_end = open;
        {
            let mut peek = self.tree[open].next;
            while let Some(peek_ix) = peek {
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                    && peek_ix != close
                {
                    open_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }
        // Find the end of the closing run
        let mut close_end = close;
        {
            let mut peek = self.tree[close].next;
            while let Some(peek_ix) = peek {
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[close_end].item.end
                {
                    close_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }

        let span_start = self.tree[open_end].item.end;
        let span_end = self.tree[close].item.start;

        // Degenerate pairing: there is no content range to slice, so demote
        // the opener to text instead.
        if span_start > span_end {
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: false,
            };
            return;
        }

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // Allocated lazily: stays `None` while the content can be borrowed
        // from the source unchanged.
        let mut buf: Option<String> = None;

        // `start_ix` is the start of the not-yet-copied tail of the content.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                // The line-ending byte itself is kept as part of the content.
                ix += 1;
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                // Use the full source bytes from this position (not just
                // the span slice) so scan_containers can see the real
                // line content past the closing `$`. With only the
                // span slice, a partial-indent line followed by buffer
                // end (e.g. `    ` + closing) was misread as EOL by
                // is_at_eol — letting the ListItem container "match" the
                // 4 spaces of a 5-indent item and over-strip the math
                // span's trailing whitespace.
                let from = span_start + ix;
                let (scanned, leftover) = skip_container_prefixes_with_remaining(
                    &self.tree,
                    &self.text.as_bytes()[from..],
                    self.options,
                );
                // Never skip past the end of the span content.
                let scanned = scanned.min(spanned_bytes.len() - ix);
                ix += scanned;
                start_ix = ix;
                // Preserve leftover virtual columns from a tab the
                // container only partially consumed (e.g. `\t` in a 2-col
                // listitem leaves 2 spaces of content).
                for _ in 0..leftover {
                    buf.push(' ');
                }
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // In a table, `\|` stands for a literal pipe.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Inspect the final content (flushing the tail into `buf` first when
        // a buffer is in use) to decide whether to strip the padding.
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            // Strip exactly one padding byte from each end.
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        // Collapse everything from `open` through the end of the closing run
        // into the single math node.
        // NOTE(review): the second `Math` field is always `false` here —
        // presumably the display-math flag; confirm against `ItemBody::Math`.
        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
        self.tree[open].item.end = self.tree[close_end].item.end;
        self.tree[open].next = self.tree[close_end].next;
    }
2126
    /// Make a code span.
    ///
    /// Both `open` and `close` are matching MaybeCode items.
    ///
    /// The text between the end of `open` and the start of `close` becomes
    /// the code content, rewritten as follows:
    /// - each line ending (`\n`, `\r` or `\r\n`) becomes a single `\n`, and
    ///   the container prefixes at the start of the continuation line are
    ///   skipped;
    /// - inside a table, `\|` becomes `|`;
    /// - if the content is not made only of spaces/newlines and both starts
    ///   and ends with a space or newline, one byte is stripped from each end.
    ///
    /// When `preceding_backslash` is true, `open` is kept as a one-byte
    /// backslash-escaped text node and the code span is stored on `close`,
    /// starting one byte after the start of `open`. Otherwise `open` becomes
    /// the code node and absorbs everything up to the end of `close`.
    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
        let span_start = self.tree[open].item.end;
        let span_end = self.tree[close].item.start;
        // Allocated lazily: stays `None` while the content can be borrowed
        // from the source unchanged.
        let mut buf: Option<String> = None;

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // `start_ix` is the start of the not-yet-copied tail of the content.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                // Normalise the line ending to `\n`, treating `\r\n` as one.
                buf.push('\n');
                ix += 1;
                if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
                    ix += 1;
                }
                // Use the full source bytes from this position (not just
                // the span slice) so scan_containers can see the real
                // line content past the closing backtick. With only the
                // span slice, a partial-indent line followed by buffer
                // end (e.g. `    ` + closing) was misread as EOL by
                // is_at_eol — letting the ListItem container "match" the
                // 4 spaces of a 5-indent item and over-strip the code
                // span's trailing whitespace.
                let from = span_start + ix;
                let (scanned, leftover) = skip_container_prefixes_with_remaining(
                    &self.tree,
                    &self.text.as_bytes()[from..],
                    self.options,
                );
                // Never skip past the end of the span content.
                let scanned = scanned.min(spanned_bytes.len() - ix);
                ix += scanned;
                start_ix = ix;
                // Preserve leftover virtual columns from a tab the
                // container only partially consumed (e.g. `\t` in a 2-col
                // listitem leaves 2 spaces of content).
                for _ in 0..leftover {
                    buf.push(' ');
                }
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // In a table, `\|` stands for a literal pipe.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Inspect the final content (flushing the tail into `buf` first when
        // a buffer is in use) to decide whether to strip the padding.
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            // Strip exactly one padding byte from each end.
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        if preceding_backslash {
            // Keep the first byte of `open` as escaped text and move the
            // code span onto `close`.
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: true,
            };
            self.tree[open].item.end = self.tree[open].item.start + 1;
            self.tree[open].next = Some(close);
            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[close].item.start = self.tree[open].item.start + 1;
        } else {
            // `open` becomes the code node and swallows every node up to and
            // including `close`.
            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[open].item.end = self.tree[close].item.end;
            self.tree[open].next = self.tree[close].next;
        }

        // MDX: errors recorded in pass 1 for `{` inside what turned out to be a
        // code span are false positives — the `{` is literal text.
        // Only errors whose offset lies in `span_start..span_end` are dropped.
        if !self.mdx_errors.is_empty() {
            self.mdx_errors
                .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
        }
    }
2237
2238    /// On success, returns a buffer containing the inline html and byte offset.
2239    /// When no bytes were skipped, the buffer will be empty and the html can be
2240    /// represented as a subslice of the input string.
2241    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
2242        let c = *bytes.get(ix)?;
2243        if c == b'!' {
2244            Some((
2245                vec![],
2246                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
2247            ))
2248        } else if c == b'?' {
2249            Some((
2250                vec![],
2251                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
2252            ))
2253        } else {
2254            let (span, i) = scan_html_block_inner(
2255                // Subtract 1 to include the < character
2256                &bytes[(ix - 1)..],
2257                Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
2258            )?;
2259            Some((span, i + ix - 1))
2260        }
2261    }
2262}
2263
2264/// Returns number of containers scanned.
2265pub(crate) fn scan_containers(
2266    tree: &Tree<Item>,
2267    line_start: &mut LineStart<'_>,
2268    options: Options,
2269) -> usize {
2270    let mut i = 0;
2271    for &node_ix in tree.walk_spine() {
2272        match tree[node_ix].item.body {
2273            ItemBody::BlockQuote(..) => {
2274                let save = line_start.clone();
2275                // In MDX mode indented code blocks are disabled, so the
2276                // ≤3-space cap on blockquote prefix indent doesn't apply —
2277                // tab- or 4+-space-indented `>` should still continue the
2278                // blockquote (matches micromark + remark-mdx).
2279                if options.contains(Options::ENABLE_MDX) {
2280                    line_start.scan_all_space();
2281                } else {
2282                    let _ = line_start.scan_space(3);
2283                }
2284                if !line_start.scan_blockquote_marker() {
2285                    *line_start = save;
2286                    break;
2287                }
2288            }
2289            ItemBody::ListItem(indent, _) => {
2290                let save = line_start.clone();
2291                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2292                    *line_start = save;
2293                    break;
2294                }
2295            }
2296            ItemBody::DefinitionListDefinition(indent) => {
2297                let save = line_start.clone();
2298                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2299                    *line_start = save;
2300                    break;
2301                }
2302            }
2303            ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
2304                let save = line_start.clone();
2305                if !line_start.scan_space(4) && !line_start.is_at_eol() {
2306                    *line_start = save;
2307                    break;
2308                }
2309            }
2310            _ => (),
2311        }
2312        i += 1;
2313    }
2314    i
2315}
2316
2317pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
2318    let mut line_start = LineStart::new(bytes);
2319    let _ = scan_containers(tree, &mut line_start, options);
2320    line_start.bytes_scanned()
2321}
2322
2323/// Like `skip_container_prefixes`, but also returns the leftover virtual
2324/// space columns from tab-stop expansion past the last consumed container
2325/// prefix. Used by math-span content extraction to faithfully reproduce
2326/// indentation that the container "ate" only partially — e.g. a single
2327/// `\t` (4 cols) in a list item with 2-col content indent leaves 2
2328/// trailing spaces of content.
2329fn skip_container_prefixes_with_remaining(
2330    tree: &Tree<Item>,
2331    bytes: &[u8],
2332    options: Options,
2333) -> (usize, usize) {
2334    let mut line_start = LineStart::new(bytes);
2335    let _ = scan_containers(tree, &mut line_start, options);
2336    (line_start.bytes_scanned(), line_start.remaining_space())
2337}
2338
2339impl Tree<Item> {
2340    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
2341        if end > start {
2342            if let Some(ix) = self.cur() {
2343                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
2344                {
2345                    self[ix].item.end = end;
2346                    return;
2347                }
2348            }
2349            self.append(Item {
2350                start,
2351                end,
2352                body: ItemBody::Text { backslash_escaped },
2353            });
2354        }
2355    }
2356    /// Returns true if the current node is inside a table.
2357    ///
2358    /// If `cur` is an ItemBody::Table, it would return false,
2359    /// but since the `TableRow` and `TableHead` and `TableCell`
2360    /// are children of the table, anything doing inline parsing
2361    /// doesn't need to care about that.
2362    pub(crate) fn is_in_table(&self) -> bool {
2363        fn might_be_in_table(item: &Item) -> bool {
2364            item.body.is_inline()
2365                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
2366                    | ItemBody::TableCell)
2367        }
2368        for &ix in self.walk_spine().rev() {
2369            if matches!(self[ix].item.body, ItemBody::Table(_)) {
2370                return true;
2371            }
2372            if !might_be_in_table(&self[ix].item) {
2373                return false;
2374            }
2375        }
2376        false
2377    }
2378}
2379
/// A run of inline delimiters waiting on an `InlineStack` for a matching
/// closer. The run occupies `count` consecutive tree nodes beginning at
/// `start`.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// offset of tree node
    start: TreeIndex,
    /// number of delimiters available for matching
    count: usize,
    /// length of the run that these delimiters came from
    run_length: usize,
    /// b'*', b'_', b'~', or b'^'
    c: u8,
    /// can both open and close
    both: bool,
}
2393
/// Stack of unmatched inline delimiter runs, together with per-delimiter
/// lower bounds that limit how far down the stack a closer searches.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    // Pending opener runs, oldest first.
    stack: Vec<InlineEl>,
    // Lower bounds for matching indices in the stack. For example
    // a strikethrough delimiter will never match with any element
    // in the stack with index smaller than
    // `lower_bounds[InlineStack::TILDES]`.
    //
    // The ten slots are addressed through the index constants declared in
    // `impl InlineStack`.
    lower_bounds: [usize; 10],
}
2403
2404impl InlineStack {
2405    /// These are indices into the lower bounds array.
2406    /// Not both refers to the property that the delimiter can not both
2407    /// be opener as a closer.
2408    const UNDERSCORE_NOT_BOTH: usize = 0;
2409    const ASTERISK_NOT_BOTH: usize = 1;
2410    const ASTERISK_BASE: usize = 2;
2411    const TILDES: usize = 5;
2412    const UNDERSCORE_BASE: usize = 6;
2413    const CIRCUMFLEXES: usize = 9;
2414
2415    fn pop_all(&mut self, tree: &mut Tree<Item>) {
2416        for el in self.stack.drain(..) {
2417            for i in 0..el.count {
2418                tree[el.start + i].item.body = ItemBody::Text {
2419                    backslash_escaped: false,
2420                };
2421            }
2422        }
2423        self.lower_bounds = [0; 10];
2424    }
2425
2426    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
2427        if c == b'_' {
2428            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
2429            if both {
2430                mod3_lower
2431            } else {
2432                min(
2433                    mod3_lower,
2434                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
2435                )
2436            }
2437        } else if c == b'*' {
2438            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
2439            if both {
2440                mod3_lower
2441            } else {
2442                min(
2443                    mod3_lower,
2444                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
2445                )
2446            }
2447        } else if c == b'^' {
2448            self.lower_bounds[InlineStack::CIRCUMFLEXES]
2449        } else {
2450            self.lower_bounds[InlineStack::TILDES]
2451        }
2452    }
2453
2454    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
2455        if c == b'_' {
2456            if both {
2457                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
2458            } else {
2459                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
2460            }
2461        } else if c == b'*' {
2462            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
2463            if !both {
2464                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
2465            }
2466        } else if c == b'^' {
2467            self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
2468        } else {
2469            self.lower_bounds[InlineStack::TILDES] = new_bound;
2470        }
2471    }
2472
2473    fn truncate(&mut self, new_bound: usize) {
2474        self.stack.truncate(new_bound);
2475        for lower_bound in &mut self.lower_bounds {
2476            if *lower_bound > new_bound {
2477                *lower_bound = new_bound;
2478            }
2479        }
2480    }
2481
2482    /// Find an opener that can match `c` of original `run_length`.
2483    ///
2484    /// `current_count` is the **remaining** length of the closer being
2485    /// processed (chars not yet consumed by earlier inner-loop matches).
2486    /// We use it for CommonMark rule 9 (the "mod 3" both-side rule) so
2487    /// that after a partial consumption like `3*foo *bar**` the outer `*`
2488    /// can pair with what's left of the `**` — micromark re-evaluates the
2489    /// rule using only the *current* run lengths on each side.
2490    ///
2491    /// `run_length` is the original closer length; it stays stable across
2492    /// inner-loop iterations and is what the lower-bounds optimisation and
2493    /// the strict tilde/caret length check key off.
2494    fn find_match(
2495        &mut self,
2496        tree: &mut Tree<Item>,
2497        c: u8,
2498        run_length: usize,
2499        current_count: usize,
2500        both: bool,
2501    ) -> Option<InlineEl> {
2502        // Use current_count (the post-partial-consumption remaining length)
2503        // for the rule-9 mod-3 lowerbound key, not run_length. After an
2504        // inner-loop pass consumes part of the closer, the remaining
2505        // length sits in a different mod-3 bucket and may now satisfy
2506        // rule 9 with openers the earlier (longer) attempt failed
2507        // against. Keying on run_length would carry over the earlier
2508        // failure into the new bucket and block valid matches like the
2509        // outer `*` in `cz*x` `*foo***bar***baz` (closer `***` partial
2510        // remainder 1 should still reach the opener at offset 2).
2511        let lowerbound = min(
2512            self.stack.len(),
2513            self.get_lowerbound(c, current_count, both),
2514        );
2515        let res = self.stack[lowerbound..]
2516            .iter()
2517            .cloned()
2518            .enumerate()
2519            .rfind(|(_, el)| {
2520                if (c == b'~' || c == b'^') && run_length != el.run_length {
2521                    return false;
2522                }
2523                // Rule 9 (mod-3): for `*`/`_`, the openers on the stack are
2524                // checked against the *current* lengths — `el.count` reflects
2525                // remaining-after-partial-consumption when an opener has been
2526                // re-pushed, and `current_count` is the remaining closer.
2527                el.c == c
2528                    && (!both && !el.both
2529                        || !(current_count + el.count).is_multiple_of(3)
2530                        || current_count.is_multiple_of(3))
2531            });
2532
2533        if let Some((matching_ix, matching_el)) = res {
2534            let matching_ix = matching_ix + lowerbound;
2535            for el in &self.stack[(matching_ix + 1)..] {
2536                for i in 0..el.count {
2537                    tree[el.start + i].item.body = ItemBody::Text {
2538                        backslash_escaped: false,
2539                    };
2540                }
2541            }
2542            self.truncate(matching_ix);
2543            Some(matching_el)
2544        } else {
2545            // For `*`/`_`, the lower-bound optimisation is safe because their
2546            // matching rule (CM "rule of three") is monotonic across future
2547            // closers with the same count. Tildes/carets match strictly by
2548            // equal run-length, so a failure at run-length 2 must not close
2549            // the door on a later run-length 1 closer matching an earlier
2550            // run-length 1 opener still on the stack. Key the bound by
2551            // `current_count` (the post-partial-consumption length) so it
2552            // applies only to closers whose remaining bucket actually
2553            // shares this failure mode.
2554            if c != b'~' && c != b'^' {
2555                self.set_lowerbound(c, current_count, both, self.stack.len());
2556            }
2557            None
2558        }
2559    }
2560
2561    fn trim_lower_bound(&mut self, ix: usize) {
2562        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
2563    }
2564
2565    fn push(&mut self, el: InlineEl) {
2566        if el.c == b'~' {
2567            self.trim_lower_bound(InlineStack::TILDES);
2568        } else if el.c == b'^' {
2569            self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
2570        }
2571        self.stack.push(el)
2572    }
2573}
2574
/// Outcome of `scan_reference`: what, if anything, follows a bracketed link
/// text as a reference label.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // A full reference label was scanned.
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // A collapsed reference (`[]`).
    // contains next node index (the node after the closing `]`)
    Collapsed(Option<TreeIndex>),
    // NOTE(review): presumably a footnote label was found where a link label
    // was expected — confirm against `scan_reference`.
    UnexpectedFootnote,
    // No reference label could be scanned.
    Failed,
    // `[text][...]` where `[...]` started but is an invalid label
    // (e.g. contains unescaped `[`). The shortcut form for `[text]` is
    // suppressed because the spec says a shortcut link must NOT be
    // followed by `[` — even if that `[` doesn't form a valid label.
    FailedInvalidLabel,
}
2589
2590/// Skips forward within a block to a node which spans (ends inclusive) the given
2591/// index into the source.
2592fn scan_nodes_to_ix(
2593    tree: &Tree<Item>,
2594    mut node: Option<TreeIndex>,
2595    ix: usize,
2596) -> Option<TreeIndex> {
2597    while let Some(node_ix) = node {
2598        if tree[node_ix].item.end <= ix {
2599            node = tree[node_ix].next;
2600        } else {
2601            break;
2602        }
2603    }
2604    node
2605}
2606
2607/// Scans an inline link label, which cannot be interrupted.
2608/// Returns number of bytes (including brackets) and label on success.
2609fn scan_link_label<'text>(
2610    tree: &Tree<Item>,
2611    text: &'text str,
2612    options: Options,
2613) -> Option<(usize, ReferenceLabel<'text>)> {
2614    let bytes = text.as_bytes();
2615    if bytes.len() < 2 || bytes[0] != b'[' {
2616        return None;
2617    }
2618    let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
2619    if options.contains(Options::ENABLE_FOOTNOTES)
2620        && b'^' == bytes[1]
2621        && bytes.get(2) != Some(&b']')
2622    {
2623        // GFM footnote labels don't wrap across line breaks.
2624        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
2625        if let Some((byte_index, cow)) =
2626            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
2627        {
2628            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
2629        }
2630    }
2631    let (byte_index, cow) =
2632        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
2633    Some((byte_index + 1, ReferenceLabel::Link(cow)))
2634}
2635
2636fn scan_reference<'b>(
2637    tree: &Tree<Item>,
2638    text: &'b str,
2639    cur: Option<TreeIndex>,
2640    options: Options,
2641) -> RefScan<'b> {
2642    let cur_ix = match cur {
2643        None => return RefScan::Failed,
2644        Some(cur_ix) => cur_ix,
2645    };
2646    let start = tree[cur_ix].item.start;
2647    let tail = &text.as_bytes()[start..];
2648
2649    // If the `[` opening the candidate label was escaped in source
2650    // (preceded by an odd run of backslashes), it's a literal `[` and
2651    // can't start a reference label. Without this check the label
2652    // scanner walks raw source, which doesn't know that pulldown-cmark
2653    // already absorbed the `\` into a backslash-escape token, and it
2654    // would falsely consume `\[foo]` as `[foo]`.
2655    if tail.first() == Some(&b'[') && start > 0 {
2656        let src = text.as_bytes();
2657        let mut backslashes = 0usize;
2658        let mut j = start;
2659        while j > 0 && src[j - 1] == b'\\' {
2660            backslashes += 1;
2661            j -= 1;
2662        }
2663        if backslashes % 2 == 1 {
2664            return RefScan::Failed;
2665        }
2666    }
2667
2668    if tail.starts_with(b"[]") {
2669        // The trailing `]` of the collapsed reference must already exist as a
2670        // tree node — pulldown-cmark emits each bracket as its own item, and
2671        // we only reach here when `tail` already contains `]`. Defensive
2672        // fallback to `Failed` if that invariant is somehow broken.
2673        let Some(closing_node) = tree[cur_ix].next else {
2674            return RefScan::Failed;
2675        };
2676        RefScan::Collapsed(tree[closing_node].next)
2677    } else {
2678        let label = scan_link_label(tree, &text[start..], options);
2679        match label {
2680            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2681            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2682            None => {
2683                // If `[text]` is followed by `[` that looked like a label
2684                // opener, the shortcut form is suppressed even though the
2685                // label parse failed (CommonMark requires shortcut links
2686                // not be followed by `[`).
2687                if tail.starts_with(b"[") {
2688                    RefScan::FailedInvalidLabel
2689                } else {
2690                    RefScan::Failed
2691                }
2692            }
2693        }
2694    }
2695}
2696
/// Stack of pending link/image entries ([`LinkStackEl`]).
#[derive(Clone, Default)]
struct LinkStack {
    /// The entries, most recently pushed last.
    inner: Vec<LinkStackEl>,
    /// Entries below this index have already been visited by
    /// `disable_all_links`, so a later call only needs to look at
    /// `inner[disabled_ix..]`. Clamped to `inner.len()` whenever an entry is
    /// popped.
    disabled_ix: usize,
}
2702
2703impl LinkStack {
2704    fn push(&mut self, el: LinkStackEl) {
2705        self.inner.push(el);
2706    }
2707
2708    fn pop(&mut self) -> Option<LinkStackEl> {
2709        let el = self.inner.pop();
2710        self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2711        el
2712    }
2713
2714    fn clear(&mut self) {
2715        self.inner.clear();
2716        self.disabled_ix = 0;
2717    }
2718
2719    fn disable_all_links(&mut self) {
2720        for el in &mut self.inner[self.disabled_ix..] {
2721            if el.ty == LinkStackTy::Link {
2722                el.ty = LinkStackTy::Disabled;
2723            }
2724        }
2725        self.disabled_ix = self.inner.len();
2726    }
2727}
2728
/// A single entry of [`LinkStack`].
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node this entry refers to.
    // NOTE(review): presumably the node of the opening bracket; confirm at the push sites.
    node: TreeIndex,
    /// Kind of entry; see [`LinkStackTy`].
    ty: LinkStackTy,
}
2734
/// Kind of a [`LinkStackEl`].
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link entry.
    Link,
    /// An image entry. Never affected by `LinkStack::disable_all_links`.
    Image,
    /// A former `Link` entry that `LinkStack::disable_all_links` switched off.
    Disabled,
}
2741
/// Contains the destination URL, title and source span of a reference definition.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// Destination URL of the definition.
    pub dest: CowStr<'a>,
    /// Title of the definition, if one was given.
    pub title: Option<CowStr<'a>>,
    /// Source span of the definition.
    pub span: Range<usize>,
}
2749
2750impl<'a> LinkDef<'a> {
2751    pub fn into_static(self) -> LinkDef<'static> {
2752        LinkDef {
2753            dest: self.dest.into_static(),
2754            title: self.title.map(|s| s.into_static()),
2755            span: self.span,
2756        }
2757    }
2758}
2759
/// Per-definition bookkeeping stored in [`FootnoteDefs`] for each footnote label.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter for this definition.
    // NOTE(review): presumably the number of references to the footnote seen so
    // far; the code that updates it is outside this section, so confirm there.
    pub use_count: usize,
}
2765
/// Tracks tree indices of code span delimiters of each length. It should prevent
/// quadratic scanning behaviours by providing (amortized) constant time lookups.
struct CodeDelims {
    /// Maps a delimiter length to the queue of tree indices recorded for that
    /// length, in insertion order.
    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
    /// Whether `insert` has been called since construction or the last
    /// `clear`. The very first inserted delimiter is not recorded.
    seen_first: bool,
}
2772
2773impl CodeDelims {
2774    fn new() -> Self {
2775        Self {
2776            inner: Default::default(),
2777            seen_first: false,
2778        }
2779    }
2780
2781    fn insert(&mut self, count: usize, ix: TreeIndex) {
2782        if self.seen_first {
2783            self.inner.entry(count).or_default().push_back(ix);
2784        } else {
2785            // Skip the first insert, since that delimiter will always
2786            // be an opener and not a closer.
2787            self.seen_first = true;
2788        }
2789    }
2790
2791    fn is_populated(&self) -> bool {
2792        !self.inner.is_empty()
2793    }
2794
2795    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2796        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2797            if ix > open_ix {
2798                return Some(ix);
2799            }
2800        }
2801        None
2802    }
2803
2804    fn clear(&mut self) {
2805        self.inner.clear();
2806        self.seen_first = false;
2807    }
2808}
2809
/// Tracks brace contexts and delimiter length for math delimiters.
/// Provides amortized constant-time lookups.
struct MathDelims {
    /// Queues of recorded delimiters, keyed by a `u8`.
    // NOTE(review): presumably the key is the brace context and the two bools
    // describe the delimiter (e.g. display vs. inline); the code that fills
    // this map is not in this section, so confirm before relying on it.
    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
2815
2816impl MathDelims {
2817    fn new() -> Self {
2818        Self {
2819            inner: Default::default(),
2820        }
2821    }
2822
2823    fn clear(&mut self) {
2824        self.inner.clear();
2825    }
2826}
2827
/// Handle to an entry of `Allocations::links` (position in that vector).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

/// Handle to an entry of `Allocations::cows` (position in that vector).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

/// Handle to an entry of `Allocations::alignments` (position in that vector).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

/// Handle to an entry of `Allocations::headings`.
///
/// Stored one-based (position + 1) so that it fits a `NonZeroUsize`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);

/// Handle to an entry of `Allocations::jsx_elements` (position in that vector).
#[cfg(feature = "mdx")]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct JsxElementIndex(usize);

/// Handle to an entry of `Allocations::directives` (position in that vector).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct DirectiveIndex(usize);
2846
/// A parsed JSX attribute.
// NOTE(review): the per-variant descriptions below are inferred from the
// variant names and payload arity; confirm against the MDX attribute parser.
#[cfg(feature = "mdx")]
#[derive(Debug, Clone)]
pub(crate) enum JsxAttr<'a> {
    /// Attribute with a name only.
    Boolean(CowStr<'a>),
    /// Attribute with a name and a literal value.
    Literal(CowStr<'a>, CowStr<'a>),
    /// Attribute with a name and an expression value.
    Expression(CowStr<'a>, CowStr<'a>),
    /// Spread attribute; carries a single string.
    Spread(CowStr<'a>),
}
2856
#[cfg(feature = "mdx")]
impl<'a> JsxAttr<'a> {
    /// Converts the attribute into one with the `'static` lifetime by
    /// converting every contained string via its own `into_static`. The
    /// variant is preserved.
    pub fn into_static(self) -> JsxAttr<'static> {
        match self {
            Self::Boolean(name) => JsxAttr::Boolean(name.into_static()),
            Self::Literal(name, value) => {
                JsxAttr::Literal(name.into_static(), value.into_static())
            }
            Self::Expression(name, value) => {
                JsxAttr::Expression(name.into_static(), value.into_static())
            }
            Self::Spread(value) => JsxAttr::Spread(value.into_static()),
        }
    }
}
2868
/// Pre-parsed JSX element data (name + attributes + tag classification).
#[cfg(feature = "mdx")]
#[derive(Debug, Clone)]
pub(crate) struct JsxElementData<'a> {
    /// Element name.
    pub name: CowStr<'a>,
    /// Parsed attributes of the tag.
    pub attrs: Vec<JsxAttr<'a>>,
    /// Raw string for the tag; this is the payload `item_to_event` places in
    /// `Tag::MdxJsxFlowElement` / `Tag::MdxJsxTextElement`.
    pub raw: CowStr<'a>,
    /// Tag classification flag.
    // NOTE(review): presumably set for closing tags (`</x>`); confirm in the MDX scanner.
    pub is_closing: bool,
    /// Tag classification flag.
    // NOTE(review): presumably set for self-closing tags (`<x />`); confirm in the MDX scanner.
    pub is_self_closing: bool,
}
2879
#[cfg(feature = "mdx")]
impl<'a> JsxElementData<'a> {
    /// Converts the element data into one with the `'static` lifetime.
    ///
    /// `name`, `raw` and every attribute are converted via their own
    /// `into_static`; the two classification flags are copied unchanged.
    pub fn into_static(self) -> JsxElementData<'static> {
        let JsxElementData {
            name,
            attrs,
            raw,
            is_closing,
            is_self_closing,
        } = self;
        JsxElementData {
            name: name.into_static(),
            attrs: attrs.into_iter().map(JsxAttr::into_static).collect(),
            raw: raw.into_static(),
            is_closing,
            is_self_closing,
        }
    }
}
2892
/// Data recorded for a directive; `item_to_event` moves `name` and
/// `attributes` into the emitted `Tag::Directive`.
#[derive(Debug, Clone)]
pub(crate) struct DirectiveAttrData<'a> {
    /// Directive name.
    pub name: CowStr<'a>,
    /// Attribute pairs of the directive.
    // NOTE(review): presumably (key, value); confirm in the directive scanner.
    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    /// Start of the directive's label.
    // NOTE(review): presumably a byte offset into the source; confirm at the use sites.
    pub label_start: usize,
    /// End of the directive's label (same caveat as `label_start`).
    pub label_end: usize,
    /// Cols of leading whitespace before `:::` on the opening line, after
    /// outer-container prefix stripping. Mirrors micromark-extension-directive's
    /// `initialSize`, which controls how much the directive body's per-line
    /// linePrefix is stripped (up to `initialSize + 1` cols). Only meaningful
    /// for container directives — leaf/text directives leave this 0.
    pub initial_size: u8,
}
2906
/// Side storage for data that tree items reference through small index
/// handles (`CowIndex`, `LinkIndex`, `AlignmentIndex`, `HeadingIndex`, ...)
/// instead of holding it inline.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Reference definitions, keyed by label.
    pub refdefs: RefDefs<'a>,
    /// Every refdef occurrence in source order, including duplicates that
    /// `refdefs` drops (it's a map and only keeps the first per label, since
    /// resolution picks the first match per CommonMark). Used to emit every
    /// definition as its own mdast `definition` node.
    pub refdefs_all: Vec<(LinkLabel<'a>, LinkDef<'a>)>,
    /// Footnote definitions, keyed by label.
    pub footdefs: FootnoteDefs<'a>,
    /// Link/image data as `(type, url, title, id)`, addressed by `LinkIndex`.
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Standalone strings, addressed by `CowIndex`.
    cows: Vec<CowStr<'a>>,
    /// Column alignment lists, addressed by `AlignmentIndex`.
    alignments: Vec<Vec<Alignment>>,
    /// Heading attribute sets, addressed by `HeadingIndex` (one-based).
    headings: Vec<HeadingAttributes<'a>>,
    /// Pre-parsed JSX elements, addressed by `JsxElementIndex`.
    #[cfg(feature = "mdx")]
    jsx_elements: Vec<JsxElementData<'a>>,
    /// Directive data, addressed by `DirectiveIndex`.
    directives: Vec<DirectiveAttrData<'a>>,
}
2924
/// Used by the heading attributes extension.
///
/// `item_to_event` clones these three fields into `Tag::Heading`.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Heading id, if one was specified.
    pub id: Option<CowStr<'a>>,
    /// Classes attached to the heading.
    pub classes: Vec<CowStr<'a>>,
    /// Further attributes as name plus optional value.
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
2932
/// Keeps track of the reference definitions defined in the document.
///
/// Wraps a map from link label to [`LinkDef`]; use [`RefDefs::get`] for a
/// lookup by label and [`RefDefs::iter`] to enumerate all definitions.
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2936
/// Keeps track of the footnote definitions defined in the document.
///
/// Wraps a map from footnote label to [`FootnoteDef`].
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2940
2941impl<'input, 'b, 's> RefDefs<'input>
2942where
2943    's: 'b,
2944{
2945    /// Performs a lookup on reference label using unicode case folding.
2946    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2947        self.0.get(&UniCase::new(key.into()))
2948    }
2949
2950    /// Provides an iterator over all the document's reference definitions.
2951    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2952        self.0.iter().map(|(k, v)| (k.as_ref(), v))
2953    }
2954}
2955
2956impl<'input, 'b, 's> FootnoteDefs<'input>
2957where
2958    's: 'b,
2959{
2960    /// Performs a lookup on reference label using unicode case folding.
2961    pub fn contains(&'s self, key: &'b str) -> bool {
2962        self.0.contains_key(&UniCase::new(key.into()))
2963    }
2964    /// Performs a lookup on reference label using unicode case folding.
2965    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2966        self.0.get_mut(&UniCase::new(key))
2967    }
2968}
2969
2970impl<'a> Allocations<'a> {
2971    pub fn new() -> Self {
2972        Self {
2973            refdefs: RefDefs::default(),
2974            refdefs_all: Vec::new(),
2975            footdefs: FootnoteDefs::default(),
2976            links: Vec::with_capacity(128),
2977            cows: Vec::new(),
2978            alignments: Vec::new(),
2979            headings: Vec::new(),
2980            #[cfg(feature = "mdx")]
2981            jsx_elements: Vec::new(),
2982            directives: Vec::new(),
2983        }
2984    }
2985
2986    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2987        let ix = self.cows.len();
2988        self.cows.push(cow);
2989        CowIndex(ix)
2990    }
2991
2992    pub fn allocate_link(
2993        &mut self,
2994        ty: LinkType,
2995        url: CowStr<'a>,
2996        title: CowStr<'a>,
2997        id: CowStr<'a>,
2998    ) -> LinkIndex {
2999        let ix = self.links.len();
3000        self.links.push((ty, url, title, id));
3001        LinkIndex(ix)
3002    }
3003
3004    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
3005        let ix = self.alignments.len();
3006        self.alignments.push(alignment);
3007        AlignmentIndex(ix)
3008    }
3009
3010    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
3011        let ix = self.headings.len();
3012        self.headings.push(attrs);
3013        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
3014        // such a long Vec cannot fit in memory.
3015        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
3016        HeadingIndex(ix_nonzero)
3017    }
3018
3019    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
3020        core::mem::replace(&mut self.cows[ix.0], "".into())
3021    }
3022
3023    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
3024        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
3025        core::mem::replace(&mut self.links[ix.0], default_link)
3026    }
3027
3028    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
3029        core::mem::take(&mut self.alignments[ix.0])
3030    }
3031
3032    #[cfg(feature = "mdx")]
3033    pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
3034        let ix = self.jsx_elements.len();
3035        self.jsx_elements.push(data);
3036        JsxElementIndex(ix)
3037    }
3038
3039    pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
3040        let ix = self.directives.len();
3041        self.directives.push(data);
3042        DirectiveIndex(ix)
3043    }
3044
3045    pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
3046        core::mem::replace(
3047            &mut self.directives[ix.0],
3048            DirectiveAttrData {
3049                name: "".into(),
3050                attributes: Vec::new(),
3051                label_start: 0,
3052                label_end: 0,
3053                initial_size: 0,
3054            },
3055        )
3056    }
3057
3058    pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
3059        &self.directives[ix.0]
3060    }
3061
3062    #[cfg(feature = "mdx")]
3063    pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
3064        core::mem::replace(
3065            &mut self.jsx_elements[ix.0],
3066            JsxElementData {
3067                name: "".into(),
3068                attrs: Vec::new(),
3069                raw: "".into(),
3070                is_closing: false,
3071                is_self_closing: false,
3072            },
3073        )
3074    }
3075}
3076
3077impl<'a> Index<CowIndex> for Allocations<'a> {
3078    type Output = CowStr<'a>;
3079
3080    fn index(&self, ix: CowIndex) -> &Self::Output {
3081        self.cows.index(ix.0)
3082    }
3083}
3084
3085impl<'a> Index<LinkIndex> for Allocations<'a> {
3086    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
3087
3088    fn index(&self, ix: LinkIndex) -> &Self::Output {
3089        self.links.index(ix.0)
3090    }
3091}
3092
3093impl<'a> Index<AlignmentIndex> for Allocations<'a> {
3094    type Output = Vec<Alignment>;
3095
3096    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
3097        self.alignments.index(ix.0)
3098    }
3099}
3100
3101impl<'a> Index<HeadingIndex> for Allocations<'a> {
3102    type Output = HeadingAttributes<'a>;
3103
3104    fn index(&self, ix: HeadingIndex) -> &Self::Output {
3105        self.headings.index(ix.0.get() - 1)
3106    }
3107}
3108
/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`), declarations (`<!DECLARATION`) and comments. The
/// respective `usize`s represent the indices before which a scan will always
/// fail and can hence be skipped.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Guard index for cdata scans.
    pub cdata: usize,
    /// Guard index for processing-instruction scans.
    pub processing: usize,
    /// Guard index for declaration scans.
    pub declaration: usize,
    /// Guard index for comment scans.
    pub comment: usize,
}
3121
/// Trait to customize [`Parser`] behavior with callbacks. See [`Parser::new_with_callbacks`].
///
/// All methods have a default implementation, so you can choose which ones to override.
pub trait ParserCallbacks<'input> {
    /// Potentially provide a custom definition for a broken link.
    ///
    /// In case the parser encounters any potential links that have a broken
    /// reference (e.g. `[foo]` when there is no `[foo]: ` entry at the bottom)
    /// this callback will be called with information about the reference,
    /// and the returned pair will be used as the link URL and title if it is not
    /// `None`.
    ///
    /// The default implementation ignores `link` and returns `None`.
    fn handle_broken_link(
        &mut self,
        #[allow(unused_variables)] link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        None
    }
}
3140
/// Wrapper to implement [`ParserCallbacks::handle_broken_link`] with a closure.
///
/// Used internally by [`Parser::new_with_broken_link_callback`].
///
/// The closure is optional; when it is `None`, `handle_broken_link` returns
/// `None` for every broken link.
#[allow(missing_debug_implementations)]
pub struct BrokenLinkCallback<F>(Option<F>);
3146
3147impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
3148where
3149    F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
3150{
3151    fn handle_broken_link(
3152        &mut self,
3153        link: BrokenLink<'input>,
3154    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3155        self.0.as_mut().and_then(|cb| cb(link))
3156    }
3157}
3158
3159impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
3160    fn handle_broken_link(
3161        &mut self,
3162        link: BrokenLink<'input>,
3163    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3164        (**self).handle_broken_link(link)
3165    }
3166}
3167
/// [Parser] callbacks that do nothing.
///
/// Used when no custom callbacks are provided.
#[allow(missing_debug_implementations)]
pub struct DefaultParserCallbacks;

// Empty on purpose: every method keeps the trait's default implementation.
impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
3175
/// Markdown event and source range iterator.
///
/// Generates tuples where the first element is the markdown event and the second
/// is the corresponding range in the source string.
///
/// Constructed from a `Parser` using its
/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
#[derive(Debug)]
pub struct OffsetIter<'a, CB> {
    /// The wrapped parser that produces the events.
    parser: Parser<'a, CB>,
}
3187
impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
    /// Returns a reference to the internal reference definition tracker.
    ///
    /// Delegates to the wrapped parser's `reference_definitions`.
    pub fn reference_definitions(&self) -> &RefDefs<'_> {
        self.parser.reference_definitions()
    }

    /// Returns MDX validation errors collected during parsing.
    ///
    /// Delegates to the wrapped parser's `mdx_errors`.
    // NOTE(review): each entry is presumably (source offset, message); confirm
    // against `Parser::mdx_errors`.
    pub fn mdx_errors(&self) -> &[(usize, String)] {
        self.parser.mdx_errors()
    }
}
3199
3200impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
3201    type Item = (Event<'a>, Range<usize>);
3202
3203    fn next(&mut self) -> Option<Self::Item> {
3204        self.parser
3205            .inner
3206            .next_event_range(&mut self.parser.callbacks)
3207    }
3208}
3209
3210impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
3211    type Item = Event<'a>;
3212
3213    fn next(&mut self) -> Option<Event<'a>> {
3214        self.inner
3215            .next_event_range(&mut self.callbacks)
3216            .map(|(event, _range)| event)
3217    }
3218}
3219
3220impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
3221
impl<'input> ParserInner<'input> {
    /// Produces the next event together with its source range, advancing the
    /// tree cursor.
    ///
    /// Two situations are handled:
    /// - the cursor is `None`: the enclosing item is popped and its
    ///   `Event::End` is emitted; if nothing can be popped, iteration is over
    ///   and `None` is returned;
    /// - the cursor points at an item: inline content is resolved first when
    ///   the item may contain inlines, then the item's event is emitted.
    ///
    /// `TightParagraph` items never produce events of their own; only their
    /// children do.
    // NOTE(review): the exact semantics of `tree.cur`/`push`/`pop`/`next_sibling`
    // are defined in `tree.rs`; the comments below describe how they are used
    // here, presumably as "descend", "ascend" and "advance" on a cursor.
    fn next_event_range(
        &mut self,
        callbacks: &mut dyn ParserCallbacks<'input>,
    ) -> Option<(Event<'input>, Range<usize>)> {
        match self.tree.cur() {
            None => {
                // No current item at this level: go back up. `?` ends the
                // iteration when there is nothing left to pop.
                let ix = self.tree.pop()?;
                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
                    // tight paragraphs emit nothing
                    self.tree.next_sibling(ix);
                    return self.next_event_range(callbacks);
                } else {
                    ix
                };
                // Compute the end tag before moving the cursor on.
                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
                self.tree.next_sibling(ix);
                let span = self.tree[ix].item.start..self.tree[ix].item.end;
                debug_assert!(span.start <= span.end);
                Some((Event::End(tag_end), span))
            }
            Some(cur_ix) => {
                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
                    // tight paragraphs emit nothing
                    self.tree.push();
                    // NOTE(review): this `unwrap` assumes a tight paragraph
                    // always has at least one child — confirm in the first pass.
                    self.tree.cur().unwrap()
                } else {
                    cur_ix
                };
                // Resolve inline markup before converting the item, since that
                // may rewrite the item at `cur_ix`.
                if self.tree[cur_ix].item.body.is_maybe_inline() {
                    self.handle_inline(callbacks);
                }

                // Copy the node out so the tree can be mutated below.
                let node = self.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.text, &mut self.allocs);
                if let Event::Start(..) = event {
                    // Container item: continue with its children.
                    self.tree.push();
                } else {
                    // Leaf item: continue with the following sibling.
                    self.tree.next_sibling(cur_ix);
                }
                debug_assert!(item.start <= item.end);
                Some((event, item.start..item.end))
            }
        }
    }
}
3269
3270fn body_to_tag_end(body: &ItemBody) -> TagEnd {
3271    match *body {
3272        ItemBody::Paragraph => TagEnd::Paragraph,
3273        ItemBody::Emphasis => TagEnd::Emphasis,
3274        ItemBody::Superscript => TagEnd::Superscript,
3275        ItemBody::Subscript => TagEnd::Subscript,
3276        ItemBody::Strong => TagEnd::Strong,
3277        ItemBody::Strikethrough => TagEnd::Strikethrough,
3278        ItemBody::Link(..) => TagEnd::Link,
3279        ItemBody::Image(..) => TagEnd::Image,
3280        ItemBody::Heading(level, _) => TagEnd::Heading(level),
3281        ItemBody::IndentCodeBlock(..) | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
3282            TagEnd::CodeBlock
3283        }
3284        ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
3285        ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
3286        ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
3287        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
3288        ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
3289        ItemBody::List(_, c, _) => {
3290            let is_ordered = c == b'.' || c == b')';
3291            TagEnd::List(is_ordered)
3292        }
3293        ItemBody::ListItem(_, _) => TagEnd::Item,
3294        ItemBody::TableHead => TagEnd::TableHead,
3295        ItemBody::TableCell => TagEnd::TableCell,
3296        ItemBody::TableRow => TagEnd::TableRow,
3297        ItemBody::Table(..) => TagEnd::Table,
3298        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
3299        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
3300        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
3301        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
3302        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
3303        #[cfg(feature = "mdx")]
3304        ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
3305        #[cfg(feature = "mdx")]
3306        ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
3307        _ => panic!("unexpected item body {:?}", body),
3308    }
3309}
3310
3311fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
3312    let tag = match item.body {
3313        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
3314        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
3315        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
3316        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
3317        ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
3318        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
3319        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
3320        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
3321        ItemBody::SoftBreak => return Event::SoftBreak,
3322        ItemBody::HardBreak(_) => return Event::HardBreak,
3323        ItemBody::FootnoteReference(cow_ix) => {
3324            return Event::FootnoteReference(allocs.take_cow(cow_ix))
3325        }
3326        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
3327        ItemBody::Rule => return Event::Rule,
3328        ItemBody::Paragraph => Tag::Paragraph,
3329        ItemBody::Emphasis => Tag::Emphasis,
3330        ItemBody::Superscript => Tag::Superscript,
3331        ItemBody::Subscript => Tag::Subscript,
3332        ItemBody::Strong => Tag::Strong,
3333        ItemBody::Strikethrough => Tag::Strikethrough,
3334        ItemBody::Link(link_ix) => {
3335            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3336            Tag::Link {
3337                link_type,
3338                dest_url,
3339                title,
3340                id,
3341            }
3342        }
3343        ItemBody::Image(link_ix) => {
3344            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3345            Tag::Image {
3346                link_type,
3347                dest_url,
3348                title,
3349                id,
3350            }
3351        }
3352        ItemBody::Heading(level, Some(heading_ix)) => {
3353            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
3354            Tag::Heading {
3355                level,
3356                id: id.clone(),
3357                classes: classes.clone(),
3358                attrs: attrs.clone(),
3359            }
3360        }
3361        ItemBody::Heading(level, None) => Tag::Heading {
3362            level,
3363            id: None,
3364            classes: Vec::new(),
3365            attrs: Vec::new(),
3366        },
3367        ItemBody::MathBlock(cow_ix) => {
3368            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3369        }
3370        ItemBody::FencedCodeBlock(cow_ix) => {
3371            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3372        }
3373        ItemBody::IndentCodeBlock(..) => Tag::CodeBlock(CodeBlockKind::Indented),
3374        ItemBody::ContainerDirective(_, dir_ix)
3375        | ItemBody::LeafDirective(dir_ix)
3376        | ItemBody::TextDirective(dir_ix) => {
3377            let kind = match item.body {
3378                ItemBody::ContainerDirective(..) => DirectiveKind::Container,
3379                ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
3380                _ => DirectiveKind::Text,
3381            };
3382            let dir = allocs.take_directive(dir_ix);
3383            Tag::Directive {
3384                kind,
3385                name: dir.name,
3386                attributes: dir.attributes,
3387            }
3388        }
3389        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
3390        ItemBody::List(is_tight, c, listitem_start) => {
3391            if c == b'.' || c == b')' {
3392                Tag::List(Some(listitem_start), is_tight)
3393            } else {
3394                Tag::List(None, is_tight)
3395            }
3396        }
3397        ItemBody::ListItem(_, _) => Tag::Item,
3398        ItemBody::TableHead => Tag::TableHead,
3399        ItemBody::TableCell => Tag::TableCell,
3400        ItemBody::TableRow => Tag::TableRow,
3401        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
3402        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
3403        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
3404        ItemBody::Math(cow_ix, is_display) => {
3405            return if is_display {
3406                Event::DisplayMath(allocs.take_cow(cow_ix))
3407            } else {
3408                Event::InlineMath(allocs.take_cow(cow_ix))
3409            }
3410        }
3411        ItemBody::DefinitionList(_) => Tag::DefinitionList,
3412        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
3413        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
3414        #[cfg(feature = "mdx")]
3415        ItemBody::MdxJsxFlowElement(jsx_ix) => {
3416            let jsx = allocs.take_jsx_element(jsx_ix);
3417            Tag::MdxJsxFlowElement(jsx.raw)
3418        }
3419        #[cfg(feature = "mdx")]
3420        ItemBody::MdxJsxTextElement(jsx_ix) => {
3421            let jsx = allocs.take_jsx_element(jsx_ix);
3422            Tag::MdxJsxTextElement(jsx.raw)
3423        }
3424        #[cfg(feature = "mdx")]
3425        ItemBody::MdxFlowExpression(cow_ix) => {
3426            return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
3427        }
3428        #[cfg(feature = "mdx")]
3429        ItemBody::MdxTextExpression(cow_ix) => {
3430            return Event::MdxTextExpression(allocs.take_cow(cow_ix))
3431        }
3432        #[cfg(feature = "mdx")]
3433        ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
3434        _ => panic!("unexpected item body {:?}", item.body),
3435    };
3436
3437    Event::Start(tag)
3438}
3439
#[cfg(test)]
mod test {
    //! Unit tests for the parser: crash regressions, source-offset reporting, broken-link
    //! callbacks, reference definitions, wikilinks and (behind the `mdx` feature) MDX constructs.

    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::*;
    use crate::tree::Node;

    // TODO: move these tests to tests/html.rs?

    /// Builds a parser over `text` with the tables, footnotes, strikethrough, superscript,
    /// subscript and task-list extensions switched on.
    fn parser_with_extensions(text: &str) -> Parser<'_> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_SUPERSCRIPT);
        opts.insert(Options::ENABLE_SUBSCRIPT);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    /// Pins the size of `Node<Item>` to 48 bytes on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = core::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    /// Pins the size of `ItemBody` to 16 bytes on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = core::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    /// A lone `<` must parse without panicking and yield exactly three events.
    #[test]
    fn single_open_fish_bracket() {
        // don't crash
        assert_eq!(3, Parser::new("<").count());
    }

    /// A lone `#` must parse without panicking and yield exactly two events.
    #[test]
    fn lone_hashtag() {
        // don't crash
        assert_eq!(2, Parser::new("#").count());
    }

    /// Backslashes mixed with bare carriage returns must not panic.
    #[test]
    fn lots_of_backslashes() {
        // don't crash
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    /// Checks the source ranges reported for a piped wikilink and its surrounding text.
    #[test]
    fn issue_1030() {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_WIKILINKS);

        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);

        let offsets = parser
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect::<Vec<_>>();
        let expected_offsets = vec![
            (0..44),  // Paragraph START
            (0..19),  // `For a new ferrari, `
            (19..43), // Wikilink START
            (31..41), // `click here`
            (19..43), // Wikilink END
            (43..44), // `!`
            (0..44),  // Paragraph END
        ];
        assert_eq!(offsets, expected_offsets);
    }

    /// Regression input for issue 320: parsing must not panic.
    #[test]
    fn issue_320() {
        // don't crash
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    /// Regression inputs for issue 319: parsing must not panic.
    #[test]
    fn issue_319() {
        // don't crash
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    /// Regression inputs for issue 303: parsing must not panic.
    #[test]
    fn issue_303() {
        // don't crash
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    /// Regression inputs for issue 313: parsing must not panic.
    #[test]
    fn issue_313() {
        // don't crash
        parser_with_extensions("*]0[^\r\r*]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    /// Regression input for issue 311: parsing must not panic.
    #[test]
    fn issue_311() {
        // don't crash
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    /// Regression input for issue 283 (multi-byte UTF-8 around `<td:^`): parsing must not panic.
    #[test]
    fn issue_283() {
        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        // don't crash
        parser_with_extensions(input).count();
    }

    /// Regression inputs for issue 289: parsing must not panic.
    #[test]
    fn issue_289() {
        // don't crash
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    /// Regression input for issue 306: parsing must not panic.
    #[test]
    fn issue_306() {
        // don't crash
        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
    }

    /// Regression input for issue 305: parsing must not panic.
    #[test]
    fn issue_305() {
        // don't crash
        parser_with_extensions("_6**6*_*").count();
    }

    /// Another emphasis-delimiter input that must not panic.
    #[test]
    fn another_emphasis_panic() {
        parser_with_extensions("*__#_#__*").count();
    }

    /// Checks the source range attached to every event of `*hello* world`.
    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The first reference-style link start event must cover `[testing][Some reference]`.
    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .find_map(|(ev, range)| match ev {
                    Event::Start(Tag::Link {
                        link_type: LinkType::Reference,
                        ..
                    }) => Some(range),
                    _ => None,
                })
                .expect("input contains a reference link");
        assert_eq!(5..30, range);
    }

    /// The footnote reference event must cover exactly `[^1]`.
    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .find_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .expect("input contains a footnote reference");
        assert_eq!(12..16, range);
    }

    /// A `!` directly before a footnote reference stays a separate one-byte text event and
    /// is not included in the footnote reference's range.
    #[test]
    fn footnote_offsets_exclamation() {
        // Remembers the last event seen before the footnote reference is found.
        let mut immediately_before_footnote = None;
        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .find_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => {
                    immediately_before_footnote = Some((ev, range));
                    None
                }
            })
            .expect("input contains a footnote reference");
        assert_eq!(13..17, range);
        if let (Event::Text(exclamation), range_exclamation) =
            immediately_before_footnote.as_ref().unwrap()
        {
            assert_eq!("!", &exclamation[..]);
            assert_eq!(&(12..13), range_exclamation);
        } else {
            panic!("what came first, then? {immediately_before_footnote:?}");
        }
    }

    /// The event at index 3 must cover bytes 3..59, i.e. everything after the leading `a`
    /// paragraph up to the end of the input.
    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    /// Checks the span of the fifth table cell (index 4), which contains only whitespace.
    #[test]
    fn table_cell_span() {
        let markdown = "a|b|c\n--|--|--\na|  |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            .nth(4)
            .unwrap();
        // Cell span includes the leading `|` delimiter (matching remark).
        let expected_offset_start = "a|b|c\n--|--|--\na".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 3),
            event_offset
        );
    }

    /// Checks the source range attached to every event of `a [b](c) d`.
    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// `###\n` must produce exactly two events, both spanning the whole input.
    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The broken-link callback must be invoked the expected number of times per input.
    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            // Drain the parser so every broken link is visited.
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    /// A broken-link callback receives the reference and span of the broken link, and the
    /// URL/title it returns end up in the emitted link tag.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title, id) in parser.filter_map(|event| match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }) => Some((link_type, dest_url, title, id)),
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
        // Guard against the loop body never running.
        assert!(link_tag_count > 0);
    }

    /// A fenced code block reports `CodeBlockKind::Fenced` carrying its info string.
    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// A four-space indented block reports `CodeBlockKind::Indented`, even when its content
    /// looks like a fence.
    #[test]
    fn code_block_kind_check_indented() {
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Reference definitions can be looked up before and after iteration has started, with
    /// keys that differ in case from the definition and that live shorter than the parser.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        // Advance the parser once. Assert instead of `if let` so that the checks below can
        // never be skipped silently.
        assert!(parser.next().is_some(), "parser produced no events");

        // testing keys with shorter lifetimes than parser and its input
        let s = "final one".to_owned();
        let link_def = parser.reference_definitions().get(&s).unwrap();
        let span = &input[link_def.span.clone()];
        assert_eq!(span, "[final ONE]: http://wikipedia.org");
    }

    /// Compile-time check that common ways of supplying a broken-link callback (closure and
    /// plain function, static and temporary input) are accepted.
    #[test]
    // `'b` is only named inside the body (in the closure's parameter type), see below.
    #[allow(clippy::extra_unused_lifetimes)]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
        // Hack it by attaching the lifetime to the test function instead.
        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
         * fix is simple: move it out to a function that allows annotating the lifetimes.
         */
        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut closure)) {
        //}

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }

    /// Inline HTML spanning two block-quote lines is emitted as a single event with the
    /// `> ` continuation marker removed.
    #[test]
    fn inline_html_inside_blockquote() {
        // Regression for #960
        let input = "> <foo\n> bar>";
        let events: Vec<_> = Parser::new(input).collect();
        let expected = [
            Event::Start(Tag::BlockQuote(None)),
            Event::Start(Tag::Paragraph),
            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
            Event::End(TagEnd::Paragraph),
            Event::End(TagEnd::BlockQuote(None)),
        ];
        assert_eq!(&events, &expected);
    }

    /// `has_pothole` is `false` for `[[foo]]` and `true` for the piped form `[[bar|baz]]`.
    #[test]
    fn wikilink_has_pothole() {
        let input = "[[foo]] [[bar|baz]]";
        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
        let expected = [
            Event::Start(Tag::Paragraph),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: false },
                dest_url: CowStr::Borrowed("foo"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("foo")),
            Event::End(TagEnd::Link),
            Event::Text(CowStr::Borrowed(" ")),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: true },
                dest_url: CowStr::Borrowed("bar"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("baz")),
            Event::End(TagEnd::Link),
            Event::End(TagEnd::Paragraph),
        ];
        assert_eq!(&events, &expected);
    }

    /// Builds a parser over `text` with only `Options::ENABLE_MDX` set.
    #[cfg(feature = "mdx")]
    fn mdx_parser(text: &str) -> Parser<'_> {
        Parser::new_ext(text, Options::ENABLE_MDX)
    }

    /// An `import` line yields a single `MdxEsm` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_esm_import() {
        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
    }

    /// An `export` line yields a single `MdxEsm` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_esm_export() {
        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
    }

    /// A braced expression on its own line yields a single `MdxFlowExpression` event whose
    /// payload excludes the braces.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_flow_expression() {
        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
    }

    /// A self-closing JSX tag on its own line starts with an `MdxJsxFlowElement` tag.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_jsx_flow_self_closing() {
        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
        assert!(!events.is_empty());
        assert!(
            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
        );
    }

    /// A fragment opener `<>` on its own line starts with an `MdxJsxFlowElement` tag.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_jsx_flow_fragment() {
        let events: Vec<_> = mdx_parser("<>\n").collect();
        assert!(!events.is_empty());
        assert!(matches!(
            &events[0],
            Event::Start(Tag::MdxJsxFlowElement(_))
        ));
    }

    /// A braced expression inside running text yields an `MdxTextExpression` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_inline_expression() {
        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(has_expr, "Expected inline MDX expression, got: {events:?}");
    }

    /// A JSX tag inside running text yields an `MdxJsxTextElement` start tag.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_inline_jsx() {
        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
        assert!(has_jsx, "Expected inline MDX JSX, got: {events:?}");
    }

    /// Lowercase tags such as `<em>` are also reported as JSX when MDX is enabled.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_all_tags_are_jsx() {
        // In MDX mode, all tags (including lowercase) are JSX, not HTML.
        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
        assert!(has_jsx, "In MDX mode, <em> should be JSX: {events:?}");
    }

    /// With default options an `import` line is ordinary paragraph text. This test is not
    /// gated on the `mdx` feature, so it runs in every configuration.
    #[test]
    fn mdx_does_not_interfere_without_flag() {
        // Without ENABLE_MDX, none of this should be parsed as MDX.
        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
        // Should be a regular paragraph.
        assert!(events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
    }

    /// A braced expression inside an ATX heading yields both a heading start tag and an
    /// `MdxTextExpression` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_expression_in_heading() {
        let events: Vec<_> = mdx_parser("# {title}\n").collect();
        let has_heading = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
        assert!(has_heading, "Should have a heading");
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
        assert!(has_expr, "Heading should contain MdxTextExpression, got: {events:?}");
    }

    /// A heading mixing plain text and a braced expression yields both a text event and an
    /// `MdxTextExpression` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_expression_mixed_text_in_heading() {
        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
        let has_text = events
            .iter()
            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(has_text, "Should have text, got: {events:?}");
        assert!(has_expr, "Should have expression, got: {events:?}");
    }
}