Skip to main content

satteri_pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25    cmp::{max, min},
26    iter::FusedIterator,
27    num::NonZeroUsize,
28    ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33use crate::{
34    firstpass::run_first_pass,
35    linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
36    mdx::*,
37    scanners::*,
38    strings::CowStr,
39    tree::{Tree, TreeIndex},
40    Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
41    MetadataBlockKind, Options, Tag, TagEnd,
42};
43
/// Maximum nesting depth of parentheses accepted inside a link destination.
///
/// Unbounded nesting can be abused for denial-of-service attacks, so the depth
/// is capped. The CommonMark spec explicitly allows such a cap as long as it is
/// at least 3: <https://spec.commonmark.org/0.29/#link-destination>
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
50
51#[derive(Debug, Default, Clone, Copy)]
52pub(crate) struct Item {
53    pub start: usize,
54    pub end: usize,
55    pub body: ItemBody,
56}
57
58#[derive(Debug, PartialEq, Clone, Copy, Default)]
59pub(crate) enum ItemBody {
60    // These are possible inline items, need to be resolved in second pass.
61
62    // repeats, can_open, can_close
63    MaybeEmphasis(usize, bool, bool),
64    // preceded_by_backslash, brace context
65    MaybeMath(bool, u8),
66    // quote byte, can_open, can_close
67    MaybeSmartQuote(u8, bool, bool),
68    MaybeCode(usize, bool), // number of backticks, preceded by backslash
69    MaybeHtml,
70    MaybeLinkOpen,
71    // bool indicates whether or not the preceding section could be a reference
72    MaybeLinkClose(bool),
73    MaybeImage,
74
75    // These are inline items after resolution.
76    Emphasis,
77    Strong,
78    Strikethrough,
79    Superscript,
80    Subscript,
81    Math(CowIndex, bool), // true for display math
82    Code(CowIndex),
83    Link(LinkIndex),
84    Image(LinkIndex),
85    FootnoteReference(CowIndex),
86    TaskListMarker(bool), // true for checked
87
88    // These are also inline items.
89    InlineHtml,
90    OwnedInlineHtml(CowIndex),
91    SynthesizeText(CowIndex),
92    SynthesizeChar(char),
93    Html,
94    Text {
95        backslash_escaped: bool,
96    },
97    SoftBreak,
98    // true = is backlash
99    HardBreak(bool),
100
101    // Dummy node at the top of the tree - should not be used otherwise!
102    #[default]
103    Root,
104
105    // These are block items.
106    Paragraph,
107    TightParagraph,
108    Rule,
109    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
110    FencedCodeBlock(CowIndex),
111    MathBlock(CowIndex), // meta string (info after $$)
112    IndentCodeBlock,
113    HtmlBlock(bool), // true = type 6/7 (blank-line-terminated)
114    BlockQuote(Option<BlockQuoteKind>),
115    ContainerDirective(u8, DirectiveIndex), // (fence length, directive data)
116    LeafDirective(DirectiveIndex),
117    TextDirective(DirectiveIndex),
118    List(bool, u8, u64),   // is_tight, list character, list start index
119    ListItem(usize, bool), // indent level, spread (loose item)
120    FootnoteDefinition(CowIndex),
121    MetadataBlock(MetadataBlockKind),
122
123    // Definition lists
124    DefinitionList(bool), // is_tight
125    // gets turned into either a paragraph or a definition list title,
126    // depending on whether there's a definition after it
127    MaybeDefinitionListTitle,
128    DefinitionListTitle,
129    DefinitionListDefinition(usize),
130
131    // Tables
132    Table(AlignmentIndex),
133    TableHead,
134    TableRow,
135    TableCell,
136
137    // MDX
138    MdxJsxFlowElement(JsxElementIndex),
139    MdxJsxTextElement(JsxElementIndex),
140    MdxFlowExpression(CowIndex),
141    MdxTextExpression(CowIndex),
142    MdxEsm(CowIndex),
143}
144
145impl ItemBody {
146    pub(crate) fn is_maybe_inline(&self) -> bool {
147        use ItemBody::*;
148        matches!(
149            *self,
150            MaybeEmphasis(..)
151                | MaybeMath(..)
152                | MaybeSmartQuote(..)
153                | MaybeCode(..)
154                | MaybeHtml
155                | MaybeLinkOpen
156                | MaybeLinkClose(..)
157                | MaybeImage
158        )
159    }
160    pub(crate) fn is_block_level(&self) -> bool {
161        !self.is_inline() && !matches!(self, ItemBody::Root)
162    }
163    fn is_inline(&self) -> bool {
164        use ItemBody::*;
165        matches!(
166            *self,
167            MaybeEmphasis(..)
168                | MaybeMath(..)
169                | MaybeSmartQuote(..)
170                | MaybeCode(..)
171                | MaybeHtml
172                | MaybeLinkOpen
173                | MaybeLinkClose(..)
174                | MaybeImage
175                | Emphasis
176                | Strong
177                | Strikethrough
178                | Math(..)
179                | Code(..)
180                | Link(..)
181                | Image(..)
182                | FootnoteReference(..)
183                | TaskListMarker(..)
184                | InlineHtml
185                | OwnedInlineHtml(..)
186                | SynthesizeText(..)
187                | SynthesizeChar(..)
188                | Html
189                | Text { .. }
190                | SoftBreak
191                | HardBreak(..)
192        )
193    }
194}
195
196#[derive(Debug)]
197pub struct BrokenLink<'a> {
198    pub span: core::ops::Range<usize>,
199    pub link_type: LinkType,
200    pub reference: CowStr<'a>,
201}
202
203/// Markdown event iterator.
204pub struct Parser<'input, CB = DefaultParserCallbacks> {
205    callbacks: CB,
206    inner: ParserInner<'input>,
207}
208
209// Inner state for `Parser`, extracted so that it can remain generic over the callback without
210// re-compiling complex logic for each instantiation of the generic type.
211pub(crate) struct ParserInner<'input> {
212    pub(crate) text: &'input str,
213    pub(crate) options: Options,
214    pub(crate) tree: Tree<Item>,
215    pub(crate) allocs: Allocations<'input>,
216    html_scan_guard: HtmlScanGuard,
217
218    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
219    // Consider this example:
220    //
221    //     [x]: xxx...
222    //     [x]
223    //     [x]
224    //     [x]
225    //
226    // Which expands to this HTML:
227    //
228    //     <a href="xxx...">x</a>
229    //     <a href="xxx...">x</a>
230    //     <a href="xxx...">x</a>
231    //
232    // This is quadratic growth, because it's filling in the area of a square.
233    // To prevent this, track how much it's expanded and limit it.
234    link_ref_expansion_limit: usize,
235
236    /// MDX validation errors collected during inline parsing.
237    pub(crate) mdx_errors: Vec<(usize, String)>,
238
239    // used by inline passes. store them here for reuse
240    inline_stack: InlineStack,
241    link_stack: LinkStack,
242    wikilink_stack: LinkStack,
243    code_delims: CodeDelims,
244    math_delims: MathDelims,
245}
246
247impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
248    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
249        // Only print the fields that have public types.
250        f.debug_struct("Parser")
251            .field("text", &self.inner.text)
252            .field("options", &self.inner.options)
253            .field("callbacks", &..)
254            .finish()
255    }
256}
257
258impl<'a> BrokenLink<'a> {
259    /// Moves the link into version with a static lifetime.
260    ///
261    /// The `reference` member is cloned to a Boxed or Inline version.
262    pub fn into_static(self) -> BrokenLink<'static> {
263        BrokenLink {
264            span: self.span.clone(),
265            link_type: self.link_type,
266            reference: self.reference.into_string().into(),
267        }
268    }
269}
270
271impl<'input> Parser<'input, DefaultParserCallbacks> {
272    /// Creates a new event iterator for a markdown string without any options enabled.
273    pub fn new(text: &'input str) -> Self {
274        Self::new_ext(text, Options::empty())
275    }
276
277    /// Creates a new event iterator for a markdown string with given options.
278    pub fn new_ext(text: &'input str, options: Options) -> Self {
279        Self::new_with_callbacks(text, options, DefaultParserCallbacks)
280    }
281}
282
283impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
284    /// Creates a new event iterator for markdown text with given options and callbacks.
285    ///
286    /// ```
287    /// # use satteri_pulldown_cmark::{BrokenLink, CowStr, Event, Options, Parser, ParserCallbacks, Tag};
288    /// struct CustomCallbacks;
289    /// impl<'input> ParserCallbacks<'input> for CustomCallbacks {
290    ///     fn handle_broken_link(
291    ///         &mut self,
292    ///         link: BrokenLink<'input>,
293    ///     ) -> Option<(CowStr<'input>, CowStr<'input>)> {
294    ///         Some(("https://target".into(), link.reference))
295    ///     }
296    /// }
297    ///
298    /// let mut parser =
299    ///     Parser::new_with_callbacks("[broken]", Options::empty(), CustomCallbacks);
300    ///
301    /// assert!(matches!(
302    ///     parser.nth(1),
303    ///     Some(Event::Start(Tag::Link { .. }))
304    /// ));
305    /// ```
306    ///
307    /// See the [`ParserCallbacks`] trait for a list of callbacks that can be overridden.
308    pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
309        let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
310        tree.reset();
311        let inline_stack = Default::default();
312        let link_stack = Default::default();
313        let wikilink_stack = Default::default();
314        let html_scan_guard = Default::default();
315        Parser {
316            callbacks,
317
318            inner: ParserInner {
319                text,
320                options,
321                tree,
322                allocs,
323                inline_stack,
324                link_stack,
325                wikilink_stack,
326                html_scan_guard,
327                // always allow 100KiB
328                link_ref_expansion_limit: text.len().max(100_000),
329                mdx_errors: Vec::new(),
330                code_delims: CodeDelims::new(),
331                math_delims: MathDelims::new(),
332            },
333        }
334    }
335
336    /// Returns a reference to the internal `RefDefs` object, which provides access
337    /// to the internal map of reference definitions.
338    pub fn reference_definitions(&self) -> &RefDefs<'_> {
339        &self.inner.allocs.refdefs
340    }
341
342    /// Returns MDX validation errors collected during parsing.
343    /// Only populated when [`Options::ENABLE_MDX`] is active.
344    pub fn mdx_errors(&self) -> &[(usize, String)] {
345        &self.inner.mdx_errors
346    }
347
348    /// Consumes the event iterator and produces an iterator that produces
349    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
350    /// range in the markdown source.
351    pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
352        OffsetIter { parser: self }
353    }
354}
355
356impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
357    /// In case the parser encounters any potential links that have a broken
358    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
359    /// the provided callback will be called with the reference name,
360    /// and the returned pair will be used as the link URL and title if it is not
361    /// `None`.
362    ///
363    /// This constructor is provided for backwards compatibility.
364    /// This and other callbacks can also be customized with [`Parser::new_with_callbacks`].
365    pub fn new_with_broken_link_callback(
366        text: &'input str,
367        options: Options,
368        broken_link_callback: Option<F>,
369    ) -> Self
370    where
371        F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
372    {
373        Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
374    }
375}
376
377impl<'input> ParserInner<'input> {
378    pub(crate) fn new(text: &'input str, options: Options) -> Self {
379        let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
380        tree.reset();
381        ParserInner {
382            text,
383            options,
384            tree,
385            allocs,
386            inline_stack: Default::default(),
387            link_stack: Default::default(),
388            wikilink_stack: Default::default(),
389            html_scan_guard: Default::default(),
390            link_ref_expansion_limit: text.len().max(100_000),
391            mdx_errors: firstpass_mdx_errors,
392            code_delims: CodeDelims::new(),
393            math_delims: MathDelims::new(),
394        }
395    }
396
397    /// Use a link label to fetch a type, url, and title.
398    ///
399    /// This function enforces the [`link_ref_expansion_limit`].
400    /// If it returns Some, it also consumes some of the fuel.
401    /// If we're out of fuel, it immediately returns None.
402    ///
403    /// The URL and title are found in the [`RefDefs`] map.
404    /// If they're not there, and a callback was provided by the user,
405    /// `handle_broken_link` will be invoked and given the opportunity
406    /// to provide a fallback.
407    ///
408    /// The link type (that's "link" or "image") depends on the usage site, and
409    /// is provided by the caller of this function.
410    /// This function returns a new one because, if it has to invoke a callback
411    /// to find the information, the link type is [mapped to an unknown type].
412    ///
413    /// [mapped to an unknown type]: crate::LinkType::to_unknown
414    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
415    fn fetch_link_type_url_title(
416        &mut self,
417        link_label: CowStr<'input>,
418        span: Range<usize>,
419        link_type: LinkType,
420        callbacks: &mut dyn ParserCallbacks<'input>,
421    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
422        if self.link_ref_expansion_limit == 0 {
423            return None;
424        }
425
426        let (link_type, url, title) = self
427            .allocs
428            .refdefs
429            .get(link_label.as_ref())
430            .map(|matching_def| {
431                // found a matching definition!
432                let title = matching_def
433                    .title
434                    .as_ref()
435                    .cloned()
436                    .unwrap_or_else(|| "".into());
437                let url = matching_def.dest.clone();
438                (link_type, url, title)
439            })
440            .or_else(|| {
441                // Construct a BrokenLink struct, which will be passed to the callback
442                let broken_link = BrokenLink {
443                    span,
444                    link_type,
445                    reference: link_label,
446                };
447
448                callbacks
449                    .handle_broken_link(broken_link)
450                    .map(|(url, title)| (link_type.to_unknown(), url, title))
451            })?;
452
453        // Limit expansion from link references.
454        // This isn't a problem for footnotes, because multiple references to the same one
455        // reuse the same node, but links/images get their HREF/SRC copied.
456        self.link_ref_expansion_limit = self
457            .link_ref_expansion_limit
458            .saturating_sub(url.len() + title.len());
459
460        Some((link_type, url, title))
461    }
462
463    /// Handle inline markup.
464    ///
465    /// When the parser encounters any item indicating potential inline markup, all
466    /// inline markup passes are run on the remainder of the chain.
467    ///
468    /// Note: there's some potential for optimization here, but that's future work.
469    pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
470        self.handle_inline_pass1(callbacks);
471        self.handle_emphasis_and_hard_break();
472    }
473
474    /// Handle inline HTML, code spans, and links.
475    ///
476    /// This function handles both inline HTML and code spans, because they have
477    /// the same precedence. It also handles links, even though they have lower
478    /// precedence, because the URL of links must not be processed.
479    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
480        let mut cur = self.tree.cur();
481        let mut prev = None;
482
483        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
484        let block_text = &self.text[..block_end];
485
486        while let Some(mut cur_ix) = cur {
487            match self.tree[cur_ix].item.body {
488                ItemBody::MaybeHtml => {
489                    // MDX inline JSX: check before HTML
490                    if self.options.contains(Options::ENABLE_MDX) {
491                        let start = self.tree[cur_ix].item.start;
492                        let next_byte = block_text.as_bytes().get(start + 1).copied();
493
494                        // In MDX, `<!` is not valid (no HTML comments).
495                        if next_byte == Some(b'!') {
496                            self.mdx_errors.push((
497                                start,
498                                "Unexpected character `!` (U+0021) before name, expected a \
499                                 character that can start a name, such as a letter, `$`, or `_` \
500                                 (note: to create a comment in MDX, use `{/* text */}`)"
501                                    .to_string(),
502                            ));
503                            self.tree[cur_ix].item.body = ItemBody::Text {
504                                backslash_escaped: false,
505                            };
506                            prev = cur;
507                            cur = self.tree[cur_ix].next;
508                            continue;
509                        }
510
511                        if let Some(total_len) =
512                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
513                        {
514                            let end = start + total_len;
515                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
516                            let raw = &block_text[start..end];
517                            let col = crate::mdx::column_at(block_text.as_bytes(), start);
518                            let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col);
519                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
520                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
521                            self.tree[cur_ix].item.end = end;
522                            self.tree[cur_ix].next = node;
523                            prev = cur;
524                            cur = node;
525                            if let Some(node_ix) = cur {
526                                self.tree[node_ix].item.start =
527                                    max(self.tree[node_ix].item.start, end);
528                            }
529                            continue;
530                        }
531
532                        // In MDX, `<` followed by a letter, `/`, or `>` must be
533                        // valid JSX.  If the JSX scan failed, record an error.
534                        if matches!(next_byte, Some(b'a'..=b'z' | b'A'..=b'Z' | b'/' | b'>')) {
535                            self.mdx_errors.push((
536                                start,
537                                "Unexpected character after `<`, expected a valid JSX tag \
538                                 (note: to create a link in MDX, use `[text](url)`)"
539                                    .to_string(),
540                            ));
541                        }
542
543                        self.tree[cur_ix].item.body = ItemBody::Text {
544                            backslash_escaped: false,
545                        };
546                        prev = cur;
547                        cur = self.tree[cur_ix].next;
548                        continue;
549                    }
550
551                    let next = self.tree[cur_ix].next;
552                    let autolink = if let Some(next_ix) = next {
553                        scan_autolink(block_text, self.tree[next_ix].item.start)
554                    } else {
555                        None
556                    };
557
558                    if let Some((ix, uri, link_type)) = autolink {
559                        let node = scan_nodes_to_ix(&self.tree, next, ix);
560                        let text_node = self.tree.create_node(Item {
561                            start: self.tree[cur_ix].item.start + 1,
562                            end: ix - 1,
563                            body: ItemBody::Text {
564                                backslash_escaped: false,
565                            },
566                        });
567                        let link_ix =
568                            self.allocs
569                                .allocate_link(link_type, uri, "".into(), "".into());
570                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
571                        self.tree[cur_ix].item.end = ix;
572                        self.tree[cur_ix].next = node;
573                        self.tree[cur_ix].child = Some(text_node);
574                        prev = cur;
575                        cur = node;
576                        if let Some(node_ix) = cur {
577                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
578                        }
579                        continue;
580                    } else {
581                        let inline_html = next.and_then(|next_ix| {
582                            self.scan_inline_html(
583                                block_text.as_bytes(),
584                                self.tree[next_ix].item.start,
585                            )
586                        });
587                        if let Some((span, ix)) = inline_html {
588                            let node = scan_nodes_to_ix(&self.tree, next, ix);
589                            self.tree[cur_ix].item.body = if !span.is_empty() {
590                                let converted_string =
591                                    String::from_utf8(span).expect("invalid utf8");
592                                ItemBody::OwnedInlineHtml(
593                                    self.allocs.allocate_cow(converted_string.into()),
594                                )
595                            } else {
596                                ItemBody::InlineHtml
597                            };
598                            self.tree[cur_ix].item.end = ix;
599                            self.tree[cur_ix].next = node;
600                            prev = cur;
601                            cur = node;
602                            if let Some(node_ix) = cur {
603                                self.tree[node_ix].item.start =
604                                    max(self.tree[node_ix].item.start, ix);
605                            }
606                            continue;
607                        }
608                    }
609                    self.tree[cur_ix].item.body = ItemBody::Text {
610                        backslash_escaped: false,
611                    };
612                }
613                ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
614                    if preceded_by_backslash {
615                        self.tree[cur_ix].item.body = ItemBody::Text {
616                            backslash_escaped: true,
617                        };
618                        prev = cur;
619                        cur = self.tree[cur_ix].next;
620                        continue;
621                    }
622                    // Count consecutive $ from the opening position
623                    let mut open_count = 1usize;
624                    let mut open_end = cur_ix;
625                    {
626                        let mut peek = self.tree[cur_ix].next;
627                        while let Some(peek_ix) = peek {
628                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
629                                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
630                            {
631                                open_count += 1;
632                                open_end = peek_ix;
633                                peek = self.tree[peek_ix].next;
634                            } else {
635                                break;
636                            }
637                        }
638                    }
639
640                    // Scan forward for a matching run of the same count
641                    let mut scan = self.tree[open_end].next;
642                    let mut close_ix = None;
643                    while let Some(scan_ix) = scan {
644                        if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
645                            let mut run = 1usize;
646                            let mut run_end = scan_ix;
647                            let mut peek = self.tree[scan_ix].next;
648                            while let Some(peek_ix) = peek {
649                                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
650                                    && self.tree[peek_ix].item.start == self.tree[run_end].item.end
651                                {
652                                    run += 1;
653                                    run_end = peek_ix;
654                                    peek = self.tree[peek_ix].next;
655                                } else {
656                                    break;
657                                }
658                            }
659                            if run == open_count {
660                                close_ix = Some(scan_ix);
661                                break;
662                            }
663                            // Skip past this non-matching run
664                            scan = self.tree[run_end].next;
665                            continue;
666                        }
667                        scan = self.tree[scan_ix].next;
668                    }
669
670                    if let Some(scan_ix) = close_ix {
671                        self.make_math_span(cur_ix, scan_ix);
672                    } else {
673                        let mut fail_ix = cur_ix;
674                        loop {
675                            self.tree[fail_ix].item.body = ItemBody::Text {
676                                backslash_escaped: false,
677                            };
678                            if fail_ix == open_end {
679                                break;
680                            }
681                            if let Some(next) = self.tree[fail_ix].next {
682                                fail_ix = next;
683                            } else {
684                                break;
685                            }
686                        }
687                    }
688                }
689                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
690                    if preceded_by_backslash {
691                        search_count -= 1;
692                        if search_count == 0 {
693                            self.tree[cur_ix].item.body = ItemBody::Text {
694                                backslash_escaped: true,
695                            };
696                            prev = cur;
697                            cur = self.tree[cur_ix].next;
698                            continue;
699                        }
700                    }
701
702                    if self.code_delims.is_populated() {
703                        // we have previously scanned all codeblock delimiters,
704                        // so we can reuse that work
705                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
706                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
707                        } else {
708                            self.tree[cur_ix].item.body = ItemBody::Text {
709                                backslash_escaped: preceded_by_backslash,
710                            };
711                        }
712                    } else {
713                        // we haven't previously scanned all codeblock delimiters,
714                        // so walk the AST
715                        let mut scan = if search_count > 0 {
716                            self.tree[cur_ix].next
717                        } else {
718                            None
719                        };
720                        while let Some(scan_ix) = scan {
721                            if let ItemBody::MaybeCode(delim_count, _) =
722                                self.tree[scan_ix].item.body
723                            {
724                                if search_count == delim_count {
725                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
726                                    self.code_delims.clear();
727                                    break;
728                                } else {
729                                    self.code_delims.insert(delim_count, scan_ix);
730                                }
731                            }
732                            scan = self.tree[scan_ix].next;
733                        }
734                        if scan.is_none() {
735                            self.tree[cur_ix].item.body = ItemBody::Text {
736                                backslash_escaped: preceded_by_backslash,
737                            };
738                        }
739                    }
740                }
741                ItemBody::MaybeLinkOpen => {
742                    self.tree[cur_ix].item.body = ItemBody::Text {
743                        backslash_escaped: false,
744                    };
745                    let link_open_doubled = self.tree[cur_ix]
746                        .next
747                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
748                        .unwrap_or(false);
749                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
750                        self.wikilink_stack.push(LinkStackEl {
751                            node: cur_ix,
752                            ty: LinkStackTy::Link,
753                        });
754                    }
755                    self.link_stack.push(LinkStackEl {
756                        node: cur_ix,
757                        ty: LinkStackTy::Link,
758                    });
759                }
760                ItemBody::MaybeImage => {
761                    self.tree[cur_ix].item.body = ItemBody::Text {
762                        backslash_escaped: false,
763                    };
764                    let link_open_doubled = self.tree[cur_ix]
765                        .next
766                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
767                        .unwrap_or(false);
768                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
769                        self.wikilink_stack.push(LinkStackEl {
770                            node: cur_ix,
771                            ty: LinkStackTy::Image,
772                        });
773                    }
774                    self.link_stack.push(LinkStackEl {
775                        node: cur_ix,
776                        ty: LinkStackTy::Image,
777                    });
778                }
779                ItemBody::MaybeLinkClose(could_be_ref) => {
780                    self.tree[cur_ix].item.body = ItemBody::Text {
781                        backslash_escaped: false,
782                    };
783                    let tos_link = self.link_stack.pop();
784                    if self.options.contains(Options::ENABLE_WIKILINKS)
785                        && self.tree[cur_ix]
786                            .next
787                            .map(|ix| {
788                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
789                            })
790                            .unwrap_or(false)
791                    {
792                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
793                            cur = self.tree[node].next;
794                            continue;
795                        }
796                    }
797                    if let Some(tos) = tos_link {
798                        // skip rendering if already in a link, unless its an
799                        // image
800                        if tos.ty != LinkStackTy::Image
801                            && matches!(
802                                self.tree[self.tree.peek_up().unwrap()].item.body,
803                                ItemBody::Link(..)
804                            )
805                        {
806                            continue;
807                        }
808                        if tos.ty == LinkStackTy::Disabled {
809                            continue;
810                        }
811                        let next = self.tree[cur_ix].next;
812                        if let Some((next_ix, url, title)) =
813                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
814                        {
815                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
816                            if let Some(prev_ix) = prev {
817                                self.tree[prev_ix].next = None;
818                            }
819                            cur = Some(tos.node);
820                            cur_ix = tos.node;
821                            let link_ix =
822                                self.allocs
823                                    .allocate_link(LinkType::Inline, url, title, "".into());
824                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
825                                ItemBody::Image(link_ix)
826                            } else {
827                                ItemBody::Link(link_ix)
828                            };
829                            self.tree[cur_ix].child = self.tree[cur_ix].next;
830                            self.tree[cur_ix].next = next_node;
831                            self.tree[cur_ix].item.end = next_ix;
832                            if let Some(next_node_ix) = next_node {
833                                self.tree[next_node_ix].item.start =
834                                    max(self.tree[next_node_ix].item.start, next_ix);
835                            }
836
837                            if tos.ty == LinkStackTy::Link {
838                                self.disable_all_links();
839                            }
840                        } else {
841                            // Footnote-first check: if the first bracket content is
842                            // `[^X]` where `X` has a matching footnote definition,
843                            // emit a FootnoteReference regardless of what follows.
844                            // Otherwise `[^X][Y]` would be resolved as a link whose
845                            // text happens to start with `^`, which diverges from
846                            // remark-gfm's two-node parse (footnote + trailing ref).
847                            let first_bracket_start = self.tree[tos.node].item.start;
848                            let first_bracket_end = self.tree[cur_ix].item.end;
849                            let first_bracket_text =
850                                &self.text[first_bracket_start..first_bracket_end];
851                            if let Some((_, ReferenceLabel::Footnote(footlabel))) =
852                                scan_link_label(&self.tree, first_bracket_text, self.options)
853                            {
854                                if self.allocs.footdefs.contains(&footlabel) {
855                                    let footref = self.allocs.allocate_cow(footlabel);
856                                    if let Some(def) = self
857                                        .allocs
858                                        .footdefs
859                                        .get_mut(self.allocs.cows[footref.0].to_owned())
860                                    {
861                                        def.use_count += 1;
862                                    }
863                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
864                                        self.tree[tos.node].next = Some(cur_ix);
865                                        self.tree[tos.node].child = None;
866                                        self.tree[tos.node].item.body =
867                                            ItemBody::SynthesizeChar('!');
868                                        self.tree[cur_ix].item.start =
869                                            self.tree[tos.node].item.start + 1;
870                                        self.tree[tos.node].item.end =
871                                            self.tree[tos.node].item.start + 1;
872                                        cur_ix
873                                    } else {
874                                        tos.node
875                                    };
876                                    self.tree[footnote_ix].next = next;
877                                    self.tree[footnote_ix].child = None;
878                                    self.tree[footnote_ix].item.body =
879                                        ItemBody::FootnoteReference(footref);
880                                    self.tree[footnote_ix].item.end = first_bracket_end;
881                                    prev = Some(footnote_ix);
882                                    cur = next;
883                                    self.link_stack.clear();
884                                    continue;
885                                }
886                            }
887                            // ok, so its not an inline link. maybe it is a reference
888                            // to a defined link?
889                            let scan_result =
890                                scan_reference(&self.tree, block_text, next, self.options);
891                            let (node_after_link, link_type) = match scan_result {
892                                // [label][reference]
893                                RefScan::LinkLabel(_, end_ix) => {
894                                    // Toggle reference viability of the last closing bracket,
895                                    // so that we can skip it on future iterations in case
896                                    // it fails in this one. In particular, we won't call
897                                    // the broken link callback twice on one reference.
898                                    let reference_close_node = if let Some(node) =
899                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
900                                    {
901                                        node
902                                    } else {
903                                        continue;
904                                    };
905                                    self.tree[reference_close_node].item.body =
906                                        ItemBody::MaybeLinkClose(false);
907                                    let next_node = self.tree[reference_close_node].next;
908
909                                    (next_node, LinkType::Reference)
910                                }
911                                // [reference][]
912                                RefScan::Collapsed(next_node) => {
913                                    // This reference has already been tried, and it's not
914                                    // valid. Skip it.
915                                    if !could_be_ref {
916                                        continue;
917                                    }
918                                    (next_node, LinkType::Collapsed)
919                                }
920                                // [X][^Y] — full-reference form with a footnote-shaped
921                                // second label. Per CommonMark the full-ref has to
922                                // resolve to a link definition, which `^Y` never will;
923                                // shortcut fallback is NOT tried. Leave both brackets
924                                // literal and let `[^Y]` be parsed as a footnote on
925                                // its own MaybeLinkClose iteration.
926                                RefScan::UnexpectedFootnote => continue,
927                                // [shortcut]
928                                //
929                                // [shortcut]: /blah
930                                RefScan::Failed => {
931                                    if !could_be_ref {
932                                        continue;
933                                    }
934                                    (next, LinkType::Shortcut)
935                                }
936                            };
937
938                            // FIXME: references and labels are mixed in the naming of variables
939                            // below. Disambiguate!
940
941                            // (label, source_ix end)
942                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
943                                RefScan::LinkLabel(l, end_ix) => {
944                                    Some((ReferenceLabel::Link(l), end_ix))
945                                }
946                                RefScan::Collapsed(..)
947                                | RefScan::Failed
948                                | RefScan::UnexpectedFootnote => {
949                                    // No label? maybe it is a shortcut reference
950                                    let label_start = self.tree[tos.node].item.end - 1;
951                                    let label_end = self.tree[cur_ix].item.end;
952                                    scan_link_label(
953                                        &self.tree,
954                                        &self.text[label_start..label_end],
955                                        self.options,
956                                    )
957                                    .map(|(ix, label)| (label, label_start + ix))
958                                    .filter(|(_, end)| *end == label_end)
959                                }
960                            };
961
962                            let id = match &label {
963                                Some(
964                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
965                                ) => l.clone(),
966                                None => "".into(),
967                            };
968
969                            // see if it's a footnote reference
970                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
971                                let footref = self.allocs.allocate_cow(l);
972                                if let Some(def) = self
973                                    .allocs
974                                    .footdefs
975                                    .get_mut(self.allocs.cows[footref.0].to_owned())
976                                {
977                                    def.use_count += 1;
978                                }
979                                if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
980                                    // If this came from a MaybeImage, then the `!` prefix
981                                    // isn't part of the footnote reference.
982                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
983                                        self.tree[tos.node].next = Some(cur_ix);
984                                        self.tree[tos.node].child = None;
985                                        self.tree[tos.node].item.body =
986                                            ItemBody::SynthesizeChar('!');
987                                        self.tree[cur_ix].item.start =
988                                            self.tree[tos.node].item.start + 1;
989                                        self.tree[tos.node].item.end =
990                                            self.tree[tos.node].item.start + 1;
991                                        cur_ix
992                                    } else {
993                                        tos.node
994                                    };
995                                    // use `next` instead of `node_after_link` because
996                                    // node_after_link is calculated for a [collapsed][] link,
997                                    // which footnotes don't support.
998                                    self.tree[footnote_ix].next = next;
999                                    self.tree[footnote_ix].child = None;
1000                                    self.tree[footnote_ix].item.body =
1001                                        ItemBody::FootnoteReference(footref);
1002                                    self.tree[footnote_ix].item.end = end;
1003                                    prev = Some(footnote_ix);
1004                                    cur = next;
1005                                    self.link_stack.clear();
1006                                    continue;
1007                                }
1008                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
1009                                if let Some((def_link_type, url, title)) = self
1010                                    .fetch_link_type_url_title(
1011                                        link_label,
1012                                        (self.tree[tos.node].item.start)..end,
1013                                        link_type,
1014                                        callbacks,
1015                                    )
1016                                {
1017                                    let link_ix =
1018                                        self.allocs.allocate_link(def_link_type, url, title, id);
1019                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
1020                                    {
1021                                        ItemBody::Image(link_ix)
1022                                    } else {
1023                                        ItemBody::Link(link_ix)
1024                                    };
1025                                    let label_node = self.tree[tos.node].next;
1026
1027                                    // lets do some tree surgery to add the link to the tree
1028                                    // 1st: skip the label node and close node
1029                                    self.tree[tos.node].next = node_after_link;
1030
1031                                    // then, if it exists, add the label node as a child to the link node
1032                                    if label_node != cur {
1033                                        self.tree[tos.node].child = label_node;
1034
1035                                        // finally: disconnect list of children
1036                                        if let Some(prev_ix) = prev {
1037                                            self.tree[prev_ix].next = None;
1038                                        }
1039                                    }
1040
1041                                    self.tree[tos.node].item.end = end;
1042
1043                                    // set up cur so next node will be node_after_link
1044                                    cur = Some(tos.node);
1045                                    cur_ix = tos.node;
1046
1047                                    if tos.ty == LinkStackTy::Link {
1048                                        self.disable_all_links();
1049                                    }
1050                                }
1051                            }
1052                        }
1053                    }
1054                }
1055                _ => {}
1056            }
1057            prev = cur;
1058            cur = self.tree[cur_ix].next;
1059        }
1060        self.link_stack.clear();
1061        self.wikilink_stack.clear();
1062        self.code_delims.clear();
1063        self.math_delims.clear();
1064    }
1065
1066    /// Handles a wikilink.
1067    ///
1068    /// This function may bail early in case the link is malformed, so this
1069    /// acts as a control flow guard. Returns the link node if a wikilink was
1070    /// found and created.
1071    fn handle_wikilink(
1072        &mut self,
1073        block_text: &'input str,
1074        cur_ix: TreeIndex,
1075        prev: Option<TreeIndex>,
1076    ) -> Option<TreeIndex> {
1077        let next_ix = self.tree[cur_ix].next.unwrap();
1078        // this is a wikilink closing delim, try popping from
1079        // the wikilink stack
1080        if let Some(tos) = self.wikilink_stack.pop() {
1081            if tos.ty == LinkStackTy::Disabled {
1082                return None;
1083            }
1084            // fetches the beginning of the wikilink body
1085            let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1086                // skip if no next node exists, like at end of input
1087                return None;
1088            };
1089            let start_ix = self.tree[body_node].item.start;
1090            let end_ix = self.tree[cur_ix].item.start;
1091            let wikilink = match scan_wikilink_pipe(
1092                block_text,
1093                start_ix, // bounded by closing tag
1094                end_ix - start_ix,
1095            ) {
1096                Some((rest, wikitext)) => {
1097                    // bail early if the wikiname would be empty
1098                    if wikitext.is_empty() {
1099                        return None;
1100                    }
1101                    // [[WikiName|rest]]
1102                    let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1103                    if let Some(body_node) = body_node {
1104                        // break node so passes can actually format
1105                        // the display text
1106                        self.tree[body_node].item.start = rest;
1107                        Some((true, body_node, wikitext))
1108                    } else {
1109                        None
1110                    }
1111                }
1112                None => {
1113                    let wikitext = &block_text[start_ix..end_ix];
1114                    // bail early if the wikiname would be empty
1115                    if wikitext.is_empty() {
1116                        return None;
1117                    }
1118                    let body_node = self.tree.create_node(Item {
1119                        start: start_ix,
1120                        end: end_ix,
1121                        body: ItemBody::Text {
1122                            backslash_escaped: false,
1123                        },
1124                    });
1125                    Some((false, body_node, wikitext))
1126                }
1127            };
1128
1129            if let Some((has_pothole, body_node, wikiname)) = wikilink {
1130                let link_ix = self.allocs.allocate_link(
1131                    LinkType::WikiLink { has_pothole },
1132                    wikiname.into(),
1133                    "".into(),
1134                    "".into(),
1135                );
1136                if let Some(prev_ix) = prev {
1137                    self.tree[prev_ix].next = None;
1138                }
1139                if tos.ty == LinkStackTy::Image {
1140                    self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1141                } else {
1142                    self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1143                }
1144                self.tree[tos.node].child = Some(body_node);
1145                self.tree[tos.node].next = self.tree[next_ix].next;
1146                self.tree[tos.node].item.end = end_ix + 2;
1147                self.disable_all_links();
1148                return Some(tos.node);
1149            }
1150        }
1151
1152        None
1153    }
1154
1155    fn handle_emphasis_and_hard_break(&mut self) {
1156        let mut prev = None;
1157        let mut prev_ix: TreeIndex;
1158        let mut cur = self.tree.cur();
1159
1160        let mut single_quote_open: Option<TreeIndex> = None;
1161        let mut double_quote_open: bool = false;
1162
1163        while let Some(mut cur_ix) = cur {
1164            match self.tree[cur_ix].item.body {
1165                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
1166                    let run_length = count;
1167                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1168                    let both = can_open && can_close;
1169                    if can_close {
1170                        while let Some(el) =
1171                            self.inline_stack
1172                                .find_match(&mut self.tree, c, run_length, both)
1173                        {
1174                            // have a match!
1175                            if let Some(prev_ix) = prev {
1176                                self.tree[prev_ix].next = None;
1177                            }
1178                            let match_count = min(count, el.count);
1179                            // start, end are tree node indices
1180                            let mut end = cur_ix - 1;
1181                            let mut start = el.start + el.count;
1182
1183                            // work from the inside out
1184                            while start > el.start + el.count - match_count {
1185                                let inc = if start > el.start + el.count - match_count + 1 {
1186                                    2
1187                                } else {
1188                                    1
1189                                };
1190                                let ty = if c == b'~' {
1191                                    if inc == 2 {
1192                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1193                                            ItemBody::Strikethrough
1194                                        } else {
1195                                            ItemBody::Text {
1196                                                backslash_escaped: false,
1197                                            }
1198                                        }
1199                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1200                                        ItemBody::Subscript
1201                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1202                                        ItemBody::Strikethrough
1203                                    } else {
1204                                        ItemBody::Text {
1205                                            backslash_escaped: false,
1206                                        }
1207                                    }
1208                                } else if c == b'^' {
1209                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1210                                        ItemBody::Superscript
1211                                    } else {
1212                                        ItemBody::Text {
1213                                            backslash_escaped: false,
1214                                        }
1215                                    }
1216                                } else if inc == 2 {
1217                                    ItemBody::Strong
1218                                } else {
1219                                    ItemBody::Emphasis
1220                                };
1221
1222                                let root = start - inc;
1223                                end = end + inc;
1224                                self.tree[root].item.body = ty;
1225                                self.tree[root].item.end = self.tree[end].item.end;
1226                                self.tree[root].child = Some(start);
1227                                self.tree[root].next = None;
1228                                start = root;
1229                            }
1230
1231                            // set next for top most emph level
1232                            prev_ix = el.start + el.count - match_count;
1233                            prev = Some(prev_ix);
1234                            cur = self.tree[cur_ix + match_count - 1].next;
1235                            self.tree[prev_ix].next = cur;
1236
1237                            if el.count > match_count {
1238                                self.inline_stack.push(InlineEl {
1239                                    start: el.start,
1240                                    count: el.count - match_count,
1241                                    run_length: el.run_length,
1242                                    c: el.c,
1243                                    both: el.both,
1244                                })
1245                            }
1246                            count -= match_count;
1247                            if count > 0 {
1248                                cur_ix = cur.unwrap();
1249                            } else {
1250                                break;
1251                            }
1252                        }
1253                    }
1254                    if count > 0 {
1255                        if can_open {
1256                            self.inline_stack.push(InlineEl {
1257                                start: cur_ix,
1258                                run_length,
1259                                count,
1260                                c,
1261                                both,
1262                            });
1263                        } else {
1264                            for i in 0..count {
1265                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1266                                    backslash_escaped: false,
1267                                };
1268                            }
1269                        }
1270                        prev_ix = cur_ix + count - 1;
1271                        prev = Some(prev_ix);
1272                        cur = self.tree[prev_ix].next;
1273                    }
1274                }
1275                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1276                    self.tree[cur_ix].item.body = match c {
1277                        b'\'' => {
1278                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
1279                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1280                                single_quote_open = None;
1281                            } else if can_open {
1282                                single_quote_open = Some(cur_ix);
1283                            }
1284                            ItemBody::SynthesizeChar('’')
1285                        }
1286                        _ /* double quote */ => {
1287                            if can_close && double_quote_open {
1288                                double_quote_open = false;
1289                                ItemBody::SynthesizeChar('”')
1290                            } else {
1291                                if can_open && !double_quote_open {
1292                                    double_quote_open = true;
1293                                }
1294                                ItemBody::SynthesizeChar('“')
1295                            }
1296                        }
1297                    };
1298                    prev = cur;
1299                    cur = self.tree[cur_ix].next;
1300                }
1301                ItemBody::HardBreak(true) => {
1302                    if self.tree[cur_ix].next.is_none() {
1303                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1304                    }
1305                    prev = cur;
1306                    cur = self.tree[cur_ix].next;
1307                }
1308                _ => {
1309                    prev = cur;
1310                    cur = self.tree[cur_ix].next;
1311                }
1312            }
1313        }
1314        self.inline_stack.pop_all(&mut self.tree);
1315    }
1316
1317    fn disable_all_links(&mut self) {
1318        self.link_stack.disable_all_links();
1319        self.wikilink_stack.disable_all_links();
1320    }
1321
1322    /// Returns next byte index, url and title.
1323    fn scan_inline_link(
1324        &self,
1325        underlying: &'input str,
1326        mut ix: usize,
1327        node: Option<TreeIndex>,
1328    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1329        if underlying.as_bytes().get(ix) != Some(&b'(') {
1330            return None;
1331        }
1332        ix += 1;
1333
1334        let scan_separator = |ix: &mut usize| {
1335            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1336            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1337                *ix += bl;
1338                *ix += skip_container_prefixes(
1339                    &self.tree,
1340                    &underlying.as_bytes()[*ix..],
1341                    self.options,
1342                );
1343            }
1344            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1345        };
1346
1347        scan_separator(&mut ix);
1348
1349        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1350        let dest = unescape(dest, self.tree.is_in_table());
1351        ix += dest_length;
1352
1353        scan_separator(&mut ix);
1354
1355        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1356            ix += bytes_scanned;
1357            scan_separator(&mut ix);
1358            t
1359        } else {
1360            "".into()
1361        };
1362        if underlying.as_bytes().get(ix) != Some(&b')') {
1363            return None;
1364        }
1365        ix += 1;
1366
1367        Some((ix, dest, title))
1368    }
1369
1370    // returns (bytes scanned, title cow)
1371    fn scan_link_title(
1372        &self,
1373        text: &'input str,
1374        start_ix: usize,
1375        node: Option<TreeIndex>,
1376    ) -> Option<(usize, CowStr<'input>)> {
1377        let bytes = text.as_bytes();
1378        let open = match bytes.get(start_ix) {
1379            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1380            _ => return None,
1381        };
1382        let close = if open == b'(' { b')' } else { open };
1383
1384        let mut title = String::new();
1385        let mut mark = start_ix + 1;
1386        let mut i = start_ix + 1;
1387
1388        while i < bytes.len() {
1389            let c = bytes[i];
1390
1391            if c == close {
1392                let cow = if title.is_empty() {
1393                    (i - start_ix + 1, text[mark..i].into())
1394                } else {
1395                    title.push_str(&text[mark..i]);
1396                    (i - start_ix + 1, title.into())
1397                };
1398
1399                return Some(cow);
1400            }
1401            if c == open {
1402                return None;
1403            }
1404
1405            if c == b'\n' || c == b'\r' {
1406                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1407                    if self.tree[node_ix].item.start > i {
1408                        title.push_str(&text[mark..i]);
1409                        title.push('\n');
1410                        i = self.tree[node_ix].item.start;
1411                        mark = i;
1412                        continue;
1413                    }
1414                }
1415            }
1416            if c == b'&' {
1417                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1418                    title.push_str(&text[mark..i]);
1419                    title.push_str(&value);
1420                    i += n;
1421                    mark = i;
1422                    continue;
1423                }
1424            }
1425            if self.tree.is_in_table()
1426                && c == b'\\'
1427                && i + 2 < bytes.len()
1428                && bytes[i + 1] == b'\\'
1429                && bytes[i + 2] == b'|'
1430            {
1431                // this runs if there are an even number of pipes in a table
1432                // if it's odd, then it gets parsed as normal
1433                title.push_str(&text[mark..i]);
1434                i += 2;
1435                mark = i;
1436            }
1437            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1438                title.push_str(&text[mark..i]);
1439                i += 1;
1440                mark = i;
1441            }
1442
1443            i += 1;
1444        }
1445
1446        None
1447    }
1448
1449    fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
1450        // Find the end of the opening run of consecutive $ tokens
1451        let mut open_end = open;
1452        {
1453            let mut peek = self.tree[open].next;
1454            while let Some(peek_ix) = peek {
1455                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
1456                    && self.tree[peek_ix].item.start == self.tree[open_end].item.end
1457                    && peek_ix != close
1458                {
1459                    open_end = peek_ix;
1460                    peek = self.tree[peek_ix].next;
1461                } else {
1462                    break;
1463                }
1464            }
1465        }
1466        // Find the end of the closing run
1467        let mut close_end = close;
1468        {
1469            let mut peek = self.tree[close].next;
1470            while let Some(peek_ix) = peek {
1471                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
1472                    && self.tree[peek_ix].item.start == self.tree[close_end].item.end
1473                {
1474                    close_end = peek_ix;
1475                    peek = self.tree[peek_ix].next;
1476                } else {
1477                    break;
1478                }
1479            }
1480        }
1481
1482        let span_start = self.tree[open_end].item.end;
1483        let span_end = self.tree[close].item.start;
1484
1485        if span_start > span_end {
1486            self.tree[open].item.body = ItemBody::Text {
1487                backslash_escaped: false,
1488            };
1489            return;
1490        }
1491
1492        let spanned_text = &self.text[span_start..span_end];
1493        let spanned_bytes = spanned_text.as_bytes();
1494        let mut buf: Option<String> = None;
1495
1496        let mut start_ix = 0;
1497        let mut ix = 0;
1498        while ix < spanned_bytes.len() {
1499            let c = spanned_bytes[ix];
1500            if c == b'\r' || c == b'\n' {
1501                ix += 1;
1502                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1503                buf.push_str(&spanned_text[start_ix..ix]);
1504                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1505                start_ix = ix;
1506            } else if c == b'\\'
1507                && spanned_bytes.get(ix + 1) == Some(&b'|')
1508                && self.tree.is_in_table()
1509            {
1510                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1511                buf.push_str(&spanned_text[start_ix..ix]);
1512                buf.push('|');
1513                ix += 2;
1514                start_ix = ix;
1515            } else {
1516                ix += 1;
1517            }
1518        }
1519
1520        let (opening, closing, all_spaces) = {
1521            let s = if let Some(buf) = &mut buf {
1522                buf.push_str(&spanned_text[start_ix..]);
1523                &buf[..]
1524            } else {
1525                spanned_text
1526            };
1527            (
1528                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
1529                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
1530                s.bytes().all(|b| b == b' ' || b == b'\n'),
1531            )
1532        };
1533
1534        let cow: CowStr<'input> = if !all_spaces && opening && closing {
1535            if let Some(mut buf) = buf {
1536                if !buf.is_empty() {
1537                    buf.remove(0);
1538                    buf.pop();
1539                }
1540                buf.into()
1541            } else {
1542                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
1543            }
1544        } else if let Some(buf) = buf {
1545            buf.into()
1546        } else {
1547            spanned_text.into()
1548        };
1549
1550        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
1551        self.tree[open].item.end = self.tree[close_end].item.end;
1552        self.tree[open].next = self.tree[close_end].next;
1553    }
1554
1555    /// Make a code span.
1556    ///
1557    /// Both `open` and `close` are matching MaybeCode items.
1558    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1559        let span_start = self.tree[open].item.end;
1560        let span_end = self.tree[close].item.start;
1561        let mut buf: Option<String> = None;
1562
1563        let spanned_text = &self.text[span_start..span_end];
1564        let spanned_bytes = spanned_text.as_bytes();
1565        let mut start_ix = 0;
1566        let mut ix = 0;
1567        while ix < spanned_bytes.len() {
1568            let c = spanned_bytes[ix];
1569            if c == b'\r' || c == b'\n' {
1570                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1571                buf.push_str(&spanned_text[start_ix..ix]);
1572                buf.push('\n');
1573                ix += 1;
1574                if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
1575                    ix += 1;
1576                }
1577                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1578                start_ix = ix;
1579            } else if c == b'\\'
1580                && spanned_bytes.get(ix + 1) == Some(&b'|')
1581                && self.tree.is_in_table()
1582            {
1583                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1584                buf.push_str(&spanned_text[start_ix..ix]);
1585                buf.push('|');
1586                ix += 2;
1587                start_ix = ix;
1588            } else {
1589                ix += 1;
1590            }
1591        }
1592
1593        let (opening, closing, all_spaces) = {
1594            let s = if let Some(buf) = &mut buf {
1595                buf.push_str(&spanned_text[start_ix..]);
1596                &buf[..]
1597            } else {
1598                spanned_text
1599            };
1600            (
1601                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
1602                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
1603                s.bytes().all(|b| b == b' ' || b == b'\n'),
1604            )
1605        };
1606
1607        let cow: CowStr<'input> = if !all_spaces && opening && closing {
1608            if let Some(mut buf) = buf {
1609                if !buf.is_empty() {
1610                    buf.remove(0);
1611                    buf.pop();
1612                }
1613                buf.into()
1614            } else {
1615                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
1616            }
1617        } else if let Some(buf) = buf {
1618            buf.into()
1619        } else {
1620            spanned_text.into()
1621        };
1622
1623        if preceding_backslash {
1624            self.tree[open].item.body = ItemBody::Text {
1625                backslash_escaped: true,
1626            };
1627            self.tree[open].item.end = self.tree[open].item.start + 1;
1628            self.tree[open].next = Some(close);
1629            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1630            self.tree[close].item.start = self.tree[open].item.start + 1;
1631        } else {
1632            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1633            self.tree[open].item.end = self.tree[close].item.end;
1634            self.tree[open].next = self.tree[close].next;
1635        }
1636
1637        // MDX: errors recorded in pass 1 for `{` inside what turned out to be a
1638        // code span are false positives — the `{` is literal text.
1639        if !self.mdx_errors.is_empty() {
1640            self.mdx_errors
1641                .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
1642        }
1643    }
1644
1645    /// On success, returns a buffer containing the inline html and byte offset.
1646    /// When no bytes were skipped, the buffer will be empty and the html can be
1647    /// represented as a subslice of the input string.
1648    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1649        let c = *bytes.get(ix)?;
1650        if c == b'!' {
1651            Some((
1652                vec![],
1653                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1654            ))
1655        } else if c == b'?' {
1656            Some((
1657                vec![],
1658                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1659            ))
1660        } else {
1661            let (span, i) = scan_html_block_inner(
1662                // Subtract 1 to include the < character
1663                &bytes[(ix - 1)..],
1664                Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
1665            )?;
1666            Some((span, i + ix - 1))
1667        }
1668    }
1669}
1670
1671/// Returns number of containers scanned.
1672pub(crate) fn scan_containers(
1673    tree: &Tree<Item>,
1674    line_start: &mut LineStart<'_>,
1675    options: Options,
1676) -> usize {
1677    let mut i = 0;
1678    for &node_ix in tree.walk_spine() {
1679        match tree[node_ix].item.body {
1680            ItemBody::BlockQuote(..) => {
1681                let save = line_start.clone();
1682                let _ = line_start.scan_space(3);
1683                if !line_start.scan_blockquote_marker() {
1684                    *line_start = save;
1685                    break;
1686                }
1687            }
1688            ItemBody::ListItem(indent, _) => {
1689                let save = line_start.clone();
1690                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1691                    *line_start = save;
1692                    break;
1693                }
1694            }
1695            ItemBody::DefinitionListDefinition(indent) => {
1696                let save = line_start.clone();
1697                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1698                    *line_start = save;
1699                    break;
1700                }
1701            }
1702            ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
1703                let save = line_start.clone();
1704                if !line_start.scan_space(4) && !line_start.is_at_eol() {
1705                    *line_start = save;
1706                    break;
1707                }
1708            }
1709            _ => (),
1710        }
1711        i += 1;
1712    }
1713    i
1714}
1715
/// Returns how many leading bytes of `bytes` `scan_containers` consumes as
/// container prefixes for the containers on the spine of `tree`.
pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
    let mut line_start = LineStart::new(bytes);
    // Only the consumed byte count is needed, not the number of containers.
    let _ = scan_containers(tree, &mut line_start, options);
    line_start.bytes_scanned()
}
1721
1722impl Tree<Item> {
1723    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1724        if end > start {
1725            if let Some(ix) = self.cur() {
1726                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1727                {
1728                    self[ix].item.end = end;
1729                    return;
1730                }
1731            }
1732            self.append(Item {
1733                start,
1734                end,
1735                body: ItemBody::Text { backslash_escaped },
1736            });
1737        }
1738    }
1739    /// Returns true if the current node is inside a table.
1740    ///
1741    /// If `cur` is an ItemBody::Table, it would return false,
1742    /// but since the `TableRow` and `TableHead` and `TableCell`
1743    /// are children of the table, anything doing inline parsing
1744    /// doesn't need to care about that.
1745    pub(crate) fn is_in_table(&self) -> bool {
1746        fn might_be_in_table(item: &Item) -> bool {
1747            item.body.is_inline()
1748                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1749                    | ItemBody::TableCell)
1750        }
1751        for &ix in self.walk_spine().rev() {
1752            if matches!(self[ix].item.body, ItemBody::Table(_)) {
1753                return true;
1754            }
1755            if !might_be_in_table(&self[ix].item) {
1756                return false;
1757            }
1758        }
1759        false
1760    }
1761}
1762
/// A run of inline delimiters held on the `InlineStack` until a matching
/// closer is found or the run is turned back into plain text.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// Tree index of the first delimiter item of the run; the following
    /// `count - 1` indices belong to the same run.
    start: TreeIndex,
    /// number of delimiters available for matching
    count: usize,
    /// length of the run that these delimiters came from
    run_length: usize,
    /// b'*', b'_', b'~' or b'^'
    c: u8,
    /// can both open and close
    both: bool,
}
1776
/// Stack of delimiter runs that have not been matched yet, together with
/// per-delimiter search bounds.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// Pending delimiter runs, oldest first.
    stack: Vec<InlineEl>,
    // Lower bounds for matching indices in the stack. For example
    // a strikethrough delimiter will never match with any element
    // in the stack with index smaller than
    // `lower_bounds[InlineStack::TILDES]`.
    // The ten slots are addressed through the constants in `impl InlineStack`.
    lower_bounds: [usize; 10],
}
1786
1787impl InlineStack {
1788    /// These are indices into the lower bounds array.
1789    /// Not both refers to the property that the delimiter can not both
1790    /// be opener as a closer.
1791    const UNDERSCORE_NOT_BOTH: usize = 0;
1792    const ASTERISK_NOT_BOTH: usize = 1;
1793    const ASTERISK_BASE: usize = 2;
1794    const TILDES: usize = 5;
1795    const UNDERSCORE_BASE: usize = 6;
1796    const CIRCUMFLEXES: usize = 9;
1797
1798    fn pop_all(&mut self, tree: &mut Tree<Item>) {
1799        for el in self.stack.drain(..) {
1800            for i in 0..el.count {
1801                tree[el.start + i].item.body = ItemBody::Text {
1802                    backslash_escaped: false,
1803                };
1804            }
1805        }
1806        self.lower_bounds = [0; 10];
1807    }
1808
1809    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1810        if c == b'_' {
1811            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1812            if both {
1813                mod3_lower
1814            } else {
1815                min(
1816                    mod3_lower,
1817                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1818                )
1819            }
1820        } else if c == b'*' {
1821            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1822            if both {
1823                mod3_lower
1824            } else {
1825                min(
1826                    mod3_lower,
1827                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1828                )
1829            }
1830        } else if c == b'^' {
1831            self.lower_bounds[InlineStack::CIRCUMFLEXES]
1832        } else {
1833            self.lower_bounds[InlineStack::TILDES]
1834        }
1835    }
1836
1837    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1838        if c == b'_' {
1839            if both {
1840                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1841            } else {
1842                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1843            }
1844        } else if c == b'*' {
1845            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1846            if !both {
1847                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1848            }
1849        } else if c == b'^' {
1850            self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
1851        } else {
1852            self.lower_bounds[InlineStack::TILDES] = new_bound;
1853        }
1854    }
1855
1856    fn truncate(&mut self, new_bound: usize) {
1857        self.stack.truncate(new_bound);
1858        for lower_bound in &mut self.lower_bounds {
1859            if *lower_bound > new_bound {
1860                *lower_bound = new_bound;
1861            }
1862        }
1863    }
1864
1865    fn find_match(
1866        &mut self,
1867        tree: &mut Tree<Item>,
1868        c: u8,
1869        run_length: usize,
1870        both: bool,
1871    ) -> Option<InlineEl> {
1872        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1873        let res = self.stack[lowerbound..]
1874            .iter()
1875            .cloned()
1876            .enumerate()
1877            .rfind(|(_, el)| {
1878                if (c == b'~' || c == b'^') && run_length != el.run_length {
1879                    return false;
1880                }
1881                el.c == c
1882                    && (!both && !el.both
1883                        || !(run_length + el.run_length).is_multiple_of(3)
1884                        || run_length.is_multiple_of(3))
1885            });
1886
1887        if let Some((matching_ix, matching_el)) = res {
1888            let matching_ix = matching_ix + lowerbound;
1889            for el in &self.stack[(matching_ix + 1)..] {
1890                for i in 0..el.count {
1891                    tree[el.start + i].item.body = ItemBody::Text {
1892                        backslash_escaped: false,
1893                    };
1894                }
1895            }
1896            self.truncate(matching_ix);
1897            Some(matching_el)
1898        } else {
1899            // For `*`/`_`, the lower-bound optimisation is safe because their
1900            // matching rule (CM "rule of three") is monotonic across future
1901            // closers with the same count. Tildes/carets match strictly by
1902            // equal run-length, so a failure at run-length 2 must not close
1903            // the door on a later run-length 1 closer matching an earlier
1904            // run-length 1 opener still on the stack.
1905            if c != b'~' && c != b'^' {
1906                self.set_lowerbound(c, run_length, both, self.stack.len());
1907            }
1908            None
1909        }
1910    }
1911
1912    fn trim_lower_bound(&mut self, ix: usize) {
1913        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1914    }
1915
1916    fn push(&mut self, el: InlineEl) {
1917        if el.c == b'~' {
1918            self.trim_lower_bound(InlineStack::TILDES);
1919        } else if el.c == b'^' {
1920            self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
1921        }
1922        self.stack.push(el)
1923    }
1924}
1925
/// Outcome of `scan_reference`, which inspects what follows a link text for a
/// reference label.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // contains next node index
    // (produced for a `[]` tail: the node after the closing bracket, if any)
    Collapsed(Option<TreeIndex>),
    // a label was scanned, but it is a footnote label rather than a link label
    UnexpectedFootnote,
    // no starting node, or nothing usable was scanned
    Failed,
}
1935
1936/// Skips forward within a block to a node which spans (ends inclusive) the given
1937/// index into the source.
1938fn scan_nodes_to_ix(
1939    tree: &Tree<Item>,
1940    mut node: Option<TreeIndex>,
1941    ix: usize,
1942) -> Option<TreeIndex> {
1943    while let Some(node_ix) = node {
1944        if tree[node_ix].item.end <= ix {
1945            node = tree[node_ix].next;
1946        } else {
1947            break;
1948        }
1949    }
1950    node
1951}
1952
1953/// Scans an inline link label, which cannot be interrupted.
1954/// Returns number of bytes (including brackets) and label on success.
1955fn scan_link_label<'text>(
1956    tree: &Tree<Item>,
1957    text: &'text str,
1958    options: Options,
1959) -> Option<(usize, ReferenceLabel<'text>)> {
1960    let bytes = text.as_bytes();
1961    if bytes.len() < 2 || bytes[0] != b'[' {
1962        return None;
1963    }
1964    let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
1965    if options.contains(Options::ENABLE_FOOTNOTES)
1966        && b'^' == bytes[1]
1967        && bytes.get(2) != Some(&b']')
1968    {
1969        // GFM footnote labels don't wrap across line breaks.
1970        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
1971        if let Some((byte_index, cow)) =
1972            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1973        {
1974            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1975        }
1976    }
1977    let (byte_index, cow) =
1978        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1979    Some((byte_index + 1, ReferenceLabel::Link(cow)))
1980}
1981
1982fn scan_reference<'b>(
1983    tree: &Tree<Item>,
1984    text: &'b str,
1985    cur: Option<TreeIndex>,
1986    options: Options,
1987) -> RefScan<'b> {
1988    let cur_ix = match cur {
1989        None => return RefScan::Failed,
1990        Some(cur_ix) => cur_ix,
1991    };
1992    let start = tree[cur_ix].item.start;
1993    let tail = &text.as_bytes()[start..];
1994
1995    if tail.starts_with(b"[]") {
1996        // The trailing `]` of the collapsed reference must already exist as a
1997        // tree node — pulldown-cmark emits each bracket as its own item, and
1998        // we only reach here when `tail` already contains `]`. Defensive
1999        // fallback to `Failed` if that invariant is somehow broken.
2000        let Some(closing_node) = tree[cur_ix].next else {
2001            return RefScan::Failed;
2002        };
2003        RefScan::Collapsed(tree[closing_node].next)
2004    } else {
2005        let label = scan_link_label(tree, &text[start..], options);
2006        match label {
2007            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2008            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2009            None => RefScan::Failed,
2010        }
2011    }
2012}
2013
/// Stack of pending link and image openers.
#[derive(Clone, Default)]
struct LinkStack {
    /// Pending openers, oldest first.
    inner: Vec<LinkStackEl>,
    /// Entries below this index were already handled by a previous
    /// `disable_all_links` call and are not visited again.
    disabled_ix: usize,
}
2019
2020impl LinkStack {
2021    fn push(&mut self, el: LinkStackEl) {
2022        self.inner.push(el);
2023    }
2024
2025    fn pop(&mut self) -> Option<LinkStackEl> {
2026        let el = self.inner.pop();
2027        self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2028        el
2029    }
2030
2031    fn clear(&mut self) {
2032        self.inner.clear();
2033        self.disabled_ix = 0;
2034    }
2035
2036    fn disable_all_links(&mut self) {
2037        for el in &mut self.inner[self.disabled_ix..] {
2038            if el.ty == LinkStackTy::Link {
2039                el.ty = LinkStackTy::Disabled;
2040            }
2041        }
2042        self.disabled_ix = self.inner.len();
2043    }
2044}
2045
/// One entry of a `LinkStack`.
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node this entry refers to (presumably the opening bracket item;
    /// TODO confirm at the push sites).
    node: TreeIndex,
    /// Kind of opener; `LinkStack::disable_all_links` turns `Link` into
    /// `Disabled`.
    ty: LinkStackTy,
}
2051
/// Kind of opener stored in a `LinkStackEl`.
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link opener that is still active.
    Link,
    /// An image opener; `LinkStack::disable_all_links` leaves it unchanged.
    Image,
    /// A former `Link` opener deactivated by `LinkStack::disable_all_links`.
    Disabled,
}
2058
/// Contains the destination URL, title and source span of a reference definition.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// Destination URL of the definition.
    pub dest: CowStr<'a>,
    /// Title of the definition, if it has one.
    pub title: Option<CowStr<'a>>,
    /// Source span of the definition, as a range of `usize` offsets.
    pub span: Range<usize>,
}
2066
2067impl<'a> LinkDef<'a> {
2068    pub fn into_static(self) -> LinkDef<'static> {
2069        LinkDef {
2070            dest: self.dest.into_static(),
2071            title: self.title.map(|s| s.into_static()),
2072            span: self.span,
2073        }
2074    }
2075}
2076
/// Bookkeeping kept for a footnote definition; values of this type are stored
/// in `FootnoteDefs`.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter (presumably how often the footnote has been referenced;
    /// TODO confirm where it is incremented).
    pub use_count: usize,
}
2082
/// Tracks tree indices of code span delimiters of each length. It should prevent
/// quadratic scanning behaviours by providing (amortized) constant time lookups.
struct CodeDelims {
    /// Maps a delimiter length to the queue of tree indices recorded for it,
    /// in insertion order.
    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
    /// Whether `insert` has been called since creation or the last `clear`;
    /// the first inserted delimiter is deliberately not recorded.
    seen_first: bool,
}
2089
2090impl CodeDelims {
2091    fn new() -> Self {
2092        Self {
2093            inner: Default::default(),
2094            seen_first: false,
2095        }
2096    }
2097
2098    fn insert(&mut self, count: usize, ix: TreeIndex) {
2099        if self.seen_first {
2100            self.inner.entry(count).or_default().push_back(ix);
2101        } else {
2102            // Skip the first insert, since that delimiter will always
2103            // be an opener and not a closer.
2104            self.seen_first = true;
2105        }
2106    }
2107
2108    fn is_populated(&self) -> bool {
2109        !self.inner.is_empty()
2110    }
2111
2112    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2113        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2114            if ix > open_ix {
2115                return Some(ix);
2116            }
2117        }
2118        None
2119    }
2120
2121    fn clear(&mut self) {
2122        self.inner.clear();
2123        self.seen_first = false;
2124    }
2125}
2126
/// Tracks brace contexts and delimiter length for math delimiters.
/// Provides amortized constant-time lookups.
struct MathDelims {
    /// Queues of recorded delimiters grouped under a `u8` key (presumably the
    /// brace context; TODO confirm). Each entry holds a tree index and two
    /// flags whose meaning is defined by the code that fills the queues.
    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
2132
impl MathDelims {
    /// Creates a tracker with no recorded delimiters.
    fn new() -> Self {
        Self {
            inner: Default::default(),
        }
    }

    /// Removes every recorded delimiter.
    fn clear(&mut self) {
        self.inner.clear();
    }
}
2144
/// Typed wrapper around a `usize` index (presumably into `Allocations::links`;
/// TODO confirm).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);
2147
/// Typed wrapper around a `usize` index (presumably into `Allocations::cows`;
/// TODO confirm).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);
2150
/// Typed wrapper around a `usize` index (presumably into
/// `Allocations::alignments`; TODO confirm).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);
2153
/// Typed wrapper around a `NonZeroUsize`, so the wrapped value is never zero
/// (presumably identifies an entry of `Allocations::headings`; TODO confirm
/// how the value maps to a vector position).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);
2156
/// Typed wrapper around a `usize` index (presumably into
/// `Allocations::jsx_elements`; TODO confirm).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct JsxElementIndex(usize);
2159
/// Typed wrapper around a `usize` index (presumably into
/// `Allocations::directives`; TODO confirm).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct DirectiveIndex(usize);
2162
/// A parsed JSX attribute.
///
/// NOTE(review): the payload meanings below follow the variant names and the
/// `n`/`v` bindings in `into_static`; confirm against the JSX parser.
#[derive(Debug, Clone)]
pub(crate) enum JsxAttr<'a> {
    /// Attribute without a value; holds the name.
    Boolean(CowStr<'a>),
    /// Attribute with a literal value; holds name and value.
    Literal(CowStr<'a>, CowStr<'a>),
    /// Attribute with an expression value; holds name and expression.
    Expression(CowStr<'a>, CowStr<'a>),
    /// Spread attribute; holds the spread value.
    Spread(CowStr<'a>),
}
2171
2172impl<'a> JsxAttr<'a> {
2173    pub fn into_static(self) -> JsxAttr<'static> {
2174        match self {
2175            JsxAttr::Boolean(n) => JsxAttr::Boolean(n.into_static()),
2176            JsxAttr::Literal(n, v) => JsxAttr::Literal(n.into_static(), v.into_static()),
2177            JsxAttr::Expression(n, v) => JsxAttr::Expression(n.into_static(), v.into_static()),
2178            JsxAttr::Spread(v) => JsxAttr::Spread(v.into_static()),
2179        }
2180    }
2181}
2182
/// Pre-parsed JSX element data (name + attributes + tag classification).
#[derive(Debug, Clone)]
pub(crate) struct JsxElementData<'a> {
    /// Element name.
    pub name: CowStr<'a>,
    /// Parsed attributes of the tag.
    pub attrs: Vec<JsxAttr<'a>>,
    /// Raw text kept alongside the parsed form (presumably the tag source;
    /// TODO confirm).
    pub raw: CowStr<'a>,
    /// Tag classification: set for a closing tag.
    pub is_closing: bool,
    /// Tag classification: set for a self-closing tag.
    pub is_self_closing: bool,
}
2192
2193impl<'a> JsxElementData<'a> {
2194    pub fn into_static(self) -> JsxElementData<'static> {
2195        JsxElementData {
2196            name: self.name.into_static(),
2197            attrs: self.attrs.into_iter().map(|a| a.into_static()).collect(),
2198            raw: self.raw.into_static(),
2199            is_closing: self.is_closing,
2200            is_self_closing: self.is_self_closing,
2201        }
2202    }
2203}
2204
/// Data stored for a directive item and addressed through `DirectiveIndex`.
///
/// `item_to_event` moves `name` and `attributes` into `Tag::Directive`.
#[derive(Debug, Clone)]
pub(crate) struct DirectiveAttrData<'a> {
    /// Directive name, emitted as `Tag::Directive::name`.
    pub name: CowStr<'a>,
    /// Key/value attribute pairs, emitted as `Tag::Directive::attributes`.
    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    /// Presumably the start offset of the directive label in the source — TODO confirm.
    pub label_start: usize,
    /// Presumably the end offset of the directive label in the source — TODO confirm.
    pub label_end: usize,
}
2212
/// Side tables holding the owned data that tree items refer to through typed
/// indices (`LinkIndex`, `CowIndex`, `AlignmentIndex`, `HeadingIndex`,
/// `JsxElementIndex`, `DirectiveIndex`), plus the reference and footnote
/// definition maps.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Reference definitions, keyed by link label.
    pub refdefs: RefDefs<'a>,
    /// Footnote definitions, keyed by footnote label.
    pub footdefs: FootnoteDefs<'a>,
    /// `(link type, destination URL, title, id)` tuples addressed by `LinkIndex`.
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Strings addressed by `CowIndex`.
    cows: Vec<CowStr<'a>>,
    /// Per-table column alignments addressed by `AlignmentIndex`.
    alignments: Vec<Vec<Alignment>>,
    /// Heading attributes addressed by `HeadingIndex` (stored position + 1).
    headings: Vec<HeadingAttributes<'a>>,
    /// JSX element data addressed by `JsxElementIndex`.
    jsx_elements: Vec<JsxElementData<'a>>,
    /// Directive data addressed by `DirectiveIndex`.
    directives: Vec<DirectiveAttrData<'a>>,
}
2224
/// Used by the heading attributes extension.
///
/// `item_to_event` clones these three fields into the matching fields of
/// `Tag::Heading`.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Optional heading id, emitted as `Tag::Heading::id`.
    pub id: Option<CowStr<'a>>,
    /// Class names, emitted as `Tag::Heading::classes`.
    pub classes: Vec<CowStr<'a>>,
    /// Remaining attributes as `(key, optional value)` pairs, emitted as
    /// `Tag::Heading::attrs`.
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
2232
/// Keeps track of the reference definitions defined in the document.
///
/// Wraps a hash map from `LinkLabel` to `LinkDef`; use [`RefDefs::get`] for a
/// lookup by label text and [`RefDefs::iter`] to enumerate all definitions.
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2236
/// Keeps track of the footnote definitions defined in the document.
///
/// Wraps a hash map from `FootnoteLabel` to `FootnoteDef`.
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2240
2241impl<'input, 'b, 's> RefDefs<'input>
2242where
2243    's: 'b,
2244{
2245    /// Performs a lookup on reference label using unicode case folding.
2246    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2247        self.0.get(&UniCase::new(key.into()))
2248    }
2249
2250    /// Provides an iterator over all the document's reference definitions.
2251    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2252        self.0.iter().map(|(k, v)| (k.as_ref(), v))
2253    }
2254}
2255
2256impl<'input, 'b, 's> FootnoteDefs<'input>
2257where
2258    's: 'b,
2259{
2260    /// Performs a lookup on reference label using unicode case folding.
2261    pub fn contains(&'s self, key: &'b str) -> bool {
2262        self.0.contains_key(&UniCase::new(key.into()))
2263    }
2264    /// Performs a lookup on reference label using unicode case folding.
2265    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2266        self.0.get_mut(&UniCase::new(key))
2267    }
2268}
2269
2270impl<'a> Allocations<'a> {
2271    pub fn new() -> Self {
2272        Self {
2273            refdefs: RefDefs::default(),
2274            footdefs: FootnoteDefs::default(),
2275            links: Vec::with_capacity(128),
2276            cows: Vec::new(),
2277            alignments: Vec::new(),
2278            headings: Vec::new(),
2279            jsx_elements: Vec::new(),
2280            directives: Vec::new(),
2281        }
2282    }
2283
2284    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2285        let ix = self.cows.len();
2286        self.cows.push(cow);
2287        CowIndex(ix)
2288    }
2289
2290    pub fn allocate_link(
2291        &mut self,
2292        ty: LinkType,
2293        url: CowStr<'a>,
2294        title: CowStr<'a>,
2295        id: CowStr<'a>,
2296    ) -> LinkIndex {
2297        let ix = self.links.len();
2298        self.links.push((ty, url, title, id));
2299        LinkIndex(ix)
2300    }
2301
2302    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2303        let ix = self.alignments.len();
2304        self.alignments.push(alignment);
2305        AlignmentIndex(ix)
2306    }
2307
2308    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2309        let ix = self.headings.len();
2310        self.headings.push(attrs);
2311        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
2312        // such a long Vec cannot fit in memory.
2313        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2314        HeadingIndex(ix_nonzero)
2315    }
2316
2317    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2318        core::mem::replace(&mut self.cows[ix.0], "".into())
2319    }
2320
2321    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2322        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2323        core::mem::replace(&mut self.links[ix.0], default_link)
2324    }
2325
2326    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2327        core::mem::take(&mut self.alignments[ix.0])
2328    }
2329
2330    pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
2331        let ix = self.jsx_elements.len();
2332        self.jsx_elements.push(data);
2333        JsxElementIndex(ix)
2334    }
2335
2336    pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
2337        let ix = self.directives.len();
2338        self.directives.push(data);
2339        DirectiveIndex(ix)
2340    }
2341
2342    pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
2343        core::mem::replace(
2344            &mut self.directives[ix.0],
2345            DirectiveAttrData {
2346                name: "".into(),
2347                attributes: Vec::new(),
2348                label_start: 0,
2349                label_end: 0,
2350            },
2351        )
2352    }
2353
2354    pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
2355        &self.directives[ix.0]
2356    }
2357
2358    pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
2359        core::mem::replace(
2360            &mut self.jsx_elements[ix.0],
2361            JsxElementData {
2362                name: "".into(),
2363                attrs: Vec::new(),
2364                raw: "".into(),
2365                is_closing: false,
2366                is_self_closing: false,
2367            },
2368        )
2369    }
2370}
2371
2372impl<'a> Index<CowIndex> for Allocations<'a> {
2373    type Output = CowStr<'a>;
2374
2375    fn index(&self, ix: CowIndex) -> &Self::Output {
2376        self.cows.index(ix.0)
2377    }
2378}
2379
2380impl<'a> Index<LinkIndex> for Allocations<'a> {
2381    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
2382
2383    fn index(&self, ix: LinkIndex) -> &Self::Output {
2384        self.links.index(ix.0)
2385    }
2386}
2387
2388impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2389    type Output = Vec<Alignment>;
2390
2391    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2392        self.alignments.index(ix.0)
2393    }
2394}
2395
2396impl<'a> Index<HeadingIndex> for Allocations<'a> {
2397    type Output = HeadingAttributes<'a>;
2398
2399    fn index(&self, ix: HeadingIndex) -> &Self::Output {
2400        self.headings.index(ix.0.get() - 1)
2401    }
2402}
2403
/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`) and declarations (`<!DECLARATION`); the `comment` field
/// presumably plays the same role for HTML comments. The respective `usize`s
/// represent the indices before which a scan will always fail and can hence
/// be skipped.
///
/// The derived `Default` sets every field to `0`.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Guard index for cdata scans.
    pub cdata: usize,
    /// Guard index for processing-instruction scans.
    pub processing: usize,
    /// Guard index for declaration scans.
    pub declaration: usize,
    /// Guard index for comment scans — presumably `<!--`; TODO confirm.
    pub comment: usize,
}
2416
/// Trait to customize [`Parser`] behavior with callbacks. See [`Parser::new_with_callbacks`].
///
/// All methods have a default implementation, so you can choose which ones to override.
pub trait ParserCallbacks<'input> {
    /// Potentially provide a custom definition for a broken link.
    ///
    /// In case the parser encounters any potential links that have a broken
    /// reference (e.g. `[foo]` when there is no `[foo]: ` entry at the bottom)
    /// this callback will be called with information about the reference,
    /// and the returned pair will be used as the link URL and title if it is not
    /// `None`.
    ///
    /// The default implementation ignores `link` and returns `None`.
    fn handle_broken_link(
        &mut self,
        // Unused by the default body; the attribute silences the lint while
        // keeping the parameter name free of an underscore prefix.
        #[allow(unused_variables)] link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        None
    }
}
2435
/// Wrapper to implement [`ParserCallbacks::handle_broken_link`] with a closure.
///
/// Used internally by [`Parser::new_with_broken_link_callback`].
///
/// The wrapped value is optional: when it is `None`, the
/// `handle_broken_link` implementation returns `None` for every broken link.
// No `Debug` impl: `F` is unconstrained here and closures do not implement `Debug`.
#[allow(missing_debug_implementations)]
pub struct BrokenLinkCallback<F>(Option<F>);
2441
2442impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
2443where
2444    F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
2445{
2446    fn handle_broken_link(
2447        &mut self,
2448        link: BrokenLink<'input>,
2449    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2450        self.0.as_mut().and_then(|cb| cb(link))
2451    }
2452}
2453
2454impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
2455    fn handle_broken_link(
2456        &mut self,
2457        link: BrokenLink<'input>,
2458    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2459        (**self).handle_broken_link(link)
2460    }
2461}
2462
/// [Parser] callbacks that do nothing.
///
/// Used when no custom callbacks are provided.
#[allow(missing_debug_implementations)]
pub struct DefaultParserCallbacks;

// Empty impl: every trait method keeps its default body, so
// `handle_broken_link` always returns `None`.
impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
2470
/// Markdown event and source range iterator.
///
/// Generates tuples where the first element is the markdown event and the second
/// is the corresponding range in the source string.
///
/// Constructed from a `Parser` using its
/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
#[derive(Debug)]
pub struct OffsetIter<'a, CB> {
    /// The wrapped parser; iteration drives its `inner` state directly.
    parser: Parser<'a, CB>,
}
2482
impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
    /// Returns a reference to the internal reference definition tracker.
    ///
    /// Delegates to the wrapped parser's `reference_definitions`.
    pub fn reference_definitions(&self) -> &RefDefs<'_> {
        self.parser.reference_definitions()
    }

    /// Returns MDX validation errors collected during parsing.
    ///
    /// Delegates to the wrapped parser's `mdx_errors`. Each entry pairs a
    /// `usize` with a message; presumably the `usize` is a source offset —
    /// TODO confirm.
    pub fn mdx_errors(&self) -> &[(usize, String)] {
        self.parser.mdx_errors()
    }
}
2494
2495impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
2496    type Item = (Event<'a>, Range<usize>);
2497
2498    fn next(&mut self) -> Option<Self::Item> {
2499        self.parser
2500            .inner
2501            .next_event_range(&mut self.parser.callbacks)
2502    }
2503}
2504
2505impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
2506    type Item = Event<'a>;
2507
2508    fn next(&mut self) -> Option<Event<'a>> {
2509        self.inner
2510            .next_event_range(&mut self.callbacks)
2511            .map(|(event, _range)| event)
2512    }
2513}
2514
2515impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
2516
impl<'input> ParserInner<'input> {
    /// Produces the next event together with the source range of the tree
    /// item it was generated from.
    ///
    /// Walks the tree with its cursor: when the cursor points at an item, the
    /// item is converted with `item_to_event`; when the cursor is empty, an
    /// item is popped from the tree and an `Event::End` is emitted for it.
    /// `TightParagraph` items produce no events of their own.
    ///
    /// Returns `None` once the cursor is empty and `tree.pop()` yields nothing.
    fn next_event_range(
        &mut self,
        callbacks: &mut dyn ParserCallbacks<'input>,
    ) -> Option<(Event<'input>, Range<usize>)> {
        match self.tree.cur() {
            None => {
                // No current item: pop one from the tree, or finish if there
                // is nothing left to pop.
                let ix = self.tree.pop()?;
                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
                    // tight paragraphs emit nothing
                    self.tree.next_sibling(ix);
                    return self.next_event_range(callbacks);
                } else {
                    ix
                };
                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
                // Advance the cursor past the popped item before reporting its end.
                self.tree.next_sibling(ix);
                let span = self.tree[ix].item.start..self.tree[ix].item.end;
                debug_assert!(span.start <= span.end);
                Some((Event::End(tag_end), span))
            }
            Some(cur_ix) => {
                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
                    // tight paragraphs emit nothing
                    // Push the tight paragraph and continue with whatever the
                    // cursor points at afterwards; the `unwrap` panics if the
                    // cursor is empty at that point.
                    self.tree.push();
                    self.tree.cur().unwrap()
                } else {
                    cur_ix
                };
                if self.tree[cur_ix].item.body.is_maybe_inline() {
                    self.handle_inline(callbacks);
                }

                // Read the node only after `handle_inline` has run.
                // NOTE(review): presumably `handle_inline` may rewrite the
                // node at `cur_ix` in place — confirm.
                let node = self.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.text, &mut self.allocs);
                if let Event::Start(..) = event {
                    // A start tag: push this item so its children come next.
                    self.tree.push();
                } else {
                    // Any other event: move on to the next sibling.
                    self.tree.next_sibling(cur_ix);
                }
                debug_assert!(item.start <= item.end);
                Some((event, item.start..item.end))
            }
        }
    }
}
2564
2565fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2566    match *body {
2567        ItemBody::Paragraph => TagEnd::Paragraph,
2568        ItemBody::Emphasis => TagEnd::Emphasis,
2569        ItemBody::Superscript => TagEnd::Superscript,
2570        ItemBody::Subscript => TagEnd::Subscript,
2571        ItemBody::Strong => TagEnd::Strong,
2572        ItemBody::Strikethrough => TagEnd::Strikethrough,
2573        ItemBody::Link(..) => TagEnd::Link,
2574        ItemBody::Image(..) => TagEnd::Image,
2575        ItemBody::Heading(level, _) => TagEnd::Heading(level),
2576        ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
2577            TagEnd::CodeBlock
2578        }
2579        ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
2580        ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
2581        ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
2582        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2583        ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
2584        ItemBody::List(_, c, _) => {
2585            let is_ordered = c == b'.' || c == b')';
2586            TagEnd::List(is_ordered)
2587        }
2588        ItemBody::ListItem(_, _) => TagEnd::Item,
2589        ItemBody::TableHead => TagEnd::TableHead,
2590        ItemBody::TableCell => TagEnd::TableCell,
2591        ItemBody::TableRow => TagEnd::TableRow,
2592        ItemBody::Table(..) => TagEnd::Table,
2593        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2594        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2595        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2596        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2597        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2598        ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
2599        ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
2600        _ => panic!("unexpected item body {:?}", body),
2601    }
2602}
2603
2604fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2605    let tag = match item.body {
2606        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2607        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2608        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2609        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2610        ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2611        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2612        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2613        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2614        ItemBody::SoftBreak => return Event::SoftBreak,
2615        ItemBody::HardBreak(_) => return Event::HardBreak,
2616        ItemBody::FootnoteReference(cow_ix) => {
2617            return Event::FootnoteReference(allocs.take_cow(cow_ix))
2618        }
2619        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2620        ItemBody::Rule => return Event::Rule,
2621        ItemBody::Paragraph => Tag::Paragraph,
2622        ItemBody::Emphasis => Tag::Emphasis,
2623        ItemBody::Superscript => Tag::Superscript,
2624        ItemBody::Subscript => Tag::Subscript,
2625        ItemBody::Strong => Tag::Strong,
2626        ItemBody::Strikethrough => Tag::Strikethrough,
2627        ItemBody::Link(link_ix) => {
2628            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2629            Tag::Link {
2630                link_type,
2631                dest_url,
2632                title,
2633                id,
2634            }
2635        }
2636        ItemBody::Image(link_ix) => {
2637            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2638            Tag::Image {
2639                link_type,
2640                dest_url,
2641                title,
2642                id,
2643            }
2644        }
2645        ItemBody::Heading(level, Some(heading_ix)) => {
2646            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2647            Tag::Heading {
2648                level,
2649                id: id.clone(),
2650                classes: classes.clone(),
2651                attrs: attrs.clone(),
2652            }
2653        }
2654        ItemBody::Heading(level, None) => Tag::Heading {
2655            level,
2656            id: None,
2657            classes: Vec::new(),
2658            attrs: Vec::new(),
2659        },
2660        ItemBody::MathBlock(cow_ix) => {
2661            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2662        }
2663        ItemBody::FencedCodeBlock(cow_ix) => {
2664            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2665        }
2666        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2667        ItemBody::ContainerDirective(_, dir_ix)
2668        | ItemBody::LeafDirective(dir_ix)
2669        | ItemBody::TextDirective(dir_ix) => {
2670            let kind = match item.body {
2671                ItemBody::ContainerDirective(..) => DirectiveKind::Container,
2672                ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
2673                _ => DirectiveKind::Text,
2674            };
2675            let dir = allocs.take_directive(dir_ix);
2676            Tag::Directive {
2677                kind,
2678                name: dir.name,
2679                attributes: dir.attributes,
2680            }
2681        }
2682        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2683        ItemBody::List(is_tight, c, listitem_start) => {
2684            if c == b'.' || c == b')' {
2685                Tag::List(Some(listitem_start), is_tight)
2686            } else {
2687                Tag::List(None, is_tight)
2688            }
2689        }
2690        ItemBody::ListItem(_, _) => Tag::Item,
2691        ItemBody::TableHead => Tag::TableHead,
2692        ItemBody::TableCell => Tag::TableCell,
2693        ItemBody::TableRow => Tag::TableRow,
2694        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2695        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2696        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2697        ItemBody::Math(cow_ix, is_display) => {
2698            return if is_display {
2699                Event::DisplayMath(allocs.take_cow(cow_ix))
2700            } else {
2701                Event::InlineMath(allocs.take_cow(cow_ix))
2702            }
2703        }
2704        ItemBody::DefinitionList(_) => Tag::DefinitionList,
2705        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2706        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2707        ItemBody::MdxJsxFlowElement(jsx_ix) => {
2708            let jsx = allocs.take_jsx_element(jsx_ix);
2709            Tag::MdxJsxFlowElement(jsx.raw)
2710        }
2711        ItemBody::MdxJsxTextElement(jsx_ix) => {
2712            let jsx = allocs.take_jsx_element(jsx_ix);
2713            Tag::MdxJsxTextElement(jsx.raw)
2714        }
2715        ItemBody::MdxFlowExpression(cow_ix) => {
2716            return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
2717        }
2718        ItemBody::MdxTextExpression(cow_ix) => {
2719            return Event::MdxTextExpression(allocs.take_cow(cow_ix))
2720        }
2721        ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
2722        _ => panic!("unexpected item body {:?}", item.body),
2723    };
2724
2725    Event::Start(tag)
2726}
2727
2728#[cfg(test)]
2729mod test {
2730    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};
2731
2732    use super::*;
2733    use crate::tree::Node;
2734
2735    // TODO: move these tests to tests/html.rs?
2736
2737    fn parser_with_extensions(text: &str) -> Parser<'_> {
2738        let mut opts = Options::empty();
2739        opts.insert(Options::ENABLE_TABLES);
2740        opts.insert(Options::ENABLE_FOOTNOTES);
2741        opts.insert(Options::ENABLE_STRIKETHROUGH);
2742        opts.insert(Options::ENABLE_SUPERSCRIPT);
2743        opts.insert(Options::ENABLE_SUBSCRIPT);
2744        opts.insert(Options::ENABLE_TASKLISTS);
2745
2746        Parser::new_ext(text, opts)
2747    }
2748
2749    #[test]
2750    #[cfg(target_pointer_width = "64")]
2751    fn node_size() {
2752        let node_size = core::mem::size_of::<Node<Item>>();
2753        assert_eq!(48, node_size);
2754    }
2755
2756    #[test]
2757    #[cfg(target_pointer_width = "64")]
2758    fn body_size() {
2759        let body_size = core::mem::size_of::<ItemBody>();
2760        assert_eq!(16, body_size);
2761    }
2762
2763    #[test]
2764    fn single_open_fish_bracket() {
2765        // dont crash
2766        assert_eq!(3, Parser::new("<").count());
2767    }
2768
2769    #[test]
2770    fn lone_hashtag() {
2771        // dont crash
2772        assert_eq!(2, Parser::new("#").count());
2773    }
2774
2775    #[test]
2776    fn lots_of_backslashes() {
2777        // dont crash
2778        Parser::new("\\\\\r\r").count();
2779        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2780    }
2781
2782    #[test]
2783    fn issue_1030() {
2784        let mut opts = Options::empty();
2785        opts.insert(Options::ENABLE_WIKILINKS);
2786
2787        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);
2788
2789        let offsets = parser
2790            .into_offset_iter()
2791            .map(|(_ev, range)| range)
2792            .collect::<Vec<_>>();
2793        let expected_offsets = vec![
2794            (0..44),  // Paragraph START
2795            (0..19),  // `For a new ferrari, `
2796            (19..43), // Wikilink START
2797            (31..41), // `click here`
2798            (19..43), // Wikilink END
2799            (43..44), // `!`
2800            (0..44),  // Paragraph END
2801        ];
2802        assert_eq!(offsets, expected_offsets);
2803    }
2804
2805    #[test]
2806    fn issue_320() {
2807        // dont crash
2808        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2809    }
2810
2811    #[test]
2812    fn issue_319() {
2813        // dont crash
2814        parser_with_extensions("|\r-]([^|\r-]([^").count();
2815        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2816    }
2817
2818    #[test]
2819    fn issue_303() {
2820        // dont crash
2821        parser_with_extensions("[^\r\ra]").count();
2822        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2823    }
2824
2825    #[test]
2826    fn issue_313() {
2827        // dont crash
2828        parser_with_extensions("*]0[^\r\r*]0[^").count();
2829        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2830    }
2831
2832    #[test]
2833    fn issue_311() {
2834        // dont crash
2835        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2836    }
2837
2838    #[test]
2839    fn issue_283() {
2840        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2841        // dont crash
2842        parser_with_extensions(input).count();
2843    }
2844
2845    #[test]
2846    fn issue_289() {
2847        // dont crash
2848        parser_with_extensions("> - \\\n> - ").count();
2849        parser_with_extensions("- \n\n").count();
2850    }
2851
2852    #[test]
2853    fn issue_306() {
2854        // dont crash
2855        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2856    }
2857
2858    #[test]
2859    fn issue_305() {
2860        // dont crash
2861        parser_with_extensions("_6**6*_*").count();
2862    }
2863
2864    #[test]
2865    fn another_emphasis_panic() {
2866        parser_with_extensions("*__#_#__*").count();
2867    }
2868
2869    #[test]
2870    fn offset_iter() {
2871        let event_offsets: Vec<_> = Parser::new("*hello* world")
2872            .into_offset_iter()
2873            .map(|(_ev, range)| range)
2874            .collect();
2875        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2876        assert_eq!(expected_offsets, event_offsets);
2877    }
2878
2879    #[test]
2880    fn reference_link_offsets() {
2881        let range =
2882            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2883                .into_offset_iter()
2884                .filter_map(|(ev, range)| match ev {
2885                    Event::Start(
2886                        Tag::Link {
2887                            link_type: LinkType::Reference,
2888                            ..
2889                        },
2890                        ..,
2891                    ) => Some(range),
2892                    _ => None,
2893                })
2894                .next()
2895                .unwrap();
2896        assert_eq!(5..30, range);
2897    }
2898
2899    #[test]
2900    fn footnote_offsets() {
2901        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
2902            .into_offset_iter()
2903            .filter_map(|(ev, range)| match ev {
2904                Event::FootnoteReference(..) => Some(range),
2905                _ => None,
2906            })
2907            .next()
2908            .unwrap();
2909        assert_eq!(12..16, range);
2910    }
2911
2912    #[test]
2913    fn footnote_offsets_exclamation() {
2914        let mut immediately_before_footnote = None;
2915        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
2916            .into_offset_iter()
2917            .filter_map(|(ev, range)| match ev {
2918                Event::FootnoteReference(..) => Some(range),
2919                _ => {
2920                    immediately_before_footnote = Some((ev, range));
2921                    None
2922                }
2923            })
2924            .next()
2925            .unwrap();
2926        assert_eq!(13..17, range);
2927        if let (Event::Text(exclamation), range_exclamation) =
2928            immediately_before_footnote.as_ref().unwrap()
2929        {
2930            assert_eq!("!", &exclamation[..]);
2931            assert_eq!(&(12..13), range_exclamation);
2932        } else {
2933            panic!("what came first, then? {immediately_before_footnote:?}");
2934        }
2935    }
2936
2937    #[test]
2938    fn table_offset() {
2939        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
2940        let event_offset = parser_with_extensions(markdown)
2941            .into_offset_iter()
2942            .map(|(_ev, range)| range)
2943            .nth(3)
2944            .unwrap();
2945        let expected_offset = 3..59;
2946        assert_eq!(expected_offset, event_offset);
2947    }
2948
2949    #[test]
2950    fn table_cell_span() {
2951        let markdown = "a|b|c\n--|--|--\na|  |c";
2952        let event_offset = parser_with_extensions(markdown)
2953            .into_offset_iter()
2954            .filter_map(|(ev, span)| match ev {
2955                Event::Start(Tag::TableCell) => Some(span),
2956                _ => None,
2957            })
2958            .nth(4)
2959            .unwrap();
2960        // Cell span includes the leading `|` delimiter (matching remark).
2961        let expected_offset_start = "a|b|c\n--|--|--\na".len();
2962        assert_eq!(
2963            expected_offset_start..(expected_offset_start + 3),
2964            event_offset
2965        );
2966    }
2967
2968    #[test]
2969    fn offset_iter_issue_378() {
2970        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2971            .into_offset_iter()
2972            .map(|(_ev, range)| range)
2973            .collect();
2974        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
2975        assert_eq!(expected_offsets, event_offsets);
2976    }
2977
2978    #[test]
2979    fn offset_iter_issue_404() {
2980        let event_offsets: Vec<_> = Parser::new("###\n")
2981            .into_offset_iter()
2982            .map(|(_ev, range)| range)
2983            .collect();
2984        let expected_offsets = vec![(0..4), (0..4)];
2985        assert_eq!(expected_offsets, event_offsets);
2986    }
2987
2988    #[test]
2989    fn broken_links_called_only_once() {
2990        for &(markdown, expected) in &[
2991            ("See also [`g()`][crate::g].", 1),
2992            ("See also [`g()`][crate::g][].", 1),
2993            ("[brokenlink1] some other node [brokenlink2]", 2),
2994        ] {
2995            let mut times_called = 0;
2996            let callback = &mut |_broken_link: BrokenLink| {
2997                times_called += 1;
2998                None
2999            };
3000            let parser =
3001                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
3002            for _ in parser {}
3003            assert_eq!(times_called, expected);
3004        }
3005    }
3006
3007    #[test]
3008    fn simple_broken_link_callback() {
3009        let test_str = "This is a link w/o def: [hello][world]";
3010        let mut callback = |broken_link: BrokenLink| {
3011            assert_eq!("world", broken_link.reference.as_ref());
3012            assert_eq!(&test_str[broken_link.span], "[hello][world]");
3013            let url = "YOLO".into();
3014            let title = "SWAG".to_owned().into();
3015            Some((url, title))
3016        };
3017        let parser =
3018            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
3019        let mut link_tag_count = 0;
3020        for (typ, url, title, id) in parser.filter_map(|event| match event {
3021            Event::Start(Tag::Link {
3022                link_type,
3023                dest_url,
3024                title,
3025                id,
3026            }) => Some((link_type, dest_url, title, id)),
3027            _ => None,
3028        }) {
3029            link_tag_count += 1;
3030            assert_eq!(typ, LinkType::ReferenceUnknown);
3031            assert_eq!(url.as_ref(), "YOLO");
3032            assert_eq!(title.as_ref(), "SWAG");
3033            assert_eq!(id.as_ref(), "world");
3034        }
3035        assert!(link_tag_count > 0);
3036    }
3037
3038    #[test]
3039    fn code_block_kind_check_fenced() {
3040        let parser = Parser::new("hello\n```test\ntadam\n```");
3041        let mut found = 0;
3042        for (ev, _range) in parser.into_offset_iter() {
3043            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
3044                assert_eq!(syntax.as_ref(), "test");
3045                found += 1;
3046            }
3047        }
3048        assert_eq!(found, 1);
3049    }
3050
3051    #[test]
3052    fn code_block_kind_check_indented() {
3053        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
3054        let mut found = 0;
3055        for (ev, _range) in parser.into_offset_iter() {
3056            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
3057                found += 1;
3058            }
3059        }
3060        assert_eq!(found, 1);
3061    }
3062
3063    #[test]
3064    fn ref_defs() {
3065        let input = r###"[a B c]: http://example.com
3066[another]: https://google.com
3067
3068text
3069
3070[final ONE]: http://wikipedia.org
3071"###;
3072        let mut parser = Parser::new(input);
3073
3074        assert!(parser.reference_definitions().get("a b c").is_some());
3075        assert!(parser.reference_definitions().get("nope").is_none());
3076
3077        if let Some(_event) = parser.next() {
3078            // testing keys with shorter lifetimes than parser and its input
3079            let s = "final one".to_owned();
3080            let link_def = parser.reference_definitions().get(&s).unwrap();
3081            let span = &input[link_def.span.clone()];
3082            assert_eq!(span, "[final ONE]: http://wikipedia.org");
3083        }
3084    }
3085
    // Compile-time check: the common ways of passing a broken-link callback
    // (a closure and a plain function) are accepted by
    // `Parser::new_with_broken_link_callback`, for both a static input string
    // and a shorter-lived one. The loops only drain the parsers; nothing is
    // asserted at runtime.
    #[test]
    // NOTE(review): presumably needed because `'b` is only used inside the body
    // (in the closure's parameter annotation) — confirm against clippy output.
    #[allow(clippy::extra_unused_lifetimes)]
    fn common_lifetime_patterns_allowed<'b>() {
        // Input with a lifetime shorter than `'static` (and shorter than `'b`).
        let temporary_str = String::from("xyz");

        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
        // Hack it by attaching the lifetime to the test function instead.
        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        // Same callback as a function item, where the lifetimes can be elided
        // in the signature.
        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        // Closure callback with a `'static` input.
        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
         * fix is simple: move it out to a function that allows annotating the lifetimes.
         */
        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut closure)) {
        //}

        // Function callback with a `'static` input.
        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        // Function callback with the shorter-lived input.
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }
3124
3125    #[test]
3126    fn inline_html_inside_blockquote() {
3127        // Regression for #960
3128        let input = "> <foo\n> bar>";
3129        let events: Vec<_> = Parser::new(input).collect();
3130        let expected = [
3131            Event::Start(Tag::BlockQuote(None)),
3132            Event::Start(Tag::Paragraph),
3133            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
3134            Event::End(TagEnd::Paragraph),
3135            Event::End(TagEnd::BlockQuote(None)),
3136        ];
3137        assert_eq!(&events, &expected);
3138    }
3139
3140    #[test]
3141    fn wikilink_has_pothole() {
3142        let input = "[[foo]] [[bar|baz]]";
3143        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
3144        let expected = [
3145            Event::Start(Tag::Paragraph),
3146            Event::Start(Tag::Link {
3147                link_type: LinkType::WikiLink { has_pothole: false },
3148                dest_url: CowStr::Borrowed("foo"),
3149                title: CowStr::Borrowed(""),
3150                id: CowStr::Borrowed(""),
3151            }),
3152            Event::Text(CowStr::Borrowed("foo")),
3153            Event::End(TagEnd::Link),
3154            Event::Text(CowStr::Borrowed(" ")),
3155            Event::Start(Tag::Link {
3156                link_type: LinkType::WikiLink { has_pothole: true },
3157                dest_url: CowStr::Borrowed("bar"),
3158                title: CowStr::Borrowed(""),
3159                id: CowStr::Borrowed(""),
3160            }),
3161            Event::Text(CowStr::Borrowed("baz")),
3162            Event::End(TagEnd::Link),
3163            Event::End(TagEnd::Paragraph),
3164        ];
3165        assert_eq!(&events, &expected);
3166    }
3167
3168    fn mdx_parser(text: &str) -> Parser<'_> {
3169        Parser::new_ext(text, Options::ENABLE_MDX)
3170    }
3171
3172    #[test]
3173    fn mdx_esm_import() {
3174        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
3175        assert_eq!(events.len(), 1);
3176        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
3177    }
3178
3179    #[test]
3180    fn mdx_esm_export() {
3181        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
3182        assert_eq!(events.len(), 1);
3183        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
3184    }
3185
3186    #[test]
3187    fn mdx_flow_expression() {
3188        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
3189        assert_eq!(events.len(), 1);
3190        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
3191    }
3192
3193    #[test]
3194    fn mdx_jsx_flow_self_closing() {
3195        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
3196        assert!(!events.is_empty());
3197        assert!(
3198            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
3199        );
3200    }
3201
3202    #[test]
3203    fn mdx_jsx_flow_fragment() {
3204        let events: Vec<_> = mdx_parser("<>\n").collect();
3205        assert!(!events.is_empty());
3206        assert!(matches!(
3207            &events[0],
3208            Event::Start(Tag::MdxJsxFlowElement(_))
3209        ));
3210    }
3211
3212    #[test]
3213    fn mdx_inline_expression() {
3214        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
3215        let has_expr = events
3216            .iter()
3217            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3218        assert!(
3219            has_expr,
3220            "Expected inline MDX expression, got: {:?}",
3221            events
3222        );
3223    }
3224
3225    #[test]
3226    fn mdx_inline_jsx() {
3227        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
3228        let has_jsx = events
3229            .iter()
3230            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
3231        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
3232    }
3233
3234    #[test]
3235    fn mdx_all_tags_are_jsx() {
3236        // In MDX mode, all tags (including lowercase) are JSX, not HTML.
3237        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
3238        let has_jsx = events
3239            .iter()
3240            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
3241        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
3242    }
3243
3244    #[test]
3245    fn mdx_does_not_interfere_without_flag() {
3246        // Without ENABLE_MDX, none of this should be parsed as MDX.
3247        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
3248        // Should be a regular paragraph.
3249        assert!(events
3250            .iter()
3251            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
3252    }
3253
3254    #[test]
3255    fn mdx_expression_in_heading() {
3256        let events: Vec<_> = mdx_parser("# {title}\n").collect();
3257        let has_heading = events
3258            .iter()
3259            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
3260        assert!(has_heading, "Should have a heading");
3261        let has_expr = events
3262            .iter()
3263            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
3264        assert!(
3265            has_expr,
3266            "Heading should contain MdxTextExpression, got: {:?}",
3267            events
3268        );
3269    }
3270
3271    #[test]
3272    fn mdx_expression_mixed_text_in_heading() {
3273        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
3274        let has_text = events
3275            .iter()
3276            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
3277        let has_expr = events
3278            .iter()
3279            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3280        assert!(has_text, "Should have text, got: {:?}", events);
3281        assert!(has_expr, "Should have expression, got: {:?}", events);
3282    }
3283}