Skip to main content

satteri_pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25    cmp::{max, min},
26    iter::FusedIterator,
27    num::NonZeroUsize,
28    ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33use crate::{
34    firstpass::run_first_pass,
35    linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
36    mdx::*,
37    scanners::*,
38    strings::CowStr,
39    tree::{Tree, TreeIndex},
40    Alignment, BlockQuoteKind, CodeBlockKind, ContainerKind, Event, HeadingLevel, LinkType,
41    MetadataBlockKind, Options, Tag, TagEnd,
42};
43
44// Allowing arbitrary depth nested parentheses inside link destinations
45// can create denial of service vulnerabilities if we're not careful.
46// The simplest countermeasure is to limit their depth, which is
47// explicitly allowed by the spec as long as the limit is at least 3:
48// https://spec.commonmark.org/0.29/#link-destination
49pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
50
51#[derive(Debug, Default, Clone, Copy)]
52pub(crate) struct Item {
53    pub start: usize,
54    pub end: usize,
55    pub body: ItemBody,
56}
57
58#[derive(Debug, PartialEq, Clone, Copy, Default)]
59pub(crate) enum ItemBody {
60    // These are possible inline items, need to be resolved in second pass.
61
62    // repeats, can_open, can_close
63    MaybeEmphasis(usize, bool, bool),
64    // can_open, can_close, brace context
65    MaybeMath(bool, bool, u8),
66    // quote byte, can_open, can_close
67    MaybeSmartQuote(u8, bool, bool),
68    MaybeCode(usize, bool), // number of backticks, preceded by backslash
69    MaybeHtml,
70    MaybeLinkOpen,
71    // bool indicates whether or not the preceding section could be a reference
72    MaybeLinkClose(bool),
73    MaybeImage,
74
75    // These are inline items after resolution.
76    Emphasis,
77    Strong,
78    Strikethrough,
79    Superscript,
80    Subscript,
81    Math(CowIndex, bool), // true for display math
82    Code(CowIndex),
83    Link(LinkIndex),
84    Image(LinkIndex),
85    FootnoteReference(CowIndex),
86    TaskListMarker(bool), // true for checked
87
88    // These are also inline items.
89    InlineHtml,
90    OwnedInlineHtml(CowIndex),
91    SynthesizeText(CowIndex),
92    SynthesizeChar(char),
93    Html,
94    Text {
95        backslash_escaped: bool,
96    },
97    SoftBreak,
98    // true = is backlash
99    HardBreak(bool),
100
101    // Dummy node at the top of the tree - should not be used otherwise!
102    #[default]
103    Root,
104
105    // These are block items.
106    Paragraph,
107    TightParagraph,
108    Rule,
109    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
110    FencedCodeBlock(CowIndex),
111    IndentCodeBlock,
112    HtmlBlock,
113    BlockQuote(Option<BlockQuoteKind>),
114    Container(u8, ContainerKind, CowIndex), // (fence length, specific renderer, descriptor used in renderer)
115    List(bool, u8, u64),                    // is_tight, list character, list start index
116    ListItem(usize),                        // indent level
117    FootnoteDefinition(CowIndex),
118    MetadataBlock(MetadataBlockKind),
119
120    // Definition lists
121    DefinitionList(bool), // is_tight
122    // gets turned into either a paragraph or a definition list title,
123    // depending on whether there's a definition after it
124    MaybeDefinitionListTitle,
125    DefinitionListTitle,
126    DefinitionListDefinition(usize),
127
128    // Tables
129    Table(AlignmentIndex),
130    TableHead,
131    TableRow,
132    TableCell,
133
134    // MDX
135    MdxJsxFlowElement(JsxElementIndex),
136    MdxJsxTextElement(JsxElementIndex),
137    MdxFlowExpression(CowIndex),
138    MdxTextExpression(CowIndex),
139    MdxEsm(CowIndex),
140}
141
142impl ItemBody {
143    pub(crate) fn is_maybe_inline(&self) -> bool {
144        use ItemBody::*;
145        matches!(
146            *self,
147            MaybeEmphasis(..)
148                | MaybeMath(..)
149                | MaybeSmartQuote(..)
150                | MaybeCode(..)
151                | MaybeHtml
152                | MaybeLinkOpen
153                | MaybeLinkClose(..)
154                | MaybeImage
155        )
156    }
157    fn is_inline(&self) -> bool {
158        use ItemBody::*;
159        matches!(
160            *self,
161            MaybeEmphasis(..)
162                | MaybeMath(..)
163                | MaybeSmartQuote(..)
164                | MaybeCode(..)
165                | MaybeHtml
166                | MaybeLinkOpen
167                | MaybeLinkClose(..)
168                | MaybeImage
169                | Emphasis
170                | Strong
171                | Strikethrough
172                | Math(..)
173                | Code(..)
174                | Link(..)
175                | Image(..)
176                | FootnoteReference(..)
177                | TaskListMarker(..)
178                | InlineHtml
179                | OwnedInlineHtml(..)
180                | SynthesizeText(..)
181                | SynthesizeChar(..)
182                | Html
183                | Text { .. }
184                | SoftBreak
185                | HardBreak(..)
186        )
187    }
188}
189
190#[derive(Debug)]
191pub struct BrokenLink<'a> {
192    pub span: core::ops::Range<usize>,
193    pub link_type: LinkType,
194    pub reference: CowStr<'a>,
195}
196
197/// Markdown event iterator.
198pub struct Parser<'input, CB = DefaultParserCallbacks> {
199    callbacks: CB,
200    inner: ParserInner<'input>,
201}
202
203// Inner state for `Parser`, extracted so that it can remain generic over the callback without
204// re-compiling complex logic for each instantiation of the generic type.
205pub(crate) struct ParserInner<'input> {
206    pub(crate) text: &'input str,
207    pub(crate) options: Options,
208    pub(crate) tree: Tree<Item>,
209    pub(crate) allocs: Allocations<'input>,
210    html_scan_guard: HtmlScanGuard,
211
212    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
213    // Consider this example:
214    //
215    //     [x]: xxx...
216    //     [x]
217    //     [x]
218    //     [x]
219    //
220    // Which expands to this HTML:
221    //
222    //     <a href="xxx...">x</a>
223    //     <a href="xxx...">x</a>
224    //     <a href="xxx...">x</a>
225    //
226    // This is quadratic growth, because it's filling in the area of a square.
227    // To prevent this, track how much it's expanded and limit it.
228    link_ref_expansion_limit: usize,
229
230    /// MDX validation errors collected during inline parsing.
231    pub(crate) mdx_errors: Vec<(usize, String)>,
232
233    // used by inline passes. store them here for reuse
234    inline_stack: InlineStack,
235    link_stack: LinkStack,
236    wikilink_stack: LinkStack,
237    code_delims: CodeDelims,
238    math_delims: MathDelims,
239}
240
241impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
242    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
243        // Only print the fields that have public types.
244        f.debug_struct("Parser")
245            .field("text", &self.inner.text)
246            .field("options", &self.inner.options)
247            .field("callbacks", &..)
248            .finish()
249    }
250}
251
252impl<'a> BrokenLink<'a> {
253    /// Moves the link into version with a static lifetime.
254    ///
255    /// The `reference` member is cloned to a Boxed or Inline version.
256    pub fn into_static(self) -> BrokenLink<'static> {
257        BrokenLink {
258            span: self.span.clone(),
259            link_type: self.link_type,
260            reference: self.reference.into_string().into(),
261        }
262    }
263}
264
265impl<'input> Parser<'input, DefaultParserCallbacks> {
266    /// Creates a new event iterator for a markdown string without any options enabled.
267    pub fn new(text: &'input str) -> Self {
268        Self::new_ext(text, Options::empty())
269    }
270
271    /// Creates a new event iterator for a markdown string with given options.
272    pub fn new_ext(text: &'input str, options: Options) -> Self {
273        Self::new_with_callbacks(text, options, DefaultParserCallbacks)
274    }
275}
276
277impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
278    /// Creates a new event iterator for markdown text with given options and callbacks.
279    ///
280    /// ```
281    /// # use satteri_pulldown_cmark::{BrokenLink, CowStr, Event, Options, Parser, ParserCallbacks, Tag};
282    /// struct CustomCallbacks;
283    /// impl<'input> ParserCallbacks<'input> for CustomCallbacks {
284    ///     fn handle_broken_link(
285    ///         &mut self,
286    ///         link: BrokenLink<'input>,
287    ///     ) -> Option<(CowStr<'input>, CowStr<'input>)> {
288    ///         Some(("https://target".into(), link.reference))
289    ///     }
290    /// }
291    ///
292    /// let mut parser =
293    ///     Parser::new_with_callbacks("[broken]", Options::empty(), CustomCallbacks);
294    ///
295    /// assert!(matches!(
296    ///     parser.nth(1),
297    ///     Some(Event::Start(Tag::Link { .. }))
298    /// ));
299    /// ```
300    ///
301    /// See the [`ParserCallbacks`] trait for a list of callbacks that can be overridden.
302    pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
303        let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
304        tree.reset();
305        let inline_stack = Default::default();
306        let link_stack = Default::default();
307        let wikilink_stack = Default::default();
308        let html_scan_guard = Default::default();
309        Parser {
310            callbacks,
311
312            inner: ParserInner {
313                text,
314                options,
315                tree,
316                allocs,
317                inline_stack,
318                link_stack,
319                wikilink_stack,
320                html_scan_guard,
321                // always allow 100KiB
322                link_ref_expansion_limit: text.len().max(100_000),
323                mdx_errors: Vec::new(),
324                code_delims: CodeDelims::new(),
325                math_delims: MathDelims::new(),
326            },
327        }
328    }
329
330    /// Returns a reference to the internal `RefDefs` object, which provides access
331    /// to the internal map of reference definitions.
332    pub fn reference_definitions(&self) -> &RefDefs<'_> {
333        &self.inner.allocs.refdefs
334    }
335
336    /// Returns MDX validation errors collected during parsing.
337    /// Only populated when [`Options::ENABLE_MDX`] is active.
338    pub fn mdx_errors(&self) -> &[(usize, String)] {
339        &self.inner.mdx_errors
340    }
341
342    /// Consumes the event iterator and produces an iterator that produces
343    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
344    /// range in the markdown source.
345    pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
346        OffsetIter { parser: self }
347    }
348}
349
350impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
351    /// In case the parser encounters any potential links that have a broken
352    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
353    /// the provided callback will be called with the reference name,
354    /// and the returned pair will be used as the link URL and title if it is not
355    /// `None`.
356    ///
357    /// This constructor is provided for backwards compatibility.
358    /// This and other callbacks can also be customized with [`Parser::new_with_callbacks`].
359    pub fn new_with_broken_link_callback(
360        text: &'input str,
361        options: Options,
362        broken_link_callback: Option<F>,
363    ) -> Self
364    where
365        F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
366    {
367        Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
368    }
369}
370
371impl<'input> ParserInner<'input> {
372    pub(crate) fn new(text: &'input str, options: Options) -> Self {
373        let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
374        tree.reset();
375        ParserInner {
376            text,
377            options,
378            tree,
379            allocs,
380            inline_stack: Default::default(),
381            link_stack: Default::default(),
382            wikilink_stack: Default::default(),
383            html_scan_guard: Default::default(),
384            link_ref_expansion_limit: text.len().max(100_000),
385            mdx_errors: firstpass_mdx_errors,
386            code_delims: CodeDelims::new(),
387            math_delims: MathDelims::new(),
388        }
389    }
390
391    /// Use a link label to fetch a type, url, and title.
392    ///
393    /// This function enforces the [`link_ref_expansion_limit`].
394    /// If it returns Some, it also consumes some of the fuel.
395    /// If we're out of fuel, it immediately returns None.
396    ///
397    /// The URL and title are found in the [`RefDefs`] map.
398    /// If they're not there, and a callback was provided by the user,
399    /// `handle_broken_link` will be invoked and given the opportunity
400    /// to provide a fallback.
401    ///
402    /// The link type (that's "link" or "image") depends on the usage site, and
403    /// is provided by the caller of this function.
404    /// This function returns a new one because, if it has to invoke a callback
405    /// to find the information, the link type is [mapped to an unknown type].
406    ///
407    /// [mapped to an unknown type]: crate::LinkType::to_unknown
408    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
409    fn fetch_link_type_url_title(
410        &mut self,
411        link_label: CowStr<'input>,
412        span: Range<usize>,
413        link_type: LinkType,
414        callbacks: &mut dyn ParserCallbacks<'input>,
415    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
416        if self.link_ref_expansion_limit == 0 {
417            return None;
418        }
419
420        let (link_type, url, title) = self
421            .allocs
422            .refdefs
423            .get(link_label.as_ref())
424            .map(|matching_def| {
425                // found a matching definition!
426                let title = matching_def
427                    .title
428                    .as_ref()
429                    .cloned()
430                    .unwrap_or_else(|| "".into());
431                let url = matching_def.dest.clone();
432                (link_type, url, title)
433            })
434            .or_else(|| {
435                // Construct a BrokenLink struct, which will be passed to the callback
436                let broken_link = BrokenLink {
437                    span,
438                    link_type,
439                    reference: link_label,
440                };
441
442                callbacks
443                    .handle_broken_link(broken_link)
444                    .map(|(url, title)| (link_type.to_unknown(), url, title))
445            })?;
446
447        // Limit expansion from link references.
448        // This isn't a problem for footnotes, because multiple references to the same one
449        // reuse the same node, but links/images get their HREF/SRC copied.
450        self.link_ref_expansion_limit = self
451            .link_ref_expansion_limit
452            .saturating_sub(url.len() + title.len());
453
454        Some((link_type, url, title))
455    }
456
457    /// Handle inline markup.
458    ///
459    /// When the parser encounters any item indicating potential inline markup, all
460    /// inline markup passes are run on the remainder of the chain.
461    ///
462    /// Note: there's some potential for optimization here, but that's future work.
463    pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
464        self.handle_inline_pass1(callbacks);
465        self.handle_emphasis_and_hard_break();
466    }
467
468    /// Handle inline HTML, code spans, and links.
469    ///
470    /// This function handles both inline HTML and code spans, because they have
471    /// the same precedence. It also handles links, even though they have lower
472    /// precedence, because the URL of links must not be processed.
473    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
474        let mut cur = self.tree.cur();
475        let mut prev = None;
476
477        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
478        let block_text = &self.text[..block_end];
479
480        while let Some(mut cur_ix) = cur {
481            match self.tree[cur_ix].item.body {
482                ItemBody::MaybeHtml => {
483                    // MDX inline JSX: check before HTML
484                    if self.options.contains(Options::ENABLE_MDX) {
485                        let start = self.tree[cur_ix].item.start;
486                        let next_byte = block_text.as_bytes().get(start + 1).copied();
487
488                        // In MDX, `<!` is not valid (no HTML comments).
489                        if next_byte == Some(b'!') {
490                            self.mdx_errors.push((
491                                start,
492                                "Unexpected character `!` (U+0021) before name, expected a \
493                                 character that can start a name, such as a letter, `$`, or `_` \
494                                 (note: to create a comment in MDX, use `{/* text */}`)"
495                                    .to_string(),
496                            ));
497                            self.tree[cur_ix].item.body = ItemBody::Text {
498                                backslash_escaped: false,
499                            };
500                            prev = cur;
501                            cur = self.tree[cur_ix].next;
502                            continue;
503                        }
504
505                        if let Some(total_len) =
506                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
507                        {
508                            let end = start + total_len;
509                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
510                            let raw = &block_text[start..end];
511                            let jsx_data = crate::mdx::parse_jsx_tag(raw);
512                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
513                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
514                            self.tree[cur_ix].item.end = end;
515                            self.tree[cur_ix].next = node;
516                            prev = cur;
517                            cur = node;
518                            if let Some(node_ix) = cur {
519                                self.tree[node_ix].item.start =
520                                    max(self.tree[node_ix].item.start, end);
521                            }
522                            continue;
523                        }
524
525                        // In MDX, `<` followed by a letter, `/`, or `>` must be
526                        // valid JSX.  If the JSX scan failed, record an error.
527                        if matches!(next_byte, Some(b'a'..=b'z' | b'A'..=b'Z' | b'/' | b'>')) {
528                            self.mdx_errors.push((
529                                start,
530                                "Unexpected character after `<`, expected a valid JSX tag \
531                                 (note: to create a link in MDX, use `[text](url)`)"
532                                    .to_string(),
533                            ));
534                        }
535
536                        self.tree[cur_ix].item.body = ItemBody::Text {
537                            backslash_escaped: false,
538                        };
539                        prev = cur;
540                        cur = self.tree[cur_ix].next;
541                        continue;
542                    }
543
544                    let next = self.tree[cur_ix].next;
545                    let autolink = if let Some(next_ix) = next {
546                        scan_autolink(block_text, self.tree[next_ix].item.start)
547                    } else {
548                        None
549                    };
550
551                    if let Some((ix, uri, link_type)) = autolink {
552                        let node = scan_nodes_to_ix(&self.tree, next, ix);
553                        let text_node = self.tree.create_node(Item {
554                            start: self.tree[cur_ix].item.start + 1,
555                            end: ix - 1,
556                            body: ItemBody::Text {
557                                backslash_escaped: false,
558                            },
559                        });
560                        let link_ix =
561                            self.allocs
562                                .allocate_link(link_type, uri, "".into(), "".into());
563                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
564                        self.tree[cur_ix].item.end = ix;
565                        self.tree[cur_ix].next = node;
566                        self.tree[cur_ix].child = Some(text_node);
567                        prev = cur;
568                        cur = node;
569                        if let Some(node_ix) = cur {
570                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
571                        }
572                        continue;
573                    } else {
574                        let inline_html = next.and_then(|next_ix| {
575                            self.scan_inline_html(
576                                block_text.as_bytes(),
577                                self.tree[next_ix].item.start,
578                            )
579                        });
580                        if let Some((span, ix)) = inline_html {
581                            let node = scan_nodes_to_ix(&self.tree, next, ix);
582                            self.tree[cur_ix].item.body = if !span.is_empty() {
583                                let converted_string =
584                                    String::from_utf8(span).expect("invalid utf8");
585                                ItemBody::OwnedInlineHtml(
586                                    self.allocs.allocate_cow(converted_string.into()),
587                                )
588                            } else {
589                                ItemBody::InlineHtml
590                            };
591                            self.tree[cur_ix].item.end = ix;
592                            self.tree[cur_ix].next = node;
593                            prev = cur;
594                            cur = node;
595                            if let Some(node_ix) = cur {
596                                self.tree[node_ix].item.start =
597                                    max(self.tree[node_ix].item.start, ix);
598                            }
599                            continue;
600                        }
601                    }
602                    self.tree[cur_ix].item.body = ItemBody::Text {
603                        backslash_escaped: false,
604                    };
605                }
606                ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
607                    if !can_open {
608                        self.tree[cur_ix].item.body = ItemBody::Text {
609                            backslash_escaped: false,
610                        };
611                        prev = cur;
612                        cur = self.tree[cur_ix].next;
613                        continue;
614                    }
615                    let is_display = self.tree[cur_ix].next.is_some_and(|next_ix| {
616                        matches!(
617                            self.tree[next_ix].item.body,
618                            ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
619                        )
620                    });
621                    let result = if self.math_delims.is_populated() {
622                        // we have previously scanned all math environment delimiters,
623                        // so we can reuse that work
624                        self.math_delims
625                            .find(&self.tree, cur_ix, is_display, brace_context)
626                    } else {
627                        // we haven't previously scanned all math delimiters,
628                        // so walk the AST
629                        let mut scan = self.tree[cur_ix].next;
630                        if is_display {
631                            // a display delimiter, `$$`, is actually two delimiters
632                            // skip the second one
633                            scan = self.tree[scan.unwrap()].next;
634                        }
635                        let mut invalid = false;
636                        while let Some(scan_ix) = scan {
637                            if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
638                                self.tree[scan_ix].item.body
639                            {
640                                let delim_is_display =
641                                    self.tree[scan_ix].next.is_some_and(|next_ix| {
642                                        matches!(
643                                            self.tree[next_ix].item.body,
644                                            ItemBody::MaybeMath(
645                                                _can_open,
646                                                _can_close,
647                                                _brace_context
648                                            )
649                                        )
650                                    });
651                                if !invalid && delim_brace_context == brace_context {
652                                    if (!is_display && can_close)
653                                        || (is_display && delim_is_display)
654                                    {
655                                        // This will skip ahead past everything we
656                                        // just inserted. Needed for correctness to
657                                        // ensure that a new scan is done after this item.
658                                        self.math_delims.clear();
659                                        break;
660                                    } else {
661                                        // Math cannot contain $, so the current item
662                                        // is invalid. Keep scanning to fill math_delims.
663                                        invalid = true;
664                                    }
665                                }
666                                self.math_delims.insert(
667                                    delim_is_display,
668                                    delim_brace_context,
669                                    scan_ix,
670                                    can_close,
671                                );
672                            }
673                            scan = self.tree[scan_ix].next;
674                        }
675                        scan
676                    };
677
678                    if let Some(scan_ix) = result {
679                        self.make_math_span(cur_ix, scan_ix);
680                    } else {
681                        self.tree[cur_ix].item.body = ItemBody::Text {
682                            backslash_escaped: false,
683                        };
684                    }
685                }
686                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
687                    if preceded_by_backslash {
688                        search_count -= 1;
689                        if search_count == 0 {
690                            self.tree[cur_ix].item.body = ItemBody::Text {
691                                backslash_escaped: false,
692                            };
693                            prev = cur;
694                            cur = self.tree[cur_ix].next;
695                            continue;
696                        }
697                    }
698
699                    if self.code_delims.is_populated() {
700                        // we have previously scanned all codeblock delimiters,
701                        // so we can reuse that work
702                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
703                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
704                        } else {
705                            self.tree[cur_ix].item.body = ItemBody::Text {
706                                backslash_escaped: false,
707                            };
708                        }
709                    } else {
710                        // we haven't previously scanned all codeblock delimiters,
711                        // so walk the AST
712                        let mut scan = if search_count > 0 {
713                            self.tree[cur_ix].next
714                        } else {
715                            None
716                        };
717                        while let Some(scan_ix) = scan {
718                            if let ItemBody::MaybeCode(delim_count, _) =
719                                self.tree[scan_ix].item.body
720                            {
721                                if search_count == delim_count {
722                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
723                                    self.code_delims.clear();
724                                    break;
725                                } else {
726                                    self.code_delims.insert(delim_count, scan_ix);
727                                }
728                            }
729                            scan = self.tree[scan_ix].next;
730                        }
731                        if scan.is_none() {
732                            self.tree[cur_ix].item.body = ItemBody::Text {
733                                backslash_escaped: false,
734                            };
735                        }
736                    }
737                }
738                ItemBody::MaybeLinkOpen => {
739                    self.tree[cur_ix].item.body = ItemBody::Text {
740                        backslash_escaped: false,
741                    };
742                    let link_open_doubled = self.tree[cur_ix]
743                        .next
744                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
745                        .unwrap_or(false);
746                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
747                        self.wikilink_stack.push(LinkStackEl {
748                            node: cur_ix,
749                            ty: LinkStackTy::Link,
750                        });
751                    }
752                    self.link_stack.push(LinkStackEl {
753                        node: cur_ix,
754                        ty: LinkStackTy::Link,
755                    });
756                }
757                ItemBody::MaybeImage => {
758                    self.tree[cur_ix].item.body = ItemBody::Text {
759                        backslash_escaped: false,
760                    };
761                    let link_open_doubled = self.tree[cur_ix]
762                        .next
763                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
764                        .unwrap_or(false);
765                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
766                        self.wikilink_stack.push(LinkStackEl {
767                            node: cur_ix,
768                            ty: LinkStackTy::Image,
769                        });
770                    }
771                    self.link_stack.push(LinkStackEl {
772                        node: cur_ix,
773                        ty: LinkStackTy::Image,
774                    });
775                }
776                ItemBody::MaybeLinkClose(could_be_ref) => {
777                    self.tree[cur_ix].item.body = ItemBody::Text {
778                        backslash_escaped: false,
779                    };
780                    let tos_link = self.link_stack.pop();
781                    if self.options.contains(Options::ENABLE_WIKILINKS)
782                        && self.tree[cur_ix]
783                            .next
784                            .map(|ix| {
785                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
786                            })
787                            .unwrap_or(false)
788                    {
789                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
790                            cur = self.tree[node].next;
791                            continue;
792                        }
793                    }
794                    if let Some(tos) = tos_link {
795                        // skip rendering if already in a link, unless its an
796                        // image
797                        if tos.ty != LinkStackTy::Image
798                            && matches!(
799                                self.tree[self.tree.peek_up().unwrap()].item.body,
800                                ItemBody::Link(..)
801                            )
802                        {
803                            continue;
804                        }
805                        if tos.ty == LinkStackTy::Disabled {
806                            continue;
807                        }
808                        let next = self.tree[cur_ix].next;
809                        if let Some((next_ix, url, title)) =
810                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
811                        {
812                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
813                            if let Some(prev_ix) = prev {
814                                self.tree[prev_ix].next = None;
815                            }
816                            cur = Some(tos.node);
817                            cur_ix = tos.node;
818                            let link_ix =
819                                self.allocs
820                                    .allocate_link(LinkType::Inline, url, title, "".into());
821                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
822                                ItemBody::Image(link_ix)
823                            } else {
824                                ItemBody::Link(link_ix)
825                            };
826                            self.tree[cur_ix].child = self.tree[cur_ix].next;
827                            self.tree[cur_ix].next = next_node;
828                            self.tree[cur_ix].item.end = next_ix;
829                            if let Some(next_node_ix) = next_node {
830                                self.tree[next_node_ix].item.start =
831                                    max(self.tree[next_node_ix].item.start, next_ix);
832                            }
833
834                            if tos.ty == LinkStackTy::Link {
835                                self.disable_all_links();
836                            }
837                        } else {
838                            // ok, so its not an inline link. maybe it is a reference
839                            // to a defined link?
840                            let scan_result =
841                                scan_reference(&self.tree, block_text, next, self.options);
842                            let (node_after_link, link_type) = match scan_result {
843                                // [label][reference]
844                                RefScan::LinkLabel(_, end_ix) => {
845                                    // Toggle reference viability of the last closing bracket,
846                                    // so that we can skip it on future iterations in case
847                                    // it fails in this one. In particular, we won't call
848                                    // the broken link callback twice on one reference.
849                                    let reference_close_node = if let Some(node) =
850                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
851                                    {
852                                        node
853                                    } else {
854                                        continue;
855                                    };
856                                    self.tree[reference_close_node].item.body =
857                                        ItemBody::MaybeLinkClose(false);
858                                    let next_node = self.tree[reference_close_node].next;
859
860                                    (next_node, LinkType::Reference)
861                                }
862                                // [reference][]
863                                RefScan::Collapsed(next_node) => {
864                                    // This reference has already been tried, and it's not
865                                    // valid. Skip it.
866                                    if !could_be_ref {
867                                        continue;
868                                    }
869                                    (next_node, LinkType::Collapsed)
870                                }
871                                // [shortcut]
872                                //
873                                // [shortcut]: /blah
874                                RefScan::Failed | RefScan::UnexpectedFootnote => {
875                                    if !could_be_ref {
876                                        continue;
877                                    }
878                                    (next, LinkType::Shortcut)
879                                }
880                            };
881
882                            // FIXME: references and labels are mixed in the naming of variables
883                            // below. Disambiguate!
884
885                            // (label, source_ix end)
886                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
887                                RefScan::LinkLabel(l, end_ix) => {
888                                    Some((ReferenceLabel::Link(l), end_ix))
889                                }
890                                RefScan::Collapsed(..)
891                                | RefScan::Failed
892                                | RefScan::UnexpectedFootnote => {
893                                    // No label? maybe it is a shortcut reference
894                                    let label_start = self.tree[tos.node].item.end - 1;
895                                    let label_end = self.tree[cur_ix].item.end;
896                                    scan_link_label(
897                                        &self.tree,
898                                        &self.text[label_start..label_end],
899                                        self.options,
900                                    )
901                                    .map(|(ix, label)| (label, label_start + ix))
902                                    .filter(|(_, end)| *end == label_end)
903                                }
904                            };
905
906                            let id = match &label {
907                                Some(
908                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
909                                ) => l.clone(),
910                                None => "".into(),
911                            };
912
913                            // see if it's a footnote reference
914                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
915                                let footref = self.allocs.allocate_cow(l);
916                                if let Some(def) = self
917                                    .allocs
918                                    .footdefs
919                                    .get_mut(self.allocs.cows[footref.0].to_owned())
920                                {
921                                    def.use_count += 1;
922                                }
923                                if !self.options.has_gfm_footnotes()
924                                    || self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
925                                {
926                                    // If this came from a MaybeImage, then the `!` prefix
927                                    // isn't part of the footnote reference.
928                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
929                                        self.tree[tos.node].next = Some(cur_ix);
930                                        self.tree[tos.node].child = None;
931                                        self.tree[tos.node].item.body =
932                                            ItemBody::SynthesizeChar('!');
933                                        self.tree[cur_ix].item.start =
934                                            self.tree[tos.node].item.start + 1;
935                                        self.tree[tos.node].item.end =
936                                            self.tree[tos.node].item.start + 1;
937                                        cur_ix
938                                    } else {
939                                        tos.node
940                                    };
941                                    // use `next` instead of `node_after_link` because
942                                    // node_after_link is calculated for a [collapsed][] link,
943                                    // which footnotes don't support.
944                                    self.tree[footnote_ix].next = next;
945                                    self.tree[footnote_ix].child = None;
946                                    self.tree[footnote_ix].item.body =
947                                        ItemBody::FootnoteReference(footref);
948                                    self.tree[footnote_ix].item.end = end;
949                                    prev = Some(footnote_ix);
950                                    cur = next;
951                                    self.link_stack.clear();
952                                    continue;
953                                }
954                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
955                                if let Some((def_link_type, url, title)) = self
956                                    .fetch_link_type_url_title(
957                                        link_label,
958                                        (self.tree[tos.node].item.start)..end,
959                                        link_type,
960                                        callbacks,
961                                    )
962                                {
963                                    let link_ix =
964                                        self.allocs.allocate_link(def_link_type, url, title, id);
965                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
966                                    {
967                                        ItemBody::Image(link_ix)
968                                    } else {
969                                        ItemBody::Link(link_ix)
970                                    };
971                                    let label_node = self.tree[tos.node].next;
972
973                                    // lets do some tree surgery to add the link to the tree
974                                    // 1st: skip the label node and close node
975                                    self.tree[tos.node].next = node_after_link;
976
977                                    // then, if it exists, add the label node as a child to the link node
978                                    if label_node != cur {
979                                        self.tree[tos.node].child = label_node;
980
981                                        // finally: disconnect list of children
982                                        if let Some(prev_ix) = prev {
983                                            self.tree[prev_ix].next = None;
984                                        }
985                                    }
986
987                                    self.tree[tos.node].item.end = end;
988
989                                    // set up cur so next node will be node_after_link
990                                    cur = Some(tos.node);
991                                    cur_ix = tos.node;
992
993                                    if tos.ty == LinkStackTy::Link {
994                                        self.disable_all_links();
995                                    }
996                                }
997                            }
998                        }
999                    }
1000                }
1001                _ => {}
1002            }
1003            prev = cur;
1004            cur = self.tree[cur_ix].next;
1005        }
1006        self.link_stack.clear();
1007        self.wikilink_stack.clear();
1008        self.code_delims.clear();
1009        self.math_delims.clear();
1010    }
1011
1012    /// Handles a wikilink.
1013    ///
1014    /// This function may bail early in case the link is malformed, so this
1015    /// acts as a control flow guard. Returns the link node if a wikilink was
1016    /// found and created.
1017    fn handle_wikilink(
1018        &mut self,
1019        block_text: &'input str,
1020        cur_ix: TreeIndex,
1021        prev: Option<TreeIndex>,
1022    ) -> Option<TreeIndex> {
1023        let next_ix = self.tree[cur_ix].next.unwrap();
1024        // this is a wikilink closing delim, try popping from
1025        // the wikilink stack
1026        if let Some(tos) = self.wikilink_stack.pop() {
1027            if tos.ty == LinkStackTy::Disabled {
1028                return None;
1029            }
1030            // fetches the beginning of the wikilink body
1031            let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1032                // skip if no next node exists, like at end of input
1033                return None;
1034            };
1035            let start_ix = self.tree[body_node].item.start;
1036            let end_ix = self.tree[cur_ix].item.start;
1037            let wikilink = match scan_wikilink_pipe(
1038                block_text,
1039                start_ix, // bounded by closing tag
1040                end_ix - start_ix,
1041            ) {
1042                Some((rest, wikitext)) => {
1043                    // bail early if the wikiname would be empty
1044                    if wikitext.is_empty() {
1045                        return None;
1046                    }
1047                    // [[WikiName|rest]]
1048                    let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1049                    if let Some(body_node) = body_node {
1050                        // break node so passes can actually format
1051                        // the display text
1052                        self.tree[body_node].item.start = rest;
1053                        Some((true, body_node, wikitext))
1054                    } else {
1055                        None
1056                    }
1057                }
1058                None => {
1059                    let wikitext = &block_text[start_ix..end_ix];
1060                    // bail early if the wikiname would be empty
1061                    if wikitext.is_empty() {
1062                        return None;
1063                    }
1064                    let body_node = self.tree.create_node(Item {
1065                        start: start_ix,
1066                        end: end_ix,
1067                        body: ItemBody::Text {
1068                            backslash_escaped: false,
1069                        },
1070                    });
1071                    Some((false, body_node, wikitext))
1072                }
1073            };
1074
1075            if let Some((has_pothole, body_node, wikiname)) = wikilink {
1076                let link_ix = self.allocs.allocate_link(
1077                    LinkType::WikiLink { has_pothole },
1078                    wikiname.into(),
1079                    "".into(),
1080                    "".into(),
1081                );
1082                if let Some(prev_ix) = prev {
1083                    self.tree[prev_ix].next = None;
1084                }
1085                if tos.ty == LinkStackTy::Image {
1086                    self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1087                } else {
1088                    self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1089                }
1090                self.tree[tos.node].child = Some(body_node);
1091                self.tree[tos.node].next = self.tree[next_ix].next;
1092                self.tree[tos.node].item.end = end_ix + 2;
1093                self.disable_all_links();
1094                return Some(tos.node);
1095            }
1096        }
1097
1098        None
1099    }
1100
1101    fn handle_emphasis_and_hard_break(&mut self) {
1102        let mut prev = None;
1103        let mut prev_ix: TreeIndex;
1104        let mut cur = self.tree.cur();
1105
1106        let mut single_quote_open: Option<TreeIndex> = None;
1107        let mut double_quote_open: bool = false;
1108
1109        while let Some(mut cur_ix) = cur {
1110            match self.tree[cur_ix].item.body {
1111                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
1112                    let run_length = count;
1113                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1114                    let both = can_open && can_close;
1115                    if can_close {
1116                        while let Some(el) =
1117                            self.inline_stack
1118                                .find_match(&mut self.tree, c, run_length, both)
1119                        {
1120                            // have a match!
1121                            if let Some(prev_ix) = prev {
1122                                self.tree[prev_ix].next = None;
1123                            }
1124                            let match_count = min(count, el.count);
1125                            // start, end are tree node indices
1126                            let mut end = cur_ix - 1;
1127                            let mut start = el.start + el.count;
1128
1129                            // work from the inside out
1130                            while start > el.start + el.count - match_count {
1131                                let inc = if start > el.start + el.count - match_count + 1 {
1132                                    2
1133                                } else {
1134                                    1
1135                                };
1136                                let ty = if c == b'~' {
1137                                    if inc == 2 {
1138                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1139                                            ItemBody::Strikethrough
1140                                        } else {
1141                                            ItemBody::Text {
1142                                                backslash_escaped: false,
1143                                            }
1144                                        }
1145                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1146                                        ItemBody::Subscript
1147                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1148                                        ItemBody::Strikethrough
1149                                    } else {
1150                                        ItemBody::Text {
1151                                            backslash_escaped: false,
1152                                        }
1153                                    }
1154                                } else if c == b'^' {
1155                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1156                                        ItemBody::Superscript
1157                                    } else {
1158                                        ItemBody::Text {
1159                                            backslash_escaped: false,
1160                                        }
1161                                    }
1162                                } else if inc == 2 {
1163                                    ItemBody::Strong
1164                                } else {
1165                                    ItemBody::Emphasis
1166                                };
1167
1168                                let root = start - inc;
1169                                end = end + inc;
1170                                self.tree[root].item.body = ty;
1171                                self.tree[root].item.end = self.tree[end].item.end;
1172                                self.tree[root].child = Some(start);
1173                                self.tree[root].next = None;
1174                                start = root;
1175                            }
1176
1177                            // set next for top most emph level
1178                            prev_ix = el.start + el.count - match_count;
1179                            prev = Some(prev_ix);
1180                            cur = self.tree[cur_ix + match_count - 1].next;
1181                            self.tree[prev_ix].next = cur;
1182
1183                            if el.count > match_count {
1184                                self.inline_stack.push(InlineEl {
1185                                    start: el.start,
1186                                    count: el.count - match_count,
1187                                    run_length: el.run_length,
1188                                    c: el.c,
1189                                    both: el.both,
1190                                })
1191                            }
1192                            count -= match_count;
1193                            if count > 0 {
1194                                cur_ix = cur.unwrap();
1195                            } else {
1196                                break;
1197                            }
1198                        }
1199                    }
1200                    if count > 0 {
1201                        if can_open {
1202                            self.inline_stack.push(InlineEl {
1203                                start: cur_ix,
1204                                run_length,
1205                                count,
1206                                c,
1207                                both,
1208                            });
1209                        } else {
1210                            for i in 0..count {
1211                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1212                                    backslash_escaped: false,
1213                                };
1214                            }
1215                        }
1216                        prev_ix = cur_ix + count - 1;
1217                        prev = Some(prev_ix);
1218                        cur = self.tree[prev_ix].next;
1219                    }
1220                }
1221                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1222                    self.tree[cur_ix].item.body = match c {
1223                        b'\'' => {
1224                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
1225                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1226                                single_quote_open = None;
1227                            } else if can_open {
1228                                single_quote_open = Some(cur_ix);
1229                            }
1230                            ItemBody::SynthesizeChar('’')
1231                        }
1232                        _ /* double quote */ => {
1233                            if can_close && double_quote_open {
1234                                double_quote_open = false;
1235                                ItemBody::SynthesizeChar('”')
1236                            } else {
1237                                if can_open && !double_quote_open {
1238                                    double_quote_open = true;
1239                                }
1240                                ItemBody::SynthesizeChar('“')
1241                            }
1242                        }
1243                    };
1244                    prev = cur;
1245                    cur = self.tree[cur_ix].next;
1246                }
1247                ItemBody::HardBreak(true) => {
1248                    if self.tree[cur_ix].next.is_none() {
1249                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1250                    }
1251                    prev = cur;
1252                    cur = self.tree[cur_ix].next;
1253                }
1254                _ => {
1255                    prev = cur;
1256                    cur = self.tree[cur_ix].next;
1257                }
1258            }
1259        }
1260        self.inline_stack.pop_all(&mut self.tree);
1261    }
1262
1263    fn disable_all_links(&mut self) {
1264        self.link_stack.disable_all_links();
1265        self.wikilink_stack.disable_all_links();
1266    }
1267
1268    /// Returns next byte index, url and title.
1269    fn scan_inline_link(
1270        &self,
1271        underlying: &'input str,
1272        mut ix: usize,
1273        node: Option<TreeIndex>,
1274    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1275        if underlying.as_bytes().get(ix) != Some(&b'(') {
1276            return None;
1277        }
1278        ix += 1;
1279
1280        let scan_separator = |ix: &mut usize| {
1281            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1282            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1283                *ix += bl;
1284                *ix += skip_container_prefixes(
1285                    &self.tree,
1286                    &underlying.as_bytes()[*ix..],
1287                    self.options,
1288                );
1289            }
1290            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1291        };
1292
1293        scan_separator(&mut ix);
1294
1295        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1296        let dest = unescape(dest, self.tree.is_in_table());
1297        ix += dest_length;
1298
1299        scan_separator(&mut ix);
1300
1301        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1302            ix += bytes_scanned;
1303            scan_separator(&mut ix);
1304            t
1305        } else {
1306            "".into()
1307        };
1308        if underlying.as_bytes().get(ix) != Some(&b')') {
1309            return None;
1310        }
1311        ix += 1;
1312
1313        Some((ix, dest, title))
1314    }
1315
1316    // returns (bytes scanned, title cow)
1317    fn scan_link_title(
1318        &self,
1319        text: &'input str,
1320        start_ix: usize,
1321        node: Option<TreeIndex>,
1322    ) -> Option<(usize, CowStr<'input>)> {
1323        let bytes = text.as_bytes();
1324        let open = match bytes.get(start_ix) {
1325            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1326            _ => return None,
1327        };
1328        let close = if open == b'(' { b')' } else { open };
1329
1330        let mut title = String::new();
1331        let mut mark = start_ix + 1;
1332        let mut i = start_ix + 1;
1333
1334        while i < bytes.len() {
1335            let c = bytes[i];
1336
1337            if c == close {
1338                let cow = if mark == 1 {
1339                    (i - start_ix + 1, text[mark..i].into())
1340                } else {
1341                    title.push_str(&text[mark..i]);
1342                    (i - start_ix + 1, title.into())
1343                };
1344
1345                return Some(cow);
1346            }
1347            if c == open {
1348                return None;
1349            }
1350
1351            if c == b'\n' || c == b'\r' {
1352                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1353                    if self.tree[node_ix].item.start > i {
1354                        title.push_str(&text[mark..i]);
1355                        title.push('\n');
1356                        i = self.tree[node_ix].item.start;
1357                        mark = i;
1358                        continue;
1359                    }
1360                }
1361            }
1362            if c == b'&' {
1363                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1364                    title.push_str(&text[mark..i]);
1365                    title.push_str(&value);
1366                    i += n;
1367                    mark = i;
1368                    continue;
1369                }
1370            }
1371            if self.tree.is_in_table()
1372                && c == b'\\'
1373                && i + 2 < bytes.len()
1374                && bytes[i + 1] == b'\\'
1375                && bytes[i + 2] == b'|'
1376            {
1377                // this runs if there are an even number of pipes in a table
1378                // if it's odd, then it gets parsed as normal
1379                title.push_str(&text[mark..i]);
1380                i += 2;
1381                mark = i;
1382            }
1383            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1384                title.push_str(&text[mark..i]);
1385                i += 1;
1386                mark = i;
1387            }
1388
1389            i += 1;
1390        }
1391
1392        None
1393    }
1394
1395    fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
1396        let start_is_display = self.tree[open].next.filter(|&next_ix| {
1397            next_ix != close
1398                && matches!(
1399                    self.tree[next_ix].item.body,
1400                    ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1401                )
1402        });
1403        let end_is_display = self.tree[close].next.filter(|&next_ix| {
1404            matches!(
1405                self.tree[next_ix].item.body,
1406                ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1407            )
1408        });
1409        let is_display = start_is_display.is_some() && end_is_display.is_some();
1410        if is_display {
1411            // This unwrap() can't panic, because if the next variable were None, end_is_display would be None
1412            close = self.tree[close].next.unwrap();
1413            self.tree[open].next = Some(close);
1414            self.tree[open].item.end += 1;
1415            self.tree[close].item.start -= 1;
1416        } else {
1417            if self.tree[open].item.end == self.tree[close].item.start {
1418                // inline math spans cannot be empty
1419                self.tree[open].item.body = ItemBody::Text {
1420                    backslash_escaped: false,
1421                };
1422                return;
1423            }
1424            self.tree[open].next = Some(close);
1425        }
1426        let span_start = self.tree[open].item.end;
1427        let span_end = self.tree[close].item.start;
1428
1429        let spanned_text = &self.text[span_start..span_end];
1430        let spanned_bytes = spanned_text.as_bytes();
1431        let mut buf: Option<String> = None;
1432
1433        let mut start_ix = 0;
1434        let mut ix = 0;
1435        while ix < spanned_bytes.len() {
1436            let c = spanned_bytes[ix];
1437            if c == b'\r' || c == b'\n' {
1438                ix += 1;
1439                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1440                buf.push_str(&spanned_text[start_ix..ix]);
1441                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1442                start_ix = ix;
1443            } else if c == b'\\'
1444                && spanned_bytes.get(ix + 1) == Some(&b'|')
1445                && self.tree.is_in_table()
1446            {
1447                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1448                buf.push_str(&spanned_text[start_ix..ix]);
1449                buf.push('|');
1450                ix += 2;
1451                start_ix = ix;
1452            } else {
1453                ix += 1;
1454            }
1455        }
1456
1457        let cow = if let Some(mut buf) = buf {
1458            buf.push_str(&spanned_text[start_ix..]);
1459            buf.into()
1460        } else {
1461            spanned_text.into()
1462        };
1463
1464        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
1465        self.tree[open].item.end = self.tree[close].item.end;
1466        self.tree[open].next = self.tree[close].next;
1467    }
1468
1469    /// Make a code span.
1470    ///
1471    /// Both `open` and `close` are matching MaybeCode items.
1472    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1473        let span_start = self.tree[open].item.end;
1474        let span_end = self.tree[close].item.start;
1475        let mut buf: Option<String> = None;
1476
1477        let spanned_text = &self.text[span_start..span_end];
1478        let spanned_bytes = spanned_text.as_bytes();
1479        let mut start_ix = 0;
1480        let mut ix = 0;
1481        while ix < spanned_bytes.len() {
1482            let c = spanned_bytes[ix];
1483            if c == b'\r' || c == b'\n' {
1484                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1485                buf.push_str(&spanned_text[start_ix..ix]);
1486                buf.push(' ');
1487                ix += 1;
1488                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1489                start_ix = ix;
1490            } else if c == b'\\'
1491                && spanned_bytes.get(ix + 1) == Some(&b'|')
1492                && self.tree.is_in_table()
1493            {
1494                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1495                buf.push_str(&spanned_text[start_ix..ix]);
1496                buf.push('|');
1497                ix += 2;
1498                start_ix = ix;
1499            } else {
1500                ix += 1;
1501            }
1502        }
1503
1504        let (opening, closing, all_spaces) = {
1505            let s = if let Some(buf) = &mut buf {
1506                buf.push_str(&spanned_text[start_ix..]);
1507                &buf[..]
1508            } else {
1509                spanned_text
1510            };
1511            (
1512                s.as_bytes().first() == Some(&b' '),
1513                s.as_bytes().last() == Some(&b' '),
1514                s.bytes().all(|b| b == b' '),
1515            )
1516        };
1517
1518        let cow: CowStr<'input> = if !all_spaces && opening && closing {
1519            if let Some(mut buf) = buf {
1520                if !buf.is_empty() {
1521                    buf.remove(0);
1522                    buf.pop();
1523                }
1524                buf.into()
1525            } else {
1526                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
1527            }
1528        } else if let Some(buf) = buf {
1529            buf.into()
1530        } else {
1531            spanned_text.into()
1532        };
1533
1534        if preceding_backslash {
1535            self.tree[open].item.body = ItemBody::Text {
1536                backslash_escaped: true,
1537            };
1538            self.tree[open].item.end = self.tree[open].item.start + 1;
1539            self.tree[open].next = Some(close);
1540            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1541            self.tree[close].item.start = self.tree[open].item.start + 1;
1542        } else {
1543            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1544            self.tree[open].item.end = self.tree[close].item.end;
1545            self.tree[open].next = self.tree[close].next;
1546        }
1547
1548        // MDX: errors recorded in pass 1 for `{` inside what turned out to be a
1549        // code span are false positives — the `{` is literal text.
1550        if !self.mdx_errors.is_empty() {
1551            self.mdx_errors
1552                .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
1553        }
1554    }
1555
1556    /// On success, returns a buffer containing the inline html and byte offset.
1557    /// When no bytes were skipped, the buffer will be empty and the html can be
1558    /// represented as a subslice of the input string.
1559    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1560        let c = *bytes.get(ix)?;
1561        if c == b'!' {
1562            Some((
1563                vec![],
1564                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1565            ))
1566        } else if c == b'?' {
1567            Some((
1568                vec![],
1569                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1570            ))
1571        } else {
1572            let (span, i) = scan_html_block_inner(
1573                // Subtract 1 to include the < character
1574                &bytes[(ix - 1)..],
1575                Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
1576            )?;
1577            Some((span, i + ix - 1))
1578        }
1579    }
1580}
1581
1582/// Returns number of containers scanned.
1583pub(crate) fn scan_containers(
1584    tree: &Tree<Item>,
1585    line_start: &mut LineStart<'_>,
1586    options: Options,
1587) -> usize {
1588    let mut i = 0;
1589    for &node_ix in tree.walk_spine() {
1590        match tree[node_ix].item.body {
1591            ItemBody::BlockQuote(..) => {
1592                let save = line_start.clone();
1593                let _ = line_start.scan_space(3);
1594                if !line_start.scan_blockquote_marker() {
1595                    *line_start = save;
1596                    break;
1597                }
1598            }
1599            ItemBody::ListItem(indent) => {
1600                let save = line_start.clone();
1601                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1602                    *line_start = save;
1603                    break;
1604                }
1605            }
1606            ItemBody::DefinitionListDefinition(indent) => {
1607                let save = line_start.clone();
1608                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1609                    *line_start = save;
1610                    break;
1611                }
1612            }
1613            ItemBody::FootnoteDefinition(..) if options.has_gfm_footnotes() => {
1614                let save = line_start.clone();
1615                if !line_start.scan_space(4) && !line_start.is_at_eol() {
1616                    *line_start = save;
1617                    break;
1618                }
1619            }
1620            _ => (),
1621        }
1622        i += 1;
1623    }
1624    i
1625}
1626
1627pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
1628    let mut line_start = LineStart::new(bytes);
1629    let _ = scan_containers(tree, &mut line_start, options);
1630    line_start.bytes_scanned()
1631}
1632
1633impl Tree<Item> {
1634    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1635        if end > start {
1636            if let Some(ix) = self.cur() {
1637                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1638                {
1639                    self[ix].item.end = end;
1640                    return;
1641                }
1642            }
1643            self.append(Item {
1644                start,
1645                end,
1646                body: ItemBody::Text { backslash_escaped },
1647            });
1648        }
1649    }
1650    /// Returns true if the current node is inside a table.
1651    ///
1652    /// If `cur` is an ItemBody::Table, it would return false,
1653    /// but since the `TableRow` and `TableHead` and `TableCell`
1654    /// are children of the table, anything doing inline parsing
1655    /// doesn't need to care about that.
1656    pub(crate) fn is_in_table(&self) -> bool {
1657        fn might_be_in_table(item: &Item) -> bool {
1658            item.body.is_inline()
1659                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1660                    | ItemBody::TableCell)
1661        }
1662        for &ix in self.walk_spine().rev() {
1663            if matches!(self[ix].item.body, ItemBody::Table(_)) {
1664                return true;
1665            }
1666            if !might_be_in_table(&self[ix].item) {
1667                return false;
1668            }
1669        }
1670        false
1671    }
1672}
1673
1674#[derive(Copy, Clone, Debug)]
1675struct InlineEl {
1676    /// offset of tree node
1677    start: TreeIndex,
1678    /// number of delimiters available for matching
1679    count: usize,
1680    /// length of the run that these delimiters came from
1681    run_length: usize,
1682    /// b'*', b'_', or b'~'
1683    c: u8,
1684    /// can both open and close
1685    both: bool,
1686}
1687
1688#[derive(Debug, Clone, Default)]
1689struct InlineStack {
1690    stack: Vec<InlineEl>,
1691    // Lower bounds for matching indices in the stack. For example
1692    // a strikethrough delimiter will never match with any element
1693    // in the stack with index smaller than
1694    // `lower_bounds[InlineStack::TILDES]`.
1695    lower_bounds: [usize; 10],
1696}
1697
1698impl InlineStack {
1699    /// These are indices into the lower bounds array.
1700    /// Not both refers to the property that the delimiter can not both
1701    /// be opener as a closer.
1702    const UNDERSCORE_NOT_BOTH: usize = 0;
1703    const ASTERISK_NOT_BOTH: usize = 1;
1704    const ASTERISK_BASE: usize = 2;
1705    const TILDES: usize = 5;
1706    const UNDERSCORE_BASE: usize = 6;
1707    const CIRCUMFLEXES: usize = 9;
1708
1709    fn pop_all(&mut self, tree: &mut Tree<Item>) {
1710        for el in self.stack.drain(..) {
1711            for i in 0..el.count {
1712                tree[el.start + i].item.body = ItemBody::Text {
1713                    backslash_escaped: false,
1714                };
1715            }
1716        }
1717        self.lower_bounds = [0; 10];
1718    }
1719
1720    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1721        if c == b'_' {
1722            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1723            if both {
1724                mod3_lower
1725            } else {
1726                min(
1727                    mod3_lower,
1728                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1729                )
1730            }
1731        } else if c == b'*' {
1732            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1733            if both {
1734                mod3_lower
1735            } else {
1736                min(
1737                    mod3_lower,
1738                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1739                )
1740            }
1741        } else if c == b'^' {
1742            self.lower_bounds[InlineStack::CIRCUMFLEXES]
1743        } else {
1744            self.lower_bounds[InlineStack::TILDES]
1745        }
1746    }
1747
1748    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1749        if c == b'_' {
1750            if both {
1751                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1752            } else {
1753                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1754            }
1755        } else if c == b'*' {
1756            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1757            if !both {
1758                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1759            }
1760        } else if c == b'^' {
1761            self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
1762        } else {
1763            self.lower_bounds[InlineStack::TILDES] = new_bound;
1764        }
1765    }
1766
1767    fn truncate(&mut self, new_bound: usize) {
1768        self.stack.truncate(new_bound);
1769        for lower_bound in &mut self.lower_bounds {
1770            if *lower_bound > new_bound {
1771                *lower_bound = new_bound;
1772            }
1773        }
1774    }
1775
1776    fn find_match(
1777        &mut self,
1778        tree: &mut Tree<Item>,
1779        c: u8,
1780        run_length: usize,
1781        both: bool,
1782    ) -> Option<InlineEl> {
1783        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1784        let res = self.stack[lowerbound..]
1785            .iter()
1786            .cloned()
1787            .enumerate()
1788            .rfind(|(_, el)| {
1789                if (c == b'~' || c == b'^') && run_length != el.run_length {
1790                    return false;
1791                }
1792                el.c == c
1793                    && (!both && !el.both
1794                        || !(run_length + el.run_length).is_multiple_of(3)
1795                        || run_length.is_multiple_of(3))
1796            });
1797
1798        if let Some((matching_ix, matching_el)) = res {
1799            let matching_ix = matching_ix + lowerbound;
1800            for el in &self.stack[(matching_ix + 1)..] {
1801                for i in 0..el.count {
1802                    tree[el.start + i].item.body = ItemBody::Text {
1803                        backslash_escaped: false,
1804                    };
1805                }
1806            }
1807            self.truncate(matching_ix);
1808            Some(matching_el)
1809        } else {
1810            self.set_lowerbound(c, run_length, both, self.stack.len());
1811            None
1812        }
1813    }
1814
1815    fn trim_lower_bound(&mut self, ix: usize) {
1816        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1817    }
1818
1819    fn push(&mut self, el: InlineEl) {
1820        if el.c == b'~' {
1821            self.trim_lower_bound(InlineStack::TILDES);
1822        } else if el.c == b'^' {
1823            self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
1824        }
1825        self.stack.push(el)
1826    }
1827}
1828
1829#[derive(Debug, Clone)]
1830enum RefScan<'a> {
1831    // label, source ix of label end
1832    LinkLabel(CowStr<'a>, usize),
1833    // contains next node index
1834    Collapsed(Option<TreeIndex>),
1835    UnexpectedFootnote,
1836    Failed,
1837}
1838
1839/// Skips forward within a block to a node which spans (ends inclusive) the given
1840/// index into the source.
1841fn scan_nodes_to_ix(
1842    tree: &Tree<Item>,
1843    mut node: Option<TreeIndex>,
1844    ix: usize,
1845) -> Option<TreeIndex> {
1846    while let Some(node_ix) = node {
1847        if tree[node_ix].item.end <= ix {
1848            node = tree[node_ix].next;
1849        } else {
1850            break;
1851        }
1852    }
1853    node
1854}
1855
1856/// Scans an inline link label, which cannot be interrupted.
1857/// Returns number of bytes (including brackets) and label on success.
1858fn scan_link_label<'text>(
1859    tree: &Tree<Item>,
1860    text: &'text str,
1861    options: Options,
1862) -> Option<(usize, ReferenceLabel<'text>)> {
1863    let bytes = text.as_bytes();
1864    if bytes.len() < 2 || bytes[0] != b'[' {
1865        return None;
1866    }
1867    let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
1868    if options.contains(Options::ENABLE_FOOTNOTES)
1869        && b'^' == bytes[1]
1870        && bytes.get(2) != Some(&b']')
1871    {
1872        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if options.has_gfm_footnotes() {
1873            &|_| None
1874        } else {
1875            &linebreak_handler
1876        };
1877        if let Some((byte_index, cow)) =
1878            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1879        {
1880            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1881        }
1882    }
1883    let (byte_index, cow) =
1884        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1885    Some((byte_index + 1, ReferenceLabel::Link(cow)))
1886}
1887
1888fn scan_reference<'b>(
1889    tree: &Tree<Item>,
1890    text: &'b str,
1891    cur: Option<TreeIndex>,
1892    options: Options,
1893) -> RefScan<'b> {
1894    let cur_ix = match cur {
1895        None => return RefScan::Failed,
1896        Some(cur_ix) => cur_ix,
1897    };
1898    let start = tree[cur_ix].item.start;
1899    let tail = &text.as_bytes()[start..];
1900
1901    if tail.starts_with(b"[]") {
1902        // TODO: this unwrap is sus and should be looked at closer
1903        let closing_node = tree[cur_ix].next.unwrap();
1904        RefScan::Collapsed(tree[closing_node].next)
1905    } else {
1906        let label = scan_link_label(tree, &text[start..], options);
1907        match label {
1908            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
1909            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
1910            None => RefScan::Failed,
1911        }
1912    }
1913}
1914
1915#[derive(Clone, Default)]
1916struct LinkStack {
1917    inner: Vec<LinkStackEl>,
1918    disabled_ix: usize,
1919}
1920
1921impl LinkStack {
1922    fn push(&mut self, el: LinkStackEl) {
1923        self.inner.push(el);
1924    }
1925
1926    fn pop(&mut self) -> Option<LinkStackEl> {
1927        let el = self.inner.pop();
1928        self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
1929        el
1930    }
1931
1932    fn clear(&mut self) {
1933        self.inner.clear();
1934        self.disabled_ix = 0;
1935    }
1936
1937    fn disable_all_links(&mut self) {
1938        for el in &mut self.inner[self.disabled_ix..] {
1939            if el.ty == LinkStackTy::Link {
1940                el.ty = LinkStackTy::Disabled;
1941            }
1942        }
1943        self.disabled_ix = self.inner.len();
1944    }
1945}
1946
1947#[derive(Clone, Debug)]
1948struct LinkStackEl {
1949    node: TreeIndex,
1950    ty: LinkStackTy,
1951}
1952
1953#[derive(PartialEq, Clone, Debug)]
1954enum LinkStackTy {
1955    Link,
1956    Image,
1957    Disabled,
1958}
1959
1960/// Contains the destination URL, title and source span of a reference definition.
1961#[derive(Clone, Debug)]
1962pub struct LinkDef<'a> {
1963    pub dest: CowStr<'a>,
1964    pub title: Option<CowStr<'a>>,
1965    pub span: Range<usize>,
1966}
1967
1968impl<'a> LinkDef<'a> {
1969    pub fn into_static(self) -> LinkDef<'static> {
1970        LinkDef {
1971            dest: self.dest.into_static(),
1972            title: self.title.map(|s| s.into_static()),
1973            span: self.span,
1974        }
1975    }
1976}
1977
1978/// Contains the destination URL, title and source span of a reference definition.
1979#[derive(Clone, Debug)]
1980pub struct FootnoteDef {
1981    pub use_count: usize,
1982}
1983
1984/// Tracks tree indices of code span delimiters of each length. It should prevent
1985/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1986struct CodeDelims {
1987    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
1988    seen_first: bool,
1989}
1990
1991impl CodeDelims {
1992    fn new() -> Self {
1993        Self {
1994            inner: Default::default(),
1995            seen_first: false,
1996        }
1997    }
1998
1999    fn insert(&mut self, count: usize, ix: TreeIndex) {
2000        if self.seen_first {
2001            self.inner.entry(count).or_default().push_back(ix);
2002        } else {
2003            // Skip the first insert, since that delimiter will always
2004            // be an opener and not a closer.
2005            self.seen_first = true;
2006        }
2007    }
2008
2009    fn is_populated(&self) -> bool {
2010        !self.inner.is_empty()
2011    }
2012
2013    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2014        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2015            if ix > open_ix {
2016                return Some(ix);
2017            }
2018        }
2019        None
2020    }
2021
2022    fn clear(&mut self) {
2023        self.inner.clear();
2024        self.seen_first = false;
2025    }
2026}
2027
2028/// Tracks brace contexts and delimiter length for math delimiters.
2029/// Provides amortized constant-time lookups.
2030struct MathDelims {
2031    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
2032}
2033
2034impl MathDelims {
2035    fn new() -> Self {
2036        Self {
2037            inner: Default::default(),
2038        }
2039    }
2040
2041    fn insert(
2042        &mut self,
2043        delim_is_display: bool,
2044        brace_context: u8,
2045        ix: TreeIndex,
2046        can_close: bool,
2047    ) {
2048        self.inner
2049            .entry(brace_context)
2050            .or_default()
2051            .push_back((ix, can_close, delim_is_display));
2052    }
2053
2054    fn is_populated(&self) -> bool {
2055        !self.inner.is_empty()
2056    }
2057
2058    fn find(
2059        &mut self,
2060        tree: &Tree<Item>,
2061        open_ix: TreeIndex,
2062        is_display: bool,
2063        brace_context: u8,
2064    ) -> Option<TreeIndex> {
2065        while let Some((ix, can_close, delim_is_display)) =
2066            self.inner.get_mut(&brace_context)?.pop_front()
2067        {
2068            if ix <= open_ix || (is_display && tree[open_ix].next == Some(ix)) {
2069                continue;
2070            }
2071            let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
2072            if (!is_display && can_close) || (is_display && delim_is_display) {
2073                return Some(ix);
2074            }
2075            // if we can't use it, leave it in the queue as a tombstone for the next
2076            // thing that tries to match it
2077            self.inner
2078                .get_mut(&brace_context)?
2079                .push_front((ix, can_close, delim_is_display));
2080            break;
2081        }
2082        None
2083    }
2084
2085    fn clear(&mut self) {
2086        self.inner.clear();
2087    }
2088}
2089
2090#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2091pub(crate) struct LinkIndex(usize);
2092
2093#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2094pub(crate) struct CowIndex(usize);
2095
2096#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2097pub(crate) struct AlignmentIndex(usize);
2098
2099#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2100pub(crate) struct HeadingIndex(NonZeroUsize);
2101
2102#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2103pub(crate) struct JsxElementIndex(usize);
2104
2105/// A parsed JSX attribute.
2106#[derive(Debug, Clone)]
2107pub(crate) enum JsxAttr<'a> {
2108    Boolean(CowStr<'a>),
2109    Literal(CowStr<'a>, CowStr<'a>),
2110    Expression(CowStr<'a>, CowStr<'a>),
2111    Spread(CowStr<'a>),
2112}
2113
2114/// Pre-parsed JSX element data (name + attributes + tag classification).
2115#[derive(Debug, Clone)]
2116pub(crate) struct JsxElementData<'a> {
2117    pub name: CowStr<'a>,
2118    pub attrs: Vec<JsxAttr<'a>>,
2119    pub raw: CowStr<'a>,
2120    pub is_closing: bool,
2121    pub is_self_closing: bool,
2122}
2123
2124#[derive(Clone)]
2125pub(crate) struct Allocations<'a> {
2126    pub refdefs: RefDefs<'a>,
2127    pub footdefs: FootnoteDefs<'a>,
2128    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
2129    cows: Vec<CowStr<'a>>,
2130    alignments: Vec<Vec<Alignment>>,
2131    headings: Vec<HeadingAttributes<'a>>,
2132    jsx_elements: Vec<JsxElementData<'a>>,
2133}
2134
2135/// Used by the heading attributes extension.
2136#[derive(Clone)]
2137pub(crate) struct HeadingAttributes<'a> {
2138    pub id: Option<CowStr<'a>>,
2139    pub classes: Vec<CowStr<'a>>,
2140    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
2141}
2142
2143/// Keeps track of the reference definitions defined in the document.
2144#[derive(Clone, Default, Debug)]
2145pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2146
2147/// Keeps track of the footnote definitions defined in the document.
2148#[derive(Clone, Default, Debug)]
2149pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2150
2151impl<'input, 'b, 's> RefDefs<'input>
2152where
2153    's: 'b,
2154{
2155    /// Performs a lookup on reference label using unicode case folding.
2156    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2157        self.0.get(&UniCase::new(key.into()))
2158    }
2159
2160    /// Provides an iterator over all the document's reference definitions.
2161    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2162        self.0.iter().map(|(k, v)| (k.as_ref(), v))
2163    }
2164}
2165
2166impl<'input, 'b, 's> FootnoteDefs<'input>
2167where
2168    's: 'b,
2169{
2170    /// Performs a lookup on reference label using unicode case folding.
2171    pub fn contains(&'s self, key: &'b str) -> bool {
2172        self.0.contains_key(&UniCase::new(key.into()))
2173    }
2174    /// Performs a lookup on reference label using unicode case folding.
2175    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2176        self.0.get_mut(&UniCase::new(key))
2177    }
2178}
2179
2180impl<'a> Allocations<'a> {
2181    pub fn new() -> Self {
2182        Self {
2183            refdefs: RefDefs::default(),
2184            footdefs: FootnoteDefs::default(),
2185            links: Vec::with_capacity(128),
2186            cows: Vec::new(),
2187            alignments: Vec::new(),
2188            headings: Vec::new(),
2189            jsx_elements: Vec::new(),
2190        }
2191    }
2192
2193    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2194        let ix = self.cows.len();
2195        self.cows.push(cow);
2196        CowIndex(ix)
2197    }
2198
2199    pub fn allocate_link(
2200        &mut self,
2201        ty: LinkType,
2202        url: CowStr<'a>,
2203        title: CowStr<'a>,
2204        id: CowStr<'a>,
2205    ) -> LinkIndex {
2206        let ix = self.links.len();
2207        self.links.push((ty, url, title, id));
2208        LinkIndex(ix)
2209    }
2210
2211    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2212        let ix = self.alignments.len();
2213        self.alignments.push(alignment);
2214        AlignmentIndex(ix)
2215    }
2216
2217    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2218        let ix = self.headings.len();
2219        self.headings.push(attrs);
2220        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
2221        // such a long Vec cannot fit in memory.
2222        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2223        HeadingIndex(ix_nonzero)
2224    }
2225
2226    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2227        core::mem::replace(&mut self.cows[ix.0], "".into())
2228    }
2229
2230    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2231        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2232        core::mem::replace(&mut self.links[ix.0], default_link)
2233    }
2234
2235    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2236        core::mem::take(&mut self.alignments[ix.0])
2237    }
2238
2239    pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
2240        let ix = self.jsx_elements.len();
2241        self.jsx_elements.push(data);
2242        JsxElementIndex(ix)
2243    }
2244
2245    pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
2246        core::mem::replace(
2247            &mut self.jsx_elements[ix.0],
2248            JsxElementData {
2249                name: "".into(),
2250                attrs: Vec::new(),
2251                raw: "".into(),
2252                is_closing: false,
2253                is_self_closing: false,
2254            },
2255        )
2256    }
2257}
2258
2259impl<'a> Index<CowIndex> for Allocations<'a> {
2260    type Output = CowStr<'a>;
2261
2262    fn index(&self, ix: CowIndex) -> &Self::Output {
2263        self.cows.index(ix.0)
2264    }
2265}
2266
2267impl<'a> Index<LinkIndex> for Allocations<'a> {
2268    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
2269
2270    fn index(&self, ix: LinkIndex) -> &Self::Output {
2271        self.links.index(ix.0)
2272    }
2273}
2274
2275impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2276    type Output = Vec<Alignment>;
2277
2278    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2279        self.alignments.index(ix.0)
2280    }
2281}
2282
2283impl<'a> Index<HeadingIndex> for Allocations<'a> {
2284    type Output = HeadingAttributes<'a>;
2285
2286    fn index(&self, ix: HeadingIndex) -> &Self::Output {
2287        self.headings.index(ix.0.get() - 1)
2288    }
2289}
2290
2291/// A struct containing information on the reachability of certain inline HTML
2292/// elements. In particular, for cdata elements (`<![CDATA[`), processing
2293/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
2294/// represent the indices before which a scan will always fail and can hence
2295/// be skipped.
2296#[derive(Clone, Default)]
2297pub(crate) struct HtmlScanGuard {
2298    pub cdata: usize,
2299    pub processing: usize,
2300    pub declaration: usize,
2301    pub comment: usize,
2302}
2303
2304/// Trait to customize [`Parser`] behavior with callbacks. See [`Parser::new_with_callbacks`].
2305///
2306/// All methods have a default implementation, so you can choose which ones to override.
2307pub trait ParserCallbacks<'input> {
2308    /// Potentially provide a custom definition for a broken link.
2309    ///
2310    /// In case the parser encounters any potential links that have a broken
2311    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
2312    /// this callback will be called with information about the reference,
2313    /// and the returned pair will be used as the link URL and title if it is not
2314    /// `None`.
2315    fn handle_broken_link(
2316        &mut self,
2317        #[allow(unused_variables)] link: BrokenLink<'input>,
2318    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2319        None
2320    }
2321}
2322
2323/// Wrapper to implement [`ParserCallbacks::handle_broken_link`] with a closure.
2324///
2325/// Used internally by [`Parser::new_with_broken_link_callback`].
2326#[allow(missing_debug_implementations)]
2327pub struct BrokenLinkCallback<F>(Option<F>);
2328
2329impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
2330where
2331    F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
2332{
2333    fn handle_broken_link(
2334        &mut self,
2335        link: BrokenLink<'input>,
2336    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2337        self.0.as_mut().and_then(|cb| cb(link))
2338    }
2339}
2340
2341impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
2342    fn handle_broken_link(
2343        &mut self,
2344        link: BrokenLink<'input>,
2345    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2346        (**self).handle_broken_link(link)
2347    }
2348}
2349
2350/// [Parser] callbacks that do nothing.
2351///
2352/// Used when no custom callbacks are provided.
2353#[allow(missing_debug_implementations)]
2354pub struct DefaultParserCallbacks;
2355
2356impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
2357
2358/// Markdown event and source range iterator.
2359///
2360/// Generates tuples where the first element is the markdown event and the second
2361/// is a the corresponding range in the source string.
2362///
2363/// Constructed from a `Parser` using its
2364/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
2365#[derive(Debug)]
2366pub struct OffsetIter<'a, CB> {
2367    parser: Parser<'a, CB>,
2368}
2369
2370impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
2371    /// Returns a reference to the internal reference definition tracker.
2372    pub fn reference_definitions(&self) -> &RefDefs<'_> {
2373        self.parser.reference_definitions()
2374    }
2375
2376    /// Returns MDX validation errors collected during parsing.
2377    pub fn mdx_errors(&self) -> &[(usize, String)] {
2378        self.parser.mdx_errors()
2379    }
2380}
2381
2382impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
2383    type Item = (Event<'a>, Range<usize>);
2384
2385    fn next(&mut self) -> Option<Self::Item> {
2386        self.parser
2387            .inner
2388            .next_event_range(&mut self.parser.callbacks)
2389    }
2390}
2391
2392impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
2393    type Item = Event<'a>;
2394
2395    fn next(&mut self) -> Option<Event<'a>> {
2396        self.inner
2397            .next_event_range(&mut self.callbacks)
2398            .map(|(event, _range)| event)
2399    }
2400}
2401
2402impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
2403
2404impl<'input> ParserInner<'input> {
2405    fn next_event_range(
2406        &mut self,
2407        callbacks: &mut dyn ParserCallbacks<'input>,
2408    ) -> Option<(Event<'input>, Range<usize>)> {
2409        match self.tree.cur() {
2410            None => {
2411                let ix = self.tree.pop()?;
2412                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
2413                    // tight paragraphs emit nothing
2414                    self.tree.next_sibling(ix);
2415                    return self.next_event_range(callbacks);
2416                } else {
2417                    ix
2418                };
2419                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2420                self.tree.next_sibling(ix);
2421                let span = self.tree[ix].item.start..self.tree[ix].item.end;
2422                debug_assert!(span.start <= span.end);
2423                Some((Event::End(tag_end), span))
2424            }
2425            Some(cur_ix) => {
2426                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
2427                    // tight paragraphs emit nothing
2428                    self.tree.push();
2429                    self.tree.cur().unwrap()
2430                } else {
2431                    cur_ix
2432                };
2433                if self.tree[cur_ix].item.body.is_maybe_inline() {
2434                    self.handle_inline(callbacks);
2435                }
2436
2437                let node = self.tree[cur_ix];
2438                let item = node.item;
2439                let event = item_to_event(item, self.text, &mut self.allocs);
2440                if let Event::Start(..) = event {
2441                    self.tree.push();
2442                } else {
2443                    self.tree.next_sibling(cur_ix);
2444                }
2445                debug_assert!(item.start <= item.end);
2446                Some((event, item.start..item.end))
2447            }
2448        }
2449    }
2450}
2451
2452fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2453    match *body {
2454        ItemBody::Paragraph => TagEnd::Paragraph,
2455        ItemBody::Emphasis => TagEnd::Emphasis,
2456        ItemBody::Superscript => TagEnd::Superscript,
2457        ItemBody::Subscript => TagEnd::Subscript,
2458        ItemBody::Strong => TagEnd::Strong,
2459        ItemBody::Strikethrough => TagEnd::Strikethrough,
2460        ItemBody::Link(..) => TagEnd::Link,
2461        ItemBody::Image(..) => TagEnd::Image,
2462        ItemBody::Heading(level, _) => TagEnd::Heading(level),
2463        ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2464        ItemBody::Container(_, kind, _) => TagEnd::ContainerBlock(kind),
2465        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2466        ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2467        ItemBody::List(_, c, _) => {
2468            let is_ordered = c == b'.' || c == b')';
2469            TagEnd::List(is_ordered)
2470        }
2471        ItemBody::ListItem(_) => TagEnd::Item,
2472        ItemBody::TableHead => TagEnd::TableHead,
2473        ItemBody::TableCell => TagEnd::TableCell,
2474        ItemBody::TableRow => TagEnd::TableRow,
2475        ItemBody::Table(..) => TagEnd::Table,
2476        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2477        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2478        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2479        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2480        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2481        ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
2482        ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
2483        _ => panic!("unexpected item body {:?}", body),
2484    }
2485}
2486
2487fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2488    let tag = match item.body {
2489        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2490        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2491        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2492        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2493        ItemBody::HtmlBlock => Tag::HtmlBlock,
2494        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2495        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2496        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2497        ItemBody::SoftBreak => return Event::SoftBreak,
2498        ItemBody::HardBreak(_) => return Event::HardBreak,
2499        ItemBody::FootnoteReference(cow_ix) => {
2500            return Event::FootnoteReference(allocs.take_cow(cow_ix))
2501        }
2502        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2503        ItemBody::Rule => return Event::Rule,
2504        ItemBody::Paragraph => Tag::Paragraph,
2505        ItemBody::Emphasis => Tag::Emphasis,
2506        ItemBody::Superscript => Tag::Superscript,
2507        ItemBody::Subscript => Tag::Subscript,
2508        ItemBody::Strong => Tag::Strong,
2509        ItemBody::Strikethrough => Tag::Strikethrough,
2510        ItemBody::Link(link_ix) => {
2511            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2512            Tag::Link {
2513                link_type,
2514                dest_url,
2515                title,
2516                id,
2517            }
2518        }
2519        ItemBody::Image(link_ix) => {
2520            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2521            Tag::Image {
2522                link_type,
2523                dest_url,
2524                title,
2525                id,
2526            }
2527        }
2528        ItemBody::Heading(level, Some(heading_ix)) => {
2529            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2530            Tag::Heading {
2531                level,
2532                id: id.clone(),
2533                classes: classes.clone(),
2534                attrs: attrs.clone(),
2535            }
2536        }
2537        ItemBody::Heading(level, None) => Tag::Heading {
2538            level,
2539            id: None,
2540            classes: Vec::new(),
2541            attrs: Vec::new(),
2542        },
2543        ItemBody::FencedCodeBlock(cow_ix) => {
2544            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2545        }
2546        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2547        ItemBody::Container(_, kind, cow_ix) => Tag::ContainerBlock(kind, allocs.take_cow(cow_ix)),
2548        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2549        ItemBody::List(is_tight, c, listitem_start) => {
2550            if c == b'.' || c == b')' {
2551                Tag::List(Some(listitem_start), is_tight)
2552            } else {
2553                Tag::List(None, is_tight)
2554            }
2555        }
2556        ItemBody::ListItem(_) => Tag::Item,
2557        ItemBody::TableHead => Tag::TableHead,
2558        ItemBody::TableCell => Tag::TableCell,
2559        ItemBody::TableRow => Tag::TableRow,
2560        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2561        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2562        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2563        ItemBody::Math(cow_ix, is_display) => {
2564            return if is_display {
2565                Event::DisplayMath(allocs.take_cow(cow_ix))
2566            } else {
2567                Event::InlineMath(allocs.take_cow(cow_ix))
2568            }
2569        }
2570        ItemBody::DefinitionList(_) => Tag::DefinitionList,
2571        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2572        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2573        ItemBody::MdxJsxFlowElement(jsx_ix) => {
2574            let jsx = allocs.take_jsx_element(jsx_ix);
2575            Tag::MdxJsxFlowElement(jsx.raw)
2576        }
2577        ItemBody::MdxJsxTextElement(jsx_ix) => {
2578            let jsx = allocs.take_jsx_element(jsx_ix);
2579            Tag::MdxJsxTextElement(jsx.raw)
2580        }
2581        ItemBody::MdxFlowExpression(cow_ix) => {
2582            return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
2583        }
2584        ItemBody::MdxTextExpression(cow_ix) => {
2585            return Event::MdxTextExpression(allocs.take_cow(cow_ix))
2586        }
2587        ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
2588        _ => panic!("unexpected item body {:?}", item.body),
2589    };
2590
2591    Event::Start(tag)
2592}
2593
2594#[cfg(test)]
2595mod test {
2596    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};
2597
2598    use super::*;
2599    use crate::tree::Node;
2600
2601    // TODO: move these tests to tests/html.rs?
2602
2603    fn parser_with_extensions(text: &str) -> Parser<'_> {
2604        let mut opts = Options::empty();
2605        opts.insert(Options::ENABLE_TABLES);
2606        opts.insert(Options::ENABLE_FOOTNOTES);
2607        opts.insert(Options::ENABLE_STRIKETHROUGH);
2608        opts.insert(Options::ENABLE_SUPERSCRIPT);
2609        opts.insert(Options::ENABLE_SUBSCRIPT);
2610        opts.insert(Options::ENABLE_TASKLISTS);
2611
2612        Parser::new_ext(text, opts)
2613    }
2614
2615    #[test]
2616    #[cfg(target_pointer_width = "64")]
2617    fn node_size() {
2618        let node_size = core::mem::size_of::<Node<Item>>();
2619        assert_eq!(48, node_size);
2620    }
2621
2622    #[test]
2623    #[cfg(target_pointer_width = "64")]
2624    fn body_size() {
2625        let body_size = core::mem::size_of::<ItemBody>();
2626        assert_eq!(16, body_size);
2627    }
2628
2629    #[test]
2630    fn single_open_fish_bracket() {
2631        // dont crash
2632        assert_eq!(3, Parser::new("<").count());
2633    }
2634
2635    #[test]
2636    fn lone_hashtag() {
2637        // dont crash
2638        assert_eq!(2, Parser::new("#").count());
2639    }
2640
2641    #[test]
2642    fn lots_of_backslashes() {
2643        // dont crash
2644        Parser::new("\\\\\r\r").count();
2645        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2646    }
2647
2648    #[test]
2649    fn issue_1030() {
2650        let mut opts = Options::empty();
2651        opts.insert(Options::ENABLE_WIKILINKS);
2652
2653        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);
2654
2655        let offsets = parser
2656            .into_offset_iter()
2657            .map(|(_ev, range)| range)
2658            .collect::<Vec<_>>();
2659        let expected_offsets = vec![
2660            (0..44),  // Paragraph START
2661            (0..19),  // `For a new ferrari, `
2662            (19..43), // Wikilink START
2663            (31..41), // `click here`
2664            (19..43), // Wikilink END
2665            (43..44), // `!`
2666            (0..44),  // Paragraph END
2667        ];
2668        assert_eq!(offsets, expected_offsets);
2669    }
2670
2671    #[test]
2672    fn issue_320() {
2673        // dont crash
2674        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2675    }
2676
2677    #[test]
2678    fn issue_319() {
2679        // dont crash
2680        parser_with_extensions("|\r-]([^|\r-]([^").count();
2681        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2682    }
2683
2684    #[test]
2685    fn issue_303() {
2686        // dont crash
2687        parser_with_extensions("[^\r\ra]").count();
2688        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2689    }
2690
2691    #[test]
2692    fn issue_313() {
2693        // dont crash
2694        parser_with_extensions("*]0[^\r\r*]0[^").count();
2695        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2696    }
2697
2698    #[test]
2699    fn issue_311() {
2700        // dont crash
2701        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2702    }
2703
2704    #[test]
2705    fn issue_283() {
2706        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2707        // dont crash
2708        parser_with_extensions(input).count();
2709    }
2710
2711    #[test]
2712    fn issue_289() {
2713        // dont crash
2714        parser_with_extensions("> - \\\n> - ").count();
2715        parser_with_extensions("- \n\n").count();
2716    }
2717
2718    #[test]
2719    fn issue_306() {
2720        // dont crash
2721        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2722    }
2723
2724    #[test]
2725    fn issue_305() {
2726        // dont crash
2727        parser_with_extensions("_6**6*_*").count();
2728    }
2729
2730    #[test]
2731    fn another_emphasis_panic() {
2732        parser_with_extensions("*__#_#__*").count();
2733    }
2734
2735    #[test]
2736    fn offset_iter() {
2737        let event_offsets: Vec<_> = Parser::new("*hello* world")
2738            .into_offset_iter()
2739            .map(|(_ev, range)| range)
2740            .collect();
2741        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2742        assert_eq!(expected_offsets, event_offsets);
2743    }
2744
2745    #[test]
2746    fn reference_link_offsets() {
2747        let range =
2748            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2749                .into_offset_iter()
2750                .filter_map(|(ev, range)| match ev {
2751                    Event::Start(
2752                        Tag::Link {
2753                            link_type: LinkType::Reference,
2754                            ..
2755                        },
2756                        ..,
2757                    ) => Some(range),
2758                    _ => None,
2759                })
2760                .next()
2761                .unwrap();
2762        assert_eq!(5..30, range);
2763    }
2764
2765    #[test]
2766    fn footnote_offsets() {
2767        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
2768            .into_offset_iter()
2769            .filter_map(|(ev, range)| match ev {
2770                Event::FootnoteReference(..) => Some(range),
2771                _ => None,
2772            })
2773            .next()
2774            .unwrap();
2775        assert_eq!(12..16, range);
2776    }
2777
2778    #[test]
2779    fn footnote_offsets_exclamation() {
2780        let mut immediately_before_footnote = None;
2781        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
2782            .into_offset_iter()
2783            .filter_map(|(ev, range)| match ev {
2784                Event::FootnoteReference(..) => Some(range),
2785                _ => {
2786                    immediately_before_footnote = Some((ev, range));
2787                    None
2788                }
2789            })
2790            .next()
2791            .unwrap();
2792        assert_eq!(13..17, range);
2793        if let (Event::Text(exclamation), range_exclamation) =
2794            immediately_before_footnote.as_ref().unwrap()
2795        {
2796            assert_eq!("!", &exclamation[..]);
2797            assert_eq!(&(12..13), range_exclamation);
2798        } else {
2799            panic!("what came first, then? {immediately_before_footnote:?}");
2800        }
2801    }
2802
2803    #[test]
2804    fn table_offset() {
2805        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
2806        let event_offset = parser_with_extensions(markdown)
2807            .into_offset_iter()
2808            .map(|(_ev, range)| range)
2809            .nth(3)
2810            .unwrap();
2811        let expected_offset = 3..59;
2812        assert_eq!(expected_offset, event_offset);
2813    }
2814
2815    #[test]
2816    fn table_cell_span() {
2817        let markdown = "a|b|c\n--|--|--\na|  |c";
2818        let event_offset = parser_with_extensions(markdown)
2819            .into_offset_iter()
2820            .filter_map(|(ev, span)| match ev {
2821                Event::Start(Tag::TableCell) => Some(span),
2822                _ => None,
2823            })
2824            .nth(4)
2825            .unwrap();
2826        let expected_offset_start = "a|b|c\n--|--|--\na|".len();
2827        assert_eq!(
2828            expected_offset_start..(expected_offset_start + 2),
2829            event_offset
2830        );
2831    }
2832
2833    #[test]
2834    fn offset_iter_issue_378() {
2835        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2836            .into_offset_iter()
2837            .map(|(_ev, range)| range)
2838            .collect();
2839        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
2840        assert_eq!(expected_offsets, event_offsets);
2841    }
2842
2843    #[test]
2844    fn offset_iter_issue_404() {
2845        let event_offsets: Vec<_> = Parser::new("###\n")
2846            .into_offset_iter()
2847            .map(|(_ev, range)| range)
2848            .collect();
2849        let expected_offsets = vec![(0..4), (0..4)];
2850        assert_eq!(expected_offsets, event_offsets);
2851    }
2852
2853    // FIXME: add this one regression suite
2854    #[cfg(feature = "html")]
2855    #[test]
2856    fn link_def_at_eof() {
2857        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
2858        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
2859
2860        let mut buf = String::new();
2861        crate::html::push_html(&mut buf, Parser::new(test_str));
2862        assert_eq!(expected, buf);
2863    }
2864
2865    #[cfg(feature = "html")]
2866    #[test]
2867    fn no_footnote_refs_without_option() {
2868        let test_str = "a [^a]\n\n[^a]: yolo";
2869        let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";
2870
2871        let mut buf = String::new();
2872        crate::html::push_html(&mut buf, Parser::new(test_str));
2873        assert_eq!(expected, buf);
2874    }
2875
2876    #[cfg(feature = "html")]
2877    #[test]
2878    fn ref_def_at_eof() {
2879        let test_str = "[test]:\\";
2880        let expected = "";
2881
2882        let mut buf = String::new();
2883        crate::html::push_html(&mut buf, Parser::new(test_str));
2884        assert_eq!(expected, buf);
2885    }
2886
2887    #[cfg(feature = "html")]
2888    #[test]
2889    fn ref_def_cr_lf() {
2890        let test_str = "[a]: /u\r\n\n[a]";
2891        let expected = "<p><a href=\"/u\">a</a></p>\n";
2892
2893        let mut buf = String::new();
2894        crate::html::push_html(&mut buf, Parser::new(test_str));
2895        assert_eq!(expected, buf);
2896    }
2897
2898    #[cfg(feature = "html")]
2899    #[test]
2900    fn no_dest_refdef() {
2901        let test_str = "[a]:";
2902        let expected = "<p>[a]:</p>\n";
2903
2904        let mut buf = String::new();
2905        crate::html::push_html(&mut buf, Parser::new(test_str));
2906        assert_eq!(expected, buf);
2907    }
2908
2909    #[test]
2910    fn broken_links_called_only_once() {
2911        for &(markdown, expected) in &[
2912            ("See also [`g()`][crate::g].", 1),
2913            ("See also [`g()`][crate::g][].", 1),
2914            ("[brokenlink1] some other node [brokenlink2]", 2),
2915        ] {
2916            let mut times_called = 0;
2917            let callback = &mut |_broken_link: BrokenLink| {
2918                times_called += 1;
2919                None
2920            };
2921            let parser =
2922                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
2923            for _ in parser {}
2924            assert_eq!(times_called, expected);
2925        }
2926    }
2927
2928    #[test]
2929    fn simple_broken_link_callback() {
2930        let test_str = "This is a link w/o def: [hello][world]";
2931        let mut callback = |broken_link: BrokenLink| {
2932            assert_eq!("world", broken_link.reference.as_ref());
2933            assert_eq!(&test_str[broken_link.span], "[hello][world]");
2934            let url = "YOLO".into();
2935            let title = "SWAG".to_owned().into();
2936            Some((url, title))
2937        };
2938        let parser =
2939            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
2940        let mut link_tag_count = 0;
2941        for (typ, url, title, id) in parser.filter_map(|event| match event {
2942            Event::Start(Tag::Link {
2943                link_type,
2944                dest_url,
2945                title,
2946                id,
2947            }) => Some((link_type, dest_url, title, id)),
2948            _ => None,
2949        }) {
2950            link_tag_count += 1;
2951            assert_eq!(typ, LinkType::ReferenceUnknown);
2952            assert_eq!(url.as_ref(), "YOLO");
2953            assert_eq!(title.as_ref(), "SWAG");
2954            assert_eq!(id.as_ref(), "world");
2955        }
2956        assert!(link_tag_count > 0);
2957    }
2958
2959    #[test]
2960    fn code_block_kind_check_fenced() {
2961        let parser = Parser::new("hello\n```test\ntadam\n```");
2962        let mut found = 0;
2963        for (ev, _range) in parser.into_offset_iter() {
2964            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
2965                assert_eq!(syntax.as_ref(), "test");
2966                found += 1;
2967            }
2968        }
2969        assert_eq!(found, 1);
2970    }
2971
2972    #[test]
2973    fn code_block_kind_check_indented() {
2974        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
2975        let mut found = 0;
2976        for (ev, _range) in parser.into_offset_iter() {
2977            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
2978                found += 1;
2979            }
2980        }
2981        assert_eq!(found, 1);
2982    }
2983
2984    #[test]
2985    fn ref_defs() {
2986        let input = r###"[a B c]: http://example.com
2987[another]: https://google.com
2988
2989text
2990
2991[final ONE]: http://wikipedia.org
2992"###;
2993        let mut parser = Parser::new(input);
2994
2995        assert!(parser.reference_definitions().get("a b c").is_some());
2996        assert!(parser.reference_definitions().get("nope").is_none());
2997
2998        if let Some(_event) = parser.next() {
2999            // testing keys with shorter lifetimes than parser and its input
3000            let s = "final one".to_owned();
3001            let link_def = parser.reference_definitions().get(&s).unwrap();
3002            let span = &input[link_def.span.clone()];
3003            assert_eq!(span, "[final ONE]: http://wikipedia.org");
3004        }
3005    }
3006
3007    #[test]
3008    #[allow(clippy::extra_unused_lifetimes)]
3009    fn common_lifetime_patterns_allowed<'b>() {
3010        let temporary_str = String::from("xyz");
3011
3012        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
3013        // Hack it by attaching the lifetime to the test function instead.
3014        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
3015        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));
3016
3017        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
3018            Some(("#".into(), link.reference))
3019        }
3020
3021        for _ in Parser::new_with_broken_link_callback(
3022            "static lifetime",
3023            Options::empty(),
3024            Some(&mut closure),
3025        ) {}
3026        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
3027         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
3028         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
3029         * fix is simple: move it out to a function that allows annotating the lifetimes.
3030         */
3031        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
3032        //}
3033
3034        for _ in Parser::new_with_broken_link_callback(
3035            "static lifetime",
3036            Options::empty(),
3037            Some(&mut function),
3038        ) {}
3039        for _ in Parser::new_with_broken_link_callback(
3040            &temporary_str,
3041            Options::empty(),
3042            Some(&mut function),
3043        ) {}
3044    }
3045
3046    #[test]
3047    fn inline_html_inside_blockquote() {
3048        // Regression for #960
3049        let input = "> <foo\n> bar>";
3050        let events: Vec<_> = Parser::new(input).collect();
3051        let expected = [
3052            Event::Start(Tag::BlockQuote(None)),
3053            Event::Start(Tag::Paragraph),
3054            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
3055            Event::End(TagEnd::Paragraph),
3056            Event::End(TagEnd::BlockQuote(None)),
3057        ];
3058        assert_eq!(&events, &expected);
3059    }
3060
3061    #[test]
3062    fn wikilink_has_pothole() {
3063        let input = "[[foo]] [[bar|baz]]";
3064        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
3065        let expected = [
3066            Event::Start(Tag::Paragraph),
3067            Event::Start(Tag::Link {
3068                link_type: LinkType::WikiLink { has_pothole: false },
3069                dest_url: CowStr::Borrowed("foo"),
3070                title: CowStr::Borrowed(""),
3071                id: CowStr::Borrowed(""),
3072            }),
3073            Event::Text(CowStr::Borrowed("foo")),
3074            Event::End(TagEnd::Link),
3075            Event::Text(CowStr::Borrowed(" ")),
3076            Event::Start(Tag::Link {
3077                link_type: LinkType::WikiLink { has_pothole: true },
3078                dest_url: CowStr::Borrowed("bar"),
3079                title: CowStr::Borrowed(""),
3080                id: CowStr::Borrowed(""),
3081            }),
3082            Event::Text(CowStr::Borrowed("baz")),
3083            Event::End(TagEnd::Link),
3084            Event::End(TagEnd::Paragraph),
3085        ];
3086        assert_eq!(&events, &expected);
3087    }
3088
3089    fn mdx_parser(text: &str) -> Parser<'_> {
3090        Parser::new_ext(text, Options::ENABLE_MDX)
3091    }
3092
3093    #[test]
3094    fn mdx_esm_import() {
3095        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
3096        assert_eq!(events.len(), 1);
3097        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
3098    }
3099
3100    #[test]
3101    fn mdx_esm_export() {
3102        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
3103        assert_eq!(events.len(), 1);
3104        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
3105    }
3106
3107    #[test]
3108    fn mdx_flow_expression() {
3109        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
3110        assert_eq!(events.len(), 1);
3111        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
3112    }
3113
3114    #[test]
3115    fn mdx_jsx_flow_self_closing() {
3116        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
3117        assert!(!events.is_empty());
3118        assert!(
3119            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
3120        );
3121    }
3122
3123    #[test]
3124    fn mdx_jsx_flow_fragment() {
3125        let events: Vec<_> = mdx_parser("<>\n").collect();
3126        assert!(!events.is_empty());
3127        assert!(matches!(
3128            &events[0],
3129            Event::Start(Tag::MdxJsxFlowElement(_))
3130        ));
3131    }
3132
3133    #[test]
3134    fn mdx_inline_expression() {
3135        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
3136        let has_expr = events
3137            .iter()
3138            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3139        assert!(
3140            has_expr,
3141            "Expected inline MDX expression, got: {:?}",
3142            events
3143        );
3144    }
3145
3146    #[test]
3147    fn mdx_inline_jsx() {
3148        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
3149        let has_jsx = events
3150            .iter()
3151            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
3152        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
3153    }
3154
3155    #[test]
3156    fn mdx_all_tags_are_jsx() {
3157        // In MDX mode, all tags (including lowercase) are JSX, not HTML.
3158        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
3159        let has_jsx = events
3160            .iter()
3161            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
3162        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
3163    }
3164
3165    #[test]
3166    fn mdx_does_not_interfere_without_flag() {
3167        // Without ENABLE_MDX, none of this should be parsed as MDX.
3168        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
3169        // Should be a regular paragraph.
3170        assert!(events
3171            .iter()
3172            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
3173    }
3174
3175    #[test]
3176    fn mdx_expression_in_heading() {
3177        let events: Vec<_> = mdx_parser("# {title}\n").collect();
3178        let has_heading = events
3179            .iter()
3180            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
3181        assert!(has_heading, "Should have a heading");
3182        let has_expr = events
3183            .iter()
3184            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
3185        assert!(
3186            has_expr,
3187            "Heading should contain MdxTextExpression, got: {:?}",
3188            events
3189        );
3190    }
3191
3192    #[test]
3193    fn mdx_expression_mixed_text_in_heading() {
3194        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
3195        let has_text = events
3196            .iter()
3197            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
3198        let has_expr = events
3199            .iter()
3200            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
3201        assert!(has_text, "Should have text, got: {:?}", events);
3202        assert!(has_expr, "Should have expression, got: {:?}", events);
3203    }
3204}