pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::iter::FusedIterator;
26use std::num::NonZeroUsize;
27use std::ops::{Index, Range};
28
29use unicase::UniCase;
30
31use crate::firstpass::run_first_pass;
32use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
33use crate::scanners::*;
34use crate::strings::CowStr;
35use crate::tree::{Tree, TreeIndex};
36use crate::{
37    Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, LinkType, MetadataBlockKind,
38    Options, Tag, TagEnd,
39};
40
// Allowing arbitrary depth nested parentheses inside link destinations
// can create denial of service vulnerabilities if we're not careful.
// The simplest countermeasure is to limit their depth, which is
// explicitly allowed by the spec as long as the limit is at least 3:
// https://spec.commonmark.org/0.29/#link-destination
//
// This constant is that depth limit.
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
47
/// Payload of a single node in the parse tree (the parser stores a `Tree<Item>`).
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    /// Offset into the source text at which this item starts.
    pub start: usize,
    /// Offset into the source text at which this item ends. The inline pass
    /// uses it as an upper slice bound (`&text[..end]`).
    pub end: usize,
    /// What kind of item this is, plus any per-kind data.
    pub body: ItemBody,
}
54
/// The kind of an [`Item`] in the parse tree, together with its per-kind data.
///
/// Variants prefixed with `Maybe` are tentative: they are produced first and
/// later either resolved into a concrete inline item or demoted to plain text.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // These are possible inline items, need to be resolved in second pass.

    // repeats, can_open, can_close
    MaybeEmphasis(usize, bool, bool),
    // can_open, can_close, brace context
    // (two adjacent MaybeMath items are treated as a display delimiter, `$$`)
    MaybeMath(bool, bool, u8),
    // quote byte, can_open, can_close
    MaybeSmartQuote(u8, bool, bool),
    // number of backticks, preceded by backslash
    // (when preceded by a backslash, one backtick is subtracted before matching)
    MaybeCode(usize, bool),
    MaybeHtml,
    MaybeLinkOpen,
    // bool indicates whether or not the preceding section could be a reference
    MaybeLinkClose(bool),
    MaybeImage,

    // These are inline items after resolution.
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    Math(CowIndex, bool), // true for display math
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    FootnoteReference(CowIndex),
    TaskListMarker(bool), // true for checked

    // These are also inline items.
    InlineHtml,
    // inline HTML whose text was allocated separately instead of being
    // read directly from the source
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    SynthesizeChar(char),
    Html,
    Text {
        backslash_escaped: bool,
    },
    SoftBreak,
    // true = is backslash
    HardBreak(bool),

    // Dummy node at the top of the tree - should not be used otherwise!
    // It is also the `Default` value of this enum.
    #[default]
    Root,

    // These are block items.
    Paragraph,
    TightParagraph,
    Rule,
    // heading level, optional heading index
    // (presumably an index into per-heading data - TODO confirm)
    Heading(HeadingLevel, Option<HeadingIndex>),
    FencedCodeBlock(CowIndex),
    IndentCodeBlock,
    HtmlBlock,
    BlockQuote(Option<BlockQuoteKind>),
    List(bool, u8, u64), // is_tight, list character, list start index
    ListItem(usize),     // indent level
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    // Definition lists
    DefinitionList(bool), // is_tight
    // gets turned into either a paragraph or a definition list title,
    // depending on whether there's a definition after it
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    DefinitionListDefinition(usize),

    // Tables
    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,
}
130
131impl ItemBody {
132    fn is_maybe_inline(&self) -> bool {
133        use ItemBody::*;
134        matches!(
135            *self,
136            MaybeEmphasis(..)
137                | MaybeMath(..)
138                | MaybeSmartQuote(..)
139                | MaybeCode(..)
140                | MaybeHtml
141                | MaybeLinkOpen
142                | MaybeLinkClose(..)
143                | MaybeImage
144        )
145    }
146    fn is_inline(&self) -> bool {
147        use ItemBody::*;
148        matches!(
149            *self,
150            MaybeEmphasis(..)
151                | MaybeMath(..)
152                | MaybeSmartQuote(..)
153                | MaybeCode(..)
154                | MaybeHtml
155                | MaybeLinkOpen
156                | MaybeLinkClose(..)
157                | MaybeImage
158                | Emphasis
159                | Strong
160                | Strikethrough
161                | Math(..)
162                | Code(..)
163                | Link(..)
164                | Image(..)
165                | FootnoteReference(..)
166                | TaskListMarker(..)
167                | InlineHtml
168                | OwnedInlineHtml(..)
169                | SynthesizeText(..)
170                | SynthesizeChar(..)
171                | Html
172                | Text { .. }
173                | SoftBreak
174                | HardBreak(..)
175        )
176    }
177}
178
/// Describes a reference-style link whose label has no matching definition.
///
/// A value of this type is handed to the broken link callback so that it can
/// supply a fallback URL and title.
#[derive(Debug)]
pub struct BrokenLink<'a> {
    /// Range associated with the broken link, as supplied by the parser
    /// (presumably byte offsets into the source text - TODO confirm).
    pub span: std::ops::Range<usize>,
    /// The link type determined at the usage site.
    pub link_type: LinkType,
    /// The link label for which no definition was found.
    pub reference: CowStr<'a>,
}
185
/// Markdown event iterator.
///
/// `F` is the type of the optional broken link callback; it defaults to
/// `DefaultBrokenLinkCallback`.
pub struct Parser<'input, F = DefaultBrokenLinkCallback> {
    // the markdown source being parsed
    text: &'input str,
    // the options this parser was created with
    options: Options,
    // tree produced by the first pass; inline items are resolved in place
    tree: Tree<Item>,
    // side storage produced by the first pass (reference definitions, links,
    // owned strings, ...), referenced from the tree by index
    allocs: Allocations<'input>,
    // consulted when a link label has no matching reference definition
    broken_link_callback: Option<F>,
    // state used while scanning inline HTML
    // (presumably guards against repeated rescans - TODO confirm)
    html_scan_guard: HtmlScanGuard,

    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
    // Consider this example:
    //
    //     [x]: xxx...
    //     [x]
    //     [x]
    //     [x]
    //
    // Which expands to this HTML:
    //
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //     <a href="xxx...">x</a>
    //
    // This is quadratic growth, because it's filling in the area of a square.
    // To prevent this, track how much it's expanded and limit it.
    //
    // The value is the remaining budget: it starts at the input length (but at
    // least 100_000) and shrinks with every resolved link reference.
    link_ref_expansion_limit: usize,

    // used by inline passes. store them here for reuse
    inline_stack: InlineStack,
    link_stack: LinkStack,
    wikilink_stack: LinkStack,
    // code span delimiters found while scanning ahead, kept for later lookups
    code_delims: CodeDelims,
    // math delimiters found while scanning ahead, kept for later lookups
    math_delims: MathDelims,
}
220
221impl<'input, F> std::fmt::Debug for Parser<'input, F> {
222    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
223        // Only print the fields that have public types.
224        f.debug_struct("Parser")
225            .field("text", &self.text)
226            .field("options", &self.options)
227            .field(
228                "broken_link_callback",
229                &self.broken_link_callback.as_ref().map(|_| ..),
230            )
231            .finish()
232    }
233}
234
235impl<'a> BrokenLink<'a> {
236    /// Moves the link into version with a static lifetime.
237    ///
238    /// The `reference` member is cloned to a Boxed or Inline version.
239    pub fn into_static(self) -> BrokenLink<'static> {
240        BrokenLink {
241            span: self.span.clone(),
242            link_type: self.link_type,
243            reference: self.reference.into_string().into(),
244        }
245    }
246}
247
248impl<'input> Parser<'input, DefaultBrokenLinkCallback> {
249    /// Creates a new event iterator for a markdown string without any options enabled.
250    pub fn new(text: &'input str) -> Self {
251        Self::new_ext(text, Options::empty())
252    }
253
254    /// Creates a new event iterator for a markdown string with given options.
255    pub fn new_ext(text: &'input str, options: Options) -> Self {
256        Self::new_with_broken_link_callback(text, options, None)
257    }
258}
259
260impl<'input, F: BrokenLinkCallback<'input>> Parser<'input, F> {
261    /// In case the parser encounters any potential links that have a broken
262    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
263    /// the provided callback will be called with the reference name,
264    /// and the returned pair will be used as the link URL and title if it is not
265    /// `None`.
266    pub fn new_with_broken_link_callback(
267        text: &'input str,
268        options: Options,
269        broken_link_callback: Option<F>,
270    ) -> Self {
271        let (mut tree, allocs) = run_first_pass(text, options);
272        tree.reset();
273        let inline_stack = Default::default();
274        let link_stack = Default::default();
275        let wikilink_stack = Default::default();
276        let html_scan_guard = Default::default();
277        Parser {
278            text,
279            options,
280            tree,
281            allocs,
282            broken_link_callback,
283            inline_stack,
284            link_stack,
285            wikilink_stack,
286            html_scan_guard,
287            // always allow 100KiB
288            link_ref_expansion_limit: text.len().max(100_000),
289            code_delims: CodeDelims::new(),
290            math_delims: MathDelims::new(),
291        }
292    }
293
294    /// Returns a reference to the internal `RefDefs` object, which provides access
295    /// to the internal map of reference definitions.
296    pub fn reference_definitions(&self) -> &RefDefs<'_> {
297        &self.allocs.refdefs
298    }
299
300    /// Use a link label to fetch a type, url, and title.
301    ///
302    /// This function enforces the [`link_ref_expansion_limit`].
303    /// If it returns Some, it also consumes some of the fuel.
304    /// If we're out of fuel, it immediately returns None.
305    ///
306    /// The URL and title are found in the [`RefDefs`] map.
307    /// If they're not there, and a callback was provided by the user,
308    /// the [`broken_link_callback`] will be invoked and given the opportunity
309    /// to provide a fallback.
310    ///
311    /// The link type (that's "link" or "image") depends on the usage site, and
312    /// is provided by the caller of this function.
313    /// This function returns a new one because, if it has to invoke a callback
314    /// to find the information, the link type is [mapped to an unknown type].
315    ///
316    /// [mapped to an unknown type]: crate::LinkType::to_unknown
317    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
318    /// [`broken_link_callback`]: Self::broken_link_callback
319    fn fetch_link_type_url_title(
320        &mut self,
321        link_label: CowStr<'input>,
322        span: Range<usize>,
323        link_type: LinkType,
324    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
325        if self.link_ref_expansion_limit == 0 {
326            return None;
327        }
328
329        let (link_type, url, title) = self
330            .allocs
331            .refdefs
332            .get(link_label.as_ref())
333            .map(|matching_def| {
334                // found a matching definition!
335                let title = matching_def
336                    .title
337                    .as_ref()
338                    .cloned()
339                    .unwrap_or_else(|| "".into());
340                let url = matching_def.dest.clone();
341                (link_type, url, title)
342            })
343            .or_else(|| {
344                match self.broken_link_callback.as_mut() {
345                    Some(callback) => {
346                        // Construct a BrokenLink struct, which will be passed to the callback
347                        let broken_link = BrokenLink {
348                            span,
349                            link_type,
350                            reference: link_label,
351                        };
352
353                        callback
354                            .handle_broken_link(broken_link)
355                            .map(|(url, title)| (link_type.to_unknown(), url, title))
356                    }
357                    None => None,
358                }
359            })?;
360
361        // Limit expansion from link references.
362        // This isn't a problem for footnotes, because multiple references to the same one
363        // reuse the same node, but links/images get their HREF/SRC copied.
364        self.link_ref_expansion_limit = self
365            .link_ref_expansion_limit
366            .saturating_sub(url.len() + title.len());
367
368        Some((link_type, url, title))
369    }
370
    /// Handle inline markup.
    ///
    /// When the parser encounters any item indicating potential inline markup, all
    /// inline markup passes are run on the remainder of the chain.
    ///
    /// The passes run in a fixed order: first pass 1, then the emphasis and
    /// hard break pass.
    ///
    /// Note: there's some potential for optimization here, but that's future work.
    fn handle_inline(&mut self) {
        // Pass 1: inline HTML, code spans, and links.
        self.handle_inline_pass1();
        // Afterwards: emphasis and hard breaks (per the method name; its body
        // is defined elsewhere).
        self.handle_emphasis_and_hard_break();
    }
381
382    /// Handle inline HTML, code spans, and links.
383    ///
384    /// This function handles both inline HTML and code spans, because they have
385    /// the same precedence. It also handles links, even though they have lower
386    /// precedence, because the URL of links must not be processed.
387    fn handle_inline_pass1(&mut self) {
388        let mut cur = self.tree.cur();
389        let mut prev = None;
390
391        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
392        let block_text = &self.text[..block_end];
393
394        while let Some(mut cur_ix) = cur {
395            match self.tree[cur_ix].item.body {
396                ItemBody::MaybeHtml => {
397                    let next = self.tree[cur_ix].next;
398                    let autolink = if let Some(next_ix) = next {
399                        scan_autolink(block_text, self.tree[next_ix].item.start)
400                    } else {
401                        None
402                    };
403
404                    if let Some((ix, uri, link_type)) = autolink {
405                        let node = scan_nodes_to_ix(&self.tree, next, ix);
406                        let text_node = self.tree.create_node(Item {
407                            start: self.tree[cur_ix].item.start + 1,
408                            end: ix - 1,
409                            body: ItemBody::Text {
410                                backslash_escaped: false,
411                            },
412                        });
413                        let link_ix =
414                            self.allocs
415                                .allocate_link(link_type, uri, "".into(), "".into());
416                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
417                        self.tree[cur_ix].item.end = ix;
418                        self.tree[cur_ix].next = node;
419                        self.tree[cur_ix].child = Some(text_node);
420                        prev = cur;
421                        cur = node;
422                        if let Some(node_ix) = cur {
423                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
424                        }
425                        continue;
426                    } else {
427                        let inline_html = next.and_then(|next_ix| {
428                            self.scan_inline_html(
429                                block_text.as_bytes(),
430                                self.tree[next_ix].item.start,
431                            )
432                        });
433                        if let Some((span, ix)) = inline_html {
434                            let node = scan_nodes_to_ix(&self.tree, next, ix);
435                            self.tree[cur_ix].item.body = if !span.is_empty() {
436                                let converted_string =
437                                    String::from_utf8(span).expect("invalid utf8");
438                                ItemBody::OwnedInlineHtml(
439                                    self.allocs.allocate_cow(converted_string.into()),
440                                )
441                            } else {
442                                ItemBody::InlineHtml
443                            };
444                            self.tree[cur_ix].item.end = ix;
445                            self.tree[cur_ix].next = node;
446                            prev = cur;
447                            cur = node;
448                            if let Some(node_ix) = cur {
449                                self.tree[node_ix].item.start =
450                                    max(self.tree[node_ix].item.start, ix);
451                            }
452                            continue;
453                        }
454                    }
455                    self.tree[cur_ix].item.body = ItemBody::Text {
456                        backslash_escaped: false,
457                    };
458                }
459                ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
460                    if !can_open {
461                        self.tree[cur_ix].item.body = ItemBody::Text {
462                            backslash_escaped: false,
463                        };
464                        prev = cur;
465                        cur = self.tree[cur_ix].next;
466                        continue;
467                    }
468                    let is_display = self.tree[cur_ix].next.map_or(false, |next_ix| {
469                        matches!(
470                            self.tree[next_ix].item.body,
471                            ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
472                        )
473                    });
474                    let result = if self.math_delims.is_populated() {
475                        // we have previously scanned all math environment delimiters,
476                        // so we can reuse that work
477                        self.math_delims
478                            .find(&self.tree, cur_ix, is_display, brace_context)
479                    } else {
480                        // we haven't previously scanned all math delimiters,
481                        // so walk the AST
482                        let mut scan = self.tree[cur_ix].next;
483                        if is_display {
484                            // a display delimiter, `$$`, is actually two delimiters
485                            // skip the second one
486                            scan = self.tree[scan.unwrap()].next;
487                        }
488                        let mut invalid = false;
489                        while let Some(scan_ix) = scan {
490                            if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
491                                self.tree[scan_ix].item.body
492                            {
493                                let delim_is_display =
494                                    self.tree[scan_ix].next.map_or(false, |next_ix| {
495                                        matches!(
496                                            self.tree[next_ix].item.body,
497                                            ItemBody::MaybeMath(
498                                                _can_open,
499                                                _can_close,
500                                                _brace_context
501                                            )
502                                        )
503                                    });
504                                if !invalid && delim_brace_context == brace_context {
505                                    if (!is_display && can_close)
506                                        || (is_display && delim_is_display)
507                                    {
508                                        // This will skip ahead past everything we
509                                        // just inserted. Needed for correctness to
510                                        // ensure that a new scan is done after this item.
511                                        self.math_delims.clear();
512                                        break;
513                                    } else {
514                                        // Math cannot contain $, so the current item
515                                        // is invalid. Keep scanning to fill math_delims.
516                                        invalid = true;
517                                    }
518                                }
519                                self.math_delims.insert(
520                                    delim_is_display,
521                                    delim_brace_context,
522                                    scan_ix,
523                                    can_close,
524                                );
525                            }
526                            scan = self.tree[scan_ix].next;
527                        }
528                        scan
529                    };
530
531                    if let Some(scan_ix) = result {
532                        self.make_math_span(cur_ix, scan_ix);
533                    } else {
534                        self.tree[cur_ix].item.body = ItemBody::Text {
535                            backslash_escaped: false,
536                        };
537                    }
538                }
539                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
540                    if preceded_by_backslash {
541                        search_count -= 1;
542                        if search_count == 0 {
543                            self.tree[cur_ix].item.body = ItemBody::Text {
544                                backslash_escaped: false,
545                            };
546                            prev = cur;
547                            cur = self.tree[cur_ix].next;
548                            continue;
549                        }
550                    }
551
552                    if self.code_delims.is_populated() {
553                        // we have previously scanned all codeblock delimiters,
554                        // so we can reuse that work
555                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
556                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
557                        } else {
558                            self.tree[cur_ix].item.body = ItemBody::Text {
559                                backslash_escaped: false,
560                            };
561                        }
562                    } else {
563                        // we haven't previously scanned all codeblock delimiters,
564                        // so walk the AST
565                        let mut scan = if search_count > 0 {
566                            self.tree[cur_ix].next
567                        } else {
568                            None
569                        };
570                        while let Some(scan_ix) = scan {
571                            if let ItemBody::MaybeCode(delim_count, _) =
572                                self.tree[scan_ix].item.body
573                            {
574                                if search_count == delim_count {
575                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
576                                    self.code_delims.clear();
577                                    break;
578                                } else {
579                                    self.code_delims.insert(delim_count, scan_ix);
580                                }
581                            }
582                            scan = self.tree[scan_ix].next;
583                        }
584                        if scan.is_none() {
585                            self.tree[cur_ix].item.body = ItemBody::Text {
586                                backslash_escaped: false,
587                            };
588                        }
589                    }
590                }
591                ItemBody::MaybeLinkOpen => {
592                    self.tree[cur_ix].item.body = ItemBody::Text {
593                        backslash_escaped: false,
594                    };
595                    let link_open_doubled = self.tree[cur_ix]
596                        .next
597                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
598                        .unwrap_or(false);
599                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
600                        self.wikilink_stack.push(LinkStackEl {
601                            node: cur_ix,
602                            ty: LinkStackTy::Link,
603                        });
604                    }
605                    self.link_stack.push(LinkStackEl {
606                        node: cur_ix,
607                        ty: LinkStackTy::Link,
608                    });
609                }
610                ItemBody::MaybeImage => {
611                    self.tree[cur_ix].item.body = ItemBody::Text {
612                        backslash_escaped: false,
613                    };
614                    let link_open_doubled = self.tree[cur_ix]
615                        .next
616                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
617                        .unwrap_or(false);
618                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
619                        self.wikilink_stack.push(LinkStackEl {
620                            node: cur_ix,
621                            ty: LinkStackTy::Image,
622                        });
623                    }
624                    self.link_stack.push(LinkStackEl {
625                        node: cur_ix,
626                        ty: LinkStackTy::Image,
627                    });
628                }
629                ItemBody::MaybeLinkClose(could_be_ref) => {
630                    self.tree[cur_ix].item.body = ItemBody::Text {
631                        backslash_escaped: false,
632                    };
633                    let tos_link = self.link_stack.pop();
634                    if self.options.contains(Options::ENABLE_WIKILINKS)
635                        && self.tree[cur_ix]
636                            .next
637                            .map(|ix| {
638                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
639                            })
640                            .unwrap_or(false)
641                    {
642                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
643                            cur = self.tree[node].next;
644                            continue;
645                        }
646                    }
647                    if let Some(tos) = tos_link {
648                        // skip rendering if already in a link, unless its an
649                        // image
650                        if tos.ty != LinkStackTy::Image
651                            && matches!(
652                                self.tree[self.tree.peek_up().unwrap()].item.body,
653                                ItemBody::Link(..)
654                            )
655                        {
656                            continue;
657                        }
658                        if tos.ty == LinkStackTy::Disabled {
659                            continue;
660                        }
661                        let next = self.tree[cur_ix].next;
662                        if let Some((next_ix, url, title)) =
663                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
664                        {
665                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
666                            if let Some(prev_ix) = prev {
667                                self.tree[prev_ix].next = None;
668                            }
669                            cur = Some(tos.node);
670                            cur_ix = tos.node;
671                            let link_ix =
672                                self.allocs
673                                    .allocate_link(LinkType::Inline, url, title, "".into());
674                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
675                                ItemBody::Image(link_ix)
676                            } else {
677                                ItemBody::Link(link_ix)
678                            };
679                            self.tree[cur_ix].child = self.tree[cur_ix].next;
680                            self.tree[cur_ix].next = next_node;
681                            self.tree[cur_ix].item.end = next_ix;
682                            if let Some(next_node_ix) = next_node {
683                                self.tree[next_node_ix].item.start =
684                                    max(self.tree[next_node_ix].item.start, next_ix);
685                            }
686
687                            if tos.ty == LinkStackTy::Link {
688                                self.disable_all_links();
689                            }
690                        } else {
691                            // ok, so its not an inline link. maybe it is a reference
692                            // to a defined link?
693                            let scan_result =
694                                scan_reference(&self.tree, block_text, next, self.options);
695                            let (node_after_link, link_type) = match scan_result {
696                                // [label][reference]
697                                RefScan::LinkLabel(_, end_ix) => {
698                                    // Toggle reference viability of the last closing bracket,
699                                    // so that we can skip it on future iterations in case
700                                    // it fails in this one. In particular, we won't call
701                                    // the broken link callback twice on one reference.
702                                    let reference_close_node = if let Some(node) =
703                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
704                                    {
705                                        node
706                                    } else {
707                                        continue;
708                                    };
709                                    self.tree[reference_close_node].item.body =
710                                        ItemBody::MaybeLinkClose(false);
711                                    let next_node = self.tree[reference_close_node].next;
712
713                                    (next_node, LinkType::Reference)
714                                }
715                                // [reference][]
716                                RefScan::Collapsed(next_node) => {
717                                    // This reference has already been tried, and it's not
718                                    // valid. Skip it.
719                                    if !could_be_ref {
720                                        continue;
721                                    }
722                                    (next_node, LinkType::Collapsed)
723                                }
724                                // [shortcut]
725                                //
726                                // [shortcut]: /blah
727                                RefScan::Failed | RefScan::UnexpectedFootnote => {
728                                    if !could_be_ref {
729                                        continue;
730                                    }
731                                    (next, LinkType::Shortcut)
732                                }
733                            };
734
735                            // FIXME: references and labels are mixed in the naming of variables
736                            // below. Disambiguate!
737
738                            // (label, source_ix end)
739                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
740                                RefScan::LinkLabel(l, end_ix) => {
741                                    Some((ReferenceLabel::Link(l), end_ix))
742                                }
743                                RefScan::Collapsed(..)
744                                | RefScan::Failed
745                                | RefScan::UnexpectedFootnote => {
746                                    // No label? maybe it is a shortcut reference
747                                    let label_start = self.tree[tos.node].item.end - 1;
748                                    let label_end = self.tree[cur_ix].item.end;
749                                    scan_link_label(
750                                        &self.tree,
751                                        &self.text[label_start..label_end],
752                                        self.options,
753                                    )
754                                    .map(|(ix, label)| (label, label_start + ix))
755                                    .filter(|(_, end)| *end == label_end)
756                                }
757                            };
758
759                            let id = match &label {
760                                Some(
761                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
762                                ) => l.clone(),
763                                None => "".into(),
764                            };
765
766                            // see if it's a footnote reference
767                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
768                                let footref = self.allocs.allocate_cow(l);
769                                if let Some(def) = self
770                                    .allocs
771                                    .footdefs
772                                    .get_mut(self.allocs.cows[footref.0].to_owned())
773                                {
774                                    def.use_count += 1;
775                                }
776                                if !self.options.has_gfm_footnotes()
777                                    || self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
778                                {
779                                    // If this came from a MaybeImage, then the `!` prefix
780                                    // isn't part of the footnote reference.
781                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
782                                        self.tree[tos.node].next = Some(cur_ix);
783                                        self.tree[tos.node].child = None;
784                                        self.tree[tos.node].item.body =
785                                            ItemBody::SynthesizeChar('!');
786                                        self.tree[cur_ix].item.start =
787                                            self.tree[tos.node].item.start + 1;
788                                        self.tree[tos.node].item.end =
789                                            self.tree[tos.node].item.start + 1;
790                                        cur_ix
791                                    } else {
792                                        tos.node
793                                    };
794                                    // use `next` instead of `node_after_link` because
795                                    // node_after_link is calculated for a [collapsed][] link,
796                                    // which footnotes don't support.
797                                    self.tree[footnote_ix].next = next;
798                                    self.tree[footnote_ix].child = None;
799                                    self.tree[footnote_ix].item.body =
800                                        ItemBody::FootnoteReference(footref);
801                                    self.tree[footnote_ix].item.end = end;
802                                    prev = Some(footnote_ix);
803                                    cur = next;
804                                    self.link_stack.clear();
805                                    continue;
806                                }
807                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
808                                if let Some((def_link_type, url, title)) = self
809                                    .fetch_link_type_url_title(
810                                        link_label,
811                                        (self.tree[tos.node].item.start)..end,
812                                        link_type,
813                                    )
814                                {
815                                    let link_ix =
816                                        self.allocs.allocate_link(def_link_type, url, title, id);
817                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
818                                    {
819                                        ItemBody::Image(link_ix)
820                                    } else {
821                                        ItemBody::Link(link_ix)
822                                    };
823                                    let label_node = self.tree[tos.node].next;
824
825                                    // lets do some tree surgery to add the link to the tree
826                                    // 1st: skip the label node and close node
827                                    self.tree[tos.node].next = node_after_link;
828
829                                    // then, if it exists, add the label node as a child to the link node
830                                    if label_node != cur {
831                                        self.tree[tos.node].child = label_node;
832
833                                        // finally: disconnect list of children
834                                        if let Some(prev_ix) = prev {
835                                            self.tree[prev_ix].next = None;
836                                        }
837                                    }
838
839                                    self.tree[tos.node].item.end = end;
840
841                                    // set up cur so next node will be node_after_link
842                                    cur = Some(tos.node);
843                                    cur_ix = tos.node;
844
845                                    if tos.ty == LinkStackTy::Link {
846                                        self.disable_all_links();
847                                    }
848                                }
849                            }
850                        }
851                    }
852                }
853                _ => {}
854            }
855            prev = cur;
856            cur = self.tree[cur_ix].next;
857        }
858        self.link_stack.clear();
859        self.wikilink_stack.clear();
860        self.code_delims.clear();
861        self.math_delims.clear();
862    }
863
864    /// Handles a wikilink.
865    ///
866    /// This function may bail early in case the link is malformed, so this
867    /// acts as a control flow guard. Returns the link node if a wikilink was
868    /// found and created.
869    fn handle_wikilink(
870        &mut self,
871        block_text: &'input str,
872        cur_ix: TreeIndex,
873        prev: Option<TreeIndex>,
874    ) -> Option<TreeIndex> {
875        let next_ix = self.tree[cur_ix].next.unwrap();
876        // this is a wikilink closing delim, try popping from
877        // the wikilink stack
878        if let Some(tos) = self.wikilink_stack.pop() {
879            if tos.ty == LinkStackTy::Disabled {
880                return None;
881            }
882            // fetches the beginning of the wikilink body
883            let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
884                // skip if no next node exists, like at end of input
885                return None;
886            };
887            let start_ix = self.tree[body_node].item.start;
888            let end_ix = self.tree[cur_ix].item.start;
889            let wikilink = match scan_wikilink_pipe(
890                block_text,
891                start_ix, // bounded by closing tag
892                end_ix - start_ix,
893            ) {
894                Some((rest, wikitext)) => {
895                    // bail early if the wikiname would be empty
896                    if wikitext.is_empty() {
897                        return None;
898                    }
899                    // [[WikiName|rest]]
900                    let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
901                    if let Some(body_node) = body_node {
902                        // break node so passes can actually format
903                        // the display text
904                        self.tree[body_node].item.start = rest;
905                        Some((true, body_node, wikitext))
906                    } else {
907                        None
908                    }
909                }
910                None => {
911                    let wikitext = &block_text[start_ix..end_ix];
912                    // bail early if the wikiname would be empty
913                    if wikitext.is_empty() {
914                        return None;
915                    }
916                    let body_node = self.tree.create_node(Item {
917                        start: start_ix,
918                        end: end_ix,
919                        body: ItemBody::Text {
920                            backslash_escaped: false,
921                        },
922                    });
923                    Some((false, body_node, wikitext))
924                }
925            };
926
927            if let Some((has_pothole, body_node, wikiname)) = wikilink {
928                let link_ix = self.allocs.allocate_link(
929                    LinkType::WikiLink { has_pothole },
930                    wikiname.into(),
931                    "".into(),
932                    "".into(),
933                );
934                if let Some(prev_ix) = prev {
935                    self.tree[prev_ix].next = None;
936                }
937                if tos.ty == LinkStackTy::Image {
938                    self.tree[tos.node].item.body = ItemBody::Image(link_ix);
939                } else {
940                    self.tree[tos.node].item.body = ItemBody::Link(link_ix);
941                }
942                self.tree[tos.node].child = Some(body_node);
943                self.tree[tos.node].next = self.tree[next_ix].next;
944                self.tree[tos.node].item.end = end_ix + 1;
945                self.disable_all_links();
946                return Some(tos.node);
947            }
948        }
949
950        None
951    }
952
953    fn handle_emphasis_and_hard_break(&mut self) {
954        let mut prev = None;
955        let mut prev_ix: TreeIndex;
956        let mut cur = self.tree.cur();
957
958        let mut single_quote_open: Option<TreeIndex> = None;
959        let mut double_quote_open: bool = false;
960
961        while let Some(mut cur_ix) = cur {
962            match self.tree[cur_ix].item.body {
963                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
964                    let run_length = count;
965                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
966                    let both = can_open && can_close;
967                    if can_close {
968                        while let Some(el) =
969                            self.inline_stack
970                                .find_match(&mut self.tree, c, run_length, both)
971                        {
972                            // have a match!
973                            if let Some(prev_ix) = prev {
974                                self.tree[prev_ix].next = None;
975                            }
976                            let match_count = min(count, el.count);
977                            // start, end are tree node indices
978                            let mut end = cur_ix - 1;
979                            let mut start = el.start + el.count;
980
981                            // work from the inside out
982                            while start > el.start + el.count - match_count {
983                                let inc = if start > el.start + el.count - match_count + 1 {
984                                    2
985                                } else {
986                                    1
987                                };
988                                let ty = if c == b'~' {
989                                    if inc == 2 {
990                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
991                                            ItemBody::Strikethrough
992                                        } else {
993                                            ItemBody::Text {
994                                                backslash_escaped: false,
995                                            }
996                                        }
997                                    } else {
998                                        if self.options.contains(Options::ENABLE_SUBSCRIPT) {
999                                            ItemBody::Subscript
1000                                        } else if self
1001                                            .options
1002                                            .contains(Options::ENABLE_STRIKETHROUGH)
1003                                        {
1004                                            ItemBody::Strikethrough
1005                                        } else {
1006                                            ItemBody::Text {
1007                                                backslash_escaped: false,
1008                                            }
1009                                        }
1010                                    }
1011                                } else if c == b'^' {
1012                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1013                                        ItemBody::Superscript
1014                                    } else {
1015                                        ItemBody::Text {
1016                                            backslash_escaped: false,
1017                                        }
1018                                    }
1019                                } else if inc == 2 {
1020                                    ItemBody::Strong
1021                                } else {
1022                                    ItemBody::Emphasis
1023                                };
1024
1025                                let root = start - inc;
1026                                end = end + inc;
1027                                self.tree[root].item.body = ty;
1028                                self.tree[root].item.end = self.tree[end].item.end;
1029                                self.tree[root].child = Some(start);
1030                                self.tree[root].next = None;
1031                                start = root;
1032                            }
1033
1034                            // set next for top most emph level
1035                            prev_ix = el.start + el.count - match_count;
1036                            prev = Some(prev_ix);
1037                            cur = self.tree[cur_ix + match_count - 1].next;
1038                            self.tree[prev_ix].next = cur;
1039
1040                            if el.count > match_count {
1041                                self.inline_stack.push(InlineEl {
1042                                    start: el.start,
1043                                    count: el.count - match_count,
1044                                    run_length: el.run_length,
1045                                    c: el.c,
1046                                    both: el.both,
1047                                })
1048                            }
1049                            count -= match_count;
1050                            if count > 0 {
1051                                cur_ix = cur.unwrap();
1052                            } else {
1053                                break;
1054                            }
1055                        }
1056                    }
1057                    if count > 0 {
1058                        if can_open {
1059                            self.inline_stack.push(InlineEl {
1060                                start: cur_ix,
1061                                run_length,
1062                                count,
1063                                c,
1064                                both,
1065                            });
1066                        } else {
1067                            for i in 0..count {
1068                                self.tree[cur_ix + i].item.body = ItemBody::Text {
1069                                    backslash_escaped: false,
1070                                };
1071                            }
1072                        }
1073                        prev_ix = cur_ix + count - 1;
1074                        prev = Some(prev_ix);
1075                        cur = self.tree[prev_ix].next;
1076                    }
1077                }
1078                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1079                    self.tree[cur_ix].item.body = match c {
1080                        b'\'' => {
1081                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
1082                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1083                                single_quote_open = None;
1084                            } else if can_open {
1085                                single_quote_open = Some(cur_ix);
1086                            }
1087                            ItemBody::SynthesizeChar('’')
1088                        }
1089                        _ /* double quote */ => {
1090                            if can_close && double_quote_open {
1091                                double_quote_open = false;
1092                                ItemBody::SynthesizeChar('”')
1093                            } else {
1094                                if can_open && !double_quote_open {
1095                                    double_quote_open = true;
1096                                }
1097                                ItemBody::SynthesizeChar('“')
1098                            }
1099                        }
1100                    };
1101                    prev = cur;
1102                    cur = self.tree[cur_ix].next;
1103                }
1104                ItemBody::HardBreak(true) => {
1105                    if self.tree[cur_ix].next.is_none() {
1106                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1107                    }
1108                    prev = cur;
1109                    cur = self.tree[cur_ix].next;
1110                }
1111                _ => {
1112                    prev = cur;
1113                    cur = self.tree[cur_ix].next;
1114                }
1115            }
1116        }
1117        self.inline_stack.pop_all(&mut self.tree);
1118    }
1119
1120    fn disable_all_links(&mut self) {
1121        self.link_stack.disable_all_links();
1122        self.wikilink_stack.disable_all_links();
1123    }
1124
1125    /// Returns next byte index, url and title.
1126    fn scan_inline_link(
1127        &self,
1128        underlying: &'input str,
1129        mut ix: usize,
1130        node: Option<TreeIndex>,
1131    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1132        if underlying.as_bytes().get(ix) != Some(&b'(') {
1133            return None;
1134        }
1135        ix += 1;
1136
1137        let scan_separator = |ix: &mut usize| {
1138            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1139            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1140                *ix += bl;
1141                *ix += skip_container_prefixes(
1142                    &self.tree,
1143                    &underlying.as_bytes()[*ix..],
1144                    self.options,
1145                );
1146            }
1147            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1148        };
1149
1150        scan_separator(&mut ix);
1151
1152        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1153        let dest = unescape(dest, self.tree.is_in_table());
1154        ix += dest_length;
1155
1156        scan_separator(&mut ix);
1157
1158        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1159            ix += bytes_scanned;
1160            scan_separator(&mut ix);
1161            t
1162        } else {
1163            "".into()
1164        };
1165        if underlying.as_bytes().get(ix) != Some(&b')') {
1166            return None;
1167        }
1168        ix += 1;
1169
1170        Some((ix, dest, title))
1171    }
1172
1173    // returns (bytes scanned, title cow)
1174    fn scan_link_title(
1175        &self,
1176        text: &'input str,
1177        start_ix: usize,
1178        node: Option<TreeIndex>,
1179    ) -> Option<(usize, CowStr<'input>)> {
1180        let bytes = text.as_bytes();
1181        let open = match bytes.get(start_ix) {
1182            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1183            _ => return None,
1184        };
1185        let close = if open == b'(' { b')' } else { open };
1186
1187        let mut title = String::new();
1188        let mut mark = start_ix + 1;
1189        let mut i = start_ix + 1;
1190
1191        while i < bytes.len() {
1192            let c = bytes[i];
1193
1194            if c == close {
1195                let cow = if mark == 1 {
1196                    (i - start_ix + 1, text[mark..i].into())
1197                } else {
1198                    title.push_str(&text[mark..i]);
1199                    (i - start_ix + 1, title.into())
1200                };
1201
1202                return Some(cow);
1203            }
1204            if c == open {
1205                return None;
1206            }
1207
1208            if c == b'\n' || c == b'\r' {
1209                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1210                    if self.tree[node_ix].item.start > i {
1211                        title.push_str(&text[mark..i]);
1212                        title.push('\n');
1213                        i = self.tree[node_ix].item.start;
1214                        mark = i;
1215                        continue;
1216                    }
1217                }
1218            }
1219            if c == b'&' {
1220                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1221                    title.push_str(&text[mark..i]);
1222                    title.push_str(&value);
1223                    i += n;
1224                    mark = i;
1225                    continue;
1226                }
1227            }
1228            if self.tree.is_in_table()
1229                && c == b'\\'
1230                && i + 2 < bytes.len()
1231                && bytes[i + 1] == b'\\'
1232                && bytes[i + 2] == b'|'
1233            {
1234                // this runs if there are an even number of pipes in a table
1235                // if it's odd, then it gets parsed as normal
1236                title.push_str(&text[mark..i]);
1237                i += 2;
1238                mark = i;
1239            }
1240            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1241                title.push_str(&text[mark..i]);
1242                i += 1;
1243                mark = i;
1244            }
1245
1246            i += 1;
1247        }
1248
1249        None
1250    }
1251
1252    fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
1253        let start_is_display = self.tree[open].next.filter(|&next_ix| {
1254            next_ix != close
1255                && matches!(
1256                    self.tree[next_ix].item.body,
1257                    ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1258                )
1259        });
1260        let end_is_display = self.tree[close].next.filter(|&next_ix| {
1261            matches!(
1262                self.tree[next_ix].item.body,
1263                ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1264            )
1265        });
1266        let is_display = start_is_display.is_some() && end_is_display.is_some();
1267        if is_display {
1268            // This unwrap() can't panic, because if the next variable were None, end_is_display would be None
1269            close = self.tree[close].next.unwrap();
1270            self.tree[open].next = Some(close);
1271            self.tree[open].item.end += 1;
1272            self.tree[close].item.start -= 1;
1273        } else {
1274            if self.tree[open].item.end == self.tree[close].item.start {
1275                // inline math spans cannot be empty
1276                self.tree[open].item.body = ItemBody::Text {
1277                    backslash_escaped: false,
1278                };
1279                return;
1280            }
1281            self.tree[open].next = Some(close);
1282        }
1283        let span_start = self.tree[open].item.end;
1284        let span_end = self.tree[close].item.start;
1285
1286        let spanned_text = &self.text[span_start..span_end];
1287        let spanned_bytes = spanned_text.as_bytes();
1288        let mut buf: Option<String> = None;
1289
1290        let mut start_ix = 0;
1291        let mut ix = 0;
1292        while ix < spanned_bytes.len() {
1293            let c = spanned_bytes[ix];
1294            if c == b'\r' || c == b'\n' {
1295                ix += 1;
1296                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1297                buf.push_str(&spanned_text[start_ix..ix]);
1298                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1299                start_ix = ix;
1300            } else if c == b'\\'
1301                && spanned_bytes.get(ix + 1) == Some(&b'|')
1302                && self.tree.is_in_table()
1303            {
1304                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1305                buf.push_str(&spanned_text[start_ix..ix]);
1306                buf.push('|');
1307                ix += 2;
1308                start_ix = ix;
1309            } else {
1310                ix += 1;
1311            }
1312        }
1313
1314        let cow = if let Some(mut buf) = buf {
1315            buf.push_str(&spanned_text[start_ix..]);
1316            buf.into()
1317        } else {
1318            spanned_text.into()
1319        };
1320
1321        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
1322        self.tree[open].item.end = self.tree[close].item.end;
1323        self.tree[open].next = self.tree[close].next;
1324    }
1325
1326    /// Make a code span.
1327    ///
1328    /// Both `open` and `close` are matching MaybeCode items.
1329    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1330        let span_start = self.tree[open].item.end;
1331        let span_end = self.tree[close].item.start;
1332        let mut buf: Option<String> = None;
1333
1334        let spanned_text = &self.text[span_start..span_end];
1335        let spanned_bytes = spanned_text.as_bytes();
1336        let mut start_ix = 0;
1337        let mut ix = 0;
1338        while ix < spanned_bytes.len() {
1339            let c = spanned_bytes[ix];
1340            if c == b'\r' || c == b'\n' {
1341                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1342                buf.push_str(&spanned_text[start_ix..ix]);
1343                buf.push(' ');
1344                ix += 1;
1345                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
1346                start_ix = ix;
1347            } else if c == b'\\'
1348                && spanned_bytes.get(ix + 1) == Some(&b'|')
1349                && self.tree.is_in_table()
1350            {
1351                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
1352                buf.push_str(&spanned_text[start_ix..ix]);
1353                buf.push('|');
1354                ix += 2;
1355                start_ix = ix;
1356            } else {
1357                ix += 1;
1358            }
1359        }
1360
1361        let (opening, closing, all_spaces) = {
1362            let s = if let Some(buf) = &mut buf {
1363                buf.push_str(&spanned_text[start_ix..]);
1364                &buf[..]
1365            } else {
1366                spanned_text
1367            };
1368            (
1369                s.as_bytes().first() == Some(&b' '),
1370                s.as_bytes().last() == Some(&b' '),
1371                s.bytes().all(|b| b == b' '),
1372            )
1373        };
1374
1375        let cow: CowStr<'input> = if !all_spaces && opening && closing {
1376            if let Some(mut buf) = buf {
1377                if !buf.is_empty() {
1378                    buf.remove(0);
1379                    buf.pop();
1380                }
1381                buf.into()
1382            } else {
1383                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
1384            }
1385        } else if let Some(buf) = buf {
1386            buf.into()
1387        } else {
1388            spanned_text.into()
1389        };
1390
1391        if preceding_backslash {
1392            self.tree[open].item.body = ItemBody::Text {
1393                backslash_escaped: true,
1394            };
1395            self.tree[open].item.end = self.tree[open].item.start + 1;
1396            self.tree[open].next = Some(close);
1397            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1398            self.tree[close].item.start = self.tree[open].item.start + 1;
1399        } else {
1400            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1401            self.tree[open].item.end = self.tree[close].item.end;
1402            self.tree[open].next = self.tree[close].next;
1403        }
1404    }
1405
1406    /// On success, returns a buffer containing the inline html and byte offset.
1407    /// When no bytes were skipped, the buffer will be empty and the html can be
1408    /// represented as a subslice of the input string.
1409    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1410        let c = *bytes.get(ix)?;
1411        if c == b'!' {
1412            Some((
1413                vec![],
1414                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1415            ))
1416        } else if c == b'?' {
1417            Some((
1418                vec![],
1419                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1420            ))
1421        } else {
1422            let (span, i) = scan_html_block_inner(
1423                // Subtract 1 to include the < character
1424                &bytes[(ix - 1)..],
1425                Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
1426            )?;
1427            Some((span, i + ix - 1))
1428        }
1429    }
1430
1431    /// Consumes the event iterator and produces an iterator that produces
1432    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
1433    /// range in the markdown source.
1434    pub fn into_offset_iter(self) -> OffsetIter<'input, F> {
1435        OffsetIter { inner: self }
1436    }
1437}
1438
1439/// Returns number of containers scanned.
1440pub(crate) fn scan_containers(
1441    tree: &Tree<Item>,
1442    line_start: &mut LineStart<'_>,
1443    options: Options,
1444) -> usize {
1445    let mut i = 0;
1446    for &node_ix in tree.walk_spine() {
1447        match tree[node_ix].item.body {
1448            ItemBody::BlockQuote(..) => {
1449                let save = line_start.clone();
1450                let _ = line_start.scan_space(3);
1451                if !line_start.scan_blockquote_marker() {
1452                    *line_start = save;
1453                    break;
1454                }
1455            }
1456            ItemBody::ListItem(indent) => {
1457                let save = line_start.clone();
1458                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1459                    *line_start = save;
1460                    break;
1461                }
1462            }
1463            ItemBody::DefinitionListDefinition(indent) => {
1464                let save = line_start.clone();
1465                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1466                    *line_start = save;
1467                    break;
1468                }
1469            }
1470            ItemBody::FootnoteDefinition(..) if options.has_gfm_footnotes() => {
1471                let save = line_start.clone();
1472                if !line_start.scan_space(4) && !line_start.is_at_eol() {
1473                    *line_start = save;
1474                    break;
1475                }
1476            }
1477            _ => (),
1478        }
1479        i += 1;
1480    }
1481    i
1482}
1483pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
1484    let mut line_start = LineStart::new(bytes);
1485    let _ = scan_containers(tree, &mut line_start, options);
1486    line_start.bytes_scanned()
1487}
1488
1489impl Tree<Item> {
1490    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1491        if end > start {
1492            if let Some(ix) = self.cur() {
1493                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1494                {
1495                    self[ix].item.end = end;
1496                    return;
1497                }
1498            }
1499            self.append(Item {
1500                start,
1501                end,
1502                body: ItemBody::Text { backslash_escaped },
1503            });
1504        }
1505    }
1506    /// Returns true if the current node is inside a table.
1507    ///
1508    /// If `cur` is an ItemBody::Table, it would return false,
1509    /// but since the `TableRow` and `TableHead` and `TableCell`
1510    /// are children of the table, anything doing inline parsing
1511    /// doesn't need to care about that.
1512    pub(crate) fn is_in_table(&self) -> bool {
1513        fn might_be_in_table(item: &Item) -> bool {
1514            item.body.is_inline()
1515                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1516                    | ItemBody::TableCell)
1517        }
1518        for &ix in self.walk_spine().rev() {
1519            if matches!(self[ix].item.body, ItemBody::Table(_)) {
1520                return true;
1521            }
1522            if !might_be_in_table(&self[ix].item) {
1523                return false;
1524            }
1525        }
1526        false
1527    }
1528}
1529
/// An entry of the [`InlineStack`]: a run of delimiter characters that is
/// still waiting for a match.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// offset of tree node
    ///
    /// The `count` nodes starting at this index belong to this entry; they
    /// are the ones turned back into text when the entry is discarded.
    start: TreeIndex,
    /// number of delimiters available for matching
    count: usize,
    /// length of the run that these delimiters came from
    run_length: usize,
    /// b'*', b'_', or b'~'
    c: u8,
    /// can both open and close
    both: bool,
}
1543
/// Stack of unmatched inline delimiter runs, together with per-kind lower
/// bounds that limit how far down the stack a search for a match needs to go.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// The pending delimiter runs, most recent last.
    stack: Vec<InlineEl>,
    // Lower bounds for matching indices in the stack. For example
    // a strikethrough delimiter will never match with any element
    // in the stack with index smaller than
    // `lower_bounds[InlineStack::TILDES]`.
    //
    // The slots are addressed through the index constants declared on
    // `InlineStack`.
    lower_bounds: [usize; 9],
}
1553
1554impl InlineStack {
1555    /// These are indices into the lower bounds array.
1556    /// Not both refers to the property that the delimiter can not both
1557    /// be opener as a closer.
1558    const UNDERSCORE_NOT_BOTH: usize = 0;
1559    const ASTERISK_NOT_BOTH: usize = 1;
1560    const ASTERISK_BASE: usize = 2;
1561    const TILDES: usize = 5;
1562    const UNDERSCORE_BASE: usize = 6;
1563
1564    fn pop_all(&mut self, tree: &mut Tree<Item>) {
1565        for el in self.stack.drain(..) {
1566            for i in 0..el.count {
1567                tree[el.start + i].item.body = ItemBody::Text {
1568                    backslash_escaped: false,
1569                };
1570            }
1571        }
1572        self.lower_bounds = [0; 9];
1573    }
1574
1575    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1576        if c == b'_' {
1577            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1578            if both {
1579                mod3_lower
1580            } else {
1581                min(
1582                    mod3_lower,
1583                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1584                )
1585            }
1586        } else if c == b'*' {
1587            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1588            if both {
1589                mod3_lower
1590            } else {
1591                min(
1592                    mod3_lower,
1593                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1594                )
1595            }
1596        } else {
1597            self.lower_bounds[InlineStack::TILDES]
1598        }
1599    }
1600
1601    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1602        if c == b'_' {
1603            if both {
1604                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1605            } else {
1606                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1607            }
1608        } else if c == b'*' {
1609            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1610            if !both {
1611                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1612            }
1613        } else {
1614            self.lower_bounds[InlineStack::TILDES] = new_bound;
1615        }
1616    }
1617
1618    fn truncate(&mut self, new_bound: usize) {
1619        self.stack.truncate(new_bound);
1620        for lower_bound in &mut self.lower_bounds {
1621            if *lower_bound > new_bound {
1622                *lower_bound = new_bound;
1623            }
1624        }
1625    }
1626
1627    fn find_match(
1628        &mut self,
1629        tree: &mut Tree<Item>,
1630        c: u8,
1631        run_length: usize,
1632        both: bool,
1633    ) -> Option<InlineEl> {
1634        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1635        let res = self.stack[lowerbound..]
1636            .iter()
1637            .cloned()
1638            .enumerate()
1639            .rfind(|(_, el)| {
1640                if c == b'~' && run_length != el.run_length {
1641                    return false;
1642                }
1643                el.c == c
1644                    && (!both && !el.both
1645                        || (run_length + el.run_length) % 3 != 0
1646                        || run_length % 3 == 0)
1647            });
1648
1649        if let Some((matching_ix, matching_el)) = res {
1650            let matching_ix = matching_ix + lowerbound;
1651            for el in &self.stack[(matching_ix + 1)..] {
1652                for i in 0..el.count {
1653                    tree[el.start + i].item.body = ItemBody::Text {
1654                        backslash_escaped: false,
1655                    };
1656                }
1657            }
1658            self.truncate(matching_ix);
1659            Some(matching_el)
1660        } else {
1661            self.set_lowerbound(c, run_length, both, self.stack.len());
1662            None
1663        }
1664    }
1665
1666    fn trim_lower_bound(&mut self, ix: usize) {
1667        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1668    }
1669
1670    fn push(&mut self, el: InlineEl) {
1671        if el.c == b'~' {
1672            self.trim_lower_bound(InlineStack::TILDES);
1673        }
1674        self.stack.push(el)
1675    }
1676}
1677
/// Outcome of [`scan_reference`].
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // contains next node index
    //
    // Produced when the scanned text starts with `[]`.
    Collapsed(Option<TreeIndex>),
    // A label was scanned, but it turned out to be a footnote label.
    UnexpectedFootnote,
    // Nothing usable could be scanned.
    Failed,
}
1687
1688/// Skips forward within a block to a node which spans (ends inclusive) the given
1689/// index into the source.
1690fn scan_nodes_to_ix(
1691    tree: &Tree<Item>,
1692    mut node: Option<TreeIndex>,
1693    ix: usize,
1694) -> Option<TreeIndex> {
1695    while let Some(node_ix) = node {
1696        if tree[node_ix].item.end <= ix {
1697            node = tree[node_ix].next;
1698        } else {
1699            break;
1700        }
1701    }
1702    node
1703}
1704
1705/// Scans an inline link label, which cannot be interrupted.
1706/// Returns number of bytes (including brackets) and label on success.
1707fn scan_link_label<'text>(
1708    tree: &Tree<Item>,
1709    text: &'text str,
1710    options: Options,
1711) -> Option<(usize, ReferenceLabel<'text>)> {
1712    let bytes = text.as_bytes();
1713    if bytes.len() < 2 || bytes[0] != b'[' {
1714        return None;
1715    }
1716    let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
1717    if options.contains(Options::ENABLE_FOOTNOTES)
1718        && b'^' == bytes[1]
1719        && bytes.get(2) != Some(&b']')
1720    {
1721        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if options.has_gfm_footnotes() {
1722            &|_| None
1723        } else {
1724            &linebreak_handler
1725        };
1726        if let Some((byte_index, cow)) =
1727            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1728        {
1729            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1730        }
1731    }
1732    let (byte_index, cow) =
1733        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1734    Some((byte_index + 1, ReferenceLabel::Link(cow)))
1735}
1736
1737fn scan_reference<'b>(
1738    tree: &Tree<Item>,
1739    text: &'b str,
1740    cur: Option<TreeIndex>,
1741    options: Options,
1742) -> RefScan<'b> {
1743    let cur_ix = match cur {
1744        None => return RefScan::Failed,
1745        Some(cur_ix) => cur_ix,
1746    };
1747    let start = tree[cur_ix].item.start;
1748    let tail = &text.as_bytes()[start..];
1749
1750    if tail.starts_with(b"[]") {
1751        // TODO: this unwrap is sus and should be looked at closer
1752        let closing_node = tree[cur_ix].next.unwrap();
1753        RefScan::Collapsed(tree[closing_node].next)
1754    } else {
1755        let label = scan_link_label(tree, &text[start..], options);
1756        match label {
1757            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
1758            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
1759            None => RefScan::Failed,
1760        }
1761    }
1762}
1763
/// Stack of [`LinkStackEl`] entries.
#[derive(Clone, Default)]
struct LinkStack {
    /// The entries, most recent last.
    inner: Vec<LinkStackEl>,
    /// Entries below this index have already been visited by
    /// `disable_all_links`, which therefore only scans from here onwards.
    disabled_ix: usize,
}
1769
1770impl LinkStack {
1771    fn push(&mut self, el: LinkStackEl) {
1772        self.inner.push(el);
1773    }
1774
1775    fn pop(&mut self) -> Option<LinkStackEl> {
1776        let el = self.inner.pop();
1777        self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1778        el
1779    }
1780
1781    fn clear(&mut self) {
1782        self.inner.clear();
1783        self.disabled_ix = 0;
1784    }
1785
1786    fn disable_all_links(&mut self) {
1787        for el in &mut self.inner[self.disabled_ix..] {
1788            if el.ty == LinkStackTy::Link {
1789                el.ty = LinkStackTy::Disabled;
1790            }
1791        }
1792        self.disabled_ix = self.inner.len();
1793    }
1794}
1795
/// A single entry of the [`LinkStack`].
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node this entry refers to (presumably the opening bracket node;
    /// confirm against the code that pushes entries).
    node: TreeIndex,
    /// Kind of the entry; see [`LinkStackTy`].
    ty: LinkStackTy,
}
1801
/// Kind of a [`LinkStackEl`].
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link entry that is still active.
    Link,
    /// An image entry; `LinkStack::disable_all_links` leaves these untouched.
    Image,
    /// A former `Link` entry that `LinkStack::disable_all_links` switched off.
    Disabled,
}
1808
/// Contains the destination URL, title and source span of a reference definition.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// Destination URL of the definition.
    pub dest: CowStr<'a>,
    /// Title of the definition, when one is present.
    pub title: Option<CowStr<'a>>,
    /// Source span of the definition.
    pub span: Range<usize>,
}
1816
1817impl<'a> LinkDef<'a> {
1818    pub fn into_static(self) -> LinkDef<'static> {
1819        LinkDef {
1820            dest: self.dest.into_static(),
1821            title: self.title.map(|s| s.into_static()),
1822            span: self.span,
1823        }
1824    }
1825}
1826
/// Bookkeeping kept for a footnote definition.
///
/// Unlike [`LinkDef`], no destination, title or span is stored here; the only
/// field is a counter.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter; presumably the number of references to this footnote
    /// seen so far (confirm against the code that increments it).
    pub use_count: usize,
}
1832
/// Tracks tree indices of code span delimiters of each length. It should prevent
/// quadratic scanning behaviours by providing (amortized) constant time lookups.
struct CodeDelims {
    /// Maps a delimiter length to the queue of tree indices recorded for
    /// delimiters of that length, in insertion order.
    inner: HashMap<usize, VecDeque<TreeIndex>>,
    /// Whether the first insert (which is skipped, not stored) has happened.
    seen_first: bool,
}
1839
1840impl CodeDelims {
1841    fn new() -> Self {
1842        Self {
1843            inner: Default::default(),
1844            seen_first: false,
1845        }
1846    }
1847
1848    fn insert(&mut self, count: usize, ix: TreeIndex) {
1849        if self.seen_first {
1850            self.inner.entry(count).or_default().push_back(ix);
1851        } else {
1852            // Skip the first insert, since that delimiter will always
1853            // be an opener and not a closer.
1854            self.seen_first = true;
1855        }
1856    }
1857
1858    fn is_populated(&self) -> bool {
1859        !self.inner.is_empty()
1860    }
1861
1862    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1863        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1864            if ix > open_ix {
1865                return Some(ix);
1866            }
1867        }
1868        None
1869    }
1870
1871    fn clear(&mut self) {
1872        self.inner.clear();
1873        self.seen_first = false;
1874    }
1875}
1876
/// Tracks brace contexts and delimiter length for math delimiters.
/// Provides amortized constant-time lookups.
struct MathDelims {
    /// Maps a brace context to the queue of delimiters recorded in it. Each
    /// entry is `(tree index, can close, delimiter is display)`.
    inner: HashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
1882
1883impl MathDelims {
1884    fn new() -> Self {
1885        Self {
1886            inner: Default::default(),
1887        }
1888    }
1889
1890    fn insert(
1891        &mut self,
1892        delim_is_display: bool,
1893        brace_context: u8,
1894        ix: TreeIndex,
1895        can_close: bool,
1896    ) {
1897        self.inner
1898            .entry(brace_context)
1899            .or_default()
1900            .push_back((ix, can_close, delim_is_display));
1901    }
1902
1903    fn is_populated(&self) -> bool {
1904        !self.inner.is_empty()
1905    }
1906
1907    fn find(
1908        &mut self,
1909        tree: &Tree<Item>,
1910        open_ix: TreeIndex,
1911        is_display: bool,
1912        brace_context: u8,
1913    ) -> Option<TreeIndex> {
1914        while let Some((ix, can_close, delim_is_display)) =
1915            self.inner.get_mut(&brace_context)?.pop_front()
1916        {
1917            if ix <= open_ix || (is_display && tree[open_ix].next == Some(ix)) {
1918                continue;
1919            }
1920            let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
1921            if (!is_display && can_close) || (is_display && delim_is_display) {
1922                return Some(ix);
1923            }
1924            // if we can't use it, leave it in the queue as a tombstone for the next
1925            // thing that tries to match it
1926            self.inner
1927                .get_mut(&brace_context)?
1928                .push_front((ix, can_close, delim_is_display));
1929            break;
1930        }
1931        None
1932    }
1933
1934    fn clear(&mut self) {
1935        self.inner.clear();
1936    }
1937}
1938
/// Position of a link entry in the `links` list of [`Allocations`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);
1941
/// Position of a string in the `cows` list of [`Allocations`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);
1944
/// Position of an alignment list in the `alignments` list of [`Allocations`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);
1947
/// One-based position of an entry in the `headings` list of [`Allocations`];
/// the stored value is the zero-based position plus one, so it is never zero.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);
1950
/// Side storage for data that is referred to through small index types
/// ([`LinkIndex`], [`CowIndex`], [`AlignmentIndex`], [`HeadingIndex`]), plus
/// the reference and footnote definition tables.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Reference definitions.
    pub refdefs: RefDefs<'a>,
    /// Footnote definitions.
    pub footdefs: FootnoteDefs<'a>,
    /// Link data as `(link type, url, title, id)`, addressed by [`LinkIndex`].
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Strings, addressed by [`CowIndex`].
    cows: Vec<CowStr<'a>>,
    /// Alignment lists, addressed by [`AlignmentIndex`].
    alignments: Vec<Vec<Alignment>>,
    /// Heading attributes, addressed by [`HeadingIndex`].
    headings: Vec<HeadingAttributes<'a>>,
}
1960
/// Used by the heading attributes extension.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Identifier of the heading, if any.
    pub id: Option<CowStr<'a>>,
    /// Classes of the heading.
    pub classes: Vec<CowStr<'a>>,
    /// Further attributes; each entry pairs a string with an optional second
    /// string (presumably attribute name and value; confirm against the
    /// attribute parser).
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
1968
/// Keeps track of the reference definitions defined in the document.
///
/// Wraps a map from link label to [`LinkDef`].
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);
1972
/// Keeps track of the footnote definitions defined in the document.
///
/// Wraps a map from footnote label to [`FootnoteDef`].
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) HashMap<FootnoteLabel<'input>, FootnoteDef>);
1976
1977impl<'input, 'b, 's> RefDefs<'input>
1978where
1979    's: 'b,
1980{
1981    /// Performs a lookup on reference label using unicode case folding.
1982    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
1983        self.0.get(&UniCase::new(key.into()))
1984    }
1985
1986    /// Provides an iterator over all the document's reference definitions.
1987    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
1988        self.0.iter().map(|(k, v)| (k.as_ref(), v))
1989    }
1990}
1991
1992impl<'input, 'b, 's> FootnoteDefs<'input>
1993where
1994    's: 'b,
1995{
1996    /// Performs a lookup on reference label using unicode case folding.
1997    pub fn contains(&'s self, key: &'b str) -> bool {
1998        self.0.contains_key(&UniCase::new(key.into()))
1999    }
2000    /// Performs a lookup on reference label using unicode case folding.
2001    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2002        self.0.get_mut(&UniCase::new(key))
2003    }
2004}
2005
2006impl<'a> Allocations<'a> {
2007    pub fn new() -> Self {
2008        Self {
2009            refdefs: RefDefs::default(),
2010            footdefs: FootnoteDefs::default(),
2011            links: Vec::with_capacity(128),
2012            cows: Vec::new(),
2013            alignments: Vec::new(),
2014            headings: Vec::new(),
2015        }
2016    }
2017
2018    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2019        let ix = self.cows.len();
2020        self.cows.push(cow);
2021        CowIndex(ix)
2022    }
2023
2024    pub fn allocate_link(
2025        &mut self,
2026        ty: LinkType,
2027        url: CowStr<'a>,
2028        title: CowStr<'a>,
2029        id: CowStr<'a>,
2030    ) -> LinkIndex {
2031        let ix = self.links.len();
2032        self.links.push((ty, url, title, id));
2033        LinkIndex(ix)
2034    }
2035
2036    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2037        let ix = self.alignments.len();
2038        self.alignments.push(alignment);
2039        AlignmentIndex(ix)
2040    }
2041
2042    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2043        let ix = self.headings.len();
2044        self.headings.push(attrs);
2045        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
2046        // such a long Vec cannot fit in memory.
2047        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2048        HeadingIndex(ix_nonzero)
2049    }
2050
2051    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2052        std::mem::replace(&mut self.cows[ix.0], "".into())
2053    }
2054
2055    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2056        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2057        std::mem::replace(&mut self.links[ix.0], default_link)
2058    }
2059
2060    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2061        std::mem::take(&mut self.alignments[ix.0])
2062    }
2063}
2064
2065impl<'a> Index<CowIndex> for Allocations<'a> {
2066    type Output = CowStr<'a>;
2067
2068    fn index(&self, ix: CowIndex) -> &Self::Output {
2069        self.cows.index(ix.0)
2070    }
2071}
2072
2073impl<'a> Index<LinkIndex> for Allocations<'a> {
2074    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
2075
2076    fn index(&self, ix: LinkIndex) -> &Self::Output {
2077        self.links.index(ix.0)
2078    }
2079}
2080
2081impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2082    type Output = Vec<Alignment>;
2083
2084    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2085        self.alignments.index(ix.0)
2086    }
2087}
2088
2089impl<'a> Index<HeadingIndex> for Allocations<'a> {
2090    type Output = HeadingAttributes<'a>;
2091
2092    fn index(&self, ix: HeadingIndex) -> &Self::Output {
2093        self.headings.index(ix.0.get() - 1)
2094    }
2095}
2096
/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`), declarations (`<!DECLARATION`) and comments. The
/// respective usizes represent the indices before which a scan will always
/// fail and can hence be skipped.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Index before which scanning for a cdata element will fail.
    pub cdata: usize,
    /// Index before which scanning for a processing element will fail.
    pub processing: usize,
    /// Index before which scanning for a declaration will fail.
    pub declaration: usize,
    /// Index before which scanning for a comment will fail.
    pub comment: usize,
}
2109
/// Trait for broken link callbacks.
///
/// See [Parser::new_with_broken_link_callback].
/// Automatically implemented for closures with the appropriate signature.
pub trait BrokenLinkCallback<'input> {
    /// Called with the broken `link`.
    ///
    /// Returning `None` supplies nothing for the link. The returned pair is
    /// presumably the replacement (URL, title); confirm against
    /// [Parser::new_with_broken_link_callback].
    fn handle_broken_link(
        &mut self,
        link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)>;
}
2120
2121impl<'input, T> BrokenLinkCallback<'input> for T
2122where
2123    T: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
2124{
2125    fn handle_broken_link(
2126        &mut self,
2127        link: BrokenLink<'input>,
2128    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2129        self(link)
2130    }
2131}
2132
2133impl<'input> BrokenLinkCallback<'input> for Box<dyn BrokenLinkCallback<'input>> {
2134    fn handle_broken_link(
2135        &mut self,
2136        link: BrokenLink<'input>,
2137    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2138        (**self).handle_broken_link(link)
2139    }
2140}
2141
/// Broken link callback that does nothing.
///
/// Its `handle_broken_link` implementation always returns `None`. It is also
/// the default callback type parameter of [`OffsetIter`].
#[derive(Debug)]
pub struct DefaultBrokenLinkCallback;
2145
2146impl<'input> BrokenLinkCallback<'input> for DefaultBrokenLinkCallback {
2147    fn handle_broken_link(
2148        &mut self,
2149        _link: BrokenLink<'input>,
2150    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2151        None
2152    }
2153}
2154
/// Markdown event and source range iterator.
///
/// Generates tuples where the first element is the markdown event and the second
/// is the corresponding range in the source string.
///
/// Constructed from a `Parser` using its
/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
#[derive(Debug)]
pub struct OffsetIter<'a, F = DefaultBrokenLinkCallback> {
    /// The parser that is being driven.
    inner: Parser<'a, F>,
}
2166
2167impl<'a, F: BrokenLinkCallback<'a>> OffsetIter<'a, F> {
2168    /// Returns a reference to the internal reference definition tracker.
2169    pub fn reference_definitions(&self) -> &RefDefs<'_> {
2170        self.inner.reference_definitions()
2171    }
2172}
2173
2174impl<'a, F: BrokenLinkCallback<'a>> Iterator for OffsetIter<'a, F> {
2175    type Item = (Event<'a>, Range<usize>);
2176
2177    fn next(&mut self) -> Option<Self::Item> {
2178        match self.inner.tree.cur() {
2179            None => {
2180                let ix = self.inner.tree.pop()?;
2181                let ix = if matches!(self.inner.tree[ix].item.body, ItemBody::TightParagraph) {
2182                    // tight paragraphs emit nothing
2183                    self.inner.tree.next_sibling(ix);
2184                    return self.next();
2185                } else {
2186                    ix
2187                };
2188                let tag_end = body_to_tag_end(&self.inner.tree[ix].item.body);
2189                self.inner.tree.next_sibling(ix);
2190                let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
2191                debug_assert!(span.start <= span.end);
2192                Some((Event::End(tag_end), span))
2193            }
2194            Some(cur_ix) => {
2195                let cur_ix =
2196                    if matches!(self.inner.tree[cur_ix].item.body, ItemBody::TightParagraph) {
2197                        // tight paragraphs emit nothing
2198                        self.inner.tree.push();
2199                        self.inner.tree.cur().unwrap()
2200                    } else {
2201                        cur_ix
2202                    };
2203                if self.inner.tree[cur_ix].item.body.is_maybe_inline() {
2204                    self.inner.handle_inline();
2205                }
2206
2207                let node = self.inner.tree[cur_ix];
2208                let item = node.item;
2209                let event = item_to_event(item, self.inner.text, &mut self.inner.allocs);
2210                if let Event::Start(..) = event {
2211                    self.inner.tree.push();
2212                } else {
2213                    self.inner.tree.next_sibling(cur_ix);
2214                }
2215                debug_assert!(item.start <= item.end);
2216                Some((event, item.start..item.end))
2217            }
2218        }
2219    }
2220}
2221
2222fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2223    match *body {
2224        ItemBody::Paragraph => TagEnd::Paragraph,
2225        ItemBody::Emphasis => TagEnd::Emphasis,
2226        ItemBody::Superscript => TagEnd::Superscript,
2227        ItemBody::Subscript => TagEnd::Subscript,
2228        ItemBody::Strong => TagEnd::Strong,
2229        ItemBody::Strikethrough => TagEnd::Strikethrough,
2230        ItemBody::Link(..) => TagEnd::Link,
2231        ItemBody::Image(..) => TagEnd::Image,
2232        ItemBody::Heading(level, _) => TagEnd::Heading(level),
2233        ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2234        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2235        ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2236        ItemBody::List(_, c, _) => {
2237            let is_ordered = c == b'.' || c == b')';
2238            TagEnd::List(is_ordered)
2239        }
2240        ItemBody::ListItem(_) => TagEnd::Item,
2241        ItemBody::TableHead => TagEnd::TableHead,
2242        ItemBody::TableCell => TagEnd::TableCell,
2243        ItemBody::TableRow => TagEnd::TableRow,
2244        ItemBody::Table(..) => TagEnd::Table,
2245        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2246        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2247        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2248        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2249        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2250        _ => panic!("unexpected item body {:?}", body),
2251    }
2252}
2253
2254fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2255    let tag = match item.body {
2256        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2257        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2258        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2259        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2260        ItemBody::HtmlBlock => Tag::HtmlBlock,
2261        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2262        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2263        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2264        ItemBody::SoftBreak => return Event::SoftBreak,
2265        ItemBody::HardBreak(_) => return Event::HardBreak,
2266        ItemBody::FootnoteReference(cow_ix) => {
2267            return Event::FootnoteReference(allocs.take_cow(cow_ix))
2268        }
2269        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2270        ItemBody::Rule => return Event::Rule,
2271        ItemBody::Paragraph => Tag::Paragraph,
2272        ItemBody::Emphasis => Tag::Emphasis,
2273        ItemBody::Superscript => Tag::Superscript,
2274        ItemBody::Subscript => Tag::Subscript,
2275        ItemBody::Strong => Tag::Strong,
2276        ItemBody::Strikethrough => Tag::Strikethrough,
2277        ItemBody::Link(link_ix) => {
2278            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2279            Tag::Link {
2280                link_type,
2281                dest_url,
2282                title,
2283                id,
2284            }
2285        }
2286        ItemBody::Image(link_ix) => {
2287            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2288            Tag::Image {
2289                link_type,
2290                dest_url,
2291                title,
2292                id,
2293            }
2294        }
2295        ItemBody::Heading(level, Some(heading_ix)) => {
2296            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2297            Tag::Heading {
2298                level,
2299                id: id.clone(),
2300                classes: classes.clone(),
2301                attrs: attrs.clone(),
2302            }
2303        }
2304        ItemBody::Heading(level, None) => Tag::Heading {
2305            level,
2306            id: None,
2307            classes: Vec::new(),
2308            attrs: Vec::new(),
2309        },
2310        ItemBody::FencedCodeBlock(cow_ix) => {
2311            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2312        }
2313        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2314        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2315        ItemBody::List(_, c, listitem_start) => {
2316            if c == b'.' || c == b')' {
2317                Tag::List(Some(listitem_start))
2318            } else {
2319                Tag::List(None)
2320            }
2321        }
2322        ItemBody::ListItem(_) => Tag::Item,
2323        ItemBody::TableHead => Tag::TableHead,
2324        ItemBody::TableCell => Tag::TableCell,
2325        ItemBody::TableRow => Tag::TableRow,
2326        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2327        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2328        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2329        ItemBody::Math(cow_ix, is_display) => {
2330            return if is_display {
2331                Event::DisplayMath(allocs.take_cow(cow_ix))
2332            } else {
2333                Event::InlineMath(allocs.take_cow(cow_ix))
2334            }
2335        }
2336        ItemBody::DefinitionList(_) => Tag::DefinitionList,
2337        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2338        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2339        _ => panic!("unexpected item body {:?}", item.body),
2340    };
2341
2342    Event::Start(tag)
2343}
2344
2345impl<'a, F: BrokenLinkCallback<'a>> Iterator for Parser<'a, F> {
2346    type Item = Event<'a>;
2347
2348    fn next(&mut self) -> Option<Event<'a>> {
2349        match self.tree.cur() {
2350            None => {
2351                let ix = self.tree.pop()?;
2352                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
2353                    // tight paragraphs emit nothing
2354                    self.tree.next_sibling(ix);
2355                    return self.next();
2356                } else {
2357                    ix
2358                };
2359                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2360                self.tree.next_sibling(ix);
2361                Some(Event::End(tag_end))
2362            }
2363            Some(cur_ix) => {
2364                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
2365                    // tight paragraphs emit nothing
2366                    self.tree.push();
2367                    self.tree.cur().unwrap()
2368                } else {
2369                    cur_ix
2370                };
2371                if self.tree[cur_ix].item.body.is_maybe_inline() {
2372                    self.handle_inline();
2373                }
2374
2375                let node = self.tree[cur_ix];
2376                let item = node.item;
2377                let event = item_to_event(item, self.text, &mut self.allocs);
2378                if let Event::Start(ref _tag) = event {
2379                    self.tree.push();
2380                } else {
2381                    self.tree.next_sibling(cur_ix);
2382                }
2383                Some(event)
2384            }
2385        }
2386    }
2387}
2388
2389impl<'a, F: BrokenLinkCallback<'a>> FusedIterator for Parser<'a, F> {}
2390
#[cfg(test)]
mod test {
    use super::*;
    use crate::tree::Node;

    // TODO: move these tests to tests/html.rs?

    /// Builds a parser with the tables, footnotes, strikethrough, superscript,
    /// subscript and task list extensions enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_> {
        let opts = Options::ENABLE_TABLES
            | Options::ENABLE_FOOTNOTES
            | Options::ENABLE_STRIKETHROUGH
            | Options::ENABLE_SUPERSCRIPT
            | Options::ENABLE_SUBSCRIPT
            | Options::ENABLE_TASKLISTS;

        Parser::new_ext(text, opts)
    }

    /// Renders `markdown` to HTML with a default-option parser.
    #[cfg(feature = "html")]
    fn render_html(markdown: &str) -> String {
        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(markdown));
        buf
    }

    /// Pins the size of a tree node on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        assert_eq!(48, std::mem::size_of::<Node<Item>>());
    }

    /// Pins the size of `ItemBody` on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        assert_eq!(16, std::mem::size_of::<ItemBody>());
    }

    #[test]
    fn single_open_fish_bracket() {
        // don't crash
        let event_count = Parser::new("<").count();
        assert_eq!(3, event_count);
    }

    #[test]
    fn lone_hashtag() {
        // don't crash
        let event_count = Parser::new("#").count();
        assert_eq!(2, event_count);
    }

    #[test]
    fn lots_of_backslashes() {
        // don't crash
        for &input in &["\\\\\r\r", "\\\r\r\\.\\\\\r\r\\.\\"] {
            Parser::new(input).count();
        }
    }

    #[test]
    fn issue_320() {
        // don't crash
        let input = ":\r\t> |\r:\r\t> |\r";
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_319() {
        // don't crash
        for &input in &["|\r-]([^|\r-]([^", "|\r\r=][^|\r\r=][^car"] {
            parser_with_extensions(input).count();
        }
    }

    #[test]
    fn issue_303() {
        // don't crash
        for &input in &["[^\r\ra]", "\r\r]Z[^\x00\r\r]Z[^\x00"] {
            parser_with_extensions(input).count();
        }
    }

    #[test]
    fn issue_313() {
        // don't crash
        for &input in &["*]0[^\r\r*]0[^", "[^\r> `][^\r> `][^\r> `]["] {
            parser_with_extensions(input).count();
        }
    }

    #[test]
    fn issue_311() {
        // don't crash
        let input = "\\\u{0d}-\u{09}\\\u{0d}-\u{09}";
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_283() {
        let bytes = b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f";
        let input = std::str::from_utf8(bytes).unwrap();
        // don't crash
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_289() {
        // don't crash
        for &input in &["> - \\\n> - ", "- \n\n"] {
            parser_with_extensions(input).count();
        }
    }

    #[test]
    fn issue_306() {
        // don't crash
        let input = "*\r_<__*\r_<__*\r_<__*\r_<__";
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_305() {
        // don't crash
        let input = "_6**6*_*";
        parser_with_extensions(input).count();
    }

    #[test]
    fn another_emphasis_panic() {
        let input = "*__#_#__*";
        parser_with_extensions(input).count();
    }

    /// Checks the source range reported for every event of a short paragraph.
    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_, span)| span)
            .collect();
        let expected_offsets = vec![0..13, 0..7, 1..6, 0..7, 7..13, 0..13];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The range of a reference link covers both bracket pairs.
    #[test]
    fn reference_link_offsets() {
        let markdown = "# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com";
        let range = Parser::new(markdown)
            .into_offset_iter()
            .find_map(|(ev, span)| match ev {
                Event::Start(Tag::Link {
                    link_type: LinkType::Reference,
                    ..
                }) => Some(span),
                _ => None,
            })
            .unwrap();
        assert_eq!(5..30, range);
    }

    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .find_map(|(ev, span)| match ev {
                Event::FootnoteReference(..) => Some(span),
                _ => None,
            })
            .unwrap();
        assert_eq!(12..16, range);
    }

    /// A `!` directly before a footnote reference stays a separate text event.
    #[test]
    fn footnote_offsets_exclamation() {
        let mut immediately_before_footnote = None;
        let mut footnote_span = None;
        let events =
            parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.").into_offset_iter();
        for (ev, span) in events {
            if let Event::FootnoteReference(..) = ev {
                footnote_span = Some(span);
                break;
            }
            // Remember the most recent event seen before the footnote.
            immediately_before_footnote = Some((ev, span));
        }
        let range = footnote_span.unwrap();
        assert_eq!(13..17, range);

        match immediately_before_footnote.as_ref().unwrap() {
            (Event::Text(exclamation), range_exclamation) => {
                assert_eq!("!", &exclamation[..]);
                assert_eq!(&(12..13), range_exclamation);
            }
            _ => panic!("what came first, then? {immediately_before_footnote:?}"),
        }
    }

    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        // The fourth event overall is the one whose range is checked.
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .nth(3)
            .map(|(_, span)| span)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    #[test]
    fn table_cell_span() {
        let markdown = "a|b|c\n--|--|--\na|  |c";
        // Fifth `TableCell` start event.
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter(|(ev, _)| matches!(ev, Event::Start(Tag::TableCell)))
            .nth(4)
            .map(|(_, span)| span)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na|".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 2),
            event_offset
        );
    }

    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_, span)| span)
            .collect();
        let expected_offsets = vec![0..10, 0..2, 2..8, 3..4, 2..8, 8..10, 0..10];
        assert_eq!(expected_offsets, event_offsets);
    }

    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_, span)| span)
            .collect();
        let expected_offsets = vec![0..4, 0..4];
        assert_eq!(expected_offsets, event_offsets);
    }

    // FIXME: add this one to the regression suite
    #[cfg(feature = "html")]
    #[test]
    fn link_def_at_eof() {
        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";

        assert_eq!(expected, render_html(test_str));
    }

    /// Without the footnotes option, `[^a]` resolves as an ordinary reference.
    #[cfg(feature = "html")]
    #[test]
    fn no_footnote_refs_without_option() {
        let test_str = "a [^a]\n\n[^a]: yolo";
        let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";

        assert_eq!(expected, render_html(test_str));
    }

    #[cfg(feature = "html")]
    #[test]
    fn ref_def_at_eof() {
        let test_str = "[test]:\\";
        let expected = "";

        assert_eq!(expected, render_html(test_str));
    }

    #[cfg(feature = "html")]
    #[test]
    fn ref_def_cr_lf() {
        let test_str = "[a]: /u\r\n\n[a]";
        let expected = "<p><a href=\"/u\">a</a></p>\n";

        assert_eq!(expected, render_html(test_str));
    }

    #[cfg(feature = "html")]
    #[test]
    fn no_dest_refdef() {
        let test_str = "[a]:";
        let expected = "<p>[a]:</p>\n";

        assert_eq!(expected, render_html(test_str));
    }

    /// The broken-link callback must fire exactly once per broken link.
    #[test]
    fn broken_links_called_only_once() {
        let cases = [
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ];
        for &(markdown, expected) in cases.iter() {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            // Drain the parser so every link gets resolved.
            parser.for_each(drop);
            assert_eq!(times_called, expected);
        }
    }

    /// A callback-supplied URL and title end up on the emitted link tag.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            Some(("YOLO".into(), "SWAG".to_owned().into()))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));

        let links = parser.filter_map(|event| match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }) => Some((link_type, dest_url, title, id)),
            _ => None,
        });
        let mut link_tag_count = 0;
        for (typ, url, title, id) in links {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
        assert!(link_tag_count > 0);
    }

    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _span) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    #[test]
    fn code_block_kind_check_indented() {
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let found = parser
            .into_offset_iter()
            .filter(|(ev, _)| {
                matches!(
                    ev,
                    Event::Start(Tag::CodeBlock(CodeBlockKind::Indented))
                )
            })
            .count();
        assert_eq!(found, 1);
    }

    /// Reference definitions can be looked up before and during iteration.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if parser.next().is_some() {
            // testing keys with shorter lifetimes than parser and its input
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    /// Compile-time check that common callback lifetime patterns are accepted.
    #[test]
    fn common_lifetime_patterns_allowed<'b>() {
        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        let temporary_str = String::from("xyz");

        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the
        // closure itself. Hack it by attaching the lifetime to the test function instead.
        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        /* Passing `&temporary_str` together with `&mut closure` fails to compile. Because the
         * closure can't say `for <'a> fn(BrokenLink<'a>) -> CowStr<'a>` and has to use the
         * enclosing `'b` lifetime parameter, `temporary_str` lives shorter than `'b`. I think this
         * is unlikely to occur in real life, and if it does, the fix is simple: move it out to a
         * function that allows annotating the lifetimes.
         */
        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut closure)) {
        //}

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }

    #[test]
    fn inline_html_inside_blockquote() {
        // Regression for #960
        let events: Vec<_> = Parser::new("> <foo\n> bar>").collect();
        // The blockquote marker on the continuation line is not part of the HTML.
        let inline_html = CowStr::Boxed(String::from("<foo\nbar>").into_boxed_str());
        let expected = [
            Event::Start(Tag::BlockQuote(None)),
            Event::Start(Tag::Paragraph),
            Event::InlineHtml(inline_html),
            Event::End(TagEnd::Paragraph),
            Event::End(TagEnd::BlockQuote(None)),
        ];
        assert_eq!(&events, &expected);
    }

    /// `has_pothole` is set only for wikilinks written with a `|` separator.
    #[test]
    fn wikilink_has_pothole() {
        fn link_start(dest: &'static str, has_pothole: bool) -> Event<'static> {
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole },
                dest_url: CowStr::Borrowed(dest),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            })
        }

        fn text(content: &'static str) -> Event<'static> {
            Event::Text(CowStr::Borrowed(content))
        }

        let input = "[[foo]] [[bar|baz]]";
        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
        let expected = [
            Event::Start(Tag::Paragraph),
            link_start("foo", false),
            text("foo"),
            Event::End(TagEnd::Link),
            text(" "),
            link_start("bar", true),
            text("baz"),
            Event::End(TagEnd::Link),
            Event::End(TagEnd::Paragraph),
        ];
        assert_eq!(&events, &expected);
    }
}