equt_md/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::ops::{Index, Range};
26
27use unicase::UniCase;
28
29use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel};
30use crate::scanners::*;
31use crate::strings::CowStr;
32use crate::tree::{Tree, TreeIndex, TreePointer};
33
34// Allowing arbitrary depth nested parentheses inside link destinations
35// can create denial of service vulnerabilities if we're not careful.
36// The simplest countermeasure is to limit their depth, which is
37// explicitly allowed by the spec as long as the limit is at least 3:
38// https://spec.commonmark.org/0.29/#link-destination
39const LINK_MAX_NESTED_PARENS: usize = 5;
40
41/// Codeblock kind.
42#[derive(Clone, Debug, PartialEq)]
43pub enum CodeBlockKind<'a> {
44    Indented,
45    /// The value contained in the tag describes the language of the code, which may be empty.
46    Fenced(CowStr<'a>),
47}
48
49impl<'a> CodeBlockKind<'a> {
50    pub fn is_indented(&self) -> bool {
51        match *self {
52            CodeBlockKind::Indented => true,
53            _ => false,
54        }
55    }
56
57    pub fn is_fenced(&self) -> bool {
58        match *self {
59            CodeBlockKind::Fenced(_) => true,
60            _ => false,
61        }
62    }
63}
64
65/// Tags for elements that can contain other elements.
66#[derive(Clone, Debug, PartialEq)]
67pub enum Tag<'a> {
68    /// A paragraph of text and other inline elements.
69    Paragraph,
70
71    /// A heading. The field indicates the level of the heading.
72    Heading(u32),
73
74    BlockQuote,
75    /// A code block.
76    CodeBlock(CodeBlockKind<'a>),
77    /// Display Math
78    DisplayMath,
79
80    /// A list. If the list is ordered the field indicates the number of the first item.
81    /// Contains only list items.
82    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
83    /// A list item.
84    Item,
85    /// A footnote definition. The value contained is the footnote's label by which it can
86    /// be referred to.
87    FootnoteDefinition(CowStr<'a>),
88
89    /// A table. Contains a vector describing the text-alignment for each of its columns.
90    Table(Vec<Alignment>),
91    /// A table header. Contains only `TableRow`s. Note that the table body starts immediately
92    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
93    TableHead,
94    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
95    TableRow,
96    TableCell,
97
98    // span-level tags
99    Emphasis,
100    Strong,
101    Strikethrough,
102
103    /// A link. The first field is the link type, the second the destination URL and the third is a title.
104    Link(LinkType, CowStr<'a>, CowStr<'a>),
105
106    /// An image. The first field is the link type, the second the destination URL and the third is a title.
107    Image(LinkType, CowStr<'a>, CowStr<'a>),
108}
109
110/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
111#[derive(Clone, Debug, PartialEq, Copy)]
112pub enum LinkType {
113    /// Inline link like `[foo](bar)`
114    Inline,
115    /// Reference link like `[foo][bar]`
116    Reference,
117    /// Reference without destination in the document, but resolved by the broken_link_callback
118    ReferenceUnknown,
119    /// Collapsed link like `[foo][]`
120    Collapsed,
121    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
122    CollapsedUnknown,
123    /// Shortcut link like `[foo]`
124    Shortcut,
125    /// Shortcut without destination in the document, but resolved by the broken_link_callback
126    ShortcutUnknown,
127    /// Autolink like `<http://foo.bar/baz>`
128    Autolink,
129    /// Email address in autolink like `<john@example.org>`
130    Email,
131}
132
133impl LinkType {
134    fn to_unknown(self) -> Self {
135        match self {
136            LinkType::Reference => LinkType::ReferenceUnknown,
137            LinkType::Collapsed => LinkType::CollapsedUnknown,
138            LinkType::Shortcut => LinkType::ShortcutUnknown,
139            _ => unreachable!(),
140        }
141    }
142}
143
144/// Markdown events that are generated in a preorder traversal of the document
145/// tree, with additional `End` events whenever all of an inner node's children
146/// have been visited.
147#[derive(Clone, Debug, PartialEq)]
148pub enum Event<'a> {
149    /// Start of a tagged element. Events that are yielded after this event
150    /// and before its corresponding `End` event are inside this element.
151    /// Start and end events are guaranteed to be balanced.
152    Start(Tag<'a>),
153    /// End of a tagged element.
154    End(Tag<'a>),
155    /// Frontmatter block.
156    Frontmatter(CowStr<'a>),
157    /// A text node.
158    Text(CowStr<'a>),
159    /// An inline math.
160    Math(CowStr<'a>),
161    /// An inline code node.
162    Code(CowStr<'a>),
163    /// An HTML node.
164    Html(CowStr<'a>),
165    /// A reference to a footnote with given label, which may or may not be defined
166    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
167    /// occur in any order.
168    FootnoteReference(CowStr<'a>),
169    /// A soft line break.
170    SoftBreak,
171    /// A hard line break.
172    HardBreak,
173    /// A horizontal ruler.
174    Rule,
175    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
176    TaskListMarker(bool),
177}
178
179/// Table column text alignment.
180#[derive(Copy, Clone, Debug, PartialEq)]
181pub enum Alignment {
182    /// Default text alignment.
183    None,
184    Left,
185    Center,
186    Right,
187}
188
189bitflags! {
190    /// Option struct containing flags for enabling extra features
191    /// that are not part of the CommonMark spec.
192    pub struct Options: u32 {
193        const ENABLE_TABLES = 1 << 1;
194        const ENABLE_FOOTNOTES = 1 << 2;
195        const ENABLE_STRIKETHROUGH = 1 << 3;
196        const ENABLE_TASKLISTS = 1 << 4;
197        const ENABLE_FRONTMATTER = 1 << 5;
198        const ENABLE_MATH = 1 << 6;
199    }
200}
201
202#[derive(Debug, Default, Clone, Copy)]
203struct Item {
204    start: usize,
205    end: usize,
206    body: ItemBody,
207}
208
209#[derive(Debug, PartialEq, Clone, Copy)]
210enum ItemBody {
211    Frontmatter(CowIndex),
212    Paragraph,
213    Text,
214    SoftBreak,
215    HardBreak,
216
217    // These are possible inline items, need to be resolved in second pass.
218
219    // repeats, can_open, can_close
220    MaybeEmphasis(usize, bool, bool),
221    MaybeCode(usize, bool), // number of backticks, preceeded by backslash
222    MaybeMath(usize, bool),
223    MaybeHtml,
224    MaybeLinkOpen,
225    MaybeLinkClose,
226    MaybeImage,
227
228    // These are inline items after resolution.
229    Emphasis,
230    Strong,
231    Strikethrough,
232    Code(CowIndex),
233    Math(CowIndex),
234    Link(LinkIndex),
235    Image(LinkIndex),
236    FootnoteReference(CowIndex),
237    TaskListMarker(bool), // true for checked
238
239    Rule,
240    Heading(u32), // heading level
241    FencedCodeBlock(CowIndex),
242    DisplayMath,
243    IndentCodeBlock,
244    Html,
245    BlockQuote,
246    List(bool, u8, u64), // is_tight, list character, list start index
247    ListItem(usize),     // indent level
248    SynthesizeText(CowIndex),
249    FootnoteDefinition(CowIndex),
250
251    // Tables
252    Table(AlignmentIndex),
253    TableHead,
254    TableRow,
255    TableCell,
256
257    // Dummy node at the top of the tree - should not be used otherwise!
258    Root,
259}
260
261impl<'a> ItemBody {
262    fn is_inline(&self) -> bool {
263        match *self {
264            ItemBody::MaybeEmphasis(..)
265            | ItemBody::MaybeHtml
266            | ItemBody::MaybeCode(..)
267            | ItemBody::MaybeMath(..)
268            | ItemBody::MaybeLinkOpen
269            | ItemBody::MaybeLinkClose
270            | ItemBody::MaybeImage => true,
271            _ => false,
272        }
273    }
274}
275
276impl<'a> Default for ItemBody {
277    fn default() -> Self {
278        ItemBody::Root
279    }
280}
281
282/// Scanning modes for `Parser`'s `parse_line` method.
283#[derive(PartialEq, Eq, Copy, Clone)]
284enum TableParseMode {
285    /// Inside a paragraph, scanning for table headers.
286    Scan,
287    /// Inside a table.
288    Active,
289    /// Inside a paragraph, not scanning for table headers.
290    Disabled,
291}
292
293/// State for the first parsing pass.
294///
295/// The first pass resolves all block structure, generating an AST. Within a block, items
296/// are in a linear chain with potential inline markup identified.
297struct FirstPass<'a> {
298    text: &'a str,
299    tree: Tree<Item>,
300    begin_list_item: bool,
301    last_line_blank: bool,
302    allocs: Allocations<'a>,
303    options: Options,
304    list_nesting: usize,
305}
306
307impl<'a> FirstPass<'a> {
308    fn new(text: &'a str, options: Options) -> FirstPass {
309        // This is a very naive heuristic for the number of nodes
310        // we'll need.
311        let start_capacity = max(128, text.len() / 32);
312        let tree = Tree::with_capacity(start_capacity);
313        let begin_list_item = false;
314        let last_line_blank = false;
315        let allocs = Allocations::new();
316        FirstPass {
317            text,
318            tree,
319            begin_list_item,
320            last_line_blank,
321            allocs,
322            options,
323            list_nesting: 0,
324        }
325    }
326
327    fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
328        let mut ix = 0;
329        if ix < self.text.len() && self.options.contains(Options::ENABLE_FRONTMATTER) {
330            ix = self.parse_frontmatter();
331        }
332        while ix < self.text.len() {
333            ix = self.parse_block(ix);
334        }
335        for _ in 0..self.tree.spine_len() {
336            self.pop(ix);
337        }
338        (self.tree, self.allocs)
339    }
340
341    /// Parse a frontmatter at the beginnning. It should be called at and only at the
342    /// beginnning of the parsing.
343    fn parse_frontmatter(&mut self) -> usize {
344        let bytes = self.text.as_bytes();
345        if let Some(n) = scan_frontmatter_delimiter(bytes) {
346            let mut start_ix = n;
347            let start = start_ix;
348            let mut empty = true;
349            loop {
350                if let Some(n) = scan_frontmatter_delimiter(&bytes[start_ix..]) {
351                    if empty {
352                        return 0;
353                    }
354                    self.tree.append(Item {
355                        start,
356                        end: start_ix,
357                        body: ItemBody::Frontmatter(
358                            self.allocs.allocate_cow(self.text[start..start_ix].into()),
359                        ),
360                    });
361                    return start_ix + n;
362                } else {
363                    empty = false;
364                    start_ix += scan_nextline(&bytes[start_ix..]);
365                    if start_ix >= bytes.len() {
366                        return 0;
367                    }
368                }
369            }
370        } else {
371            0
372        }
373    }
374
375    /// Returns offset after block.
376    fn parse_block(&mut self, mut start_ix: usize) -> usize {
377        let bytes = self.text.as_bytes();
378        let mut line_start = LineStart::new(&bytes[start_ix..]);
379
380        let i = scan_containers(&self.tree, &mut line_start);
381        for _ in i..self.tree.spine_len() {
382            self.pop(start_ix);
383        }
384
385        if self.options.contains(Options::ENABLE_FOOTNOTES) {
386            // finish footnote if it's still open and was preceeded by blank line
387            if let Some(node_ix) = self.tree.peek_up() {
388                if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
389                    if self.last_line_blank {
390                        self.pop(start_ix);
391                    }
392                }
393            }
394
395            // Footnote definitions of the form
396            // [^bar]:
397            // * anything really
398            let container_start = start_ix + line_start.bytes_scanned();
399            if let Some(bytecount) = self.parse_footnote(container_start) {
400                start_ix = container_start + bytecount;
401                start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
402                line_start = LineStart::new(&bytes[start_ix..]);
403            }
404        }
405
406        // Process new containers
407        loop {
408            let container_start = start_ix + line_start.bytes_scanned();
409            if let Some((ch, index, indent)) = line_start.scan_list_marker() {
410                let after_marker_index = start_ix + line_start.bytes_scanned();
411                self.continue_list(container_start, ch, index);
412                self.tree.append(Item {
413                    start: container_start,
414                    end: after_marker_index, // will get updated later if item not empty
415                    body: ItemBody::ListItem(indent),
416                });
417                self.tree.push();
418                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
419                    self.begin_list_item = true;
420                    return after_marker_index + n;
421                }
422                if self.options.contains(Options::ENABLE_TASKLISTS) {
423                    if let Some(is_checked) = line_start.scan_task_list_marker() {
424                        self.tree.append(Item {
425                            start: after_marker_index,
426                            end: start_ix + line_start.bytes_scanned(),
427                            body: ItemBody::TaskListMarker(is_checked),
428                        });
429                    }
430                }
431            } else if line_start.scan_blockquote_marker() {
432                self.finish_list(start_ix);
433                self.tree.append(Item {
434                    start: container_start,
435                    end: 0, // will get set later
436                    body: ItemBody::BlockQuote,
437                });
438                self.tree.push();
439            } else {
440                break;
441            }
442        }
443
444        let ix = start_ix + line_start.bytes_scanned();
445
446        if let Some(n) = scan_blank_line(&bytes[ix..]) {
447            if let Some(node_ix) = self.tree.peek_up() {
448                match self.tree[node_ix].item.body {
449                    ItemBody::BlockQuote => (),
450                    _ => {
451                        if self.begin_list_item {
452                            // A list item can begin with at most one blank line.
453                            self.pop(start_ix);
454                        }
455                        self.last_line_blank = true;
456                    }
457                }
458            }
459            return ix + n;
460        }
461
462        self.begin_list_item = false;
463        self.finish_list(start_ix);
464
465        // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
466        let remaining_space = line_start.remaining_space();
467
468        let indent = line_start.scan_space_upto(4);
469        if indent == 4 {
470            let ix = start_ix + line_start.bytes_scanned();
471            let remaining_space = line_start.remaining_space();
472            return self.parse_indented_code_block(ix, remaining_space);
473        }
474
475        let ix = start_ix + line_start.bytes_scanned();
476
477        // HTML Blocks
478        if bytes[ix] == b'<' {
479            // Types 1-5 are all detected by one function and all end with the same
480            // pattern
481            if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
482                return self.parse_html_block_type_1_to_5(ix, html_end_tag, remaining_space);
483            }
484
485            // Detect type 6
486            let possible_tag = scan_html_block_tag(&bytes[(ix + 1)..]).1;
487            if is_html_tag(possible_tag) {
488                return self.parse_html_block_type_6_or_7(ix, remaining_space);
489            }
490
491            // Detect type 7
492            if let Some(_html_bytes) = scan_html_type_7(&bytes[(ix + 1)..]) {
493                return self.parse_html_block_type_6_or_7(ix, remaining_space);
494            }
495        }
496
497        if let Ok(n) = scan_hrule(&bytes[ix..]) {
498            return self.parse_hrule(n, ix);
499        }
500
501        if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
502            return self.parse_atx_heading(ix, atx_size);
503        }
504
505        // parse refdef
506        if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
507            self.allocs.refdefs.entry(label).or_insert(link_def);
508            let ix = ix + bytecount;
509            // try to read trailing whitespace or it will register as a completely blank line
510            // TODO: shouldn't we do this for all block level items?
511            return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
512        }
513
514        if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
515            return self.parse_fenced_code_block(ix, indent, fence_ch, n);
516        }
517        if scan_display_math(&bytes[ix..]) && self.options.contains(Options::ENABLE_MATH) {
518            return self.parse_display_math(ix);
519        }
520        self.parse_paragraph(ix)
521    }
522
523    /// Returns the offset of the first line after the table.
524    /// Assumptions: current focus is a table element and the table header
525    /// matches the separator line (same number of columns).
526    fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
527        // parse header. this shouldn't fail because we made sure the table header is ok
528        let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
529        self.tree[thead_ix].item.body = ItemBody::TableHead;
530
531        // parse body
532        let mut ix = body_start;
533        while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
534            ix = next_ix;
535        }
536
537        self.pop(ix);
538        ix
539    }
540
541    /// Call this when containers are taken care of.
542    /// Returns bytes scanned, row_ix
543    fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
544        let bytes = self.text.as_bytes();
545        let mut cells = 0;
546        let mut final_cell_ix = None;
547
548        let row_ix = self.tree.append(Item {
549            start: ix,
550            end: 0, // set at end of this function
551            body: ItemBody::TableRow,
552        });
553        self.tree.push();
554
555        loop {
556            ix += scan_ch(&bytes[ix..], b'|');
557            ix += scan_whitespace_no_nl(&bytes[ix..]);
558
559            if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
560                ix += eol_bytes;
561                break;
562            }
563
564            let cell_ix = self.tree.append(Item {
565                start: ix,
566                end: ix,
567                body: ItemBody::TableCell,
568            });
569            self.tree.push();
570            let (next_ix, _brk) = self.parse_line(ix, TableParseMode::Active);
571            let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
572
573            if let TreePointer::Valid(cur_ix) = self.tree.cur() {
574                self.tree[cur_ix].item.end -= trailing_whitespace;
575            }
576
577            self.tree[cell_ix].item.end = next_ix - trailing_whitespace;
578            self.tree.pop();
579
580            ix = next_ix;
581            cells += 1;
582
583            if cells == row_cells {
584                final_cell_ix = Some(cell_ix);
585            }
586        }
587
588        // fill empty cells if needed
589        // note: this is where GFM and commonmark-extra diverge. we follow
590        // GFM here
591        for _ in cells..row_cells {
592            self.tree.append(Item {
593                start: ix,
594                end: ix,
595                body: ItemBody::TableCell,
596            });
597        }
598
599        // drop excess cells
600        if let Some(cell_ix) = final_cell_ix {
601            self.tree[cell_ix].next = TreePointer::Nil;
602        }
603
604        self.pop(ix);
605
606        (ix, row_ix)
607    }
608
609    /// Returns first offset after the row and the tree index of the row.
610    fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
611        let bytes = self.text.as_bytes();
612        let mut line_start = LineStart::new(&bytes[ix..]);
613        let containers = scan_containers(&self.tree, &mut line_start);
614        if containers != self.tree.spine_len() {
615            return None;
616        }
617        line_start.scan_all_space();
618        ix += line_start.bytes_scanned();
619        if scan_paragraph_interrupt(&bytes[ix..]) {
620            return None;
621        }
622
623        let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
624        Some((ix, row_ix))
625    }
626
627    /// Returns offset of line start after paragraph.
628    fn parse_paragraph(&mut self, start_ix: usize) -> usize {
629        let node_ix = self.tree.append(Item {
630            start: start_ix,
631            end: 0, // will get set later
632            body: ItemBody::Paragraph,
633        });
634        self.tree.push();
635        let bytes = self.text.as_bytes();
636
637        let mut ix = start_ix;
638        loop {
639            let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
640                TableParseMode::Scan
641            } else {
642                TableParseMode::Disabled
643            };
644            let (next_ix, brk) = self.parse_line(ix, scan_mode);
645
646            // break out when we find a table
647            if let Some(Item {
648                body: ItemBody::Table(alignment_ix),
649                start,
650                end,
651            }) = brk
652            {
653                let table_cols = self.allocs[alignment_ix].len();
654                self.tree[node_ix].item = Item {
655                    body: ItemBody::Table(alignment_ix),
656                    start,
657                    end,
658                };
659                // this clears out any stuff we may have appended - but there may
660                // be a cleaner way
661                self.tree[node_ix].child = TreePointer::Nil;
662                self.tree.pop();
663                self.tree.push();
664                return self.parse_table(table_cols, ix, next_ix);
665            }
666
667            ix = next_ix;
668            let mut line_start = LineStart::new(&bytes[ix..]);
669            let n_containers = scan_containers(&self.tree, &mut line_start);
670            if !line_start.scan_space(4) {
671                let ix_new = ix + line_start.bytes_scanned();
672                if n_containers == self.tree.spine_len() {
673                    if let Some(ix_setext) = self.parse_setext_heading(ix_new, node_ix) {
674                        if let Some(Item {
675                            start,
676                            body: ItemBody::HardBreak,
677                            ..
678                        }) = brk
679                        {
680                            if bytes[start] == b'\\' {
681                                self.tree.append_text(start, start + 1);
682                            }
683                        }
684                        ix = ix_setext;
685                        break;
686                    }
687                }
688                // first check for non-empty lists, then for other interrupts
689                let suffix = &bytes[ix_new..];
690                if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
691                    break;
692                }
693            }
694            line_start.scan_all_space();
695            if line_start.is_at_eol() {
696                break;
697            }
698            ix = next_ix + line_start.bytes_scanned();
699            if let Some(item) = brk {
700                self.tree.append(item);
701            }
702        }
703
704        self.pop(ix);
705        ix
706    }
707
708    /// Returns end ix of setext_heading on success.
709    fn parse_setext_heading(&mut self, ix: usize, node_ix: TreeIndex) -> Option<usize> {
710        let bytes = self.text.as_bytes();
711        let (n, level) = scan_setext_heading(&bytes[ix..])?;
712        self.tree[node_ix].item.body = ItemBody::Heading(level);
713
714        // strip trailing whitespace
715        if let TreePointer::Valid(cur_ix) = self.tree.cur() {
716            self.tree[cur_ix].item.end -= scan_rev_while(
717                &bytes[..self.tree[cur_ix].item.end],
718                is_ascii_whitespace_no_nl,
719            );
720        }
721
722        Some(ix + n)
723    }
724
725    /// Parse a line of input, appending text and items to tree.
726    ///
727    /// Returns: index after line and an item representing the break.
728    fn parse_line(&mut self, start: usize, mode: TableParseMode) -> (usize, Option<Item>) {
729        let bytes = &self.text.as_bytes();
730        let mut pipes = 0;
731        let mut last_pipe_ix = start;
732        let mut begin_text = start;
733
734        let (final_ix, brk) = iterate_special_bytes(bytes, start, |ix, byte| {
735            match byte {
736                b'\n' | b'\r' => {
737                    if let TableParseMode::Active = mode {
738                        return LoopInstruction::BreakAtWith(ix, None);
739                    }
740
741                    let mut i = ix;
742                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
743                    if mode == TableParseMode::Scan && pipes > 0 {
744                        // check if we may be parsing a table
745                        let next_line_ix = ix + eol_bytes;
746                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
747                        if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
748                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
749                            let (table_head_bytes, alignment) =
750                                scan_table_head(&bytes[table_head_ix..]);
751
752                            if table_head_bytes > 0 {
753                                // computing header count from number of pipes
754                                let header_count =
755                                    count_header_cols(bytes, pipes, start, last_pipe_ix);
756
757                                // make sure they match the number of columns we find in separator line
758                                if alignment.len() == header_count {
759                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
760                                    let end_ix = table_head_ix + table_head_bytes;
761                                    return LoopInstruction::BreakAtWith(
762                                        end_ix,
763                                        Some(Item {
764                                            start: i,
765                                            end: end_ix, // must update later
766                                            body: ItemBody::Table(alignment_ix),
767                                        }),
768                                    );
769                                }
770                            }
771                        }
772                    }
773
774                    let end_ix = ix + eol_bytes;
775                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
776                    if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
777                        i -= 1;
778                        self.tree.append_text(begin_text, i);
779                        return LoopInstruction::BreakAtWith(
780                            end_ix,
781                            Some(Item {
782                                start: i,
783                                end: end_ix,
784                                body: ItemBody::HardBreak,
785                            }),
786                        );
787                    }
788                    let trailing_whitespace =
789                        scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
790                    if trailing_whitespace >= 2 {
791                        i -= trailing_whitespace;
792                        self.tree.append_text(begin_text, i);
793                        return LoopInstruction::BreakAtWith(
794                            end_ix,
795                            Some(Item {
796                                start: i,
797                                end: end_ix,
798                                body: ItemBody::HardBreak,
799                            }),
800                        );
801                    }
802
803                    self.tree.append_text(begin_text, ix);
804                    LoopInstruction::BreakAtWith(
805                        end_ix,
806                        Some(Item {
807                            start: i,
808                            end: end_ix,
809                            body: ItemBody::SoftBreak,
810                        }),
811                    )
812                }
813                b'\\' => {
814                    if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
815                        self.tree.append_text(begin_text, ix);
816                        // FIXME Escape $
817                        if bytes[ix + 1] == b'`' || bytes[ix + 1] == b'$' {
818                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], bytes[ix + 1]);
819                            self.tree.append(Item {
820                                start: ix + 1,
821                                end: ix + count + 1,
822                                body: if bytes[ix + 1] == b'`' {
823                                    ItemBody::MaybeCode(count, true)
824                                } else {
825                                    ItemBody::MaybeMath(count, true)
826                                },
827                            });
828                            begin_text = ix + 1 + count;
829                            LoopInstruction::ContinueAndSkip(count)
830                        } else {
831                            begin_text = ix + 1;
832                            LoopInstruction::ContinueAndSkip(1)
833                        }
834                    } else {
835                        LoopInstruction::ContinueAndSkip(0)
836                    }
837                }
838                c @ b'*' | c @ b'_' | c @ b'~' => {
839                    let string_suffix = &self.text[ix..];
840                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
841                    let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
842                    let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
843                    let is_valid_seq = c != b'~'
844                        || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
845
846                    if (can_open || can_close) && is_valid_seq {
847                        self.tree.append_text(begin_text, ix);
848                        for i in 0..count {
849                            self.tree.append(Item {
850                                start: ix + i,
851                                end: ix + i + 1,
852                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
853                            });
854                        }
855                        begin_text = ix + count;
856                    }
857                    LoopInstruction::ContinueAndSkip(count - 1)
858                }
859                b'$' => {
860                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'$');
861                    if self.options.contains(Options::ENABLE_MATH) {
862                        self.tree.append_text(begin_text, ix);
863                        self.tree.append(Item {
864                            start: ix,
865                            end: ix + count,
866                            body: ItemBody::MaybeMath(count, false),
867                        });
868                        begin_text = ix + count;
869                    }
870                    LoopInstruction::ContinueAndSkip(count - 1)
871                }
872                b'`' => {
873                    self.tree.append_text(begin_text, ix);
874                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
875                    self.tree.append(Item {
876                        start: ix,
877                        end: ix + count,
878                        body: ItemBody::MaybeCode(count, false),
879                    });
880                    begin_text = ix + count;
881                    LoopInstruction::ContinueAndSkip(count - 1)
882                }
883                b'<' => {
884                    // Note: could detect some non-HTML cases and early escape here, but not
885                    // clear that's a win.
886                    self.tree.append_text(begin_text, ix);
887                    self.tree.append(Item {
888                        start: ix,
889                        end: ix + 1,
890                        body: ItemBody::MaybeHtml,
891                    });
892                    begin_text = ix + 1;
893                    LoopInstruction::ContinueAndSkip(0)
894                }
895                b'!' => {
896                    if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
897                        self.tree.append_text(begin_text, ix);
898                        self.tree.append(Item {
899                            start: ix,
900                            end: ix + 2,
901                            body: ItemBody::MaybeImage,
902                        });
903                        begin_text = ix + 2;
904                        LoopInstruction::ContinueAndSkip(1)
905                    } else {
906                        LoopInstruction::ContinueAndSkip(0)
907                    }
908                }
909                b'[' => {
910                    self.tree.append_text(begin_text, ix);
911                    self.tree.append(Item {
912                        start: ix,
913                        end: ix + 1,
914                        body: ItemBody::MaybeLinkOpen,
915                    });
916                    begin_text = ix + 1;
917                    LoopInstruction::ContinueAndSkip(0)
918                }
919                b']' => {
920                    self.tree.append_text(begin_text, ix);
921                    self.tree.append(Item {
922                        start: ix,
923                        end: ix + 1,
924                        body: ItemBody::MaybeLinkClose,
925                    });
926                    begin_text = ix + 1;
927                    LoopInstruction::ContinueAndSkip(0)
928                }
929                b'&' => match scan_entity(&bytes[ix..]) {
930                    (n, Some(value)) => {
931                        self.tree.append_text(begin_text, ix);
932                        self.tree.append(Item {
933                            start: ix,
934                            end: ix + n,
935                            body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
936                        });
937                        begin_text = ix + n;
938                        LoopInstruction::ContinueAndSkip(n - 1)
939                    }
940                    _ => LoopInstruction::ContinueAndSkip(0),
941                },
942                b'|' => {
943                    if let TableParseMode::Active = mode {
944                        LoopInstruction::BreakAtWith(ix, None)
945                    } else {
946                        last_pipe_ix = ix;
947                        pipes += 1;
948                        LoopInstruction::ContinueAndSkip(0)
949                    }
950                }
951                _ => LoopInstruction::ContinueAndSkip(0),
952            }
953        });
954
955        if brk.is_none() {
956            // need to close text at eof
957            self.tree.append_text(begin_text, final_ix);
958        }
959        (final_ix, brk)
960    }
961
962    /// Check whether we should allow a paragraph interrupt by lists. Only non-empty
963    /// lists are allowed.
964    fn interrupt_paragraph_by_list(&self, suffix: &[u8]) -> bool {
965        scan_listitem(suffix).map_or(false, |(ix, delim, index, _)| {
966            self.list_nesting > 0 ||
967            // we don't allow interruption by either empty lists or
968            // numbered lists starting at an index other than 1
969            !scan_empty_list(&suffix[ix..]) && (delim == b'*' || delim == b'-' || index == 1)
970        })
971    }
972
973    /// When start_ix is at the beginning of an HTML block of type 1 to 5,
974    /// this will find the end of the block, adding the block itself to the
975    /// tree and also keeping track of the lines of HTML within the block.
976    ///
977    /// The html_end_tag is the tag that must be found on a line to end the block.
978    fn parse_html_block_type_1_to_5(
979        &mut self,
980        start_ix: usize,
981        html_end_tag: &str,
982        mut remaining_space: usize,
983    ) -> usize {
984        let bytes = self.text.as_bytes();
985        let mut ix = start_ix;
986        loop {
987            let line_start_ix = ix;
988            ix += scan_nextline(&bytes[ix..]);
989            self.append_html_line(remaining_space, line_start_ix, ix);
990
991            let mut line_start = LineStart::new(&bytes[ix..]);
992            let n_containers = scan_containers(&self.tree, &mut line_start);
993            if n_containers < self.tree.spine_len() {
994                break;
995            }
996
997            if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
998                break;
999            }
1000
1001            let next_line_ix = ix + line_start.bytes_scanned();
1002            if next_line_ix == self.text.len() {
1003                break;
1004            }
1005            ix = next_line_ix;
1006            remaining_space = line_start.remaining_space();
1007        }
1008        ix
1009    }
1010
1011    /// When start_ix is at the beginning of an HTML block of type 6 or 7,
1012    /// this will consume lines until there is a blank line and keep track of
1013    /// the HTML within the block.
1014    fn parse_html_block_type_6_or_7(
1015        &mut self,
1016        start_ix: usize,
1017        mut remaining_space: usize,
1018    ) -> usize {
1019        let bytes = self.text.as_bytes();
1020        let mut ix = start_ix;
1021        loop {
1022            let line_start_ix = ix;
1023            ix += scan_nextline(&bytes[ix..]);
1024            self.append_html_line(remaining_space, line_start_ix, ix);
1025
1026            let mut line_start = LineStart::new(&bytes[ix..]);
1027            let n_containers = scan_containers(&self.tree, &mut line_start);
1028            if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
1029                break;
1030            }
1031
1032            let next_line_ix = ix + line_start.bytes_scanned();
1033            if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
1034            {
1035                break;
1036            }
1037            ix = next_line_ix;
1038            remaining_space = line_start.remaining_space();
1039        }
1040        ix
1041    }
1042
1043    fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
1044        self.tree.append(Item {
1045            start: start_ix,
1046            end: 0, // will get set later
1047            body: ItemBody::IndentCodeBlock,
1048        });
1049        self.tree.push();
1050        let bytes = self.text.as_bytes();
1051        let mut last_nonblank_child = TreePointer::Nil;
1052        let mut last_nonblank_ix = 0;
1053        let mut end_ix = 0;
1054        let mut last_line_blank = false;
1055
1056        let mut ix = start_ix;
1057        loop {
1058            let line_start_ix = ix;
1059            ix += scan_nextline(&bytes[ix..]);
1060            self.append_code_text(remaining_space, line_start_ix, ix);
1061            // TODO(spec clarification): should we synthesize newline at EOF?
1062
1063            if !last_line_blank {
1064                last_nonblank_child = self.tree.cur();
1065                last_nonblank_ix = ix;
1066                end_ix = ix;
1067            }
1068
1069            let mut line_start = LineStart::new(&bytes[ix..]);
1070            let n_containers = scan_containers(&self.tree, &mut line_start);
1071            if n_containers < self.tree.spine_len()
1072                || !(line_start.scan_space(4) || line_start.is_at_eol())
1073            {
1074                break;
1075            }
1076            let next_line_ix = ix + line_start.bytes_scanned();
1077            if next_line_ix == self.text.len() {
1078                break;
1079            }
1080            ix = next_line_ix;
1081            remaining_space = line_start.remaining_space();
1082            last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1083        }
1084
1085        // Trim trailing blank lines.
1086        if let TreePointer::Valid(child) = last_nonblank_child {
1087            self.tree[child].next = TreePointer::Nil;
1088            self.tree[child].item.end = last_nonblank_ix;
1089        }
1090        self.pop(end_ix);
1091        ix
1092    }
1093
1094    fn parse_fenced_code_block(
1095        &mut self,
1096        start_ix: usize,
1097        indent: usize,
1098        fence_ch: u8,
1099        n_fence_char: usize,
1100    ) -> usize {
1101        let bytes = self.text.as_bytes();
1102        let mut info_start = start_ix + n_fence_char;
1103        info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1104        // TODO: info strings are typically very short. wouldnt it be faster
1105        // to just do a forward scan here?
1106        let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1107        let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1108        let info_string = unescape(&self.text[info_start..info_end]);
1109        self.tree.append(Item {
1110            start: start_ix,
1111            end: 0, // will get set later
1112            body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1113        });
1114        self.tree.push();
1115        loop {
1116            let mut line_start = LineStart::new(&bytes[ix..]);
1117            let n_containers = scan_containers(&self.tree, &mut line_start);
1118            if n_containers < self.tree.spine_len() {
1119                break;
1120            }
1121            line_start.scan_space(indent);
1122            let mut close_line_start = line_start.clone();
1123            if !close_line_start.scan_space(4) {
1124                let close_ix = ix + close_line_start.bytes_scanned();
1125                if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1126                {
1127                    ix = close_ix + n;
1128                    break;
1129                }
1130            }
1131            let remaining_space = line_start.remaining_space();
1132            ix += line_start.bytes_scanned();
1133            let next_ix = ix + scan_nextline(&bytes[ix..]);
1134            self.append_code_text(remaining_space, ix, next_ix);
1135            ix = next_ix;
1136        }
1137
1138        self.pop(ix);
1139
1140        // try to read trailing whitespace or it will register as a completely blank line
1141        ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1142    }
1143
1144    fn parse_display_math(&mut self, start_ix: usize) -> usize {
1145        let bytes = self.text.as_bytes();
1146        self.tree.append(Item {
1147            start: start_ix,
1148            end: 0, // TODO Never used?
1149            body: ItemBody::DisplayMath,
1150        });
1151        self.tree.push();
1152        let mut ix = start_ix + scan_nextline(&bytes[start_ix..]);
1153        loop {
1154            let mut line_start = LineStart::new(&bytes[ix..]);
1155            let n_containers = scan_containers(&self.tree, &mut line_start);
1156            if n_containers < self.tree.spine_len() {
1157                break;
1158            }
1159            if let Some(n) = scan_closing_display_math(&bytes[ix..]) {
1160                ix += n;
1161                break;
1162            }
1163            let next_ix = ix + scan_nextline(&bytes[ix..]);
1164            self.append_math_text(ix, next_ix);
1165            ix = next_ix;
1166        }
1167        self.pop(ix);
1168        ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1169    }
1170
1171    fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1172        if remaining_space > 0 {
1173            let cow_ix = self.allocs.allocate_cow("   "[..remaining_space].into());
1174            self.tree.append(Item {
1175                start,
1176                end: start,
1177                body: ItemBody::SynthesizeText(cow_ix),
1178            });
1179        }
1180        if self.text.as_bytes()[end - 2] == b'\r' {
1181            // Normalize CRLF to LF
1182            self.tree.append_text(start, end - 2);
1183            self.tree.append_text(end - 1, end);
1184        } else {
1185            self.tree.append_text(start, end);
1186        }
1187    }
1188
1189    fn append_math_text(&mut self, start: usize, end: usize) {
1190        if self.text.as_bytes()[end - 2] == b'\r' {
1191            self.tree.append_text(start, end - 2);
1192            self.tree.append_text(end - 1, end);
1193        } else {
1194            self.tree.append_text(start, end);
1195        }
1196    }
1197
1198    /// Appends a line of HTML to the tree.
1199    fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1200        if remaining_space > 0 {
1201            let cow_ix = self.allocs.allocate_cow("   "[..remaining_space].into());
1202            self.tree.append(Item {
1203                start,
1204                end: start,
1205                // TODO: maybe this should synthesize to html rather than text?
1206                body: ItemBody::SynthesizeText(cow_ix),
1207            });
1208        }
1209        if self.text.as_bytes()[end - 2] == b'\r' {
1210            // Normalize CRLF to LF
1211            self.tree.append(Item {
1212                start,
1213                end: end - 2,
1214                body: ItemBody::Html,
1215            });
1216            self.tree.append(Item {
1217                start: end - 1,
1218                end,
1219                body: ItemBody::Html,
1220            });
1221        } else {
1222            self.tree.append(Item {
1223                start,
1224                end,
1225                body: ItemBody::Html,
1226            });
1227        }
1228    }
1229
1230    /// Pop a container, setting its end.
1231    fn pop(&mut self, ix: usize) {
1232        let cur_ix = self.tree.pop().unwrap();
1233        self.tree[cur_ix].item.end = ix;
1234        if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1235            surgerize_tight_list(&mut self.tree, cur_ix);
1236        }
1237    }
1238
1239    /// Close a list if it's open. Also set loose if last line was blank
1240    fn finish_list(&mut self, ix: usize) {
1241        if let Some(node_ix) = self.tree.peek_up() {
1242            if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1243                self.pop(ix);
1244                self.list_nesting -= 1;
1245            }
1246        }
1247        if self.last_line_blank {
1248            if let Some(node_ix) = self.tree.peek_grandparent() {
1249                if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
1250                    *is_tight = false;
1251                }
1252            }
1253            self.last_line_blank = false;
1254        }
1255    }
1256
1257    /// Continue an existing list or start a new one if there's not an open
1258    /// list that matches.
1259    fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1260        if let Some(node_ix) = self.tree.peek_up() {
1261            if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1262                if existing_ch == ch {
1263                    if self.last_line_blank {
1264                        *is_tight = false;
1265                        self.last_line_blank = false;
1266                    }
1267                    return;
1268                }
1269            }
1270            // TODO: this is not the best choice for end; maybe get end from last list item.
1271            self.finish_list(start);
1272        }
1273        self.tree.append(Item {
1274            start,
1275            end: 0, // will get set later
1276            body: ItemBody::List(true, ch, index),
1277        });
1278        self.list_nesting += 1;
1279        self.tree.push();
1280        self.last_line_blank = false;
1281    }
1282
1283    /// Parse a thematic break.
1284    ///
1285    /// Returns index of start of next line.
1286    fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1287        self.tree.append(Item {
1288            start: ix,
1289            end: ix + hrule_size,
1290            body: ItemBody::Rule,
1291        });
1292        ix + hrule_size
1293    }
1294
1295    /// Parse an ATX heading.
1296    ///
1297    /// Returns index of start of next line.
1298    fn parse_atx_heading(&mut self, mut ix: usize, atx_size: usize) -> usize {
1299        let heading_ix = self.tree.append(Item {
1300            start: ix,
1301            end: 0, // set later
1302            body: ItemBody::Heading(atx_size as u32),
1303        });
1304        ix += atx_size;
1305        // next char is space or eol (guaranteed by scan_atx_heading)
1306        let bytes = self.text.as_bytes();
1307        if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1308            self.tree[heading_ix].item.end = ix + eol_bytes;
1309            return ix + eol_bytes;
1310        }
1311        // skip leading spaces
1312        let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1313        ix += skip_spaces;
1314
1315        // now handle the header text
1316        let header_start = ix;
1317        let header_node_idx = self.tree.push(); // so that we can set the endpoint later
1318        ix = self.parse_line(ix, TableParseMode::Disabled).0;
1319        self.tree[header_node_idx].item.end = ix;
1320
1321        // remove trailing matter from header text
1322        if let TreePointer::Valid(cur_ix) = self.tree.cur() {
1323            let header_text = &bytes[header_start..ix];
1324            let mut limit = header_text
1325                .iter()
1326                .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1327                .map_or(0, |i| i + 1);
1328            let closer = header_text[..limit]
1329                .iter()
1330                .rposition(|&b| b != b'#')
1331                .map_or(0, |i| i + 1);
1332            if closer == 0 {
1333                limit = closer;
1334            } else {
1335                let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1336                if spaces > 0 {
1337                    limit = closer - spaces;
1338                }
1339            }
1340            self.tree[cur_ix].item.end = limit + header_start;
1341        }
1342
1343        self.tree.pop();
1344        ix
1345    }
1346
1347    /// Returns the number of bytes scanned on success.
1348    fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1349        let bytes = &self.text.as_bytes()[start..];
1350        if !bytes.starts_with(b"[^") {
1351            return None;
1352        }
1353        let (mut i, label) = self.parse_refdef_label(start + 2)?;
1354        i += 2;
1355        if scan_ch(&bytes[i..], b':') == 0 {
1356            return None;
1357        }
1358        i += 1;
1359        self.finish_list(start);
1360        self.tree.append(Item {
1361            start,
1362            end: 0, // will get set later
1363            // TODO: check whether the label here is strictly necessary
1364            body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1365        });
1366        self.tree.push();
1367        Some(i)
1368    }
1369
1370    /// Tries to parse a reference label, which can be interrupted by new blocks.
1371    /// On success, returns the number of bytes of the label and the label itself.
1372    fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1373        scan_link_label_rest(&self.text[start..], &|bytes| {
1374            let mut line_start = LineStart::new(bytes);
1375            let _ = scan_containers(&self.tree, &mut line_start);
1376            let bytes_scanned = line_start.bytes_scanned();
1377
1378            let suffix = &bytes[bytes_scanned..];
1379            if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
1380                None
1381            } else {
1382                Some(bytes_scanned)
1383            }
1384        })
1385    }
1386
1387    /// Returns number of bytes scanned, label and definition on success.
1388    fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1389        let bytes = &self.text.as_bytes()[start..];
1390        if scan_ch(bytes, b'[') == 0 {
1391            return None;
1392        }
1393        let (mut i, label) = self.parse_refdef_label(start + 1)?;
1394        i += 1;
1395        if scan_ch(&bytes[i..], b':') == 0 {
1396            return None;
1397        }
1398        i += 1;
1399        let (bytecount, link_def) = self.scan_refdef(start + i)?;
1400        Some((bytecount + i, UniCase::new(label), link_def))
1401    }
1402
1403    /// Returns number of bytes and number of newlines
1404    fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1405        let mut newlines = 0;
1406        loop {
1407            let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1408            i += whitespaces;
1409            if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1410                i += eol_bytes;
1411                newlines += 1;
1412                if newlines > 1 {
1413                    return None;
1414                }
1415            } else {
1416                break;
1417            }
1418            let mut line_start = LineStart::new(&bytes[i..]);
1419            if self.tree.spine_len() != scan_containers(&self.tree, &mut line_start) {
1420                return None;
1421            }
1422            i += line_start.bytes_scanned();
1423        }
1424        Some((i, newlines))
1425    }
1426
1427    /// Returns # of bytes and definition.
1428    /// Assumes the label of the reference including colon has already been scanned.
1429    fn scan_refdef(&self, start: usize) -> Option<(usize, LinkDef<'a>)> {
1430        let bytes = self.text.as_bytes();
1431
1432        // whitespace between label and url (including up to one newline)
1433        let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1434
1435        // scan link dest
1436        let (dest_length, dest) = scan_link_dest(self.text, i, 1)?;
1437        if dest_length == 0 {
1438            return None;
1439        }
1440        let dest = unescape(dest);
1441        i += dest_length;
1442
1443        // no title
1444        let mut backup = (i - start, LinkDef { dest, title: None });
1445
1446        // scan whitespace between dest and label
1447        let (mut i, newlines) =
1448            if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1449                if i == self.text.len() {
1450                    newlines += 1;
1451                }
1452                if new_i == i && newlines == 0 {
1453                    return None;
1454                }
1455                if newlines > 1 {
1456                    return Some(backup);
1457                };
1458                (new_i, newlines)
1459            } else {
1460                return Some(backup);
1461            };
1462
1463        // scan title
1464        // if this fails but newline == 1, return also a refdef without title
1465        if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
1466            i += title_length;
1467            backup.1.title = Some(unescape(title));
1468        } else if newlines > 0 {
1469            return Some(backup);
1470        } else {
1471            return None;
1472        };
1473
1474        // scan EOL
1475        if let Some(bytes) = scan_blank_line(&bytes[i..]) {
1476            backup.0 = i + bytes - start;
1477            Some(backup)
1478        } else if newlines > 0 {
1479            Some(backup)
1480        } else {
1481            None
1482        }
1483    }
1484}
1485
1486/// Returns number of containers scanned.
1487fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
1488    let mut i = 0;
1489    for &node_ix in tree.walk_spine() {
1490        match tree[node_ix].item.body {
1491            ItemBody::BlockQuote => {
1492                let save = line_start.clone();
1493                if !line_start.scan_blockquote_marker() {
1494                    *line_start = save;
1495                    break;
1496                }
1497            }
1498            ItemBody::ListItem(indent) => {
1499                if !line_start.is_at_eol() {
1500                    let save = line_start.clone();
1501                    if !line_start.scan_space(indent) {
1502                        *line_start = save;
1503                        break;
1504                    }
1505                }
1506            }
1507            _ => (),
1508        }
1509        i += 1;
1510    }
1511    i
1512}
1513
1514/// Computes the number of header columns in a table line by computing the number of dividing pipes
1515/// that aren't followed or preceeded by whitespace.
1516fn count_header_cols(
1517    bytes: &[u8],
1518    mut pipes: usize,
1519    mut start: usize,
1520    last_pipe_ix: usize,
1521) -> usize {
1522    // was first pipe preceeded by whitespace? if so, subtract one
1523    start += scan_whitespace_no_nl(&bytes[start..]);
1524    if bytes[start] == b'|' {
1525        pipes -= 1;
1526    }
1527
1528    // was last pipe followed by whitespace? if so, sub one
1529    if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
1530        pipes
1531    } else {
1532        pipes + 1
1533    }
1534}
1535
1536impl<'a> Tree<Item> {
1537    fn append_text(&mut self, start: usize, end: usize) {
1538        if end > start {
1539            if let TreePointer::Valid(ix) = self.cur() {
1540                if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
1541                    self[ix].item.end = end;
1542                    return;
1543                }
1544            }
1545            self.append(Item {
1546                start,
1547                end,
1548                body: ItemBody::Text,
1549            });
1550        }
1551    }
1552}
1553
1554/// Determines whether the delimiter run starting at given index is
1555/// left-flanking, as defined by the commonmark spec (and isn't intraword
1556/// for _ delims).
1557/// suffix is &s[ix..], which is passed in as an optimization, since taking
1558/// a string subslice is O(n).
1559fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1560    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1561        c
1562    } else {
1563        return false;
1564    };
1565    if next_char.is_whitespace() {
1566        return false;
1567    }
1568    if ix == 0 {
1569        return true;
1570    }
1571    let delim = suffix.chars().next().unwrap();
1572    if delim == '*' && !is_punctuation(next_char) {
1573        return true;
1574    }
1575
1576    let prev_char = s[..ix].chars().last().unwrap();
1577
1578    prev_char.is_whitespace() || is_punctuation(prev_char)
1579}
1580
1581/// Determines whether the delimiter run starting at given index is
1582/// left-flanking, as defined by the commonmark spec (and isn't intraword
1583/// for _ delims)
1584fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1585    if ix == 0 {
1586        return false;
1587    }
1588    let prev_char = s[..ix].chars().last().unwrap();
1589    if prev_char.is_whitespace() {
1590        return false;
1591    }
1592    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1593        c
1594    } else {
1595        return true;
1596    };
1597    let delim = suffix.chars().next().unwrap();
1598    if delim == '*' && !is_punctuation(prev_char) {
1599        return true;
1600    }
1601
1602    next_char.is_whitespace() || is_punctuation(next_char)
1603}
1604
1605/// Checks whether we should break a paragraph on the given input.
1606/// Note: lists are dealt with in `interrupt_paragraph_by_list`, because determing
1607/// whether to break on a list requires additional context.
1608fn scan_paragraph_interrupt(bytes: &[u8]) -> bool {
1609    if scan_eol(bytes).is_some()
1610        || scan_hrule(bytes).is_ok()
1611        || scan_atx_heading(bytes).is_some()
1612        || scan_code_fence(bytes).is_some()
1613        || scan_blockquote_start(bytes).is_some()
1614    {
1615        return true;
1616    }
1617    bytes.starts_with(b"<")
1618        && (get_html_end_tag(&bytes[1..]).is_some()
1619            || is_html_tag(scan_html_block_tag(&bytes[1..]).1))
1620}
1621
1622/// Assumes `text_bytes` is preceded by `<`.
1623fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
1624    static BEGIN_TAGS: &[&[u8]; 3] = &[b"pre", b"style", b"script"];
1625    static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
1626
1627    for (beg_tag, end_tag) in BEGIN_TAGS
1628        .iter()
1629        .zip(["</pre>", "</style>", "</script>"].iter())
1630    {
1631        let tag_len = beg_tag.len();
1632
1633        if text_bytes.len() < tag_len {
1634            // begin tags are increasing in size
1635            break;
1636        }
1637
1638        if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
1639            continue;
1640        }
1641
1642        // Must either be the end of the line...
1643        if text_bytes.len() == tag_len {
1644            return Some(end_tag);
1645        }
1646
1647        // ...or be followed by whitespace, newline, or '>'.
1648        let s = text_bytes[tag_len];
1649        if is_ascii_whitespace(s) || s == b'>' {
1650            return Some(end_tag);
1651        }
1652    }
1653
1654    for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
1655        if text_bytes.starts_with(beg_tag) {
1656            return Some(end_tag);
1657        }
1658    }
1659
1660    if text_bytes.len() > 1
1661        && text_bytes[0] == b'!'
1662        && text_bytes[1] >= b'A'
1663        && text_bytes[1] <= b'Z'
1664    {
1665        Some(">")
1666    } else {
1667        None
1668    }
1669}
1670
1671#[derive(Copy, Clone, Debug)]
1672struct InlineEl {
1673    start: TreeIndex, // offset of tree node
1674    count: usize,
1675    c: u8,      // b'*' or b'_'
1676    both: bool, // can both open and close
1677}
1678
1679#[derive(Debug, Clone, Default)]
1680struct InlineStack {
1681    stack: Vec<InlineEl>,
1682    // Lower bounds for matching indices in the stack. For example
1683    // a strikethrough delimiter will never match with any element
1684    // in the stack with index smaller than
1685    // `lower_bounds[InlineStack::TILDES]`.
1686    lower_bounds: [usize; 7],
1687}
1688
1689impl InlineStack {
1690    /// These are indices into the lower bounds array.
1691    /// Not both refers to the property that the delimiter can not both
1692    /// be opener as a closer.
1693    const UNDERSCORE_NOT_BOTH: usize = 0;
1694    const ASTERISK_NOT_BOTH: usize = 1;
1695    const ASTERISK_BASE: usize = 2;
1696    const TILDES: usize = 5;
1697    const UNDERSCORE_BOTH: usize = 6;
1698
1699    fn pop_all(&mut self, tree: &mut Tree<Item>) {
1700        for el in self.stack.drain(..) {
1701            for i in 0..el.count {
1702                tree[el.start + i].item.body = ItemBody::Text;
1703            }
1704        }
1705        self.lower_bounds = [0; 7];
1706    }
1707
1708    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1709        if c == b'_' {
1710            if both {
1711                self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
1712            } else {
1713                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
1714            }
1715        } else if c == b'*' {
1716            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1717            if both {
1718                mod3_lower
1719            } else {
1720                min(
1721                    mod3_lower,
1722                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1723                )
1724            }
1725        } else {
1726            self.lower_bounds[InlineStack::TILDES]
1727        }
1728    }
1729
1730    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1731        if c == b'_' {
1732            if both {
1733                self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
1734            } else {
1735                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1736            }
1737        } else if c == b'*' {
1738            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1739            if !both {
1740                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1741            }
1742        } else {
1743            self.lower_bounds[InlineStack::TILDES] = new_bound;
1744        }
1745    }
1746
1747    fn find_match(
1748        &mut self,
1749        tree: &mut Tree<Item>,
1750        c: u8,
1751        count: usize,
1752        both: bool,
1753    ) -> Option<InlineEl> {
1754        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1755        let res = self.stack[lowerbound..]
1756            .iter()
1757            .cloned()
1758            .enumerate()
1759            .rfind(|(_, el)| {
1760                el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
1761            });
1762
1763        if let Some((matching_ix, matching_el)) = res {
1764            let matching_ix = matching_ix + lowerbound;
1765            for el in &self.stack[(matching_ix + 1)..] {
1766                for i in 0..el.count {
1767                    tree[el.start + i].item.body = ItemBody::Text;
1768                }
1769            }
1770            self.stack.truncate(matching_ix);
1771            Some(matching_el)
1772        } else {
1773            self.set_lowerbound(c, count, both, self.stack.len());
1774            None
1775        }
1776    }
1777
1778    fn push(&mut self, el: InlineEl) {
1779        self.stack.push(el)
1780    }
1781}
1782
1783#[derive(Debug, Clone)]
1784enum RefScan<'a> {
1785    // label, next node index
1786    LinkLabel(CowStr<'a>, TreePointer),
1787    // contains next node index
1788    Collapsed(TreePointer),
1789    Failed,
1790}
1791
1792/// Skips forward within a block to a node which spans (ends inclusive) the given
1793/// index into the source.
1794fn scan_nodes_to_ix(tree: &Tree<Item>, mut node: TreePointer, ix: usize) -> TreePointer {
1795    while let TreePointer::Valid(node_ix) = node {
1796        if tree[node_ix].item.end <= ix {
1797            node = tree[node_ix].next;
1798        } else {
1799            break;
1800        }
1801    }
1802    node
1803}
1804
1805/// Scans an inline link label, which cannot be interrupted.
1806/// Returns number of bytes (including brackets) and label on success.
1807fn scan_link_label<'text, 'tree>(
1808    tree: &'tree Tree<Item>,
1809    text: &'text str,
1810) -> Option<(usize, ReferenceLabel<'text>)> {
1811    let bytes = &text.as_bytes();
1812    if bytes.len() < 2 || bytes[0] != b'[' {
1813        return None;
1814    }
1815    let linebreak_handler = |bytes: &[u8]| {
1816        let mut line_start = LineStart::new(bytes);
1817        let _ = scan_containers(tree, &mut line_start);
1818        Some(line_start.bytes_scanned())
1819    };
1820    let pair = if b'^' == bytes[1] {
1821        let (byte_index, cow) = scan_link_label_rest(&text[2..], &linebreak_handler)?;
1822        (byte_index + 2, ReferenceLabel::Footnote(cow))
1823    } else {
1824        let (byte_index, cow) = scan_link_label_rest(&text[1..], &linebreak_handler)?;
1825        (byte_index + 1, ReferenceLabel::Link(cow))
1826    };
1827    Some(pair)
1828}
1829
1830fn scan_reference<'a, 'b>(tree: &'a Tree<Item>, text: &'b str, cur: TreePointer) -> RefScan<'b> {
1831    let cur_ix = match cur {
1832        TreePointer::Nil => return RefScan::Failed,
1833        TreePointer::Valid(cur_ix) => cur_ix,
1834    };
1835    let start = tree[cur_ix].item.start;
1836    let tail = &text.as_bytes()[start..];
1837
1838    if tail.starts_with(b"[]") {
1839        let closing_node = tree[cur_ix].next.unwrap();
1840        RefScan::Collapsed(tree[closing_node].next)
1841    } else if let Some((ix, ReferenceLabel::Link(label))) = scan_link_label(tree, &text[start..]) {
1842        let next_node = scan_nodes_to_ix(tree, cur, start + ix);
1843        RefScan::LinkLabel(label, next_node)
1844    } else {
1845        RefScan::Failed
1846    }
1847}
1848
1849#[derive(Clone, Default)]
1850struct LinkStack {
1851    inner: Vec<LinkStackEl>,
1852    disabled_ix: usize,
1853}
1854
1855impl LinkStack {
1856    fn push(&mut self, el: LinkStackEl) {
1857        self.inner.push(el);
1858    }
1859
1860    fn pop(&mut self) -> Option<LinkStackEl> {
1861        let el = self.inner.pop();
1862        self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1863        el
1864    }
1865
1866    fn clear(&mut self) {
1867        self.inner.clear();
1868        self.disabled_ix = 0;
1869    }
1870
1871    fn disable_all_links(&mut self) {
1872        for el in &mut self.inner[self.disabled_ix..] {
1873            if el.ty == LinkStackTy::Link {
1874                el.ty = LinkStackTy::Disabled;
1875            }
1876        }
1877        self.disabled_ix = self.inner.len();
1878    }
1879}
1880
1881#[derive(Clone, Debug)]
1882struct LinkStackEl {
1883    node: TreeIndex,
1884    ty: LinkStackTy,
1885}
1886
1887#[derive(PartialEq, Clone, Debug)]
1888enum LinkStackTy {
1889    Link,
1890    Image,
1891    Disabled,
1892}
1893
1894#[derive(Clone)]
1895struct LinkDef<'a> {
1896    dest: CowStr<'a>,
1897    title: Option<CowStr<'a>>,
1898}
1899
1900/// Tracks tree indices of code span delimiters of each length. It should prevent
1901/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1902struct CodeDelims {
1903    inner: HashMap<usize, VecDeque<TreeIndex>>,
1904    seen_first: bool,
1905}
1906
1907impl CodeDelims {
1908    fn new() -> Self {
1909        Self {
1910            inner: Default::default(),
1911            seen_first: false,
1912        }
1913    }
1914
1915    fn insert(&mut self, count: usize, ix: TreeIndex) {
1916        if self.seen_first {
1917            self.inner
1918                .entry(count)
1919                .or_insert_with(Default::default)
1920                .push_back(ix);
1921        } else {
1922            // Skip the first insert, since that delimiter will always
1923            // be an opener and not a closer.
1924            self.seen_first = true;
1925        }
1926    }
1927
1928    fn is_populated(&self) -> bool {
1929        !self.inner.is_empty()
1930    }
1931
1932    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1933        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1934            if ix > open_ix {
1935                return Some(ix);
1936            }
1937        }
1938        None
1939    }
1940
1941    fn clear(&mut self) {
1942        self.inner.clear();
1943        self.seen_first = false;
1944    }
1945}
1946
1947#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1948struct LinkIndex(usize);
1949
1950#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1951struct CowIndex(usize);
1952
1953#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1954struct AlignmentIndex(usize);
1955
1956#[derive(Clone)]
1957struct Allocations<'a> {
1958    refdefs: HashMap<LinkLabel<'a>, LinkDef<'a>>,
1959    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1960    cows: Vec<CowStr<'a>>,
1961    alignments: Vec<Vec<Alignment>>,
1962}
1963
1964impl<'a> Allocations<'a> {
1965    fn new() -> Self {
1966        Self {
1967            refdefs: HashMap::new(),
1968            links: Vec::with_capacity(128),
1969            cows: Vec::new(),
1970            alignments: Vec::new(),
1971        }
1972    }
1973
1974    fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1975        let ix = self.cows.len();
1976        self.cows.push(cow);
1977        CowIndex(ix)
1978    }
1979
1980    fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1981        let ix = self.links.len();
1982        self.links.push((ty, url, title));
1983        LinkIndex(ix)
1984    }
1985
1986    fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1987        let ix = self.alignments.len();
1988        self.alignments.push(alignment);
1989        AlignmentIndex(ix)
1990    }
1991}
1992
1993impl<'a> Index<CowIndex> for Allocations<'a> {
1994    type Output = CowStr<'a>;
1995
1996    fn index(&self, ix: CowIndex) -> &Self::Output {
1997        self.cows.index(ix.0)
1998    }
1999}
2000
2001impl<'a> Index<LinkIndex> for Allocations<'a> {
2002    type Output = (LinkType, CowStr<'a>, CowStr<'a>);
2003
2004    fn index(&self, ix: LinkIndex) -> &Self::Output {
2005        self.links.index(ix.0)
2006    }
2007}
2008
2009impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2010    type Output = Vec<Alignment>;
2011
2012    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2013        self.alignments.index(ix.0)
2014    }
2015}
2016
2017/// A struct containing information on the reachability of certain inline HTML
2018/// elements. In particular, for cdata elements (`<![CDATA[`), processing
2019/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
2020/// represent the indices before which a scan will always fail and can hence
2021/// be skipped.
2022#[derive(Clone, Default)]
2023pub(crate) struct HtmlScanGuard {
2024    pub cdata: usize,
2025    pub processing: usize,
2026    pub declaration: usize,
2027}
2028
2029/// Markdown event iterator.
2030#[derive(Clone)]
2031pub struct Parser<'a> {
2032    text: &'a str,
2033    tree: Tree<Item>,
2034    allocs: Allocations<'a>,
2035    broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
2036    html_scan_guard: HtmlScanGuard,
2037
2038    // used by inline passes. store them here for reuse
2039    inline_stack: InlineStack,
2040    link_stack: LinkStack,
2041}
2042
2043impl<'a> Parser<'a> {
2044    /// Creates a new event iterator for a markdown string without any options enabled.
2045    pub fn new(text: &'a str) -> Parser<'a> {
2046        Parser::new_ext(text, Options::empty())
2047    }
2048
2049    /// Creates a new event iterator for a markdown string with given options.
2050    pub fn new_ext(text: &'a str, options: Options) -> Parser<'a> {
2051        Parser::new_with_broken_link_callback(text, options, None)
2052    }
2053
2054    /// In case the parser encounters any potential links that have a broken
2055    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
2056    /// the provided callback will be called with the reference name,
2057    /// and the returned pair will be used as the link name and title if it is not
2058    /// `None`.
2059    pub fn new_with_broken_link_callback(
2060        text: &'a str,
2061        options: Options,
2062        broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
2063    ) -> Parser<'a> {
2064        let first_pass = FirstPass::new(text, options);
2065        let (mut tree, allocs) = first_pass.run();
2066        tree.reset();
2067        let inline_stack = Default::default();
2068        let link_stack = Default::default();
2069        let html_scan_guard = Default::default();
2070        Parser {
2071            text,
2072            tree,
2073            allocs,
2074            broken_link_callback,
2075            inline_stack,
2076            link_stack,
2077            html_scan_guard,
2078        }
2079    }
2080
2081    /// Handle inline markup.
2082    ///
2083    /// When the parser encounters any item indicating potential inline markup, all
2084    /// inline markup passes are run on the remainder of the chain.
2085    ///
2086    /// Note: there's some potential for optimization here, but that's future work.
2087    fn handle_inline(&mut self) {
2088        self.handle_inline_pass1();
2089        self.handle_emphasis();
2090    }
2091
2092    /// Handle inline HTML, code spans, and links.
2093    ///
2094    /// This function handles both inline HTML and code spans, because they have
2095    /// the same precedence. It also handles links, even though they have lower
2096    /// precedence, because the URL of links must not be processed.
2097    fn handle_inline_pass1(&mut self) {
2098        let mut code_delims = CodeDelims::new();
2099        let mut cur = self.tree.cur();
2100        let mut prev = TreePointer::Nil;
2101
2102        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
2103        let block_text = &self.text[..block_end];
2104
2105        while let TreePointer::Valid(mut cur_ix) = cur {
2106            match self.tree[cur_ix].item.body {
2107                ItemBody::MaybeHtml => {
2108                    let next = self.tree[cur_ix].next;
2109                    let autolink = if let TreePointer::Valid(next_ix) = next {
2110                        scan_autolink(block_text, self.tree[next_ix].item.start)
2111                    } else {
2112                        None
2113                    };
2114
2115                    if let Some((ix, uri, link_type)) = autolink {
2116                        let node = scan_nodes_to_ix(&self.tree, next, ix);
2117                        let text_node = self.tree.create_node(Item {
2118                            start: self.tree[cur_ix].item.start + 1,
2119                            end: ix - 1,
2120                            body: ItemBody::Text,
2121                        });
2122                        let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
2123                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
2124                        self.tree[cur_ix].item.end = ix;
2125                        self.tree[cur_ix].next = node;
2126                        self.tree[cur_ix].child = TreePointer::Valid(text_node);
2127                        prev = cur;
2128                        cur = node;
2129                        if let TreePointer::Valid(node_ix) = cur {
2130                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
2131                        }
2132                        continue;
2133                    } else {
2134                        let inline_html = if let TreePointer::Valid(next_ix) = next {
2135                            self.scan_inline_html(
2136                                block_text.as_bytes(),
2137                                self.tree[next_ix].item.start,
2138                            )
2139                        } else {
2140                            None
2141                        };
2142                        if let Some(ix) = inline_html {
2143                            let node = scan_nodes_to_ix(&self.tree, next, ix);
2144                            self.tree[cur_ix].item.body = ItemBody::Html;
2145                            self.tree[cur_ix].item.end = ix;
2146                            self.tree[cur_ix].next = node;
2147                            prev = cur;
2148                            cur = node;
2149                            if let TreePointer::Valid(node_ix) = cur {
2150                                self.tree[node_ix].item.start =
2151                                    max(self.tree[node_ix].item.start, ix);
2152                            }
2153                            continue;
2154                        }
2155                    }
2156                    self.tree[cur_ix].item.body = ItemBody::Text;
2157                }
2158                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
2159                    if preceded_by_backslash {
2160                        search_count -= 1;
2161                        if search_count == 0 {
2162                            self.tree[cur_ix].item.body = ItemBody::Text;
2163                            prev = cur;
2164                            cur = self.tree[cur_ix].next;
2165                            continue;
2166                        }
2167                    }
2168
2169                    if code_delims.is_populated() {
2170                        // we have previously scanned all codeblock delimiters,
2171                        // so we can reuse that work
2172                        if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
2173                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2174                        } else {
2175                            self.tree[cur_ix].item.body = ItemBody::Text;
2176                        }
2177                    } else {
2178                        // we haven't previously scanned all codeblock delimiters,
2179                        // so walk the AST
2180                        let mut scan = if search_count > 0 {
2181                            self.tree[cur_ix].next
2182                        } else {
2183                            TreePointer::Nil
2184                        };
2185                        while let TreePointer::Valid(scan_ix) = scan {
2186                            if let ItemBody::MaybeCode(delim_count, _) =
2187                                self.tree[scan_ix].item.body
2188                            {
2189                                if search_count == delim_count {
2190                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2191                                    code_delims.clear();
2192                                    break;
2193                                } else {
2194                                    code_delims.insert(delim_count, scan_ix);
2195                                }
2196                            }
2197                            scan = self.tree[scan_ix].next;
2198                        }
2199                        if scan == TreePointer::Nil {
2200                            self.tree[cur_ix].item.body = ItemBody::Text;
2201                        }
2202                    }
2203                }
2204                ItemBody::MaybeMath(mut search_count, preceded_by_backslash) => {
2205                    if preceded_by_backslash {
2206                        search_count -= 1;
2207                        if search_count == 0 {
2208                            self.tree[cur_ix].item.body = ItemBody::Text;
2209                            prev = cur;
2210                            cur = self.tree[cur_ix].next;
2211                            continue;
2212                        }
2213                    }
2214
2215                    if code_delims.is_populated() {
2216                        if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
2217                            self.make_math_span(cur_ix, scan_ix, preceded_by_backslash);
2218                        } else {
2219                            self.tree[cur_ix].item.body = ItemBody::Text;
2220                        }
2221                    } else {
2222                        let mut scan = if search_count > 0 {
2223                            self.tree[cur_ix].next
2224                        } else {
2225                            TreePointer::Nil
2226                        };
2227                        while let TreePointer::Valid(scan_ix) = scan {
2228                            if let ItemBody::MaybeMath(delim_count, _) =
2229                                self.tree[scan_ix].item.body
2230                            {
2231                                if search_count == delim_count {
2232                                    self.make_math_span(cur_ix, scan_ix, preceded_by_backslash);
2233                                    code_delims.clear();
2234                                    break;
2235                                } else {
2236                                    code_delims.insert(delim_count, scan_ix);
2237                                }
2238                            }
2239                            scan = self.tree[scan_ix].next;
2240                        }
2241                        if scan == TreePointer::Nil {
2242                            self.tree[cur_ix].item.body = ItemBody::Text;
2243                        }
2244                    }
2245                }
2246                ItemBody::MaybeLinkOpen => {
2247                    self.tree[cur_ix].item.body = ItemBody::Text;
2248                    self.link_stack.push(LinkStackEl {
2249                        node: cur_ix,
2250                        ty: LinkStackTy::Link,
2251                    });
2252                }
2253                ItemBody::MaybeImage => {
2254                    self.tree[cur_ix].item.body = ItemBody::Text;
2255                    self.link_stack.push(LinkStackEl {
2256                        node: cur_ix,
2257                        ty: LinkStackTy::Image,
2258                    });
2259                }
2260                ItemBody::MaybeLinkClose => {
2261                    if let Some(tos) = self.link_stack.pop() {
2262                        if tos.ty == LinkStackTy::Disabled {
2263                            self.tree[cur_ix].item.body = ItemBody::Text;
2264                            continue;
2265                        }
2266                        let next = self.tree[cur_ix].next;
2267                        if let Some((next_ix, url, title)) =
2268                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
2269                        {
2270                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
2271                            if let TreePointer::Valid(prev_ix) = prev {
2272                                self.tree[prev_ix].next = TreePointer::Nil;
2273                            }
2274                            cur = TreePointer::Valid(tos.node);
2275                            cur_ix = tos.node;
2276                            let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
2277                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
2278                                ItemBody::Image(link_ix)
2279                            } else {
2280                                ItemBody::Link(link_ix)
2281                            };
2282                            self.tree[cur_ix].child = self.tree[cur_ix].next;
2283                            self.tree[cur_ix].next = next_node;
2284                            self.tree[cur_ix].item.end = next_ix;
2285                            if let TreePointer::Valid(next_node_ix) = next_node {
2286                                self.tree[next_node_ix].item.start =
2287                                    max(self.tree[next_node_ix].item.start, next_ix);
2288                            }
2289
2290                            if tos.ty == LinkStackTy::Link {
2291                                self.link_stack.disable_all_links();
2292                            }
2293                        } else {
2294                            // ok, so its not an inline link. maybe it is a reference
2295                            // to a defined link?
2296                            let scan_result = scan_reference(&self.tree, block_text, next);
2297                            let node_after_link = match scan_result {
2298                                RefScan::LinkLabel(_, next_node) => next_node,
2299                                RefScan::Collapsed(next_node) => next_node,
2300                                RefScan::Failed => next,
2301                            };
2302                            let link_type = match &scan_result {
2303                                RefScan::LinkLabel(..) => LinkType::Reference,
2304                                RefScan::Collapsed(..) => LinkType::Collapsed,
2305                                RefScan::Failed => LinkType::Shortcut,
2306                            };
2307
2308                            let label: Option<ReferenceLabel<'a>> = match scan_result {
2309                                RefScan::LinkLabel(l, ..) => Some(ReferenceLabel::Link(l)),
2310                                RefScan::Collapsed(..) | RefScan::Failed => {
2311                                    // No label? maybe it is a shortcut reference
2312                                    scan_link_label(
2313                                        &self.tree,
2314                                        &self.text[(self.tree[tos.node].item.end - 1)
2315                                            ..self.tree[cur_ix].item.end],
2316                                    )
2317                                    .map(|(_ix, label)| label)
2318                                }
2319                            };
2320
2321                            // see if it's a footnote reference
2322                            if let Some(ReferenceLabel::Footnote(l)) = label {
2323                                self.tree[tos.node].next = node_after_link;
2324                                self.tree[tos.node].child = TreePointer::Nil;
2325                                self.tree[tos.node].item.body =
2326                                    ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
2327                                prev = TreePointer::Valid(tos.node);
2328                                cur = node_after_link;
2329                                self.link_stack.clear();
2330                                continue;
2331                            } else if let Some(ReferenceLabel::Link(link_label)) = label {
2332                                let type_url_title = self
2333                                    .allocs
2334                                    .refdefs
2335                                    .get(&UniCase::new(link_label.as_ref().into()))
2336                                    .map(|matching_def| {
2337                                        // found a matching definition!
2338                                        let title = matching_def
2339                                            .title
2340                                            .as_ref()
2341                                            .cloned()
2342                                            .unwrap_or_else(|| "".into());
2343                                        let url = matching_def.dest.clone();
2344                                        (link_type, url, title)
2345                                    })
2346                                    .or_else(|| {
2347                                        self.broken_link_callback
2348                                            .and_then(|callback| {
2349                                                // looked for matching definition, but didn't find it. try to fix
2350                                                // link with callback, if it is defined
2351                                                callback(link_label.as_ref(), link_label.as_ref())
2352                                            })
2353                                            .map(|(url, title)| {
2354                                                (link_type.to_unknown(), url.into(), title.into())
2355                                            })
2356                                    });
2357
2358                                if let Some((def_link_type, url, title)) = type_url_title {
2359                                    let link_ix =
2360                                        self.allocs.allocate_link(def_link_type, url, title);
2361                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
2362                                    {
2363                                        ItemBody::Image(link_ix)
2364                                    } else {
2365                                        ItemBody::Link(link_ix)
2366                                    };
2367                                    let label_node = self.tree[tos.node].next;
2368
2369                                    // lets do some tree surgery to add the link to the tree
2370                                    // 1st: skip the label node and close node
2371                                    self.tree[tos.node].next = node_after_link;
2372
2373                                    // then, if it exists, add the label node as a child to the link node
2374                                    if label_node != cur {
2375                                        self.tree[tos.node].child = label_node;
2376
2377                                        // finally: disconnect list of children
2378                                        if let TreePointer::Valid(prev_ix) = prev {
2379                                            self.tree[prev_ix].next = TreePointer::Nil;
2380                                        }
2381                                    }
2382
2383                                    // set up cur so next node will be node_after_link
2384                                    cur = TreePointer::Valid(tos.node);
2385                                    cur_ix = tos.node;
2386
2387                                    if tos.ty == LinkStackTy::Link {
2388                                        self.link_stack.disable_all_links();
2389                                    }
2390                                } else {
2391                                    self.tree[cur_ix].item.body = ItemBody::Text;
2392                                }
2393                            } else {
2394                                self.tree[cur_ix].item.body = ItemBody::Text;
2395                            }
2396                        }
2397                    } else {
2398                        self.tree[cur_ix].item.body = ItemBody::Text;
2399                    }
2400                }
2401                _ => (),
2402            }
2403            prev = cur;
2404            cur = self.tree[cur_ix].next;
2405        }
2406        self.link_stack.clear();
2407    }
2408
2409    fn handle_emphasis(&mut self) {
2410        let mut prev = TreePointer::Nil;
2411        let mut prev_ix: TreeIndex;
2412        let mut cur = self.tree.cur();
2413        while let TreePointer::Valid(mut cur_ix) = cur {
2414            if let ItemBody::MaybeEmphasis(mut count, can_open, can_close) =
2415                self.tree[cur_ix].item.body
2416            {
2417                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
2418                let both = can_open && can_close;
2419                if can_close {
2420                    while let Some(el) =
2421                        self.inline_stack.find_match(&mut self.tree, c, count, both)
2422                    {
2423                        // have a match!
2424                        if let TreePointer::Valid(prev_ix) = prev {
2425                            self.tree[prev_ix].next = TreePointer::Nil;
2426                        }
2427                        let match_count = min(count, el.count);
2428                        // start, end are tree node indices
2429                        let mut end = cur_ix - 1;
2430                        let mut start = el.start + el.count;
2431
2432                        // work from the inside out
2433                        while start > el.start + el.count - match_count {
2434                            let (inc, ty) = if c == b'~' {
2435                                (2, ItemBody::Strikethrough)
2436                            } else if start > el.start + el.count - match_count + 1 {
2437                                (2, ItemBody::Strong)
2438                            } else {
2439                                (1, ItemBody::Emphasis)
2440                            };
2441
2442                            let root = start - inc;
2443                            end = end + inc;
2444                            self.tree[root].item.body = ty;
2445                            self.tree[root].item.end = self.tree[end].item.end;
2446                            self.tree[root].child = TreePointer::Valid(start);
2447                            self.tree[root].next = TreePointer::Nil;
2448                            start = root;
2449                        }
2450
2451                        // set next for top most emph level
2452                        prev_ix = el.start + el.count - match_count;
2453                        prev = TreePointer::Valid(prev_ix);
2454                        cur = self.tree[cur_ix + match_count - 1].next;
2455                        self.tree[prev_ix].next = cur;
2456
2457                        if el.count > match_count {
2458                            self.inline_stack.push(InlineEl {
2459                                start: el.start,
2460                                count: el.count - match_count,
2461                                c: el.c,
2462                                both,
2463                            })
2464                        }
2465                        count -= match_count;
2466                        if count > 0 {
2467                            cur_ix = cur.unwrap();
2468                        } else {
2469                            break;
2470                        }
2471                    }
2472                }
2473                if count > 0 {
2474                    if can_open {
2475                        self.inline_stack.push(InlineEl {
2476                            start: cur_ix,
2477                            count,
2478                            c,
2479                            both,
2480                        });
2481                    } else {
2482                        for i in 0..count {
2483                            self.tree[cur_ix + i].item.body = ItemBody::Text;
2484                        }
2485                    }
2486                    prev_ix = cur_ix + count - 1;
2487                    prev = TreePointer::Valid(prev_ix);
2488                    cur = self.tree[prev_ix].next;
2489                }
2490            } else {
2491                prev = cur;
2492                cur = self.tree[cur_ix].next;
2493            }
2494        }
2495        self.inline_stack.pop_all(&mut self.tree);
2496    }
2497
2498    /// Returns next byte index, url and title.
2499    fn scan_inline_link(
2500        &self,
2501        underlying: &'a str,
2502        mut ix: usize,
2503        node: TreePointer,
2504    ) -> Option<(usize, CowStr<'a>, CowStr<'a>)> {
2505        if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
2506            return None;
2507        }
2508        ix += 1;
2509        ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2510
2511        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
2512        let dest = unescape(dest);
2513        ix += dest_length;
2514
2515        ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2516
2517        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
2518            ix += bytes_scanned;
2519            ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2520            t
2521        } else {
2522            "".into()
2523        };
2524        if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
2525            return None;
2526        }
2527        ix += 1;
2528
2529        Some((ix, dest, title))
2530    }
2531
2532    // returns (bytes scanned, title cow)
2533    fn scan_link_title(
2534        &self,
2535        text: &'a str,
2536        start_ix: usize,
2537        node: TreePointer,
2538    ) -> Option<(usize, CowStr<'a>)> {
2539        let bytes = text.as_bytes();
2540        let open = match bytes.get(start_ix) {
2541            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
2542            _ => return None,
2543        };
2544        let close = if open == b'(' { b')' } else { open };
2545
2546        let mut title = String::new();
2547        let mut mark = start_ix + 1;
2548        let mut i = start_ix + 1;
2549
2550        while i < bytes.len() {
2551            let c = bytes[i];
2552
2553            if c == close {
2554                let cow = if mark == 1 {
2555                    (i - start_ix + 1, text[mark..i].into())
2556                } else {
2557                    title.push_str(&text[mark..i]);
2558                    (i - start_ix + 1, title.into())
2559                };
2560
2561                return Some(cow);
2562            }
2563            if c == open {
2564                return None;
2565            }
2566
2567            if c == b'\n' || c == b'\r' {
2568                if let TreePointer::Valid(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
2569                    if self.tree[node_ix].item.start > i {
2570                        title.push_str(&text[mark..i]);
2571                        title.push('\n');
2572                        i = self.tree[node_ix].item.start;
2573                        mark = i;
2574                        continue;
2575                    }
2576                }
2577            }
2578            if c == b'&' {
2579                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
2580                    title.push_str(&text[mark..i]);
2581                    title.push_str(&value);
2582                    i += n;
2583                    mark = i;
2584                    continue;
2585                }
2586            }
2587            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
2588                title.push_str(&text[mark..i]);
2589                i += 1;
2590                mark = i;
2591            }
2592
2593            i += 1;
2594        }
2595
2596        None
2597    }
2598
2599    /// Make a code span.
2600    ///
2601    /// Both `open` and `close` are matching MaybeCode items.
2602    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2603        let first_ix = open + 1;
2604        let last_ix = close - 1;
2605        let bytes = self.text.as_bytes();
2606        let mut span_start = self.tree[open].item.end;
2607        let mut span_end = self.tree[close].item.start;
2608        let mut buf: Option<String> = None;
2609
2610        // detect all-space sequences, since they are kept as-is as of commonmark 0.29
2611        if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2612            let opening = match bytes[span_start] {
2613                b' ' | b'\r' | b'\n' => true,
2614                _ => false,
2615            };
2616            let closing = match bytes[span_end - 1] {
2617                b' ' | b'\r' | b'\n' => true,
2618                _ => false,
2619            };
2620            let drop_enclosing_whitespace = opening && closing;
2621
2622            if drop_enclosing_whitespace {
2623                span_start += 1;
2624                if span_start < span_end {
2625                    span_end -= 1;
2626                }
2627            }
2628
2629            let mut ix = first_ix;
2630
2631            while ix < close {
2632                if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
2633                    if drop_enclosing_whitespace {
2634                        // check whether break should be ignored
2635                        if ix == first_ix {
2636                            ix = ix + 1;
2637                            span_start = min(span_end, self.tree[ix].item.start);
2638                            continue;
2639                        } else if ix == last_ix && last_ix > first_ix {
2640                            ix = ix + 1;
2641                            continue;
2642                        }
2643                    }
2644
2645                    let end = bytes[self.tree[ix].item.start..]
2646                        .iter()
2647                        .position(|&b| b == b'\r' || b == b'\n')
2648                        .unwrap()
2649                        + self.tree[ix].item.start;
2650                    if let Some(ref mut buf) = buf {
2651                        buf.push_str(&self.text[self.tree[ix].item.start..end]);
2652                        buf.push(' ');
2653                    } else {
2654                        let mut new_buf = String::with_capacity(span_end - span_start);
2655                        new_buf.push_str(&self.text[span_start..end]);
2656                        new_buf.push(' ');
2657                        buf = Some(new_buf);
2658                    }
2659                } else if let Some(ref mut buf) = buf {
2660                    let end = if ix == last_ix {
2661                        span_end
2662                    } else {
2663                        self.tree[ix].item.end
2664                    };
2665                    buf.push_str(&self.text[self.tree[ix].item.start..end]);
2666                }
2667                ix = ix + 1;
2668            }
2669        }
2670
2671        let cow = if let Some(buf) = buf {
2672            buf.into()
2673        } else {
2674            self.text[span_start..span_end].into()
2675        };
2676        if preceding_backslash {
2677            self.tree[open].item.body = ItemBody::Text;
2678            self.tree[open].item.end = self.tree[open].item.start + 1;
2679            self.tree[open].next = TreePointer::Valid(close);
2680            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2681            self.tree[close].item.start = self.tree[open].item.start + 1;
2682        } else {
2683            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2684            self.tree[open].item.end = self.tree[close].item.end;
2685            self.tree[open].next = self.tree[close].next;
2686        }
2687    }
2688
2689    /// Make an inline math, the same as make_code_span.
2690    ///
2691    /// Both `open` and `close` are matching MaybeMath items.
2692    fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2693        let first_ix = open + 1;
2694        let last_ix = close - 1;
2695        let bytes = self.text.as_bytes();
2696        let mut span_start = self.tree[open].item.end;
2697        let mut span_end = self.tree[close].item.start;
2698        let mut buf: Option<String> = None;
2699
2700        if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2701            let opening = match bytes[span_start] {
2702                b' ' | b'\r' | b'\n' => true,
2703                _ => false,
2704            };
2705            let closing = match bytes[span_end - 1] {
2706                b' ' | b'\r' | b'\n' => true,
2707                _ => false,
2708            };
2709            let drop_enclosing_whitespace = opening && closing;
2710
2711            if drop_enclosing_whitespace {
2712                span_start += 1;
2713                if span_start < span_end {
2714                    span_end -= 1;
2715                }
2716            }
2717
2718            let mut ix = first_ix;
2719
2720            while ix < close {
2721                if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
2722                    if drop_enclosing_whitespace {
2723                        // check whether break should be ignored
2724                        if ix == first_ix {
2725                            ix = ix + 1;
2726                            span_start = min(span_end, self.tree[ix].item.start);
2727                            continue;
2728                        } else if ix == last_ix && last_ix > first_ix {
2729                            ix = ix + 1;
2730                            continue;
2731                        }
2732                    }
2733
2734                    let end = bytes[self.tree[ix].item.start..]
2735                        .iter()
2736                        .position(|&b| b == b'\r' || b == b'\n')
2737                        .unwrap()
2738                        + self.tree[ix].item.start;
2739                    if let Some(ref mut buf) = buf {
2740                        buf.push_str(&self.text[self.tree[ix].item.start..end]);
2741                        buf.push(' ');
2742                    } else {
2743                        let mut new_buf = String::with_capacity(span_end - span_start);
2744                        new_buf.push_str(&self.text[span_start..end]);
2745                        new_buf.push(' ');
2746                        buf = Some(new_buf);
2747                    }
2748                } else if let Some(ref mut buf) = buf {
2749                    let end = if ix == last_ix {
2750                        span_end
2751                    } else {
2752                        self.tree[ix].item.end
2753                    };
2754                    buf.push_str(&self.text[self.tree[ix].item.start..end]);
2755                }
2756                ix = ix + 1;
2757            }
2758        }
2759
2760        let cow = if let Some(buf) = buf {
2761            buf.into()
2762        } else {
2763            self.text[span_start..span_end].into()
2764        };
2765        if preceding_backslash {
2766            self.tree[open].item.body = ItemBody::Text;
2767            self.tree[open].item.end = self.tree[open].item.start + 1;
2768            self.tree[open].next = TreePointer::Valid(close);
2769            self.tree[close].item.body = ItemBody::Math(self.allocs.allocate_cow(cow));
2770            self.tree[close].item.start = self.tree[open].item.start + 1;
2771        } else {
2772            self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow));
2773            self.tree[open].item.end = self.tree[close].item.end;
2774            self.tree[open].next = self.tree[close].next;
2775        }
2776    }
2777
2778    /// Returns the next byte offset on success.
2779    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<usize> {
2780        let c = *bytes.get(ix)?;
2781        if c == b'!' {
2782            scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)
2783        } else if c == b'?' {
2784            scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)
2785        } else {
2786            let i = scan_html_block_inner(
2787                &bytes[ix..],
2788                Some(&|_bytes| {
2789                    let mut line_start = LineStart::new(bytes);
2790                    let _ = scan_containers(&self.tree, &mut line_start);
2791                    line_start.bytes_scanned()
2792                }),
2793            )?;
2794            Some(i + ix)
2795        }
2796    }
2797
2798    /// Consumes the event iterator and produces an iterator that produces
2799    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
2800    /// range in the markdown source.
2801    pub fn into_offset_iter(self) -> OffsetIter<'a> {
2802        OffsetIter { inner: self }
2803    }
2804}
2805
2806pub(crate) enum LoopInstruction<T> {
2807    /// Continue looking for more special bytes, but skip next few bytes.
2808    ContinueAndSkip(usize),
2809    /// Break looping immediately, returning with the given index and value.
2810    BreakAtWith(usize, T),
2811}
2812
2813/// This function walks the byte slices from the given index and
2814/// calls the callback function on all bytes (and their indices) that are in the following set:
2815/// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`, `$`
2816/// It is guaranteed not call the callback on other bytes.
2817/// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
2818/// will not be called with an index that is less than `ix + n + 1`.
2819/// When the callback returns a `BreakAtWith(end_ix, opt+val)`, no more callbacks will be
2820/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
2821/// If `BreakAtWith(..)` is never returned, this function will return the first
2822/// index that is outside the byteslice bound and a `None` value.
2823fn iterate_special_bytes<F, T>(bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
2824where
2825    F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2826{
2827    #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2828    {
2829        crate::simd::iterate_special_bytes(bytes, ix, callback)
2830    }
2831    #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2832    {
2833        scalar_iterate_special_bytes(bytes, ix, callback)
2834    }
2835}
2836
2837const fn special_bytes() -> [bool; 256] {
2838    let mut bytes = [false; 256];
2839    bytes[b'<' as usize] = true;
2840    bytes[b'!' as usize] = true;
2841    bytes[b'[' as usize] = true;
2842    bytes[b'~' as usize] = true;
2843    bytes[b'`' as usize] = true;
2844    bytes[b'|' as usize] = true;
2845    bytes[b'\\' as usize] = true;
2846    bytes[b'*' as usize] = true;
2847    bytes[b'_' as usize] = true;
2848    bytes[b'\r' as usize] = true;
2849    bytes[b'\n' as usize] = true;
2850    bytes[b']' as usize] = true;
2851    bytes[b'&' as usize] = true;
2852    bytes[b'$' as usize] = true;
2853    bytes
2854}
2855
2856pub(crate) fn scalar_iterate_special_bytes<F, T>(
2857    bytes: &[u8],
2858    mut ix: usize,
2859    mut callback: F,
2860) -> (usize, Option<T>)
2861where
2862    F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2863{
2864    let special_bytes = special_bytes();
2865
2866    while ix < bytes.len() {
2867        let b = bytes[ix];
2868        if special_bytes[b as usize] {
2869            match callback(ix, b) {
2870                LoopInstruction::ContinueAndSkip(skip) => {
2871                    ix += skip;
2872                }
2873                LoopInstruction::BreakAtWith(ix, val) => {
2874                    return (ix, val);
2875                }
2876            }
2877        }
2878        ix += 1;
2879    }
2880
2881    (ix, None)
2882}
2883
2884/// Markdown event and source range iterator.
2885///
2886/// Generates tuples where the first element is the markdown event and the second
2887/// is a the corresponding range in the source string.
2888///
2889/// Constructed from a `Parser` using its
2890/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
2891pub struct OffsetIter<'a> {
2892    inner: Parser<'a>,
2893}
2894
2895impl<'a> Iterator for OffsetIter<'a> {
2896    type Item = (Event<'a>, Range<usize>);
2897
2898    fn next(&mut self) -> Option<Self::Item> {
2899        match self.inner.tree.cur() {
2900            TreePointer::Nil => {
2901                let ix = self.inner.tree.pop()?;
2902                let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
2903                self.inner.tree.next_sibling(ix);
2904                Some((
2905                    Event::End(tag),
2906                    self.inner.tree[ix].item.start..self.inner.tree[ix].item.end,
2907                ))
2908            }
2909            TreePointer::Valid(cur_ix) => {
2910                if self.inner.tree[cur_ix].item.body.is_inline() {
2911                    self.inner.handle_inline();
2912                }
2913
2914                let node = self.inner.tree[cur_ix];
2915                let item = node.item;
2916                let event = item_to_event(item, self.inner.text, &self.inner.allocs);
2917                if let Event::Start(..) = event {
2918                    self.inner.tree.push();
2919                } else {
2920                    self.inner.tree.next_sibling(cur_ix);
2921                }
2922                Some((event, item.start..item.end))
2923            }
2924        }
2925    }
2926}
2927
2928fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
2929    match item.body {
2930        ItemBody::Paragraph => Tag::Paragraph,
2931        ItemBody::Emphasis => Tag::Emphasis,
2932        ItemBody::Strong => Tag::Strong,
2933        ItemBody::Strikethrough => Tag::Strikethrough,
2934        ItemBody::Link(link_ix) => {
2935            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2936            Tag::Link(*link_type, url.clone(), title.clone())
2937        }
2938        ItemBody::Image(link_ix) => {
2939            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2940            Tag::Image(*link_type, url.clone(), title.clone())
2941        }
2942        ItemBody::Heading(level) => Tag::Heading(level),
2943        ItemBody::FencedCodeBlock(cow_ix) => {
2944            Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2945        }
2946        ItemBody::DisplayMath => Tag::DisplayMath,
2947        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2948        ItemBody::BlockQuote => Tag::BlockQuote,
2949        ItemBody::List(_, c, listitem_start) => {
2950            if c == b'.' || c == b')' {
2951                Tag::List(Some(listitem_start))
2952            } else {
2953                Tag::List(None)
2954            }
2955        }
2956        ItemBody::ListItem(_) => Tag::Item,
2957        ItemBody::TableHead => Tag::TableHead,
2958        ItemBody::TableCell => Tag::TableCell,
2959        ItemBody::TableRow => Tag::TableRow,
2960        ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
2961        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2962        _ => panic!("unexpected item body {:?}", item.body),
2963    }
2964}
2965
2966fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
2967    let tag = match item.body {
2968        ItemBody::Text => return Event::Text(text[item.start..item.end].into()),
2969        ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()),
2970        ItemBody::Math(cow_ix) => return Event::Math(allocs[cow_ix].clone()),
2971        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()),
2972        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2973        ItemBody::SoftBreak => return Event::SoftBreak,
2974        ItemBody::HardBreak => return Event::HardBreak,
2975        ItemBody::FootnoteReference(cow_ix) => {
2976            return Event::FootnoteReference(allocs[cow_ix].clone())
2977        }
2978        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2979        ItemBody::Rule => return Event::Rule,
2980
2981        ItemBody::Frontmatter(cow_ix) => return Event::Frontmatter(allocs[cow_ix].clone()),
2982        ItemBody::Paragraph => Tag::Paragraph,
2983        ItemBody::Emphasis => Tag::Emphasis,
2984        ItemBody::Strong => Tag::Strong,
2985        ItemBody::Strikethrough => Tag::Strikethrough,
2986        ItemBody::Link(link_ix) => {
2987            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2988            Tag::Link(*link_type, url.clone(), title.clone())
2989        }
2990        ItemBody::Image(link_ix) => {
2991            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2992            Tag::Image(*link_type, url.clone(), title.clone())
2993        }
2994        ItemBody::Heading(level) => Tag::Heading(level),
2995        ItemBody::FencedCodeBlock(cow_ix) => {
2996            Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2997        }
2998        ItemBody::DisplayMath => Tag::DisplayMath,
2999        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
3000        ItemBody::BlockQuote => Tag::BlockQuote,
3001        ItemBody::List(_, c, listitem_start) => {
3002            if c == b'.' || c == b')' {
3003                Tag::List(Some(listitem_start))
3004            } else {
3005                Tag::List(None)
3006            }
3007        }
3008        ItemBody::ListItem(_) => Tag::Item,
3009        ItemBody::TableHead => Tag::TableHead,
3010        ItemBody::TableCell => Tag::TableCell,
3011        ItemBody::TableRow => Tag::TableRow,
3012        ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
3013        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
3014        _ => panic!("unexpected item body {:?}", item.body),
3015    };
3016
3017    Event::Start(tag)
3018}
3019
3020// https://english.stackexchange.com/a/285573
3021fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
3022    let mut list_item = tree[list_ix].child;
3023    while let TreePointer::Valid(listitem_ix) = list_item {
3024        // first child is special, controls how we repoint list_item.child
3025        let list_item_firstborn = tree[listitem_ix].child;
3026
3027        // Check that list item has children - this is not necessarily the case!
3028        if let TreePointer::Valid(firstborn_ix) = list_item_firstborn {
3029            if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
3030                tree[listitem_ix].child = tree[firstborn_ix].child;
3031            }
3032
3033            let mut list_item_child = TreePointer::Valid(firstborn_ix);
3034            let mut node_to_repoint = TreePointer::Nil;
3035            while let TreePointer::Valid(child_ix) = list_item_child {
3036                // surgerize paragraphs
3037                let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
3038                    if let TreePointer::Valid(child_firstborn) = tree[child_ix].child {
3039                        if let TreePointer::Valid(repoint_ix) = node_to_repoint {
3040                            tree[repoint_ix].next = TreePointer::Valid(child_firstborn);
3041                        }
3042                        let mut child_lastborn = child_firstborn;
3043                        while let TreePointer::Valid(lastborn_next_ix) = tree[child_lastborn].next {
3044                            child_lastborn = lastborn_next_ix;
3045                        }
3046                        child_lastborn
3047                    } else {
3048                        child_ix
3049                    }
3050                } else {
3051                    child_ix
3052                };
3053
3054                node_to_repoint = TreePointer::Valid(repoint_ix);
3055                tree[repoint_ix].next = tree[child_ix].next;
3056                list_item_child = tree[child_ix].next;
3057            }
3058        }
3059
3060        list_item = tree[listitem_ix].next;
3061    }
3062}
3063
3064impl<'a> Iterator for Parser<'a> {
3065    type Item = Event<'a>;
3066
3067    fn next(&mut self) -> Option<Event<'a>> {
3068        match self.tree.cur() {
3069            TreePointer::Nil => {
3070                let ix = self.tree.pop()?;
3071                let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
3072                self.tree.next_sibling(ix);
3073                Some(Event::End(tag))
3074            }
3075            TreePointer::Valid(cur_ix) => {
3076                if self.tree[cur_ix].item.body.is_inline() {
3077                    self.handle_inline();
3078                }
3079
3080                let node = self.tree[cur_ix];
3081                let item = node.item;
3082                let event = item_to_event(item, self.text, &self.allocs);
3083                if let Event::Start(..) = event {
3084                    self.tree.push();
3085                } else {
3086                    self.tree.next_sibling(cur_ix);
3087                }
3088                Some(event)
3089            }
3090        }
3091    }
3092}
3093
3094#[cfg(test)]
3095mod test {
3096    use super::*;
3097    use crate::tree::Node;
3098
3099    // TODO: move these tests to tests/html.rs?
3100
3101    fn parser_with_extensions(text: &str) -> Parser<'_> {
3102        let mut opts = Options::empty();
3103        opts.insert(Options::ENABLE_TABLES);
3104        opts.insert(Options::ENABLE_FOOTNOTES);
3105        opts.insert(Options::ENABLE_STRIKETHROUGH);
3106        opts.insert(Options::ENABLE_TASKLISTS);
3107
3108        Parser::new_ext(text, opts)
3109    }
3110
3111    #[test]
3112    #[cfg(target_pointer_width = "64")]
3113    fn node_size() {
3114        let node_size = std::mem::size_of::<Node<Item>>();
3115        assert_eq!(48, node_size);
3116    }
3117
3118    #[test]
3119    #[cfg(target_pointer_width = "64")]
3120    fn body_size() {
3121        let body_size = std::mem::size_of::<ItemBody>();
3122        assert_eq!(16, body_size);
3123    }
3124
3125    #[test]
3126    fn single_open_fish_bracket() {
3127        // dont crash
3128        assert_eq!(3, Parser::new("<").count());
3129    }
3130
3131    #[test]
3132    fn lone_hashtag() {
3133        // dont crash
3134        assert_eq!(2, Parser::new("#").count());
3135    }
3136
3137    #[test]
3138    fn lots_of_backslashes() {
3139        // dont crash
3140        Parser::new("\\\\\r\r").count();
3141        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
3142    }
3143
3144    #[test]
3145    fn issue_320() {
3146        // dont crash
3147        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
3148    }
3149
3150    #[test]
3151    fn issue_319() {
3152        // dont crash
3153        parser_with_extensions("|\r-]([^|\r-]([^").count();
3154        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
3155    }
3156
3157    #[test]
3158    fn issue_303() {
3159        // dont crash
3160        parser_with_extensions("[^\r\ra]").count();
3161        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
3162    }
3163
3164    #[test]
3165    fn issue_313() {
3166        // dont crash
3167        parser_with_extensions("*]0[^\r\r*]0[^").count();
3168        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
3169    }
3170
3171    #[test]
3172    fn issue_311() {
3173        // dont crash
3174        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
3175    }
3176
3177    #[test]
3178    fn issue_283() {
3179        let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
3180        // dont crash
3181        parser_with_extensions(input).count();
3182    }
3183
3184    #[test]
3185    fn issue_289() {
3186        // dont crash
3187        parser_with_extensions("> - \\\n> - ").count();
3188        parser_with_extensions("- \n\n").count();
3189    }
3190
3191    #[test]
3192    fn issue_306() {
3193        // dont crash
3194        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
3195    }
3196
3197    #[test]
3198    fn issue_305() {
3199        // dont crash
3200        parser_with_extensions("_6**6*_*").count();
3201    }
3202
3203    #[test]
3204    fn another_emphasis_panic() {
3205        parser_with_extensions("*__#_#__*").count();
3206    }
3207
3208    #[test]
3209    fn offset_iter() {
3210        let event_offsets: Vec<_> = Parser::new("*hello* world")
3211            .into_offset_iter()
3212            .map(|(_ev, range)| range)
3213            .collect();
3214        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
3215        assert_eq!(expected_offsets, event_offsets);
3216    }
3217
3218    #[test]
3219    fn offset_iter_issue_378() {
3220        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
3221            .into_offset_iter()
3222            .map(|(_ev, range)| range)
3223            .collect();
3224        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
3225        assert_eq!(expected_offsets, event_offsets);
3226    }
3227
3228    #[test]
3229    fn offset_iter_issue_404() {
3230        let event_offsets: Vec<_> = Parser::new("###\n")
3231            .into_offset_iter()
3232            .map(|(_ev, range)| range)
3233            .collect();
3234        let expected_offsets = vec![(0..4), (0..4)];
3235        assert_eq!(expected_offsets, event_offsets);
3236    }
3237
3238    // FIXME: add this one regression suite
3239    #[test]
3240    fn link_def_at_eof() {
3241        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
3242        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
3243
3244        let mut buf = String::new();
3245        crate::html::push_html(&mut buf, Parser::new(test_str));
3246        assert_eq!(expected, buf);
3247    }
3248
3249    #[test]
3250    fn ref_def_at_eof() {
3251        let test_str = "[test]:\\";
3252        let expected = "";
3253
3254        let mut buf = String::new();
3255        crate::html::push_html(&mut buf, Parser::new(test_str));
3256        assert_eq!(expected, buf);
3257    }
3258
3259    #[test]
3260    fn ref_def_cr_lf() {
3261        let test_str = "[a]: /u\r\n\n[a]";
3262        let expected = "<p><a href=\"/u\">a</a></p>\n";
3263
3264        let mut buf = String::new();
3265        crate::html::push_html(&mut buf, Parser::new(test_str));
3266        assert_eq!(expected, buf);
3267    }
3268
3269    #[test]
3270    fn no_dest_refdef() {
3271        let test_str = "[a]:";
3272        let expected = "<p>[a]:</p>\n";
3273
3274        let mut buf = String::new();
3275        crate::html::push_html(&mut buf, Parser::new(test_str));
3276        assert_eq!(expected, buf);
3277    }
3278
3279    #[test]
3280    fn simple_broken_link_callback() {
3281        let test_str = "This is a link w/o def: [hello][world]";
3282        let parser = Parser::new_with_broken_link_callback(
3283            test_str,
3284            Options::empty(),
3285            Some(&|norm, raw| {
3286                assert_eq!("world", raw);
3287                assert_eq!("world", norm);
3288                Some(("YOLO".to_owned(), "SWAG".to_owned()))
3289            }),
3290        );
3291        let mut link_tag_count = 0;
3292        for (typ, url, title) in parser.filter_map(|event| match event {
3293            Event::Start(tag) | Event::End(tag) => match tag {
3294                Tag::Link(typ, url, title) => Some((typ, url, title)),
3295                _ => None,
3296            },
3297            _ => None,
3298        }) {
3299            link_tag_count += 1;
3300            assert_eq!(typ, LinkType::ReferenceUnknown);
3301            assert_eq!(url.as_ref(), "YOLO");
3302            assert_eq!(title.as_ref(), "SWAG");
3303        }
3304        assert!(link_tag_count > 0);
3305    }
3306
3307    #[test]
3308    fn code_block_kind_check_fenced() {
3309        let parser = Parser::new("hello\n```test\ntadam\n```");
3310        let mut found = 0;
3311        for (ev, _range) in parser.into_offset_iter() {
3312            match ev {
3313                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
3314                    assert_eq!(syntax.as_ref(), "test");
3315                    found += 1;
3316                }
3317                _ => {}
3318            }
3319        }
3320        assert_eq!(found, 1);
3321    }
3322
3323    #[test]
3324    fn code_block_kind_check_indented() {
3325        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
3326        let mut found = 0;
3327        for (ev, _range) in parser.into_offset_iter() {
3328            match ev {
3329                Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
3330                    found += 1;
3331                }
3332                _ => {}
3333            }
3334        }
3335        assert_eq!(found, 1);
3336    }
3337}