pulldown_cmark_fork/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use std::collections::{VecDeque, HashMap};
24use std::ops::{Index, Range};
25use std::cmp::min;
26
27use unicase::UniCase;
28
29use crate::strings::CowStr;
30use crate::scanners::*;
31use crate::tree::{TreePointer, TreeIndex, Tree};
32use crate::linklabel::{scan_link_label, scan_link_label_rest, LinkLabel, ReferenceLabel};
33
34#[derive(Clone, Debug, PartialEq)]
35pub enum Tag<'a> {
36    // block-level tags
37    Paragraph,
38    Rule,
39
40    /// A heading. The field indicates the level of the heading.
41    Header(i32),
42
43    BlockQuote,
44    CodeBlock(CowStr<'a>),
45
46    /// A list. If the list is ordered the field indicates the number of the first item.
47    List(Option<usize>),  // TODO: add delim and tight for ast (not needed for html)
48    Item,
49    FootnoteDefinition(CowStr<'a>),
50    HtmlBlock,
51
52    // tables
53    Table(Vec<Alignment>),
54    TableHead,
55    TableRow,
56    TableCell,
57
58    // span-level tags
59    Emphasis,
60    Strong,
61    Strikethrough,
62
63    /// A link. The first field is the link type, the second the destination URL and the third is a title
64    Link(LinkType, CowStr<'a>, CowStr<'a>),
65
66    /// An image. The first field is the link type, the second the destination URL and the third is a title
67    Image(LinkType, CowStr<'a>, CowStr<'a>),
68}
69
70#[derive(Clone, Debug, PartialEq, Copy)]
71pub enum LinkType {
72    /// Inline link like `[foo](bar)`
73    Inline,
74    /// Reference link like `[foo][bar]`
75    Reference,
76    /// Reference without destination in the document, but resolved by the broken_link_callback
77    ReferenceUnknown,
78    /// Collapsed link like `[foo][]`
79    Collapsed,
80    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
81    CollapsedUnknown,
82    /// Shortcut link like `[foo]`
83    Shortcut,
84    /// Shortcut without destination in the document, but resolved by the broken_link_callback
85    ShortcutUnknown,
86    /// Autolink like `<http://foo.bar/baz>`
87    Autolink,
88    /// Email address in autolink like `<john@example.org>`
89    Email,
90}
91
92impl LinkType {
93    fn to_unknown(self) -> Self {
94        match self {
95            LinkType::Reference => LinkType::ReferenceUnknown,
96            LinkType::Collapsed => LinkType::CollapsedUnknown,
97            LinkType::Shortcut => LinkType::ShortcutUnknown,
98            _ => unreachable!(),
99        }
100    }
101}
102
103#[derive(Clone, Debug, PartialEq)]
104pub enum Event<'a> {
105    Start(Tag<'a>),
106    End(Tag<'a>),
107    Text(CowStr<'a>),
108    Code(CowStr<'a>),
109    Html(CowStr<'a>),
110    InlineHtml(CowStr<'a>),
111    FootnoteReference(CowStr<'a>),
112    SoftBreak,
113    HardBreak,
114    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked
115    TaskListMarker(bool),
116}
117
118#[derive(Copy, Clone, Debug, PartialEq)]
119pub enum Alignment {
120    None,
121    Left,
122    Center,
123    Right,
124}
125
126bitflags! {
127    pub struct Options: u32 {
128        const FIRST_PASS = 1 << 0;
129        const ENABLE_TABLES = 1 << 1;
130        const ENABLE_FOOTNOTES = 1 << 2;
131        const ENABLE_STRIKETHROUGH = 1 << 3;
132        const ENABLE_TASKLISTS = 1 << 4;
133    }
134}
135
136#[derive(Debug, Default, Clone, Copy)]
137struct Item {
138    start: usize,
139    end: usize,
140    body: ItemBody,
141}
142
143#[derive(Debug, PartialEq, Clone, Copy)]
144enum ItemBody {
145    Paragraph,
146    Text,
147    SoftBreak,
148    HardBreak,
149
150    // These are possible inline items, need to be resolved in second pass.
151
152    // repeats, can_open, can_close
153    MaybeEmphasis(usize, bool, bool),
154    MaybeCode(usize, bool), // number of backticks, preceeded by backslash
155    MaybeHtml,
156    MaybeLinkOpen,
157    MaybeLinkClose,
158    MaybeImage,
159
160    // These are inline items after resolution.
161    Emphasis,
162    Strong,
163    Strikethrough,
164    Code(CowIndex),
165    InlineHtml,
166    Link(LinkIndex),
167    Image(LinkIndex),
168    FootnoteReference(CowIndex), 
169    TaskListMarker(bool), // true for checked
170
171    Rule,
172    Header(i32), // header level
173    FencedCodeBlock(CowIndex),
174    IndentCodeBlock,
175    HtmlBlock(Option<u32>), // end tag, or none for type 6
176    Html,
177    BlockQuote,
178    List(bool, u8, usize), // is_tight, list character, list start index
179    ListItem(usize), // indent level
180    SynthesizeText(CowIndex),
181    FootnoteDefinition(CowIndex),
182
183    // Tables
184    Table(AlignmentIndex),
185    TableHead,
186    TableRow,
187    TableCell,
188
189    // Dummy node at the top of the tree - should not be used otherwise!
190    Root,
191}
192
193impl<'a> ItemBody {
194    fn is_inline(&self) -> bool {
195        match *self {
196            ItemBody::MaybeEmphasis(..) | ItemBody::MaybeHtml | ItemBody::MaybeCode(..)
197            | ItemBody::MaybeLinkOpen | ItemBody::MaybeLinkClose | ItemBody::MaybeImage => true,
198            _ => false,
199        }
200    }
201}
202
203impl<'a> Default for ItemBody {
204    fn default() -> Self {
205        ItemBody::Root
206    }
207}
208
209/// Scanning modes for `Parser`'s `parse_line` method.
210#[derive(PartialEq, Eq, Copy, Clone)]
211enum TableParseMode {
212    /// Inside a paragraph, scanning for table headers.
213    Scan,
214    /// Inside a table.
215    Active,
216    /// Inside a paragraph, not scanning for table headers.
217    Disabled,
218}
219
220/// State for the first parsing pass.
221///
222/// The first pass resolves all block structure, generating an AST. Within a block, items
223/// are in a linear chain with potential inline markup identified.
224struct FirstPass<'a> {
225    text: &'a str,
226    tree: Tree<Item>,
227    begin_list_item: bool,
228    last_line_blank: bool,
229    allocs: Allocations<'a>,
230    options: Options,
231    list_nesting: usize,
232}
233
234impl<'a> FirstPass<'a> {
235    fn new(text: &'a str, options: Options) -> FirstPass {
236        // This is a very naive heuristic for the number of nodes
237        // we'll need.
238        let start_capacity = std::cmp::max(128, text.len() / 32);
239        let tree = Tree::with_capacity(start_capacity);
240        let begin_list_item = false;
241        let last_line_blank = false;
242        let allocs = Allocations::new();
243        FirstPass {
244            text, tree, begin_list_item, last_line_blank,
245            allocs, options, list_nesting: 0
246        }
247    }
248
249    fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
250        let mut ix = 0;
251        while ix < self.text.len() {
252            ix = self.parse_block(ix);
253        }
254        for _ in 0..self.tree.spine_len() {
255            self.pop(ix);
256        }
257        (self.tree, self.allocs)
258    }
259
260    /// Returns offset after block.
261    fn parse_block(&mut self, mut start_ix: usize) -> usize {
262        let bytes = self.text.as_bytes();
263        let mut line_start = LineStart::new(&bytes[start_ix..]);
264
265        let i = self.scan_containers(&mut line_start);
266        for _ in i..self.tree.spine_len() {
267            self.pop(start_ix);
268        }
269        
270        if self.options.contains(Options::ENABLE_FOOTNOTES) {
271            // finish footnote if it's still open and was preceeded by blank line
272            if let Some(node_ix) = self.tree.peek_up() {
273                if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
274                    if self.last_line_blank {
275                        self.pop(start_ix);
276                    }
277                }
278            }
279
280            // Footnote definitions of the form
281            // [^bar]:
282            // * anything really
283            let container_start = start_ix + line_start.bytes_scanned();
284            if let Some(bytecount) = self.parse_footnote(container_start) {
285                start_ix = container_start + bytecount;
286                start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
287                line_start = LineStart::new(&bytes[start_ix..]);      
288            }
289        }
290
291        // Process new containers
292        loop {
293            let container_start = start_ix + line_start.bytes_scanned();
294            if let Some((ch, index, indent)) = line_start.scan_list_marker() {
295                let after_marker_index = start_ix + line_start.bytes_scanned();
296                self.continue_list(container_start, ch, index);
297                self.tree.append(Item {
298                    start: container_start,
299                    end: after_marker_index, // will get updated later if item not empty
300                    body: ItemBody::ListItem(indent),
301                });
302                self.tree.push();
303                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
304                    self.begin_list_item = true;
305                    return after_marker_index + n;
306                }
307                if self.options.contains(Options::ENABLE_TASKLISTS) {
308                    if let Some(is_checked) = line_start.scan_task_list_marker() {
309                        self.tree.append(Item {
310                            start: after_marker_index,
311                            end: start_ix + line_start.bytes_scanned(),
312                            body: ItemBody::TaskListMarker(is_checked),
313                        });
314                    }
315                }
316            } else if line_start.scan_blockquote_marker() {
317                self.finish_list(start_ix);
318                self.tree.append(Item {
319                    start: container_start,
320                    end: 0, // will get set later
321                    body: ItemBody::BlockQuote,
322                });
323                self.tree.push();
324            } else {
325                break;
326            }
327        }
328
329        let ix = start_ix + line_start.bytes_scanned();
330
331        if let Some(n) = scan_blank_line(&bytes[ix..]) {
332            if let Some(node_ix) = self.tree.peek_up() {
333                match self.tree[node_ix].item.body {
334                    ItemBody::BlockQuote => (),
335                    _ => {
336                        if self.begin_list_item {
337                            // A list item can begin with at most one blank line.
338                            self.pop(start_ix);
339                        }
340                        self.last_line_blank = true;
341                    }
342                }
343            }
344            return ix + n;
345        }
346
347        self.begin_list_item = false;
348        self.finish_list(start_ix);
349
350        // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
351        let remaining_space = line_start.remaining_space();
352
353        let indent = line_start.scan_space_upto(4);
354        if indent == 4 {
355            let ix = start_ix + line_start.bytes_scanned();
356            let remaining_space = line_start.remaining_space();
357            return self.parse_indented_code_block(ix, remaining_space);
358        }
359
360        let ix = start_ix + line_start.bytes_scanned();
361
362        // HTML Blocks
363        if self.text.as_bytes()[ix] == b'<' {
364            // Types 1-5 are all detected by one function and all end with the same
365            // pattern
366            if let Some(html_end_tag_ix) = get_html_end_tag(&bytes[(ix + 1)..]) {
367                return self.parse_html_block_type_1_to_5(ix, html_end_tag_ix, remaining_space);
368            }
369
370            // Detect type 6
371            let possible_tag = scan_html_block_tag(&bytes[(ix + 1)..]).1;
372            if is_html_tag(possible_tag) {
373                return self.parse_html_block_type_6_or_7(ix, remaining_space);
374            }
375
376            // Detect type 7
377            if let Some(_html_bytes) = scan_html_type_7(&bytes[(ix + 1)..]) {
378                return self.parse_html_block_type_6_or_7(ix, remaining_space);
379            }
380        }
381
382        if let Ok(n) = scan_hrule(&bytes[ix..]) {
383            return self.parse_hrule(n, ix);
384        }
385
386        if let Some((atx_size, atx_level)) = scan_atx_heading(&bytes[ix..]) {
387            return self.parse_atx_heading(ix, atx_level, atx_size);
388        }
389
390        // parse refdef
391        if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
392            self.allocs.refdefs.entry(label).or_insert(link_def);
393            return ix + bytecount;
394        }
395
396        if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
397            return self.parse_fenced_code_block(ix, indent, fence_ch, n);
398        }
399        self.parse_paragraph(ix)
400    }
401
402    /// Returns the offset of the first line after the table.
403    /// Assumptions: current focus is a table element and the table header
404    /// matches the separator line (same number of columns).
405    fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
406        // parse header. this shouldn't fail because we made sure the table header is ok
407        let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
408        self.tree[thead_ix].item.body = ItemBody::TableHead;
409
410        // parse body
411        let mut ix = body_start;
412        while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
413            ix = next_ix;
414        }
415
416        self.pop(ix);
417        ix
418    }
419
420    /// Call this when containers are taken care of.
421    /// Returns bytes scanned, row_ix
422    fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
423        let bytes = self.text.as_bytes();
424        let mut cells = 0;
425        let mut final_cell_ix = None;
426
427        let row_ix = self.tree.append(Item {
428            start: ix,
429            end: 0, // set at end of this function
430            body: ItemBody::TableRow,
431        });
432        self.tree.push();
433
434        loop {
435            ix += scan_ch(&bytes[ix..], b'|');
436            ix += scan_whitespace_no_nl(&bytes[ix..]);
437            
438            if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
439                ix += eol_bytes;
440                break;
441            }
442
443            let cell_ix = self.tree.append(Item {
444                start: ix,
445                end: ix,
446                body: ItemBody::TableCell,
447            });
448            self.tree.push();
449            let (next_ix, _brk) = self.parse_line(ix, TableParseMode::Active);
450            let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
451
452            if let TreePointer::Valid(cur_ix) = self.tree.cur() {
453                self.tree[cur_ix].item.end -= trailing_whitespace;
454            }
455            
456            self.tree[cell_ix].item.end = next_ix - trailing_whitespace;
457            self.tree.pop();
458
459            ix = next_ix;
460            cells += 1;
461
462            if cells == row_cells {
463                final_cell_ix = Some(cell_ix);
464            }
465        }
466
467        // fill empty cells if needed
468        // note: this is where GFM and commonmark-extra diverge. we follow
469        // GFM here
470        for _ in cells..row_cells {
471            self.tree.append(Item {
472                start: ix,
473                end: ix,
474                body: ItemBody::TableCell,
475            });
476        }
477
478        // drop excess cells
479        if let Some(cell_ix) = final_cell_ix {
480            self.tree[cell_ix].next = TreePointer::Nil;
481        }
482
483        self.pop(ix);
484
485        (ix, row_ix)
486    }
487
488    /// Returns first offset after the row and the tree index of the row.
489    fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
490        let bytes = self.text.as_bytes();
491        let mut line_start = LineStart::new(&bytes[ix..]);
492        let containers = self.scan_containers(&mut line_start);
493        if containers != self.tree.spine_len() {
494            return None;
495        }
496        line_start.scan_all_space();
497        ix += line_start.bytes_scanned();
498        if scan_paragraph_interrupt(&bytes[ix..]) {
499            return None;
500        }
501
502        let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
503        Some((ix, row_ix))
504    }
505
506    /// Returns offset of line start after paragraph.
507    fn parse_paragraph(&mut self, start_ix: usize) -> usize {
508        let node_ix = self.tree.append(Item {
509            start: start_ix,
510            end: 0,  // will get set later
511            body: ItemBody::Paragraph,
512        });
513        self.tree.push();
514        let bytes = self.text.as_bytes();
515
516        let mut ix = start_ix;
517        loop {
518            let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
519                TableParseMode::Scan
520            } else {
521                TableParseMode::Disabled
522            };
523            let (next_ix, brk) = self.parse_line(ix, scan_mode);
524            
525            // break out when we find a table
526            if let Some(Item { body: ItemBody::Table(alignment_ix), start, end }) = brk {
527                let table_cols = self.allocs[alignment_ix].len();
528                self.tree[node_ix].item = Item { body: ItemBody::Table(alignment_ix), start, end };
529                // this clears out any stuff we may have appended - but there may
530                // be a cleaner way
531                self.tree[node_ix].child = TreePointer::Nil;
532                self.tree.pop();
533                self.tree.push();
534                return self.parse_table(table_cols, ix, next_ix);
535            }
536
537            ix = next_ix;
538            let mut line_start = LineStart::new(&bytes[ix..]);
539            let n_containers = self.scan_containers(&mut line_start);
540            if !line_start.scan_space(4) {
541                let ix_new = ix + line_start.bytes_scanned();
542                if n_containers == self.tree.spine_len() {
543                    if let Some((n, level)) = scan_setext_heading(&bytes[ix_new..]) {
544                        self.tree[node_ix].item.body = ItemBody::Header(level);
545                        if let Some(Item { start, body: ItemBody::HardBreak, .. }) = brk {
546                            if bytes[start] == b'\\' {
547                                self.tree.append_text(start, start + 1);
548                            }
549                        }
550                        ix = ix_new + n;
551                        break;
552                    }
553                }
554                // first check for non-empty lists, then for other interrupts    
555                let suffix = &bytes[ix_new..];
556                if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix)  {
557                    break;
558                }
559            }
560            line_start.scan_all_space();
561            ix = next_ix + line_start.bytes_scanned();
562            if let Some(item) = brk {
563                self.tree.append(item);
564            }
565        }
566
567        self.pop(ix);
568        ix
569    }
570
571    /// Parse a line of input, appending text and items to tree.
572    ///
573    /// Returns: index after line and an item representing the break.
574    fn parse_line(&mut self, start: usize, mode: TableParseMode) -> (usize, Option<Item>) {
575        let bytes = &self.text.as_bytes();
576        let mut pipes = 0;
577        let mut last_pipe_ix = start;
578        let mut begin_text = start;
579
580        let (final_ix, brk) = iterate_special_bytes(bytes, start, |ix, byte| {
581            match byte {
582                b'\n' | b'\r' => {
583                    if let TableParseMode::Active = mode {
584                        return LoopInstruction::BreakAtWith(ix, None);
585                    }
586
587                    let mut i = ix;
588                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
589                    if mode == TableParseMode::Scan && pipes > 0 {
590                        // check if we may be parsing a table
591                        let next_line_ix = ix + eol_bytes;
592                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
593                        if self.scan_containers(&mut line_start) == self.tree.spine_len() {
594                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
595                            let (table_head_bytes, alignment) = scan_table_head(&bytes[table_head_ix..]);
596
597                            if table_head_bytes > 0 {
598                                // computing header count from number of pipes
599                                let header_count = count_header_cols(bytes, pipes, start, last_pipe_ix);
600
601                                // make sure they match the number of columns we find in separator line
602                                if alignment.len() == header_count {
603                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
604                                    let end_ix = table_head_ix + table_head_bytes;
605                                    return LoopInstruction::BreakAtWith(end_ix, Some(Item {
606                                        start: i,
607                                        end: end_ix, // must update later
608                                        body: ItemBody::Table(alignment_ix),
609                                    }));
610                                }
611                            }
612                        }
613                    }
614
615                    let end_ix = ix + eol_bytes;
616                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
617                    if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
618                        i -= 1;
619                        self.tree.append_text(begin_text, i);
620                        return LoopInstruction::BreakAtWith(end_ix, Some(Item {
621                            start: i,
622                            end: end_ix,
623                            body: ItemBody::HardBreak,
624                        }));
625                    }
626                    let trailing_whitespace = scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
627                    if trailing_whitespace >= 2 {
628                        i -= trailing_whitespace;
629                        self.tree.append_text(begin_text, i);
630                        return LoopInstruction::BreakAtWith(end_ix, Some(Item {
631                            start: i,
632                            end: end_ix,
633                            body: ItemBody::HardBreak,
634                        }));
635                    }
636                        
637                    self.tree.append_text(begin_text, ix);
638                    LoopInstruction::BreakAtWith(end_ix, Some(Item {
639                        start: i,
640                        end: end_ix,
641                        body: ItemBody::SoftBreak,
642                    }))
643                }
644                b'\\' => {
645                    if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
646                        self.tree.append_text(begin_text, ix);
647                        if bytes[ix + 1] == b'`' {
648                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
649                            self.tree.append(Item {
650                                start: ix + 1,
651                                end: ix + count + 1,
652                                body: ItemBody::MaybeCode(count, true),
653                            });
654                            begin_text = ix + 1 + count;
655                            LoopInstruction::ContinueAndSkip(count)
656                        } else {
657                            begin_text = ix + 1;
658                            LoopInstruction::ContinueAndSkip(1)
659                        }
660                    } else {
661                        LoopInstruction::ContinueAndSkip(0)
662                    }
663                }
664                c @ b'*' | c @ b'_' | c @ b'~' => {
665                    let string_suffix = &self.text[ix..];
666                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
667                    let can_open = delim_run_can_open(&self.text, string_suffix, count, ix);
668                    let can_close = delim_run_can_close(&self.text, string_suffix, count, ix);
669                    let is_valid_seq = c != b'~' || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
670
671                    if (can_open || can_close) && is_valid_seq {
672                        self.tree.append_text(begin_text, ix);
673                        for i in 0..count {
674                            self.tree.append(Item {
675                                start: ix + i,
676                                end: ix + i + 1,
677                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
678                            });
679                        }
680                        begin_text = ix + count;
681                    }
682                    LoopInstruction::ContinueAndSkip(count - 1)
683                }
684                b'`' => {
685                    self.tree.append_text(begin_text, ix);
686                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
687                    self.tree.append(Item {
688                        start: ix,
689                        end: ix + count,
690                        body: ItemBody::MaybeCode(count, false),
691                    });
692                    begin_text = ix + count;
693                    LoopInstruction::ContinueAndSkip(count - 1)
694                }
695                b'<' => {
696                    // Note: could detect some non-HTML cases and early escape here, but not
697                    // clear that's a win.
698                    self.tree.append_text(begin_text, ix);
699                    self.tree.append(Item {
700                        start: ix,
701                        end: ix + 1,
702                        body: ItemBody::MaybeHtml,
703                    });
704                    begin_text = ix + 1;
705                    LoopInstruction::ContinueAndSkip(0)
706                }
707                b'!' => {
708                    if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
709                        self.tree.append_text(begin_text, ix);
710                        self.tree.append(Item {
711                            start: ix,
712                            end: ix + 2,
713                            body: ItemBody::MaybeImage,
714                        });
715                        begin_text = ix + 2;
716                        LoopInstruction::ContinueAndSkip(1)
717                    } else {
718                        LoopInstruction::ContinueAndSkip(0)
719                    }
720                }
721                b'[' => {
722                    self.tree.append_text(begin_text, ix);
723                    self.tree.append(Item {
724                        start: ix,
725                        end: ix + 1,
726                        body: ItemBody::MaybeLinkOpen,
727                    });
728                    begin_text = ix + 1;
729                    LoopInstruction::ContinueAndSkip(0)
730                }
731                b']' => {
732                    self.tree.append_text(begin_text, ix);
733                    self.tree.append(Item {
734                        start: ix,
735                        end: ix + 1,
736                        body: ItemBody::MaybeLinkClose,
737                    });
738                    begin_text = ix + 1;
739                    LoopInstruction::ContinueAndSkip(0)
740                }
741                b'&' => {
742                    match scan_entity(&bytes[ix..]) {
743                        (n, Some(value)) => {
744                            self.tree.append_text(begin_text, ix);
745                            self.tree.append(Item {
746                                start: ix,
747                                end: ix + n,
748                                body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
749                            });
750                            begin_text = ix + n;
751                            LoopInstruction::ContinueAndSkip(n - 1)
752                        }
753                        _ => LoopInstruction::ContinueAndSkip(0),
754                    }
755                }
756                b'|' => {
757                    if let TableParseMode::Active = mode {
758                        LoopInstruction::BreakAtWith(ix, None)
759                    } else {
760                        last_pipe_ix = ix;
761                        pipes += 1;
762                        LoopInstruction::ContinueAndSkip(0)
763                    }
764                }
765                _ => LoopInstruction::ContinueAndSkip(0),
766            }
767        });
768
769        if brk.is_none() {
770            // need to close text at eof
771            self.tree.append_text(begin_text, final_ix);
772        }
773        (final_ix, brk)
774    }
775
776    /// Check whether we should allow a paragraph interrupt by lists. Only non-empty
777    /// lists are allowed.
778    fn interrupt_paragraph_by_list(&self, suffix: &[u8]) -> bool {
779        scan_listitem(suffix).map_or(false, |(ix, delim, index, _)| {
780            self.list_nesting > 0 ||
781            // we don't allow interruption by either empty lists or
782            // numbered lists starting at an index other than 1
783            !scan_empty_list(&suffix[ix..]) && (delim == b'*' || delim == b'-' || index == 1)
784        })
785    }
786
787    /// When start_ix is at the beginning of an HTML block of type 1 to 5,
788    /// this will find the end of the block, adding the block itself to the
789    /// tree and also keeping track of the lines of HTML within the block.
790    ///
791    /// The html_end_tag is the tag that must be found on a line to end the block.
792    fn parse_html_block_type_1_to_5(&mut self, start_ix: usize, html_end_tag_ix: u32,
793            mut remaining_space: usize) -> usize
794    {
795        self.tree.append(Item {
796            start: start_ix,
797            end: 0, // set later
798            body: ItemBody::HtmlBlock(Some(html_end_tag_ix)),
799        });
800        self.tree.push();
801
802        let bytes = self.text.as_bytes();
803        let mut ix = start_ix;
804        let end_ix;
805        loop {
806            let line_start_ix = ix;
807            ix += scan_nextline(&bytes[ix..]);
808            self.append_html_line(remaining_space, line_start_ix, ix);
809
810            let mut line_start = LineStart::new(&bytes[ix..]);
811            let n_containers = self.scan_containers(&mut line_start);
812            if n_containers < self.tree.spine_len() {
813                end_ix = ix;
814                break;
815            }
816
817            let html_end_tag = HTML_END_TAGS[html_end_tag_ix as usize];
818
819            if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
820                end_ix = ix;
821                break;
822            }
823
824            let next_line_ix = ix + line_start.bytes_scanned();
825            if next_line_ix == self.text.len() {
826                end_ix = next_line_ix;
827                break;
828            }
829            ix = next_line_ix;
830            remaining_space = line_start.remaining_space();
831        }
832        self.pop(end_ix);
833        ix
834    }
835
836    /// When start_ix is at the beginning of an HTML block of type 6 or 7,
837    /// this will consume lines until there is a blank line and keep track of
838    /// the HTML within the block.
839    fn parse_html_block_type_6_or_7(&mut self, start_ix: usize, mut remaining_space: usize)
840        -> usize
841    {
842        self.tree.append(Item {
843            start: start_ix,
844            end: 0, // set later
845            body: ItemBody::HtmlBlock(None)
846        });
847        self.tree.push();
848
849        let bytes = self.text.as_bytes();
850        let mut ix = start_ix;
851        let end_ix;
852        loop {
853            let line_start_ix = ix;
854            ix += scan_nextline(&bytes[ix..]);
855            self.append_html_line(remaining_space, line_start_ix, ix);
856
857            let mut line_start = LineStart::new(&bytes[ix..]);
858            let n_containers = self.scan_containers(&mut line_start);
859            if n_containers < self.tree.spine_len() || line_start.is_at_eol()
860            {
861                end_ix = ix;
862                break;
863            }
864
865            let next_line_ix = ix + line_start.bytes_scanned();
866            if next_line_ix == self.text.len()
867                || scan_blank_line(&bytes[next_line_ix..]).is_some()
868            {
869                end_ix = next_line_ix;
870                break;
871            }
872            ix = next_line_ix;
873            remaining_space = line_start.remaining_space();
874        }
875        self.pop(end_ix);
876        ix
877    }
878
879    fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize)
880        -> usize
881    {
882        self.tree.append(Item {
883            start: start_ix,
884            end: 0,  // will get set later
885            body: ItemBody::IndentCodeBlock,
886        });
887        self.tree.push();
888        let bytes = self.text.as_bytes();
889        let mut last_nonblank_child = TreePointer::Nil;
890        let mut last_nonblank_ix = 0;
891        let mut end_ix = 0;
892        let mut last_line_blank = false;
893
894        let mut ix = start_ix;
895        loop {
896            let line_start_ix = ix;
897            ix += scan_nextline(&bytes[ix..]);
898            self.append_code_text(remaining_space, line_start_ix, ix);
899            // TODO(spec clarification): should we synthesize newline at EOF?
900
901            if !last_line_blank {
902                last_nonblank_child = self.tree.cur();
903                last_nonblank_ix = ix;
904                end_ix = ix;
905            }
906
907            let mut line_start = LineStart::new(&bytes[ix..]);
908            let n_containers = self.scan_containers(&mut line_start);
909            if n_containers < self.tree.spine_len()
910                || !(line_start.scan_space(4) || line_start.is_at_eol())
911            {
912                break;
913            }
914            let next_line_ix = ix + line_start.bytes_scanned();
915            if next_line_ix == self.text.len() {
916                break;
917            }
918            ix = next_line_ix;
919            remaining_space = line_start.remaining_space();
920            last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
921        }
922
923        // Trim trailing blank lines.
924        if let TreePointer::Valid(child) = last_nonblank_child {
925            self.tree[child].next = TreePointer::Nil;
926            self.tree[child].item.end = last_nonblank_ix;
927        }
928        self.pop(end_ix);
929        ix
930    }
931
932    fn parse_fenced_code_block(&mut self, start_ix: usize, indent: usize,
933        fence_ch: u8, n_fence_char: usize) -> usize
934    {
935        let bytes = self.text.as_bytes();
936        let mut info_start = start_ix + n_fence_char;
937        info_start += scan_whitespace_no_nl(&bytes[info_start..]);
938        // TODO: info strings are typically very short. wouldnt it be faster
939        // to just do a forward scan here?
940        let mut ix = info_start + scan_nextline(&bytes[info_start..]);
941        let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
942        let info_string = unescape(&self.text[info_start..info_end]);
943        self.tree.append(Item {
944            start: start_ix,
945            end: 0,  // will get set later
946            body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
947        });
948        self.tree.push();
949        loop {
950            let mut line_start = LineStart::new(&bytes[ix..]);
951            let n_containers = self.scan_containers(&mut line_start);
952            if n_containers < self.tree.spine_len() {
953                break;
954            }
955            line_start.scan_space(indent);
956            let mut close_line_start = line_start.clone();
957            if !close_line_start.scan_space(4) {
958                let close_ix = ix + close_line_start.bytes_scanned();
959                if let Some(n) =
960                    scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
961                {
962                    ix = close_ix + n;
963                    break;
964                }
965            }
966            let remaining_space = line_start.remaining_space();
967            ix += line_start.bytes_scanned();
968            let next_ix = ix + scan_nextline(&bytes[ix..]);
969            self.append_code_text(remaining_space, ix, next_ix);
970            ix = next_ix;
971        }
972
973        self.pop(ix);
974
975        // try to read trailing whitespace or it will register as a completely blank line
976        ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
977    }
978
979    fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
980        if remaining_space > 0 {
981            let cow_ix = self.allocs.allocate_cow("   "[..remaining_space].into());
982            self.tree.append(Item {
983                start,
984                end: start,
985                body: ItemBody::SynthesizeText(cow_ix),
986            });
987        }
988        if self.text.as_bytes()[end - 2] == b'\r' {
989            // Normalize CRLF to LF
990            self.tree.append_text(start, end - 2);
991            self.tree.append_text(end - 1, end);
992        } else {
993            self.tree.append_text(start, end);
994        }
995    }
996
997    /// Appends a line of HTML to the tree.
998    fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
999        if remaining_space > 0 {
1000            let cow_ix = self.allocs.allocate_cow("   "[..remaining_space].into());
1001            self.tree.append(Item {
1002                start,
1003                end: start,
1004                // TODO: maybe this should synthesize to html rather than text?
1005                body: ItemBody::SynthesizeText(cow_ix),
1006            });
1007        }
1008        if self.text.as_bytes()[end - 2] == b'\r' {
1009            // Normalize CRLF to LF
1010            self.tree.append(Item {
1011                start,
1012                end: end - 2,
1013                body: ItemBody::Html,
1014            });
1015            self.tree.append(Item {
1016                start: end - 1,
1017                end,
1018                body: ItemBody::Html,
1019            });
1020        } else {
1021            self.tree.append(Item {
1022                start,
1023                end,
1024                body: ItemBody::Html,
1025            });
1026        }
1027    }
1028
1029    /// Returns number of containers scanned.
1030    fn scan_containers(&self, line_start: &mut LineStart) -> usize {
1031        let mut i = 0;
1032        for &node_ix in self.tree.walk_spine() {
1033            match self.tree[node_ix].item.body {
1034                ItemBody::BlockQuote => {
1035                    let save = line_start.clone();
1036                    if !line_start.scan_blockquote_marker() {
1037                        *line_start = save;
1038                        break;
1039                    }
1040                }
1041                ItemBody::ListItem(indent) => {
1042                    if !line_start.is_at_eol() {
1043                        let save = line_start.clone();
1044                        if !line_start.scan_space(indent){
1045                            *line_start = save;
1046                            break;
1047                        }
1048                    }
1049                }
1050                _ => (),
1051            }
1052            i += 1;
1053        }
1054        i
1055    }
1056
1057    /// Pop a container, setting its end.
1058    fn pop(&mut self, ix: usize) {
1059        let cur_ix = self.tree.pop().unwrap();
1060        self.tree[cur_ix].item.end = ix;
1061        if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1062            surgerize_tight_list(&mut self.tree, cur_ix);
1063        }
1064    }
1065
1066    /// Close a list if it's open. Also set loose if last line was blank
1067    fn finish_list(&mut self, ix: usize) {
1068        if let Some(node_ix) = self.tree.peek_up() {
1069            if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1070                self.pop(ix);
1071                self.list_nesting -= 1; 
1072            }
1073        }
1074        if self.last_line_blank {
1075            if let Some(node_ix) = self.tree.peek_grandparent() {
1076                if let ItemBody::List(ref mut is_tight, _, _) =
1077                    self.tree[node_ix].item.body
1078                {
1079                    *is_tight = false;
1080                }
1081            }
1082            self.last_line_blank = false;
1083        }
1084    }
1085
1086    /// Continue an existing list or start a new one if there's not an open
1087    /// list that matches.
1088    fn continue_list(&mut self, start: usize, ch: u8, index: usize) {
1089        if let Some(node_ix) = self.tree.peek_up() {
1090            if let ItemBody::List(ref mut is_tight, existing_ch, _) =
1091                self.tree[node_ix].item.body
1092            {
1093                if existing_ch == ch {
1094                    if self.last_line_blank {
1095                        *is_tight = false;
1096                        self.last_line_blank = false;
1097                    }
1098                    return;
1099                }
1100            }
1101            // TODO: this is not the best choice for end; maybe get end from last list item.
1102            self.finish_list(start);
1103        }
1104        self.tree.append(Item {
1105            start,
1106            end: 0,  // will get set later
1107            body: ItemBody::List(true, ch, index),
1108        });
1109        self.list_nesting += 1;
1110        self.tree.push();
1111        self.last_line_blank = false;
1112    }
1113
1114    /// Parse a thematic break.
1115    ///
1116    /// Returns index of start of next line.
1117    fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1118        self.tree.append(Item {
1119            start: ix,
1120            end: ix + hrule_size,
1121            body: ItemBody::Rule,
1122        });
1123        ix + hrule_size
1124    }
1125
1126    /// Parse an ATX heading.
1127    ///
1128    /// Returns index of start of next line.
1129    fn parse_atx_heading(&mut self, mut ix: usize, atx_level: i32, atx_size: usize) -> usize {
1130        self.tree.append(Item {
1131            start: ix,
1132            end: 0, // set later
1133            body: ItemBody::Header(atx_level),
1134        });
1135        ix += atx_size;
1136        // next char is space or scan_eol
1137        // (guaranteed by scan_atx_heading)
1138        let bytes = self.text.as_bytes();
1139        if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1140            return ix + eol_bytes;
1141        }
1142        // skip leading spaces
1143        let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1144        ix += skip_spaces;
1145
1146        // now handle the header text
1147        let header_start = ix;
1148        let header_node_idx = self.tree.push(); // so that we can set the endpoint later
1149        ix = self.parse_line(ix, TableParseMode::Disabled).0;
1150        self.tree[header_node_idx].item.end = ix;
1151
1152        // remove trailing matter from header text
1153        if let TreePointer::Valid(cur_ix) = self.tree.cur() {
1154            let header_text = &bytes[header_start..ix];
1155            let mut limit = header_text.iter()
1156                .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1157                .map_or(0, |i| i + 1);
1158            let closer = header_text[..limit].iter()
1159                .rposition(|&b| b != b'#')
1160                .map_or(0, |i| i + 1);
1161            if closer == 0 {
1162                limit = closer;
1163            } else {
1164                let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1165                if spaces > 0 {
1166                    limit = closer - spaces;
1167                }
1168            }
1169            self.tree[cur_ix].item.end = limit + header_start;
1170        }
1171
1172        self.tree.pop();
1173        ix
1174    }
1175
1176    /// Returns the number of bytes scanned on success.
1177    fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1178        let bytes = &self.text.as_bytes()[start..];
1179        if !bytes.starts_with(b"[^") {
1180            return None;
1181        }
1182        let (mut i, label) = scan_link_label_rest(&self.text[(start + 2)..])?;
1183        i += 2;
1184        if scan_ch(&bytes[i..], b':') == 0 {
1185            return None;
1186        }
1187        i += 1;
1188        self.tree.append(Item {
1189            start,
1190            end: 0,  // will get set later
1191            body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)), // TODO: check whether the label here is strictly necessary
1192        });
1193        self.tree.push();
1194        Some(i)
1195    }
1196
1197    /// Returns number of bytes scanned, label and definition on success.
1198    fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1199        let bytes = &self.text.as_bytes()[start..];
1200        if scan_ch(bytes, b'[') == 0 {
1201            return None;
1202        }
1203        let (mut i, label) = scan_link_label_rest(&self.text[(start + 1)..])?;
1204        i += 1;
1205        if scan_ch(&bytes[i..], b':') == 0 {
1206            return None;
1207        }
1208        i += 1;
1209        let (bytecount, link_def) = self.scan_refdef(start + i)?;
1210        Some((bytecount + i, UniCase::new(label), link_def))
1211    }
1212
1213    /// Returns number of bytes and number of newlines 
1214    fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1215        let mut newlines = 0;
1216        loop {
1217            let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1218            i += whitespaces;
1219            if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1220                i += eol_bytes;
1221                newlines += 1;
1222                if newlines > 1 {
1223                    return None;
1224                }
1225            } else {
1226                break;
1227            }
1228            let mut line_start = LineStart::new(&bytes[i..]);
1229            if self.tree.spine_len() != self.scan_containers(&mut line_start) {
1230                return None;
1231            }
1232            i += line_start.bytes_scanned();
1233        }
1234        Some((i, newlines))
1235    }
1236
1237    /// Returns # of bytes and definition.
1238    /// Assumes the label of the reference including colon has already been scanned.
1239    fn scan_refdef(&self, start: usize) -> Option<(usize, LinkDef<'a>)> {
1240        let bytes = self.text.as_bytes();
1241
1242        // whitespace between label and url (including up to one newline)
1243        let (mut i, _newlines) = self.scan_refdef_space(&bytes, start)?;
1244
1245        // scan link dest
1246        let (dest_length, dest) = scan_link_dest(&self.text, i, 1)?;
1247        if dest_length == 0 {
1248            return None;
1249        }
1250        let dest = unescape(dest);
1251        i += dest_length;
1252
1253        // no title
1254        let mut backup = (i - start, LinkDef { dest, title: None });
1255
1256        // scan whitespace between dest and label
1257        let (mut i, newlines) = if let Some((new_i, mut newlines)) = self.scan_refdef_space(&bytes, i) {
1258            if i == self.text.len() {
1259                newlines += 1;
1260            }
1261            if new_i == i && newlines == 0 {
1262                return None;
1263            }
1264            if newlines > 1 {
1265                return Some(backup);
1266            };
1267            (new_i, newlines)
1268        } else {
1269            return Some(backup);
1270        };
1271
1272        // scan title
1273        // if this fails but newline == 1, return also a refdef without title
1274        if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
1275            i += title_length;
1276            backup.1.title = Some(unescape(title));
1277        } else if newlines > 0 {
1278            return Some(backup);
1279        } else {
1280            return None;
1281        };
1282
1283        // scan EOL
1284        if let Some(bytes) = scan_blank_line(&bytes[i..]) {
1285            backup.0 = i + bytes - start;
1286            Some(backup)
1287        } else if newlines > 0 {
1288            Some(backup)
1289        } else {
1290            None
1291        }
1292    }
1293}
1294
1295/// Computes the number of header columns in a table line by computing the number of dividing pipes
1296/// that aren't followed or preceeded by whitespace.
1297fn count_header_cols(bytes: &[u8], mut pipes: usize, mut start: usize, last_pipe_ix: usize) -> usize {
1298    // was first pipe preceeded by whitespace? if so, subtract one
1299    start += scan_whitespace_no_nl(&bytes[start..]);
1300    if bytes[start] == b'|' {
1301        pipes -= 1;
1302    }
1303
1304    // was last pipe followed by whitespace? if so, sub one
1305    if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
1306        pipes
1307    } else {
1308        pipes + 1
1309    }
1310}
1311
1312impl<'a> Tree<Item> {
1313    fn append_text(&mut self, start: usize, end: usize) {
1314        if end > start {
1315            if let TreePointer::Valid(ix) = self.cur() {
1316                if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
1317                    self[ix].item.end = end;
1318                    return;
1319                }
1320            }
1321            self.append(Item {
1322                start,
1323                end,
1324                body: ItemBody::Text,
1325            });
1326        }
1327    }
1328}
1329
1330/// Determines whether the delimiter run starting at given index is
1331/// left-flanking, as defined by the commonmark spec (and isn't intraword
1332/// for _ delims).
1333/// suffix is &s[ix..], which is passed in as an optimization, since taking
1334/// a string subslice is O(n).
1335fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1336    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1337        c
1338    } else {
1339        return false;
1340    };
1341    if next_char.is_whitespace() {
1342        return false;
1343    }
1344    if ix == 0 {
1345        return true;
1346    }
1347    let delim = suffix.chars().next().unwrap();
1348    if delim == '*' && !is_punctuation(next_char) {
1349        return true;
1350    }
1351
1352    let prev_char = s[..ix].chars().last().unwrap();
1353
1354    prev_char.is_whitespace() || is_punctuation(prev_char)
1355}
1356
1357/// Determines whether the delimiter run starting at given index is
1358/// left-flanking, as defined by the commonmark spec (and isn't intraword
1359/// for _ delims)
1360fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1361    if ix == 0 {
1362        return false;
1363    }
1364    let prev_char = s[..ix].chars().last().unwrap();
1365    if prev_char.is_whitespace() {
1366        return false;
1367    }
1368    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1369        c
1370    } else {
1371        return true;
1372    };
1373    let delim = suffix.chars().next().unwrap();
1374    if delim == '*' && !is_punctuation(prev_char) {
1375        return true;
1376    }
1377
1378    next_char.is_whitespace() || is_punctuation(next_char)
1379}
1380
1381/// Checks whether we should break a paragraph on the given input.
1382/// Note: lists are dealt with in `interrupt_paragraph_by_list`, because determing
1383/// whether to break on a list requires additional context.
1384fn scan_paragraph_interrupt(bytes: &[u8]) -> bool {
1385    if scan_eol(bytes).is_some() ||
1386        scan_hrule(bytes).is_ok() ||
1387        scan_atx_heading(bytes).is_some() ||
1388        scan_code_fence(bytes).is_some() ||
1389        scan_blockquote_start(bytes).is_some()
1390    {
1391        return true;
1392    }
1393    bytes.starts_with(b"<") &&
1394        (get_html_end_tag(&bytes[1..]).is_some() || is_html_tag(scan_html_block_tag(&bytes[1..]).1))
1395}
1396
1397static HTML_END_TAGS: &[&str; 7] = &["</pre>", "</style>", "</script>", "-->", "?>", "]]>", ">"];
1398
1399/// Returns an index into HTML_END_TAGS.
1400/// Assumes `text_bytes` is preceded by `<`.
1401fn get_html_end_tag(text_bytes : &[u8]) -> Option<u32> {
1402    static BEGIN_TAGS: &[&[u8]; 3] = &[b"pre", b"style", b"script"];
1403    static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
1404
1405    for (beg_tag, end_tag_ix) in BEGIN_TAGS.iter().zip(0..3) {
1406        let tag_len = beg_tag.len();
1407
1408        if text_bytes.len() < tag_len {
1409            // begin tags are increasing in size
1410            break;
1411        }
1412
1413        if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
1414            continue;
1415        }
1416
1417        // Must either be the end of the line...
1418        if text_bytes.len() == tag_len {
1419            return Some(end_tag_ix);
1420        }
1421
1422        // ...or be followed by whitespace, newline, or '>'.
1423        let s = text_bytes[tag_len];
1424        if is_ascii_whitespace(s) || s == b'>' {
1425            return Some(end_tag_ix);
1426        }
1427    }
1428
1429    for (beg_tag, end_tag_ix) in ST_BEGIN_TAGS.iter().zip(3..6) {
1430        if text_bytes.starts_with(beg_tag) {
1431            return Some(end_tag_ix);
1432        }
1433    }
1434
1435    if text_bytes.len() > 1 && text_bytes[0] == b'!' 
1436        && text_bytes[1] >= b'A' && text_bytes[1] <= b'Z' {
1437        Some(6)
1438    } else {
1439        None
1440    }
1441}
1442
1443#[derive(Copy, Clone, Debug)]
1444struct InlineEl {
1445    start: TreeIndex,  // offset of tree node
1446    count: usize,
1447    c: u8,  // b'*' or b'_'
1448    both: bool,  // can both open and close
1449}
1450
1451#[derive(Debug, Clone, Default)]
1452struct InlineStack {
1453    stack: Vec<InlineEl>,
1454    // Lower bounds for matching indices in the stack. For example
1455    // a strikethrough delimiter will never match with any element
1456    // in the stack with index smaller than
1457    // `lower_bounds[InlineStack::TILDES]`.
1458    lower_bounds: [usize; 7],
1459}
1460
1461impl InlineStack {
1462    /// These are indices into the lower bounds array.
1463    /// Not both refers to the property that the delimiter can not both
1464    /// be opener as a closer.
1465    const UNDERSCORE_NOT_BOTH: usize = 0;
1466    const ASTERISK_NOT_BOTH: usize = 1;
1467    const ASTERISK_BASE: usize = 2;
1468    const TILDES: usize = 5;
1469    const UNDERSCORE_BOTH: usize = 6;
1470
1471    fn pop_all<'a>(&mut self, tree: &mut Tree<Item>) {
1472        for el in self.stack.drain(..) {
1473            for i in 0..el.count {
1474                tree[el.start + i].item.body = ItemBody::Text;
1475            }
1476        }
1477        self.lower_bounds = [0; 7];
1478    }
1479
1480    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1481        if c == b'_' {
1482            if both {
1483                self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
1484            } else {
1485                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
1486            }
1487        } else if c == b'*' {
1488            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1489            if both {
1490                mod3_lower
1491            } else {
1492                min(mod3_lower, self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH])
1493            }
1494        } else {
1495            self.lower_bounds[InlineStack::TILDES]
1496        }
1497    }
1498
1499    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1500        if c == b'_' {
1501            if both {
1502                self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
1503            } else {
1504                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1505            }
1506        } else if c == b'*' {
1507            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1508            if !both {
1509                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1510            }
1511        } else {
1512            self.lower_bounds[InlineStack::TILDES] = new_bound;
1513        }
1514    }
1515
1516    fn find_match<'a>(&mut self, tree: &mut Tree<Item>, c: u8, count: usize, both: bool)
1517        -> Option<InlineEl>
1518    {
1519        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1520        let res = self.stack[lowerbound..]
1521            .iter()
1522            .cloned()
1523            .enumerate()
1524            .rfind(|(_, el)| {
1525                el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
1526            });
1527
1528        if let Some((matching_ix, matching_el)) = res {
1529            let matching_ix = matching_ix + lowerbound;
1530            for el in &self.stack[(matching_ix + 1)..] {
1531                for i in 0..el.count {
1532                    tree[el.start + i].item.body = ItemBody::Text;
1533                }
1534            }
1535            self.stack.truncate(matching_ix);
1536            Some(matching_el)
1537        } else {
1538            self.set_lowerbound(c, count, both, self.stack.len());
1539            None
1540        }
1541    }
1542
1543    fn push(&mut self, el: InlineEl) {
1544        self.stack.push(el)
1545    }
1546}
1547
1548#[derive(Debug, Clone)]
1549enum RefScan<'a> {
1550    // label, next node index
1551    LinkLabel(CowStr<'a>, TreePointer),
1552    // contains next node index
1553    Collapsed(TreePointer),
1554    Failed,
1555}
1556
1557fn scan_nodes_to_ix(tree: &Tree<Item>, mut node: TreePointer, ix: usize) -> TreePointer {
1558    while let TreePointer::Valid(node_ix) = node {
1559        if tree[node_ix].item.end <= ix {
1560            node = tree[node_ix].next;
1561        } else {
1562            break;
1563        }
1564    }
1565    node
1566}
1567
1568fn scan_reference<'a, 'b>(tree: &'a Tree<Item>, text: &'b str, cur: TreePointer) -> RefScan<'b> {
1569    let cur_ix = match cur {
1570        TreePointer::Nil => return RefScan::Failed,
1571        TreePointer::Valid(cur_ix) => cur_ix,
1572    };
1573    let start = tree[cur_ix].item.start;
1574    let tail = &text.as_bytes()[start..];
1575    
1576    if tail.starts_with(b"[]") {
1577        let closing_node = tree[cur_ix].next.unwrap();
1578        RefScan::Collapsed(tree[closing_node].next)
1579    } else if let Some((ix, ReferenceLabel::Link(label))) = scan_link_label(&text[start..]) {
1580        let next_node = scan_nodes_to_ix(tree, cur, start + ix);
1581        RefScan::LinkLabel(label, next_node)
1582    } else {
1583        RefScan::Failed
1584    }
1585}
1586
1587#[derive(Clone, Default)]
1588struct LinkStack {
1589    inner: Vec<LinkStackEl>,
1590    disabled_ix: usize,
1591}
1592
1593impl LinkStack {
1594    fn push(&mut self, el: LinkStackEl) {
1595        self.inner.push(el);
1596    }
1597
1598    fn pop(&mut self) -> Option<LinkStackEl> {
1599        let el = self.inner.pop();
1600        self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1601        el
1602    }
1603
1604    fn clear(&mut self) {
1605        self.inner.clear();
1606        self.disabled_ix = 0;
1607    }
1608
1609    fn disable_all_links(&mut self) {
1610        for el in &mut self.inner[self.disabled_ix..] {
1611            if el.ty == LinkStackTy::Link {
1612                el.ty = LinkStackTy::Disabled;
1613            }
1614        }
1615        self.disabled_ix = self.inner.len();
1616    }
1617}
1618
1619#[derive(Clone, Debug)]
1620struct LinkStackEl {
1621    node: TreeIndex,
1622    ty: LinkStackTy,
1623}
1624
1625#[derive(PartialEq, Clone, Debug)]
1626enum LinkStackTy {
1627    Link,
1628    Image,
1629    Disabled,
1630}
1631
1632#[derive(Clone)]
1633struct LinkDef<'a> {
1634    dest: CowStr<'a>,
1635    title: Option<CowStr<'a>>,
1636}
1637
1638/// Tracks tree indices of code span delimiters of each length. It should prevent
1639/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1640struct CodeDelims {
1641    inner: HashMap<usize, VecDeque<TreeIndex>>,
1642    seen_first: bool,
1643}
1644
1645impl CodeDelims {
1646    fn new() -> Self {
1647        Self {
1648            inner: Default::default(),
1649            seen_first: false,
1650        }
1651    }
1652
1653    fn insert(&mut self, count: usize, ix: TreeIndex) {
1654        if self.seen_first {
1655            self.inner.entry(count).or_insert_with(Default::default).push_back(ix);
1656        } else {
1657            // Skip the first insert, since that delimiter will always
1658            // be an opener and not a closer.
1659            self.seen_first = true;
1660        }        
1661    }
1662
1663    fn is_populated(&self) -> bool {
1664        !self.inner.is_empty()
1665    }
1666
1667    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1668        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1669            if ix > open_ix {
1670                return Some(ix);
1671            }
1672        }
1673        None
1674    }
1675
1676    fn clear(&mut self) {
1677        self.inner.clear();
1678        self.seen_first = false;
1679    }
1680}
1681
1682#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1683struct LinkIndex(usize);
1684
1685#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1686struct CowIndex(usize);
1687
1688#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1689struct AlignmentIndex(usize);
1690
1691#[derive(Clone)]
1692struct Allocations<'a> {
1693    refdefs: HashMap<LinkLabel<'a>, LinkDef<'a>>,
1694    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1695    cows: Vec<CowStr<'a>>,
1696    alignments: Vec<Vec<Alignment>>,
1697}
1698
1699impl<'a> Allocations<'a> {
1700    fn new() -> Self {
1701        Self {
1702            refdefs: HashMap::new(),
1703            links: Vec::with_capacity(128),
1704            cows: Vec::new(),
1705            alignments: Vec::new(),
1706        }
1707    }
1708
1709    fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1710        let ix = self.cows.len();
1711        self.cows.push(cow);
1712        CowIndex(ix)
1713    }
1714
1715    fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1716        let ix = self.links.len();
1717        self.links.push((ty, url, title));
1718        LinkIndex(ix)
1719    }
1720
1721    fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1722        let ix = self.alignments.len();
1723        self.alignments.push(alignment);
1724        AlignmentIndex(ix)
1725    }
1726}
1727
1728impl<'a> Index<CowIndex> for Allocations<'a> {
1729    type Output = CowStr<'a>;
1730
1731    fn index(&self, ix: CowIndex) -> &Self::Output {
1732        self.cows.index(ix.0)
1733    }
1734}
1735
1736impl<'a> Index<LinkIndex> for Allocations<'a> {
1737    type Output = (LinkType, CowStr<'a>, CowStr<'a>);
1738
1739    fn index(&self, ix: LinkIndex) -> &Self::Output {
1740        self.links.index(ix.0)
1741    }
1742}
1743
1744impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1745    type Output = Vec<Alignment>;
1746
1747    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1748        self.alignments.index(ix.0)
1749    }
1750}
1751
1752/// A struct containing information on the reachability of certain inline HTML
1753/// elements. In particular, for cdata elements (`<![CDATA[`), processing
1754/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
1755/// represent the indices before which a scan will always fail and can hence
1756/// be skipped.
1757#[derive(Clone, Default)]
1758pub(crate) struct HtmlScanGuard {
1759    pub cdata: usize,
1760    pub processing: usize,
1761    pub declaration: usize,
1762}
1763
1764#[derive(Clone)]
1765pub struct Parser<'a> {
1766    text: &'a str,
1767    tree: Tree<Item>,
1768    allocs: Allocations<'a>,
1769    broken_link_callback: Option<&'a Fn(&str, &str) -> Option<(String, String)>>,
1770    offset: usize,
1771    html_scan_guard: HtmlScanGuard,
1772
1773    // used by inline passes. store them here for reuse
1774    inline_stack: InlineStack,
1775    link_stack: LinkStack,
1776}
1777
1778impl<'a> Parser<'a> {
1779    pub fn new(text: &'a str) -> Parser<'a> {
1780        Parser::new_ext(text, Options::empty())
1781    }
1782
1783    pub fn new_ext(text: &'a str, options: Options) -> Parser<'a> {
1784        Parser::new_with_broken_link_callback(text, options, None)
1785    }
1786
1787    /// In case the parser encounters any potential links that have a broken
1788    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
1789    /// the provided callback will be called with the reference name,
1790    /// and the returned pair will be used as the link name and title if not
1791    /// None.
1792    pub fn new_with_broken_link_callback(
1793        text: &'a str,
1794        options: Options,
1795        broken_link_callback: Option<&'a Fn(&str, &str) -> Option<(String, String)>>
1796    ) -> Parser<'a> {
1797        let first_pass = FirstPass::new(text, options);
1798        let (mut tree, allocs) = first_pass.run();
1799        tree.reset();
1800        let inline_stack = Default::default();
1801        let link_stack = Default::default();
1802        let html_scan_guard = Default::default();
1803        Parser {
1804            text, tree, allocs, broken_link_callback,
1805            offset: 0, inline_stack, link_stack, html_scan_guard,
1806        }
1807    }
1808
1809    pub fn get_offset(&self) -> usize {
1810        self.offset
1811    }
1812
1813    /// Handle inline markup.
1814    ///
1815    /// When the parser encounters any item indicating potential inline markup, all
1816    /// inline markup passes are run on the remainder of the chain.
1817    ///
1818    /// Note: there's some potential for optimization here, but that's future work.
1819    fn handle_inline(&mut self) {
1820        self.handle_inline_pass1();
1821        self.handle_emphasis();
1822    }
1823
1824    /// Handle inline HTML, code spans, and links.
1825    ///
1826    /// This function handles both inline HTML and code spans, because they have
1827    /// the same precedence. It also handles links, even though they have lower
1828    /// precedence, because the URL of links must not be processed.
1829    fn handle_inline_pass1(&mut self) {
1830        let mut code_delims = CodeDelims::new();
1831        let mut cur = self.tree.cur();
1832        let mut prev = TreePointer::Nil;
1833
1834        while let TreePointer::Valid(mut cur_ix) = cur {
1835            match self.tree[cur_ix].item.body {
1836                ItemBody::MaybeHtml => {
1837                    let next = self.tree[cur_ix].next;
1838                    let autolink = if let TreePointer::Valid(next_ix) = next {
1839                        scan_autolink(self.text, self.tree[next_ix].item.start)
1840                    } else {
1841                        None
1842                    };
1843
1844                    if let Some((ix, uri, link_type)) = autolink {
1845                        let node = scan_nodes_to_ix(&self.tree, next, ix);
1846                        let text_node = self.tree.create_node(Item {
1847                            start: self.tree[cur_ix].item.start + 1,
1848                            end: ix - 1,
1849                            body: ItemBody::Text,
1850                        });
1851                        let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
1852                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
1853                        self.tree[cur_ix].item.end = ix;
1854                        self.tree[cur_ix].next = node;
1855                        self.tree[cur_ix].child = TreePointer::Valid(text_node);
1856                        prev = cur;
1857                        cur = node;
1858                        if let TreePointer::Valid(node_ix) = cur {
1859                            self.tree[node_ix].item.start = ix;
1860                        }
1861                        continue;
1862                    } else {
1863                        let inline_html = if let TreePointer::Valid(next_ix) = next {
1864                            self.tree.peek_up()
1865                                .map(|parent_ix| self.tree[parent_ix].item.end)
1866                                .and_then(|end_offset| {
1867                                    let bytes = &self.text.as_bytes()[..end_offset];
1868                                    scan_inline_html(bytes, self.tree[next_ix].item.start, &mut self.html_scan_guard)
1869                                })
1870                        } else {
1871                            None
1872                        };
1873                        if let Some(ix) = inline_html {
1874                            let node = scan_nodes_to_ix(&self.tree, next, ix);
1875                            // TODO: this logic isn't right if the replaced chain has
1876                            // tricky stuff (skipped containers, replaced nulls).
1877                            self.tree[cur_ix].item.body = ItemBody::InlineHtml;
1878                            self.tree[cur_ix].item.end = ix;
1879                            self.tree[cur_ix].next = node;
1880                            prev = cur;
1881                            cur = node;
1882                            if let TreePointer::Valid(node_ix) = cur {
1883                                self.tree[node_ix].item.start = ix;
1884                            }
1885                            continue;
1886                        }
1887                    }
1888                    self.tree[cur_ix].item.body = ItemBody::Text;
1889                }
1890                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
1891                    if preceded_by_backslash {
1892                        search_count -= 1;
1893                        if search_count == 0 {
1894                            self.tree[cur_ix].item.body = ItemBody::Text;
1895                            prev = cur;
1896                            cur = self.tree[cur_ix].next;
1897                            continue;
1898                        }
1899                    }
1900
1901                    if code_delims.is_populated() {
1902                        // we have previously scanned all codeblock delimiters,
1903                        // so we can reuse that work
1904                        if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
1905                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1906                        } else {
1907                            self.tree[cur_ix].item.body = ItemBody::Text;
1908                        }
1909                    } else {
1910                        // we haven't previously scanned all codeblock delimiters,
1911                        // so walk the AST
1912                        let mut scan = if search_count > 0 { self.tree[cur_ix].next } else { TreePointer::Nil };
1913                        while let TreePointer::Valid(scan_ix) = scan {
1914                            if let ItemBody::MaybeCode(delim_count, _) = self.tree[scan_ix].item.body {
1915                                if search_count == delim_count {
1916                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1917                                    code_delims.clear();
1918                                    break;
1919                                } else {
1920                                    code_delims.insert(delim_count, scan_ix);
1921                                }
1922                            }
1923                            scan = self.tree[scan_ix].next;
1924                        }
1925                        if scan == TreePointer::Nil {
1926                            self.tree[cur_ix].item.body = ItemBody::Text;
1927                        }
1928                    }
1929                }
1930                ItemBody::MaybeLinkOpen => {
1931                    self.tree[cur_ix].item.body = ItemBody::Text;
1932                    self.link_stack.push(LinkStackEl { node: cur_ix, ty: LinkStackTy::Link });
1933                }
1934                ItemBody::MaybeImage => {
1935                    self.tree[cur_ix].item.body = ItemBody::Text;
1936                    self.link_stack.push(LinkStackEl { node: cur_ix, ty: LinkStackTy::Image });
1937                }
1938                ItemBody::MaybeLinkClose => {
1939                    if let Some(tos) = self.link_stack.pop() {
1940                        if tos.ty == LinkStackTy::Disabled {
1941                            self.tree[cur_ix].item.body = ItemBody::Text;
1942                            continue;
1943                        }
1944                        let next = self.tree[cur_ix].next;
1945
1946                        if let Some((next_ix, url, title)) = scan_inline_link(self.text, self.tree[cur_ix].item.end) {
1947                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
1948                            if let TreePointer::Valid(prev_ix) = prev {
1949                                self.tree[prev_ix].next = TreePointer::Nil;
1950                            }                            
1951                            cur = TreePointer::Valid(tos.node);
1952                            cur_ix = tos.node;
1953                            let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
1954                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
1955                                ItemBody::Image(link_ix)
1956                            } else {
1957                                ItemBody::Link(link_ix)
1958                            };
1959                            self.tree[cur_ix].child = self.tree[cur_ix].next;
1960                            self.tree[cur_ix].next = next_node;
1961                            if let TreePointer::Valid(next_node_ix) = next_node {
1962                                self.tree[next_node_ix].item.start = next_ix;
1963                            }
1964
1965                            if tos.ty == LinkStackTy::Link {
1966                                self.link_stack.disable_all_links();
1967                            }
1968                        } else {
1969                            // ok, so its not an inline link. maybe it is a reference
1970                            // to a defined link?
1971                            let scan_result = scan_reference(&self.tree, &self.text, next);
1972                            let label_node = self.tree[tos.node].next;
1973                            let node_after_link = match scan_result {
1974                                RefScan::LinkLabel(_, next_node) => next_node,
1975                                RefScan::Collapsed(next_node) => next_node,
1976                                RefScan::Failed => next,
1977                            };
1978                            let link_type = match &scan_result {
1979                                RefScan::LinkLabel(..) => LinkType::Reference,
1980                                RefScan::Collapsed(..) => LinkType::Collapsed,
1981                                RefScan::Failed => LinkType::Shortcut,
1982                            };
1983                            let label: Option<ReferenceLabel<'a>> = match scan_result {
1984                                RefScan::LinkLabel(l, ..) => Some(ReferenceLabel::Link(l)),
1985                                RefScan::Collapsed(..) | RefScan::Failed => {
1986                                    // No label? maybe it is a shortcut reference
1987                                    let start = self.tree[tos.node].item.end - 1;
1988                                    let end = self.tree[cur_ix].item.end;
1989                                    let search_text = &self.text[start..end];
1990
1991                                    scan_link_label(search_text).map(|(_ix, label)| label)
1992                                }
1993                            };
1994
1995                            // see if it's a footnote reference
1996                            if let Some(ReferenceLabel::Footnote(l)) = label {
1997                                self.tree[tos.node].next = node_after_link;
1998                                self.tree[tos.node].child = TreePointer::Nil;
1999                                self.tree[tos.node].item.body = ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
2000                                prev = TreePointer::Valid(tos.node);
2001                                cur = node_after_link;
2002                                self.link_stack.clear();
2003                                continue;
2004                            } else if let Some(ReferenceLabel::Link(link_label)) = label {
2005                                let type_url_title = if let Some(matching_def) = self.allocs.refdefs.get(&UniCase::new(link_label.as_ref().into())) {
2006                                    // found a matching definition!
2007                                    let title = matching_def.title.as_ref().cloned().unwrap_or("".into());
2008                                    let url = matching_def.dest.clone();
2009                                    Some((link_type, url, title))
2010                                } else if let Some(callback) = self.broken_link_callback {
2011                                    // looked for matching definition, but didn't find it. try to fix
2012                                    // link with callback, if it is defined
2013                                    if let Some((url, title)) = callback(link_label.as_ref(), link_label.as_ref()) {
2014                                        Some((link_type.to_unknown(), url.into(), title.into()))
2015                                    } else {
2016                                        None
2017                                    }
2018                                } else {
2019                                    None
2020                                };
2021
2022                                if let Some((def_link_type, url, title)) = type_url_title {
2023                                    let link_ix = self.allocs.allocate_link(def_link_type, url, title);
2024                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image {
2025                                        ItemBody::Image(link_ix)
2026                                    } else {
2027                                        ItemBody::Link(link_ix)
2028                                    };
2029
2030                                    // lets do some tree surgery to add the link to the tree
2031                                    // 1st: skip the label node and close node
2032                                    self.tree[tos.node].next = node_after_link;
2033
2034                                    // then, add the label node as a child to the link node
2035                                    self.tree[tos.node].child = label_node;
2036
2037                                    // finally: disconnect list of children
2038                                    if let TreePointer::Valid(prev_ix) = prev {
2039                                        self.tree[prev_ix].next = TreePointer::Nil;
2040                                    }                                
2041
2042                                    // set up cur so next node will be node_after_link
2043                                    cur = TreePointer::Valid(tos.node);
2044                                    cur_ix = tos.node;
2045
2046                                    if tos.ty == LinkStackTy::Link {
2047                                        self.link_stack.disable_all_links();
2048                                    }
2049                                } else {
2050                                    self.tree[cur_ix].item.body = ItemBody::Text;
2051                                }
2052                            } else {
2053                                self.tree[cur_ix].item.body = ItemBody::Text;
2054                            }
2055                        }
2056                    } else {
2057                        self.tree[cur_ix].item.body = ItemBody::Text;
2058                    }
2059                }
2060                _ => (),
2061            }
2062            prev = cur;
2063            cur = self.tree[cur_ix].next;
2064        }
2065        self.link_stack.clear();
2066    }
2067
2068    fn handle_emphasis(&mut self) {
2069        let mut prev = TreePointer::Nil;
2070        let mut prev_ix: TreeIndex;
2071        let mut cur = self.tree.cur();
2072        while let TreePointer::Valid(mut cur_ix) = cur {
2073            if let ItemBody::MaybeEmphasis(mut count, can_open, can_close) = self.tree[cur_ix].item.body {
2074                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
2075                let both = can_open && can_close;
2076                if can_close {
2077                    while let Some(el) = self.inline_stack.find_match(&mut self.tree, c, count, both) {
2078                        // have a match!
2079                        if let TreePointer::Valid(prev_ix) = prev {
2080                            self.tree[prev_ix].next = TreePointer::Nil;
2081                        }                        
2082                        let match_count = min(count, el.count);
2083                        // start, end are tree node indices
2084                        let mut end = cur_ix - 1;
2085                        let mut start = el.start + el.count;
2086
2087                        // work from the inside out
2088                        while start > el.start + el.count - match_count {
2089                            let (inc, ty) = if c == b'~' {
2090                                (2, ItemBody::Strikethrough)
2091                            } else if start > el.start + el.count - match_count + 1 {
2092                                (2, ItemBody::Strong)
2093                            } else {
2094                                (1, ItemBody::Emphasis)
2095                            };
2096
2097                            let root = start - inc;
2098                            end = end + inc;
2099                            self.tree[root].item.body = ty;
2100                            self.tree[root].item.end = self.tree[end].item.end;
2101                            self.tree[root].child = TreePointer::Valid(start);
2102                            self.tree[root].next = TreePointer::Nil;
2103                            start = root;
2104                        }
2105
2106                        // set next for top most emph level
2107                        prev_ix = el.start + el.count - match_count;
2108                        prev = TreePointer::Valid(prev_ix);
2109                        cur = self.tree[cur_ix + match_count - 1].next;
2110                        self.tree[prev_ix].next = cur;
2111
2112                        if el.count > match_count {
2113                            self.inline_stack.push(InlineEl {
2114                                start: el.start,
2115                                count: el.count - match_count,
2116                                c: el.c,
2117                                both,
2118                            })
2119                        }
2120                        count -= match_count;
2121                        if count > 0 {
2122                            cur_ix = cur.unwrap();
2123                        } else {
2124                            break;
2125                        }
2126                    }
2127                }
2128                if count > 0 {
2129                    if can_open {
2130                        self.inline_stack.push(InlineEl {
2131                            start: cur_ix,
2132                            count,
2133                            c,
2134                            both,
2135                        });
2136                    } else {
2137                        for i in 0..count {
2138                            self.tree[cur_ix + i].item.body = ItemBody::Text;
2139                        }
2140                    }
2141                    prev_ix = cur_ix + count - 1;
2142                    prev = TreePointer::Valid(prev_ix);
2143                    cur = self.tree[prev_ix].next;
2144                }
2145            } else {
2146                prev = cur;
2147                cur = self.tree[cur_ix].next;
2148            }
2149        }
2150        self.inline_stack.pop_all(&mut self.tree);
2151    }
2152
2153    /// Make a code span.
2154    ///
2155    /// Both `open` and `close` are matching MaybeCode items.
2156    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2157        let first_ix = open + 1;
2158        let last_ix = close - 1;
2159        let bytes = self.text.as_bytes();
2160        let mut span_start = self.tree[first_ix].item.start;
2161        let mut span_end = self.tree[close].item.start;
2162        let mut buf: Option<String> = None;
2163
2164        // detect all-space sequences, since they are kept as-is as of commonmark 0.29
2165        if ! bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2166            let opening = match bytes[span_start]   { b' ' | b'\r' | b'\n' => true, _ => false };
2167            let closing = match bytes[span_end - 1] { b' ' | b'\r' | b'\n' => true, _ => false };
2168            let drop_enclosing_whitespace = opening && closing;
2169
2170            if drop_enclosing_whitespace {
2171                span_start += 1;
2172                if span_start < span_end  {
2173                    span_end -= 1;
2174                }
2175            }
2176
2177            let mut ix = first_ix;
2178
2179            while ix < close {
2180                match self.tree[ix].item.body {
2181                    ItemBody::HardBreak | ItemBody::SoftBreak => {
2182                        if drop_enclosing_whitespace &&
2183                            (ix == first_ix && bytes[self.tree[ix].item.start] != b'\\') ||
2184                            (ix == last_ix && last_ix > first_ix) {
2185                            // just ignore it
2186                        } else {
2187                            let end = bytes[self.tree[ix].item.start..]
2188                                .iter()
2189                                .position(|&b| b == b'\r' || b == b'\n')
2190                                .unwrap()
2191                                + self.tree[ix].item.start;
2192                            if let Some(ref mut buf) = buf {
2193                                buf.push_str(&self.text[self.tree[ix].item.start..end]);
2194                                buf.push(' ');
2195                            } else {
2196                                let mut new_buf = String::with_capacity(span_end - span_start);
2197                                new_buf.push_str(&self.text[span_start..end]);
2198                                new_buf.push(' ');
2199                                buf = Some(new_buf);
2200                            }
2201                        }
2202                    }
2203                    _ => {
2204                        if let Some(ref mut buf) = buf {
2205                            let end = if ix == last_ix {
2206                                span_end
2207                            } else {
2208                                self.tree[ix].item.end
2209                            };
2210                            buf.push_str(&self.text[self.tree[ix].item.start..end]);
2211                        }
2212                    }
2213                }
2214                ix = ix + 1;
2215            }
2216        }
2217
2218        let cow = if let Some(buf) = buf {
2219            buf.into()
2220        } else {
2221            self.text[span_start..span_end].into()
2222        };
2223        if preceding_backslash {
2224            self.tree[open].item.body = ItemBody::Text;
2225            self.tree[open].item.end = self.tree[open].item.start + 1;
2226            self.tree[open].next = TreePointer::Valid(close);
2227            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2228            self.tree[close].item.start = self.tree[open].item.start + 1;
2229        } else {
2230            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2231            self.tree[open].item.end = self.tree[close].item.end;
2232            self.tree[open].next = self.tree[close].next;
2233        }
2234    }
2235
2236    pub fn into_offset_iter(self) -> OffsetIter<'a> {
2237        OffsetIter {
2238            inner: self,
2239        }
2240    }
2241}
2242
2243pub(crate) enum LoopInstruction<T> {
2244    /// Continue looking for more special bytes, but skip next few bytes.
2245    ContinueAndSkip(usize),
2246    /// Break looping immediately, returning with the given index and value.
2247    BreakAtWith(usize, T)
2248}
2249
2250/// This function walks the byte slices from the given index and
2251/// calls the callback function on all bytes (and their indices) that are in the following set:
2252/// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`
2253/// It may also call the callback on other bytes, but it is not guaranteed.
2254/// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
2255/// will not be called with an index that is less than `ix + n + 1`.
2256/// When the callback returns a `BreakAtWith(end_ix, opt+val)`, no more callbacks will be
2257/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
2258/// If `BreakAtWith(..)` is never returned, this function will return the first
2259/// index that is outside the byteslice bound and a `None` value.
2260fn iterate_special_bytes<F, T>(bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
2261    where F: FnMut(usize, u8) -> LoopInstruction<Option<T>> 
2262{
2263    #[cfg(all(target_arch = "x86_64", feature="simd"))]
2264    { crate::simd::iterate_special_bytes(bytes, ix, callback) }
2265    #[cfg(not(all(target_arch = "x86_64", feature="simd")))]
2266    { scalar_iterate_special_bytes(bytes, ix, callback) }
2267}
2268
2269pub(crate) fn scalar_iterate_special_bytes<F, T>(bytes: &[u8], mut ix: usize, mut callback: F) -> (usize, Option<T>)
2270    where F: FnMut(usize, u8) -> LoopInstruction<Option<T>> 
2271{
2272    while ix < bytes.len() {
2273        match callback(ix, bytes[ix]) {
2274            LoopInstruction::ContinueAndSkip(skip) => {
2275                ix += skip + 1;
2276            }
2277            LoopInstruction::BreakAtWith(ix, val) => {
2278                return (ix, val);
2279            }
2280        }
2281    }
2282
2283    (ix, None)
2284}
2285
2286pub struct OffsetIter<'a> {
2287    inner: Parser<'a>,
2288}
2289
2290impl<'a> Iterator for OffsetIter<'a> {
2291    type Item = (Event<'a>, Range<usize>);
2292
2293    fn next(&mut self) -> Option<Self::Item> {
2294        match self.inner.tree.cur() {
2295            TreePointer::Nil => {
2296                let ix = self.inner.tree.pop()?;
2297                let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
2298                self.inner.tree.next_sibling(ix);
2299                Some((Event::End(tag), self.inner.tree[ix].item.start..self.inner.tree[ix].item.end))
2300            }
2301            TreePointer::Valid(cur_ix) => {
2302                if self.inner.tree[cur_ix].item.body.is_inline() {
2303                    self.inner.handle_inline();
2304                }
2305
2306                let node = self.inner.tree[cur_ix];
2307                let item = node.item;
2308                let event = item_to_event(item, self.inner.text, &self.inner.allocs);
2309                if let Event::Start(..) = event {
2310                    self.inner.tree.push();
2311                } else {
2312                    self.inner.tree.next_sibling(cur_ix);
2313                }
2314                Some((event, item.start..item.end))
2315            }
2316        }
2317    }
2318}
2319
2320fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
2321    match item.body {
2322        ItemBody::Paragraph => Tag::Paragraph,
2323        ItemBody::Emphasis => Tag::Emphasis,
2324        ItemBody::Strong => Tag::Strong,
2325        ItemBody::Strikethrough => Tag::Strikethrough,
2326        ItemBody::Link(link_ix) => {
2327            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2328            Tag::Link(*link_type, url.clone(), title.clone())
2329        }
2330        ItemBody::Image(link_ix) => {
2331            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2332            Tag::Image(*link_type, url.clone(), title.clone())
2333        }
2334        ItemBody::Rule => Tag::Rule,
2335        ItemBody::Header(level) => Tag::Header(level),
2336        ItemBody::FencedCodeBlock(cow_ix) =>
2337            Tag::CodeBlock(allocs[cow_ix].clone()),
2338        ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()),
2339        ItemBody::BlockQuote => Tag::BlockQuote,
2340        ItemBody::List(_, c, listitem_start) => {
2341            if c == b'.' || c == b')' {
2342                Tag::List(Some(listitem_start))
2343            } else {
2344                Tag::List(None)
2345            }
2346        }
2347        ItemBody::ListItem(_) => Tag::Item,
2348        ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2349        ItemBody::TableHead => Tag::TableHead,
2350        ItemBody::TableCell => Tag::TableCell,
2351        ItemBody::TableRow => Tag::TableRow,
2352        ItemBody::Table(alignment_ix) => {
2353            Tag::Table(allocs[alignment_ix].clone())
2354        }
2355        ItemBody::FootnoteDefinition(cow_ix) =>
2356            Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2357        _ => panic!("unexpected item body {:?}", item.body)
2358    }
2359}
2360
2361fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
2362    let tag = match item.body {
2363        ItemBody::Text =>
2364            return Event::Text(text[item.start..item.end].into()),
2365        ItemBody::Code(cow_ix) =>
2366            return Event::Code(allocs[cow_ix].clone()),
2367        ItemBody::SynthesizeText(cow_ix) =>
2368            return Event::Text(allocs[cow_ix].clone()),
2369        ItemBody::Html =>
2370            return Event::Html(text[item.start..item.end].into()),
2371        ItemBody::InlineHtml =>
2372            return Event::InlineHtml(text[item.start..item.end].into()),
2373        ItemBody::SoftBreak => return Event::SoftBreak,
2374        ItemBody::HardBreak => return Event::HardBreak,
2375        ItemBody::FootnoteReference(cow_ix) =>
2376            return Event::FootnoteReference(allocs[cow_ix].clone()),
2377        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2378
2379        ItemBody::Paragraph => Tag::Paragraph,
2380        ItemBody::Emphasis => Tag::Emphasis,
2381        ItemBody::Strong => Tag::Strong,
2382        ItemBody::Strikethrough => Tag::Strikethrough,
2383        ItemBody::Link(link_ix) => {
2384            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2385            Tag::Link(*link_type, url.clone(), title.clone())
2386        }
2387        ItemBody::Image(link_ix) => {
2388            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2389            Tag::Image(*link_type, url.clone(), title.clone())
2390        }
2391        ItemBody::Rule => Tag::Rule,
2392        ItemBody::Header(level) => Tag::Header(level),
2393        ItemBody::FencedCodeBlock(cow_ix) =>
2394            Tag::CodeBlock(allocs[cow_ix].clone()),
2395        ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()),
2396        ItemBody::BlockQuote => Tag::BlockQuote,
2397        ItemBody::List(_, c, listitem_start) => {
2398            if c == b'.' || c == b')' {
2399                Tag::List(Some(listitem_start))
2400            } else {
2401                Tag::List(None)
2402            }
2403        }
2404        ItemBody::ListItem(_) => Tag::Item,
2405        ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2406        ItemBody::TableHead => Tag::TableHead,
2407        ItemBody::TableCell => Tag::TableCell,
2408        ItemBody::TableRow => Tag::TableRow,
2409        ItemBody::Table(alignment_ix) => {
2410            Tag::Table(allocs[alignment_ix].clone())
2411        }
2412        ItemBody::FootnoteDefinition(cow_ix) =>
2413            Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2414        _ => panic!("unexpected item body {:?}", item.body)
2415    };
2416
2417    Event::Start(tag)
2418}
2419
2420// https://english.stackexchange.com/a/285573
2421fn surgerize_tight_list<'a>(tree : &mut Tree<Item>, list_ix: TreeIndex) {
2422    let mut list_item = tree[list_ix].child;
2423    while let TreePointer::Valid(listitem_ix) = list_item {
2424        // first child is special, controls how we repoint list_item.child
2425        let list_item_firstborn = tree[listitem_ix].child;
2426
2427        // Check that list item has children - this is not necessarily the case!
2428        if let TreePointer::Valid(firstborn_ix) = list_item_firstborn {
2429            if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2430                tree[listitem_ix].child = tree[firstborn_ix].child;
2431            }
2432
2433            let mut list_item_child = TreePointer::Valid(firstborn_ix);
2434            let mut node_to_repoint = TreePointer::Nil;
2435            while let TreePointer::Valid(child_ix) = list_item_child {
2436                // surgerize paragraphs
2437                let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2438                    if let TreePointer::Valid(child_firstborn) = tree[child_ix].child {
2439                        if let TreePointer::Valid(repoint_ix) = node_to_repoint {
2440                            tree[repoint_ix].next = TreePointer::Valid(child_firstborn);
2441                        }
2442                        let mut child_lastborn = child_firstborn;
2443                        while let TreePointer::Valid(lastborn_next_ix) = tree[child_lastborn].next {
2444                            child_lastborn = lastborn_next_ix;
2445                        }
2446                        child_lastborn
2447                    } else {
2448                        child_ix
2449                    }
2450                } else {
2451                    child_ix
2452                };
2453
2454                node_to_repoint = TreePointer::Valid(repoint_ix);
2455                tree[repoint_ix].next = tree[child_ix].next;
2456                list_item_child = tree[child_ix].next;
2457            }
2458        }
2459
2460        list_item = tree[listitem_ix].next;
2461    }
2462}
2463
2464impl<'a> Iterator for Parser<'a> {
2465    type Item = Event<'a>;
2466
2467    fn next(&mut self) -> Option<Event<'a>> {
2468        match self.tree.cur() {
2469            TreePointer::Nil => {
2470                let ix = self.tree.pop()?;
2471                let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
2472                self.offset = self.tree[ix].item.end;
2473                self.tree.next_sibling(ix);
2474                Some(Event::End(tag))
2475            }
2476            TreePointer::Valid(cur_ix) => {
2477                if self.tree[cur_ix].item.body.is_inline() {
2478                    self.handle_inline();
2479                }
2480
2481                let node = self.tree[cur_ix];
2482                let item = node.item;
2483                let event = item_to_event(item, self.text, &self.allocs);
2484                if let Event::Start(..) = event {
2485                    self.offset = if let TreePointer::Valid(child_ix) = node.child {
2486                        self.tree[child_ix].item.start
2487                    } else {
2488                        item.end
2489                    };
2490                    self.tree.push();
2491                } else {
2492                    self.offset = item.end;
2493                    self.tree.next_sibling(cur_ix);
2494                }
2495                Some(event)
2496            }
2497        }
2498    }
2499}
2500
2501#[cfg(test)]
2502mod test {
2503    use super::*;
2504    use crate::tree::Node;
2505
2506    // TODO: move these tests to tests/html.rs?
2507
2508    fn parser_with_extensions(text: &str) -> Parser<'_> {
2509        let mut opts = Options::empty();
2510        opts.insert(Options::ENABLE_TABLES);
2511        opts.insert(Options::ENABLE_FOOTNOTES);
2512        opts.insert(Options::ENABLE_STRIKETHROUGH);
2513        opts.insert(Options::ENABLE_TASKLISTS);
2514
2515        Parser::new_ext(text, opts)
2516    }
2517
2518    #[test]
2519    #[cfg(target_pointer_width = "64")]
2520    fn node_size() {
2521        let node_size = std::mem::size_of::<Node<Item>>();
2522        assert_eq!(48, node_size);
2523    }
2524
2525    #[test]
2526    #[cfg(target_pointer_width = "64")]
2527    fn body_size() {
2528        let body_size = std::mem::size_of::<ItemBody>();
2529        assert_eq!(16, body_size);
2530    }
2531
2532    #[test]
2533    fn single_open_fish_bracket() {
2534        // dont crash
2535        assert_eq!(3, Parser::new("<").count());
2536    }
2537
2538    #[test]
2539    fn lone_hashtag() {
2540        // dont crash
2541        assert_eq!(2, Parser::new("#").count());
2542    }
2543
2544    #[test]
2545    fn lots_of_backslashes() {
2546        // dont crash
2547        Parser::new("\\\\\r\r").count();
2548        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2549    }
2550
2551    #[test]
2552    fn issue_320() {
2553        // dont crash
2554        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2555    }
2556
2557    #[test]
2558    fn issue_319() {
2559        // dont crash
2560        parser_with_extensions("|\r-]([^|\r-]([^").count();
2561        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2562    }
2563
2564    #[test]
2565    fn issue_303() {
2566        // dont crash
2567        parser_with_extensions("[^\r\ra]").count();
2568        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2569    }
2570
2571    #[test]
2572    fn issue_313() {
2573        // dont crash
2574        parser_with_extensions("*]0[^\r\r*]0[^").count();
2575        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2576    }
2577
2578    #[test]
2579    fn issue_283() {
2580        let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2581        // dont crash
2582        parser_with_extensions(input).count();
2583    }
2584
2585    #[test]
2586    fn issue_289() {
2587        // dont crash
2588        parser_with_extensions("> - \\\n> - ").count();
2589        parser_with_extensions("- \n\n").count();
2590    }
2591
2592    #[test]
2593    fn issue_306() {
2594        // dont crash
2595        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2596    }
2597
2598    #[test]
2599    fn issue_305() {
2600        // dont crash
2601        parser_with_extensions("_6**6*_*").count();
2602    }
2603
2604    #[test]
2605    fn another_emphasis_panic() {
2606        parser_with_extensions("*__#_#__*").count();
2607    }
2608
2609    #[test]
2610    fn offset_iter() {
2611        let event_offsets: Vec<_> = Parser::new("*hello* world")
2612            .into_offset_iter()
2613            .map(|(_ev, range)| range)
2614            .collect();
2615        let expected_offsets = vec![
2616            (0..13),
2617                (0..7),
2618                    (1..6),
2619                (0..7),
2620                (7..13),
2621            (0..13)
2622        ];
2623        assert_eq!(expected_offsets, event_offsets);
2624    }
2625
2626    // FIXME: add this one regression suite
2627    #[test]
2628    fn link_def_at_eof() {
2629        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
2630        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
2631
2632        let mut buf = String::new();
2633        crate::html::push_html(&mut buf, Parser::new(test_str));
2634        assert_eq!(expected, buf);
2635    }
2636
2637    #[test]
2638    fn ref_def_at_eof() {
2639        let test_str = "[test]:\\";
2640        let expected = "";
2641
2642        let mut buf = String::new();
2643        crate::html::push_html(&mut buf, Parser::new(test_str));
2644        assert_eq!(expected, buf);
2645    }
2646
2647    #[test]
2648    fn ref_def_cr_lf() {
2649        let test_str = "[a]: /u\r\n\n[a]";
2650        let expected = "<p><a href=\"/u\">a</a></p>\n";
2651
2652        let mut buf = String::new();
2653        crate::html::push_html(&mut buf, Parser::new(test_str));
2654        assert_eq!(expected, buf);
2655    }
2656
2657    #[test]
2658    fn no_dest_refdef() {
2659        let test_str = "[a]:";
2660        let expected = "<p>[a]:</p>\n";
2661
2662        let mut buf = String::new();
2663        crate::html::push_html(&mut buf, Parser::new(test_str));
2664        assert_eq!(expected, buf);
2665    }
2666
2667    #[test]
2668    fn simple_broken_link_callback() {
2669        let test_str = "This is a link w/o def: [hello][world]";
2670        let parser = Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&|norm, raw| {
2671            assert_eq!("world", raw);
2672            assert_eq!("world", norm);
2673            Some(("YOLO".to_owned(), "SWAG".to_owned()))
2674        }));
2675        let mut link_tag_count = 0;
2676        for (typ, url, title) in parser.filter_map(|event| match event {
2677            Event::Start(tag) | Event::End(tag) => match tag {
2678                Tag::Link(typ, url, title) => Some((typ, url, title)),
2679                _ => None,
2680            }
2681            _ => None,
2682        }) {
2683            link_tag_count += 1;
2684            assert_eq!(typ, LinkType::ReferenceUnknown);
2685            assert_eq!(url.as_ref(), "YOLO");
2686            assert_eq!(title.as_ref(), "SWAG");
2687        }
2688        assert!(link_tag_count > 0);
2689    }
2690}