1use std::collections::{VecDeque, HashMap};
24use std::ops::{Index, Range};
25use std::cmp::min;
26
27use unicase::UniCase;
28
29use crate::strings::CowStr;
30use crate::scanners::*;
31use crate::tree::{TreePointer, TreeIndex, Tree};
32use crate::linklabel::{scan_link_label, scan_link_label_rest, LinkLabel, ReferenceLabel};
33
34#[derive(Clone, Debug, PartialEq)]
35pub enum Tag<'a> {
36 Paragraph,
38 Rule,
39
40 Header(i32),
42
43 BlockQuote,
44 CodeBlock(CowStr<'a>),
45
46 List(Option<usize>), Item,
49 FootnoteDefinition(CowStr<'a>),
50 HtmlBlock,
51
52 Table(Vec<Alignment>),
54 TableHead,
55 TableRow,
56 TableCell,
57
58 Emphasis,
60 Strong,
61 Strikethrough,
62
63 Link(LinkType, CowStr<'a>, CowStr<'a>),
65
66 Image(LinkType, CowStr<'a>, CowStr<'a>),
68}
69
70#[derive(Clone, Debug, PartialEq, Copy)]
71pub enum LinkType {
72 Inline,
74 Reference,
76 ReferenceUnknown,
78 Collapsed,
80 CollapsedUnknown,
82 Shortcut,
84 ShortcutUnknown,
86 Autolink,
88 Email,
90}
91
92impl LinkType {
93 fn to_unknown(self) -> Self {
94 match self {
95 LinkType::Reference => LinkType::ReferenceUnknown,
96 LinkType::Collapsed => LinkType::CollapsedUnknown,
97 LinkType::Shortcut => LinkType::ShortcutUnknown,
98 _ => unreachable!(),
99 }
100 }
101}
102
103#[derive(Clone, Debug, PartialEq)]
104pub enum Event<'a> {
105 Start(Tag<'a>),
106 End(Tag<'a>),
107 Text(CowStr<'a>),
108 Code(CowStr<'a>),
109 Html(CowStr<'a>),
110 InlineHtml(CowStr<'a>),
111 FootnoteReference(CowStr<'a>),
112 SoftBreak,
113 HardBreak,
114 TaskListMarker(bool),
116}
117
118#[derive(Copy, Clone, Debug, PartialEq)]
119pub enum Alignment {
120 None,
121 Left,
122 Center,
123 Right,
124}
125
126bitflags! {
127 pub struct Options: u32 {
128 const FIRST_PASS = 1 << 0;
129 const ENABLE_TABLES = 1 << 1;
130 const ENABLE_FOOTNOTES = 1 << 2;
131 const ENABLE_STRIKETHROUGH = 1 << 3;
132 const ENABLE_TASKLISTS = 1 << 4;
133 }
134}
135
136#[derive(Debug, Default, Clone, Copy)]
137struct Item {
138 start: usize,
139 end: usize,
140 body: ItemBody,
141}
142
143#[derive(Debug, PartialEq, Clone, Copy)]
144enum ItemBody {
145 Paragraph,
146 Text,
147 SoftBreak,
148 HardBreak,
149
150 MaybeEmphasis(usize, bool, bool),
154 MaybeCode(usize, bool), MaybeHtml,
156 MaybeLinkOpen,
157 MaybeLinkClose,
158 MaybeImage,
159
160 Emphasis,
162 Strong,
163 Strikethrough,
164 Code(CowIndex),
165 InlineHtml,
166 Link(LinkIndex),
167 Image(LinkIndex),
168 FootnoteReference(CowIndex),
169 TaskListMarker(bool), Rule,
172 Header(i32), FencedCodeBlock(CowIndex),
174 IndentCodeBlock,
175 HtmlBlock(Option<u32>), Html,
177 BlockQuote,
178 List(bool, u8, usize), ListItem(usize), SynthesizeText(CowIndex),
181 FootnoteDefinition(CowIndex),
182
183 Table(AlignmentIndex),
185 TableHead,
186 TableRow,
187 TableCell,
188
189 Root,
191}
192
193impl<'a> ItemBody {
194 fn is_inline(&self) -> bool {
195 match *self {
196 ItemBody::MaybeEmphasis(..) | ItemBody::MaybeHtml | ItemBody::MaybeCode(..)
197 | ItemBody::MaybeLinkOpen | ItemBody::MaybeLinkClose | ItemBody::MaybeImage => true,
198 _ => false,
199 }
200 }
201}
202
203impl<'a> Default for ItemBody {
204 fn default() -> Self {
205 ItemBody::Root
206 }
207}
208
209#[derive(PartialEq, Eq, Copy, Clone)]
211enum TableParseMode {
212 Scan,
214 Active,
216 Disabled,
218}
219
220struct FirstPass<'a> {
225 text: &'a str,
226 tree: Tree<Item>,
227 begin_list_item: bool,
228 last_line_blank: bool,
229 allocs: Allocations<'a>,
230 options: Options,
231 list_nesting: usize,
232}
233
234impl<'a> FirstPass<'a> {
235 fn new(text: &'a str, options: Options) -> FirstPass {
236 let start_capacity = std::cmp::max(128, text.len() / 32);
239 let tree = Tree::with_capacity(start_capacity);
240 let begin_list_item = false;
241 let last_line_blank = false;
242 let allocs = Allocations::new();
243 FirstPass {
244 text, tree, begin_list_item, last_line_blank,
245 allocs, options, list_nesting: 0
246 }
247 }
248
249 fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
250 let mut ix = 0;
251 while ix < self.text.len() {
252 ix = self.parse_block(ix);
253 }
254 for _ in 0..self.tree.spine_len() {
255 self.pop(ix);
256 }
257 (self.tree, self.allocs)
258 }
259
260 fn parse_block(&mut self, mut start_ix: usize) -> usize {
262 let bytes = self.text.as_bytes();
263 let mut line_start = LineStart::new(&bytes[start_ix..]);
264
265 let i = self.scan_containers(&mut line_start);
266 for _ in i..self.tree.spine_len() {
267 self.pop(start_ix);
268 }
269
270 if self.options.contains(Options::ENABLE_FOOTNOTES) {
271 if let Some(node_ix) = self.tree.peek_up() {
273 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
274 if self.last_line_blank {
275 self.pop(start_ix);
276 }
277 }
278 }
279
280 let container_start = start_ix + line_start.bytes_scanned();
284 if let Some(bytecount) = self.parse_footnote(container_start) {
285 start_ix = container_start + bytecount;
286 start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
287 line_start = LineStart::new(&bytes[start_ix..]);
288 }
289 }
290
291 loop {
293 let container_start = start_ix + line_start.bytes_scanned();
294 if let Some((ch, index, indent)) = line_start.scan_list_marker() {
295 let after_marker_index = start_ix + line_start.bytes_scanned();
296 self.continue_list(container_start, ch, index);
297 self.tree.append(Item {
298 start: container_start,
299 end: after_marker_index, body: ItemBody::ListItem(indent),
301 });
302 self.tree.push();
303 if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
304 self.begin_list_item = true;
305 return after_marker_index + n;
306 }
307 if self.options.contains(Options::ENABLE_TASKLISTS) {
308 if let Some(is_checked) = line_start.scan_task_list_marker() {
309 self.tree.append(Item {
310 start: after_marker_index,
311 end: start_ix + line_start.bytes_scanned(),
312 body: ItemBody::TaskListMarker(is_checked),
313 });
314 }
315 }
316 } else if line_start.scan_blockquote_marker() {
317 self.finish_list(start_ix);
318 self.tree.append(Item {
319 start: container_start,
320 end: 0, body: ItemBody::BlockQuote,
322 });
323 self.tree.push();
324 } else {
325 break;
326 }
327 }
328
329 let ix = start_ix + line_start.bytes_scanned();
330
331 if let Some(n) = scan_blank_line(&bytes[ix..]) {
332 if let Some(node_ix) = self.tree.peek_up() {
333 match self.tree[node_ix].item.body {
334 ItemBody::BlockQuote => (),
335 _ => {
336 if self.begin_list_item {
337 self.pop(start_ix);
339 }
340 self.last_line_blank = true;
341 }
342 }
343 }
344 return ix + n;
345 }
346
347 self.begin_list_item = false;
348 self.finish_list(start_ix);
349
350 let remaining_space = line_start.remaining_space();
352
353 let indent = line_start.scan_space_upto(4);
354 if indent == 4 {
355 let ix = start_ix + line_start.bytes_scanned();
356 let remaining_space = line_start.remaining_space();
357 return self.parse_indented_code_block(ix, remaining_space);
358 }
359
360 let ix = start_ix + line_start.bytes_scanned();
361
362 if self.text.as_bytes()[ix] == b'<' {
364 if let Some(html_end_tag_ix) = get_html_end_tag(&bytes[(ix + 1)..]) {
367 return self.parse_html_block_type_1_to_5(ix, html_end_tag_ix, remaining_space);
368 }
369
370 let possible_tag = scan_html_block_tag(&bytes[(ix + 1)..]).1;
372 if is_html_tag(possible_tag) {
373 return self.parse_html_block_type_6_or_7(ix, remaining_space);
374 }
375
376 if let Some(_html_bytes) = scan_html_type_7(&bytes[(ix + 1)..]) {
378 return self.parse_html_block_type_6_or_7(ix, remaining_space);
379 }
380 }
381
382 if let Ok(n) = scan_hrule(&bytes[ix..]) {
383 return self.parse_hrule(n, ix);
384 }
385
386 if let Some((atx_size, atx_level)) = scan_atx_heading(&bytes[ix..]) {
387 return self.parse_atx_heading(ix, atx_level, atx_size);
388 }
389
390 if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
392 self.allocs.refdefs.entry(label).or_insert(link_def);
393 return ix + bytecount;
394 }
395
396 if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
397 return self.parse_fenced_code_block(ix, indent, fence_ch, n);
398 }
399 self.parse_paragraph(ix)
400 }
401
402 fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
406 let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
408 self.tree[thead_ix].item.body = ItemBody::TableHead;
409
410 let mut ix = body_start;
412 while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
413 ix = next_ix;
414 }
415
416 self.pop(ix);
417 ix
418 }
419
420 fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
423 let bytes = self.text.as_bytes();
424 let mut cells = 0;
425 let mut final_cell_ix = None;
426
427 let row_ix = self.tree.append(Item {
428 start: ix,
429 end: 0, body: ItemBody::TableRow,
431 });
432 self.tree.push();
433
434 loop {
435 ix += scan_ch(&bytes[ix..], b'|');
436 ix += scan_whitespace_no_nl(&bytes[ix..]);
437
438 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
439 ix += eol_bytes;
440 break;
441 }
442
443 let cell_ix = self.tree.append(Item {
444 start: ix,
445 end: ix,
446 body: ItemBody::TableCell,
447 });
448 self.tree.push();
449 let (next_ix, _brk) = self.parse_line(ix, TableParseMode::Active);
450 let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
451
452 if let TreePointer::Valid(cur_ix) = self.tree.cur() {
453 self.tree[cur_ix].item.end -= trailing_whitespace;
454 }
455
456 self.tree[cell_ix].item.end = next_ix - trailing_whitespace;
457 self.tree.pop();
458
459 ix = next_ix;
460 cells += 1;
461
462 if cells == row_cells {
463 final_cell_ix = Some(cell_ix);
464 }
465 }
466
467 for _ in cells..row_cells {
471 self.tree.append(Item {
472 start: ix,
473 end: ix,
474 body: ItemBody::TableCell,
475 });
476 }
477
478 if let Some(cell_ix) = final_cell_ix {
480 self.tree[cell_ix].next = TreePointer::Nil;
481 }
482
483 self.pop(ix);
484
485 (ix, row_ix)
486 }
487
488 fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
490 let bytes = self.text.as_bytes();
491 let mut line_start = LineStart::new(&bytes[ix..]);
492 let containers = self.scan_containers(&mut line_start);
493 if containers != self.tree.spine_len() {
494 return None;
495 }
496 line_start.scan_all_space();
497 ix += line_start.bytes_scanned();
498 if scan_paragraph_interrupt(&bytes[ix..]) {
499 return None;
500 }
501
502 let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
503 Some((ix, row_ix))
504 }
505
506 fn parse_paragraph(&mut self, start_ix: usize) -> usize {
508 let node_ix = self.tree.append(Item {
509 start: start_ix,
510 end: 0, body: ItemBody::Paragraph,
512 });
513 self.tree.push();
514 let bytes = self.text.as_bytes();
515
516 let mut ix = start_ix;
517 loop {
518 let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
519 TableParseMode::Scan
520 } else {
521 TableParseMode::Disabled
522 };
523 let (next_ix, brk) = self.parse_line(ix, scan_mode);
524
525 if let Some(Item { body: ItemBody::Table(alignment_ix), start, end }) = brk {
527 let table_cols = self.allocs[alignment_ix].len();
528 self.tree[node_ix].item = Item { body: ItemBody::Table(alignment_ix), start, end };
529 self.tree[node_ix].child = TreePointer::Nil;
532 self.tree.pop();
533 self.tree.push();
534 return self.parse_table(table_cols, ix, next_ix);
535 }
536
537 ix = next_ix;
538 let mut line_start = LineStart::new(&bytes[ix..]);
539 let n_containers = self.scan_containers(&mut line_start);
540 if !line_start.scan_space(4) {
541 let ix_new = ix + line_start.bytes_scanned();
542 if n_containers == self.tree.spine_len() {
543 if let Some((n, level)) = scan_setext_heading(&bytes[ix_new..]) {
544 self.tree[node_ix].item.body = ItemBody::Header(level);
545 if let Some(Item { start, body: ItemBody::HardBreak, .. }) = brk {
546 if bytes[start] == b'\\' {
547 self.tree.append_text(start, start + 1);
548 }
549 }
550 ix = ix_new + n;
551 break;
552 }
553 }
554 let suffix = &bytes[ix_new..];
556 if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
557 break;
558 }
559 }
560 line_start.scan_all_space();
561 ix = next_ix + line_start.bytes_scanned();
562 if let Some(item) = brk {
563 self.tree.append(item);
564 }
565 }
566
567 self.pop(ix);
568 ix
569 }
570
571 fn parse_line(&mut self, start: usize, mode: TableParseMode) -> (usize, Option<Item>) {
575 let bytes = &self.text.as_bytes();
576 let mut pipes = 0;
577 let mut last_pipe_ix = start;
578 let mut begin_text = start;
579
580 let (final_ix, brk) = iterate_special_bytes(bytes, start, |ix, byte| {
581 match byte {
582 b'\n' | b'\r' => {
583 if let TableParseMode::Active = mode {
584 return LoopInstruction::BreakAtWith(ix, None);
585 }
586
587 let mut i = ix;
588 let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
589 if mode == TableParseMode::Scan && pipes > 0 {
590 let next_line_ix = ix + eol_bytes;
592 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
593 if self.scan_containers(&mut line_start) == self.tree.spine_len() {
594 let table_head_ix = next_line_ix + line_start.bytes_scanned();
595 let (table_head_bytes, alignment) = scan_table_head(&bytes[table_head_ix..]);
596
597 if table_head_bytes > 0 {
598 let header_count = count_header_cols(bytes, pipes, start, last_pipe_ix);
600
601 if alignment.len() == header_count {
603 let alignment_ix = self.allocs.allocate_alignment(alignment);
604 let end_ix = table_head_ix + table_head_bytes;
605 return LoopInstruction::BreakAtWith(end_ix, Some(Item {
606 start: i,
607 end: end_ix, body: ItemBody::Table(alignment_ix),
609 }));
610 }
611 }
612 }
613 }
614
615 let end_ix = ix + eol_bytes;
616 let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
617 if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
618 i -= 1;
619 self.tree.append_text(begin_text, i);
620 return LoopInstruction::BreakAtWith(end_ix, Some(Item {
621 start: i,
622 end: end_ix,
623 body: ItemBody::HardBreak,
624 }));
625 }
626 let trailing_whitespace = scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
627 if trailing_whitespace >= 2 {
628 i -= trailing_whitespace;
629 self.tree.append_text(begin_text, i);
630 return LoopInstruction::BreakAtWith(end_ix, Some(Item {
631 start: i,
632 end: end_ix,
633 body: ItemBody::HardBreak,
634 }));
635 }
636
637 self.tree.append_text(begin_text, ix);
638 LoopInstruction::BreakAtWith(end_ix, Some(Item {
639 start: i,
640 end: end_ix,
641 body: ItemBody::SoftBreak,
642 }))
643 }
644 b'\\' => {
645 if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
646 self.tree.append_text(begin_text, ix);
647 if bytes[ix + 1] == b'`' {
648 let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
649 self.tree.append(Item {
650 start: ix + 1,
651 end: ix + count + 1,
652 body: ItemBody::MaybeCode(count, true),
653 });
654 begin_text = ix + 1 + count;
655 LoopInstruction::ContinueAndSkip(count)
656 } else {
657 begin_text = ix + 1;
658 LoopInstruction::ContinueAndSkip(1)
659 }
660 } else {
661 LoopInstruction::ContinueAndSkip(0)
662 }
663 }
664 c @ b'*' | c @ b'_' | c @ b'~' => {
665 let string_suffix = &self.text[ix..];
666 let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
667 let can_open = delim_run_can_open(&self.text, string_suffix, count, ix);
668 let can_close = delim_run_can_close(&self.text, string_suffix, count, ix);
669 let is_valid_seq = c != b'~' || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
670
671 if (can_open || can_close) && is_valid_seq {
672 self.tree.append_text(begin_text, ix);
673 for i in 0..count {
674 self.tree.append(Item {
675 start: ix + i,
676 end: ix + i + 1,
677 body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
678 });
679 }
680 begin_text = ix + count;
681 }
682 LoopInstruction::ContinueAndSkip(count - 1)
683 }
684 b'`' => {
685 self.tree.append_text(begin_text, ix);
686 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
687 self.tree.append(Item {
688 start: ix,
689 end: ix + count,
690 body: ItemBody::MaybeCode(count, false),
691 });
692 begin_text = ix + count;
693 LoopInstruction::ContinueAndSkip(count - 1)
694 }
695 b'<' => {
696 self.tree.append_text(begin_text, ix);
699 self.tree.append(Item {
700 start: ix,
701 end: ix + 1,
702 body: ItemBody::MaybeHtml,
703 });
704 begin_text = ix + 1;
705 LoopInstruction::ContinueAndSkip(0)
706 }
707 b'!' => {
708 if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
709 self.tree.append_text(begin_text, ix);
710 self.tree.append(Item {
711 start: ix,
712 end: ix + 2,
713 body: ItemBody::MaybeImage,
714 });
715 begin_text = ix + 2;
716 LoopInstruction::ContinueAndSkip(1)
717 } else {
718 LoopInstruction::ContinueAndSkip(0)
719 }
720 }
721 b'[' => {
722 self.tree.append_text(begin_text, ix);
723 self.tree.append(Item {
724 start: ix,
725 end: ix + 1,
726 body: ItemBody::MaybeLinkOpen,
727 });
728 begin_text = ix + 1;
729 LoopInstruction::ContinueAndSkip(0)
730 }
731 b']' => {
732 self.tree.append_text(begin_text, ix);
733 self.tree.append(Item {
734 start: ix,
735 end: ix + 1,
736 body: ItemBody::MaybeLinkClose,
737 });
738 begin_text = ix + 1;
739 LoopInstruction::ContinueAndSkip(0)
740 }
741 b'&' => {
742 match scan_entity(&bytes[ix..]) {
743 (n, Some(value)) => {
744 self.tree.append_text(begin_text, ix);
745 self.tree.append(Item {
746 start: ix,
747 end: ix + n,
748 body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
749 });
750 begin_text = ix + n;
751 LoopInstruction::ContinueAndSkip(n - 1)
752 }
753 _ => LoopInstruction::ContinueAndSkip(0),
754 }
755 }
756 b'|' => {
757 if let TableParseMode::Active = mode {
758 LoopInstruction::BreakAtWith(ix, None)
759 } else {
760 last_pipe_ix = ix;
761 pipes += 1;
762 LoopInstruction::ContinueAndSkip(0)
763 }
764 }
765 _ => LoopInstruction::ContinueAndSkip(0),
766 }
767 });
768
769 if brk.is_none() {
770 self.tree.append_text(begin_text, final_ix);
772 }
773 (final_ix, brk)
774 }
775
776 fn interrupt_paragraph_by_list(&self, suffix: &[u8]) -> bool {
779 scan_listitem(suffix).map_or(false, |(ix, delim, index, _)| {
780 self.list_nesting > 0 ||
781 !scan_empty_list(&suffix[ix..]) && (delim == b'*' || delim == b'-' || index == 1)
784 })
785 }
786
787 fn parse_html_block_type_1_to_5(&mut self, start_ix: usize, html_end_tag_ix: u32,
793 mut remaining_space: usize) -> usize
794 {
795 self.tree.append(Item {
796 start: start_ix,
797 end: 0, body: ItemBody::HtmlBlock(Some(html_end_tag_ix)),
799 });
800 self.tree.push();
801
802 let bytes = self.text.as_bytes();
803 let mut ix = start_ix;
804 let end_ix;
805 loop {
806 let line_start_ix = ix;
807 ix += scan_nextline(&bytes[ix..]);
808 self.append_html_line(remaining_space, line_start_ix, ix);
809
810 let mut line_start = LineStart::new(&bytes[ix..]);
811 let n_containers = self.scan_containers(&mut line_start);
812 if n_containers < self.tree.spine_len() {
813 end_ix = ix;
814 break;
815 }
816
817 let html_end_tag = HTML_END_TAGS[html_end_tag_ix as usize];
818
819 if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
820 end_ix = ix;
821 break;
822 }
823
824 let next_line_ix = ix + line_start.bytes_scanned();
825 if next_line_ix == self.text.len() {
826 end_ix = next_line_ix;
827 break;
828 }
829 ix = next_line_ix;
830 remaining_space = line_start.remaining_space();
831 }
832 self.pop(end_ix);
833 ix
834 }
835
836 fn parse_html_block_type_6_or_7(&mut self, start_ix: usize, mut remaining_space: usize)
840 -> usize
841 {
842 self.tree.append(Item {
843 start: start_ix,
844 end: 0, body: ItemBody::HtmlBlock(None)
846 });
847 self.tree.push();
848
849 let bytes = self.text.as_bytes();
850 let mut ix = start_ix;
851 let end_ix;
852 loop {
853 let line_start_ix = ix;
854 ix += scan_nextline(&bytes[ix..]);
855 self.append_html_line(remaining_space, line_start_ix, ix);
856
857 let mut line_start = LineStart::new(&bytes[ix..]);
858 let n_containers = self.scan_containers(&mut line_start);
859 if n_containers < self.tree.spine_len() || line_start.is_at_eol()
860 {
861 end_ix = ix;
862 break;
863 }
864
865 let next_line_ix = ix + line_start.bytes_scanned();
866 if next_line_ix == self.text.len()
867 || scan_blank_line(&bytes[next_line_ix..]).is_some()
868 {
869 end_ix = next_line_ix;
870 break;
871 }
872 ix = next_line_ix;
873 remaining_space = line_start.remaining_space();
874 }
875 self.pop(end_ix);
876 ix
877 }
878
879 fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize)
880 -> usize
881 {
882 self.tree.append(Item {
883 start: start_ix,
884 end: 0, body: ItemBody::IndentCodeBlock,
886 });
887 self.tree.push();
888 let bytes = self.text.as_bytes();
889 let mut last_nonblank_child = TreePointer::Nil;
890 let mut last_nonblank_ix = 0;
891 let mut end_ix = 0;
892 let mut last_line_blank = false;
893
894 let mut ix = start_ix;
895 loop {
896 let line_start_ix = ix;
897 ix += scan_nextline(&bytes[ix..]);
898 self.append_code_text(remaining_space, line_start_ix, ix);
899 if !last_line_blank {
902 last_nonblank_child = self.tree.cur();
903 last_nonblank_ix = ix;
904 end_ix = ix;
905 }
906
907 let mut line_start = LineStart::new(&bytes[ix..]);
908 let n_containers = self.scan_containers(&mut line_start);
909 if n_containers < self.tree.spine_len()
910 || !(line_start.scan_space(4) || line_start.is_at_eol())
911 {
912 break;
913 }
914 let next_line_ix = ix + line_start.bytes_scanned();
915 if next_line_ix == self.text.len() {
916 break;
917 }
918 ix = next_line_ix;
919 remaining_space = line_start.remaining_space();
920 last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
921 }
922
923 if let TreePointer::Valid(child) = last_nonblank_child {
925 self.tree[child].next = TreePointer::Nil;
926 self.tree[child].item.end = last_nonblank_ix;
927 }
928 self.pop(end_ix);
929 ix
930 }
931
932 fn parse_fenced_code_block(&mut self, start_ix: usize, indent: usize,
933 fence_ch: u8, n_fence_char: usize) -> usize
934 {
935 let bytes = self.text.as_bytes();
936 let mut info_start = start_ix + n_fence_char;
937 info_start += scan_whitespace_no_nl(&bytes[info_start..]);
938 let mut ix = info_start + scan_nextline(&bytes[info_start..]);
941 let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
942 let info_string = unescape(&self.text[info_start..info_end]);
943 self.tree.append(Item {
944 start: start_ix,
945 end: 0, body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
947 });
948 self.tree.push();
949 loop {
950 let mut line_start = LineStart::new(&bytes[ix..]);
951 let n_containers = self.scan_containers(&mut line_start);
952 if n_containers < self.tree.spine_len() {
953 break;
954 }
955 line_start.scan_space(indent);
956 let mut close_line_start = line_start.clone();
957 if !close_line_start.scan_space(4) {
958 let close_ix = ix + close_line_start.bytes_scanned();
959 if let Some(n) =
960 scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
961 {
962 ix = close_ix + n;
963 break;
964 }
965 }
966 let remaining_space = line_start.remaining_space();
967 ix += line_start.bytes_scanned();
968 let next_ix = ix + scan_nextline(&bytes[ix..]);
969 self.append_code_text(remaining_space, ix, next_ix);
970 ix = next_ix;
971 }
972
973 self.pop(ix);
974
975 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
977 }
978
979 fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
980 if remaining_space > 0 {
981 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
982 self.tree.append(Item {
983 start,
984 end: start,
985 body: ItemBody::SynthesizeText(cow_ix),
986 });
987 }
988 if self.text.as_bytes()[end - 2] == b'\r' {
989 self.tree.append_text(start, end - 2);
991 self.tree.append_text(end - 1, end);
992 } else {
993 self.tree.append_text(start, end);
994 }
995 }
996
997 fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
999 if remaining_space > 0 {
1000 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1001 self.tree.append(Item {
1002 start,
1003 end: start,
1004 body: ItemBody::SynthesizeText(cow_ix),
1006 });
1007 }
1008 if self.text.as_bytes()[end - 2] == b'\r' {
1009 self.tree.append(Item {
1011 start,
1012 end: end - 2,
1013 body: ItemBody::Html,
1014 });
1015 self.tree.append(Item {
1016 start: end - 1,
1017 end,
1018 body: ItemBody::Html,
1019 });
1020 } else {
1021 self.tree.append(Item {
1022 start,
1023 end,
1024 body: ItemBody::Html,
1025 });
1026 }
1027 }
1028
1029 fn scan_containers(&self, line_start: &mut LineStart) -> usize {
1031 let mut i = 0;
1032 for &node_ix in self.tree.walk_spine() {
1033 match self.tree[node_ix].item.body {
1034 ItemBody::BlockQuote => {
1035 let save = line_start.clone();
1036 if !line_start.scan_blockquote_marker() {
1037 *line_start = save;
1038 break;
1039 }
1040 }
1041 ItemBody::ListItem(indent) => {
1042 if !line_start.is_at_eol() {
1043 let save = line_start.clone();
1044 if !line_start.scan_space(indent){
1045 *line_start = save;
1046 break;
1047 }
1048 }
1049 }
1050 _ => (),
1051 }
1052 i += 1;
1053 }
1054 i
1055 }
1056
1057 fn pop(&mut self, ix: usize) {
1059 let cur_ix = self.tree.pop().unwrap();
1060 self.tree[cur_ix].item.end = ix;
1061 if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1062 surgerize_tight_list(&mut self.tree, cur_ix);
1063 }
1064 }
1065
1066 fn finish_list(&mut self, ix: usize) {
1068 if let Some(node_ix) = self.tree.peek_up() {
1069 if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1070 self.pop(ix);
1071 self.list_nesting -= 1;
1072 }
1073 }
1074 if self.last_line_blank {
1075 if let Some(node_ix) = self.tree.peek_grandparent() {
1076 if let ItemBody::List(ref mut is_tight, _, _) =
1077 self.tree[node_ix].item.body
1078 {
1079 *is_tight = false;
1080 }
1081 }
1082 self.last_line_blank = false;
1083 }
1084 }
1085
1086 fn continue_list(&mut self, start: usize, ch: u8, index: usize) {
1089 if let Some(node_ix) = self.tree.peek_up() {
1090 if let ItemBody::List(ref mut is_tight, existing_ch, _) =
1091 self.tree[node_ix].item.body
1092 {
1093 if existing_ch == ch {
1094 if self.last_line_blank {
1095 *is_tight = false;
1096 self.last_line_blank = false;
1097 }
1098 return;
1099 }
1100 }
1101 self.finish_list(start);
1103 }
1104 self.tree.append(Item {
1105 start,
1106 end: 0, body: ItemBody::List(true, ch, index),
1108 });
1109 self.list_nesting += 1;
1110 self.tree.push();
1111 self.last_line_blank = false;
1112 }
1113
1114 fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1118 self.tree.append(Item {
1119 start: ix,
1120 end: ix + hrule_size,
1121 body: ItemBody::Rule,
1122 });
1123 ix + hrule_size
1124 }
1125
1126 fn parse_atx_heading(&mut self, mut ix: usize, atx_level: i32, atx_size: usize) -> usize {
1130 self.tree.append(Item {
1131 start: ix,
1132 end: 0, body: ItemBody::Header(atx_level),
1134 });
1135 ix += atx_size;
1136 let bytes = self.text.as_bytes();
1139 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1140 return ix + eol_bytes;
1141 }
1142 let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1144 ix += skip_spaces;
1145
1146 let header_start = ix;
1148 let header_node_idx = self.tree.push(); ix = self.parse_line(ix, TableParseMode::Disabled).0;
1150 self.tree[header_node_idx].item.end = ix;
1151
1152 if let TreePointer::Valid(cur_ix) = self.tree.cur() {
1154 let header_text = &bytes[header_start..ix];
1155 let mut limit = header_text.iter()
1156 .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1157 .map_or(0, |i| i + 1);
1158 let closer = header_text[..limit].iter()
1159 .rposition(|&b| b != b'#')
1160 .map_or(0, |i| i + 1);
1161 if closer == 0 {
1162 limit = closer;
1163 } else {
1164 let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1165 if spaces > 0 {
1166 limit = closer - spaces;
1167 }
1168 }
1169 self.tree[cur_ix].item.end = limit + header_start;
1170 }
1171
1172 self.tree.pop();
1173 ix
1174 }
1175
1176 fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1178 let bytes = &self.text.as_bytes()[start..];
1179 if !bytes.starts_with(b"[^") {
1180 return None;
1181 }
1182 let (mut i, label) = scan_link_label_rest(&self.text[(start + 2)..])?;
1183 i += 2;
1184 if scan_ch(&bytes[i..], b':') == 0 {
1185 return None;
1186 }
1187 i += 1;
1188 self.tree.append(Item {
1189 start,
1190 end: 0, body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)), });
1193 self.tree.push();
1194 Some(i)
1195 }
1196
1197 fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1199 let bytes = &self.text.as_bytes()[start..];
1200 if scan_ch(bytes, b'[') == 0 {
1201 return None;
1202 }
1203 let (mut i, label) = scan_link_label_rest(&self.text[(start + 1)..])?;
1204 i += 1;
1205 if scan_ch(&bytes[i..], b':') == 0 {
1206 return None;
1207 }
1208 i += 1;
1209 let (bytecount, link_def) = self.scan_refdef(start + i)?;
1210 Some((bytecount + i, UniCase::new(label), link_def))
1211 }
1212
1213 fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1215 let mut newlines = 0;
1216 loop {
1217 let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1218 i += whitespaces;
1219 if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1220 i += eol_bytes;
1221 newlines += 1;
1222 if newlines > 1 {
1223 return None;
1224 }
1225 } else {
1226 break;
1227 }
1228 let mut line_start = LineStart::new(&bytes[i..]);
1229 if self.tree.spine_len() != self.scan_containers(&mut line_start) {
1230 return None;
1231 }
1232 i += line_start.bytes_scanned();
1233 }
1234 Some((i, newlines))
1235 }
1236
1237 fn scan_refdef(&self, start: usize) -> Option<(usize, LinkDef<'a>)> {
1240 let bytes = self.text.as_bytes();
1241
1242 let (mut i, _newlines) = self.scan_refdef_space(&bytes, start)?;
1244
1245 let (dest_length, dest) = scan_link_dest(&self.text, i, 1)?;
1247 if dest_length == 0 {
1248 return None;
1249 }
1250 let dest = unescape(dest);
1251 i += dest_length;
1252
1253 let mut backup = (i - start, LinkDef { dest, title: None });
1255
1256 let (mut i, newlines) = if let Some((new_i, mut newlines)) = self.scan_refdef_space(&bytes, i) {
1258 if i == self.text.len() {
1259 newlines += 1;
1260 }
1261 if new_i == i && newlines == 0 {
1262 return None;
1263 }
1264 if newlines > 1 {
1265 return Some(backup);
1266 };
1267 (new_i, newlines)
1268 } else {
1269 return Some(backup);
1270 };
1271
1272 if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
1275 i += title_length;
1276 backup.1.title = Some(unescape(title));
1277 } else if newlines > 0 {
1278 return Some(backup);
1279 } else {
1280 return None;
1281 };
1282
1283 if let Some(bytes) = scan_blank_line(&bytes[i..]) {
1285 backup.0 = i + bytes - start;
1286 Some(backup)
1287 } else if newlines > 0 {
1288 Some(backup)
1289 } else {
1290 None
1291 }
1292 }
1293}
1294
1295fn count_header_cols(bytes: &[u8], mut pipes: usize, mut start: usize, last_pipe_ix: usize) -> usize {
1298 start += scan_whitespace_no_nl(&bytes[start..]);
1300 if bytes[start] == b'|' {
1301 pipes -= 1;
1302 }
1303
1304 if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
1306 pipes
1307 } else {
1308 pipes + 1
1309 }
1310}
1311
1312impl<'a> Tree<Item> {
1313 fn append_text(&mut self, start: usize, end: usize) {
1314 if end > start {
1315 if let TreePointer::Valid(ix) = self.cur() {
1316 if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
1317 self[ix].item.end = end;
1318 return;
1319 }
1320 }
1321 self.append(Item {
1322 start,
1323 end,
1324 body: ItemBody::Text,
1325 });
1326 }
1327 }
1328}
1329
1330fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1336 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1337 c
1338 } else {
1339 return false;
1340 };
1341 if next_char.is_whitespace() {
1342 return false;
1343 }
1344 if ix == 0 {
1345 return true;
1346 }
1347 let delim = suffix.chars().next().unwrap();
1348 if delim == '*' && !is_punctuation(next_char) {
1349 return true;
1350 }
1351
1352 let prev_char = s[..ix].chars().last().unwrap();
1353
1354 prev_char.is_whitespace() || is_punctuation(prev_char)
1355}
1356
1357fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1361 if ix == 0 {
1362 return false;
1363 }
1364 let prev_char = s[..ix].chars().last().unwrap();
1365 if prev_char.is_whitespace() {
1366 return false;
1367 }
1368 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1369 c
1370 } else {
1371 return true;
1372 };
1373 let delim = suffix.chars().next().unwrap();
1374 if delim == '*' && !is_punctuation(prev_char) {
1375 return true;
1376 }
1377
1378 next_char.is_whitespace() || is_punctuation(next_char)
1379}
1380
1381fn scan_paragraph_interrupt(bytes: &[u8]) -> bool {
1385 if scan_eol(bytes).is_some() ||
1386 scan_hrule(bytes).is_ok() ||
1387 scan_atx_heading(bytes).is_some() ||
1388 scan_code_fence(bytes).is_some() ||
1389 scan_blockquote_start(bytes).is_some()
1390 {
1391 return true;
1392 }
1393 bytes.starts_with(b"<") &&
1394 (get_html_end_tag(&bytes[1..]).is_some() || is_html_tag(scan_html_block_tag(&bytes[1..]).1))
1395}
1396
1397static HTML_END_TAGS: &[&str; 7] = &["</pre>", "</style>", "</script>", "-->", "?>", "]]>", ">"];
1398
1399fn get_html_end_tag(text_bytes : &[u8]) -> Option<u32> {
1402 static BEGIN_TAGS: &[&[u8]; 3] = &[b"pre", b"style", b"script"];
1403 static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
1404
1405 for (beg_tag, end_tag_ix) in BEGIN_TAGS.iter().zip(0..3) {
1406 let tag_len = beg_tag.len();
1407
1408 if text_bytes.len() < tag_len {
1409 break;
1411 }
1412
1413 if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
1414 continue;
1415 }
1416
1417 if text_bytes.len() == tag_len {
1419 return Some(end_tag_ix);
1420 }
1421
1422 let s = text_bytes[tag_len];
1424 if is_ascii_whitespace(s) || s == b'>' {
1425 return Some(end_tag_ix);
1426 }
1427 }
1428
1429 for (beg_tag, end_tag_ix) in ST_BEGIN_TAGS.iter().zip(3..6) {
1430 if text_bytes.starts_with(beg_tag) {
1431 return Some(end_tag_ix);
1432 }
1433 }
1434
1435 if text_bytes.len() > 1 && text_bytes[0] == b'!'
1436 && text_bytes[1] >= b'A' && text_bytes[1] <= b'Z' {
1437 Some(6)
1438 } else {
1439 None
1440 }
1441}
1442
1443#[derive(Copy, Clone, Debug)]
1444struct InlineEl {
1445 start: TreeIndex, count: usize,
1447 c: u8, both: bool, }
1450
1451#[derive(Debug, Clone, Default)]
1452struct InlineStack {
1453 stack: Vec<InlineEl>,
1454 lower_bounds: [usize; 7],
1459}
1460
1461impl InlineStack {
1462 const UNDERSCORE_NOT_BOTH: usize = 0;
1466 const ASTERISK_NOT_BOTH: usize = 1;
1467 const ASTERISK_BASE: usize = 2;
1468 const TILDES: usize = 5;
1469 const UNDERSCORE_BOTH: usize = 6;
1470
1471 fn pop_all<'a>(&mut self, tree: &mut Tree<Item>) {
1472 for el in self.stack.drain(..) {
1473 for i in 0..el.count {
1474 tree[el.start + i].item.body = ItemBody::Text;
1475 }
1476 }
1477 self.lower_bounds = [0; 7];
1478 }
1479
1480 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1481 if c == b'_' {
1482 if both {
1483 self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
1484 } else {
1485 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
1486 }
1487 } else if c == b'*' {
1488 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1489 if both {
1490 mod3_lower
1491 } else {
1492 min(mod3_lower, self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH])
1493 }
1494 } else {
1495 self.lower_bounds[InlineStack::TILDES]
1496 }
1497 }
1498
1499 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1500 if c == b'_' {
1501 if both {
1502 self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
1503 } else {
1504 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1505 }
1506 } else if c == b'*' {
1507 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1508 if !both {
1509 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1510 }
1511 } else {
1512 self.lower_bounds[InlineStack::TILDES] = new_bound;
1513 }
1514 }
1515
1516 fn find_match<'a>(&mut self, tree: &mut Tree<Item>, c: u8, count: usize, both: bool)
1517 -> Option<InlineEl>
1518 {
1519 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1520 let res = self.stack[lowerbound..]
1521 .iter()
1522 .cloned()
1523 .enumerate()
1524 .rfind(|(_, el)| {
1525 el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
1526 });
1527
1528 if let Some((matching_ix, matching_el)) = res {
1529 let matching_ix = matching_ix + lowerbound;
1530 for el in &self.stack[(matching_ix + 1)..] {
1531 for i in 0..el.count {
1532 tree[el.start + i].item.body = ItemBody::Text;
1533 }
1534 }
1535 self.stack.truncate(matching_ix);
1536 Some(matching_el)
1537 } else {
1538 self.set_lowerbound(c, count, both, self.stack.len());
1539 None
1540 }
1541 }
1542
1543 fn push(&mut self, el: InlineEl) {
1544 self.stack.push(el)
1545 }
1546}
1547
1548#[derive(Debug, Clone)]
1549enum RefScan<'a> {
1550 LinkLabel(CowStr<'a>, TreePointer),
1552 Collapsed(TreePointer),
1554 Failed,
1555}
1556
1557fn scan_nodes_to_ix(tree: &Tree<Item>, mut node: TreePointer, ix: usize) -> TreePointer {
1558 while let TreePointer::Valid(node_ix) = node {
1559 if tree[node_ix].item.end <= ix {
1560 node = tree[node_ix].next;
1561 } else {
1562 break;
1563 }
1564 }
1565 node
1566}
1567
1568fn scan_reference<'a, 'b>(tree: &'a Tree<Item>, text: &'b str, cur: TreePointer) -> RefScan<'b> {
1569 let cur_ix = match cur {
1570 TreePointer::Nil => return RefScan::Failed,
1571 TreePointer::Valid(cur_ix) => cur_ix,
1572 };
1573 let start = tree[cur_ix].item.start;
1574 let tail = &text.as_bytes()[start..];
1575
1576 if tail.starts_with(b"[]") {
1577 let closing_node = tree[cur_ix].next.unwrap();
1578 RefScan::Collapsed(tree[closing_node].next)
1579 } else if let Some((ix, ReferenceLabel::Link(label))) = scan_link_label(&text[start..]) {
1580 let next_node = scan_nodes_to_ix(tree, cur, start + ix);
1581 RefScan::LinkLabel(label, next_node)
1582 } else {
1583 RefScan::Failed
1584 }
1585}
1586
1587#[derive(Clone, Default)]
1588struct LinkStack {
1589 inner: Vec<LinkStackEl>,
1590 disabled_ix: usize,
1591}
1592
1593impl LinkStack {
1594 fn push(&mut self, el: LinkStackEl) {
1595 self.inner.push(el);
1596 }
1597
1598 fn pop(&mut self) -> Option<LinkStackEl> {
1599 let el = self.inner.pop();
1600 self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1601 el
1602 }
1603
1604 fn clear(&mut self) {
1605 self.inner.clear();
1606 self.disabled_ix = 0;
1607 }
1608
1609 fn disable_all_links(&mut self) {
1610 for el in &mut self.inner[self.disabled_ix..] {
1611 if el.ty == LinkStackTy::Link {
1612 el.ty = LinkStackTy::Disabled;
1613 }
1614 }
1615 self.disabled_ix = self.inner.len();
1616 }
1617}
1618
1619#[derive(Clone, Debug)]
1620struct LinkStackEl {
1621 node: TreeIndex,
1622 ty: LinkStackTy,
1623}
1624
1625#[derive(PartialEq, Clone, Debug)]
1626enum LinkStackTy {
1627 Link,
1628 Image,
1629 Disabled,
1630}
1631
1632#[derive(Clone)]
1633struct LinkDef<'a> {
1634 dest: CowStr<'a>,
1635 title: Option<CowStr<'a>>,
1636}
1637
1638struct CodeDelims {
1641 inner: HashMap<usize, VecDeque<TreeIndex>>,
1642 seen_first: bool,
1643}
1644
1645impl CodeDelims {
1646 fn new() -> Self {
1647 Self {
1648 inner: Default::default(),
1649 seen_first: false,
1650 }
1651 }
1652
1653 fn insert(&mut self, count: usize, ix: TreeIndex) {
1654 if self.seen_first {
1655 self.inner.entry(count).or_insert_with(Default::default).push_back(ix);
1656 } else {
1657 self.seen_first = true;
1660 }
1661 }
1662
1663 fn is_populated(&self) -> bool {
1664 !self.inner.is_empty()
1665 }
1666
1667 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1668 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1669 if ix > open_ix {
1670 return Some(ix);
1671 }
1672 }
1673 None
1674 }
1675
1676 fn clear(&mut self) {
1677 self.inner.clear();
1678 self.seen_first = false;
1679 }
1680}
1681
1682#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1683struct LinkIndex(usize);
1684
1685#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1686struct CowIndex(usize);
1687
1688#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1689struct AlignmentIndex(usize);
1690
1691#[derive(Clone)]
1692struct Allocations<'a> {
1693 refdefs: HashMap<LinkLabel<'a>, LinkDef<'a>>,
1694 links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1695 cows: Vec<CowStr<'a>>,
1696 alignments: Vec<Vec<Alignment>>,
1697}
1698
1699impl<'a> Allocations<'a> {
1700 fn new() -> Self {
1701 Self {
1702 refdefs: HashMap::new(),
1703 links: Vec::with_capacity(128),
1704 cows: Vec::new(),
1705 alignments: Vec::new(),
1706 }
1707 }
1708
1709 fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1710 let ix = self.cows.len();
1711 self.cows.push(cow);
1712 CowIndex(ix)
1713 }
1714
1715 fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1716 let ix = self.links.len();
1717 self.links.push((ty, url, title));
1718 LinkIndex(ix)
1719 }
1720
1721 fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1722 let ix = self.alignments.len();
1723 self.alignments.push(alignment);
1724 AlignmentIndex(ix)
1725 }
1726}
1727
1728impl<'a> Index<CowIndex> for Allocations<'a> {
1729 type Output = CowStr<'a>;
1730
1731 fn index(&self, ix: CowIndex) -> &Self::Output {
1732 self.cows.index(ix.0)
1733 }
1734}
1735
1736impl<'a> Index<LinkIndex> for Allocations<'a> {
1737 type Output = (LinkType, CowStr<'a>, CowStr<'a>);
1738
1739 fn index(&self, ix: LinkIndex) -> &Self::Output {
1740 self.links.index(ix.0)
1741 }
1742}
1743
1744impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1745 type Output = Vec<Alignment>;
1746
1747 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1748 self.alignments.index(ix.0)
1749 }
1750}
1751
1752#[derive(Clone, Default)]
1758pub(crate) struct HtmlScanGuard {
1759 pub cdata: usize,
1760 pub processing: usize,
1761 pub declaration: usize,
1762}
1763
1764#[derive(Clone)]
1765pub struct Parser<'a> {
1766 text: &'a str,
1767 tree: Tree<Item>,
1768 allocs: Allocations<'a>,
1769 broken_link_callback: Option<&'a Fn(&str, &str) -> Option<(String, String)>>,
1770 offset: usize,
1771 html_scan_guard: HtmlScanGuard,
1772
1773 inline_stack: InlineStack,
1775 link_stack: LinkStack,
1776}
1777
1778impl<'a> Parser<'a> {
1779 pub fn new(text: &'a str) -> Parser<'a> {
1780 Parser::new_ext(text, Options::empty())
1781 }
1782
1783 pub fn new_ext(text: &'a str, options: Options) -> Parser<'a> {
1784 Parser::new_with_broken_link_callback(text, options, None)
1785 }
1786
1787 pub fn new_with_broken_link_callback(
1793 text: &'a str,
1794 options: Options,
1795 broken_link_callback: Option<&'a Fn(&str, &str) -> Option<(String, String)>>
1796 ) -> Parser<'a> {
1797 let first_pass = FirstPass::new(text, options);
1798 let (mut tree, allocs) = first_pass.run();
1799 tree.reset();
1800 let inline_stack = Default::default();
1801 let link_stack = Default::default();
1802 let html_scan_guard = Default::default();
1803 Parser {
1804 text, tree, allocs, broken_link_callback,
1805 offset: 0, inline_stack, link_stack, html_scan_guard,
1806 }
1807 }
1808
1809 pub fn get_offset(&self) -> usize {
1810 self.offset
1811 }
1812
1813 fn handle_inline(&mut self) {
1820 self.handle_inline_pass1();
1821 self.handle_emphasis();
1822 }
1823
1824 fn handle_inline_pass1(&mut self) {
1830 let mut code_delims = CodeDelims::new();
1831 let mut cur = self.tree.cur();
1832 let mut prev = TreePointer::Nil;
1833
1834 while let TreePointer::Valid(mut cur_ix) = cur {
1835 match self.tree[cur_ix].item.body {
1836 ItemBody::MaybeHtml => {
1837 let next = self.tree[cur_ix].next;
1838 let autolink = if let TreePointer::Valid(next_ix) = next {
1839 scan_autolink(self.text, self.tree[next_ix].item.start)
1840 } else {
1841 None
1842 };
1843
1844 if let Some((ix, uri, link_type)) = autolink {
1845 let node = scan_nodes_to_ix(&self.tree, next, ix);
1846 let text_node = self.tree.create_node(Item {
1847 start: self.tree[cur_ix].item.start + 1,
1848 end: ix - 1,
1849 body: ItemBody::Text,
1850 });
1851 let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
1852 self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
1853 self.tree[cur_ix].item.end = ix;
1854 self.tree[cur_ix].next = node;
1855 self.tree[cur_ix].child = TreePointer::Valid(text_node);
1856 prev = cur;
1857 cur = node;
1858 if let TreePointer::Valid(node_ix) = cur {
1859 self.tree[node_ix].item.start = ix;
1860 }
1861 continue;
1862 } else {
1863 let inline_html = if let TreePointer::Valid(next_ix) = next {
1864 self.tree.peek_up()
1865 .map(|parent_ix| self.tree[parent_ix].item.end)
1866 .and_then(|end_offset| {
1867 let bytes = &self.text.as_bytes()[..end_offset];
1868 scan_inline_html(bytes, self.tree[next_ix].item.start, &mut self.html_scan_guard)
1869 })
1870 } else {
1871 None
1872 };
1873 if let Some(ix) = inline_html {
1874 let node = scan_nodes_to_ix(&self.tree, next, ix);
1875 self.tree[cur_ix].item.body = ItemBody::InlineHtml;
1878 self.tree[cur_ix].item.end = ix;
1879 self.tree[cur_ix].next = node;
1880 prev = cur;
1881 cur = node;
1882 if let TreePointer::Valid(node_ix) = cur {
1883 self.tree[node_ix].item.start = ix;
1884 }
1885 continue;
1886 }
1887 }
1888 self.tree[cur_ix].item.body = ItemBody::Text;
1889 }
1890 ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
1891 if preceded_by_backslash {
1892 search_count -= 1;
1893 if search_count == 0 {
1894 self.tree[cur_ix].item.body = ItemBody::Text;
1895 prev = cur;
1896 cur = self.tree[cur_ix].next;
1897 continue;
1898 }
1899 }
1900
1901 if code_delims.is_populated() {
1902 if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
1905 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1906 } else {
1907 self.tree[cur_ix].item.body = ItemBody::Text;
1908 }
1909 } else {
1910 let mut scan = if search_count > 0 { self.tree[cur_ix].next } else { TreePointer::Nil };
1913 while let TreePointer::Valid(scan_ix) = scan {
1914 if let ItemBody::MaybeCode(delim_count, _) = self.tree[scan_ix].item.body {
1915 if search_count == delim_count {
1916 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
1917 code_delims.clear();
1918 break;
1919 } else {
1920 code_delims.insert(delim_count, scan_ix);
1921 }
1922 }
1923 scan = self.tree[scan_ix].next;
1924 }
1925 if scan == TreePointer::Nil {
1926 self.tree[cur_ix].item.body = ItemBody::Text;
1927 }
1928 }
1929 }
1930 ItemBody::MaybeLinkOpen => {
1931 self.tree[cur_ix].item.body = ItemBody::Text;
1932 self.link_stack.push(LinkStackEl { node: cur_ix, ty: LinkStackTy::Link });
1933 }
1934 ItemBody::MaybeImage => {
1935 self.tree[cur_ix].item.body = ItemBody::Text;
1936 self.link_stack.push(LinkStackEl { node: cur_ix, ty: LinkStackTy::Image });
1937 }
1938 ItemBody::MaybeLinkClose => {
1939 if let Some(tos) = self.link_stack.pop() {
1940 if tos.ty == LinkStackTy::Disabled {
1941 self.tree[cur_ix].item.body = ItemBody::Text;
1942 continue;
1943 }
1944 let next = self.tree[cur_ix].next;
1945
1946 if let Some((next_ix, url, title)) = scan_inline_link(self.text, self.tree[cur_ix].item.end) {
1947 let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
1948 if let TreePointer::Valid(prev_ix) = prev {
1949 self.tree[prev_ix].next = TreePointer::Nil;
1950 }
1951 cur = TreePointer::Valid(tos.node);
1952 cur_ix = tos.node;
1953 let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
1954 self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
1955 ItemBody::Image(link_ix)
1956 } else {
1957 ItemBody::Link(link_ix)
1958 };
1959 self.tree[cur_ix].child = self.tree[cur_ix].next;
1960 self.tree[cur_ix].next = next_node;
1961 if let TreePointer::Valid(next_node_ix) = next_node {
1962 self.tree[next_node_ix].item.start = next_ix;
1963 }
1964
1965 if tos.ty == LinkStackTy::Link {
1966 self.link_stack.disable_all_links();
1967 }
1968 } else {
1969 let scan_result = scan_reference(&self.tree, &self.text, next);
1972 let label_node = self.tree[tos.node].next;
1973 let node_after_link = match scan_result {
1974 RefScan::LinkLabel(_, next_node) => next_node,
1975 RefScan::Collapsed(next_node) => next_node,
1976 RefScan::Failed => next,
1977 };
1978 let link_type = match &scan_result {
1979 RefScan::LinkLabel(..) => LinkType::Reference,
1980 RefScan::Collapsed(..) => LinkType::Collapsed,
1981 RefScan::Failed => LinkType::Shortcut,
1982 };
1983 let label: Option<ReferenceLabel<'a>> = match scan_result {
1984 RefScan::LinkLabel(l, ..) => Some(ReferenceLabel::Link(l)),
1985 RefScan::Collapsed(..) | RefScan::Failed => {
1986 let start = self.tree[tos.node].item.end - 1;
1988 let end = self.tree[cur_ix].item.end;
1989 let search_text = &self.text[start..end];
1990
1991 scan_link_label(search_text).map(|(_ix, label)| label)
1992 }
1993 };
1994
1995 if let Some(ReferenceLabel::Footnote(l)) = label {
1997 self.tree[tos.node].next = node_after_link;
1998 self.tree[tos.node].child = TreePointer::Nil;
1999 self.tree[tos.node].item.body = ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
2000 prev = TreePointer::Valid(tos.node);
2001 cur = node_after_link;
2002 self.link_stack.clear();
2003 continue;
2004 } else if let Some(ReferenceLabel::Link(link_label)) = label {
2005 let type_url_title = if let Some(matching_def) = self.allocs.refdefs.get(&UniCase::new(link_label.as_ref().into())) {
2006 let title = matching_def.title.as_ref().cloned().unwrap_or("".into());
2008 let url = matching_def.dest.clone();
2009 Some((link_type, url, title))
2010 } else if let Some(callback) = self.broken_link_callback {
2011 if let Some((url, title)) = callback(link_label.as_ref(), link_label.as_ref()) {
2014 Some((link_type.to_unknown(), url.into(), title.into()))
2015 } else {
2016 None
2017 }
2018 } else {
2019 None
2020 };
2021
2022 if let Some((def_link_type, url, title)) = type_url_title {
2023 let link_ix = self.allocs.allocate_link(def_link_type, url, title);
2024 self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image {
2025 ItemBody::Image(link_ix)
2026 } else {
2027 ItemBody::Link(link_ix)
2028 };
2029
2030 self.tree[tos.node].next = node_after_link;
2033
2034 self.tree[tos.node].child = label_node;
2036
2037 if let TreePointer::Valid(prev_ix) = prev {
2039 self.tree[prev_ix].next = TreePointer::Nil;
2040 }
2041
2042 cur = TreePointer::Valid(tos.node);
2044 cur_ix = tos.node;
2045
2046 if tos.ty == LinkStackTy::Link {
2047 self.link_stack.disable_all_links();
2048 }
2049 } else {
2050 self.tree[cur_ix].item.body = ItemBody::Text;
2051 }
2052 } else {
2053 self.tree[cur_ix].item.body = ItemBody::Text;
2054 }
2055 }
2056 } else {
2057 self.tree[cur_ix].item.body = ItemBody::Text;
2058 }
2059 }
2060 _ => (),
2061 }
2062 prev = cur;
2063 cur = self.tree[cur_ix].next;
2064 }
2065 self.link_stack.clear();
2066 }
2067
2068 fn handle_emphasis(&mut self) {
2069 let mut prev = TreePointer::Nil;
2070 let mut prev_ix: TreeIndex;
2071 let mut cur = self.tree.cur();
2072 while let TreePointer::Valid(mut cur_ix) = cur {
2073 if let ItemBody::MaybeEmphasis(mut count, can_open, can_close) = self.tree[cur_ix].item.body {
2074 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
2075 let both = can_open && can_close;
2076 if can_close {
2077 while let Some(el) = self.inline_stack.find_match(&mut self.tree, c, count, both) {
2078 if let TreePointer::Valid(prev_ix) = prev {
2080 self.tree[prev_ix].next = TreePointer::Nil;
2081 }
2082 let match_count = min(count, el.count);
2083 let mut end = cur_ix - 1;
2085 let mut start = el.start + el.count;
2086
2087 while start > el.start + el.count - match_count {
2089 let (inc, ty) = if c == b'~' {
2090 (2, ItemBody::Strikethrough)
2091 } else if start > el.start + el.count - match_count + 1 {
2092 (2, ItemBody::Strong)
2093 } else {
2094 (1, ItemBody::Emphasis)
2095 };
2096
2097 let root = start - inc;
2098 end = end + inc;
2099 self.tree[root].item.body = ty;
2100 self.tree[root].item.end = self.tree[end].item.end;
2101 self.tree[root].child = TreePointer::Valid(start);
2102 self.tree[root].next = TreePointer::Nil;
2103 start = root;
2104 }
2105
2106 prev_ix = el.start + el.count - match_count;
2108 prev = TreePointer::Valid(prev_ix);
2109 cur = self.tree[cur_ix + match_count - 1].next;
2110 self.tree[prev_ix].next = cur;
2111
2112 if el.count > match_count {
2113 self.inline_stack.push(InlineEl {
2114 start: el.start,
2115 count: el.count - match_count,
2116 c: el.c,
2117 both,
2118 })
2119 }
2120 count -= match_count;
2121 if count > 0 {
2122 cur_ix = cur.unwrap();
2123 } else {
2124 break;
2125 }
2126 }
2127 }
2128 if count > 0 {
2129 if can_open {
2130 self.inline_stack.push(InlineEl {
2131 start: cur_ix,
2132 count,
2133 c,
2134 both,
2135 });
2136 } else {
2137 for i in 0..count {
2138 self.tree[cur_ix + i].item.body = ItemBody::Text;
2139 }
2140 }
2141 prev_ix = cur_ix + count - 1;
2142 prev = TreePointer::Valid(prev_ix);
2143 cur = self.tree[prev_ix].next;
2144 }
2145 } else {
2146 prev = cur;
2147 cur = self.tree[cur_ix].next;
2148 }
2149 }
2150 self.inline_stack.pop_all(&mut self.tree);
2151 }
2152
2153 fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2157 let first_ix = open + 1;
2158 let last_ix = close - 1;
2159 let bytes = self.text.as_bytes();
2160 let mut span_start = self.tree[first_ix].item.start;
2161 let mut span_end = self.tree[close].item.start;
2162 let mut buf: Option<String> = None;
2163
2164 if ! bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2166 let opening = match bytes[span_start] { b' ' | b'\r' | b'\n' => true, _ => false };
2167 let closing = match bytes[span_end - 1] { b' ' | b'\r' | b'\n' => true, _ => false };
2168 let drop_enclosing_whitespace = opening && closing;
2169
2170 if drop_enclosing_whitespace {
2171 span_start += 1;
2172 if span_start < span_end {
2173 span_end -= 1;
2174 }
2175 }
2176
2177 let mut ix = first_ix;
2178
2179 while ix < close {
2180 match self.tree[ix].item.body {
2181 ItemBody::HardBreak | ItemBody::SoftBreak => {
2182 if drop_enclosing_whitespace &&
2183 (ix == first_ix && bytes[self.tree[ix].item.start] != b'\\') ||
2184 (ix == last_ix && last_ix > first_ix) {
2185 } else {
2187 let end = bytes[self.tree[ix].item.start..]
2188 .iter()
2189 .position(|&b| b == b'\r' || b == b'\n')
2190 .unwrap()
2191 + self.tree[ix].item.start;
2192 if let Some(ref mut buf) = buf {
2193 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2194 buf.push(' ');
2195 } else {
2196 let mut new_buf = String::with_capacity(span_end - span_start);
2197 new_buf.push_str(&self.text[span_start..end]);
2198 new_buf.push(' ');
2199 buf = Some(new_buf);
2200 }
2201 }
2202 }
2203 _ => {
2204 if let Some(ref mut buf) = buf {
2205 let end = if ix == last_ix {
2206 span_end
2207 } else {
2208 self.tree[ix].item.end
2209 };
2210 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2211 }
2212 }
2213 }
2214 ix = ix + 1;
2215 }
2216 }
2217
2218 let cow = if let Some(buf) = buf {
2219 buf.into()
2220 } else {
2221 self.text[span_start..span_end].into()
2222 };
2223 if preceding_backslash {
2224 self.tree[open].item.body = ItemBody::Text;
2225 self.tree[open].item.end = self.tree[open].item.start + 1;
2226 self.tree[open].next = TreePointer::Valid(close);
2227 self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2228 self.tree[close].item.start = self.tree[open].item.start + 1;
2229 } else {
2230 self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2231 self.tree[open].item.end = self.tree[close].item.end;
2232 self.tree[open].next = self.tree[close].next;
2233 }
2234 }
2235
2236 pub fn into_offset_iter(self) -> OffsetIter<'a> {
2237 OffsetIter {
2238 inner: self,
2239 }
2240 }
2241}
2242
2243pub(crate) enum LoopInstruction<T> {
2244 ContinueAndSkip(usize),
2246 BreakAtWith(usize, T)
2248}
2249
2250fn iterate_special_bytes<F, T>(bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
2261 where F: FnMut(usize, u8) -> LoopInstruction<Option<T>>
2262{
2263 #[cfg(all(target_arch = "x86_64", feature="simd"))]
2264 { crate::simd::iterate_special_bytes(bytes, ix, callback) }
2265 #[cfg(not(all(target_arch = "x86_64", feature="simd")))]
2266 { scalar_iterate_special_bytes(bytes, ix, callback) }
2267}
2268
2269pub(crate) fn scalar_iterate_special_bytes<F, T>(bytes: &[u8], mut ix: usize, mut callback: F) -> (usize, Option<T>)
2270 where F: FnMut(usize, u8) -> LoopInstruction<Option<T>>
2271{
2272 while ix < bytes.len() {
2273 match callback(ix, bytes[ix]) {
2274 LoopInstruction::ContinueAndSkip(skip) => {
2275 ix += skip + 1;
2276 }
2277 LoopInstruction::BreakAtWith(ix, val) => {
2278 return (ix, val);
2279 }
2280 }
2281 }
2282
2283 (ix, None)
2284}
2285
2286pub struct OffsetIter<'a> {
2287 inner: Parser<'a>,
2288}
2289
2290impl<'a> Iterator for OffsetIter<'a> {
2291 type Item = (Event<'a>, Range<usize>);
2292
2293 fn next(&mut self) -> Option<Self::Item> {
2294 match self.inner.tree.cur() {
2295 TreePointer::Nil => {
2296 let ix = self.inner.tree.pop()?;
2297 let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
2298 self.inner.tree.next_sibling(ix);
2299 Some((Event::End(tag), self.inner.tree[ix].item.start..self.inner.tree[ix].item.end))
2300 }
2301 TreePointer::Valid(cur_ix) => {
2302 if self.inner.tree[cur_ix].item.body.is_inline() {
2303 self.inner.handle_inline();
2304 }
2305
2306 let node = self.inner.tree[cur_ix];
2307 let item = node.item;
2308 let event = item_to_event(item, self.inner.text, &self.inner.allocs);
2309 if let Event::Start(..) = event {
2310 self.inner.tree.push();
2311 } else {
2312 self.inner.tree.next_sibling(cur_ix);
2313 }
2314 Some((event, item.start..item.end))
2315 }
2316 }
2317 }
2318}
2319
2320fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
2321 match item.body {
2322 ItemBody::Paragraph => Tag::Paragraph,
2323 ItemBody::Emphasis => Tag::Emphasis,
2324 ItemBody::Strong => Tag::Strong,
2325 ItemBody::Strikethrough => Tag::Strikethrough,
2326 ItemBody::Link(link_ix) => {
2327 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2328 Tag::Link(*link_type, url.clone(), title.clone())
2329 }
2330 ItemBody::Image(link_ix) => {
2331 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2332 Tag::Image(*link_type, url.clone(), title.clone())
2333 }
2334 ItemBody::Rule => Tag::Rule,
2335 ItemBody::Header(level) => Tag::Header(level),
2336 ItemBody::FencedCodeBlock(cow_ix) =>
2337 Tag::CodeBlock(allocs[cow_ix].clone()),
2338 ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()),
2339 ItemBody::BlockQuote => Tag::BlockQuote,
2340 ItemBody::List(_, c, listitem_start) => {
2341 if c == b'.' || c == b')' {
2342 Tag::List(Some(listitem_start))
2343 } else {
2344 Tag::List(None)
2345 }
2346 }
2347 ItemBody::ListItem(_) => Tag::Item,
2348 ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2349 ItemBody::TableHead => Tag::TableHead,
2350 ItemBody::TableCell => Tag::TableCell,
2351 ItemBody::TableRow => Tag::TableRow,
2352 ItemBody::Table(alignment_ix) => {
2353 Tag::Table(allocs[alignment_ix].clone())
2354 }
2355 ItemBody::FootnoteDefinition(cow_ix) =>
2356 Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2357 _ => panic!("unexpected item body {:?}", item.body)
2358 }
2359}
2360
2361fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
2362 let tag = match item.body {
2363 ItemBody::Text =>
2364 return Event::Text(text[item.start..item.end].into()),
2365 ItemBody::Code(cow_ix) =>
2366 return Event::Code(allocs[cow_ix].clone()),
2367 ItemBody::SynthesizeText(cow_ix) =>
2368 return Event::Text(allocs[cow_ix].clone()),
2369 ItemBody::Html =>
2370 return Event::Html(text[item.start..item.end].into()),
2371 ItemBody::InlineHtml =>
2372 return Event::InlineHtml(text[item.start..item.end].into()),
2373 ItemBody::SoftBreak => return Event::SoftBreak,
2374 ItemBody::HardBreak => return Event::HardBreak,
2375 ItemBody::FootnoteReference(cow_ix) =>
2376 return Event::FootnoteReference(allocs[cow_ix].clone()),
2377 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2378
2379 ItemBody::Paragraph => Tag::Paragraph,
2380 ItemBody::Emphasis => Tag::Emphasis,
2381 ItemBody::Strong => Tag::Strong,
2382 ItemBody::Strikethrough => Tag::Strikethrough,
2383 ItemBody::Link(link_ix) => {
2384 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2385 Tag::Link(*link_type, url.clone(), title.clone())
2386 }
2387 ItemBody::Image(link_ix) => {
2388 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2389 Tag::Image(*link_type, url.clone(), title.clone())
2390 }
2391 ItemBody::Rule => Tag::Rule,
2392 ItemBody::Header(level) => Tag::Header(level),
2393 ItemBody::FencedCodeBlock(cow_ix) =>
2394 Tag::CodeBlock(allocs[cow_ix].clone()),
2395 ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()),
2396 ItemBody::BlockQuote => Tag::BlockQuote,
2397 ItemBody::List(_, c, listitem_start) => {
2398 if c == b'.' || c == b')' {
2399 Tag::List(Some(listitem_start))
2400 } else {
2401 Tag::List(None)
2402 }
2403 }
2404 ItemBody::ListItem(_) => Tag::Item,
2405 ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2406 ItemBody::TableHead => Tag::TableHead,
2407 ItemBody::TableCell => Tag::TableCell,
2408 ItemBody::TableRow => Tag::TableRow,
2409 ItemBody::Table(alignment_ix) => {
2410 Tag::Table(allocs[alignment_ix].clone())
2411 }
2412 ItemBody::FootnoteDefinition(cow_ix) =>
2413 Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2414 _ => panic!("unexpected item body {:?}", item.body)
2415 };
2416
2417 Event::Start(tag)
2418}
2419
2420fn surgerize_tight_list<'a>(tree : &mut Tree<Item>, list_ix: TreeIndex) {
2422 let mut list_item = tree[list_ix].child;
2423 while let TreePointer::Valid(listitem_ix) = list_item {
2424 let list_item_firstborn = tree[listitem_ix].child;
2426
2427 if let TreePointer::Valid(firstborn_ix) = list_item_firstborn {
2429 if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2430 tree[listitem_ix].child = tree[firstborn_ix].child;
2431 }
2432
2433 let mut list_item_child = TreePointer::Valid(firstborn_ix);
2434 let mut node_to_repoint = TreePointer::Nil;
2435 while let TreePointer::Valid(child_ix) = list_item_child {
2436 let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2438 if let TreePointer::Valid(child_firstborn) = tree[child_ix].child {
2439 if let TreePointer::Valid(repoint_ix) = node_to_repoint {
2440 tree[repoint_ix].next = TreePointer::Valid(child_firstborn);
2441 }
2442 let mut child_lastborn = child_firstborn;
2443 while let TreePointer::Valid(lastborn_next_ix) = tree[child_lastborn].next {
2444 child_lastborn = lastborn_next_ix;
2445 }
2446 child_lastborn
2447 } else {
2448 child_ix
2449 }
2450 } else {
2451 child_ix
2452 };
2453
2454 node_to_repoint = TreePointer::Valid(repoint_ix);
2455 tree[repoint_ix].next = tree[child_ix].next;
2456 list_item_child = tree[child_ix].next;
2457 }
2458 }
2459
2460 list_item = tree[listitem_ix].next;
2461 }
2462}
2463
2464impl<'a> Iterator for Parser<'a> {
2465 type Item = Event<'a>;
2466
2467 fn next(&mut self) -> Option<Event<'a>> {
2468 match self.tree.cur() {
2469 TreePointer::Nil => {
2470 let ix = self.tree.pop()?;
2471 let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
2472 self.offset = self.tree[ix].item.end;
2473 self.tree.next_sibling(ix);
2474 Some(Event::End(tag))
2475 }
2476 TreePointer::Valid(cur_ix) => {
2477 if self.tree[cur_ix].item.body.is_inline() {
2478 self.handle_inline();
2479 }
2480
2481 let node = self.tree[cur_ix];
2482 let item = node.item;
2483 let event = item_to_event(item, self.text, &self.allocs);
2484 if let Event::Start(..) = event {
2485 self.offset = if let TreePointer::Valid(child_ix) = node.child {
2486 self.tree[child_ix].item.start
2487 } else {
2488 item.end
2489 };
2490 self.tree.push();
2491 } else {
2492 self.offset = item.end;
2493 self.tree.next_sibling(cur_ix);
2494 }
2495 Some(event)
2496 }
2497 }
2498 }
2499}
2500
2501#[cfg(test)]
2502mod test {
2503 use super::*;
2504 use crate::tree::Node;
2505
2506 fn parser_with_extensions(text: &str) -> Parser<'_> {
2509 let mut opts = Options::empty();
2510 opts.insert(Options::ENABLE_TABLES);
2511 opts.insert(Options::ENABLE_FOOTNOTES);
2512 opts.insert(Options::ENABLE_STRIKETHROUGH);
2513 opts.insert(Options::ENABLE_TASKLISTS);
2514
2515 Parser::new_ext(text, opts)
2516 }
2517
2518 #[test]
2519 #[cfg(target_pointer_width = "64")]
2520 fn node_size() {
2521 let node_size = std::mem::size_of::<Node<Item>>();
2522 assert_eq!(48, node_size);
2523 }
2524
2525 #[test]
2526 #[cfg(target_pointer_width = "64")]
2527 fn body_size() {
2528 let body_size = std::mem::size_of::<ItemBody>();
2529 assert_eq!(16, body_size);
2530 }
2531
2532 #[test]
2533 fn single_open_fish_bracket() {
2534 assert_eq!(3, Parser::new("<").count());
2536 }
2537
2538 #[test]
2539 fn lone_hashtag() {
2540 assert_eq!(2, Parser::new("#").count());
2542 }
2543
2544 #[test]
2545 fn lots_of_backslashes() {
2546 Parser::new("\\\\\r\r").count();
2548 Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2549 }
2550
2551 #[test]
2552 fn issue_320() {
2553 parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2555 }
2556
2557 #[test]
2558 fn issue_319() {
2559 parser_with_extensions("|\r-]([^|\r-]([^").count();
2561 parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2562 }
2563
2564 #[test]
2565 fn issue_303() {
2566 parser_with_extensions("[^\r\ra]").count();
2568 parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2569 }
2570
2571 #[test]
2572 fn issue_313() {
2573 parser_with_extensions("*]0[^\r\r*]0[^").count();
2575 parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2576 }
2577
2578 #[test]
2579 fn issue_283() {
2580 let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2581 parser_with_extensions(input).count();
2583 }
2584
2585 #[test]
2586 fn issue_289() {
2587 parser_with_extensions("> - \\\n> - ").count();
2589 parser_with_extensions("- \n\n").count();
2590 }
2591
2592 #[test]
2593 fn issue_306() {
2594 parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2596 }
2597
2598 #[test]
2599 fn issue_305() {
2600 parser_with_extensions("_6**6*_*").count();
2602 }
2603
2604 #[test]
2605 fn another_emphasis_panic() {
2606 parser_with_extensions("*__#_#__*").count();
2607 }
2608
2609 #[test]
2610 fn offset_iter() {
2611 let event_offsets: Vec<_> = Parser::new("*hello* world")
2612 .into_offset_iter()
2613 .map(|(_ev, range)| range)
2614 .collect();
2615 let expected_offsets = vec![
2616 (0..13),
2617 (0..7),
2618 (1..6),
2619 (0..7),
2620 (7..13),
2621 (0..13)
2622 ];
2623 assert_eq!(expected_offsets, event_offsets);
2624 }
2625
2626 #[test]
2628 fn link_def_at_eof() {
2629 let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
2630 let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
2631
2632 let mut buf = String::new();
2633 crate::html::push_html(&mut buf, Parser::new(test_str));
2634 assert_eq!(expected, buf);
2635 }
2636
2637 #[test]
2638 fn ref_def_at_eof() {
2639 let test_str = "[test]:\\";
2640 let expected = "";
2641
2642 let mut buf = String::new();
2643 crate::html::push_html(&mut buf, Parser::new(test_str));
2644 assert_eq!(expected, buf);
2645 }
2646
2647 #[test]
2648 fn ref_def_cr_lf() {
2649 let test_str = "[a]: /u\r\n\n[a]";
2650 let expected = "<p><a href=\"/u\">a</a></p>\n";
2651
2652 let mut buf = String::new();
2653 crate::html::push_html(&mut buf, Parser::new(test_str));
2654 assert_eq!(expected, buf);
2655 }
2656
2657 #[test]
2658 fn no_dest_refdef() {
2659 let test_str = "[a]:";
2660 let expected = "<p>[a]:</p>\n";
2661
2662 let mut buf = String::new();
2663 crate::html::push_html(&mut buf, Parser::new(test_str));
2664 assert_eq!(expected, buf);
2665 }
2666
2667 #[test]
2668 fn simple_broken_link_callback() {
2669 let test_str = "This is a link w/o def: [hello][world]";
2670 let parser = Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&|norm, raw| {
2671 assert_eq!("world", raw);
2672 assert_eq!("world", norm);
2673 Some(("YOLO".to_owned(), "SWAG".to_owned()))
2674 }));
2675 let mut link_tag_count = 0;
2676 for (typ, url, title) in parser.filter_map(|event| match event {
2677 Event::Start(tag) | Event::End(tag) => match tag {
2678 Tag::Link(typ, url, title) => Some((typ, url, title)),
2679 _ => None,
2680 }
2681 _ => None,
2682 }) {
2683 link_tag_count += 1;
2684 assert_eq!(typ, LinkType::ReferenceUnknown);
2685 assert_eq!(url.as_ref(), "YOLO");
2686 assert_eq!(title.as_ref(), "SWAG");
2687 }
2688 assert!(link_tag_count > 0);
2689 }
2690}