1use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::ops::{Index, Range};
26
27use unicase::UniCase;
28
29use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel};
30use crate::scanners::*;
31use crate::strings::CowStr;
32use crate::tree::{Tree, TreeIndex, TreePointer};
33
34const LINK_MAX_NESTED_PARENS: usize = 5;
40
41#[derive(Clone, Debug, PartialEq)]
43pub enum CodeBlockKind<'a> {
44 Indented,
45 Fenced(CowStr<'a>),
47}
48
49impl<'a> CodeBlockKind<'a> {
50 pub fn is_indented(&self) -> bool {
51 match *self {
52 CodeBlockKind::Indented => true,
53 _ => false,
54 }
55 }
56
57 pub fn is_fenced(&self) -> bool {
58 match *self {
59 CodeBlockKind::Fenced(_) => true,
60 _ => false,
61 }
62 }
63}
64
65#[derive(Clone, Debug, PartialEq)]
67pub enum Tag<'a> {
68 Paragraph,
70
71 Heading(u32),
73
74 BlockQuote,
75 CodeBlock(CodeBlockKind<'a>),
77 DisplayMath,
79
80 List(Option<u64>), Item,
85 FootnoteDefinition(CowStr<'a>),
88
89 Table(Vec<Alignment>),
91 TableHead,
94 TableRow,
96 TableCell,
97
98 Emphasis,
100 Strong,
101 Strikethrough,
102
103 Link(LinkType, CowStr<'a>, CowStr<'a>),
105
106 Image(LinkType, CowStr<'a>, CowStr<'a>),
108}
109
110#[derive(Clone, Debug, PartialEq, Copy)]
112pub enum LinkType {
113 Inline,
115 Reference,
117 ReferenceUnknown,
119 Collapsed,
121 CollapsedUnknown,
123 Shortcut,
125 ShortcutUnknown,
127 Autolink,
129 Email,
131}
132
133impl LinkType {
134 fn to_unknown(self) -> Self {
135 match self {
136 LinkType::Reference => LinkType::ReferenceUnknown,
137 LinkType::Collapsed => LinkType::CollapsedUnknown,
138 LinkType::Shortcut => LinkType::ShortcutUnknown,
139 _ => unreachable!(),
140 }
141 }
142}
143
144#[derive(Clone, Debug, PartialEq)]
148pub enum Event<'a> {
149 Start(Tag<'a>),
153 End(Tag<'a>),
155 Frontmatter(CowStr<'a>),
157 Text(CowStr<'a>),
159 Math(CowStr<'a>),
161 Code(CowStr<'a>),
163 Html(CowStr<'a>),
165 FootnoteReference(CowStr<'a>),
169 SoftBreak,
171 HardBreak,
173 Rule,
175 TaskListMarker(bool),
177}
178
179#[derive(Copy, Clone, Debug, PartialEq)]
181pub enum Alignment {
182 None,
184 Left,
185 Center,
186 Right,
187}
188
189bitflags! {
190 pub struct Options: u32 {
193 const ENABLE_TABLES = 1 << 1;
194 const ENABLE_FOOTNOTES = 1 << 2;
195 const ENABLE_STRIKETHROUGH = 1 << 3;
196 const ENABLE_TASKLISTS = 1 << 4;
197 const ENABLE_FRONTMATTER = 1 << 5;
198 const ENABLE_MATH = 1 << 6;
199 }
200}
201
202#[derive(Debug, Default, Clone, Copy)]
203struct Item {
204 start: usize,
205 end: usize,
206 body: ItemBody,
207}
208
209#[derive(Debug, PartialEq, Clone, Copy)]
210enum ItemBody {
211 Frontmatter(CowIndex),
212 Paragraph,
213 Text,
214 SoftBreak,
215 HardBreak,
216
217 MaybeEmphasis(usize, bool, bool),
221 MaybeCode(usize, bool), MaybeMath(usize, bool),
223 MaybeHtml,
224 MaybeLinkOpen,
225 MaybeLinkClose,
226 MaybeImage,
227
228 Emphasis,
230 Strong,
231 Strikethrough,
232 Code(CowIndex),
233 Math(CowIndex),
234 Link(LinkIndex),
235 Image(LinkIndex),
236 FootnoteReference(CowIndex),
237 TaskListMarker(bool), Rule,
240 Heading(u32), FencedCodeBlock(CowIndex),
242 DisplayMath,
243 IndentCodeBlock,
244 Html,
245 BlockQuote,
246 List(bool, u8, u64), ListItem(usize), SynthesizeText(CowIndex),
249 FootnoteDefinition(CowIndex),
250
251 Table(AlignmentIndex),
253 TableHead,
254 TableRow,
255 TableCell,
256
257 Root,
259}
260
261impl<'a> ItemBody {
262 fn is_inline(&self) -> bool {
263 match *self {
264 ItemBody::MaybeEmphasis(..)
265 | ItemBody::MaybeHtml
266 | ItemBody::MaybeCode(..)
267 | ItemBody::MaybeMath(..)
268 | ItemBody::MaybeLinkOpen
269 | ItemBody::MaybeLinkClose
270 | ItemBody::MaybeImage => true,
271 _ => false,
272 }
273 }
274}
275
276impl<'a> Default for ItemBody {
277 fn default() -> Self {
278 ItemBody::Root
279 }
280}
281
282#[derive(PartialEq, Eq, Copy, Clone)]
284enum TableParseMode {
285 Scan,
287 Active,
289 Disabled,
291}
292
293struct FirstPass<'a> {
298 text: &'a str,
299 tree: Tree<Item>,
300 begin_list_item: bool,
301 last_line_blank: bool,
302 allocs: Allocations<'a>,
303 options: Options,
304 list_nesting: usize,
305}
306
307impl<'a> FirstPass<'a> {
308 fn new(text: &'a str, options: Options) -> FirstPass {
309 let start_capacity = max(128, text.len() / 32);
312 let tree = Tree::with_capacity(start_capacity);
313 let begin_list_item = false;
314 let last_line_blank = false;
315 let allocs = Allocations::new();
316 FirstPass {
317 text,
318 tree,
319 begin_list_item,
320 last_line_blank,
321 allocs,
322 options,
323 list_nesting: 0,
324 }
325 }
326
327 fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
328 let mut ix = 0;
329 if ix < self.text.len() && self.options.contains(Options::ENABLE_FRONTMATTER) {
330 ix = self.parse_frontmatter();
331 }
332 while ix < self.text.len() {
333 ix = self.parse_block(ix);
334 }
335 for _ in 0..self.tree.spine_len() {
336 self.pop(ix);
337 }
338 (self.tree, self.allocs)
339 }
340
341 fn parse_frontmatter(&mut self) -> usize {
344 let bytes = self.text.as_bytes();
345 if let Some(n) = scan_frontmatter_delimiter(bytes) {
346 let mut start_ix = n;
347 let start = start_ix;
348 let mut empty = true;
349 loop {
350 if let Some(n) = scan_frontmatter_delimiter(&bytes[start_ix..]) {
351 if empty {
352 return 0;
353 }
354 self.tree.append(Item {
355 start,
356 end: start_ix,
357 body: ItemBody::Frontmatter(
358 self.allocs.allocate_cow(self.text[start..start_ix].into()),
359 ),
360 });
361 return start_ix + n;
362 } else {
363 empty = false;
364 start_ix += scan_nextline(&bytes[start_ix..]);
365 if start_ix >= bytes.len() {
366 return 0;
367 }
368 }
369 }
370 } else {
371 0
372 }
373 }
374
375 fn parse_block(&mut self, mut start_ix: usize) -> usize {
377 let bytes = self.text.as_bytes();
378 let mut line_start = LineStart::new(&bytes[start_ix..]);
379
380 let i = scan_containers(&self.tree, &mut line_start);
381 for _ in i..self.tree.spine_len() {
382 self.pop(start_ix);
383 }
384
385 if self.options.contains(Options::ENABLE_FOOTNOTES) {
386 if let Some(node_ix) = self.tree.peek_up() {
388 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
389 if self.last_line_blank {
390 self.pop(start_ix);
391 }
392 }
393 }
394
395 let container_start = start_ix + line_start.bytes_scanned();
399 if let Some(bytecount) = self.parse_footnote(container_start) {
400 start_ix = container_start + bytecount;
401 start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
402 line_start = LineStart::new(&bytes[start_ix..]);
403 }
404 }
405
406 loop {
408 let container_start = start_ix + line_start.bytes_scanned();
409 if let Some((ch, index, indent)) = line_start.scan_list_marker() {
410 let after_marker_index = start_ix + line_start.bytes_scanned();
411 self.continue_list(container_start, ch, index);
412 self.tree.append(Item {
413 start: container_start,
414 end: after_marker_index, body: ItemBody::ListItem(indent),
416 });
417 self.tree.push();
418 if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
419 self.begin_list_item = true;
420 return after_marker_index + n;
421 }
422 if self.options.contains(Options::ENABLE_TASKLISTS) {
423 if let Some(is_checked) = line_start.scan_task_list_marker() {
424 self.tree.append(Item {
425 start: after_marker_index,
426 end: start_ix + line_start.bytes_scanned(),
427 body: ItemBody::TaskListMarker(is_checked),
428 });
429 }
430 }
431 } else if line_start.scan_blockquote_marker() {
432 self.finish_list(start_ix);
433 self.tree.append(Item {
434 start: container_start,
435 end: 0, body: ItemBody::BlockQuote,
437 });
438 self.tree.push();
439 } else {
440 break;
441 }
442 }
443
444 let ix = start_ix + line_start.bytes_scanned();
445
446 if let Some(n) = scan_blank_line(&bytes[ix..]) {
447 if let Some(node_ix) = self.tree.peek_up() {
448 match self.tree[node_ix].item.body {
449 ItemBody::BlockQuote => (),
450 _ => {
451 if self.begin_list_item {
452 self.pop(start_ix);
454 }
455 self.last_line_blank = true;
456 }
457 }
458 }
459 return ix + n;
460 }
461
462 self.begin_list_item = false;
463 self.finish_list(start_ix);
464
465 let remaining_space = line_start.remaining_space();
467
468 let indent = line_start.scan_space_upto(4);
469 if indent == 4 {
470 let ix = start_ix + line_start.bytes_scanned();
471 let remaining_space = line_start.remaining_space();
472 return self.parse_indented_code_block(ix, remaining_space);
473 }
474
475 let ix = start_ix + line_start.bytes_scanned();
476
477 if bytes[ix] == b'<' {
479 if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
482 return self.parse_html_block_type_1_to_5(ix, html_end_tag, remaining_space);
483 }
484
485 let possible_tag = scan_html_block_tag(&bytes[(ix + 1)..]).1;
487 if is_html_tag(possible_tag) {
488 return self.parse_html_block_type_6_or_7(ix, remaining_space);
489 }
490
491 if let Some(_html_bytes) = scan_html_type_7(&bytes[(ix + 1)..]) {
493 return self.parse_html_block_type_6_or_7(ix, remaining_space);
494 }
495 }
496
497 if let Ok(n) = scan_hrule(&bytes[ix..]) {
498 return self.parse_hrule(n, ix);
499 }
500
501 if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
502 return self.parse_atx_heading(ix, atx_size);
503 }
504
505 if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
507 self.allocs.refdefs.entry(label).or_insert(link_def);
508 let ix = ix + bytecount;
509 return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
512 }
513
514 if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
515 return self.parse_fenced_code_block(ix, indent, fence_ch, n);
516 }
517 if scan_display_math(&bytes[ix..]) && self.options.contains(Options::ENABLE_MATH) {
518 return self.parse_display_math(ix);
519 }
520 self.parse_paragraph(ix)
521 }
522
523 fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
527 let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
529 self.tree[thead_ix].item.body = ItemBody::TableHead;
530
531 let mut ix = body_start;
533 while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
534 ix = next_ix;
535 }
536
537 self.pop(ix);
538 ix
539 }
540
541 fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
544 let bytes = self.text.as_bytes();
545 let mut cells = 0;
546 let mut final_cell_ix = None;
547
548 let row_ix = self.tree.append(Item {
549 start: ix,
550 end: 0, body: ItemBody::TableRow,
552 });
553 self.tree.push();
554
555 loop {
556 ix += scan_ch(&bytes[ix..], b'|');
557 ix += scan_whitespace_no_nl(&bytes[ix..]);
558
559 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
560 ix += eol_bytes;
561 break;
562 }
563
564 let cell_ix = self.tree.append(Item {
565 start: ix,
566 end: ix,
567 body: ItemBody::TableCell,
568 });
569 self.tree.push();
570 let (next_ix, _brk) = self.parse_line(ix, TableParseMode::Active);
571 let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
572
573 if let TreePointer::Valid(cur_ix) = self.tree.cur() {
574 self.tree[cur_ix].item.end -= trailing_whitespace;
575 }
576
577 self.tree[cell_ix].item.end = next_ix - trailing_whitespace;
578 self.tree.pop();
579
580 ix = next_ix;
581 cells += 1;
582
583 if cells == row_cells {
584 final_cell_ix = Some(cell_ix);
585 }
586 }
587
588 for _ in cells..row_cells {
592 self.tree.append(Item {
593 start: ix,
594 end: ix,
595 body: ItemBody::TableCell,
596 });
597 }
598
599 if let Some(cell_ix) = final_cell_ix {
601 self.tree[cell_ix].next = TreePointer::Nil;
602 }
603
604 self.pop(ix);
605
606 (ix, row_ix)
607 }
608
609 fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
611 let bytes = self.text.as_bytes();
612 let mut line_start = LineStart::new(&bytes[ix..]);
613 let containers = scan_containers(&self.tree, &mut line_start);
614 if containers != self.tree.spine_len() {
615 return None;
616 }
617 line_start.scan_all_space();
618 ix += line_start.bytes_scanned();
619 if scan_paragraph_interrupt(&bytes[ix..]) {
620 return None;
621 }
622
623 let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
624 Some((ix, row_ix))
625 }
626
627 fn parse_paragraph(&mut self, start_ix: usize) -> usize {
629 let node_ix = self.tree.append(Item {
630 start: start_ix,
631 end: 0, body: ItemBody::Paragraph,
633 });
634 self.tree.push();
635 let bytes = self.text.as_bytes();
636
637 let mut ix = start_ix;
638 loop {
639 let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
640 TableParseMode::Scan
641 } else {
642 TableParseMode::Disabled
643 };
644 let (next_ix, brk) = self.parse_line(ix, scan_mode);
645
646 if let Some(Item {
648 body: ItemBody::Table(alignment_ix),
649 start,
650 end,
651 }) = brk
652 {
653 let table_cols = self.allocs[alignment_ix].len();
654 self.tree[node_ix].item = Item {
655 body: ItemBody::Table(alignment_ix),
656 start,
657 end,
658 };
659 self.tree[node_ix].child = TreePointer::Nil;
662 self.tree.pop();
663 self.tree.push();
664 return self.parse_table(table_cols, ix, next_ix);
665 }
666
667 ix = next_ix;
668 let mut line_start = LineStart::new(&bytes[ix..]);
669 let n_containers = scan_containers(&self.tree, &mut line_start);
670 if !line_start.scan_space(4) {
671 let ix_new = ix + line_start.bytes_scanned();
672 if n_containers == self.tree.spine_len() {
673 if let Some(ix_setext) = self.parse_setext_heading(ix_new, node_ix) {
674 if let Some(Item {
675 start,
676 body: ItemBody::HardBreak,
677 ..
678 }) = brk
679 {
680 if bytes[start] == b'\\' {
681 self.tree.append_text(start, start + 1);
682 }
683 }
684 ix = ix_setext;
685 break;
686 }
687 }
688 let suffix = &bytes[ix_new..];
690 if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
691 break;
692 }
693 }
694 line_start.scan_all_space();
695 if line_start.is_at_eol() {
696 break;
697 }
698 ix = next_ix + line_start.bytes_scanned();
699 if let Some(item) = brk {
700 self.tree.append(item);
701 }
702 }
703
704 self.pop(ix);
705 ix
706 }
707
708 fn parse_setext_heading(&mut self, ix: usize, node_ix: TreeIndex) -> Option<usize> {
710 let bytes = self.text.as_bytes();
711 let (n, level) = scan_setext_heading(&bytes[ix..])?;
712 self.tree[node_ix].item.body = ItemBody::Heading(level);
713
714 if let TreePointer::Valid(cur_ix) = self.tree.cur() {
716 self.tree[cur_ix].item.end -= scan_rev_while(
717 &bytes[..self.tree[cur_ix].item.end],
718 is_ascii_whitespace_no_nl,
719 );
720 }
721
722 Some(ix + n)
723 }
724
725 fn parse_line(&mut self, start: usize, mode: TableParseMode) -> (usize, Option<Item>) {
729 let bytes = &self.text.as_bytes();
730 let mut pipes = 0;
731 let mut last_pipe_ix = start;
732 let mut begin_text = start;
733
734 let (final_ix, brk) = iterate_special_bytes(bytes, start, |ix, byte| {
735 match byte {
736 b'\n' | b'\r' => {
737 if let TableParseMode::Active = mode {
738 return LoopInstruction::BreakAtWith(ix, None);
739 }
740
741 let mut i = ix;
742 let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
743 if mode == TableParseMode::Scan && pipes > 0 {
744 let next_line_ix = ix + eol_bytes;
746 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
747 if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
748 let table_head_ix = next_line_ix + line_start.bytes_scanned();
749 let (table_head_bytes, alignment) =
750 scan_table_head(&bytes[table_head_ix..]);
751
752 if table_head_bytes > 0 {
753 let header_count =
755 count_header_cols(bytes, pipes, start, last_pipe_ix);
756
757 if alignment.len() == header_count {
759 let alignment_ix = self.allocs.allocate_alignment(alignment);
760 let end_ix = table_head_ix + table_head_bytes;
761 return LoopInstruction::BreakAtWith(
762 end_ix,
763 Some(Item {
764 start: i,
765 end: end_ix, body: ItemBody::Table(alignment_ix),
767 }),
768 );
769 }
770 }
771 }
772 }
773
774 let end_ix = ix + eol_bytes;
775 let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
776 if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
777 i -= 1;
778 self.tree.append_text(begin_text, i);
779 return LoopInstruction::BreakAtWith(
780 end_ix,
781 Some(Item {
782 start: i,
783 end: end_ix,
784 body: ItemBody::HardBreak,
785 }),
786 );
787 }
788 let trailing_whitespace =
789 scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
790 if trailing_whitespace >= 2 {
791 i -= trailing_whitespace;
792 self.tree.append_text(begin_text, i);
793 return LoopInstruction::BreakAtWith(
794 end_ix,
795 Some(Item {
796 start: i,
797 end: end_ix,
798 body: ItemBody::HardBreak,
799 }),
800 );
801 }
802
803 self.tree.append_text(begin_text, ix);
804 LoopInstruction::BreakAtWith(
805 end_ix,
806 Some(Item {
807 start: i,
808 end: end_ix,
809 body: ItemBody::SoftBreak,
810 }),
811 )
812 }
813 b'\\' => {
814 if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
815 self.tree.append_text(begin_text, ix);
816 if bytes[ix + 1] == b'`' || bytes[ix + 1] == b'$' {
818 let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], bytes[ix + 1]);
819 self.tree.append(Item {
820 start: ix + 1,
821 end: ix + count + 1,
822 body: if bytes[ix + 1] == b'`' {
823 ItemBody::MaybeCode(count, true)
824 } else {
825 ItemBody::MaybeMath(count, true)
826 },
827 });
828 begin_text = ix + 1 + count;
829 LoopInstruction::ContinueAndSkip(count)
830 } else {
831 begin_text = ix + 1;
832 LoopInstruction::ContinueAndSkip(1)
833 }
834 } else {
835 LoopInstruction::ContinueAndSkip(0)
836 }
837 }
838 c @ b'*' | c @ b'_' | c @ b'~' => {
839 let string_suffix = &self.text[ix..];
840 let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
841 let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
842 let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
843 let is_valid_seq = c != b'~'
844 || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
845
846 if (can_open || can_close) && is_valid_seq {
847 self.tree.append_text(begin_text, ix);
848 for i in 0..count {
849 self.tree.append(Item {
850 start: ix + i,
851 end: ix + i + 1,
852 body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
853 });
854 }
855 begin_text = ix + count;
856 }
857 LoopInstruction::ContinueAndSkip(count - 1)
858 }
859 b'$' => {
860 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'$');
861 if self.options.contains(Options::ENABLE_MATH) {
862 self.tree.append_text(begin_text, ix);
863 self.tree.append(Item {
864 start: ix,
865 end: ix + count,
866 body: ItemBody::MaybeMath(count, false),
867 });
868 begin_text = ix + count;
869 }
870 LoopInstruction::ContinueAndSkip(count - 1)
871 }
872 b'`' => {
873 self.tree.append_text(begin_text, ix);
874 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
875 self.tree.append(Item {
876 start: ix,
877 end: ix + count,
878 body: ItemBody::MaybeCode(count, false),
879 });
880 begin_text = ix + count;
881 LoopInstruction::ContinueAndSkip(count - 1)
882 }
883 b'<' => {
884 self.tree.append_text(begin_text, ix);
887 self.tree.append(Item {
888 start: ix,
889 end: ix + 1,
890 body: ItemBody::MaybeHtml,
891 });
892 begin_text = ix + 1;
893 LoopInstruction::ContinueAndSkip(0)
894 }
895 b'!' => {
896 if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
897 self.tree.append_text(begin_text, ix);
898 self.tree.append(Item {
899 start: ix,
900 end: ix + 2,
901 body: ItemBody::MaybeImage,
902 });
903 begin_text = ix + 2;
904 LoopInstruction::ContinueAndSkip(1)
905 } else {
906 LoopInstruction::ContinueAndSkip(0)
907 }
908 }
909 b'[' => {
910 self.tree.append_text(begin_text, ix);
911 self.tree.append(Item {
912 start: ix,
913 end: ix + 1,
914 body: ItemBody::MaybeLinkOpen,
915 });
916 begin_text = ix + 1;
917 LoopInstruction::ContinueAndSkip(0)
918 }
919 b']' => {
920 self.tree.append_text(begin_text, ix);
921 self.tree.append(Item {
922 start: ix,
923 end: ix + 1,
924 body: ItemBody::MaybeLinkClose,
925 });
926 begin_text = ix + 1;
927 LoopInstruction::ContinueAndSkip(0)
928 }
929 b'&' => match scan_entity(&bytes[ix..]) {
930 (n, Some(value)) => {
931 self.tree.append_text(begin_text, ix);
932 self.tree.append(Item {
933 start: ix,
934 end: ix + n,
935 body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
936 });
937 begin_text = ix + n;
938 LoopInstruction::ContinueAndSkip(n - 1)
939 }
940 _ => LoopInstruction::ContinueAndSkip(0),
941 },
942 b'|' => {
943 if let TableParseMode::Active = mode {
944 LoopInstruction::BreakAtWith(ix, None)
945 } else {
946 last_pipe_ix = ix;
947 pipes += 1;
948 LoopInstruction::ContinueAndSkip(0)
949 }
950 }
951 _ => LoopInstruction::ContinueAndSkip(0),
952 }
953 });
954
955 if brk.is_none() {
956 self.tree.append_text(begin_text, final_ix);
958 }
959 (final_ix, brk)
960 }
961
962 fn interrupt_paragraph_by_list(&self, suffix: &[u8]) -> bool {
965 scan_listitem(suffix).map_or(false, |(ix, delim, index, _)| {
966 self.list_nesting > 0 ||
967 !scan_empty_list(&suffix[ix..]) && (delim == b'*' || delim == b'-' || index == 1)
970 })
971 }
972
973 fn parse_html_block_type_1_to_5(
979 &mut self,
980 start_ix: usize,
981 html_end_tag: &str,
982 mut remaining_space: usize,
983 ) -> usize {
984 let bytes = self.text.as_bytes();
985 let mut ix = start_ix;
986 loop {
987 let line_start_ix = ix;
988 ix += scan_nextline(&bytes[ix..]);
989 self.append_html_line(remaining_space, line_start_ix, ix);
990
991 let mut line_start = LineStart::new(&bytes[ix..]);
992 let n_containers = scan_containers(&self.tree, &mut line_start);
993 if n_containers < self.tree.spine_len() {
994 break;
995 }
996
997 if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
998 break;
999 }
1000
1001 let next_line_ix = ix + line_start.bytes_scanned();
1002 if next_line_ix == self.text.len() {
1003 break;
1004 }
1005 ix = next_line_ix;
1006 remaining_space = line_start.remaining_space();
1007 }
1008 ix
1009 }
1010
1011 fn parse_html_block_type_6_or_7(
1015 &mut self,
1016 start_ix: usize,
1017 mut remaining_space: usize,
1018 ) -> usize {
1019 let bytes = self.text.as_bytes();
1020 let mut ix = start_ix;
1021 loop {
1022 let line_start_ix = ix;
1023 ix += scan_nextline(&bytes[ix..]);
1024 self.append_html_line(remaining_space, line_start_ix, ix);
1025
1026 let mut line_start = LineStart::new(&bytes[ix..]);
1027 let n_containers = scan_containers(&self.tree, &mut line_start);
1028 if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
1029 break;
1030 }
1031
1032 let next_line_ix = ix + line_start.bytes_scanned();
1033 if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
1034 {
1035 break;
1036 }
1037 ix = next_line_ix;
1038 remaining_space = line_start.remaining_space();
1039 }
1040 ix
1041 }
1042
1043 fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
1044 self.tree.append(Item {
1045 start: start_ix,
1046 end: 0, body: ItemBody::IndentCodeBlock,
1048 });
1049 self.tree.push();
1050 let bytes = self.text.as_bytes();
1051 let mut last_nonblank_child = TreePointer::Nil;
1052 let mut last_nonblank_ix = 0;
1053 let mut end_ix = 0;
1054 let mut last_line_blank = false;
1055
1056 let mut ix = start_ix;
1057 loop {
1058 let line_start_ix = ix;
1059 ix += scan_nextline(&bytes[ix..]);
1060 self.append_code_text(remaining_space, line_start_ix, ix);
1061 if !last_line_blank {
1064 last_nonblank_child = self.tree.cur();
1065 last_nonblank_ix = ix;
1066 end_ix = ix;
1067 }
1068
1069 let mut line_start = LineStart::new(&bytes[ix..]);
1070 let n_containers = scan_containers(&self.tree, &mut line_start);
1071 if n_containers < self.tree.spine_len()
1072 || !(line_start.scan_space(4) || line_start.is_at_eol())
1073 {
1074 break;
1075 }
1076 let next_line_ix = ix + line_start.bytes_scanned();
1077 if next_line_ix == self.text.len() {
1078 break;
1079 }
1080 ix = next_line_ix;
1081 remaining_space = line_start.remaining_space();
1082 last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1083 }
1084
1085 if let TreePointer::Valid(child) = last_nonblank_child {
1087 self.tree[child].next = TreePointer::Nil;
1088 self.tree[child].item.end = last_nonblank_ix;
1089 }
1090 self.pop(end_ix);
1091 ix
1092 }
1093
1094 fn parse_fenced_code_block(
1095 &mut self,
1096 start_ix: usize,
1097 indent: usize,
1098 fence_ch: u8,
1099 n_fence_char: usize,
1100 ) -> usize {
1101 let bytes = self.text.as_bytes();
1102 let mut info_start = start_ix + n_fence_char;
1103 info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1104 let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1107 let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1108 let info_string = unescape(&self.text[info_start..info_end]);
1109 self.tree.append(Item {
1110 start: start_ix,
1111 end: 0, body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1113 });
1114 self.tree.push();
1115 loop {
1116 let mut line_start = LineStart::new(&bytes[ix..]);
1117 let n_containers = scan_containers(&self.tree, &mut line_start);
1118 if n_containers < self.tree.spine_len() {
1119 break;
1120 }
1121 line_start.scan_space(indent);
1122 let mut close_line_start = line_start.clone();
1123 if !close_line_start.scan_space(4) {
1124 let close_ix = ix + close_line_start.bytes_scanned();
1125 if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1126 {
1127 ix = close_ix + n;
1128 break;
1129 }
1130 }
1131 let remaining_space = line_start.remaining_space();
1132 ix += line_start.bytes_scanned();
1133 let next_ix = ix + scan_nextline(&bytes[ix..]);
1134 self.append_code_text(remaining_space, ix, next_ix);
1135 ix = next_ix;
1136 }
1137
1138 self.pop(ix);
1139
1140 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1142 }
1143
1144 fn parse_display_math(&mut self, start_ix: usize) -> usize {
1145 let bytes = self.text.as_bytes();
1146 self.tree.append(Item {
1147 start: start_ix,
1148 end: 0, body: ItemBody::DisplayMath,
1150 });
1151 self.tree.push();
1152 let mut ix = start_ix + scan_nextline(&bytes[start_ix..]);
1153 loop {
1154 let mut line_start = LineStart::new(&bytes[ix..]);
1155 let n_containers = scan_containers(&self.tree, &mut line_start);
1156 if n_containers < self.tree.spine_len() {
1157 break;
1158 }
1159 if let Some(n) = scan_closing_display_math(&bytes[ix..]) {
1160 ix += n;
1161 break;
1162 }
1163 let next_ix = ix + scan_nextline(&bytes[ix..]);
1164 self.append_math_text(ix, next_ix);
1165 ix = next_ix;
1166 }
1167 self.pop(ix);
1168 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1169 }
1170
1171 fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1172 if remaining_space > 0 {
1173 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1174 self.tree.append(Item {
1175 start,
1176 end: start,
1177 body: ItemBody::SynthesizeText(cow_ix),
1178 });
1179 }
1180 if self.text.as_bytes()[end - 2] == b'\r' {
1181 self.tree.append_text(start, end - 2);
1183 self.tree.append_text(end - 1, end);
1184 } else {
1185 self.tree.append_text(start, end);
1186 }
1187 }
1188
1189 fn append_math_text(&mut self, start: usize, end: usize) {
1190 if self.text.as_bytes()[end - 2] == b'\r' {
1191 self.tree.append_text(start, end - 2);
1192 self.tree.append_text(end - 1, end);
1193 } else {
1194 self.tree.append_text(start, end);
1195 }
1196 }
1197
1198 fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1200 if remaining_space > 0 {
1201 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1202 self.tree.append(Item {
1203 start,
1204 end: start,
1205 body: ItemBody::SynthesizeText(cow_ix),
1207 });
1208 }
1209 if self.text.as_bytes()[end - 2] == b'\r' {
1210 self.tree.append(Item {
1212 start,
1213 end: end - 2,
1214 body: ItemBody::Html,
1215 });
1216 self.tree.append(Item {
1217 start: end - 1,
1218 end,
1219 body: ItemBody::Html,
1220 });
1221 } else {
1222 self.tree.append(Item {
1223 start,
1224 end,
1225 body: ItemBody::Html,
1226 });
1227 }
1228 }
1229
1230 fn pop(&mut self, ix: usize) {
1232 let cur_ix = self.tree.pop().unwrap();
1233 self.tree[cur_ix].item.end = ix;
1234 if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1235 surgerize_tight_list(&mut self.tree, cur_ix);
1236 }
1237 }
1238
1239 fn finish_list(&mut self, ix: usize) {
1241 if let Some(node_ix) = self.tree.peek_up() {
1242 if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1243 self.pop(ix);
1244 self.list_nesting -= 1;
1245 }
1246 }
1247 if self.last_line_blank {
1248 if let Some(node_ix) = self.tree.peek_grandparent() {
1249 if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
1250 *is_tight = false;
1251 }
1252 }
1253 self.last_line_blank = false;
1254 }
1255 }
1256
1257 fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1260 if let Some(node_ix) = self.tree.peek_up() {
1261 if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1262 if existing_ch == ch {
1263 if self.last_line_blank {
1264 *is_tight = false;
1265 self.last_line_blank = false;
1266 }
1267 return;
1268 }
1269 }
1270 self.finish_list(start);
1272 }
1273 self.tree.append(Item {
1274 start,
1275 end: 0, body: ItemBody::List(true, ch, index),
1277 });
1278 self.list_nesting += 1;
1279 self.tree.push();
1280 self.last_line_blank = false;
1281 }
1282
1283 fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1287 self.tree.append(Item {
1288 start: ix,
1289 end: ix + hrule_size,
1290 body: ItemBody::Rule,
1291 });
1292 ix + hrule_size
1293 }
1294
1295 fn parse_atx_heading(&mut self, mut ix: usize, atx_size: usize) -> usize {
1299 let heading_ix = self.tree.append(Item {
1300 start: ix,
1301 end: 0, body: ItemBody::Heading(atx_size as u32),
1303 });
1304 ix += atx_size;
1305 let bytes = self.text.as_bytes();
1307 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1308 self.tree[heading_ix].item.end = ix + eol_bytes;
1309 return ix + eol_bytes;
1310 }
1311 let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1313 ix += skip_spaces;
1314
1315 let header_start = ix;
1317 let header_node_idx = self.tree.push(); ix = self.parse_line(ix, TableParseMode::Disabled).0;
1319 self.tree[header_node_idx].item.end = ix;
1320
1321 if let TreePointer::Valid(cur_ix) = self.tree.cur() {
1323 let header_text = &bytes[header_start..ix];
1324 let mut limit = header_text
1325 .iter()
1326 .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1327 .map_or(0, |i| i + 1);
1328 let closer = header_text[..limit]
1329 .iter()
1330 .rposition(|&b| b != b'#')
1331 .map_or(0, |i| i + 1);
1332 if closer == 0 {
1333 limit = closer;
1334 } else {
1335 let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1336 if spaces > 0 {
1337 limit = closer - spaces;
1338 }
1339 }
1340 self.tree[cur_ix].item.end = limit + header_start;
1341 }
1342
1343 self.tree.pop();
1344 ix
1345 }
1346
1347 fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1349 let bytes = &self.text.as_bytes()[start..];
1350 if !bytes.starts_with(b"[^") {
1351 return None;
1352 }
1353 let (mut i, label) = self.parse_refdef_label(start + 2)?;
1354 i += 2;
1355 if scan_ch(&bytes[i..], b':') == 0 {
1356 return None;
1357 }
1358 i += 1;
1359 self.finish_list(start);
1360 self.tree.append(Item {
1361 start,
1362 end: 0, body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1365 });
1366 self.tree.push();
1367 Some(i)
1368 }
1369
1370 fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1373 scan_link_label_rest(&self.text[start..], &|bytes| {
1374 let mut line_start = LineStart::new(bytes);
1375 let _ = scan_containers(&self.tree, &mut line_start);
1376 let bytes_scanned = line_start.bytes_scanned();
1377
1378 let suffix = &bytes[bytes_scanned..];
1379 if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
1380 None
1381 } else {
1382 Some(bytes_scanned)
1383 }
1384 })
1385 }
1386
1387 fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1389 let bytes = &self.text.as_bytes()[start..];
1390 if scan_ch(bytes, b'[') == 0 {
1391 return None;
1392 }
1393 let (mut i, label) = self.parse_refdef_label(start + 1)?;
1394 i += 1;
1395 if scan_ch(&bytes[i..], b':') == 0 {
1396 return None;
1397 }
1398 i += 1;
1399 let (bytecount, link_def) = self.scan_refdef(start + i)?;
1400 Some((bytecount + i, UniCase::new(label), link_def))
1401 }
1402
1403 fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1405 let mut newlines = 0;
1406 loop {
1407 let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1408 i += whitespaces;
1409 if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1410 i += eol_bytes;
1411 newlines += 1;
1412 if newlines > 1 {
1413 return None;
1414 }
1415 } else {
1416 break;
1417 }
1418 let mut line_start = LineStart::new(&bytes[i..]);
1419 if self.tree.spine_len() != scan_containers(&self.tree, &mut line_start) {
1420 return None;
1421 }
1422 i += line_start.bytes_scanned();
1423 }
1424 Some((i, newlines))
1425 }
1426
1427 fn scan_refdef(&self, start: usize) -> Option<(usize, LinkDef<'a>)> {
1430 let bytes = self.text.as_bytes();
1431
1432 let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1434
1435 let (dest_length, dest) = scan_link_dest(self.text, i, 1)?;
1437 if dest_length == 0 {
1438 return None;
1439 }
1440 let dest = unescape(dest);
1441 i += dest_length;
1442
1443 let mut backup = (i - start, LinkDef { dest, title: None });
1445
1446 let (mut i, newlines) =
1448 if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1449 if i == self.text.len() {
1450 newlines += 1;
1451 }
1452 if new_i == i && newlines == 0 {
1453 return None;
1454 }
1455 if newlines > 1 {
1456 return Some(backup);
1457 };
1458 (new_i, newlines)
1459 } else {
1460 return Some(backup);
1461 };
1462
1463 if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
1466 i += title_length;
1467 backup.1.title = Some(unescape(title));
1468 } else if newlines > 0 {
1469 return Some(backup);
1470 } else {
1471 return None;
1472 };
1473
1474 if let Some(bytes) = scan_blank_line(&bytes[i..]) {
1476 backup.0 = i + bytes - start;
1477 Some(backup)
1478 } else if newlines > 0 {
1479 Some(backup)
1480 } else {
1481 None
1482 }
1483 }
1484}
1485
1486fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
1488 let mut i = 0;
1489 for &node_ix in tree.walk_spine() {
1490 match tree[node_ix].item.body {
1491 ItemBody::BlockQuote => {
1492 let save = line_start.clone();
1493 if !line_start.scan_blockquote_marker() {
1494 *line_start = save;
1495 break;
1496 }
1497 }
1498 ItemBody::ListItem(indent) => {
1499 if !line_start.is_at_eol() {
1500 let save = line_start.clone();
1501 if !line_start.scan_space(indent) {
1502 *line_start = save;
1503 break;
1504 }
1505 }
1506 }
1507 _ => (),
1508 }
1509 i += 1;
1510 }
1511 i
1512}
1513
1514fn count_header_cols(
1517 bytes: &[u8],
1518 mut pipes: usize,
1519 mut start: usize,
1520 last_pipe_ix: usize,
1521) -> usize {
1522 start += scan_whitespace_no_nl(&bytes[start..]);
1524 if bytes[start] == b'|' {
1525 pipes -= 1;
1526 }
1527
1528 if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
1530 pipes
1531 } else {
1532 pipes + 1
1533 }
1534}
1535
1536impl<'a> Tree<Item> {
1537 fn append_text(&mut self, start: usize, end: usize) {
1538 if end > start {
1539 if let TreePointer::Valid(ix) = self.cur() {
1540 if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
1541 self[ix].item.end = end;
1542 return;
1543 }
1544 }
1545 self.append(Item {
1546 start,
1547 end,
1548 body: ItemBody::Text,
1549 });
1550 }
1551 }
1552}
1553
1554fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1560 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1561 c
1562 } else {
1563 return false;
1564 };
1565 if next_char.is_whitespace() {
1566 return false;
1567 }
1568 if ix == 0 {
1569 return true;
1570 }
1571 let delim = suffix.chars().next().unwrap();
1572 if delim == '*' && !is_punctuation(next_char) {
1573 return true;
1574 }
1575
1576 let prev_char = s[..ix].chars().last().unwrap();
1577
1578 prev_char.is_whitespace() || is_punctuation(prev_char)
1579}
1580
1581fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1585 if ix == 0 {
1586 return false;
1587 }
1588 let prev_char = s[..ix].chars().last().unwrap();
1589 if prev_char.is_whitespace() {
1590 return false;
1591 }
1592 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1593 c
1594 } else {
1595 return true;
1596 };
1597 let delim = suffix.chars().next().unwrap();
1598 if delim == '*' && !is_punctuation(prev_char) {
1599 return true;
1600 }
1601
1602 next_char.is_whitespace() || is_punctuation(next_char)
1603}
1604
1605fn scan_paragraph_interrupt(bytes: &[u8]) -> bool {
1609 if scan_eol(bytes).is_some()
1610 || scan_hrule(bytes).is_ok()
1611 || scan_atx_heading(bytes).is_some()
1612 || scan_code_fence(bytes).is_some()
1613 || scan_blockquote_start(bytes).is_some()
1614 {
1615 return true;
1616 }
1617 bytes.starts_with(b"<")
1618 && (get_html_end_tag(&bytes[1..]).is_some()
1619 || is_html_tag(scan_html_block_tag(&bytes[1..]).1))
1620}
1621
1622fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
1624 static BEGIN_TAGS: &[&[u8]; 3] = &[b"pre", b"style", b"script"];
1625 static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
1626
1627 for (beg_tag, end_tag) in BEGIN_TAGS
1628 .iter()
1629 .zip(["</pre>", "</style>", "</script>"].iter())
1630 {
1631 let tag_len = beg_tag.len();
1632
1633 if text_bytes.len() < tag_len {
1634 break;
1636 }
1637
1638 if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
1639 continue;
1640 }
1641
1642 if text_bytes.len() == tag_len {
1644 return Some(end_tag);
1645 }
1646
1647 let s = text_bytes[tag_len];
1649 if is_ascii_whitespace(s) || s == b'>' {
1650 return Some(end_tag);
1651 }
1652 }
1653
1654 for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
1655 if text_bytes.starts_with(beg_tag) {
1656 return Some(end_tag);
1657 }
1658 }
1659
1660 if text_bytes.len() > 1
1661 && text_bytes[0] == b'!'
1662 && text_bytes[1] >= b'A'
1663 && text_bytes[1] <= b'Z'
1664 {
1665 Some(">")
1666 } else {
1667 None
1668 }
1669}
1670
1671#[derive(Copy, Clone, Debug)]
1672struct InlineEl {
1673 start: TreeIndex, count: usize,
1675 c: u8, both: bool, }
1678
1679#[derive(Debug, Clone, Default)]
1680struct InlineStack {
1681 stack: Vec<InlineEl>,
1682 lower_bounds: [usize; 7],
1687}
1688
1689impl InlineStack {
1690 const UNDERSCORE_NOT_BOTH: usize = 0;
1694 const ASTERISK_NOT_BOTH: usize = 1;
1695 const ASTERISK_BASE: usize = 2;
1696 const TILDES: usize = 5;
1697 const UNDERSCORE_BOTH: usize = 6;
1698
1699 fn pop_all(&mut self, tree: &mut Tree<Item>) {
1700 for el in self.stack.drain(..) {
1701 for i in 0..el.count {
1702 tree[el.start + i].item.body = ItemBody::Text;
1703 }
1704 }
1705 self.lower_bounds = [0; 7];
1706 }
1707
1708 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1709 if c == b'_' {
1710 if both {
1711 self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
1712 } else {
1713 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
1714 }
1715 } else if c == b'*' {
1716 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1717 if both {
1718 mod3_lower
1719 } else {
1720 min(
1721 mod3_lower,
1722 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1723 )
1724 }
1725 } else {
1726 self.lower_bounds[InlineStack::TILDES]
1727 }
1728 }
1729
1730 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1731 if c == b'_' {
1732 if both {
1733 self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
1734 } else {
1735 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1736 }
1737 } else if c == b'*' {
1738 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1739 if !both {
1740 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1741 }
1742 } else {
1743 self.lower_bounds[InlineStack::TILDES] = new_bound;
1744 }
1745 }
1746
1747 fn find_match(
1748 &mut self,
1749 tree: &mut Tree<Item>,
1750 c: u8,
1751 count: usize,
1752 both: bool,
1753 ) -> Option<InlineEl> {
1754 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1755 let res = self.stack[lowerbound..]
1756 .iter()
1757 .cloned()
1758 .enumerate()
1759 .rfind(|(_, el)| {
1760 el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
1761 });
1762
1763 if let Some((matching_ix, matching_el)) = res {
1764 let matching_ix = matching_ix + lowerbound;
1765 for el in &self.stack[(matching_ix + 1)..] {
1766 for i in 0..el.count {
1767 tree[el.start + i].item.body = ItemBody::Text;
1768 }
1769 }
1770 self.stack.truncate(matching_ix);
1771 Some(matching_el)
1772 } else {
1773 self.set_lowerbound(c, count, both, self.stack.len());
1774 None
1775 }
1776 }
1777
1778 fn push(&mut self, el: InlineEl) {
1779 self.stack.push(el)
1780 }
1781}
1782
1783#[derive(Debug, Clone)]
1784enum RefScan<'a> {
1785 LinkLabel(CowStr<'a>, TreePointer),
1787 Collapsed(TreePointer),
1789 Failed,
1790}
1791
1792fn scan_nodes_to_ix(tree: &Tree<Item>, mut node: TreePointer, ix: usize) -> TreePointer {
1795 while let TreePointer::Valid(node_ix) = node {
1796 if tree[node_ix].item.end <= ix {
1797 node = tree[node_ix].next;
1798 } else {
1799 break;
1800 }
1801 }
1802 node
1803}
1804
1805fn scan_link_label<'text, 'tree>(
1808 tree: &'tree Tree<Item>,
1809 text: &'text str,
1810) -> Option<(usize, ReferenceLabel<'text>)> {
1811 let bytes = &text.as_bytes();
1812 if bytes.len() < 2 || bytes[0] != b'[' {
1813 return None;
1814 }
1815 let linebreak_handler = |bytes: &[u8]| {
1816 let mut line_start = LineStart::new(bytes);
1817 let _ = scan_containers(tree, &mut line_start);
1818 Some(line_start.bytes_scanned())
1819 };
1820 let pair = if b'^' == bytes[1] {
1821 let (byte_index, cow) = scan_link_label_rest(&text[2..], &linebreak_handler)?;
1822 (byte_index + 2, ReferenceLabel::Footnote(cow))
1823 } else {
1824 let (byte_index, cow) = scan_link_label_rest(&text[1..], &linebreak_handler)?;
1825 (byte_index + 1, ReferenceLabel::Link(cow))
1826 };
1827 Some(pair)
1828}
1829
1830fn scan_reference<'a, 'b>(tree: &'a Tree<Item>, text: &'b str, cur: TreePointer) -> RefScan<'b> {
1831 let cur_ix = match cur {
1832 TreePointer::Nil => return RefScan::Failed,
1833 TreePointer::Valid(cur_ix) => cur_ix,
1834 };
1835 let start = tree[cur_ix].item.start;
1836 let tail = &text.as_bytes()[start..];
1837
1838 if tail.starts_with(b"[]") {
1839 let closing_node = tree[cur_ix].next.unwrap();
1840 RefScan::Collapsed(tree[closing_node].next)
1841 } else if let Some((ix, ReferenceLabel::Link(label))) = scan_link_label(tree, &text[start..]) {
1842 let next_node = scan_nodes_to_ix(tree, cur, start + ix);
1843 RefScan::LinkLabel(label, next_node)
1844 } else {
1845 RefScan::Failed
1846 }
1847}
1848
1849#[derive(Clone, Default)]
1850struct LinkStack {
1851 inner: Vec<LinkStackEl>,
1852 disabled_ix: usize,
1853}
1854
1855impl LinkStack {
1856 fn push(&mut self, el: LinkStackEl) {
1857 self.inner.push(el);
1858 }
1859
1860 fn pop(&mut self) -> Option<LinkStackEl> {
1861 let el = self.inner.pop();
1862 self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1863 el
1864 }
1865
1866 fn clear(&mut self) {
1867 self.inner.clear();
1868 self.disabled_ix = 0;
1869 }
1870
1871 fn disable_all_links(&mut self) {
1872 for el in &mut self.inner[self.disabled_ix..] {
1873 if el.ty == LinkStackTy::Link {
1874 el.ty = LinkStackTy::Disabled;
1875 }
1876 }
1877 self.disabled_ix = self.inner.len();
1878 }
1879}
1880
1881#[derive(Clone, Debug)]
1882struct LinkStackEl {
1883 node: TreeIndex,
1884 ty: LinkStackTy,
1885}
1886
1887#[derive(PartialEq, Clone, Debug)]
1888enum LinkStackTy {
1889 Link,
1890 Image,
1891 Disabled,
1892}
1893
1894#[derive(Clone)]
1895struct LinkDef<'a> {
1896 dest: CowStr<'a>,
1897 title: Option<CowStr<'a>>,
1898}
1899
1900struct CodeDelims {
1903 inner: HashMap<usize, VecDeque<TreeIndex>>,
1904 seen_first: bool,
1905}
1906
1907impl CodeDelims {
1908 fn new() -> Self {
1909 Self {
1910 inner: Default::default(),
1911 seen_first: false,
1912 }
1913 }
1914
1915 fn insert(&mut self, count: usize, ix: TreeIndex) {
1916 if self.seen_first {
1917 self.inner
1918 .entry(count)
1919 .or_insert_with(Default::default)
1920 .push_back(ix);
1921 } else {
1922 self.seen_first = true;
1925 }
1926 }
1927
1928 fn is_populated(&self) -> bool {
1929 !self.inner.is_empty()
1930 }
1931
1932 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1933 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1934 if ix > open_ix {
1935 return Some(ix);
1936 }
1937 }
1938 None
1939 }
1940
1941 fn clear(&mut self) {
1942 self.inner.clear();
1943 self.seen_first = false;
1944 }
1945}
1946
1947#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1948struct LinkIndex(usize);
1949
1950#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1951struct CowIndex(usize);
1952
1953#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1954struct AlignmentIndex(usize);
1955
1956#[derive(Clone)]
1957struct Allocations<'a> {
1958 refdefs: HashMap<LinkLabel<'a>, LinkDef<'a>>,
1959 links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1960 cows: Vec<CowStr<'a>>,
1961 alignments: Vec<Vec<Alignment>>,
1962}
1963
1964impl<'a> Allocations<'a> {
1965 fn new() -> Self {
1966 Self {
1967 refdefs: HashMap::new(),
1968 links: Vec::with_capacity(128),
1969 cows: Vec::new(),
1970 alignments: Vec::new(),
1971 }
1972 }
1973
1974 fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1975 let ix = self.cows.len();
1976 self.cows.push(cow);
1977 CowIndex(ix)
1978 }
1979
1980 fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1981 let ix = self.links.len();
1982 self.links.push((ty, url, title));
1983 LinkIndex(ix)
1984 }
1985
1986 fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1987 let ix = self.alignments.len();
1988 self.alignments.push(alignment);
1989 AlignmentIndex(ix)
1990 }
1991}
1992
1993impl<'a> Index<CowIndex> for Allocations<'a> {
1994 type Output = CowStr<'a>;
1995
1996 fn index(&self, ix: CowIndex) -> &Self::Output {
1997 self.cows.index(ix.0)
1998 }
1999}
2000
2001impl<'a> Index<LinkIndex> for Allocations<'a> {
2002 type Output = (LinkType, CowStr<'a>, CowStr<'a>);
2003
2004 fn index(&self, ix: LinkIndex) -> &Self::Output {
2005 self.links.index(ix.0)
2006 }
2007}
2008
2009impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2010 type Output = Vec<Alignment>;
2011
2012 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2013 self.alignments.index(ix.0)
2014 }
2015}
2016
2017#[derive(Clone, Default)]
2023pub(crate) struct HtmlScanGuard {
2024 pub cdata: usize,
2025 pub processing: usize,
2026 pub declaration: usize,
2027}
2028
2029#[derive(Clone)]
2031pub struct Parser<'a> {
2032 text: &'a str,
2033 tree: Tree<Item>,
2034 allocs: Allocations<'a>,
2035 broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
2036 html_scan_guard: HtmlScanGuard,
2037
2038 inline_stack: InlineStack,
2040 link_stack: LinkStack,
2041}
2042
2043impl<'a> Parser<'a> {
2044 pub fn new(text: &'a str) -> Parser<'a> {
2046 Parser::new_ext(text, Options::empty())
2047 }
2048
2049 pub fn new_ext(text: &'a str, options: Options) -> Parser<'a> {
2051 Parser::new_with_broken_link_callback(text, options, None)
2052 }
2053
2054 pub fn new_with_broken_link_callback(
2060 text: &'a str,
2061 options: Options,
2062 broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
2063 ) -> Parser<'a> {
2064 let first_pass = FirstPass::new(text, options);
2065 let (mut tree, allocs) = first_pass.run();
2066 tree.reset();
2067 let inline_stack = Default::default();
2068 let link_stack = Default::default();
2069 let html_scan_guard = Default::default();
2070 Parser {
2071 text,
2072 tree,
2073 allocs,
2074 broken_link_callback,
2075 inline_stack,
2076 link_stack,
2077 html_scan_guard,
2078 }
2079 }
2080
2081 fn handle_inline(&mut self) {
2088 self.handle_inline_pass1();
2089 self.handle_emphasis();
2090 }
2091
2092 fn handle_inline_pass1(&mut self) {
2098 let mut code_delims = CodeDelims::new();
2099 let mut cur = self.tree.cur();
2100 let mut prev = TreePointer::Nil;
2101
2102 let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
2103 let block_text = &self.text[..block_end];
2104
2105 while let TreePointer::Valid(mut cur_ix) = cur {
2106 match self.tree[cur_ix].item.body {
2107 ItemBody::MaybeHtml => {
2108 let next = self.tree[cur_ix].next;
2109 let autolink = if let TreePointer::Valid(next_ix) = next {
2110 scan_autolink(block_text, self.tree[next_ix].item.start)
2111 } else {
2112 None
2113 };
2114
2115 if let Some((ix, uri, link_type)) = autolink {
2116 let node = scan_nodes_to_ix(&self.tree, next, ix);
2117 let text_node = self.tree.create_node(Item {
2118 start: self.tree[cur_ix].item.start + 1,
2119 end: ix - 1,
2120 body: ItemBody::Text,
2121 });
2122 let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
2123 self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
2124 self.tree[cur_ix].item.end = ix;
2125 self.tree[cur_ix].next = node;
2126 self.tree[cur_ix].child = TreePointer::Valid(text_node);
2127 prev = cur;
2128 cur = node;
2129 if let TreePointer::Valid(node_ix) = cur {
2130 self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
2131 }
2132 continue;
2133 } else {
2134 let inline_html = if let TreePointer::Valid(next_ix) = next {
2135 self.scan_inline_html(
2136 block_text.as_bytes(),
2137 self.tree[next_ix].item.start,
2138 )
2139 } else {
2140 None
2141 };
2142 if let Some(ix) = inline_html {
2143 let node = scan_nodes_to_ix(&self.tree, next, ix);
2144 self.tree[cur_ix].item.body = ItemBody::Html;
2145 self.tree[cur_ix].item.end = ix;
2146 self.tree[cur_ix].next = node;
2147 prev = cur;
2148 cur = node;
2149 if let TreePointer::Valid(node_ix) = cur {
2150 self.tree[node_ix].item.start =
2151 max(self.tree[node_ix].item.start, ix);
2152 }
2153 continue;
2154 }
2155 }
2156 self.tree[cur_ix].item.body = ItemBody::Text;
2157 }
2158 ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
2159 if preceded_by_backslash {
2160 search_count -= 1;
2161 if search_count == 0 {
2162 self.tree[cur_ix].item.body = ItemBody::Text;
2163 prev = cur;
2164 cur = self.tree[cur_ix].next;
2165 continue;
2166 }
2167 }
2168
2169 if code_delims.is_populated() {
2170 if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
2173 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2174 } else {
2175 self.tree[cur_ix].item.body = ItemBody::Text;
2176 }
2177 } else {
2178 let mut scan = if search_count > 0 {
2181 self.tree[cur_ix].next
2182 } else {
2183 TreePointer::Nil
2184 };
2185 while let TreePointer::Valid(scan_ix) = scan {
2186 if let ItemBody::MaybeCode(delim_count, _) =
2187 self.tree[scan_ix].item.body
2188 {
2189 if search_count == delim_count {
2190 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2191 code_delims.clear();
2192 break;
2193 } else {
2194 code_delims.insert(delim_count, scan_ix);
2195 }
2196 }
2197 scan = self.tree[scan_ix].next;
2198 }
2199 if scan == TreePointer::Nil {
2200 self.tree[cur_ix].item.body = ItemBody::Text;
2201 }
2202 }
2203 }
2204 ItemBody::MaybeMath(mut search_count, preceded_by_backslash) => {
2205 if preceded_by_backslash {
2206 search_count -= 1;
2207 if search_count == 0 {
2208 self.tree[cur_ix].item.body = ItemBody::Text;
2209 prev = cur;
2210 cur = self.tree[cur_ix].next;
2211 continue;
2212 }
2213 }
2214
2215 if code_delims.is_populated() {
2216 if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
2217 self.make_math_span(cur_ix, scan_ix, preceded_by_backslash);
2218 } else {
2219 self.tree[cur_ix].item.body = ItemBody::Text;
2220 }
2221 } else {
2222 let mut scan = if search_count > 0 {
2223 self.tree[cur_ix].next
2224 } else {
2225 TreePointer::Nil
2226 };
2227 while let TreePointer::Valid(scan_ix) = scan {
2228 if let ItemBody::MaybeMath(delim_count, _) =
2229 self.tree[scan_ix].item.body
2230 {
2231 if search_count == delim_count {
2232 self.make_math_span(cur_ix, scan_ix, preceded_by_backslash);
2233 code_delims.clear();
2234 break;
2235 } else {
2236 code_delims.insert(delim_count, scan_ix);
2237 }
2238 }
2239 scan = self.tree[scan_ix].next;
2240 }
2241 if scan == TreePointer::Nil {
2242 self.tree[cur_ix].item.body = ItemBody::Text;
2243 }
2244 }
2245 }
2246 ItemBody::MaybeLinkOpen => {
2247 self.tree[cur_ix].item.body = ItemBody::Text;
2248 self.link_stack.push(LinkStackEl {
2249 node: cur_ix,
2250 ty: LinkStackTy::Link,
2251 });
2252 }
2253 ItemBody::MaybeImage => {
2254 self.tree[cur_ix].item.body = ItemBody::Text;
2255 self.link_stack.push(LinkStackEl {
2256 node: cur_ix,
2257 ty: LinkStackTy::Image,
2258 });
2259 }
2260 ItemBody::MaybeLinkClose => {
2261 if let Some(tos) = self.link_stack.pop() {
2262 if tos.ty == LinkStackTy::Disabled {
2263 self.tree[cur_ix].item.body = ItemBody::Text;
2264 continue;
2265 }
2266 let next = self.tree[cur_ix].next;
2267 if let Some((next_ix, url, title)) =
2268 self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
2269 {
2270 let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
2271 if let TreePointer::Valid(prev_ix) = prev {
2272 self.tree[prev_ix].next = TreePointer::Nil;
2273 }
2274 cur = TreePointer::Valid(tos.node);
2275 cur_ix = tos.node;
2276 let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
2277 self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
2278 ItemBody::Image(link_ix)
2279 } else {
2280 ItemBody::Link(link_ix)
2281 };
2282 self.tree[cur_ix].child = self.tree[cur_ix].next;
2283 self.tree[cur_ix].next = next_node;
2284 self.tree[cur_ix].item.end = next_ix;
2285 if let TreePointer::Valid(next_node_ix) = next_node {
2286 self.tree[next_node_ix].item.start =
2287 max(self.tree[next_node_ix].item.start, next_ix);
2288 }
2289
2290 if tos.ty == LinkStackTy::Link {
2291 self.link_stack.disable_all_links();
2292 }
2293 } else {
2294 let scan_result = scan_reference(&self.tree, block_text, next);
2297 let node_after_link = match scan_result {
2298 RefScan::LinkLabel(_, next_node) => next_node,
2299 RefScan::Collapsed(next_node) => next_node,
2300 RefScan::Failed => next,
2301 };
2302 let link_type = match &scan_result {
2303 RefScan::LinkLabel(..) => LinkType::Reference,
2304 RefScan::Collapsed(..) => LinkType::Collapsed,
2305 RefScan::Failed => LinkType::Shortcut,
2306 };
2307
2308 let label: Option<ReferenceLabel<'a>> = match scan_result {
2309 RefScan::LinkLabel(l, ..) => Some(ReferenceLabel::Link(l)),
2310 RefScan::Collapsed(..) | RefScan::Failed => {
2311 scan_link_label(
2313 &self.tree,
2314 &self.text[(self.tree[tos.node].item.end - 1)
2315 ..self.tree[cur_ix].item.end],
2316 )
2317 .map(|(_ix, label)| label)
2318 }
2319 };
2320
2321 if let Some(ReferenceLabel::Footnote(l)) = label {
2323 self.tree[tos.node].next = node_after_link;
2324 self.tree[tos.node].child = TreePointer::Nil;
2325 self.tree[tos.node].item.body =
2326 ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
2327 prev = TreePointer::Valid(tos.node);
2328 cur = node_after_link;
2329 self.link_stack.clear();
2330 continue;
2331 } else if let Some(ReferenceLabel::Link(link_label)) = label {
2332 let type_url_title = self
2333 .allocs
2334 .refdefs
2335 .get(&UniCase::new(link_label.as_ref().into()))
2336 .map(|matching_def| {
2337 let title = matching_def
2339 .title
2340 .as_ref()
2341 .cloned()
2342 .unwrap_or_else(|| "".into());
2343 let url = matching_def.dest.clone();
2344 (link_type, url, title)
2345 })
2346 .or_else(|| {
2347 self.broken_link_callback
2348 .and_then(|callback| {
2349 callback(link_label.as_ref(), link_label.as_ref())
2352 })
2353 .map(|(url, title)| {
2354 (link_type.to_unknown(), url.into(), title.into())
2355 })
2356 });
2357
2358 if let Some((def_link_type, url, title)) = type_url_title {
2359 let link_ix =
2360 self.allocs.allocate_link(def_link_type, url, title);
2361 self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
2362 {
2363 ItemBody::Image(link_ix)
2364 } else {
2365 ItemBody::Link(link_ix)
2366 };
2367 let label_node = self.tree[tos.node].next;
2368
2369 self.tree[tos.node].next = node_after_link;
2372
2373 if label_node != cur {
2375 self.tree[tos.node].child = label_node;
2376
2377 if let TreePointer::Valid(prev_ix) = prev {
2379 self.tree[prev_ix].next = TreePointer::Nil;
2380 }
2381 }
2382
2383 cur = TreePointer::Valid(tos.node);
2385 cur_ix = tos.node;
2386
2387 if tos.ty == LinkStackTy::Link {
2388 self.link_stack.disable_all_links();
2389 }
2390 } else {
2391 self.tree[cur_ix].item.body = ItemBody::Text;
2392 }
2393 } else {
2394 self.tree[cur_ix].item.body = ItemBody::Text;
2395 }
2396 }
2397 } else {
2398 self.tree[cur_ix].item.body = ItemBody::Text;
2399 }
2400 }
2401 _ => (),
2402 }
2403 prev = cur;
2404 cur = self.tree[cur_ix].next;
2405 }
2406 self.link_stack.clear();
2407 }
2408
2409 fn handle_emphasis(&mut self) {
2410 let mut prev = TreePointer::Nil;
2411 let mut prev_ix: TreeIndex;
2412 let mut cur = self.tree.cur();
2413 while let TreePointer::Valid(mut cur_ix) = cur {
2414 if let ItemBody::MaybeEmphasis(mut count, can_open, can_close) =
2415 self.tree[cur_ix].item.body
2416 {
2417 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
2418 let both = can_open && can_close;
2419 if can_close {
2420 while let Some(el) =
2421 self.inline_stack.find_match(&mut self.tree, c, count, both)
2422 {
2423 if let TreePointer::Valid(prev_ix) = prev {
2425 self.tree[prev_ix].next = TreePointer::Nil;
2426 }
2427 let match_count = min(count, el.count);
2428 let mut end = cur_ix - 1;
2430 let mut start = el.start + el.count;
2431
2432 while start > el.start + el.count - match_count {
2434 let (inc, ty) = if c == b'~' {
2435 (2, ItemBody::Strikethrough)
2436 } else if start > el.start + el.count - match_count + 1 {
2437 (2, ItemBody::Strong)
2438 } else {
2439 (1, ItemBody::Emphasis)
2440 };
2441
2442 let root = start - inc;
2443 end = end + inc;
2444 self.tree[root].item.body = ty;
2445 self.tree[root].item.end = self.tree[end].item.end;
2446 self.tree[root].child = TreePointer::Valid(start);
2447 self.tree[root].next = TreePointer::Nil;
2448 start = root;
2449 }
2450
2451 prev_ix = el.start + el.count - match_count;
2453 prev = TreePointer::Valid(prev_ix);
2454 cur = self.tree[cur_ix + match_count - 1].next;
2455 self.tree[prev_ix].next = cur;
2456
2457 if el.count > match_count {
2458 self.inline_stack.push(InlineEl {
2459 start: el.start,
2460 count: el.count - match_count,
2461 c: el.c,
2462 both,
2463 })
2464 }
2465 count -= match_count;
2466 if count > 0 {
2467 cur_ix = cur.unwrap();
2468 } else {
2469 break;
2470 }
2471 }
2472 }
2473 if count > 0 {
2474 if can_open {
2475 self.inline_stack.push(InlineEl {
2476 start: cur_ix,
2477 count,
2478 c,
2479 both,
2480 });
2481 } else {
2482 for i in 0..count {
2483 self.tree[cur_ix + i].item.body = ItemBody::Text;
2484 }
2485 }
2486 prev_ix = cur_ix + count - 1;
2487 prev = TreePointer::Valid(prev_ix);
2488 cur = self.tree[prev_ix].next;
2489 }
2490 } else {
2491 prev = cur;
2492 cur = self.tree[cur_ix].next;
2493 }
2494 }
2495 self.inline_stack.pop_all(&mut self.tree);
2496 }
2497
2498 fn scan_inline_link(
2500 &self,
2501 underlying: &'a str,
2502 mut ix: usize,
2503 node: TreePointer,
2504 ) -> Option<(usize, CowStr<'a>, CowStr<'a>)> {
2505 if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
2506 return None;
2507 }
2508 ix += 1;
2509 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2510
2511 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
2512 let dest = unescape(dest);
2513 ix += dest_length;
2514
2515 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2516
2517 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
2518 ix += bytes_scanned;
2519 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2520 t
2521 } else {
2522 "".into()
2523 };
2524 if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
2525 return None;
2526 }
2527 ix += 1;
2528
2529 Some((ix, dest, title))
2530 }
2531
2532 fn scan_link_title(
2534 &self,
2535 text: &'a str,
2536 start_ix: usize,
2537 node: TreePointer,
2538 ) -> Option<(usize, CowStr<'a>)> {
2539 let bytes = text.as_bytes();
2540 let open = match bytes.get(start_ix) {
2541 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
2542 _ => return None,
2543 };
2544 let close = if open == b'(' { b')' } else { open };
2545
2546 let mut title = String::new();
2547 let mut mark = start_ix + 1;
2548 let mut i = start_ix + 1;
2549
2550 while i < bytes.len() {
2551 let c = bytes[i];
2552
2553 if c == close {
2554 let cow = if mark == 1 {
2555 (i - start_ix + 1, text[mark..i].into())
2556 } else {
2557 title.push_str(&text[mark..i]);
2558 (i - start_ix + 1, title.into())
2559 };
2560
2561 return Some(cow);
2562 }
2563 if c == open {
2564 return None;
2565 }
2566
2567 if c == b'\n' || c == b'\r' {
2568 if let TreePointer::Valid(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
2569 if self.tree[node_ix].item.start > i {
2570 title.push_str(&text[mark..i]);
2571 title.push('\n');
2572 i = self.tree[node_ix].item.start;
2573 mark = i;
2574 continue;
2575 }
2576 }
2577 }
2578 if c == b'&' {
2579 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
2580 title.push_str(&text[mark..i]);
2581 title.push_str(&value);
2582 i += n;
2583 mark = i;
2584 continue;
2585 }
2586 }
2587 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
2588 title.push_str(&text[mark..i]);
2589 i += 1;
2590 mark = i;
2591 }
2592
2593 i += 1;
2594 }
2595
2596 None
2597 }
2598
2599 fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2603 let first_ix = open + 1;
2604 let last_ix = close - 1;
2605 let bytes = self.text.as_bytes();
2606 let mut span_start = self.tree[open].item.end;
2607 let mut span_end = self.tree[close].item.start;
2608 let mut buf: Option<String> = None;
2609
2610 if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2612 let opening = match bytes[span_start] {
2613 b' ' | b'\r' | b'\n' => true,
2614 _ => false,
2615 };
2616 let closing = match bytes[span_end - 1] {
2617 b' ' | b'\r' | b'\n' => true,
2618 _ => false,
2619 };
2620 let drop_enclosing_whitespace = opening && closing;
2621
2622 if drop_enclosing_whitespace {
2623 span_start += 1;
2624 if span_start < span_end {
2625 span_end -= 1;
2626 }
2627 }
2628
2629 let mut ix = first_ix;
2630
2631 while ix < close {
2632 if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
2633 if drop_enclosing_whitespace {
2634 if ix == first_ix {
2636 ix = ix + 1;
2637 span_start = min(span_end, self.tree[ix].item.start);
2638 continue;
2639 } else if ix == last_ix && last_ix > first_ix {
2640 ix = ix + 1;
2641 continue;
2642 }
2643 }
2644
2645 let end = bytes[self.tree[ix].item.start..]
2646 .iter()
2647 .position(|&b| b == b'\r' || b == b'\n')
2648 .unwrap()
2649 + self.tree[ix].item.start;
2650 if let Some(ref mut buf) = buf {
2651 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2652 buf.push(' ');
2653 } else {
2654 let mut new_buf = String::with_capacity(span_end - span_start);
2655 new_buf.push_str(&self.text[span_start..end]);
2656 new_buf.push(' ');
2657 buf = Some(new_buf);
2658 }
2659 } else if let Some(ref mut buf) = buf {
2660 let end = if ix == last_ix {
2661 span_end
2662 } else {
2663 self.tree[ix].item.end
2664 };
2665 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2666 }
2667 ix = ix + 1;
2668 }
2669 }
2670
2671 let cow = if let Some(buf) = buf {
2672 buf.into()
2673 } else {
2674 self.text[span_start..span_end].into()
2675 };
2676 if preceding_backslash {
2677 self.tree[open].item.body = ItemBody::Text;
2678 self.tree[open].item.end = self.tree[open].item.start + 1;
2679 self.tree[open].next = TreePointer::Valid(close);
2680 self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2681 self.tree[close].item.start = self.tree[open].item.start + 1;
2682 } else {
2683 self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2684 self.tree[open].item.end = self.tree[close].item.end;
2685 self.tree[open].next = self.tree[close].next;
2686 }
2687 }
2688
2689 fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2693 let first_ix = open + 1;
2694 let last_ix = close - 1;
2695 let bytes = self.text.as_bytes();
2696 let mut span_start = self.tree[open].item.end;
2697 let mut span_end = self.tree[close].item.start;
2698 let mut buf: Option<String> = None;
2699
2700 if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2701 let opening = match bytes[span_start] {
2702 b' ' | b'\r' | b'\n' => true,
2703 _ => false,
2704 };
2705 let closing = match bytes[span_end - 1] {
2706 b' ' | b'\r' | b'\n' => true,
2707 _ => false,
2708 };
2709 let drop_enclosing_whitespace = opening && closing;
2710
2711 if drop_enclosing_whitespace {
2712 span_start += 1;
2713 if span_start < span_end {
2714 span_end -= 1;
2715 }
2716 }
2717
2718 let mut ix = first_ix;
2719
2720 while ix < close {
2721 if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
2722 if drop_enclosing_whitespace {
2723 if ix == first_ix {
2725 ix = ix + 1;
2726 span_start = min(span_end, self.tree[ix].item.start);
2727 continue;
2728 } else if ix == last_ix && last_ix > first_ix {
2729 ix = ix + 1;
2730 continue;
2731 }
2732 }
2733
2734 let end = bytes[self.tree[ix].item.start..]
2735 .iter()
2736 .position(|&b| b == b'\r' || b == b'\n')
2737 .unwrap()
2738 + self.tree[ix].item.start;
2739 if let Some(ref mut buf) = buf {
2740 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2741 buf.push(' ');
2742 } else {
2743 let mut new_buf = String::with_capacity(span_end - span_start);
2744 new_buf.push_str(&self.text[span_start..end]);
2745 new_buf.push(' ');
2746 buf = Some(new_buf);
2747 }
2748 } else if let Some(ref mut buf) = buf {
2749 let end = if ix == last_ix {
2750 span_end
2751 } else {
2752 self.tree[ix].item.end
2753 };
2754 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2755 }
2756 ix = ix + 1;
2757 }
2758 }
2759
2760 let cow = if let Some(buf) = buf {
2761 buf.into()
2762 } else {
2763 self.text[span_start..span_end].into()
2764 };
2765 if preceding_backslash {
2766 self.tree[open].item.body = ItemBody::Text;
2767 self.tree[open].item.end = self.tree[open].item.start + 1;
2768 self.tree[open].next = TreePointer::Valid(close);
2769 self.tree[close].item.body = ItemBody::Math(self.allocs.allocate_cow(cow));
2770 self.tree[close].item.start = self.tree[open].item.start + 1;
2771 } else {
2772 self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow));
2773 self.tree[open].item.end = self.tree[close].item.end;
2774 self.tree[open].next = self.tree[close].next;
2775 }
2776 }
2777
2778 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<usize> {
2780 let c = *bytes.get(ix)?;
2781 if c == b'!' {
2782 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)
2783 } else if c == b'?' {
2784 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)
2785 } else {
2786 let i = scan_html_block_inner(
2787 &bytes[ix..],
2788 Some(&|_bytes| {
2789 let mut line_start = LineStart::new(bytes);
2790 let _ = scan_containers(&self.tree, &mut line_start);
2791 line_start.bytes_scanned()
2792 }),
2793 )?;
2794 Some(i + ix)
2795 }
2796 }
2797
2798 pub fn into_offset_iter(self) -> OffsetIter<'a> {
2802 OffsetIter { inner: self }
2803 }
2804}
2805
2806pub(crate) enum LoopInstruction<T> {
2807 ContinueAndSkip(usize),
2809 BreakAtWith(usize, T),
2811}
2812
2813fn iterate_special_bytes<F, T>(bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
2824where
2825 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2826{
2827 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2828 {
2829 crate::simd::iterate_special_bytes(bytes, ix, callback)
2830 }
2831 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2832 {
2833 scalar_iterate_special_bytes(bytes, ix, callback)
2834 }
2835}
2836
2837const fn special_bytes() -> [bool; 256] {
2838 let mut bytes = [false; 256];
2839 bytes[b'<' as usize] = true;
2840 bytes[b'!' as usize] = true;
2841 bytes[b'[' as usize] = true;
2842 bytes[b'~' as usize] = true;
2843 bytes[b'`' as usize] = true;
2844 bytes[b'|' as usize] = true;
2845 bytes[b'\\' as usize] = true;
2846 bytes[b'*' as usize] = true;
2847 bytes[b'_' as usize] = true;
2848 bytes[b'\r' as usize] = true;
2849 bytes[b'\n' as usize] = true;
2850 bytes[b']' as usize] = true;
2851 bytes[b'&' as usize] = true;
2852 bytes[b'$' as usize] = true;
2853 bytes
2854}
2855
2856pub(crate) fn scalar_iterate_special_bytes<F, T>(
2857 bytes: &[u8],
2858 mut ix: usize,
2859 mut callback: F,
2860) -> (usize, Option<T>)
2861where
2862 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2863{
2864 let special_bytes = special_bytes();
2865
2866 while ix < bytes.len() {
2867 let b = bytes[ix];
2868 if special_bytes[b as usize] {
2869 match callback(ix, b) {
2870 LoopInstruction::ContinueAndSkip(skip) => {
2871 ix += skip;
2872 }
2873 LoopInstruction::BreakAtWith(ix, val) => {
2874 return (ix, val);
2875 }
2876 }
2877 }
2878 ix += 1;
2879 }
2880
2881 (ix, None)
2882}
2883
2884pub struct OffsetIter<'a> {
2892 inner: Parser<'a>,
2893}
2894
2895impl<'a> Iterator for OffsetIter<'a> {
2896 type Item = (Event<'a>, Range<usize>);
2897
2898 fn next(&mut self) -> Option<Self::Item> {
2899 match self.inner.tree.cur() {
2900 TreePointer::Nil => {
2901 let ix = self.inner.tree.pop()?;
2902 let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
2903 self.inner.tree.next_sibling(ix);
2904 Some((
2905 Event::End(tag),
2906 self.inner.tree[ix].item.start..self.inner.tree[ix].item.end,
2907 ))
2908 }
2909 TreePointer::Valid(cur_ix) => {
2910 if self.inner.tree[cur_ix].item.body.is_inline() {
2911 self.inner.handle_inline();
2912 }
2913
2914 let node = self.inner.tree[cur_ix];
2915 let item = node.item;
2916 let event = item_to_event(item, self.inner.text, &self.inner.allocs);
2917 if let Event::Start(..) = event {
2918 self.inner.tree.push();
2919 } else {
2920 self.inner.tree.next_sibling(cur_ix);
2921 }
2922 Some((event, item.start..item.end))
2923 }
2924 }
2925 }
2926}
2927
2928fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
2929 match item.body {
2930 ItemBody::Paragraph => Tag::Paragraph,
2931 ItemBody::Emphasis => Tag::Emphasis,
2932 ItemBody::Strong => Tag::Strong,
2933 ItemBody::Strikethrough => Tag::Strikethrough,
2934 ItemBody::Link(link_ix) => {
2935 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2936 Tag::Link(*link_type, url.clone(), title.clone())
2937 }
2938 ItemBody::Image(link_ix) => {
2939 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2940 Tag::Image(*link_type, url.clone(), title.clone())
2941 }
2942 ItemBody::Heading(level) => Tag::Heading(level),
2943 ItemBody::FencedCodeBlock(cow_ix) => {
2944 Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2945 }
2946 ItemBody::DisplayMath => Tag::DisplayMath,
2947 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2948 ItemBody::BlockQuote => Tag::BlockQuote,
2949 ItemBody::List(_, c, listitem_start) => {
2950 if c == b'.' || c == b')' {
2951 Tag::List(Some(listitem_start))
2952 } else {
2953 Tag::List(None)
2954 }
2955 }
2956 ItemBody::ListItem(_) => Tag::Item,
2957 ItemBody::TableHead => Tag::TableHead,
2958 ItemBody::TableCell => Tag::TableCell,
2959 ItemBody::TableRow => Tag::TableRow,
2960 ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
2961 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2962 _ => panic!("unexpected item body {:?}", item.body),
2963 }
2964}
2965
2966fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
2967 let tag = match item.body {
2968 ItemBody::Text => return Event::Text(text[item.start..item.end].into()),
2969 ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()),
2970 ItemBody::Math(cow_ix) => return Event::Math(allocs[cow_ix].clone()),
2971 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()),
2972 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2973 ItemBody::SoftBreak => return Event::SoftBreak,
2974 ItemBody::HardBreak => return Event::HardBreak,
2975 ItemBody::FootnoteReference(cow_ix) => {
2976 return Event::FootnoteReference(allocs[cow_ix].clone())
2977 }
2978 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2979 ItemBody::Rule => return Event::Rule,
2980
2981 ItemBody::Frontmatter(cow_ix) => return Event::Frontmatter(allocs[cow_ix].clone()),
2982 ItemBody::Paragraph => Tag::Paragraph,
2983 ItemBody::Emphasis => Tag::Emphasis,
2984 ItemBody::Strong => Tag::Strong,
2985 ItemBody::Strikethrough => Tag::Strikethrough,
2986 ItemBody::Link(link_ix) => {
2987 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2988 Tag::Link(*link_type, url.clone(), title.clone())
2989 }
2990 ItemBody::Image(link_ix) => {
2991 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2992 Tag::Image(*link_type, url.clone(), title.clone())
2993 }
2994 ItemBody::Heading(level) => Tag::Heading(level),
2995 ItemBody::FencedCodeBlock(cow_ix) => {
2996 Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2997 }
2998 ItemBody::DisplayMath => Tag::DisplayMath,
2999 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
3000 ItemBody::BlockQuote => Tag::BlockQuote,
3001 ItemBody::List(_, c, listitem_start) => {
3002 if c == b'.' || c == b')' {
3003 Tag::List(Some(listitem_start))
3004 } else {
3005 Tag::List(None)
3006 }
3007 }
3008 ItemBody::ListItem(_) => Tag::Item,
3009 ItemBody::TableHead => Tag::TableHead,
3010 ItemBody::TableCell => Tag::TableCell,
3011 ItemBody::TableRow => Tag::TableRow,
3012 ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
3013 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
3014 _ => panic!("unexpected item body {:?}", item.body),
3015 };
3016
3017 Event::Start(tag)
3018}
3019
3020fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
3022 let mut list_item = tree[list_ix].child;
3023 while let TreePointer::Valid(listitem_ix) = list_item {
3024 let list_item_firstborn = tree[listitem_ix].child;
3026
3027 if let TreePointer::Valid(firstborn_ix) = list_item_firstborn {
3029 if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
3030 tree[listitem_ix].child = tree[firstborn_ix].child;
3031 }
3032
3033 let mut list_item_child = TreePointer::Valid(firstborn_ix);
3034 let mut node_to_repoint = TreePointer::Nil;
3035 while let TreePointer::Valid(child_ix) = list_item_child {
3036 let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
3038 if let TreePointer::Valid(child_firstborn) = tree[child_ix].child {
3039 if let TreePointer::Valid(repoint_ix) = node_to_repoint {
3040 tree[repoint_ix].next = TreePointer::Valid(child_firstborn);
3041 }
3042 let mut child_lastborn = child_firstborn;
3043 while let TreePointer::Valid(lastborn_next_ix) = tree[child_lastborn].next {
3044 child_lastborn = lastborn_next_ix;
3045 }
3046 child_lastborn
3047 } else {
3048 child_ix
3049 }
3050 } else {
3051 child_ix
3052 };
3053
3054 node_to_repoint = TreePointer::Valid(repoint_ix);
3055 tree[repoint_ix].next = tree[child_ix].next;
3056 list_item_child = tree[child_ix].next;
3057 }
3058 }
3059
3060 list_item = tree[listitem_ix].next;
3061 }
3062}
3063
3064impl<'a> Iterator for Parser<'a> {
3065 type Item = Event<'a>;
3066
3067 fn next(&mut self) -> Option<Event<'a>> {
3068 match self.tree.cur() {
3069 TreePointer::Nil => {
3070 let ix = self.tree.pop()?;
3071 let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
3072 self.tree.next_sibling(ix);
3073 Some(Event::End(tag))
3074 }
3075 TreePointer::Valid(cur_ix) => {
3076 if self.tree[cur_ix].item.body.is_inline() {
3077 self.handle_inline();
3078 }
3079
3080 let node = self.tree[cur_ix];
3081 let item = node.item;
3082 let event = item_to_event(item, self.text, &self.allocs);
3083 if let Event::Start(..) = event {
3084 self.tree.push();
3085 } else {
3086 self.tree.next_sibling(cur_ix);
3087 }
3088 Some(event)
3089 }
3090 }
3091 }
3092}
3093
3094#[cfg(test)]
3095mod test {
3096 use super::*;
3097 use crate::tree::Node;
3098
3099 fn parser_with_extensions(text: &str) -> Parser<'_> {
3102 let mut opts = Options::empty();
3103 opts.insert(Options::ENABLE_TABLES);
3104 opts.insert(Options::ENABLE_FOOTNOTES);
3105 opts.insert(Options::ENABLE_STRIKETHROUGH);
3106 opts.insert(Options::ENABLE_TASKLISTS);
3107
3108 Parser::new_ext(text, opts)
3109 }
3110
3111 #[test]
3112 #[cfg(target_pointer_width = "64")]
3113 fn node_size() {
3114 let node_size = std::mem::size_of::<Node<Item>>();
3115 assert_eq!(48, node_size);
3116 }
3117
3118 #[test]
3119 #[cfg(target_pointer_width = "64")]
3120 fn body_size() {
3121 let body_size = std::mem::size_of::<ItemBody>();
3122 assert_eq!(16, body_size);
3123 }
3124
3125 #[test]
3126 fn single_open_fish_bracket() {
3127 assert_eq!(3, Parser::new("<").count());
3129 }
3130
3131 #[test]
3132 fn lone_hashtag() {
3133 assert_eq!(2, Parser::new("#").count());
3135 }
3136
3137 #[test]
3138 fn lots_of_backslashes() {
3139 Parser::new("\\\\\r\r").count();
3141 Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
3142 }
3143
3144 #[test]
3145 fn issue_320() {
3146 parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
3148 }
3149
3150 #[test]
3151 fn issue_319() {
3152 parser_with_extensions("|\r-]([^|\r-]([^").count();
3154 parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
3155 }
3156
3157 #[test]
3158 fn issue_303() {
3159 parser_with_extensions("[^\r\ra]").count();
3161 parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
3162 }
3163
3164 #[test]
3165 fn issue_313() {
3166 parser_with_extensions("*]0[^\r\r*]0[^").count();
3168 parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
3169 }
3170
3171 #[test]
3172 fn issue_311() {
3173 parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
3175 }
3176
3177 #[test]
3178 fn issue_283() {
3179 let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
3180 parser_with_extensions(input).count();
3182 }
3183
3184 #[test]
3185 fn issue_289() {
3186 parser_with_extensions("> - \\\n> - ").count();
3188 parser_with_extensions("- \n\n").count();
3189 }
3190
3191 #[test]
3192 fn issue_306() {
3193 parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
3195 }
3196
3197 #[test]
3198 fn issue_305() {
3199 parser_with_extensions("_6**6*_*").count();
3201 }
3202
3203 #[test]
3204 fn another_emphasis_panic() {
3205 parser_with_extensions("*__#_#__*").count();
3206 }
3207
3208 #[test]
3209 fn offset_iter() {
3210 let event_offsets: Vec<_> = Parser::new("*hello* world")
3211 .into_offset_iter()
3212 .map(|(_ev, range)| range)
3213 .collect();
3214 let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
3215 assert_eq!(expected_offsets, event_offsets);
3216 }
3217
3218 #[test]
3219 fn offset_iter_issue_378() {
3220 let event_offsets: Vec<_> = Parser::new("a [b](c) d")
3221 .into_offset_iter()
3222 .map(|(_ev, range)| range)
3223 .collect();
3224 let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
3225 assert_eq!(expected_offsets, event_offsets);
3226 }
3227
3228 #[test]
3229 fn offset_iter_issue_404() {
3230 let event_offsets: Vec<_> = Parser::new("###\n")
3231 .into_offset_iter()
3232 .map(|(_ev, range)| range)
3233 .collect();
3234 let expected_offsets = vec![(0..4), (0..4)];
3235 assert_eq!(expected_offsets, event_offsets);
3236 }
3237
3238 #[test]
3240 fn link_def_at_eof() {
3241 let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
3242 let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
3243
3244 let mut buf = String::new();
3245 crate::html::push_html(&mut buf, Parser::new(test_str));
3246 assert_eq!(expected, buf);
3247 }
3248
3249 #[test]
3250 fn ref_def_at_eof() {
3251 let test_str = "[test]:\\";
3252 let expected = "";
3253
3254 let mut buf = String::new();
3255 crate::html::push_html(&mut buf, Parser::new(test_str));
3256 assert_eq!(expected, buf);
3257 }
3258
3259 #[test]
3260 fn ref_def_cr_lf() {
3261 let test_str = "[a]: /u\r\n\n[a]";
3262 let expected = "<p><a href=\"/u\">a</a></p>\n";
3263
3264 let mut buf = String::new();
3265 crate::html::push_html(&mut buf, Parser::new(test_str));
3266 assert_eq!(expected, buf);
3267 }
3268
3269 #[test]
3270 fn no_dest_refdef() {
3271 let test_str = "[a]:";
3272 let expected = "<p>[a]:</p>\n";
3273
3274 let mut buf = String::new();
3275 crate::html::push_html(&mut buf, Parser::new(test_str));
3276 assert_eq!(expected, buf);
3277 }
3278
3279 #[test]
3280 fn simple_broken_link_callback() {
3281 let test_str = "This is a link w/o def: [hello][world]";
3282 let parser = Parser::new_with_broken_link_callback(
3283 test_str,
3284 Options::empty(),
3285 Some(&|norm, raw| {
3286 assert_eq!("world", raw);
3287 assert_eq!("world", norm);
3288 Some(("YOLO".to_owned(), "SWAG".to_owned()))
3289 }),
3290 );
3291 let mut link_tag_count = 0;
3292 for (typ, url, title) in parser.filter_map(|event| match event {
3293 Event::Start(tag) | Event::End(tag) => match tag {
3294 Tag::Link(typ, url, title) => Some((typ, url, title)),
3295 _ => None,
3296 },
3297 _ => None,
3298 }) {
3299 link_tag_count += 1;
3300 assert_eq!(typ, LinkType::ReferenceUnknown);
3301 assert_eq!(url.as_ref(), "YOLO");
3302 assert_eq!(title.as_ref(), "SWAG");
3303 }
3304 assert!(link_tag_count > 0);
3305 }
3306
3307 #[test]
3308 fn code_block_kind_check_fenced() {
3309 let parser = Parser::new("hello\n```test\ntadam\n```");
3310 let mut found = 0;
3311 for (ev, _range) in parser.into_offset_iter() {
3312 match ev {
3313 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
3314 assert_eq!(syntax.as_ref(), "test");
3315 found += 1;
3316 }
3317 _ => {}
3318 }
3319 }
3320 assert_eq!(found, 1);
3321 }
3322
3323 #[test]
3324 fn code_block_kind_check_indented() {
3325 let parser = Parser::new("hello\n\n ```test\n tadam\nhello");
3326 let mut found = 0;
3327 for (ev, _range) in parser.into_offset_iter() {
3328 match ev {
3329 Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
3330 found += 1;
3331 }
3332 _ => {}
3333 }
3334 }
3335 assert_eq!(found, 1);
3336 }
3337}