1use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::iter::FusedIterator;
26use std::num::NonZeroUsize;
27use std::ops::{Index, Range};
28
29use unicase::UniCase;
30
31use crate::firstpass::run_first_pass;
32use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
33use crate::scanners::*;
34use crate::strings::CowStr;
35use crate::tree::{Tree, TreeIndex};
36use crate::{
37 Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, LinkType, MetadataBlockKind,
38 Options, Tag, TagEnd,
39};
40
/// Parenthesis-nesting limit handed to `scan_link_dest` when the destination of an
/// inline link is scanned (see `scan_inline_link`).
// NOTE(review): how the scanner enforces the limit is not visible in this file.
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
47
/// Payload of a node in the parser's `Tree`: a source range plus the kind of item
/// found there.
///
/// In this file `start` and `end` are used to slice the source text
/// (`&self.text[open.end..close.start]`), i.e. they are byte offsets and `end` is
/// exclusive.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    /// Byte offset at which the item begins.
    pub start: usize,
    /// Byte offset just past the item.
    pub end: usize,
    /// What kind of item this is.
    pub body: ItemBody,
}
54
/// The kind of an [`Item`] in the parse tree.
///
/// `Maybe*` variants are unresolved inline candidates. The inline passes in this
/// file either turn them into one of the resolved variants or demote them to
/// `Text`.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // Unresolved inline candidates (the set tested by `is_maybe_inline`).
    /// Emphasis delimiter: (count, can_open, can_close).
    MaybeEmphasis(usize, bool, bool),
    /// Math delimiter: (can_open, can_close, brace context).
    MaybeMath(bool, bool, u8),
    /// Quote character: (quote byte, can_open, can_close). `b'\''` is treated as a
    /// single quote, any other byte as a double quote.
    MaybeSmartQuote(u8, bool, bool),
    /// Code span delimiter: (delimiter count, preceded by a backslash).
    MaybeCode(usize, bool),
    /// Candidate start of an autolink or of inline HTML.
    MaybeHtml,
    /// Candidate opening bracket of a link.
    MaybeLinkOpen,
    /// Candidate closing bracket. The flag says whether the bracket may still be
    /// tried as a collapsed/shortcut reference; the first inline pass sets it to
    /// `false` on the closing bracket of a `[label][reference]` once that
    /// reference has been attempted.
    MaybeLinkClose(bool),
    /// Candidate start of an image.
    MaybeImage,

    // Resolved inline items.
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    /// Math span: (index of the normalised content, is display math).
    Math(CowIndex, bool),
    /// Code span; the index refers to the normalised content.
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    /// Footnote reference; the index refers to the footnote label.
    FootnoteReference(CowIndex),
    // NOTE(review): the flag is presumably "checked" — not visible in this file.
    TaskListMarker(bool),
    /// Inline HTML taken verbatim from the source range of the item.
    InlineHtml,
    /// Inline HTML whose text was rebuilt into an owned string by the scanner.
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    /// A single character emitted in place of the source text (for example a
    /// curly quote, `!`, or `\`).
    SynthesizeChar(char),
    Html,
    Text {
        backslash_escaped: bool,
    },
    SoftBreak,
    /// Hard line break. A `HardBreak(true)` with no following sibling is rewritten
    /// to a literal backslash by the emphasis pass.
    // NOTE(review): `true` presumably marks a backslash-style break — confirm.
    HardBreak(bool),

    /// Default variant.
    // NOTE(review): presumably only the dummy node at the top of the tree — confirm.
    #[default]
    Root,

    // Block-level items.
    Paragraph,
    TightParagraph,
    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>),
    FencedCodeBlock(CowIndex),
    IndentCodeBlock,
    HtmlBlock,
    BlockQuote(Option<BlockQuoteKind>),
    // NOTE(review): field meanings are not visible in this file — confirm against
    // the first pass before relying on them.
    List(bool, u8, u64),
    /// List item; the payload is the indentation that continuation lines must
    /// provide (see `scan_containers`).
    ListItem(usize),
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    // Definition lists.
    // NOTE(review): the flag's meaning is not visible in this file.
    DefinitionList(bool),
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    /// Definition body; the payload is the required continuation indentation
    /// (see `scan_containers`).
    DefinitionListDefinition(usize),

    // Tables.
    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,
}
130
131impl ItemBody {
132 fn is_maybe_inline(&self) -> bool {
133 use ItemBody::*;
134 matches!(
135 *self,
136 MaybeEmphasis(..)
137 | MaybeMath(..)
138 | MaybeSmartQuote(..)
139 | MaybeCode(..)
140 | MaybeHtml
141 | MaybeLinkOpen
142 | MaybeLinkClose(..)
143 | MaybeImage
144 )
145 }
146 fn is_inline(&self) -> bool {
147 use ItemBody::*;
148 matches!(
149 *self,
150 MaybeEmphasis(..)
151 | MaybeMath(..)
152 | MaybeSmartQuote(..)
153 | MaybeCode(..)
154 | MaybeHtml
155 | MaybeLinkOpen
156 | MaybeLinkClose(..)
157 | MaybeImage
158 | Emphasis
159 | Strong
160 | Strikethrough
161 | Math(..)
162 | Code(..)
163 | Link(..)
164 | Image(..)
165 | FootnoteReference(..)
166 | TaskListMarker(..)
167 | InlineHtml
168 | OwnedInlineHtml(..)
169 | SynthesizeText(..)
170 | SynthesizeChar(..)
171 | Html
172 | Text { .. }
173 | SoftBreak
174 | HardBreak(..)
175 )
176 }
177}
178
/// Description of a reference whose label has no matching definition; it is handed
/// to the broken-link callback.
#[derive(Debug)]
pub struct BrokenLink<'a> {
    /// Range of the reference in the source text, from the start of the opening
    /// node to the end of the scanned label.
    pub span: std::ops::Range<usize>,
    /// The kind of reference that was being resolved.
    pub link_type: LinkType,
    /// The label that could not be resolved.
    pub reference: CowStr<'a>,
}
185
/// Markdown parser state: the source text, the tree produced by the first pass,
/// and the scratch structures used while resolving inline items.
///
/// `F` is the type of the optional broken-link callback.
pub struct Parser<'input, F = DefaultBrokenLinkCallback> {
    /// The complete source text; item offsets index into it.
    text: &'input str,
    options: Options,
    /// Tree built by `run_first_pass` and refined in place by the inline passes.
    tree: Tree<Item>,
    /// Side storage referenced by index from `ItemBody` (strings, links, reference
    /// definitions, footnote definitions).
    allocs: Allocations<'input>,
    /// Consulted when a reference label has no matching definition.
    broken_link_callback: Option<F>,
    /// State threaded through the inline HTML comment and processing-instruction
    /// scanners.
    html_scan_guard: HtmlScanGuard,

    /// Remaining budget for resolving link references. It starts at
    /// `max(text.len(), 100_000)`, is reduced by the URL plus title length of each
    /// resolved reference, and once it reaches zero no further references resolve.
    link_ref_expansion_limit: usize,

    // Scratch state for the inline passes.
    /// Pending emphasis openers.
    inline_stack: InlineStack,
    /// Pending link/image openers.
    link_stack: LinkStack,
    /// Pending wikilink openers (only fed when `ENABLE_WIKILINKS` is set).
    wikilink_stack: LinkStack,
    /// Code delimiters recorded by an earlier unsuccessful scan, for reuse.
    code_delims: CodeDelims,
    /// Math delimiters recorded by an earlier unsuccessful scan, for reuse.
    math_delims: MathDelims,
}
220
221impl<'input, F> std::fmt::Debug for Parser<'input, F> {
222 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
223 f.debug_struct("Parser")
225 .field("text", &self.text)
226 .field("options", &self.options)
227 .field(
228 "broken_link_callback",
229 &self.broken_link_callback.as_ref().map(|_| ..),
230 )
231 .finish()
232 }
233}
234
235impl<'a> BrokenLink<'a> {
236 pub fn into_static(self) -> BrokenLink<'static> {
240 BrokenLink {
241 span: self.span.clone(),
242 link_type: self.link_type,
243 reference: self.reference.into_string().into(),
244 }
245 }
246}
247
248impl<'input> Parser<'input, DefaultBrokenLinkCallback> {
249 pub fn new(text: &'input str) -> Self {
251 Self::new_ext(text, Options::empty())
252 }
253
254 pub fn new_ext(text: &'input str, options: Options) -> Self {
256 Self::new_with_broken_link_callback(text, options, None)
257 }
258}
259
260impl<'input, F: BrokenLinkCallback<'input>> Parser<'input, F> {
261 pub fn new_with_broken_link_callback(
267 text: &'input str,
268 options: Options,
269 broken_link_callback: Option<F>,
270 ) -> Self {
271 let (mut tree, allocs) = run_first_pass(text, options);
272 tree.reset();
273 let inline_stack = Default::default();
274 let link_stack = Default::default();
275 let wikilink_stack = Default::default();
276 let html_scan_guard = Default::default();
277 Parser {
278 text,
279 options,
280 tree,
281 allocs,
282 broken_link_callback,
283 inline_stack,
284 link_stack,
285 wikilink_stack,
286 html_scan_guard,
287 link_ref_expansion_limit: text.len().max(100_000),
289 code_delims: CodeDelims::new(),
290 math_delims: MathDelims::new(),
291 }
292 }
293
/// Returns the reference definitions collected by the first pass.
pub fn reference_definitions(&self) -> &RefDefs<'_> {
    &self.allocs.refdefs
}
299
300 fn fetch_link_type_url_title(
320 &mut self,
321 link_label: CowStr<'input>,
322 span: Range<usize>,
323 link_type: LinkType,
324 ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
325 if self.link_ref_expansion_limit == 0 {
326 return None;
327 }
328
329 let (link_type, url, title) = self
330 .allocs
331 .refdefs
332 .get(link_label.as_ref())
333 .map(|matching_def| {
334 let title = matching_def
336 .title
337 .as_ref()
338 .cloned()
339 .unwrap_or_else(|| "".into());
340 let url = matching_def.dest.clone();
341 (link_type, url, title)
342 })
343 .or_else(|| {
344 match self.broken_link_callback.as_mut() {
345 Some(callback) => {
346 let broken_link = BrokenLink {
348 span,
349 link_type,
350 reference: link_label,
351 };
352
353 callback
354 .handle_broken_link(broken_link)
355 .map(|(url, title)| (link_type.to_unknown(), url, title))
356 }
357 None => None,
358 }
359 })?;
360
361 self.link_ref_expansion_limit = self
365 .link_ref_expansion_limit
366 .saturating_sub(url.len() + title.len());
367
368 Some((link_type, url, title))
369 }
370
/// Resolves the inline items of the current sibling list.
///
/// Runs the first inline pass (HTML, math, code spans, links and images) and
/// then the emphasis / smart-quote / hard-break pass, in that order.
fn handle_inline(&mut self) {
    self.handle_inline_pass1();
    self.handle_emphasis_and_hard_break();
}
381
/// First inline pass over the sibling list that starts at the tree's current node.
///
/// Each `MaybeHtml`, `MaybeMath`, `MaybeCode`, `MaybeLinkOpen`, `MaybeImage` and
/// `MaybeLinkClose` item is either converted into its resolved form (rewiring
/// `next`/`child` pointers as needed) or demoted to plain `Text`. All other
/// items, including emphasis and smart-quote candidates, are left untouched.
///
/// The per-block scratch state (`link_stack`, `wikilink_stack`, `code_delims`,
/// `math_delims`) is cleared before returning.
fn handle_inline_pass1(&mut self) {
    let mut cur = self.tree.cur();
    let mut prev = None;

    // Scanners are given the source text cut off at the end of the node returned
    // by `peek_up()`, so they cannot read beyond it.
    let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
    let block_text = &self.text[..block_end];

    while let Some(mut cur_ix) = cur {
        match self.tree[cur_ix].item.body {
            ItemBody::MaybeHtml => {
                let next = self.tree[cur_ix].next;
                let autolink = if let Some(next_ix) = next {
                    scan_autolink(block_text, self.tree[next_ix].item.start)
                } else {
                    None
                };

                if let Some((ix, uri, link_type)) = autolink {
                    // Autolink: this node becomes a `Link` whose only child is a
                    // text node covering the range strictly between the first and
                    // last byte of the link.
                    let node = scan_nodes_to_ix(&self.tree, next, ix);
                    let text_node = self.tree.create_node(Item {
                        start: self.tree[cur_ix].item.start + 1,
                        end: ix - 1,
                        body: ItemBody::Text {
                            backslash_escaped: false,
                        },
                    });
                    let link_ix =
                        self.allocs
                            .allocate_link(link_type, uri, "".into(), "".into());
                    self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
                    self.tree[cur_ix].item.end = ix;
                    self.tree[cur_ix].next = node;
                    self.tree[cur_ix].child = Some(text_node);
                    prev = cur;
                    cur = node;
                    if let Some(node_ix) = cur {
                        // The following node must not start before the link ends.
                        self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
                    }
                    continue;
                } else {
                    let inline_html = next.and_then(|next_ix| {
                        self.scan_inline_html(
                            block_text.as_bytes(),
                            self.tree[next_ix].item.start,
                        )
                    });
                    if let Some((span, ix)) = inline_html {
                        let node = scan_nodes_to_ix(&self.tree, next, ix);
                        // A non-empty `span` is HTML text rebuilt by the scanner and
                        // is stored as an owned string; an empty one means the
                        // source range itself is the HTML.
                        self.tree[cur_ix].item.body = if !span.is_empty() {
                            let converted_string =
                                String::from_utf8(span).expect("invalid utf8");
                            ItemBody::OwnedInlineHtml(
                                self.allocs.allocate_cow(converted_string.into()),
                            )
                        } else {
                            ItemBody::InlineHtml
                        };
                        self.tree[cur_ix].item.end = ix;
                        self.tree[cur_ix].next = node;
                        prev = cur;
                        cur = node;
                        if let Some(node_ix) = cur {
                            self.tree[node_ix].item.start =
                                max(self.tree[node_ix].item.start, ix);
                        }
                        continue;
                    }
                }
                // Neither an autolink nor inline HTML: plain text.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
            }
            ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
                if !can_open {
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                    continue;
                }
                // Two adjacent math delimiters form a display delimiter.
                let is_display = self.tree[cur_ix].next.map_or(false, |next_ix| {
                    matches!(
                        self.tree[next_ix].item.body,
                        ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
                    )
                });
                let result = if self.math_delims.is_populated() {
                    // Delimiters were recorded by an earlier scan; look the closer
                    // up instead of walking the siblings again.
                    self.math_delims
                        .find(&self.tree, cur_ix, is_display, brace_context)
                } else {
                    // Nothing recorded yet: walk the following siblings.
                    let mut scan = self.tree[cur_ix].next;
                    if is_display {
                        // Skip the second node of the opening display delimiter.
                        // `scan` is `Some` here because `is_display` required a
                        // next node.
                        scan = self.tree[scan.unwrap()].next;
                    }
                    let mut invalid = false;
                    while let Some(scan_ix) = scan {
                        if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
                            self.tree[scan_ix].item.body
                        {
                            let delim_is_display =
                                self.tree[scan_ix].next.map_or(false, |next_ix| {
                                    matches!(
                                        self.tree[next_ix].item.body,
                                        ItemBody::MaybeMath(
                                            _can_open,
                                            _can_close,
                                            _brace_context
                                        )
                                    )
                                });
                            if !invalid && delim_brace_context == brace_context {
                                if (!is_display && can_close)
                                    || (is_display && delim_is_display)
                                {
                                    // Found the closer. Drop what was recorded so
                                    // far so that the next opener scans afresh.
                                    self.math_delims.clear();
                                    break;
                                } else {
                                    // A delimiter in the same brace context that
                                    // cannot close this span: the opener is
                                    // rejected, but scanning continues so that
                                    // `math_delims` gets filled.
                                    invalid = true;
                                }
                            }
                            self.math_delims.insert(
                                delim_is_display,
                                delim_brace_context,
                                scan_ix,
                                can_close,
                            );
                        }
                        scan = self.tree[scan_ix].next;
                    }
                    // `Some` only when the loop stopped at a closer.
                    scan
                };

                if let Some(scan_ix) = result {
                    self.make_math_span(cur_ix, scan_ix);
                } else {
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                }
            }
            ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
                if preceded_by_backslash {
                    // One delimiter character belongs to the backslash escape.
                    search_count -= 1;
                    if search_count == 0 {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }
                }

                if self.code_delims.is_populated() {
                    // Delimiters were recorded by an earlier scan; reuse them.
                    if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
                        self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                    } else {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                    }
                } else {
                    // Nothing recorded yet: walk the following siblings, recording
                    // every delimiter that does not match.
                    let mut scan = if search_count > 0 {
                        self.tree[cur_ix].next
                    } else {
                        None
                    };
                    while let Some(scan_ix) = scan {
                        if let ItemBody::MaybeCode(delim_count, _) =
                            self.tree[scan_ix].item.body
                        {
                            if search_count == delim_count {
                                self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                                self.code_delims.clear();
                                break;
                            } else {
                                self.code_delims.insert(delim_count, scan_ix);
                            }
                        }
                        scan = self.tree[scan_ix].next;
                    }
                    if scan.is_none() {
                        // Ran off the end without a matching closer.
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                    }
                }
            }
            ItemBody::MaybeLinkOpen => {
                // Text until a matching close bracket proves otherwise.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let link_open_doubled = self.tree[cur_ix]
                    .next
                    .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                    .unwrap_or(false);
                if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                    self.wikilink_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Link,
                    });
                }
                self.link_stack.push(LinkStackEl {
                    node: cur_ix,
                    ty: LinkStackTy::Link,
                });
            }
            ItemBody::MaybeImage => {
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let link_open_doubled = self.tree[cur_ix]
                    .next
                    .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                    .unwrap_or(false);
                if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                    self.wikilink_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Image,
                    });
                }
                self.link_stack.push(LinkStackEl {
                    node: cur_ix,
                    ty: LinkStackTy::Image,
                });
            }
            ItemBody::MaybeLinkClose(could_be_ref) => {
                // The body is demoted to `Text` first. Every bare `continue` below
                // therefore revisits this node as text, which falls through the
                // `_` arm and advances the cursor.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let tos_link = self.link_stack.pop();
                // A doubled closing bracket may end a wikilink.
                if self.options.contains(Options::ENABLE_WIKILINKS)
                    && self.tree[cur_ix]
                        .next
                        .map(|ix| {
                            matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
                        })
                        .unwrap_or(false)
                {
                    if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
                        cur = self.tree[node].next;
                        continue;
                    }
                }
                if let Some(tos) = tos_link {
                    // No links inside a link (images are still allowed).
                    if tos.ty != LinkStackTy::Image
                        && matches!(
                            self.tree[self.tree.peek_up().unwrap()].item.body,
                            ItemBody::Link(..)
                        )
                    {
                        continue;
                    }
                    if tos.ty == LinkStackTy::Disabled {
                        continue;
                    }
                    let next = self.tree[cur_ix].next;
                    if let Some((next_ix, url, title)) =
                        self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
                    {
                        // Inline link: the opener becomes the link node, the nodes
                        // between opener and this bracket become its children.
                        let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
                        if let Some(prev_ix) = prev {
                            // Terminate the child list before the closing bracket.
                            self.tree[prev_ix].next = None;
                        }
                        cur = Some(tos.node);
                        cur_ix = tos.node;
                        let link_ix =
                            self.allocs
                                .allocate_link(LinkType::Inline, url, title, "".into());
                        self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
                            ItemBody::Image(link_ix)
                        } else {
                            ItemBody::Link(link_ix)
                        };
                        self.tree[cur_ix].child = self.tree[cur_ix].next;
                        self.tree[cur_ix].next = next_node;
                        self.tree[cur_ix].item.end = next_ix;
                        if let Some(next_node_ix) = next_node {
                            self.tree[next_node_ix].item.start =
                                max(self.tree[next_node_ix].item.start, next_ix);
                        }

                        if tos.ty == LinkStackTy::Link {
                            self.disable_all_links();
                        }
                    } else {
                        // Not an inline link; try the reference forms.
                        let scan_result =
                            scan_reference(&self.tree, block_text, next, self.options);
                        let (node_after_link, link_type) = match scan_result {
                            // Full reference: `[text][label]`.
                            RefScan::LinkLabel(_, end_ix) => {
                                // Mark the label's closing bracket as no longer
                                // eligible as a reference, so that it is not
                                // retried if this attempt fails.
                                let reference_close_node = if let Some(node) =
                                    scan_nodes_to_ix(&self.tree, next, end_ix - 1)
                                {
                                    node
                                } else {
                                    continue;
                                };
                                self.tree[reference_close_node].item.body =
                                    ItemBody::MaybeLinkClose(false);
                                let next_node = self.tree[reference_close_node].next;

                                (next_node, LinkType::Reference)
                            }
                            // Collapsed reference: `[label][]`.
                            RefScan::Collapsed(next_node) => {
                                if !could_be_ref {
                                    // Already tried and rejected.
                                    continue;
                                }
                                (next_node, LinkType::Collapsed)
                            }
                            // Shortcut reference: `[label]`.
                            RefScan::Failed | RefScan::UnexpectedFootnote => {
                                if !could_be_ref {
                                    continue;
                                }
                                (next, LinkType::Shortcut)
                            }
                        };

                        // (label, offset in the source at which the label ends)
                        let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
                            RefScan::LinkLabel(l, end_ix) => {
                                Some((ReferenceLabel::Link(l), end_ix))
                            }
                            RefScan::Collapsed(..)
                            | RefScan::Failed
                            | RefScan::UnexpectedFootnote => {
                                // No explicit label: the bracketed text itself is
                                // the label, and it must span exactly up to the
                                // end of this closing bracket.
                                let label_start = self.tree[tos.node].item.end - 1;
                                let label_end = self.tree[cur_ix].item.end;
                                scan_link_label(
                                    &self.tree,
                                    &self.text[label_start..label_end],
                                    self.options,
                                )
                                .map(|(ix, label)| (label, label_start + ix))
                                .filter(|(_, end)| *end == label_end)
                            }
                        };

                        let id = match &label {
                            Some(
                                (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
                            ) => l.clone(),
                            None => "".into(),
                        };

                        if let Some((ReferenceLabel::Footnote(l), end)) = label {
                            // Footnote reference.
                            let footref = self.allocs.allocate_cow(l);
                            if let Some(def) = self
                                .allocs
                                .footdefs
                                .get_mut(self.allocs.cows[footref.0].to_owned())
                            {
                                def.use_count += 1;
                            }
                            // With GFM footnotes only references to an existing
                            // definition are accepted.
                            if !self.options.has_gfm_footnotes()
                                || self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
                            {
                                let footnote_ix = if tos.ty == LinkStackTy::Image {
                                    // The opener was an image marker: its `!` is
                                    // emitted as a literal character and the
                                    // footnote reference lives on this node.
                                    self.tree[tos.node].next = Some(cur_ix);
                                    self.tree[tos.node].child = None;
                                    self.tree[tos.node].item.body =
                                        ItemBody::SynthesizeChar('!');
                                    self.tree[cur_ix].item.start =
                                        self.tree[tos.node].item.start + 1;
                                    self.tree[tos.node].item.end =
                                        self.tree[tos.node].item.start + 1;
                                    cur_ix
                                } else {
                                    tos.node
                                };
                                // `next`, not `node_after_link`: the node directly
                                // after this bracket follows the footnote.
                                self.tree[footnote_ix].next = next;
                                self.tree[footnote_ix].child = None;
                                self.tree[footnote_ix].item.body =
                                    ItemBody::FootnoteReference(footref);
                                self.tree[footnote_ix].item.end = end;
                                prev = Some(footnote_ix);
                                cur = next;
                                self.link_stack.clear();
                                continue;
                            }
                        } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
                            if let Some((def_link_type, url, title)) = self
                                .fetch_link_type_url_title(
                                    link_label,
                                    (self.tree[tos.node].item.start)..end,
                                    link_type,
                                )
                            {
                                let link_ix =
                                    self.allocs.allocate_link(def_link_type, url, title, id);
                                self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
                                {
                                    ItemBody::Image(link_ix)
                                } else {
                                    ItemBody::Link(link_ix)
                                };
                                let label_node = self.tree[tos.node].next;

                                // Skip over the bracketed text and the label.
                                self.tree[tos.node].next = node_after_link;

                                // Unless the brackets were empty, the nodes between
                                // them become the link's children.
                                if label_node != cur {
                                    self.tree[tos.node].child = label_node;

                                    // Terminate the child list.
                                    if let Some(prev_ix) = prev {
                                        self.tree[prev_ix].next = None;
                                    }
                                }

                                self.tree[tos.node].item.end = end;

                                // Continue from the link node so that the shared
                                // tail of the loop advances to `node_after_link`.
                                cur = Some(tos.node);
                                cur_ix = tos.node;

                                if tos.ty == LinkStackTy::Link {
                                    self.disable_all_links();
                                }
                            }
                        }
                    }
                }
            }
            _ => {}
        }
        prev = cur;
        cur = self.tree[cur_ix].next;
    }
    self.link_stack.clear();
    self.wikilink_stack.clear();
    self.code_delims.clear();
    self.math_delims.clear();
}
863
864 fn handle_wikilink(
870 &mut self,
871 block_text: &'input str,
872 cur_ix: TreeIndex,
873 prev: Option<TreeIndex>,
874 ) -> Option<TreeIndex> {
875 let next_ix = self.tree[cur_ix].next.unwrap();
876 if let Some(tos) = self.wikilink_stack.pop() {
879 if tos.ty == LinkStackTy::Disabled {
880 return None;
881 }
882 let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
884 return None;
886 };
887 let start_ix = self.tree[body_node].item.start;
888 let end_ix = self.tree[cur_ix].item.start;
889 let wikilink = match scan_wikilink_pipe(
890 block_text,
891 start_ix, end_ix - start_ix,
893 ) {
894 Some((rest, wikitext)) => {
895 if wikitext.is_empty() {
897 return None;
898 }
899 let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
901 if let Some(body_node) = body_node {
902 self.tree[body_node].item.start = rest;
905 Some((true, body_node, wikitext))
906 } else {
907 None
908 }
909 }
910 None => {
911 let wikitext = &block_text[start_ix..end_ix];
912 if wikitext.is_empty() {
914 return None;
915 }
916 let body_node = self.tree.create_node(Item {
917 start: start_ix,
918 end: end_ix,
919 body: ItemBody::Text {
920 backslash_escaped: false,
921 },
922 });
923 Some((false, body_node, wikitext))
924 }
925 };
926
927 if let Some((has_pothole, body_node, wikiname)) = wikilink {
928 let link_ix = self.allocs.allocate_link(
929 LinkType::WikiLink { has_pothole },
930 wikiname.into(),
931 "".into(),
932 "".into(),
933 );
934 if let Some(prev_ix) = prev {
935 self.tree[prev_ix].next = None;
936 }
937 if tos.ty == LinkStackTy::Image {
938 self.tree[tos.node].item.body = ItemBody::Image(link_ix);
939 } else {
940 self.tree[tos.node].item.body = ItemBody::Link(link_ix);
941 }
942 self.tree[tos.node].child = Some(body_node);
943 self.tree[tos.node].next = self.tree[next_ix].next;
944 self.tree[tos.node].item.end = end_ix + 1;
945 self.disable_all_links();
946 return Some(tos.node);
947 }
948 }
949
950 None
951 }
952
/// Second inline pass over the sibling list that starts at the tree's current
/// node: resolves emphasis-like delimiter runs, smart quotes and trailing
/// backslash hard breaks.
///
/// The pass does index arithmetic on `TreeIndex` values (`cur_ix + i`,
/// `start - inc`), i.e. it relies on the nodes of one delimiter run having
/// consecutive tree indices.
fn handle_emphasis_and_hard_break(&mut self) {
    let mut prev = None;
    let mut prev_ix: TreeIndex;
    let mut cur = self.tree.cur();

    // Pending opening quotes: the node of an unmatched single quote, and whether
    // a double quote is currently open.
    let mut single_quote_open: Option<TreeIndex> = None;
    let mut double_quote_open: bool = false;

    while let Some(mut cur_ix) = cur {
        match self.tree[cur_ix].item.body {
            ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
                let run_length = count;
                // The delimiter byte at the start of the run.
                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
                let both = can_open && can_close;
                if can_close {
                    while let Some(el) =
                        self.inline_stack
                            .find_match(&mut self.tree, c, run_length, both)
                    {
                        // A matching opener exists. Terminate the list of nodes
                        // that will end up inside the span.
                        if let Some(prev_ix) = prev {
                            self.tree[prev_ix].next = None;
                        }
                        let match_count = min(count, el.count);
                        // `start` and `end` are tree node indices.
                        let mut end = cur_ix - 1;
                        let mut start = el.start + el.count;

                        // Work from the innermost delimiters outwards, consuming
                        // two delimiters per step while at least two remain and
                        // one otherwise.
                        while start > el.start + el.count - match_count {
                            let inc = if start > el.start + el.count - match_count + 1 {
                                2
                            } else {
                                1
                            };
                            let ty = if c == b'~' {
                                if inc == 2 {
                                    if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                        ItemBody::Strikethrough
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                } else {
                                    // A single `~`: subscript takes precedence
                                    // over strikethrough when both are enabled.
                                    if self.options.contains(Options::ENABLE_SUBSCRIPT) {
                                        ItemBody::Subscript
                                    } else if self
                                        .options
                                        .contains(Options::ENABLE_STRIKETHROUGH)
                                    {
                                        ItemBody::Strikethrough
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                }
                            } else if c == b'^' {
                                if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
                                    ItemBody::Superscript
                                } else {
                                    ItemBody::Text {
                                        backslash_escaped: false,
                                    }
                                }
                            } else if inc == 2 {
                                ItemBody::Strong
                            } else {
                                ItemBody::Emphasis
                            };

                            // The opener node `inc` places before `start` becomes
                            // the span; it extends to the end of the closer node
                            // `inc` places after `end`.
                            let root = start - inc;
                            end = end + inc;
                            self.tree[root].item.body = ty;
                            self.tree[root].item.end = self.tree[end].item.end;
                            self.tree[root].child = Some(start);
                            self.tree[root].next = None;
                            start = root;
                        }

                        // Link the outermost span to whatever follows the consumed
                        // closing delimiters.
                        prev_ix = el.start + el.count - match_count;
                        prev = Some(prev_ix);
                        cur = self.tree[cur_ix + match_count - 1].next;
                        self.tree[prev_ix].next = cur;

                        // Opening delimiters left over stay available for a later
                        // closer.
                        if el.count > match_count {
                            self.inline_stack.push(InlineEl {
                                start: el.start,
                                count: el.count - match_count,
                                run_length: el.run_length,
                                c: el.c,
                                both: el.both,
                            })
                        }
                        count -= match_count;
                        if count > 0 {
                            // Closing delimiters remain; `cur` is their first node.
                            cur_ix = cur.unwrap();
                        } else {
                            break;
                        }
                    }
                }
                if count > 0 {
                    if can_open {
                        // Unmatched delimiters that may open a later span.
                        self.inline_stack.push(InlineEl {
                            start: cur_ix,
                            run_length,
                            count,
                            c,
                            both,
                        });
                    } else {
                        // They can neither close nor open: plain text.
                        for i in 0..count {
                            self.tree[cur_ix + i].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                        }
                    }
                    prev_ix = cur_ix + count - 1;
                    prev = Some(prev_ix);
                    cur = self.tree[prev_ix].next;
                }
            }
            ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
                self.tree[cur_ix].item.body = match c {
                    b'\'' => {
                        // A single quote is emitted as a right quote. When it closes
                        // a pending opener, that opener is retroactively turned
                        // into a left quote.
                        if let (Some(open_ix), true) = (single_quote_open, can_close) {
                            self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
                            single_quote_open = None;
                        } else if can_open {
                            single_quote_open = Some(cur_ix);
                        }
                        ItemBody::SynthesizeChar('’')
                    }
                    // Every other byte is handled as a double quote.
                    _ => {
                        if can_close && double_quote_open {
                            double_quote_open = false;
                            ItemBody::SynthesizeChar('”')
                        } else {
                            if can_open && !double_quote_open {
                                double_quote_open = true;
                            }
                            ItemBody::SynthesizeChar('“')
                        }
                    }
                };
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
            ItemBody::HardBreak(true) => {
                // With nothing after it there is no line to break to, so a literal
                // backslash is emitted instead.
                if self.tree[cur_ix].next.is_none() {
                    self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
                }
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
            _ => {
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
        }
    }
    // Discard openers that never found a closer.
    // NOTE(review): `pop_all` receives the tree, presumably to demote them to
    // text — confirm.
    self.inline_stack.pop_all(&mut self.tree);
}
1119
/// Disables every pending opener on both the link stack and the wikilink stack.
/// Called after a link (not an image) has been resolved.
fn disable_all_links(&mut self) {
    self.link_stack.disable_all_links();
    self.wikilink_stack.disable_all_links();
}
1124
1125 fn scan_inline_link(
1127 &self,
1128 underlying: &'input str,
1129 mut ix: usize,
1130 node: Option<TreeIndex>,
1131 ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1132 if underlying.as_bytes().get(ix) != Some(&b'(') {
1133 return None;
1134 }
1135 ix += 1;
1136
1137 let scan_separator = |ix: &mut usize| {
1138 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1139 if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1140 *ix += bl;
1141 *ix += skip_container_prefixes(
1142 &self.tree,
1143 &underlying.as_bytes()[*ix..],
1144 self.options,
1145 );
1146 }
1147 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1148 };
1149
1150 scan_separator(&mut ix);
1151
1152 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1153 let dest = unescape(dest, self.tree.is_in_table());
1154 ix += dest_length;
1155
1156 scan_separator(&mut ix);
1157
1158 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1159 ix += bytes_scanned;
1160 scan_separator(&mut ix);
1161 t
1162 } else {
1163 "".into()
1164 };
1165 if underlying.as_bytes().get(ix) != Some(&b')') {
1166 return None;
1167 }
1168 ix += 1;
1169
1170 Some((ix, dest, title))
1171 }
1172
1173 fn scan_link_title(
1175 &self,
1176 text: &'input str,
1177 start_ix: usize,
1178 node: Option<TreeIndex>,
1179 ) -> Option<(usize, CowStr<'input>)> {
1180 let bytes = text.as_bytes();
1181 let open = match bytes.get(start_ix) {
1182 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1183 _ => return None,
1184 };
1185 let close = if open == b'(' { b')' } else { open };
1186
1187 let mut title = String::new();
1188 let mut mark = start_ix + 1;
1189 let mut i = start_ix + 1;
1190
1191 while i < bytes.len() {
1192 let c = bytes[i];
1193
1194 if c == close {
1195 let cow = if mark == 1 {
1196 (i - start_ix + 1, text[mark..i].into())
1197 } else {
1198 title.push_str(&text[mark..i]);
1199 (i - start_ix + 1, title.into())
1200 };
1201
1202 return Some(cow);
1203 }
1204 if c == open {
1205 return None;
1206 }
1207
1208 if c == b'\n' || c == b'\r' {
1209 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1210 if self.tree[node_ix].item.start > i {
1211 title.push_str(&text[mark..i]);
1212 title.push('\n');
1213 i = self.tree[node_ix].item.start;
1214 mark = i;
1215 continue;
1216 }
1217 }
1218 }
1219 if c == b'&' {
1220 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1221 title.push_str(&text[mark..i]);
1222 title.push_str(&value);
1223 i += n;
1224 mark = i;
1225 continue;
1226 }
1227 }
1228 if self.tree.is_in_table()
1229 && c == b'\\'
1230 && i + 2 < bytes.len()
1231 && bytes[i + 1] == b'\\'
1232 && bytes[i + 2] == b'|'
1233 {
1234 title.push_str(&text[mark..i]);
1237 i += 2;
1238 mark = i;
1239 }
1240 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1241 title.push_str(&text[mark..i]);
1242 i += 1;
1243 mark = i;
1244 }
1245
1246 i += 1;
1247 }
1248
1249 None
1250 }
1251
/// Turns the nodes from `open` to `close` into a single `Math` item stored on
/// `open`.
///
/// `open` and `close` are the first nodes of the opening and closing math
/// delimiters. The span is display math when both delimiters are doubled. An
/// empty inline span is rejected: `open` becomes plain text and nothing else
/// changes.
fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
    // `Some` when the opener is followed by a second delimiter node that is not
    // already the closer.
    let start_is_display = self.tree[open].next.filter(|&next_ix| {
        next_ix != close
            && matches!(
                self.tree[next_ix].item.body,
                ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
            )
    });
    let end_is_display = self.tree[close].next.filter(|&next_ix| {
        matches!(
            self.tree[next_ix].item.body,
            ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
        )
    });
    let is_display = start_is_display.is_some() && end_is_display.is_some();
    if is_display {
        // Cannot panic: `end_is_display` is `Some` only if `close` has a next node.
        close = self.tree[close].next.unwrap();
        self.tree[open].next = Some(close);
        // Widen both delimiters by one byte so that each covers its second
        // delimiter character.
        self.tree[open].item.end += 1;
        self.tree[close].item.start -= 1;
    } else {
        if self.tree[open].item.end == self.tree[close].item.start {
            // Inline math must not be empty.
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: false,
            };
            return;
        }
        self.tree[open].next = Some(close);
    }
    let span_start = self.tree[open].item.end;
    let span_end = self.tree[close].item.start;

    let spanned_text = &self.text[span_start..span_end];
    let spanned_bytes = spanned_text.as_bytes();
    // Allocated only if the content has to be rewritten.
    let mut buf: Option<String> = None;

    let mut start_ix = 0;
    let mut ix = 0;
    while ix < spanned_bytes.len() {
        let c = spanned_bytes[ix];
        if c == b'\r' || c == b'\n' {
            // Keep the line ending, but drop the container prefixes at the start
            // of the following line.
            ix += 1;
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
            start_ix = ix;
        } else if c == b'\\'
            && spanned_bytes.get(ix + 1) == Some(&b'|')
            && self.tree.is_in_table()
        {
            // Inside a table, `\|` stands for a literal pipe.
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push('|');
            ix += 2;
            start_ix = ix;
        } else {
            ix += 1;
        }
    }

    let cow = if let Some(mut buf) = buf {
        buf.push_str(&spanned_text[start_ix..]);
        buf.into()
    } else {
        spanned_text.into()
    };

    // `open` absorbs everything up to and including the closing delimiter.
    self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
    self.tree[open].item.end = self.tree[close].item.end;
    self.tree[open].next = self.tree[close].next;
}
1325
/// Turns the nodes from `open` to `close` into a code span.
///
/// The content is the source text between the two delimiters, with each line
/// ending byte replaced by a space, the following line's container prefixes
/// removed and, inside tables, `\|` replaced by `|`. If the content both starts
/// and ends with a space and is not made up of spaces only, one space is
/// stripped from each side.
///
/// With `preceding_backslash`, `open` is kept as a one-byte backslash-escaped
/// text node and the code span is stored on `close`; otherwise `open` becomes
/// the code span and absorbs everything up to the end of `close`.
fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
    let span_start = self.tree[open].item.end;
    let span_end = self.tree[close].item.start;
    // Allocated only if the content has to be rewritten.
    let mut buf: Option<String> = None;

    let spanned_text = &self.text[span_start..span_end];
    let spanned_bytes = spanned_text.as_bytes();
    let mut start_ix = 0;
    let mut ix = 0;
    while ix < spanned_bytes.len() {
        let c = spanned_bytes[ix];
        if c == b'\r' || c == b'\n' {
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push(' ');
            ix += 1;
            ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
            start_ix = ix;
        } else if c == b'\\'
            && spanned_bytes.get(ix + 1) == Some(&b'|')
            && self.tree.is_in_table()
        {
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push('|');
            ix += 2;
            start_ix = ix;
        } else {
            ix += 1;
        }
    }

    let (opening, closing, all_spaces) = {
        // Finish the rewritten buffer, if there is one, before inspecting it.
        let s = if let Some(buf) = &mut buf {
            buf.push_str(&spanned_text[start_ix..]);
            &buf[..]
        } else {
            spanned_text
        };
        (
            s.as_bytes().first() == Some(&b' '),
            s.as_bytes().last() == Some(&b' '),
            s.bytes().all(|b| b == b' '),
        )
    };

    let cow: CowStr<'input> = if !all_spaces && opening && closing {
        // Strip one space from each end.
        if let Some(mut buf) = buf {
            if !buf.is_empty() {
                buf.remove(0);
                buf.pop();
            }
            buf.into()
        } else {
            spanned_text[1..(spanned_text.len() - 1).max(1)].into()
        }
    } else if let Some(buf) = buf {
        buf.into()
    } else {
        spanned_text.into()
    };

    if preceding_backslash {
        // The first byte of `open` stays behind as escaped text; the code span
        // starts right after it and lives on `close`.
        self.tree[open].item.body = ItemBody::Text {
            backslash_escaped: true,
        };
        self.tree[open].item.end = self.tree[open].item.start + 1;
        self.tree[open].next = Some(close);
        self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
        self.tree[close].item.start = self.tree[open].item.start + 1;
    } else {
        self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
        self.tree[open].item.end = self.tree[close].item.end;
        self.tree[open].next = self.tree[close].next;
    }
}
1405
1406 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1410 let c = *bytes.get(ix)?;
1411 if c == b'!' {
1412 Some((
1413 vec![],
1414 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1415 ))
1416 } else if c == b'?' {
1417 Some((
1418 vec![],
1419 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1420 ))
1421 } else {
1422 let (span, i) = scan_html_block_inner(
1423 &bytes[(ix - 1)..],
1425 Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
1426 )?;
1427 Some((span, i + ix - 1))
1428 }
1429 }
1430
 /// Consumes the parser and wraps it in an [`OffsetIter`], whose items pair
 /// every event with a `Range<usize>` taken from the originating item's
 /// `start..end`.
 pub fn into_offset_iter(self) -> OffsetIter<'input, F> {
     OffsetIter { inner: self }
 }
1437}
1438
1439pub(crate) fn scan_containers(
1441 tree: &Tree<Item>,
1442 line_start: &mut LineStart<'_>,
1443 options: Options,
1444) -> usize {
1445 let mut i = 0;
1446 for &node_ix in tree.walk_spine() {
1447 match tree[node_ix].item.body {
1448 ItemBody::BlockQuote(..) => {
1449 let save = line_start.clone();
1450 let _ = line_start.scan_space(3);
1451 if !line_start.scan_blockquote_marker() {
1452 *line_start = save;
1453 break;
1454 }
1455 }
1456 ItemBody::ListItem(indent) => {
1457 let save = line_start.clone();
1458 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1459 *line_start = save;
1460 break;
1461 }
1462 }
1463 ItemBody::DefinitionListDefinition(indent) => {
1464 let save = line_start.clone();
1465 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1466 *line_start = save;
1467 break;
1468 }
1469 }
1470 ItemBody::FootnoteDefinition(..) if options.has_gfm_footnotes() => {
1471 let save = line_start.clone();
1472 if !line_start.scan_space(4) && !line_start.is_at_eol() {
1473 *line_start = save;
1474 break;
1475 }
1476 }
1477 _ => (),
1478 }
1479 i += 1;
1480 }
1481 i
1482}
1483pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
1484 let mut line_start = LineStart::new(bytes);
1485 let _ = scan_containers(tree, &mut line_start, options);
1486 line_start.bytes_scanned()
1487}
1488
1489impl Tree<Item> {
1490 pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1491 if end > start {
1492 if let Some(ix) = self.cur() {
1493 if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1494 {
1495 self[ix].item.end = end;
1496 return;
1497 }
1498 }
1499 self.append(Item {
1500 start,
1501 end,
1502 body: ItemBody::Text { backslash_escaped },
1503 });
1504 }
1505 }
1506 pub(crate) fn is_in_table(&self) -> bool {
1513 fn might_be_in_table(item: &Item) -> bool {
1514 item.body.is_inline()
1515 || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1516 | ItemBody::TableCell)
1517 }
1518 for &ix in self.walk_spine().rev() {
1519 if matches!(self[ix].item.body, ItemBody::Table(_)) {
1520 return true;
1521 }
1522 if !might_be_in_table(&self[ix].item) {
1523 return false;
1524 }
1525 }
1526 false
1527 }
1528}
1529
/// One entry of the [`InlineStack`]: a run of delimiter nodes that may later
/// be matched by a closing run.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// First tree node of the run.
    start: TreeIndex,
    /// Number of consecutive tree nodes (starting at `start`) covered by this
    /// entry; these are the nodes reverted to plain text when it is discarded.
    count: usize,
    /// Run length compared against the closing run in `find_match`.
    run_length: usize,
    /// The delimiter byte (`find_match` distinguishes `_`, `*` and `~`).
    c: u8,
    /// NOTE(review): presumably "this run can both open and close" — confirm
    /// against the code that pushes entries.
    both: bool,
}
1543
/// Stack of pending inline delimiter runs plus per-category search bounds.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// Pending delimiter runs, oldest first.
    stack: Vec<InlineEl>,
    /// Per-category lower bounds into `stack`, indexed by the associated
    /// constants on `InlineStack`; `find_match` never looks below the bound
    /// that applies to the closing run.
    lower_bounds: [usize; 9],
}
1553
1554impl InlineStack {
1555 const UNDERSCORE_NOT_BOTH: usize = 0;
1559 const ASTERISK_NOT_BOTH: usize = 1;
1560 const ASTERISK_BASE: usize = 2;
1561 const TILDES: usize = 5;
1562 const UNDERSCORE_BASE: usize = 6;
1563
1564 fn pop_all(&mut self, tree: &mut Tree<Item>) {
1565 for el in self.stack.drain(..) {
1566 for i in 0..el.count {
1567 tree[el.start + i].item.body = ItemBody::Text {
1568 backslash_escaped: false,
1569 };
1570 }
1571 }
1572 self.lower_bounds = [0; 9];
1573 }
1574
1575 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1576 if c == b'_' {
1577 let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1578 if both {
1579 mod3_lower
1580 } else {
1581 min(
1582 mod3_lower,
1583 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1584 )
1585 }
1586 } else if c == b'*' {
1587 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1588 if both {
1589 mod3_lower
1590 } else {
1591 min(
1592 mod3_lower,
1593 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1594 )
1595 }
1596 } else {
1597 self.lower_bounds[InlineStack::TILDES]
1598 }
1599 }
1600
1601 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1602 if c == b'_' {
1603 if both {
1604 self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1605 } else {
1606 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1607 }
1608 } else if c == b'*' {
1609 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1610 if !both {
1611 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1612 }
1613 } else {
1614 self.lower_bounds[InlineStack::TILDES] = new_bound;
1615 }
1616 }
1617
1618 fn truncate(&mut self, new_bound: usize) {
1619 self.stack.truncate(new_bound);
1620 for lower_bound in &mut self.lower_bounds {
1621 if *lower_bound > new_bound {
1622 *lower_bound = new_bound;
1623 }
1624 }
1625 }
1626
1627 fn find_match(
1628 &mut self,
1629 tree: &mut Tree<Item>,
1630 c: u8,
1631 run_length: usize,
1632 both: bool,
1633 ) -> Option<InlineEl> {
1634 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1635 let res = self.stack[lowerbound..]
1636 .iter()
1637 .cloned()
1638 .enumerate()
1639 .rfind(|(_, el)| {
1640 if c == b'~' && run_length != el.run_length {
1641 return false;
1642 }
1643 el.c == c
1644 && (!both && !el.both
1645 || (run_length + el.run_length) % 3 != 0
1646 || run_length % 3 == 0)
1647 });
1648
1649 if let Some((matching_ix, matching_el)) = res {
1650 let matching_ix = matching_ix + lowerbound;
1651 for el in &self.stack[(matching_ix + 1)..] {
1652 for i in 0..el.count {
1653 tree[el.start + i].item.body = ItemBody::Text {
1654 backslash_escaped: false,
1655 };
1656 }
1657 }
1658 self.truncate(matching_ix);
1659 Some(matching_el)
1660 } else {
1661 self.set_lowerbound(c, run_length, both, self.stack.len());
1662 None
1663 }
1664 }
1665
1666 fn trim_lower_bound(&mut self, ix: usize) {
1667 self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1668 }
1669
1670 fn push(&mut self, el: InlineEl) {
1671 if el.c == b'~' {
1672 self.trim_lower_bound(InlineStack::TILDES);
1673 }
1674 self.stack.push(el)
1675 }
1676}
1677
/// Outcome of `scan_reference`.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    /// A link label was found; carries the label and the index into the
    /// scanned text just past what the label scan consumed.
    LinkLabel(CowStr<'a>, usize),
    /// The text continued with `[]`; carries the node (if any) following the
    /// node after the one that was scanned.
    Collapsed(Option<TreeIndex>),
    /// A footnote label was found where a link label was expected.
    UnexpectedFootnote,
    /// Nothing usable was found.
    Failed,
}
1687
1688fn scan_nodes_to_ix(
1691 tree: &Tree<Item>,
1692 mut node: Option<TreeIndex>,
1693 ix: usize,
1694) -> Option<TreeIndex> {
1695 while let Some(node_ix) = node {
1696 if tree[node_ix].item.end <= ix {
1697 node = tree[node_ix].next;
1698 } else {
1699 break;
1700 }
1701 }
1702 node
1703}
1704
1705fn scan_link_label<'text>(
1708 tree: &Tree<Item>,
1709 text: &'text str,
1710 options: Options,
1711) -> Option<(usize, ReferenceLabel<'text>)> {
1712 let bytes = text.as_bytes();
1713 if bytes.len() < 2 || bytes[0] != b'[' {
1714 return None;
1715 }
1716 let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
1717 if options.contains(Options::ENABLE_FOOTNOTES)
1718 && b'^' == bytes[1]
1719 && bytes.get(2) != Some(&b']')
1720 {
1721 let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if options.has_gfm_footnotes() {
1722 &|_| None
1723 } else {
1724 &linebreak_handler
1725 };
1726 if let Some((byte_index, cow)) =
1727 scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1728 {
1729 return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1730 }
1731 }
1732 let (byte_index, cow) =
1733 scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1734 Some((byte_index + 1, ReferenceLabel::Link(cow)))
1735}
1736
1737fn scan_reference<'b>(
1738 tree: &Tree<Item>,
1739 text: &'b str,
1740 cur: Option<TreeIndex>,
1741 options: Options,
1742) -> RefScan<'b> {
1743 let cur_ix = match cur {
1744 None => return RefScan::Failed,
1745 Some(cur_ix) => cur_ix,
1746 };
1747 let start = tree[cur_ix].item.start;
1748 let tail = &text.as_bytes()[start..];
1749
1750 if tail.starts_with(b"[]") {
1751 let closing_node = tree[cur_ix].next.unwrap();
1753 RefScan::Collapsed(tree[closing_node].next)
1754 } else {
1755 let label = scan_link_label(tree, &text[start..], options);
1756 match label {
1757 Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
1758 Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
1759 None => RefScan::Failed,
1760 }
1761 }
1762}
1763
/// Stack of pending link/image openers.
#[derive(Clone, Default)]
struct LinkStack {
    /// Pending openers, oldest first.
    inner: Vec<LinkStackEl>,
    /// Entries below this index were already visited by `disable_all_links`,
    /// so that method only needs to process `inner[disabled_ix..]`.
    disabled_ix: usize,
}
1769
1770impl LinkStack {
1771 fn push(&mut self, el: LinkStackEl) {
1772 self.inner.push(el);
1773 }
1774
1775 fn pop(&mut self) -> Option<LinkStackEl> {
1776 let el = self.inner.pop();
1777 self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1778 el
1779 }
1780
1781 fn clear(&mut self) {
1782 self.inner.clear();
1783 self.disabled_ix = 0;
1784 }
1785
1786 fn disable_all_links(&mut self) {
1787 for el in &mut self.inner[self.disabled_ix..] {
1788 if el.ty == LinkStackTy::Link {
1789 el.ty = LinkStackTy::Disabled;
1790 }
1791 }
1792 self.disabled_ix = self.inner.len();
1793 }
1794}
1795
/// One entry of the [`LinkStack`].
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node associated with this entry (presumably the opening bracket
    /// node — confirm at the push site).
    node: TreeIndex,
    /// What kind of opener this is, or whether it has been disabled.
    ty: LinkStackTy,
}

/// Kind of a [`LinkStackEl`].
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link opener; `LinkStack::disable_all_links` turns these into `Disabled`.
    Link,
    /// An image opener; not affected by `LinkStack::disable_all_links`.
    Image,
    /// A former `Link` opener that has been disabled.
    Disabled,
}
1808
/// A link definition: destination, optional title and a byte range.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// The link destination.
    pub dest: CowStr<'a>,
    /// The link title, if one was given.
    pub title: Option<CowStr<'a>>,
    /// Byte range associated with the definition (presumably its location in
    /// the source text — confirm where definitions are created).
    pub span: Range<usize>,
}
1816
1817impl<'a> LinkDef<'a> {
1818 pub fn into_static(self) -> LinkDef<'static> {
1819 LinkDef {
1820 dest: self.dest.into_static(),
1821 title: self.title.map(|s| s.into_static()),
1822 span: self.span,
1823 }
1824 }
1825}
1826
/// Bookkeeping stored per footnote label in [`FootnoteDefs`].
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter (presumably the number of references seen so far —
    /// confirm where it is incremented).
    pub use_count: usize,
}
1832
/// Index of code-span delimiter nodes, used to look up a later delimiter of
/// a given length.
struct CodeDelims {
    /// Delimiter nodes in insertion order, keyed by the `count` they were
    /// inserted with.
    inner: HashMap<usize, VecDeque<TreeIndex>>,
    /// Whether `insert` has been called since creation or the last `clear`;
    /// the first inserted delimiter is not recorded.
    seen_first: bool,
}
1839
1840impl CodeDelims {
1841 fn new() -> Self {
1842 Self {
1843 inner: Default::default(),
1844 seen_first: false,
1845 }
1846 }
1847
1848 fn insert(&mut self, count: usize, ix: TreeIndex) {
1849 if self.seen_first {
1850 self.inner.entry(count).or_default().push_back(ix);
1851 } else {
1852 self.seen_first = true;
1855 }
1856 }
1857
1858 fn is_populated(&self) -> bool {
1859 !self.inner.is_empty()
1860 }
1861
1862 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1863 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1864 if ix > open_ix {
1865 return Some(ix);
1866 }
1867 }
1868 None
1869 }
1870
1871 fn clear(&mut self) {
1872 self.inner.clear();
1873 self.seen_first = false;
1874 }
1875}
1876
/// Index of math delimiter nodes, grouped by brace context.
struct MathDelims {
    /// Queues keyed by `brace_context`; each entry is
    /// `(node, can_close, delim_is_display)` as passed to `insert`.
    inner: HashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
1882
1883impl MathDelims {
1884 fn new() -> Self {
1885 Self {
1886 inner: Default::default(),
1887 }
1888 }
1889
1890 fn insert(
1891 &mut self,
1892 delim_is_display: bool,
1893 brace_context: u8,
1894 ix: TreeIndex,
1895 can_close: bool,
1896 ) {
1897 self.inner
1898 .entry(brace_context)
1899 .or_default()
1900 .push_back((ix, can_close, delim_is_display));
1901 }
1902
1903 fn is_populated(&self) -> bool {
1904 !self.inner.is_empty()
1905 }
1906
1907 fn find(
1908 &mut self,
1909 tree: &Tree<Item>,
1910 open_ix: TreeIndex,
1911 is_display: bool,
1912 brace_context: u8,
1913 ) -> Option<TreeIndex> {
1914 while let Some((ix, can_close, delim_is_display)) =
1915 self.inner.get_mut(&brace_context)?.pop_front()
1916 {
1917 if ix <= open_ix || (is_display && tree[open_ix].next == Some(ix)) {
1918 continue;
1919 }
1920 let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
1921 if (!is_display && can_close) || (is_display && delim_is_display) {
1922 return Some(ix);
1923 }
1924 self.inner
1927 .get_mut(&brace_context)?
1928 .push_front((ix, can_close, delim_is_display));
1929 break;
1930 }
1931 None
1932 }
1933
1934 fn clear(&mut self) {
1935 self.inner.clear();
1936 }
1937}
1938
/// Handle to an entry of `Allocations::links`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

/// Handle to an entry of `Allocations::cows`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

/// Handle to an entry of `Allocations::alignments`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

/// Handle to an entry of `Allocations::headings`, stored as the vector
/// position plus one so that it fits in a `NonZeroUsize`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);
1950
/// Side storage for data that tree items refer to through small index
/// handles (`LinkIndex`, `CowIndex`, `AlignmentIndex`, `HeadingIndex`).
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Link reference definitions.
    pub refdefs: RefDefs<'a>,
    /// Footnote definitions.
    pub footdefs: FootnoteDefs<'a>,
    /// Link data as `(type, url, title, id)`, addressed by `LinkIndex`.
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Strings addressed by `CowIndex`.
    cows: Vec<CowStr<'a>>,
    /// Alignment lists addressed by `AlignmentIndex`.
    alignments: Vec<Vec<Alignment>>,
    /// Heading attributes addressed by `HeadingIndex`.
    headings: Vec<HeadingAttributes<'a>>,
}
1960
/// Attributes attached to a heading; copied into `Tag::Heading` when the
/// heading's start event is produced.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Optional heading id.
    pub id: Option<CowStr<'a>>,
    /// Heading classes.
    pub classes: Vec<CowStr<'a>>,
    /// Further attributes as `(name, optional value)` pairs.
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
1968
/// Map from link label to its [`LinkDef`].
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);

/// Map from footnote label to its [`FootnoteDef`].
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) HashMap<FootnoteLabel<'input>, FootnoteDef>);
1976
impl<'input, 'b, 's> RefDefs<'input>
where
    's: 'b,
{
    /// Looks up the definition stored for `key`; the key is wrapped in
    /// `UniCase` before the map lookup.
    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
        self.0.get(&UniCase::new(key.into()))
    }

    /// Iterates over all `(label, definition)` pairs in map order.
    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
        self.0.iter().map(|(k, v)| (k.as_ref(), v))
    }
}
1991
impl<'input, 'b, 's> FootnoteDefs<'input>
where
    's: 'b,
{
    /// Reports whether a definition exists for `key`; the key is wrapped in
    /// `UniCase` before the map lookup.
    pub fn contains(&'s self, key: &'b str) -> bool {
        self.0.contains_key(&UniCase::new(key.into()))
    }
    /// Returns a mutable reference to the definition stored for `key`, if
    /// any; the key is wrapped in `UniCase` before the map lookup.
    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
        self.0.get_mut(&UniCase::new(key))
    }
}
2005
2006impl<'a> Allocations<'a> {
2007 pub fn new() -> Self {
2008 Self {
2009 refdefs: RefDefs::default(),
2010 footdefs: FootnoteDefs::default(),
2011 links: Vec::with_capacity(128),
2012 cows: Vec::new(),
2013 alignments: Vec::new(),
2014 headings: Vec::new(),
2015 }
2016 }
2017
2018 pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2019 let ix = self.cows.len();
2020 self.cows.push(cow);
2021 CowIndex(ix)
2022 }
2023
2024 pub fn allocate_link(
2025 &mut self,
2026 ty: LinkType,
2027 url: CowStr<'a>,
2028 title: CowStr<'a>,
2029 id: CowStr<'a>,
2030 ) -> LinkIndex {
2031 let ix = self.links.len();
2032 self.links.push((ty, url, title, id));
2033 LinkIndex(ix)
2034 }
2035
2036 pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2037 let ix = self.alignments.len();
2038 self.alignments.push(alignment);
2039 AlignmentIndex(ix)
2040 }
2041
2042 pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2043 let ix = self.headings.len();
2044 self.headings.push(attrs);
2045 let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2048 HeadingIndex(ix_nonzero)
2049 }
2050
2051 pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2052 std::mem::replace(&mut self.cows[ix.0], "".into())
2053 }
2054
2055 pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2056 let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2057 std::mem::replace(&mut self.links[ix.0], default_link)
2058 }
2059
2060 pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2061 std::mem::take(&mut self.alignments[ix.0])
2062 }
2063}
2064
2065impl<'a> Index<CowIndex> for Allocations<'a> {
2066 type Output = CowStr<'a>;
2067
2068 fn index(&self, ix: CowIndex) -> &Self::Output {
2069 self.cows.index(ix.0)
2070 }
2071}
2072
2073impl<'a> Index<LinkIndex> for Allocations<'a> {
2074 type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
2075
2076 fn index(&self, ix: LinkIndex) -> &Self::Output {
2077 self.links.index(ix.0)
2078 }
2079}
2080
2081impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2082 type Output = Vec<Alignment>;
2083
2084 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2085 self.alignments.index(ix.0)
2086 }
2087}
2088
2089impl<'a> Index<HeadingIndex> for Allocations<'a> {
2090 type Output = HeadingAttributes<'a>;
2091
2092 fn index(&self, ix: HeadingIndex) -> &Self::Output {
2093 self.headings.index(ix.0.get() - 1)
2094 }
2095}
2096
/// State threaded through the inline HTML scanners (it is passed by mutable
/// reference to the comment and processing-instruction scanners).
///
/// NOTE(review): each field presumably records how far a failed search for
/// the corresponding construct has already looked, so it is not repeated —
/// confirm in the scanners module.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Guard value for CDATA sections.
    pub cdata: usize,
    /// Guard value for processing instructions.
    pub processing: usize,
    /// Guard value for declarations.
    pub declaration: usize,
    /// Guard value for comments.
    pub comment: usize,
}
2109
/// Callback invoked for links that could not be resolved.
pub trait BrokenLinkCallback<'input> {
    /// Handles one broken link.
    ///
    /// Returning `None` supplies no replacement. Returning `Some` supplies a
    /// pair of strings (presumably destination URL and title — confirm
    /// at the call site).
    fn handle_broken_link(
        &mut self,
        link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)>;
}
2120
/// Any `FnMut(BrokenLink) -> Option<(CowStr, CowStr)>` closure or function
/// can be used as a callback; it is simply called with the link.
impl<'input, T> BrokenLinkCallback<'input> for T
where
    T: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
{
    fn handle_broken_link(
        &mut self,
        link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        self(link)
    }
}

/// A boxed callback forwards to the callback it contains.
impl<'input> BrokenLinkCallback<'input> for Box<dyn BrokenLinkCallback<'input>> {
    fn handle_broken_link(
        &mut self,
        link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        (**self).handle_broken_link(link)
    }
}
2141
/// Callback that never supplies a replacement for a broken link.
#[derive(Debug)]
pub struct DefaultBrokenLinkCallback;

impl<'input> BrokenLinkCallback<'input> for DefaultBrokenLinkCallback {
    /// Always returns `None`, ignoring the link.
    fn handle_broken_link(
        &mut self,
        _link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        None
    }
}
2154
/// Iterator adapter around a [`Parser`] that yields each event together
/// with a `Range<usize>` built from the originating item's `start..end`.
#[derive(Debug)]
pub struct OffsetIter<'a, F = DefaultBrokenLinkCallback> {
    /// The wrapped parser.
    inner: Parser<'a, F>,
}

impl<'a, F: BrokenLinkCallback<'a>> OffsetIter<'a, F> {
    /// Returns the wrapped parser's reference definitions.
    pub fn reference_definitions(&self) -> &RefDefs<'_> {
        self.inner.reference_definitions()
    }
}
2173
2174impl<'a, F: BrokenLinkCallback<'a>> Iterator for OffsetIter<'a, F> {
2175 type Item = (Event<'a>, Range<usize>);
2176
2177 fn next(&mut self) -> Option<Self::Item> {
2178 match self.inner.tree.cur() {
2179 None => {
2180 let ix = self.inner.tree.pop()?;
2181 let ix = if matches!(self.inner.tree[ix].item.body, ItemBody::TightParagraph) {
2182 self.inner.tree.next_sibling(ix);
2184 return self.next();
2185 } else {
2186 ix
2187 };
2188 let tag_end = body_to_tag_end(&self.inner.tree[ix].item.body);
2189 self.inner.tree.next_sibling(ix);
2190 let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
2191 debug_assert!(span.start <= span.end);
2192 Some((Event::End(tag_end), span))
2193 }
2194 Some(cur_ix) => {
2195 let cur_ix =
2196 if matches!(self.inner.tree[cur_ix].item.body, ItemBody::TightParagraph) {
2197 self.inner.tree.push();
2199 self.inner.tree.cur().unwrap()
2200 } else {
2201 cur_ix
2202 };
2203 if self.inner.tree[cur_ix].item.body.is_maybe_inline() {
2204 self.inner.handle_inline();
2205 }
2206
2207 let node = self.inner.tree[cur_ix];
2208 let item = node.item;
2209 let event = item_to_event(item, self.inner.text, &mut self.inner.allocs);
2210 if let Event::Start(..) = event {
2211 self.inner.tree.push();
2212 } else {
2213 self.inner.tree.next_sibling(cur_ix);
2214 }
2215 debug_assert!(item.start <= item.end);
2216 Some((event, item.start..item.end))
2217 }
2218 }
2219 }
2220}
2221
2222fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2223 match *body {
2224 ItemBody::Paragraph => TagEnd::Paragraph,
2225 ItemBody::Emphasis => TagEnd::Emphasis,
2226 ItemBody::Superscript => TagEnd::Superscript,
2227 ItemBody::Subscript => TagEnd::Subscript,
2228 ItemBody::Strong => TagEnd::Strong,
2229 ItemBody::Strikethrough => TagEnd::Strikethrough,
2230 ItemBody::Link(..) => TagEnd::Link,
2231 ItemBody::Image(..) => TagEnd::Image,
2232 ItemBody::Heading(level, _) => TagEnd::Heading(level),
2233 ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2234 ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2235 ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2236 ItemBody::List(_, c, _) => {
2237 let is_ordered = c == b'.' || c == b')';
2238 TagEnd::List(is_ordered)
2239 }
2240 ItemBody::ListItem(_) => TagEnd::Item,
2241 ItemBody::TableHead => TagEnd::TableHead,
2242 ItemBody::TableCell => TagEnd::TableCell,
2243 ItemBody::TableRow => TagEnd::TableRow,
2244 ItemBody::Table(..) => TagEnd::Table,
2245 ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2246 ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2247 ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2248 ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2249 ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2250 _ => panic!("unexpected item body {:?}", body),
2251 }
2252}
2253
2254fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2255 let tag = match item.body {
2256 ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2257 ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2258 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2259 ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2260 ItemBody::HtmlBlock => Tag::HtmlBlock,
2261 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2262 ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2263 ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2264 ItemBody::SoftBreak => return Event::SoftBreak,
2265 ItemBody::HardBreak(_) => return Event::HardBreak,
2266 ItemBody::FootnoteReference(cow_ix) => {
2267 return Event::FootnoteReference(allocs.take_cow(cow_ix))
2268 }
2269 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2270 ItemBody::Rule => return Event::Rule,
2271 ItemBody::Paragraph => Tag::Paragraph,
2272 ItemBody::Emphasis => Tag::Emphasis,
2273 ItemBody::Superscript => Tag::Superscript,
2274 ItemBody::Subscript => Tag::Subscript,
2275 ItemBody::Strong => Tag::Strong,
2276 ItemBody::Strikethrough => Tag::Strikethrough,
2277 ItemBody::Link(link_ix) => {
2278 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2279 Tag::Link {
2280 link_type,
2281 dest_url,
2282 title,
2283 id,
2284 }
2285 }
2286 ItemBody::Image(link_ix) => {
2287 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2288 Tag::Image {
2289 link_type,
2290 dest_url,
2291 title,
2292 id,
2293 }
2294 }
2295 ItemBody::Heading(level, Some(heading_ix)) => {
2296 let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2297 Tag::Heading {
2298 level,
2299 id: id.clone(),
2300 classes: classes.clone(),
2301 attrs: attrs.clone(),
2302 }
2303 }
2304 ItemBody::Heading(level, None) => Tag::Heading {
2305 level,
2306 id: None,
2307 classes: Vec::new(),
2308 attrs: Vec::new(),
2309 },
2310 ItemBody::FencedCodeBlock(cow_ix) => {
2311 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2312 }
2313 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2314 ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2315 ItemBody::List(_, c, listitem_start) => {
2316 if c == b'.' || c == b')' {
2317 Tag::List(Some(listitem_start))
2318 } else {
2319 Tag::List(None)
2320 }
2321 }
2322 ItemBody::ListItem(_) => Tag::Item,
2323 ItemBody::TableHead => Tag::TableHead,
2324 ItemBody::TableCell => Tag::TableCell,
2325 ItemBody::TableRow => Tag::TableRow,
2326 ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2327 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2328 ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2329 ItemBody::Math(cow_ix, is_display) => {
2330 return if is_display {
2331 Event::DisplayMath(allocs.take_cow(cow_ix))
2332 } else {
2333 Event::InlineMath(allocs.take_cow(cow_ix))
2334 }
2335 }
2336 ItemBody::DefinitionList(_) => Tag::DefinitionList,
2337 ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2338 ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2339 _ => panic!("unexpected item body {:?}", item.body),
2340 };
2341
2342 Event::Start(tag)
2343}
2344
2345impl<'a, F: BrokenLinkCallback<'a>> Iterator for Parser<'a, F> {
2346 type Item = Event<'a>;
2347
2348 fn next(&mut self) -> Option<Event<'a>> {
2349 match self.tree.cur() {
2350 None => {
2351 let ix = self.tree.pop()?;
2352 let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
2353 self.tree.next_sibling(ix);
2355 return self.next();
2356 } else {
2357 ix
2358 };
2359 let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2360 self.tree.next_sibling(ix);
2361 Some(Event::End(tag_end))
2362 }
2363 Some(cur_ix) => {
2364 let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
2365 self.tree.push();
2367 self.tree.cur().unwrap()
2368 } else {
2369 cur_ix
2370 };
2371 if self.tree[cur_ix].item.body.is_maybe_inline() {
2372 self.handle_inline();
2373 }
2374
2375 let node = self.tree[cur_ix];
2376 let item = node.item;
2377 let event = item_to_event(item, self.text, &mut self.allocs);
2378 if let Event::Start(ref _tag) = event {
2379 self.tree.push();
2380 } else {
2381 self.tree.next_sibling(cur_ix);
2382 }
2383 Some(event)
2384 }
2385 }
2386 }
2387}
2388
/// Marks the parser as fused: once `next` has returned `None` it is promised
/// to keep returning `None`.
impl<'a, F: BrokenLinkCallback<'a>> FusedIterator for Parser<'a, F> {}
2390
2391#[cfg(test)]
2392mod test {
2393 use super::*;
2394 use crate::tree::Node;
2395
2396 fn parser_with_extensions(text: &str) -> Parser<'_> {
2399 let mut opts = Options::empty();
2400 opts.insert(Options::ENABLE_TABLES);
2401 opts.insert(Options::ENABLE_FOOTNOTES);
2402 opts.insert(Options::ENABLE_STRIKETHROUGH);
2403 opts.insert(Options::ENABLE_SUPERSCRIPT);
2404 opts.insert(Options::ENABLE_SUBSCRIPT);
2405 opts.insert(Options::ENABLE_TASKLISTS);
2406
2407 Parser::new_ext(text, opts)
2408 }
2409
2410 #[test]
2411 #[cfg(target_pointer_width = "64")]
2412 fn node_size() {
2413 let node_size = std::mem::size_of::<Node<Item>>();
2414 assert_eq!(48, node_size);
2415 }
2416
2417 #[test]
2418 #[cfg(target_pointer_width = "64")]
2419 fn body_size() {
2420 let body_size = std::mem::size_of::<ItemBody>();
2421 assert_eq!(16, body_size);
2422 }
2423
2424 #[test]
2425 fn single_open_fish_bracket() {
2426 assert_eq!(3, Parser::new("<").count());
2428 }
2429
2430 #[test]
2431 fn lone_hashtag() {
2432 assert_eq!(2, Parser::new("#").count());
2434 }
2435
2436 #[test]
2437 fn lots_of_backslashes() {
2438 Parser::new("\\\\\r\r").count();
2440 Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2441 }
2442
2443 #[test]
2444 fn issue_320() {
2445 parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2447 }
2448
2449 #[test]
2450 fn issue_319() {
2451 parser_with_extensions("|\r-]([^|\r-]([^").count();
2453 parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2454 }
2455
2456 #[test]
2457 fn issue_303() {
2458 parser_with_extensions("[^\r\ra]").count();
2460 parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2461 }
2462
2463 #[test]
2464 fn issue_313() {
2465 parser_with_extensions("*]0[^\r\r*]0[^").count();
2467 parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2468 }
2469
2470 #[test]
2471 fn issue_311() {
2472 parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2474 }
2475
2476 #[test]
2477 fn issue_283() {
2478 let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2479 parser_with_extensions(input).count();
2481 }
2482
2483 #[test]
2484 fn issue_289() {
2485 parser_with_extensions("> - \\\n> - ").count();
2487 parser_with_extensions("- \n\n").count();
2488 }
2489
2490 #[test]
2491 fn issue_306() {
2492 parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2494 }
2495
2496 #[test]
2497 fn issue_305() {
2498 parser_with_extensions("_6**6*_*").count();
2500 }
2501
2502 #[test]
2503 fn another_emphasis_panic() {
2504 parser_with_extensions("*__#_#__*").count();
2505 }
2506
2507 #[test]
2508 fn offset_iter() {
2509 let event_offsets: Vec<_> = Parser::new("*hello* world")
2510 .into_offset_iter()
2511 .map(|(_ev, range)| range)
2512 .collect();
2513 let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2514 assert_eq!(expected_offsets, event_offsets);
2515 }
2516
2517 #[test]
2518 fn reference_link_offsets() {
2519 let range =
2520 Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2521 .into_offset_iter()
2522 .filter_map(|(ev, range)| match ev {
2523 Event::Start(
2524 Tag::Link {
2525 link_type: LinkType::Reference,
2526 ..
2527 },
2528 ..,
2529 ) => Some(range),
2530 _ => None,
2531 })
2532 .next()
2533 .unwrap();
2534 assert_eq!(5..30, range);
2535 }
2536
2537 #[test]
2538 fn footnote_offsets() {
2539 let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
2540 .into_offset_iter()
2541 .filter_map(|(ev, range)| match ev {
2542 Event::FootnoteReference(..) => Some(range),
2543 _ => None,
2544 })
2545 .next()
2546 .unwrap();
2547 assert_eq!(12..16, range);
2548 }
2549
2550 #[test]
2551 fn footnote_offsets_exclamation() {
2552 let mut immediately_before_footnote = None;
2553 let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
2554 .into_offset_iter()
2555 .filter_map(|(ev, range)| match ev {
2556 Event::FootnoteReference(..) => Some(range),
2557 _ => {
2558 immediately_before_footnote = Some((ev, range));
2559 None
2560 }
2561 })
2562 .next()
2563 .unwrap();
2564 assert_eq!(13..17, range);
2565 if let (Event::Text(exclamation), range_exclamation) =
2566 immediately_before_footnote.as_ref().unwrap()
2567 {
2568 assert_eq!("!", &exclamation[..]);
2569 assert_eq!(&(12..13), range_exclamation);
2570 } else {
2571 panic!("what came first, then? {immediately_before_footnote:?}");
2572 }
2573 }
2574
2575 #[test]
2576 fn table_offset() {
2577 let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
2578 let event_offset = parser_with_extensions(markdown)
2579 .into_offset_iter()
2580 .map(|(_ev, range)| range)
2581 .nth(3)
2582 .unwrap();
2583 let expected_offset = 3..59;
2584 assert_eq!(expected_offset, event_offset);
2585 }
2586
2587 #[test]
2588 fn table_cell_span() {
2589 let markdown = "a|b|c\n--|--|--\na| |c";
2590 let event_offset = parser_with_extensions(markdown)
2591 .into_offset_iter()
2592 .filter_map(|(ev, span)| match ev {
2593 Event::Start(Tag::TableCell) => Some(span),
2594 _ => None,
2595 })
2596 .nth(4)
2597 .unwrap();
2598 let expected_offset_start = "a|b|c\n--|--|--\na|".len();
2599 assert_eq!(
2600 expected_offset_start..(expected_offset_start + 2),
2601 event_offset
2602 );
2603 }
2604
2605 #[test]
2606 fn offset_iter_issue_378() {
2607 let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2608 .into_offset_iter()
2609 .map(|(_ev, range)| range)
2610 .collect();
2611 let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
2612 assert_eq!(expected_offsets, event_offsets);
2613 }
2614
2615 #[test]
2616 fn offset_iter_issue_404() {
2617 let event_offsets: Vec<_> = Parser::new("###\n")
2618 .into_offset_iter()
2619 .map(|(_ev, range)| range)
2620 .collect();
2621 let expected_offsets = vec![(0..4), (0..4)];
2622 assert_eq!(expected_offsets, event_offsets);
2623 }
2624
/// A reference definition on the last line of the input, with no trailing
/// newline, must still resolve the reference link that uses it.
#[cfg(feature = "html")]
#[test]
fn link_def_at_eof() {
    let input = "[My site][world]\n\n[world]: https://vincentprouillet.com";

    let mut rendered = String::new();
    let parser = Parser::new(input);
    crate::html::push_html(&mut rendered, parser);

    assert_eq!(
        "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n",
        rendered
    );
}
2636
/// With a parser built by `Parser::new` (no options passed), `[^a]` is rendered
/// as an ordinary reference link whose text is `^a` and whose destination comes
/// from the `[^a]: yolo` definition.
#[cfg(feature = "html")]
#[test]
fn no_footnote_refs_without_option() {
    let input = "a [^a]\n\n[^a]: yolo";

    let mut html_out = String::new();
    crate::html::push_html(&mut html_out, Parser::new(input));

    let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";
    assert_eq!(expected, html_out);
}
2647
/// The input `[test]:\` (a label, a colon and a single trailing backslash at
/// end of input) must render to an empty HTML string.
#[cfg(feature = "html")]
#[test]
fn ref_def_at_eof() {
    let input = "[test]:\\";

    let mut rendered = String::new();
    let parser = Parser::new(input);
    crate::html::push_html(&mut rendered, parser);

    // Nothing at all is expected in the output.
    assert_eq!("", rendered);
}
2658
/// A reference definition whose line ends with `\r\n` must still be usable by a
/// later shortcut reference `[a]`.
#[cfg(feature = "html")]
#[test]
fn ref_def_cr_lf() {
    let input = "[a]: /u\r\n\n[a]";

    let mut html_out = String::new();
    crate::html::push_html(&mut html_out, Parser::new(input));

    let expected = "<p><a href=\"/u\">a</a></p>\n";
    assert_eq!(expected, html_out);
}
2669
/// A label followed by a colon but no destination (`[a]:`) is not a reference
/// definition; it must come out as literal paragraph text.
#[cfg(feature = "html")]
#[test]
fn no_dest_refdef() {
    let input = "[a]:";

    let mut rendered = String::new();
    let parser = Parser::new(input);
    crate::html::push_html(&mut rendered, parser);

    assert_eq!("<p>[a]:</p>\n", rendered);
}
2680
/// The broken-link callback must be invoked the expected number of times per
/// document: once for each of the first two inputs and twice for the third.
///
/// The callback always returns `None`, so it never supplies a replacement link.
#[test]
fn broken_links_called_only_once() {
    // (markdown input, expected number of callback invocations)
    let cases = [
        ("See also [`g()`][crate::g].", 1),
        ("See also [`g()`][crate::g][].", 1),
        ("[brokenlink1] some other node [brokenlink2]", 2),
    ];

    for (markdown, expected) in cases {
        let mut times_called = 0;
        let mut count_calls = |_broken_link: BrokenLink| {
            times_called += 1;
            None
        };
        // Drain the parser so every broken link in the input is visited.
        Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(&mut count_calls))
            .for_each(drop);
        assert_eq!(times_called, expected);
    }
}
2699
/// A broken-link callback that supplies a URL and title turns an unresolved
/// `[hello][world]` reference into a link.
///
/// The callback checks the reference text and the source span it receives.
/// Every emitted link start tag must have type `ReferenceUnknown`, the supplied
/// URL and title, and `world` as its id. At least one such tag must be seen.
#[test]
fn simple_broken_link_callback() {
    let input = "This is a link w/o def: [hello][world]";
    let mut resolve = |link: BrokenLink| {
        assert_eq!("world", link.reference.as_ref());
        assert_eq!(&input[link.span], "[hello][world]");
        Some(("YOLO".into(), "SWAG".to_owned().into()))
    };
    let parser =
        Parser::new_with_broken_link_callback(input, Options::empty(), Some(&mut resolve));

    let mut link_tag_count = 0;
    for event in parser {
        // Only link start tags are of interest; every other event is ignored.
        if let Event::Start(Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        }) = event
        {
            link_tag_count += 1;
            assert_eq!(link_type, LinkType::ReferenceUnknown);
            assert_eq!(dest_url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
    }
    assert!(link_tag_count > 0);
}
2733
/// A fenced block opened with "```test" must produce exactly one
/// `CodeBlockKind::Fenced` start tag, carrying the info string `test`.
#[test]
fn code_block_kind_check_fenced() {
    let events = Parser::new("hello\n```test\ntadam\n```").into_offset_iter();

    let mut fenced_blocks = 0;
    for (event, _range) in events {
        if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
            assert_eq!(info.as_ref(), "test");
            fenced_blocks += 1;
        }
    }
    assert_eq!(fenced_blocks, 1);
}
2749
/// Code indented by four spaces after a blank line must produce exactly one
/// `CodeBlockKind::Indented` start tag, even when its first line looks like a
/// fence opener.
#[test]
fn code_block_kind_check_indented() {
    // The four-space indentation is essential: CommonMark lets a fence be
    // indented by up to three spaces, so with any smaller indent the "```test"
    // line would open a fenced block and no indented block would be reported.
    let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");

    let found = parser
        .into_offset_iter()
        .filter(|(ev, _range)| {
            matches!(
                ev,
                Event::Start(Tag::CodeBlock(CodeBlockKind::Indented))
            )
        })
        .count();
    assert_eq!(found, 1);
}
2764
/// Reference definitions are available from `Parser::reference_definitions`
/// both before and after the parser has been advanced.
///
/// Checks that lookup finds `[a B c]` under the lowercase key `a b c`, reports
/// `None` for an unknown label, and returns a span that covers the whole
/// `[final ONE]` definition line.
#[test]
fn ref_defs() {
    let input = concat!(
        "[a B c]: http://example.com\n",
        "[another]: https://google.com\n",
        "\n",
        "text\n",
        "\n",
        "[final ONE]: http://wikipedia.org\n",
    );
    let mut parser = Parser::new(input);

    assert!(parser.reference_definitions().get("a b c").is_some());
    assert!(parser.reference_definitions().get("nope").is_none());

    // Advance the parser once. Fail loudly if it yields nothing, so the checks
    // below can never be skipped silently.
    assert!(
        parser.next().is_some(),
        "parser produced no events for non-empty input"
    );

    // The key is an owned `String` created after the parser, so it lives for a
    // shorter time than the parser and its input; lookup must accept that.
    let key = "final one".to_owned();
    let link_def = parser.reference_definitions().get(&key).unwrap();
    let span = &input[link_def.span.clone()];
    assert_eq!(span, "[final ONE]: http://wikipedia.org");
}
2787
/// Compile-time check that common ways of writing a broken-link callback are
/// accepted by `Parser::new_with_broken_link_callback`.
///
/// The test passes if it compiles and the parsers can be drained. The `'b`
/// lifetime is declared on the test function so that the closure's parameter
/// type can name it.
#[test]
fn common_lifetime_patterns_allowed<'b>() {
    // A callback written as a plain function, tying the returned strings to the
    // lifetime of the incoming link.
    fn fn_callback<'a>(link: BrokenLink<'a>) -> Option<(CowStr<'a>, CowStr<'a>)> {
        Some(("#".into(), link.reference))
    }

    // A callback written as a closure; it uses the enclosing `'b` for its argument.
    let mut closure_callback = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

    // Input owned by this function, so it lives for less than `'static`.
    let owned_input = String::from("xyz");

    // The closure is exercised only with a string literal.
    Parser::new_with_broken_link_callback(
        "static lifetime",
        Options::empty(),
        Some(&mut closure_callback),
    )
    .for_each(drop);

    // The function is exercised with a string literal and with the local `String`.
    Parser::new_with_broken_link_callback(
        "static lifetime",
        Options::empty(),
        Some(&mut fn_callback),
    )
    .for_each(drop);
    Parser::new_with_broken_link_callback(
        &owned_input,
        Options::empty(),
        Some(&mut fn_callback),
    )
    .for_each(drop);
}
2825
/// Inline HTML that continues across a block-quote line must be emitted as one
/// `InlineHtml` event with the `> ` marker of the continuation line removed.
#[test]
fn inline_html_inside_blockquote() {
    let events: Vec<_> = Parser::new("> <foo\n> bar>").collect();

    // The expected HTML text is owned (boxed), not borrowed from the input.
    let inline_html = Event::InlineHtml(CowStr::Boxed(Box::from("<foo\nbar>")));
    let expected = [
        Event::Start(Tag::BlockQuote(None)),
        Event::Start(Tag::Paragraph),
        inline_html,
        Event::End(TagEnd::Paragraph),
        Event::End(TagEnd::BlockQuote(None)),
    ];
    assert_eq!(&events, &expected);
}
2840
/// With `Options::ENABLE_WIKILINKS`, `[[foo]]` yields a wikilink with
/// `has_pothole: false` and `[[bar|baz]]` one with `has_pothole: true`.
///
/// In the piped form the destination is `bar` and the link text is `baz`; in
/// the plain form both are `foo`.
#[test]
fn wikilink_has_pothole() {
    // Builders for the two event shapes that repeat in the expected stream.
    let link_start = |dest: &'static str, has_pothole: bool| {
        Event::Start(Tag::Link {
            link_type: LinkType::WikiLink { has_pothole },
            dest_url: CowStr::Borrowed(dest),
            title: CowStr::Borrowed(""),
            id: CowStr::Borrowed(""),
        })
    };
    let text = |content: &'static str| Event::Text(CowStr::Borrowed(content));

    let events: Vec<_> =
        Parser::new_ext("[[foo]] [[bar|baz]]", Options::ENABLE_WIKILINKS).collect();
    let expected = [
        Event::Start(Tag::Paragraph),
        link_start("foo", false),
        text("foo"),
        Event::End(TagEnd::Link),
        text(" "),
        link_start("bar", true),
        text("baz"),
        Event::End(TagEnd::Link),
        Event::End(TagEnd::Paragraph),
    ];
    assert_eq!(&events, &expected);
}
2868}