1use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25 cmp::{max, min},
26 iter::FusedIterator,
27 num::NonZeroUsize,
28 ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33use crate::{
34 firstpass::run_first_pass,
35 linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
36 mdx::*,
37 scanners::*,
38 strings::CowStr,
39 tree::{Tree, TreeIndex},
40 Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
41 MetadataBlockKind, Options, Tag, TagEnd,
42};
43
/// Nesting limit handed to `scan_link_dest` when an inline link destination is scanned
/// (see `scan_inline_link`); by its name, the maximum depth of nested parentheses accepted.
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
50
/// A single node payload stored in the parse tree.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    /// Byte offset into the source text where this item begins.
    pub start: usize,
    /// Byte offset into the source text just past the end of this item.
    pub end: usize,
    /// What kind of item this is, plus any per-kind data.
    pub body: ItemBody,
}
57
/// The kind of a tree item.
///
/// The `Maybe*` variants are tentative inline markers that the inline passes
/// (`handle_inline_pass1`, `handle_emphasis_and_hard_break`) either resolve into a
/// concrete variant or demote to `Text`.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // --- tentative inline markers ---
    /// Emphasis delimiter: (delimiter count, can open, can close).
    MaybeEmphasis(usize, bool, bool),
    /// Math delimiter: (preceded by a backslash, brace context).
    MaybeMath(bool, u8),
    /// Quote character: (quote byte, can open, can close).
    MaybeSmartQuote(u8, bool, bool),
    /// Code span delimiter: (delimiter count, preceded by a backslash).
    MaybeCode(usize, bool),
    /// A `<` that may start an autolink, inline HTML or (with MDX enabled) inline JSX.
    MaybeHtml,
    /// A potential link opener.
    MaybeLinkOpen,
    /// A potential link closer; the flag records whether it may still be tried as a
    /// reference (it is set to `false` once a reference attempt has been made on it).
    MaybeLinkClose(bool),
    /// A potential image opener.
    MaybeImage,

    // --- resolved inline items ---
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    Math(CowIndex, bool),
    Code(CowIndex),
    /// Link whose data was stored through `allocate_link`.
    Link(LinkIndex),
    /// Image whose data was stored through `allocate_link`.
    Image(LinkIndex),
    /// Footnote reference; the index points at the allocated label.
    FootnoteReference(CowIndex),
    TaskListMarker(bool),
    /// Inline HTML taken verbatim from the source span.
    InlineHtml,
    /// Inline HTML whose text had to be stored in an owned, allocated string.
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    /// A single character that replaces the item's source text (used for `!`, curly
    /// quotes and a literal backslash by the inline passes).
    SynthesizeChar(char),
    Html,
    Text {
        backslash_escaped: bool,
    },
    SoftBreak,
    /// NOTE(review): `HardBreak(true)` at the very end of a block is rewritten to a
    /// literal `\`, so `true` presumably marks a backslash-style break - confirm.
    HardBreak(bool),

    /// Default body; `is_block_level` treats it as neither inline nor block-level.
    #[default]
    Root,

    // --- block-level items ---
    Paragraph,
    TightParagraph,
    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>),
    FencedCodeBlock(CowIndex),
    MathBlock(CowIndex),
    IndentCodeBlock,
    HtmlBlock(bool),
    BlockQuote(Option<BlockQuoteKind>),
    ContainerDirective(u8, DirectiveIndex),
    LeafDirective(DirectiveIndex),
    TextDirective(DirectiveIndex),
    List(bool, u8, u64),
    ListItem(usize, bool),
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    DefinitionList(bool),
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    DefinitionListDefinition(usize),

    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,

    // --- MDX items ---
    MdxJsxFlowElement(JsxElementIndex),
    /// Inline JSX element; created from a `MaybeHtml` marker when MDX is enabled.
    MdxJsxTextElement(JsxElementIndex),
    MdxFlowExpression(CowIndex),
    MdxTextExpression(CowIndex),
    MdxEsm(CowIndex),
}
144
145impl ItemBody {
146 pub(crate) fn is_maybe_inline(&self) -> bool {
147 use ItemBody::*;
148 matches!(
149 *self,
150 MaybeEmphasis(..)
151 | MaybeMath(..)
152 | MaybeSmartQuote(..)
153 | MaybeCode(..)
154 | MaybeHtml
155 | MaybeLinkOpen
156 | MaybeLinkClose(..)
157 | MaybeImage
158 )
159 }
160 pub(crate) fn is_block_level(&self) -> bool {
161 !self.is_inline() && !matches!(self, ItemBody::Root)
162 }
163 fn is_inline(&self) -> bool {
164 use ItemBody::*;
165 matches!(
166 *self,
167 MaybeEmphasis(..)
168 | MaybeMath(..)
169 | MaybeSmartQuote(..)
170 | MaybeCode(..)
171 | MaybeHtml
172 | MaybeLinkOpen
173 | MaybeLinkClose(..)
174 | MaybeImage
175 | Emphasis
176 | Strong
177 | Strikethrough
178 | Math(..)
179 | Code(..)
180 | Link(..)
181 | Image(..)
182 | FootnoteReference(..)
183 | TaskListMarker(..)
184 | InlineHtml
185 | OwnedInlineHtml(..)
186 | SynthesizeText(..)
187 | SynthesizeChar(..)
188 | Html
189 | Text { .. }
190 | SoftBreak
191 | HardBreak(..)
192 )
193 }
194}
195
/// Description of a reference-style link for which no matching definition was found;
/// handed to the broken-link callback so it can supply a replacement.
#[derive(Debug)]
pub struct BrokenLink<'a> {
    /// Byte range in the source text covered by the unresolved link.
    pub span: core::ops::Range<usize>,
    /// The kind of reference that failed to resolve.
    pub link_type: LinkType,
    /// The label that was looked up.
    pub reference: CowStr<'a>,
}
202
/// Markdown parser: pairs the parsing state with user-supplied callbacks.
pub struct Parser<'input, CB = DefaultParserCallbacks> {
    /// Callbacks consulted while parsing (e.g. for broken links).
    callbacks: CB,
    /// The callback-independent parser state.
    inner: ParserInner<'input>,
}
208
/// Callback-independent parser state shared by the inline passes.
pub(crate) struct ParserInner<'input> {
    /// The complete source text being parsed.
    pub(crate) text: &'input str,
    /// Enabled extensions.
    pub(crate) options: Options,
    /// Block tree produced by the first pass and refined by the inline passes.
    pub(crate) tree: Tree<Item>,
    /// Side storage for strings, links, reference and footnote definitions, etc.
    pub(crate) allocs: Allocations<'input>,
    html_scan_guard: HtmlScanGuard,

    /// Remaining budget for reference-link expansion. Each resolved reference subtracts
    /// the length of its URL and title; once it reaches zero no further references are
    /// resolved. Initialised to `max(text.len(), 100_000)`.
    link_ref_expansion_limit: usize,

    /// MDX diagnostics as `(byte offset, message)` pairs.
    pub(crate) mdx_errors: Vec<(usize, String)>,

    /// Open emphasis-like delimiters awaiting a closer.
    inline_stack: InlineStack,
    /// Open link/image brackets awaiting a closer.
    link_stack: LinkStack,
    /// Open wikilink brackets (only used with `Options::ENABLE_WIKILINKS`).
    wikilink_stack: LinkStack,
    /// Cache of code span delimiters found while scanning ahead.
    code_delims: CodeDelims,
    math_delims: MathDelims,
}
246
247impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
248 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
249 f.debug_struct("Parser")
251 .field("text", &self.inner.text)
252 .field("options", &self.inner.options)
253 .field("callbacks", &..)
254 .finish()
255 }
256}
257
258impl<'a> BrokenLink<'a> {
259 pub fn into_static(self) -> BrokenLink<'static> {
263 BrokenLink {
264 span: self.span.clone(),
265 link_type: self.link_type,
266 reference: self.reference.into_string().into(),
267 }
268 }
269}
270
271impl<'input> Parser<'input, DefaultParserCallbacks> {
272 pub fn new(text: &'input str) -> Self {
274 Self::new_ext(text, Options::empty())
275 }
276
277 pub fn new_ext(text: &'input str, options: Options) -> Self {
279 Self::new_with_callbacks(text, options, DefaultParserCallbacks)
280 }
281}
282
283impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
284 pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
309 let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
310 tree.reset();
311 let inline_stack = Default::default();
312 let link_stack = Default::default();
313 let wikilink_stack = Default::default();
314 let html_scan_guard = Default::default();
315 Parser {
316 callbacks,
317
318 inner: ParserInner {
319 text,
320 options,
321 tree,
322 allocs,
323 inline_stack,
324 link_stack,
325 wikilink_stack,
326 html_scan_guard,
327 link_ref_expansion_limit: text.len().max(100_000),
329 mdx_errors: Vec::new(),
330 code_delims: CodeDelims::new(),
331 math_delims: MathDelims::new(),
332 },
333 }
334 }
335
336 pub fn reference_definitions(&self) -> &RefDefs<'_> {
339 &self.inner.allocs.refdefs
340 }
341
342 pub fn mdx_errors(&self) -> &[(usize, String)] {
345 &self.inner.mdx_errors
346 }
347
348 pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
352 OffsetIter { parser: self }
353 }
354}
355
356impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
357 pub fn new_with_broken_link_callback(
366 text: &'input str,
367 options: Options,
368 broken_link_callback: Option<F>,
369 ) -> Self
370 where
371 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
372 {
373 Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
374 }
375}
376
377impl<'input> ParserInner<'input> {
378 pub(crate) fn new(text: &'input str, options: Options) -> Self {
379 let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
380 tree.reset();
381 ParserInner {
382 text,
383 options,
384 tree,
385 allocs,
386 inline_stack: Default::default(),
387 link_stack: Default::default(),
388 wikilink_stack: Default::default(),
389 html_scan_guard: Default::default(),
390 link_ref_expansion_limit: text.len().max(100_000),
391 mdx_errors: firstpass_mdx_errors,
392 code_delims: CodeDelims::new(),
393 math_delims: MathDelims::new(),
394 }
395 }
396
397 fn fetch_link_type_url_title(
416 &mut self,
417 link_label: CowStr<'input>,
418 span: Range<usize>,
419 link_type: LinkType,
420 callbacks: &mut dyn ParserCallbacks<'input>,
421 ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
422 if self.link_ref_expansion_limit == 0 {
423 return None;
424 }
425
426 let (link_type, url, title) = self
427 .allocs
428 .refdefs
429 .get(link_label.as_ref())
430 .map(|matching_def| {
431 let title = matching_def
433 .title
434 .as_ref()
435 .cloned()
436 .unwrap_or_else(|| "".into());
437 let url = matching_def.dest.clone();
438 (link_type, url, title)
439 })
440 .or_else(|| {
441 let broken_link = BrokenLink {
443 span,
444 link_type,
445 reference: link_label,
446 };
447
448 callbacks
449 .handle_broken_link(broken_link)
450 .map(|(url, title)| (link_type.to_unknown(), url, title))
451 })?;
452
453 self.link_ref_expansion_limit = self
457 .link_ref_expansion_limit
458 .saturating_sub(url.len() + title.len());
459
460 Some((link_type, url, title))
461 }
462
    /// Resolves the inline content of the current block in two passes: first code,
    /// math, HTML/JSX and links (`handle_inline_pass1`), then emphasis, smart quotes and
    /// trailing hard breaks (`handle_emphasis_and_hard_break`).
    pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
        self.handle_inline_pass1(callbacks);
        self.handle_emphasis_and_hard_break();
    }
473
    /// First inline pass over the sibling list starting at the tree's current node.
    ///
    /// Walks the siblings once and resolves every tentative marker except emphasis and
    /// smart quotes: `MaybeHtml` (autolinks, inline HTML, or inline JSX when MDX is
    /// enabled), `MaybeMath`, `MaybeCode`, and the link/image bracket markers. Markers
    /// that cannot be resolved are demoted to `Text`. `callbacks` is consulted for
    /// reference links that have no definition.
    ///
    /// `prev` tracks the node visited before `cur` so that a child list can be cut off
    /// from its siblings when nodes are re-parented under a link.
    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
        let mut cur = self.tree.cur();
        let mut prev = None;

        // Scanners are only allowed to look at text up to the end of the enclosing block.
        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
        let block_text = &self.text[..block_end];

        while let Some(mut cur_ix) = cur {
            match self.tree[cur_ix].item.body {
                ItemBody::MaybeHtml => {
                    if self.options.contains(Options::ENABLE_MDX) {
                        // With MDX enabled a `<` is either inline JSX or plain text;
                        // autolinks and raw inline HTML are not attempted.
                        let start = self.tree[cur_ix].item.start;
                        let next_byte = block_text.as_bytes().get(start + 1).copied();

                        if next_byte == Some(b'!') {
                            // `<!` (e.g. an HTML comment): record an error, keep as text.
                            self.mdx_errors.push((
                                start,
                                "Unexpected character `!` (U+0021) before name, expected a \
                                 character that can start a name, such as a letter, `$`, or `_` \
                                 (note: to create a comment in MDX, use `{/* text */}`)"
                                    .to_string(),
                            ));
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                            prev = cur;
                            cur = self.tree[cur_ix].next;
                            continue;
                        }

                        if let Some(total_len) =
                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
                        {
                            // Turn this node into the JSX element and skip every node
                            // covered by the tag.
                            let end = start + total_len;
                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
                            let raw = &block_text[start..end];
                            let col = crate::mdx::column_at(block_text.as_bytes(), start);
                            let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col);
                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
                            self.tree[cur_ix].item.end = end;
                            self.tree[cur_ix].next = node;
                            prev = cur;
                            cur = node;
                            if let Some(node_ix) = cur {
                                // The following node may straddle the tag end; trim it.
                                self.tree[node_ix].item.start =
                                    max(self.tree[node_ix].item.start, end);
                            }
                            continue;
                        }

                        // Looked like the start of a tag but did not scan as JSX.
                        if matches!(next_byte, Some(b'a'..=b'z' | b'A'..=b'Z' | b'/' | b'>')) {
                            self.mdx_errors.push((
                                start,
                                "Unexpected character after `<`, expected a valid JSX tag \
                                 (note: to create a link in MDX, use `[text](url)`)"
                                    .to_string(),
                            ));
                        }

                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }

                    // Non-MDX: try an autolink first, then inline HTML.
                    let next = self.tree[cur_ix].next;
                    let autolink = if let Some(next_ix) = next {
                        scan_autolink(block_text, self.tree[next_ix].item.start)
                    } else {
                        None
                    };

                    if let Some((ix, uri, link_type)) = autolink {
                        let node = scan_nodes_to_ix(&self.tree, next, ix);
                        // The link text is the source between `<` and `>`.
                        let text_node = self.tree.create_node(Item {
                            start: self.tree[cur_ix].item.start + 1,
                            end: ix - 1,
                            body: ItemBody::Text {
                                backslash_escaped: false,
                            },
                        });
                        let link_ix =
                            self.allocs
                                .allocate_link(link_type, uri, "".into(), "".into());
                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
                        self.tree[cur_ix].item.end = ix;
                        self.tree[cur_ix].next = node;
                        self.tree[cur_ix].child = Some(text_node);
                        prev = cur;
                        cur = node;
                        if let Some(node_ix) = cur {
                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
                        }
                        continue;
                    } else {
                        let inline_html = next.and_then(|next_ix| {
                            self.scan_inline_html(
                                block_text.as_bytes(),
                                self.tree[next_ix].item.start,
                            )
                        });
                        if let Some((span, ix)) = inline_html {
                            let node = scan_nodes_to_ix(&self.tree, next, ix);
                            // A non-empty `span` carries the HTML bytes to emit; an empty
                            // one means the source span can be used as is.
                            self.tree[cur_ix].item.body = if !span.is_empty() {
                                let converted_string =
                                    String::from_utf8(span).expect("invalid utf8");
                                ItemBody::OwnedInlineHtml(
                                    self.allocs.allocate_cow(converted_string.into()),
                                )
                            } else {
                                ItemBody::InlineHtml
                            };
                            self.tree[cur_ix].item.end = ix;
                            self.tree[cur_ix].next = node;
                            prev = cur;
                            cur = node;
                            if let Some(node_ix) = cur {
                                self.tree[node_ix].item.start =
                                    max(self.tree[node_ix].item.start, ix);
                            }
                            continue;
                        }
                    }
                    // Neither autolink nor HTML: a literal `<`.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                }
                ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
                    if preceded_by_backslash {
                        // An escaped delimiter is literal text.
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: true,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }
                    // Measure the opening run: consecutive `MaybeMath` nodes that are
                    // adjacent in the source.
                    let mut open_count = 1usize;
                    let mut open_end = cur_ix;
                    {
                        let mut peek = self.tree[cur_ix].next;
                        while let Some(peek_ix) = peek {
                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                            {
                                open_count += 1;
                                open_end = peek_ix;
                                peek = self.tree[peek_ix].next;
                            } else {
                                break;
                            }
                        }
                    }

                    // Look ahead for a closing run of exactly the same length.
                    let mut scan = self.tree[open_end].next;
                    let mut close_ix = None;
                    while let Some(scan_ix) = scan {
                        if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
                            let mut run = 1usize;
                            let mut run_end = scan_ix;
                            let mut peek = self.tree[scan_ix].next;
                            while let Some(peek_ix) = peek {
                                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                                    && self.tree[peek_ix].item.start == self.tree[run_end].item.end
                                {
                                    run += 1;
                                    run_end = peek_ix;
                                    peek = self.tree[peek_ix].next;
                                } else {
                                    break;
                                }
                            }
                            if run == open_count {
                                close_ix = Some(scan_ix);
                                break;
                            }
                            // Wrong length: skip the whole run and keep looking.
                            scan = self.tree[run_end].next;
                            continue;
                        }
                        scan = self.tree[scan_ix].next;
                    }

                    if let Some(scan_ix) = close_ix {
                        self.make_math_span(cur_ix, scan_ix);
                    } else {
                        // No closer: every delimiter of the opening run becomes text.
                        let mut fail_ix = cur_ix;
                        loop {
                            self.tree[fail_ix].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                            if fail_ix == open_end {
                                break;
                            }
                            if let Some(next) = self.tree[fail_ix].next {
                                fail_ix = next;
                            } else {
                                break;
                            }
                        }
                    }
                }
                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
                    if preceded_by_backslash {
                        // The first delimiter character is escaped, so it cannot take
                        // part in the opening run.
                        search_count -= 1;
                        if search_count == 0 {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: true,
                            };
                            prev = cur;
                            cur = self.tree[cur_ix].next;
                            continue;
                        }
                    }

                    if self.code_delims.is_populated() {
                        // Delimiters were already collected by an earlier scan; reuse them.
                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                        } else {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: preceded_by_backslash,
                            };
                        }
                    } else {
                        // Walk forward looking for a matching closer, remembering every
                        // non-matching delimiter on the way.
                        let mut scan = if search_count > 0 {
                            self.tree[cur_ix].next
                        } else {
                            None
                        };
                        while let Some(scan_ix) = scan {
                            if let ItemBody::MaybeCode(delim_count, _) =
                                self.tree[scan_ix].item.body
                            {
                                if search_count == delim_count {
                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                                    self.code_delims.clear();
                                    break;
                                } else {
                                    self.code_delims.insert(delim_count, scan_ix);
                                }
                            }
                            scan = self.tree[scan_ix].next;
                        }
                        // `scan` is only `None` here when the walk ran off the end
                        // without finding a closer.
                        if scan.is_none() {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: preceded_by_backslash,
                            };
                        }
                    }
                }
                ItemBody::MaybeLinkOpen => {
                    // Text until proven otherwise; remember the opener on the stack(s).
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let link_open_doubled = self.tree[cur_ix]
                        .next
                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                        .unwrap_or(false);
                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                        self.wikilink_stack.push(LinkStackEl {
                            node: cur_ix,
                            ty: LinkStackTy::Link,
                        });
                    }
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Link,
                    });
                }
                ItemBody::MaybeImage => {
                    // Same as a link opener, but tagged as an image.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let link_open_doubled = self.tree[cur_ix]
                        .next
                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                        .unwrap_or(false);
                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                        self.wikilink_stack.push(LinkStackEl {
                            node: cur_ix,
                            ty: LinkStackTy::Image,
                        });
                    }
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Image,
                    });
                }
                ItemBody::MaybeLinkClose(could_be_ref) => {
                    // Demote to text up front. Every bare `continue` below relies on
                    // this: `cur` is unchanged, so the next iteration sees a `Text` node,
                    // takes the default arm and advances normally.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let tos_link = self.link_stack.pop();
                    // `]]` with wikilinks enabled: try to close a wikilink first.
                    if self.options.contains(Options::ENABLE_WIKILINKS)
                        && self.tree[cur_ix]
                            .next
                            .map(|ix| {
                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
                            })
                            .unwrap_or(false)
                    {
                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
                            cur = self.tree[node].next;
                            continue;
                        }
                    }
                    if let Some(tos) = tos_link {
                        // Links cannot nest inside links; images still can.
                        if tos.ty != LinkStackTy::Image
                            && matches!(
                                self.tree[self.tree.peek_up().unwrap()].item.body,
                                ItemBody::Link(..)
                            )
                        {
                            continue;
                        }
                        if tos.ty == LinkStackTy::Disabled {
                            continue;
                        }
                        let next = self.tree[cur_ix].next;
                        if let Some((next_ix, url, title)) =
                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
                        {
                            // Inline link `[text](dest "title")`: the opener becomes the
                            // link node and everything up to `prev` becomes its children.
                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
                            if let Some(prev_ix) = prev {
                                self.tree[prev_ix].next = None;
                            }
                            cur = Some(tos.node);
                            cur_ix = tos.node;
                            let link_ix =
                                self.allocs
                                    .allocate_link(LinkType::Inline, url, title, "".into());
                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
                                ItemBody::Image(link_ix)
                            } else {
                                ItemBody::Link(link_ix)
                            };
                            self.tree[cur_ix].child = self.tree[cur_ix].next;
                            self.tree[cur_ix].next = next_node;
                            self.tree[cur_ix].item.end = next_ix;
                            if let Some(next_node_ix) = next_node {
                                self.tree[next_node_ix].item.start =
                                    max(self.tree[next_node_ix].item.start, next_ix);
                            }

                            // A completed link disables all enclosing openers.
                            if tos.ty == LinkStackTy::Link {
                                self.disable_all_links();
                            }
                        } else {
                            // Not an inline link. Check first whether the bracketed text
                            // itself is a footnote label with a known definition.
                            let first_bracket_start = self.tree[tos.node].item.start;
                            let first_bracket_end = self.tree[cur_ix].item.end;
                            let first_bracket_text =
                                &self.text[first_bracket_start..first_bracket_end];
                            if let Some((_, ReferenceLabel::Footnote(footlabel))) =
                                scan_link_label(&self.tree, first_bracket_text, self.options)
                            {
                                if self.allocs.footdefs.contains(&footlabel) {
                                    let footref = self.allocs.allocate_cow(footlabel);
                                    if let Some(def) = self
                                        .allocs
                                        .footdefs
                                        .get_mut(self.allocs.cows[footref.0].to_owned())
                                    {
                                        def.use_count += 1;
                                    }
                                    // For `![^x]` the `!` is not part of the footnote:
                                    // keep it as a synthesized char and put the reference
                                    // on the closing-bracket node instead.
                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
                                        self.tree[tos.node].next = Some(cur_ix);
                                        self.tree[tos.node].child = None;
                                        self.tree[tos.node].item.body =
                                            ItemBody::SynthesizeChar('!');
                                        self.tree[cur_ix].item.start =
                                            self.tree[tos.node].item.start + 1;
                                        self.tree[tos.node].item.end =
                                            self.tree[tos.node].item.start + 1;
                                        cur_ix
                                    } else {
                                        tos.node
                                    };
                                    self.tree[footnote_ix].next = next;
                                    self.tree[footnote_ix].child = None;
                                    self.tree[footnote_ix].item.body =
                                        ItemBody::FootnoteReference(footref);
                                    self.tree[footnote_ix].item.end = first_bracket_end;
                                    prev = Some(footnote_ix);
                                    cur = next;
                                    self.link_stack.clear();
                                    continue;
                                }
                            }
                            // Otherwise it may be a reference to a defined link.
                            let scan_result =
                                scan_reference(&self.tree, block_text, next, self.options);
                            let (node_after_link, link_type) = match scan_result {
                                // `[label][reference]`
                                RefScan::LinkLabel(_, end_ix) => {
                                    // Mark the reference's closing bracket as already
                                    // tried so it is not attempted as a reference again
                                    // if this attempt fails.
                                    let reference_close_node = if let Some(node) =
                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
                                    {
                                        node
                                    } else {
                                        continue;
                                    };
                                    self.tree[reference_close_node].item.body =
                                        ItemBody::MaybeLinkClose(false);
                                    let next_node = self.tree[reference_close_node].next;

                                    (next_node, LinkType::Reference)
                                }
                                // `[reference][]`
                                RefScan::Collapsed(next_node) => {
                                    // Already tried and rejected: skip.
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next_node, LinkType::Collapsed)
                                }
                                RefScan::UnexpectedFootnote => continue,
                                // `[shortcut]`
                                RefScan::Failed => {
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next, LinkType::Shortcut)
                                }
                            };

                            // (label, source offset where the label ends)
                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
                                RefScan::LinkLabel(l, end_ix) => {
                                    Some((ReferenceLabel::Link(l), end_ix))
                                }
                                RefScan::Collapsed(..)
                                | RefScan::Failed
                                | RefScan::UnexpectedFootnote => {
                                    // No separate label: rescan the bracketed text itself,
                                    // and accept it only if the label spans all of it.
                                    let label_start = self.tree[tos.node].item.end - 1;
                                    let label_end = self.tree[cur_ix].item.end;
                                    scan_link_label(
                                        &self.tree,
                                        &self.text[label_start..label_end],
                                        self.options,
                                    )
                                    .map(|(ix, label)| (label, label_start + ix))
                                    .filter(|(_, end)| *end == label_end)
                                }
                            };

                            let id = match &label {
                                Some(
                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
                                ) => l.clone(),
                                None => "".into(),
                            };

                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
                                // Footnote reference.
                                let footref = self.allocs.allocate_cow(l);
                                if let Some(def) = self
                                    .allocs
                                    .footdefs
                                    .get_mut(self.allocs.cows[footref.0].to_owned())
                                {
                                    def.use_count += 1;
                                }
                                if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
                                    // As above: for `![^x]` keep the `!` as its own node.
                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
                                        self.tree[tos.node].next = Some(cur_ix);
                                        self.tree[tos.node].child = None;
                                        self.tree[tos.node].item.body =
                                            ItemBody::SynthesizeChar('!');
                                        self.tree[cur_ix].item.start =
                                            self.tree[tos.node].item.start + 1;
                                        self.tree[tos.node].item.end =
                                            self.tree[tos.node].item.start + 1;
                                        cur_ix
                                    } else {
                                        tos.node
                                    };
                                    // `next`, not `node_after_link`: the latter was
                                    // computed for the link forms handled above.
                                    self.tree[footnote_ix].next = next;
                                    self.tree[footnote_ix].child = None;
                                    self.tree[footnote_ix].item.body =
                                        ItemBody::FootnoteReference(footref);
                                    self.tree[footnote_ix].item.end = end;
                                    prev = Some(footnote_ix);
                                    cur = next;
                                    self.link_stack.clear();
                                    continue;
                                }
                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
                                if let Some((def_link_type, url, title)) = self
                                    .fetch_link_type_url_title(
                                        link_label,
                                        (self.tree[tos.node].item.start)..end,
                                        link_type,
                                        callbacks,
                                    )
                                {
                                    let link_ix =
                                        self.allocs.allocate_link(def_link_type, url, title, id);
                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
                                    {
                                        ItemBody::Image(link_ix)
                                    } else {
                                        ItemBody::Link(link_ix)
                                    };
                                    let label_node = self.tree[tos.node].next;

                                    // Skip over the label and closing nodes.
                                    self.tree[tos.node].next = node_after_link;

                                    // If there is link text, hang it under the link node
                                    // and cut it off from the following siblings.
                                    if label_node != cur {
                                        self.tree[tos.node].child = label_node;

                                        if let Some(prev_ix) = prev {
                                            self.tree[prev_ix].next = None;
                                        }
                                    }

                                    self.tree[tos.node].item.end = end;

                                    // Continue from the link node so the shared advance
                                    // below moves on to `node_after_link`.
                                    cur = Some(tos.node);
                                    cur_ix = tos.node;

                                    if tos.ty == LinkStackTy::Link {
                                        self.disable_all_links();
                                    }
                                }
                            }
                        }
                    }
                }
                _ => {}
            }
            // Default advance for every arm that did not `continue`.
            prev = cur;
            cur = self.tree[cur_ix].next;
        }
        // Per-block scratch state must not leak into the next block.
        self.link_stack.clear();
        self.wikilink_stack.clear();
        self.code_delims.clear();
        self.math_delims.clear();
    }
1065
1066 fn handle_wikilink(
1072 &mut self,
1073 block_text: &'input str,
1074 cur_ix: TreeIndex,
1075 prev: Option<TreeIndex>,
1076 ) -> Option<TreeIndex> {
1077 let next_ix = self.tree[cur_ix].next.unwrap();
1078 if let Some(tos) = self.wikilink_stack.pop() {
1081 if tos.ty == LinkStackTy::Disabled {
1082 return None;
1083 }
1084 let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1086 return None;
1088 };
1089 let start_ix = self.tree[body_node].item.start;
1090 let end_ix = self.tree[cur_ix].item.start;
1091 let wikilink = match scan_wikilink_pipe(
1092 block_text,
1093 start_ix, end_ix - start_ix,
1095 ) {
1096 Some((rest, wikitext)) => {
1097 if wikitext.is_empty() {
1099 return None;
1100 }
1101 let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1103 if let Some(body_node) = body_node {
1104 self.tree[body_node].item.start = rest;
1107 Some((true, body_node, wikitext))
1108 } else {
1109 None
1110 }
1111 }
1112 None => {
1113 let wikitext = &block_text[start_ix..end_ix];
1114 if wikitext.is_empty() {
1116 return None;
1117 }
1118 let body_node = self.tree.create_node(Item {
1119 start: start_ix,
1120 end: end_ix,
1121 body: ItemBody::Text {
1122 backslash_escaped: false,
1123 },
1124 });
1125 Some((false, body_node, wikitext))
1126 }
1127 };
1128
1129 if let Some((has_pothole, body_node, wikiname)) = wikilink {
1130 let link_ix = self.allocs.allocate_link(
1131 LinkType::WikiLink { has_pothole },
1132 wikiname.into(),
1133 "".into(),
1134 "".into(),
1135 );
1136 if let Some(prev_ix) = prev {
1137 self.tree[prev_ix].next = None;
1138 }
1139 if tos.ty == LinkStackTy::Image {
1140 self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1141 } else {
1142 self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1143 }
1144 self.tree[tos.node].child = Some(body_node);
1145 self.tree[tos.node].next = self.tree[next_ix].next;
1146 self.tree[tos.node].item.end = end_ix + 2;
1147 self.disable_all_links();
1148 return Some(tos.node);
1149 }
1150 }
1151
1152 None
1153 }
1154
    /// Second inline pass: resolves emphasis-like delimiter runs, smart quotes and a
    /// trailing backslash hard break in the sibling list starting at the current node.
    ///
    /// Delimiter runs are matched through `inline_stack`. The arithmetic on tree
    /// indices (`cur_ix - 1`, `cur_ix + i`, `el.start + el.count`) relies on the nodes of
    /// one delimiter run occupying consecutive tree indices.
    fn handle_emphasis_and_hard_break(&mut self) {
        let mut prev = None;
        let mut prev_ix: TreeIndex;
        let mut cur = self.tree.cur();

        // Pending openers for smart quotes.
        let mut single_quote_open: Option<TreeIndex> = None;
        let mut double_quote_open: bool = false;

        while let Some(mut cur_ix) = cur {
            match self.tree[cur_ix].item.body {
                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
                    let run_length = count;
                    // The delimiter byte (`*`, `_`, `~`, `^`, ...) is read from the source.
                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
                    let both = can_open && can_close;
                    if can_close {
                        while let Some(el) =
                            self.inline_stack
                                .find_match(&mut self.tree, c, run_length, both)
                        {
                            // Found a matching opener: cut the content off from this closer.
                            if let Some(prev_ix) = prev {
                                self.tree[prev_ix].next = None;
                            }
                            let match_count = min(count, el.count);
                            // `start` and `end` are tree node indices.
                            let mut end = cur_ix - 1;
                            let mut start = el.start + el.count;

                            // Work from the inside out, consuming two delimiters at a
                            // time where possible and one for the remainder.
                            while start > el.start + el.count - match_count {
                                let inc = if start > el.start + el.count - match_count + 1 {
                                    2
                                } else {
                                    1
                                };
                                // Pick the node kind from the delimiter, the pair size
                                // and the enabled options; unsupported combinations
                                // stay text.
                                let ty = if c == b'~' {
                                    if inc == 2 {
                                        if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                            ItemBody::Strikethrough
                                        } else {
                                            ItemBody::Text {
                                                backslash_escaped: false,
                                            }
                                        }
                                    } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
                                        ItemBody::Subscript
                                    } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                        ItemBody::Strikethrough
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                } else if c == b'^' {
                                    if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
                                        ItemBody::Superscript
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                } else if inc == 2 {
                                    ItemBody::Strong
                                } else {
                                    ItemBody::Emphasis
                                };

                                // The outermost consumed opener node becomes the wrapper
                                // and the previous `start` becomes its child.
                                let root = start - inc;
                                end = end + inc;
                                self.tree[root].item.body = ty;
                                self.tree[root].item.end = self.tree[end].item.end;
                                self.tree[root].child = Some(start);
                                self.tree[root].next = None;
                                start = root;
                            }

                            // Link the outermost wrapper to whatever follows the
                            // consumed closer delimiters.
                            prev_ix = el.start + el.count - match_count;
                            prev = Some(prev_ix);
                            cur = self.tree[cur_ix + match_count - 1].next;
                            self.tree[prev_ix].next = cur;

                            // Unused opener delimiters go back on the stack.
                            if el.count > match_count {
                                self.inline_stack.push(InlineEl {
                                    start: el.start,
                                    count: el.count - match_count,
                                    run_length: el.run_length,
                                    c: el.c,
                                    both: el.both,
                                })
                            }
                            count -= match_count;
                            if count > 0 {
                                // Closer delimiters remain: keep matching from the next one.
                                cur_ix = cur.unwrap();
                            } else {
                                break;
                            }
                        }
                    }
                    if count > 0 {
                        if can_open {
                            // Leftover delimiters may open a later span.
                            self.inline_stack.push(InlineEl {
                                start: cur_ix,
                                run_length,
                                count,
                                c,
                                both,
                            });
                        } else {
                            // Cannot open anything: the leftovers are literal text.
                            for i in 0..count {
                                self.tree[cur_ix + i].item.body = ItemBody::Text {
                                    backslash_escaped: false,
                                };
                            }
                        }
                        // Step past the whole remaining run.
                        prev_ix = cur_ix + count - 1;
                        prev = Some(prev_ix);
                        cur = self.tree[prev_ix].next;
                    }
                }
                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
                    self.tree[cur_ix].item.body = match c {
                        b'\'' => {
                            // A single quote is emitted as a right quote (apostrophe) by
                            // default; a pending opener is turned into a left quote only
                            // once a closer shows up.
                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
                                single_quote_open = None;
                            } else if can_open {
                                single_quote_open = Some(cur_ix);
                            }
                            ItemBody::SynthesizeChar('’')
                        }
                        // Any other byte is treated as a double quote.
                        _ => {
                            if can_close && double_quote_open {
                                double_quote_open = false;
                                ItemBody::SynthesizeChar('”')
                            } else {
                                if can_open && !double_quote_open {
                                    double_quote_open = true;
                                }
                                ItemBody::SynthesizeChar('“')
                            }
                        }
                    };
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                }
                ItemBody::HardBreak(true) => {
                    // A `HardBreak(true)` with nothing after it is emitted as a
                    // literal backslash instead of a break.
                    if self.tree[cur_ix].next.is_none() {
                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
                    }
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                }
                _ => {
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                }
            }
        }
        // Flush whatever openers are still pending at the end of the block.
        self.inline_stack.pop_all(&mut self.tree);
    }
1316
    /// Disables every pending opener on both the regular link stack and the wikilink
    /// stack; called once a link has been completed.
    fn disable_all_links(&mut self) {
        self.link_stack.disable_all_links();
        self.wikilink_stack.disable_all_links();
    }
1321
1322 fn scan_inline_link(
1324 &self,
1325 underlying: &'input str,
1326 mut ix: usize,
1327 node: Option<TreeIndex>,
1328 ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1329 if underlying.as_bytes().get(ix) != Some(&b'(') {
1330 return None;
1331 }
1332 ix += 1;
1333
1334 let scan_separator = |ix: &mut usize| {
1335 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1336 if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1337 *ix += bl;
1338 *ix += skip_container_prefixes(
1339 &self.tree,
1340 &underlying.as_bytes()[*ix..],
1341 self.options,
1342 );
1343 }
1344 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1345 };
1346
1347 scan_separator(&mut ix);
1348
1349 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1350 let dest = unescape(dest, self.tree.is_in_table());
1351 ix += dest_length;
1352
1353 scan_separator(&mut ix);
1354
1355 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1356 ix += bytes_scanned;
1357 scan_separator(&mut ix);
1358 t
1359 } else {
1360 "".into()
1361 };
1362 if underlying.as_bytes().get(ix) != Some(&b')') {
1363 return None;
1364 }
1365 ix += 1;
1366
1367 Some((ix, dest, title))
1368 }
1369
1370 fn scan_link_title(
1372 &self,
1373 text: &'input str,
1374 start_ix: usize,
1375 node: Option<TreeIndex>,
1376 ) -> Option<(usize, CowStr<'input>)> {
1377 let bytes = text.as_bytes();
1378 let open = match bytes.get(start_ix) {
1379 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1380 _ => return None,
1381 };
1382 let close = if open == b'(' { b')' } else { open };
1383
1384 let mut title = String::new();
1385 let mut mark = start_ix + 1;
1386 let mut i = start_ix + 1;
1387
1388 while i < bytes.len() {
1389 let c = bytes[i];
1390
1391 if c == close {
1392 let cow = if title.is_empty() {
1393 (i - start_ix + 1, text[mark..i].into())
1394 } else {
1395 title.push_str(&text[mark..i]);
1396 (i - start_ix + 1, title.into())
1397 };
1398
1399 return Some(cow);
1400 }
1401 if c == open {
1402 return None;
1403 }
1404
1405 if c == b'\n' || c == b'\r' {
1406 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1407 if self.tree[node_ix].item.start > i {
1408 title.push_str(&text[mark..i]);
1409 title.push('\n');
1410 i = self.tree[node_ix].item.start;
1411 mark = i;
1412 continue;
1413 }
1414 }
1415 }
1416 if c == b'&' {
1417 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1418 title.push_str(&text[mark..i]);
1419 title.push_str(&value);
1420 i += n;
1421 mark = i;
1422 continue;
1423 }
1424 }
1425 if self.tree.is_in_table()
1426 && c == b'\\'
1427 && i + 2 < bytes.len()
1428 && bytes[i + 1] == b'\\'
1429 && bytes[i + 2] == b'|'
1430 {
1431 title.push_str(&text[mark..i]);
1434 i += 2;
1435 mark = i;
1436 }
1437 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1438 title.push_str(&text[mark..i]);
1439 i += 1;
1440 mark = i;
1441 }
1442
1443 i += 1;
1444 }
1445
1446 None
1447 }
1448
    /// Collapses the `MaybeMath` delimiters `open` and `close` into a single
    /// `ItemBody::Math` item stored at `open`.
    ///
    /// `MaybeMath` nodes that directly follow `open` (and `close`) without a
    /// gap in the source are treated as part of the respective delimiter. The
    /// math content is the source text between the end of the opening run and
    /// the start of `close`. If that range is inverted, `open` is turned into
    /// plain text and nothing else is changed.
    fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
        // Last node that belongs to the opening delimiter run.
        let mut open_end = open;
        {
            let mut peek = self.tree[open].next;
            while let Some(peek_ix) = peek {
                // Absorb a following `MaybeMath` node only if it starts exactly
                // where the run currently ends and is not the closer itself.
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                    && peek_ix != close
                {
                    open_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }
        // Last node that belongs to the closing delimiter run.
        let mut close_end = close;
        {
            let mut peek = self.tree[close].next;
            while let Some(peek_ix) = peek {
                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                    && self.tree[peek_ix].item.start == self.tree[close_end].item.end
                {
                    close_end = peek_ix;
                    peek = self.tree[peek_ix].next;
                } else {
                    break;
                }
            }
        }

        let span_start = self.tree[open_end].item.end;
        let span_end = self.tree[close].item.start;

        // Guard the slice below against an inverted range.
        if span_start > span_end {
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: false,
            };
            return;
        }

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // Only allocated once the content has to differ from the source text.
        let mut buf: Option<String> = None;

        // `start_ix` marks the beginning of the text not yet copied into `buf`.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                // Keep the line-break byte itself, then skip whatever
                // `skip_container_prefixes` recognises at the start of the
                // following line.
                ix += 1;
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
                start_ix = ix;
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // Inside a table, `\|` contributes a plain `|`.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Flush the remaining tail into `buf` (if any) and inspect the final
        // content: does it start/end with a space or newline, and does it
        // consist of nothing else?
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        // Strip one leading and one trailing space/newline when both are
        // present and the content is not made up of spaces/newlines only.
        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        // `open` becomes the math item and takes over the range and successor
        // of the closing run, which unlinks every node in between.
        // NOTE(review): the second field of `Math` is always `false` here —
        // confirm its meaning against the `ItemBody` definition.
        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
        self.tree[open].item.end = self.tree[close_end].item.end;
        self.tree[open].next = self.tree[close_end].next;
    }
1554
    /// Turns the delimiters `open` and `close` into an `ItemBody::Code` item
    /// whose content is the source text between them.
    ///
    /// Line breaks inside the span are normalised to a single `\n` and the
    /// container prefixes of the following line are skipped; inside a table
    /// `\|` becomes `|`. One leading and one trailing space/newline are
    /// stripped when both are present and the content is not blank.
    ///
    /// With `preceding_backslash` set, `open` is kept as a one-byte escaped
    /// text item and the code item is stored at `close` instead.
    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
        let span_start = self.tree[open].item.end;
        let span_end = self.tree[close].item.start;
        // Only allocated once the content has to differ from the source text.
        let mut buf: Option<String> = None;

        let spanned_text = &self.text[span_start..span_end];
        let spanned_bytes = spanned_text.as_bytes();
        // `start_ix` marks the beginning of the text not yet copied into `buf`.
        let mut start_ix = 0;
        let mut ix = 0;
        while ix < spanned_bytes.len() {
            let c = spanned_bytes[ix];
            if c == b'\r' || c == b'\n' {
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('\n');
                ix += 1;
                // Treat `\r\n` as a single line break.
                if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
                    ix += 1;
                }
                ix += skip_container_prefixes(&self.tree, &spanned_bytes[ix..], self.options);
                start_ix = ix;
            } else if c == b'\\'
                && spanned_bytes.get(ix + 1) == Some(&b'|')
                && self.tree.is_in_table()
            {
                // Inside a table, `\|` contributes a plain `|`.
                let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
                buf.push_str(&spanned_text[start_ix..ix]);
                buf.push('|');
                ix += 2;
                start_ix = ix;
            } else {
                ix += 1;
            }
        }

        // Flush the remaining tail into `buf` (if any) and inspect the final
        // content: does it start/end with a space or newline, and does it
        // consist of nothing else?
        let (opening, closing, all_spaces) = {
            let s = if let Some(buf) = &mut buf {
                buf.push_str(&spanned_text[start_ix..]);
                &buf[..]
            } else {
                spanned_text
            };
            (
                matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
                matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
                s.bytes().all(|b| b == b' ' || b == b'\n'),
            )
        };

        let cow: CowStr<'input> = if !all_spaces && opening && closing {
            if let Some(mut buf) = buf {
                if !buf.is_empty() {
                    buf.remove(0);
                    buf.pop();
                }
                buf.into()
            } else {
                spanned_text[1..(spanned_text.len() - 1).max(1)].into()
            }
        } else if let Some(buf) = buf {
            buf.into()
        } else {
            spanned_text.into()
        };

        if preceding_backslash {
            // `open` shrinks to its first byte and becomes escaped text; the
            // code item lives at `close` and starts right after that byte.
            // NOTE(review): presumably the first byte of `open` is the
            // backslash-escaped backtick — confirm with the caller.
            self.tree[open].item.body = ItemBody::Text {
                backslash_escaped: true,
            };
            self.tree[open].item.end = self.tree[open].item.start + 1;
            self.tree[open].next = Some(close);
            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[close].item.start = self.tree[open].item.start + 1;
        } else {
            // `open` becomes the code item and takes over the end and successor
            // of `close`, which unlinks every node in between.
            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[open].item.end = self.tree[close].item.end;
            self.tree[open].next = self.tree[close].next;
        }

        // Drop recorded MDX errors whose offset lies inside the code content.
        if !self.mdx_errors.is_empty() {
            self.mdx_errors
                .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
        }
    }
1644
1645 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1649 let c = *bytes.get(ix)?;
1650 if c == b'!' {
1651 Some((
1652 vec![],
1653 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1654 ))
1655 } else if c == b'?' {
1656 Some((
1657 vec![],
1658 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1659 ))
1660 } else {
1661 let (span, i) = scan_html_block_inner(
1662 &bytes[(ix - 1)..],
1664 Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
1665 )?;
1666 Some((span, i + ix - 1))
1667 }
1668 }
1669}
1670
1671pub(crate) fn scan_containers(
1673 tree: &Tree<Item>,
1674 line_start: &mut LineStart<'_>,
1675 options: Options,
1676) -> usize {
1677 let mut i = 0;
1678 for &node_ix in tree.walk_spine() {
1679 match tree[node_ix].item.body {
1680 ItemBody::BlockQuote(..) => {
1681 let save = line_start.clone();
1682 let _ = line_start.scan_space(3);
1683 if !line_start.scan_blockquote_marker() {
1684 *line_start = save;
1685 break;
1686 }
1687 }
1688 ItemBody::ListItem(indent, _) => {
1689 let save = line_start.clone();
1690 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1691 *line_start = save;
1692 break;
1693 }
1694 }
1695 ItemBody::DefinitionListDefinition(indent) => {
1696 let save = line_start.clone();
1697 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1698 *line_start = save;
1699 break;
1700 }
1701 }
1702 ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
1703 let save = line_start.clone();
1704 if !line_start.scan_space(4) && !line_start.is_at_eol() {
1705 *line_start = save;
1706 break;
1707 }
1708 }
1709 _ => (),
1710 }
1711 i += 1;
1712 }
1713 i
1714}
1715
1716pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
1717 let mut line_start = LineStart::new(bytes);
1718 let _ = scan_containers(tree, &mut line_start, options);
1719 line_start.bytes_scanned()
1720}
1721
1722impl Tree<Item> {
1723 pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1724 if end > start {
1725 if let Some(ix) = self.cur() {
1726 if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1727 {
1728 self[ix].item.end = end;
1729 return;
1730 }
1731 }
1732 self.append(Item {
1733 start,
1734 end,
1735 body: ItemBody::Text { backslash_escaped },
1736 });
1737 }
1738 }
1739 pub(crate) fn is_in_table(&self) -> bool {
1746 fn might_be_in_table(item: &Item) -> bool {
1747 item.body.is_inline()
1748 || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1749 | ItemBody::TableCell)
1750 }
1751 for &ix in self.walk_spine().rev() {
1752 if matches!(self[ix].item.body, ItemBody::Table(_)) {
1753 return true;
1754 }
1755 if !might_be_in_table(&self[ix].item) {
1756 return false;
1757 }
1758 }
1759 false
1760 }
1761}
1762
/// One entry of the `InlineStack`: a run of delimiters that is still
/// waiting for a match.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// Tree index of the first node of the run.
    start: TreeIndex,
    /// Number of consecutive tree nodes, starting at `start`, that belong to
    /// this entry (they are reset to text together when the entry is dropped).
    count: usize,
    /// Run length; `find_match` compares it with the run length of a
    /// candidate closer.
    run_length: usize,
    /// Delimiter byte. The stack distinguishes `_`, `*`, `^` and `~`.
    c: u8,
    /// NOTE(review): presumably "this run can both open and close" — confirm
    /// against the code that builds these entries.
    both: bool,
}
1776
/// Stack of unmatched inline delimiter runs.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// The pending delimiter runs, oldest first.
    stack: Vec<InlineEl>,
    /// Per-delimiter-class lower bounds, indexed by the constants on
    /// `InlineStack`. `find_match` never looks at stack entries below the
    /// bound that applies to the delimiter it is matching.
    lower_bounds: [usize; 10],
}
1786
1787impl InlineStack {
1788 const UNDERSCORE_NOT_BOTH: usize = 0;
1792 const ASTERISK_NOT_BOTH: usize = 1;
1793 const ASTERISK_BASE: usize = 2;
1794 const TILDES: usize = 5;
1795 const UNDERSCORE_BASE: usize = 6;
1796 const CIRCUMFLEXES: usize = 9;
1797
1798 fn pop_all(&mut self, tree: &mut Tree<Item>) {
1799 for el in self.stack.drain(..) {
1800 for i in 0..el.count {
1801 tree[el.start + i].item.body = ItemBody::Text {
1802 backslash_escaped: false,
1803 };
1804 }
1805 }
1806 self.lower_bounds = [0; 10];
1807 }
1808
1809 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1810 if c == b'_' {
1811 let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1812 if both {
1813 mod3_lower
1814 } else {
1815 min(
1816 mod3_lower,
1817 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1818 )
1819 }
1820 } else if c == b'*' {
1821 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1822 if both {
1823 mod3_lower
1824 } else {
1825 min(
1826 mod3_lower,
1827 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1828 )
1829 }
1830 } else if c == b'^' {
1831 self.lower_bounds[InlineStack::CIRCUMFLEXES]
1832 } else {
1833 self.lower_bounds[InlineStack::TILDES]
1834 }
1835 }
1836
1837 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1838 if c == b'_' {
1839 if both {
1840 self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1841 } else {
1842 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1843 }
1844 } else if c == b'*' {
1845 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1846 if !both {
1847 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1848 }
1849 } else if c == b'^' {
1850 self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
1851 } else {
1852 self.lower_bounds[InlineStack::TILDES] = new_bound;
1853 }
1854 }
1855
1856 fn truncate(&mut self, new_bound: usize) {
1857 self.stack.truncate(new_bound);
1858 for lower_bound in &mut self.lower_bounds {
1859 if *lower_bound > new_bound {
1860 *lower_bound = new_bound;
1861 }
1862 }
1863 }
1864
1865 fn find_match(
1866 &mut self,
1867 tree: &mut Tree<Item>,
1868 c: u8,
1869 run_length: usize,
1870 both: bool,
1871 ) -> Option<InlineEl> {
1872 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1873 let res = self.stack[lowerbound..]
1874 .iter()
1875 .cloned()
1876 .enumerate()
1877 .rfind(|(_, el)| {
1878 if (c == b'~' || c == b'^') && run_length != el.run_length {
1879 return false;
1880 }
1881 el.c == c
1882 && (!both && !el.both
1883 || !(run_length + el.run_length).is_multiple_of(3)
1884 || run_length.is_multiple_of(3))
1885 });
1886
1887 if let Some((matching_ix, matching_el)) = res {
1888 let matching_ix = matching_ix + lowerbound;
1889 for el in &self.stack[(matching_ix + 1)..] {
1890 for i in 0..el.count {
1891 tree[el.start + i].item.body = ItemBody::Text {
1892 backslash_escaped: false,
1893 };
1894 }
1895 }
1896 self.truncate(matching_ix);
1897 Some(matching_el)
1898 } else {
1899 if c != b'~' && c != b'^' {
1906 self.set_lowerbound(c, run_length, both, self.stack.len());
1907 }
1908 None
1909 }
1910 }
1911
1912 fn trim_lower_bound(&mut self, ix: usize) {
1913 self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1914 }
1915
1916 fn push(&mut self, el: InlineEl) {
1917 if el.c == b'~' {
1918 self.trim_lower_bound(InlineStack::TILDES);
1919 } else if el.c == b'^' {
1920 self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
1921 }
1922 self.stack.push(el)
1923 }
1924}
1925
/// Outcome of `scan_reference`.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    /// A link label was scanned: the label and the offset in the source text
    /// at which the label ends.
    LinkLabel(CowStr<'a>, usize),
    /// A collapsed reference (`[]`); carries the node following the closing
    /// node, if any.
    Collapsed(Option<TreeIndex>),
    /// A footnote label was found where a link label was expected.
    UnexpectedFootnote,
    /// Nothing usable was found.
    Failed,
}
1935
1936fn scan_nodes_to_ix(
1939 tree: &Tree<Item>,
1940 mut node: Option<TreeIndex>,
1941 ix: usize,
1942) -> Option<TreeIndex> {
1943 while let Some(node_ix) = node {
1944 if tree[node_ix].item.end <= ix {
1945 node = tree[node_ix].next;
1946 } else {
1947 break;
1948 }
1949 }
1950 node
1951}
1952
1953fn scan_link_label<'text>(
1956 tree: &Tree<Item>,
1957 text: &'text str,
1958 options: Options,
1959) -> Option<(usize, ReferenceLabel<'text>)> {
1960 let bytes = text.as_bytes();
1961 if bytes.len() < 2 || bytes[0] != b'[' {
1962 return None;
1963 }
1964 let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
1965 if options.contains(Options::ENABLE_FOOTNOTES)
1966 && b'^' == bytes[1]
1967 && bytes.get(2) != Some(&b']')
1968 {
1969 let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
1971 if let Some((byte_index, cow)) =
1972 scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1973 {
1974 return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1975 }
1976 }
1977 let (byte_index, cow) =
1978 scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1979 Some((byte_index + 1, ReferenceLabel::Link(cow)))
1980}
1981
1982fn scan_reference<'b>(
1983 tree: &Tree<Item>,
1984 text: &'b str,
1985 cur: Option<TreeIndex>,
1986 options: Options,
1987) -> RefScan<'b> {
1988 let cur_ix = match cur {
1989 None => return RefScan::Failed,
1990 Some(cur_ix) => cur_ix,
1991 };
1992 let start = tree[cur_ix].item.start;
1993 let tail = &text.as_bytes()[start..];
1994
1995 if tail.starts_with(b"[]") {
1996 let Some(closing_node) = tree[cur_ix].next else {
2001 return RefScan::Failed;
2002 };
2003 RefScan::Collapsed(tree[closing_node].next)
2004 } else {
2005 let label = scan_link_label(tree, &text[start..], options);
2006 match label {
2007 Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2008 Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2009 None => RefScan::Failed,
2010 }
2011 }
2012}
2013
/// Stack of pending link and image openers.
#[derive(Clone, Default)]
struct LinkStack {
    /// The pending openers, oldest first.
    inner: Vec<LinkStackEl>,
    /// Entries below this index have already been visited by
    /// `disable_all_links`, so it only needs to look at newer ones.
    disabled_ix: usize,
}
2019
2020impl LinkStack {
2021 fn push(&mut self, el: LinkStackEl) {
2022 self.inner.push(el);
2023 }
2024
2025 fn pop(&mut self) -> Option<LinkStackEl> {
2026 let el = self.inner.pop();
2027 self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2028 el
2029 }
2030
2031 fn clear(&mut self) {
2032 self.inner.clear();
2033 self.disabled_ix = 0;
2034 }
2035
2036 fn disable_all_links(&mut self) {
2037 for el in &mut self.inner[self.disabled_ix..] {
2038 if el.ty == LinkStackTy::Link {
2039 el.ty = LinkStackTy::Disabled;
2040 }
2041 }
2042 self.disabled_ix = self.inner.len();
2043 }
2044}
2045
/// One pending opener on the `LinkStack`.
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node of the opener.
    node: TreeIndex,
    /// Kind of opener, or `Disabled` once it was switched off.
    ty: LinkStackTy,
}
2051
/// Kind of a `LinkStackEl`.
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link opener.
    Link,
    /// An image opener.
    Image,
    /// A link opener that `LinkStack::disable_all_links` has switched off.
    Disabled,
}
2058
/// A link reference definition, stored as the value type of `RefDefs`.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// Link destination.
    pub dest: CowStr<'a>,
    /// Optional link title.
    pub title: Option<CowStr<'a>>,
    /// NOTE(review): presumably the byte range of the definition in the
    /// source text — confirm where definitions are recorded.
    pub span: Range<usize>,
}
2066
2067impl<'a> LinkDef<'a> {
2068 pub fn into_static(self) -> LinkDef<'static> {
2069 LinkDef {
2070 dest: self.dest.into_static(),
2071 title: self.title.map(|s| s.into_static()),
2072 span: self.span,
2073 }
2074 }
2075}
2076
/// Bookkeeping for one footnote definition, stored as the value type of
/// `FootnoteDefs`.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// NOTE(review): presumably the number of references to this footnote
    /// seen so far — confirm where it is incremented.
    pub use_count: usize,
}
2082
/// Index of code-span delimiter nodes, grouped by delimiter length.
struct CodeDelims {
    /// Maps a delimiter length to the queue of tree nodes with that length,
    /// in insertion order.
    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
    /// Set once the first `insert` call has happened; that first delimiter
    /// is deliberately not recorded.
    seen_first: bool,
}
2089
2090impl CodeDelims {
2091 fn new() -> Self {
2092 Self {
2093 inner: Default::default(),
2094 seen_first: false,
2095 }
2096 }
2097
2098 fn insert(&mut self, count: usize, ix: TreeIndex) {
2099 if self.seen_first {
2100 self.inner.entry(count).or_default().push_back(ix);
2101 } else {
2102 self.seen_first = true;
2105 }
2106 }
2107
2108 fn is_populated(&self) -> bool {
2109 !self.inner.is_empty()
2110 }
2111
2112 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2113 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2114 if ix > open_ix {
2115 return Some(ix);
2116 }
2117 }
2118 None
2119 }
2120
2121 fn clear(&mut self) {
2122 self.inner.clear();
2123 self.seen_first = false;
2124 }
2125}
2126
/// Index of math delimiter nodes.
struct MathDelims {
    /// Queues of delimiter entries keyed by a `u8`.
    /// NOTE(review): the meaning of the key and of the two `bool`s is not
    /// visible in this part of the file — confirm before relying on them.
    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
2132
2133impl MathDelims {
2134 fn new() -> Self {
2135 Self {
2136 inner: Default::default(),
2137 }
2138 }
2139
2140 fn clear(&mut self) {
2141 self.inner.clear();
2142 }
2143}
2144
/// Handle to a link entry stored in `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

/// Handle to a `CowStr` stored in `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

/// Handle to a table alignment vector stored in `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

/// Handle to heading attributes stored in `Allocations`. The wrapped value
/// is the position in the backing vector plus one, so it is never zero.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);

/// Handle to JSX element data stored in `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct JsxElementIndex(usize);

/// Handle to directive data stored in `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct DirectiveIndex(usize);
2162
/// One attribute of a JSX element.
///
/// NOTE(review): the field roles below are taken from the binding names used
/// in `into_static` (`n` / `v`, presumably name and value) — confirm against
/// the JSX scanner.
#[derive(Debug, Clone)]
pub(crate) enum JsxAttr<'a> {
    /// Attribute with a name only.
    Boolean(CowStr<'a>),
    /// Attribute with a name and a literal value.
    Literal(CowStr<'a>, CowStr<'a>),
    /// Attribute with a name and an expression value.
    Expression(CowStr<'a>, CowStr<'a>),
    /// Spread attribute; carries a single string.
    Spread(CowStr<'a>),
}
2171
2172impl<'a> JsxAttr<'a> {
2173 pub fn into_static(self) -> JsxAttr<'static> {
2174 match self {
2175 JsxAttr::Boolean(n) => JsxAttr::Boolean(n.into_static()),
2176 JsxAttr::Literal(n, v) => JsxAttr::Literal(n.into_static(), v.into_static()),
2177 JsxAttr::Expression(n, v) => JsxAttr::Expression(n.into_static(), v.into_static()),
2178 JsxAttr::Spread(v) => JsxAttr::Spread(v.into_static()),
2179 }
2180 }
2181}
2182
/// Data recorded for one JSX element tag.
#[derive(Debug, Clone)]
pub(crate) struct JsxElementData<'a> {
    /// Element name.
    pub name: CowStr<'a>,
    /// Attributes of the tag.
    pub attrs: Vec<JsxAttr<'a>>,
    /// NOTE(review): presumably the unparsed source text of the tag — confirm.
    pub raw: CowStr<'a>,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
}
2192
2193impl<'a> JsxElementData<'a> {
2194 pub fn into_static(self) -> JsxElementData<'static> {
2195 JsxElementData {
2196 name: self.name.into_static(),
2197 attrs: self.attrs.into_iter().map(|a| a.into_static()).collect(),
2198 raw: self.raw.into_static(),
2199 is_closing: self.is_closing,
2200 is_self_closing: self.is_self_closing,
2201 }
2202 }
2203}
2204
/// Data recorded for one directive.
#[derive(Debug, Clone)]
pub(crate) struct DirectiveAttrData<'a> {
    /// Directive name.
    pub name: CowStr<'a>,
    /// Attribute pairs of the directive.
    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    /// NOTE(review): presumably the source offset at which the directive's
    /// label starts — confirm against the directive scanner.
    pub label_start: usize,
    /// NOTE(review): presumably the source offset at which the label ends.
    pub label_end: usize,
}
2212
/// Side storage for data that tree items refer to through small index
/// handles (`CowIndex`, `LinkIndex`, …) instead of holding it inline.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Link reference definitions.
    pub refdefs: RefDefs<'a>,
    /// Footnote definitions.
    pub footdefs: FootnoteDefs<'a>,
    /// Link entries, addressed by `LinkIndex`.
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Strings, addressed by `CowIndex`.
    cows: Vec<CowStr<'a>>,
    /// Table column alignments, addressed by `AlignmentIndex`.
    alignments: Vec<Vec<Alignment>>,
    /// Heading attributes, addressed by `HeadingIndex`.
    headings: Vec<HeadingAttributes<'a>>,
    /// JSX element data, addressed by `JsxElementIndex`.
    jsx_elements: Vec<JsxElementData<'a>>,
    /// Directive data, addressed by `DirectiveIndex`.
    directives: Vec<DirectiveAttrData<'a>>,
}
2224
/// Attributes attached to a heading.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Optional heading id.
    pub id: Option<CowStr<'a>>,
    /// Heading classes.
    pub classes: Vec<CowStr<'a>>,
    /// Further attributes as name / optional value pairs.
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
2232
/// Link reference definitions, keyed by link label.
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);

/// Footnote definitions, keyed by footnote label.
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2240
2241impl<'input, 'b, 's> RefDefs<'input>
2242where
2243 's: 'b,
2244{
2245 pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2247 self.0.get(&UniCase::new(key.into()))
2248 }
2249
2250 pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2252 self.0.iter().map(|(k, v)| (k.as_ref(), v))
2253 }
2254}
2255
2256impl<'input, 'b, 's> FootnoteDefs<'input>
2257where
2258 's: 'b,
2259{
2260 pub fn contains(&'s self, key: &'b str) -> bool {
2262 self.0.contains_key(&UniCase::new(key.into()))
2263 }
2264 pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2266 self.0.get_mut(&UniCase::new(key))
2267 }
2268}
2269
2270impl<'a> Allocations<'a> {
2271 pub fn new() -> Self {
2272 Self {
2273 refdefs: RefDefs::default(),
2274 footdefs: FootnoteDefs::default(),
2275 links: Vec::with_capacity(128),
2276 cows: Vec::new(),
2277 alignments: Vec::new(),
2278 headings: Vec::new(),
2279 jsx_elements: Vec::new(),
2280 directives: Vec::new(),
2281 }
2282 }
2283
2284 pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2285 let ix = self.cows.len();
2286 self.cows.push(cow);
2287 CowIndex(ix)
2288 }
2289
2290 pub fn allocate_link(
2291 &mut self,
2292 ty: LinkType,
2293 url: CowStr<'a>,
2294 title: CowStr<'a>,
2295 id: CowStr<'a>,
2296 ) -> LinkIndex {
2297 let ix = self.links.len();
2298 self.links.push((ty, url, title, id));
2299 LinkIndex(ix)
2300 }
2301
2302 pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2303 let ix = self.alignments.len();
2304 self.alignments.push(alignment);
2305 AlignmentIndex(ix)
2306 }
2307
2308 pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2309 let ix = self.headings.len();
2310 self.headings.push(attrs);
2311 let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2314 HeadingIndex(ix_nonzero)
2315 }
2316
2317 pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2318 core::mem::replace(&mut self.cows[ix.0], "".into())
2319 }
2320
2321 pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2322 let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2323 core::mem::replace(&mut self.links[ix.0], default_link)
2324 }
2325
2326 pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2327 core::mem::take(&mut self.alignments[ix.0])
2328 }
2329
2330 pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
2331 let ix = self.jsx_elements.len();
2332 self.jsx_elements.push(data);
2333 JsxElementIndex(ix)
2334 }
2335
2336 pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
2337 let ix = self.directives.len();
2338 self.directives.push(data);
2339 DirectiveIndex(ix)
2340 }
2341
2342 pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
2343 core::mem::replace(
2344 &mut self.directives[ix.0],
2345 DirectiveAttrData {
2346 name: "".into(),
2347 attributes: Vec::new(),
2348 label_start: 0,
2349 label_end: 0,
2350 },
2351 )
2352 }
2353
2354 pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
2355 &self.directives[ix.0]
2356 }
2357
2358 pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
2359 core::mem::replace(
2360 &mut self.jsx_elements[ix.0],
2361 JsxElementData {
2362 name: "".into(),
2363 attrs: Vec::new(),
2364 raw: "".into(),
2365 is_closing: false,
2366 is_self_closing: false,
2367 },
2368 )
2369 }
2370}
2371
2372impl<'a> Index<CowIndex> for Allocations<'a> {
2373 type Output = CowStr<'a>;
2374
2375 fn index(&self, ix: CowIndex) -> &Self::Output {
2376 self.cows.index(ix.0)
2377 }
2378}
2379
2380impl<'a> Index<LinkIndex> for Allocations<'a> {
2381 type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
2382
2383 fn index(&self, ix: LinkIndex) -> &Self::Output {
2384 self.links.index(ix.0)
2385 }
2386}
2387
2388impl<'a> Index<AlignmentIndex> for Allocations<'a> {
2389 type Output = Vec<Alignment>;
2390
2391 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
2392 self.alignments.index(ix.0)
2393 }
2394}
2395
2396impl<'a> Index<HeadingIndex> for Allocations<'a> {
2397 type Output = HeadingAttributes<'a>;
2398
2399 fn index(&self, ix: HeadingIndex) -> &Self::Output {
2400 self.headings.index(ix.0.get() - 1)
2401 }
2402}
2403
/// State shared between inline HTML scans; it is handed mutably to the
/// comment and processing-instruction scanners.
///
/// NOTE(review): each field presumably stores a source offset up to which a
/// scan for the respective construct is already known to fail, so repeated
/// scans can be cut short — confirm against the scanners.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Guard value for CDATA sections.
    pub cdata: usize,
    /// Guard value for processing instructions.
    pub processing: usize,
    /// Guard value for declarations.
    pub declaration: usize,
    /// Guard value for comments.
    pub comment: usize,
}
2416
/// Hooks the parser calls while producing events.
pub trait ParserCallbacks<'input> {
    /// Called with a `BrokenLink`; may return a replacement pair of strings
    /// for it. The default implementation returns `None`.
    ///
    /// NOTE(review): presumably invoked for reference links whose label has
    /// no definition, with the returned pair being `(destination, title)` —
    /// confirm at the call site.
    fn handle_broken_link(
        &mut self,
        #[allow(unused_variables)] link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        None
    }
}
2435
2436#[allow(missing_debug_implementations)]
2440pub struct BrokenLinkCallback<F>(Option<F>);
2441
2442impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
2443where
2444 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
2445{
2446 fn handle_broken_link(
2447 &mut self,
2448 link: BrokenLink<'input>,
2449 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2450 self.0.as_mut().and_then(|cb| cb(link))
2451 }
2452}
2453
2454impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
2455 fn handle_broken_link(
2456 &mut self,
2457 link: BrokenLink<'input>,
2458 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2459 (**self).handle_broken_link(link)
2460 }
2461}
2462
/// `ParserCallbacks` implementation that keeps every default method, so
/// `handle_broken_link` always returns `None`.
#[allow(missing_debug_implementations)]
pub struct DefaultParserCallbacks;

impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
2470
/// Iterator that wraps a `Parser` and yields every event together with a
/// `Range<usize>` for it.
#[derive(Debug)]
pub struct OffsetIter<'a, CB> {
    /// The wrapped parser.
    parser: Parser<'a, CB>,
}

impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
    /// Returns the reference definitions of the wrapped parser.
    pub fn reference_definitions(&self) -> &RefDefs<'_> {
        self.parser.reference_definitions()
    }

    /// Returns the MDX errors of the wrapped parser as
    /// `(offset, message)` pairs.
    pub fn mdx_errors(&self) -> &[(usize, String)] {
        self.parser.mdx_errors()
    }
}
2494
impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
    type Item = (Event<'a>, Range<usize>);

    /// Yields the next event together with its range.
    fn next(&mut self) -> Option<Self::Item> {
        self.parser
            .inner
            .next_event_range(&mut self.parser.callbacks)
    }
}
2504
2505impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
2506 type Item = Event<'a>;
2507
2508 fn next(&mut self) -> Option<Event<'a>> {
2509 self.inner
2510 .next_event_range(&mut self.callbacks)
2511 .map(|(event, _range)| event)
2512 }
2513}
2514
2515impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
2516
impl<'input> ParserInner<'input> {
    /// Produces the next event together with the `start..end` range of the
    /// tree item it was generated from, or `None` when the tree is exhausted.
    ///
    /// `TightParagraph` nodes are transparent: neither a start nor an end
    /// event is emitted for them, only for their children.
    fn next_event_range(
        &mut self,
        callbacks: &mut dyn ParserCallbacks<'input>,
    ) -> Option<(Event<'input>, Range<usize>)> {
        match self.tree.cur() {
            None => {
                // No current node at this level: go back up to the parent.
                // `pop` returning `None` ends the iteration.
                let ix = self.tree.pop()?;
                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
                    // No end event for a tight paragraph: move on and retry.
                    self.tree.next_sibling(ix);
                    return self.next_event_range(callbacks);
                } else {
                    ix
                };
                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
                self.tree.next_sibling(ix);
                let span = self.tree[ix].item.start..self.tree[ix].item.end;
                debug_assert!(span.start <= span.end);
                Some((Event::End(tag_end), span))
            }
            Some(cur_ix) => {
                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
                    // No start event for a tight paragraph: descend into it
                    // and continue with its first child.
                    // NOTE(review): the `unwrap` assumes a tight paragraph
                    // always has at least one child — confirm.
                    self.tree.push();
                    self.tree.cur().unwrap()
                } else {
                    cur_ix
                };
                // Resolve inline candidates before the node is turned into an
                // event.
                if self.tree[cur_ix].item.body.is_maybe_inline() {
                    self.handle_inline(callbacks);
                }

                let node = self.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.text, &mut self.allocs);
                // Start events descend into the node; every other event moves
                // on to the next sibling.
                if let Event::Start(..) = event {
                    self.tree.push();
                } else {
                    self.tree.next_sibling(cur_ix);
                }
                debug_assert!(item.start <= item.end);
                Some((event, item.start..item.end))
            }
        }
    }
}
2564
2565fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2566 match *body {
2567 ItemBody::Paragraph => TagEnd::Paragraph,
2568 ItemBody::Emphasis => TagEnd::Emphasis,
2569 ItemBody::Superscript => TagEnd::Superscript,
2570 ItemBody::Subscript => TagEnd::Subscript,
2571 ItemBody::Strong => TagEnd::Strong,
2572 ItemBody::Strikethrough => TagEnd::Strikethrough,
2573 ItemBody::Link(..) => TagEnd::Link,
2574 ItemBody::Image(..) => TagEnd::Image,
2575 ItemBody::Heading(level, _) => TagEnd::Heading(level),
2576 ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
2577 TagEnd::CodeBlock
2578 }
2579 ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
2580 ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
2581 ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
2582 ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2583 ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
2584 ItemBody::List(_, c, _) => {
2585 let is_ordered = c == b'.' || c == b')';
2586 TagEnd::List(is_ordered)
2587 }
2588 ItemBody::ListItem(_, _) => TagEnd::Item,
2589 ItemBody::TableHead => TagEnd::TableHead,
2590 ItemBody::TableCell => TagEnd::TableCell,
2591 ItemBody::TableRow => TagEnd::TableRow,
2592 ItemBody::Table(..) => TagEnd::Table,
2593 ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2594 ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2595 ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2596 ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2597 ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2598 ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
2599 ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
2600 _ => panic!("unexpected item body {:?}", body),
2601 }
2602}
2603
2604fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2605 let tag = match item.body {
2606 ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2607 ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2608 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2609 ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2610 ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
2611 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2612 ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2613 ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2614 ItemBody::SoftBreak => return Event::SoftBreak,
2615 ItemBody::HardBreak(_) => return Event::HardBreak,
2616 ItemBody::FootnoteReference(cow_ix) => {
2617 return Event::FootnoteReference(allocs.take_cow(cow_ix))
2618 }
2619 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2620 ItemBody::Rule => return Event::Rule,
2621 ItemBody::Paragraph => Tag::Paragraph,
2622 ItemBody::Emphasis => Tag::Emphasis,
2623 ItemBody::Superscript => Tag::Superscript,
2624 ItemBody::Subscript => Tag::Subscript,
2625 ItemBody::Strong => Tag::Strong,
2626 ItemBody::Strikethrough => Tag::Strikethrough,
2627 ItemBody::Link(link_ix) => {
2628 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2629 Tag::Link {
2630 link_type,
2631 dest_url,
2632 title,
2633 id,
2634 }
2635 }
2636 ItemBody::Image(link_ix) => {
2637 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2638 Tag::Image {
2639 link_type,
2640 dest_url,
2641 title,
2642 id,
2643 }
2644 }
2645 ItemBody::Heading(level, Some(heading_ix)) => {
2646 let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2647 Tag::Heading {
2648 level,
2649 id: id.clone(),
2650 classes: classes.clone(),
2651 attrs: attrs.clone(),
2652 }
2653 }
2654 ItemBody::Heading(level, None) => Tag::Heading {
2655 level,
2656 id: None,
2657 classes: Vec::new(),
2658 attrs: Vec::new(),
2659 },
2660 ItemBody::MathBlock(cow_ix) => {
2661 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2662 }
2663 ItemBody::FencedCodeBlock(cow_ix) => {
2664 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2665 }
2666 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2667 ItemBody::ContainerDirective(_, dir_ix)
2668 | ItemBody::LeafDirective(dir_ix)
2669 | ItemBody::TextDirective(dir_ix) => {
2670 let kind = match item.body {
2671 ItemBody::ContainerDirective(..) => DirectiveKind::Container,
2672 ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
2673 _ => DirectiveKind::Text,
2674 };
2675 let dir = allocs.take_directive(dir_ix);
2676 Tag::Directive {
2677 kind,
2678 name: dir.name,
2679 attributes: dir.attributes,
2680 }
2681 }
2682 ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2683 ItemBody::List(is_tight, c, listitem_start) => {
2684 if c == b'.' || c == b')' {
2685 Tag::List(Some(listitem_start), is_tight)
2686 } else {
2687 Tag::List(None, is_tight)
2688 }
2689 }
2690 ItemBody::ListItem(_, _) => Tag::Item,
2691 ItemBody::TableHead => Tag::TableHead,
2692 ItemBody::TableCell => Tag::TableCell,
2693 ItemBody::TableRow => Tag::TableRow,
2694 ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2695 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2696 ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2697 ItemBody::Math(cow_ix, is_display) => {
2698 return if is_display {
2699 Event::DisplayMath(allocs.take_cow(cow_ix))
2700 } else {
2701 Event::InlineMath(allocs.take_cow(cow_ix))
2702 }
2703 }
2704 ItemBody::DefinitionList(_) => Tag::DefinitionList,
2705 ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2706 ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2707 ItemBody::MdxJsxFlowElement(jsx_ix) => {
2708 let jsx = allocs.take_jsx_element(jsx_ix);
2709 Tag::MdxJsxFlowElement(jsx.raw)
2710 }
2711 ItemBody::MdxJsxTextElement(jsx_ix) => {
2712 let jsx = allocs.take_jsx_element(jsx_ix);
2713 Tag::MdxJsxTextElement(jsx.raw)
2714 }
2715 ItemBody::MdxFlowExpression(cow_ix) => {
2716 return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
2717 }
2718 ItemBody::MdxTextExpression(cow_ix) => {
2719 return Event::MdxTextExpression(allocs.take_cow(cow_ix))
2720 }
2721 ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
2722 _ => panic!("unexpected item body {:?}", item.body),
2723 };
2724
2725 Event::Start(tag)
2726}
2727
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::*;
    use crate::tree::Node;

    /// Builds a parser with the tables, footnotes, strikethrough, superscript,
    /// subscript and task-list extensions enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_SUPERSCRIPT);
        opts.insert(Options::ENABLE_SUBSCRIPT);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    /// Pins the size of a tree node on 64-bit targets so growth is noticed.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = core::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    /// Pins the size of `ItemBody` on 64-bit targets so growth is noticed.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = core::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    /// A lone `<` yields exactly three events.
    #[test]
    fn single_open_fish_bracket() {
        assert_eq!(3, Parser::new("<").count());
    }

    /// A lone `#` yields exactly two events.
    #[test]
    fn lone_hashtag() {
        assert_eq!(2, Parser::new("#").count());
    }

    /// Backslashes mixed with carriage returns must parse without panicking.
    #[test]
    fn lots_of_backslashes() {
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    /// Checks the source ranges reported for a wikilink with display text.
    #[test]
    fn issue_1030() {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_WIKILINKS);

        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);

        let offsets = parser
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect::<Vec<_>>();
        let expected_offsets = vec![
            (0..44),  // the whole line
            (0..19),  // "For a new ferrari, "
            (19..43), // "[[Wikientry|click here]]"
            (31..41), // "click here"
            (19..43), // "[[Wikientry|click here]]"
            (43..44), // "!"
            (0..44),  // the whole line
        ];
        assert_eq!(offsets, expected_offsets);
    }

    /// Regression input: must parse to completion without panicking.
    #[test]
    fn issue_320() {
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    /// Regression inputs: must parse to completion without panicking.
    #[test]
    fn issue_319() {
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    /// Regression inputs: must parse to completion without panicking.
    #[test]
    fn issue_303() {
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    /// Regression inputs: must parse to completion without panicking.
    #[test]
    fn issue_313() {
        parser_with_extensions("*]0[^\r\r*]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    /// Regression input: must parse to completion without panicking.
    #[test]
    fn issue_311() {
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    /// Regression input containing four-byte UTF-8 sequences: must not panic.
    #[test]
    fn issue_283() {
        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        parser_with_extensions(input).count();
    }

    /// Regression inputs: must parse to completion without panicking.
    #[test]
    fn issue_289() {
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    /// Regression input: must parse to completion without panicking.
    #[test]
    fn issue_306() {
        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
    }

    /// Regression input: must parse to completion without panicking.
    #[test]
    fn issue_305() {
        parser_with_extensions("_6**6*_*").count();
    }

    /// Regression input for emphasis handling: must not panic.
    #[test]
    fn another_emphasis_panic() {
        parser_with_extensions("*__#_#__*").count();
    }

    /// Checks the source range attached to every event of a short paragraph.
    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The first reference link's range covers `[testing][Some reference]`.
    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .filter_map(|(ev, range)| match ev {
                    Event::Start(
                        Tag::Link {
                            link_type: LinkType::Reference,
                            ..
                        },
                        ..,
                    ) => Some(range),
                    _ => None,
                })
                .next()
                .unwrap();
        assert_eq!(5..30, range);
    }

    /// The first footnote reference's range covers `[^1]`.
    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .next()
            .unwrap();
        assert_eq!(12..16, range);
    }

    /// A `!` directly before a footnote reference stays a separate text event
    /// and is not included in the footnote reference's range.
    #[test]
    fn footnote_offsets_exclamation() {
        // Tracks the last event seen before the footnote reference is reached.
        let mut immediately_before_footnote = None;
        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => {
                    immediately_before_footnote = Some((ev, range));
                    None
                }
            })
            .next()
            .unwrap();
        assert_eq!(13..17, range);
        if let (Event::Text(exclamation), range_exclamation) =
            immediately_before_footnote.as_ref().unwrap()
        {
            assert_eq!("!", &exclamation[..]);
            assert_eq!(&(12..13), range_exclamation);
        } else {
            panic!("what came first, then? {immediately_before_footnote:?}");
        }
    }

    /// The fourth event's range spans everything after the leading `a\n\n`.
    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    /// Checks the range of the fifth table cell, the blank one in the body row.
    #[test]
    fn table_cell_span() {
        // The blank cell must contain exactly two spaces: the expected span
        // below is three bytes long and begins at the pipe that opens the cell.
        let markdown = "a|b|c\n--|--|--\na|  |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            .nth(4)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na".len();
        // Expected bytes: the opening pipe followed by the two spaces.
        assert_eq!(
            expected_offset_start..(expected_offset_start + 3),
            event_offset
        );
    }

    /// Checks the source ranges of the events around an inline link.
    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// `###` followed by a newline yields two events, both spanning the input.
    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// Counts how many times the broken-link callback runs for each input.
    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            // Drain the parser so every callback invocation has happened.
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    /// The URL and title returned by the callback appear on the emitted link,
    /// and the callback receives the reference text and full link span.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title, id) in parser.filter_map(|event| match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }) => Some((link_type, dest_url, title, id)),
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
        // Guards against the loop above passing vacuously.
        assert!(link_tag_count > 0);
    }

    /// A fenced block is reported as `CodeBlockKind::Fenced` with its info string.
    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// A four-space indented block is reported as `CodeBlockKind::Indented`.
    #[test]
    fn code_block_kind_check_indented() {
        // The four leading spaces are essential: a fence indented by fewer
        // than four columns would open a fenced code block instead.
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Reference definitions can be looked up by their lower-cased label,
    /// including through a key that lives shorter than the parser.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if let Some(_event) = parser.next() {
            // The key is an owned string scoped to this block.
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    /// Compile-time check that closures and plain functions are both accepted
    /// as broken-link callbacks, for static and for temporary input strings.
    #[test]
    // `'b` exists only so the closure parameter below can name a lifetime.
    #[allow(clippy::extra_unused_lifetimes)]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }

    /// Inline HTML continued across a block-quote line is emitted as one owned
    /// string without the `> ` prefix of the continuation line.
    #[test]
    fn inline_html_inside_blockquote() {
        let input = "> <foo\n> bar>";
        let events: Vec<_> = Parser::new(input).collect();
        let expected = [
            Event::Start(Tag::BlockQuote(None)),
            Event::Start(Tag::Paragraph),
            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
            Event::End(TagEnd::Paragraph),
            Event::End(TagEnd::BlockQuote(None)),
        ];
        assert_eq!(&events, &expected);
    }

    /// `has_pothole` is set only for wikilinks written with a `|` separator.
    #[test]
    fn wikilink_has_pothole() {
        let input = "[[foo]] [[bar|baz]]";
        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
        let expected = [
            Event::Start(Tag::Paragraph),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: false },
                dest_url: CowStr::Borrowed("foo"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("foo")),
            Event::End(TagEnd::Link),
            Event::Text(CowStr::Borrowed(" ")),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: true },
                dest_url: CowStr::Borrowed("bar"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("baz")),
            Event::End(TagEnd::Link),
            Event::End(TagEnd::Paragraph),
        ];
        assert_eq!(&events, &expected);
    }

    /// Builds a parser with only the MDX extension enabled.
    fn mdx_parser(text: &str) -> Parser<'_> {
        Parser::new_ext(text, Options::ENABLE_MDX)
    }

    /// An `import` line becomes a single `MdxEsm` event.
    #[test]
    fn mdx_esm_import() {
        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
    }

    /// An `export` line becomes a single `MdxEsm` event.
    #[test]
    fn mdx_esm_export() {
        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
    }

    /// A braced line becomes a single `MdxFlowExpression` without its braces.
    #[test]
    fn mdx_flow_expression() {
        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
    }

    /// A self-closing JSX line starts with an `MdxJsxFlowElement` tag.
    #[test]
    fn mdx_jsx_flow_self_closing() {
        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
        assert!(!events.is_empty());
        assert!(
            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
        );
    }

    /// A fragment opener `<>` starts with an `MdxJsxFlowElement` tag.
    #[test]
    fn mdx_jsx_flow_fragment() {
        let events: Vec<_> = mdx_parser("<>\n").collect();
        assert!(!events.is_empty());
        assert!(matches!(
            &events[0],
            Event::Start(Tag::MdxJsxFlowElement(_))
        ));
    }

    /// A braced expression inside a paragraph yields `MdxTextExpression`.
    #[test]
    fn mdx_inline_expression() {
        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(
            has_expr,
            "Expected inline MDX expression, got: {:?}",
            events
        );
    }

    /// A JSX tag inside a paragraph yields an `MdxJsxTextElement` tag.
    #[test]
    fn mdx_inline_jsx() {
        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
    }

    /// With MDX enabled, even a plain HTML tag such as `<em>` is treated as JSX.
    #[test]
    fn mdx_all_tags_are_jsx() {
        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
    }

    /// Without the MDX option an `import` line is parsed as a paragraph.
    #[test]
    fn mdx_does_not_interfere_without_flag() {
        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
        assert!(events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
    }

    /// A braced expression inside an ATX heading yields `MdxTextExpression`.
    #[test]
    fn mdx_expression_in_heading() {
        let events: Vec<_> = mdx_parser("# {title}\n").collect();
        let has_heading = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
        assert!(has_heading, "Should have a heading");
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
        assert!(
            has_expr,
            "Heading should contain MdxTextExpression, got: {:?}",
            events
        );
    }

    /// Plain text and a braced expression can coexist inside one heading.
    #[test]
    fn mdx_expression_mixed_text_in_heading() {
        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
        let has_text = events
            .iter()
            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(has_text, "Should have text, got: {:?}", events);
        assert!(has_expr, "Should have expression, got: {:?}", events);
    }
}