1use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25 cmp::{max, min},
26 iter::FusedIterator,
27 num::NonZeroUsize,
28 ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33#[cfg(feature = "mdx")]
34use crate::mdx::*;
35use crate::{
36 firstpass::run_first_pass,
37 linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
38 scanners::*,
39 strings::CowStr,
40 tree::{Tree, TreeIndex},
41 Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
42 MetadataBlockKind, Options, Tag, TagEnd,
43};
44
/// Crate-wide limit named after nested parentheses in links.
///
/// NOTE(review): presumably the maximum nesting depth of `(`…`)` accepted while scanning a
/// link destination. The code that consumes this constant is not visible here; confirm
/// against the scanner before relying on this description.
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
51
52#[derive(Debug, Default, Clone, Copy)]
53pub(crate) struct Item {
54 pub start: usize,
55 pub end: usize,
56 pub body: ItemBody,
57}
58
59#[derive(Debug, PartialEq, Clone, Copy, Default)]
60pub(crate) enum ItemBody {
61 MaybeEmphasis(usize, bool, bool),
65 MaybeMath(bool, u8),
67 MaybeSmartQuote(u8, bool, bool),
69 MaybeCode(usize, bool), MaybeHtml,
71 MaybeLinkOpen,
72 MaybeLinkClose(bool),
74 MaybeImage,
75
76 Emphasis,
78 Strong,
79 Strikethrough,
80 Superscript,
81 Subscript,
82 Math(CowIndex, bool), Code(CowIndex),
84 Link(LinkIndex),
85 Image(LinkIndex),
86 FootnoteReference(CowIndex),
87 TaskListMarker(bool), InlineHtml,
91 OwnedInlineHtml(CowIndex),
92 SynthesizeText(CowIndex),
93 SynthesizeChar(char),
94 Html,
95 Text {
96 backslash_escaped: bool,
97 },
98 SoftBreak,
99 HardBreak(bool),
101
102 #[default]
104 Root,
105
106 Paragraph,
108 TightParagraph,
109 Rule,
110 Heading(HeadingLevel, Option<HeadingIndex>), FencedCodeBlock(CowIndex),
112 MathBlock(CowIndex), IndentCodeBlock(bool),
117 HtmlBlock(bool), BlockQuote(Option<BlockQuoteKind>),
121 ContainerDirective(u8, DirectiveIndex), LeafDirective(DirectiveIndex),
123 TextDirective(DirectiveIndex),
124 DirectiveLabel,
128 List(bool, u8, u64), ListItem(usize, bool), FootnoteDefinition(CowIndex),
131 MetadataBlock(MetadataBlockKind),
132
133 DefinitionList(bool), MaybeDefinitionListTitle,
138 DefinitionListTitle,
139 DefinitionListDefinition(usize),
140
141 Table(AlignmentIndex),
143 TableHead,
144 TableRow,
145 TableCell,
146
147 #[cfg(feature = "mdx")]
149 MdxJsxFlowElement(JsxElementIndex),
150 #[cfg(feature = "mdx")]
151 MdxJsxTextElement(JsxElementIndex),
152 #[cfg(feature = "mdx")]
153 MdxFlowExpression(CowIndex),
154 #[cfg(feature = "mdx")]
155 MdxTextExpression(CowIndex),
156 #[cfg(feature = "mdx")]
157 MdxEsm(CowIndex),
158}
159
160impl ItemBody {
161 pub(crate) fn is_maybe_inline(&self) -> bool {
162 use ItemBody::*;
163 matches!(
164 *self,
165 MaybeEmphasis(..)
166 | MaybeMath(..)
167 | MaybeSmartQuote(..)
168 | MaybeCode(..)
169 | MaybeHtml
170 | MaybeLinkOpen
171 | MaybeLinkClose(..)
172 | MaybeImage
173 )
174 }
175 pub(crate) fn is_block_level(&self) -> bool {
176 !self.is_inline() && !matches!(self, ItemBody::Root)
177 }
178 fn is_inline(&self) -> bool {
179 use ItemBody::*;
180 matches!(
181 *self,
182 MaybeEmphasis(..)
183 | MaybeMath(..)
184 | MaybeSmartQuote(..)
185 | MaybeCode(..)
186 | MaybeHtml
187 | MaybeLinkOpen
188 | MaybeLinkClose(..)
189 | MaybeImage
190 | Emphasis
191 | Strong
192 | Strikethrough
193 | Math(..)
194 | Code(..)
195 | Link(..)
196 | Image(..)
197 | FootnoteReference(..)
198 | TaskListMarker(..)
199 | InlineHtml
200 | OwnedInlineHtml(..)
201 | SynthesizeText(..)
202 | SynthesizeChar(..)
203 | Html
204 | Text { .. }
205 | SoftBreak
206 | HardBreak(..)
207 )
208 }
209}
210
211#[derive(Debug)]
212pub struct BrokenLink<'a> {
213 pub span: core::ops::Range<usize>,
214 pub link_type: LinkType,
215 pub reference: CowStr<'a>,
216}
217
/// Markdown parser over an input string, parameterised by a callbacks type `CB`
/// (defaulting to [`DefaultParserCallbacks`]).
pub struct Parser<'input, CB = DefaultParserCallbacks> {
    /// User-supplied callbacks (e.g. for broken links).
    callbacks: CB,
    /// All parsing state: source text, options, tree, allocations and inline-pass scratch data.
    inner: ParserInner<'input>,
}
223
/// Callback-independent parser state shared by the inline passes.
pub(crate) struct ParserInner<'input> {
    /// The source text being parsed.
    pub(crate) text: &'input str,
    /// The options the parser was created with.
    pub(crate) options: Options,
    /// The tree produced by the first pass and refined by the inline passes.
    pub(crate) tree: Tree<Item>,
    /// Side tables (links, owned strings, reference and footnote definitions, ...) that
    /// tree items refer to by index.
    pub(crate) allocs: Allocations<'input>,
    // NOTE(review): presumably guards repeated inline-HTML scanning; its use is not visible here.
    html_scan_guard: HtmlScanGuard,

    /// Remaining budget for reference-link expansion. Each resolved reference subtracts
    /// `url.len() + title.len()`; once it reaches zero, further lookups return `None`.
    link_ref_expansion_limit: usize,

    /// MDX diagnostics as `(byte offset, message)` pairs.
    pub(crate) mdx_errors: Vec<(usize, String)>,

    /// Delimiter stack used while resolving emphasis within one scope.
    inline_stack: InlineStack,
    /// Stack of pending `[` / image openers for ordinary links.
    link_stack: LinkStack,
    /// Stack of pending doubled openers (`[[`) when wikilinks are enabled.
    wikilink_stack: LinkStack,
    /// Cache of code-span delimiters found while scanning ahead.
    code_delims: CodeDelims,
    // Cleared at the end of the first inline pass; no other use is visible in this part of the file.
    math_delims: MathDelims,
}
261
262impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
263 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
264 f.debug_struct("Parser")
266 .field("text", &self.inner.text)
267 .field("options", &self.inner.options)
268 .field("callbacks", &..)
269 .finish()
270 }
271}
272
273impl<'a> BrokenLink<'a> {
274 pub fn into_static(self) -> BrokenLink<'static> {
278 BrokenLink {
279 span: self.span.clone(),
280 link_type: self.link_type,
281 reference: self.reference.into_string().into(),
282 }
283 }
284}
285
286impl<'input> Parser<'input, DefaultParserCallbacks> {
287 pub fn new(text: &'input str) -> Self {
289 Self::new_ext(text, Options::empty())
290 }
291
292 pub fn new_ext(text: &'input str, options: Options) -> Self {
294 Self::new_with_callbacks(text, options, DefaultParserCallbacks)
295 }
296}
297
298impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
299 pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
324 let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
325 tree.reset();
326 let inline_stack = Default::default();
327 let link_stack = Default::default();
328 let wikilink_stack = Default::default();
329 let html_scan_guard = Default::default();
330 Parser {
331 callbacks,
332
333 inner: ParserInner {
334 text,
335 options,
336 tree,
337 allocs,
338 inline_stack,
339 link_stack,
340 wikilink_stack,
341 html_scan_guard,
342 link_ref_expansion_limit: text.len().max(100_000),
344 mdx_errors: Vec::new(),
345 code_delims: CodeDelims::new(),
346 math_delims: MathDelims::new(),
347 },
348 }
349 }
350
351 pub fn reference_definitions(&self) -> &RefDefs<'_> {
354 &self.inner.allocs.refdefs
355 }
356
357 pub fn mdx_errors(&self) -> &[(usize, String)] {
360 &self.inner.mdx_errors
361 }
362
363 pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
367 OffsetIter { parser: self }
368 }
369}
370
371impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
372 pub fn new_with_broken_link_callback(
381 text: &'input str,
382 options: Options,
383 broken_link_callback: Option<F>,
384 ) -> Self
385 where
386 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
387 {
388 Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
389 }
390}
391
392impl<'input> ParserInner<'input> {
393 pub(crate) fn new(text: &'input str, options: Options) -> Self {
394 let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
395 tree.reset();
396 ParserInner {
397 text,
398 options,
399 tree,
400 allocs,
401 inline_stack: Default::default(),
402 link_stack: Default::default(),
403 wikilink_stack: Default::default(),
404 html_scan_guard: Default::default(),
405 link_ref_expansion_limit: text.len().max(100_000),
406 mdx_errors: firstpass_mdx_errors,
407 code_delims: CodeDelims::new(),
408 math_delims: MathDelims::new(),
409 }
410 }
411
412 fn fetch_link_type_url_title(
431 &mut self,
432 link_label: CowStr<'input>,
433 span: Range<usize>,
434 link_type: LinkType,
435 callbacks: &mut dyn ParserCallbacks<'input>,
436 ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
437 if self.link_ref_expansion_limit == 0 {
438 return None;
439 }
440
441 let (link_type, url, title) = self
442 .allocs
443 .refdefs
444 .get(link_label.as_ref())
445 .map(|matching_def| {
446 let title = matching_def
448 .title
449 .as_ref()
450 .cloned()
451 .unwrap_or_else(|| "".into());
452 let url = matching_def.dest.clone();
453 (link_type, url, title)
454 })
455 .or_else(|| {
456 let broken_link = BrokenLink {
458 span,
459 link_type,
460 reference: link_label,
461 };
462
463 callbacks
464 .handle_broken_link(broken_link)
465 .map(|(url, title)| (link_type.to_unknown(), url, title))
466 })?;
467
468 self.link_ref_expansion_limit = self
472 .link_ref_expansion_limit
473 .saturating_sub(url.len() + title.len());
474
475 Some((link_type, url, title))
476 }
477
478 pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
485 self.handle_inline_pass1(callbacks);
486 let st_enabled = self.options.contains(Options::ENABLE_STRIKETHROUGH)
502 || self.options.contains(Options::ENABLE_SUBSCRIPT)
503 || self.options.contains(Options::ENABLE_SUPERSCRIPT);
504 if !st_enabled {
505 self.handle_emphasis_pass();
506 return;
507 }
508 let strikethrough_first = matches!(
509 self.first_inline_marker_char(self.tree.cur()),
510 Some(b'~') | Some(b'^')
511 );
512 if strikethrough_first {
513 self.handle_tildes_carets_pass();
514 self.handle_emphasis_pass();
515 } else {
516 self.handle_emphasis_pass();
517 self.handle_tildes_carets_pass();
518 }
519 }
520
521 fn first_inline_marker_char(&self, start: Option<TreeIndex>) -> Option<u8> {
524 let mut cur = start;
525 while let Some(cur_ix) = cur {
526 if let ItemBody::MaybeEmphasis(_, _, _) = self.tree[cur_ix].item.body {
527 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
528 if matches!(c, b'*' | b'_' | b'~' | b'^') {
529 return Some(c);
530 }
531 }
532 cur = self.tree[cur_ix].next;
533 }
534 None
535 }
536
537 fn handle_emphasis_pass(&mut self) {
542 let start = self.tree.cur();
543 self.resolve_emphasis_recursive(start);
544 }
545
546 fn resolve_emphasis_recursive(&mut self, start: Option<TreeIndex>) {
547 let saved = core::mem::take(&mut self.inline_stack);
551 self.handle_emphasis_in_scope(start);
552 self.inline_stack = saved;
553
554 let mut cur = start;
555 while let Some(cur_ix) = cur {
556 let next = self.tree[cur_ix].next;
557 match self.tree[cur_ix].item.body {
558 ItemBody::Emphasis
559 | ItemBody::Strong
560 | ItemBody::Strikethrough
561 | ItemBody::Subscript
562 | ItemBody::Superscript
563 | ItemBody::Link(_)
564 | ItemBody::Image(_) => {
565 let child = self.tree[cur_ix].child;
566 self.resolve_emphasis_recursive(child);
567 }
568 _ => {}
569 }
570 cur = next;
571 }
572 }
573
    /// First inline pass over the sibling chain at the tree's current position.
    ///
    /// Every provisional marker except `MaybeEmphasis` is resolved here:
    /// `MaybeHtml` becomes an autolink, inline HTML, an MDX JSX element (with MDX enabled)
    /// or plain text; `MaybeMath` / `MaybeCode` are paired with a matching closing run or
    /// turned into text; `MaybeLinkOpen` / `MaybeImage` are pushed onto the link stacks; and
    /// `MaybeLinkClose` tries, in order, a wikilink, an inline link, a footnote reference and
    /// a reference link. Emphasis delimiters are left for the later passes.
    ///
    /// `callbacks` is consulted for reference labels that have no definition. The link
    /// stacks and delimiter caches are cleared before returning.
    ///
    /// # Panics
    ///
    /// Panics if the tree has no parent node on its spine (`peek_up()` returns `None`).
    fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
        let mut cur = self.tree.cur();
        // The node visited just before `cur`; used to cut the sibling chain when the
        // nodes in between become children of a link.
        let mut prev = None;

        // All scanning below is limited to the text up to the end of the enclosing block.
        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
        let block_text = &self.text[..block_end];

        while let Some(mut cur_ix) = cur {
            match self.tree[cur_ix].item.body {
                ItemBody::MaybeHtml => {
                    #[cfg(feature = "mdx")]
                    // With MDX enabled a `<` is never an autolink or raw HTML: it is either
                    // a JSX tag or plain text, so this branch always `continue`s.
                    if self.options.contains(Options::ENABLE_MDX) {
                        let start = self.tree[cur_ix].item.start;
                        let next_byte = block_text.as_bytes().get(start + 1).copied();

                        // `<!` is reported as an error and the `<` is kept as text.
                        if next_byte == Some(b'!') {
                            self.mdx_errors.push((
                                start,
                                "Unexpected character `!` (U+0021) before name, expected a \
                                 character that can start a name, such as a letter, `$`, or `_` \
                                 (note: to create a comment in MDX, use `{/* text */}`)"
                                    .to_string(),
                            ));
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                            prev = cur;
                            cur = self.tree[cur_ix].next;
                            continue;
                        }

                        if let Some(total_len) =
                            scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
                        {
                            // A JSX tag was recognised: this node absorbs the whole tag and
                            // the sibling chain resumes at the first node reaching past `end`.
                            let end = start + total_len;
                            let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
                            let raw = &block_text[start..end];
                            let col = crate::mdx::column_at(block_text.as_bytes(), start);
                            let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col, 0);
                            let mut allocator = oxc_allocator::Allocator::default();
                            crate::mdx::validate_jsx_expressions(
                                &jsx_data.attrs,
                                start,
                                &mut allocator,
                                &mut self.mdx_errors,
                            );
                            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
                            self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
                            self.tree[cur_ix].item.end = end;
                            self.tree[cur_ix].next = node;
                            prev = cur;
                            cur = node;
                            if let Some(node_ix) = cur {
                                // The following node must not overlap the tag.
                                self.tree[node_ix].item.start =
                                    max(self.tree[node_ix].item.start, end);
                            }
                            continue;
                        }

                        let bytes_block = block_text.as_bytes();
                        // Decide whether the `<` silently falls back to text (`true`) or is
                        // additionally reported as an MDX error (`false`).
                        let is_text_fallback = match next_byte {
                            Some(b' ' | b'\t') => true,
                            Some(b'\n' | b'\r') => {
                                // Number of block quotes on the current spine.
                                let bq_depth = self
                                    .tree
                                    .walk_spine()
                                    .filter(|&&ix| {
                                        matches!(self.tree[ix].item.body, ItemBody::BlockQuote(..))
                                    })
                                    .count();
                                // Advance `probe` past whitespace and, when inside block
                                // quotes, past up to `bq_depth` `>` markers per round.
                                let mut probe = start + 1;
                                loop {
                                    while probe < bytes_block.len()
                                        && matches!(
                                            bytes_block[probe],
                                            b' ' | b'\t' | b'\n' | b'\r'
                                        )
                                    {
                                        probe += 1;
                                    }
                                    if bq_depth == 0
                                        || probe >= bytes_block.len()
                                        || bytes_block[probe] != b'>'
                                    {
                                        break;
                                    }
                                    let mut consumed = 0;
                                    while consumed < bq_depth
                                        && probe < bytes_block.len()
                                        && bytes_block[probe] == b'>'
                                    {
                                        probe += 1;
                                        if probe < bytes_block.len() && bytes_block[probe] == b' ' {
                                            probe += 1;
                                        }
                                        consumed += 1;
                                    }
                                }
                                if probe >= bytes_block.len() || bytes_block[probe] == b'>' {
                                    false
                                } else {
                                    // NOTE(review): the names suggest this checks whether the
                                    // next content is a setext-style underline (`-`/`=` run);
                                    // confirm against the first pass.
                                    let underline_char = bytes_block[probe];
                                    if !matches!(underline_char, b'-' | b'=') {
                                        true
                                    } else {
                                        // Skip the run of the same character, then trailing
                                        // spaces/tabs, and see whether the line ends there.
                                        let mut q = probe;
                                        while q < bytes_block.len()
                                            && bytes_block[q] == underline_char
                                        {
                                            q += 1;
                                        }
                                        while q < bytes_block.len()
                                            && matches!(bytes_block[q], b' ' | b'\t')
                                        {
                                            q += 1;
                                        }
                                        let at_eol = q >= bytes_block.len()
                                            || matches!(bytes_block[q], b'\n' | b'\r');
                                        if !at_eol {
                                            true
                                        } else {
                                            // `ls`: start of the line containing the `<`.
                                            let mut ls = start;
                                            while ls > 0
                                                && !matches!(bytes_block[ls - 1], b'\n' | b'\r')
                                            {
                                                ls -= 1;
                                            }
                                            // Skip up to three leading spaces on that line.
                                            let mut k = ls;
                                            let mut sp = 0;
                                            while k < start && bytes_block[k] == b' ' && sp < 3 {
                                                k += 1;
                                                sp += 1;
                                            }
                                            if k < start && bytes_block[k] == b'>' {
                                                true
                                            } else {
                                                // `us`: start of the line containing `probe`;
                                                // `underline_col`: leading spaces on it.
                                                let mut us = probe;
                                                while us > 0
                                                    && !matches!(bytes_block[us - 1], b'\n' | b'\r')
                                                {
                                                    us -= 1;
                                                }
                                                let mut underline_col = 0;
                                                let mut uk = us;
                                                while uk < probe && bytes_block[uk] == b' ' {
                                                    uk += 1;
                                                    underline_col += 1;
                                                }
                                                // Indent of the first list item on the spine.
                                                let listitem_indent = self
                                                    .tree
                                                    .walk_spine()
                                                    .filter_map(|&ix| {
                                                        match self.tree[ix].item.body {
                                                            ItemBody::ListItem(indent, _) => {
                                                                Some(indent)
                                                            }
                                                            _ => None,
                                                        }
                                                    })
                                                    .next();
                                                let in_blockquote =
                                                    self.tree.walk_spine().any(|&ix| {
                                                        matches!(
                                                            self.tree[ix].item.body,
                                                            ItemBody::BlockQuote(..)
                                                        )
                                                    });
                                                let bq_lazy = if in_blockquote {
                                                    underline_col < 1
                                                        || !bytes_block[us..probe].contains(&b'>')
                                                } else {
                                                    false
                                                };
                                                matches!(listitem_indent, Some(i) if underline_col < i)
                                                    || bq_lazy
                                            }
                                        }
                                    }
                                }
                            }
                            _ => false,
                        };
                        if !is_text_fallback {
                            self.mdx_errors.push((
                                start,
                                "Unexpected character after `<`, expected a valid JSX tag \
                                 (note: to create a link in MDX, use `[text](url)`)"
                                    .to_string(),
                            ));
                        }

                        // Either way the `<` itself ends up as plain text.
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }

                    // Non-MDX path: try an autolink first, then inline HTML.
                    let next = self.tree[cur_ix].next;
                    let autolink = if let Some(next_ix) = next {
                        scan_autolink(block_text, self.tree[next_ix].item.start)
                    } else {
                        None
                    };

                    if let Some((ix, uri, link_type)) = autolink {
                        let node = scan_nodes_to_ix(&self.tree, next, ix);
                        // The link's child is the text between the `<` and the last byte
                        // before `ix`.
                        let text_node = self.tree.create_node(Item {
                            start: self.tree[cur_ix].item.start + 1,
                            end: ix - 1,
                            body: ItemBody::Text {
                                backslash_escaped: false,
                            },
                        });
                        let link_ix =
                            self.allocs
                                .allocate_link(link_type, uri, "".into(), "".into());
                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
                        self.tree[cur_ix].item.end = ix;
                        self.tree[cur_ix].next = node;
                        self.tree[cur_ix].child = Some(text_node);
                        prev = cur;
                        cur = node;
                        if let Some(node_ix) = cur {
                            let orig_start = self.tree[node_ix].item.start;
                            let new_start = max(orig_start, ix);
                            self.tree[node_ix].item.start = new_start;
                            // If the following text node was trimmed, its escape flag is reset.
                            if new_start > orig_start {
                                if let ItemBody::Text { backslash_escaped } =
                                    &mut self.tree[node_ix].item.body
                                {
                                    *backslash_escaped = false;
                                }
                            }
                        }
                        continue;
                    } else {
                        let inline_html = next.and_then(|next_ix| {
                            self.scan_inline_html(
                                block_text.as_bytes(),
                                self.tree[next_ix].item.start,
                            )
                        });
                        if let Some((span, ix)) = inline_html {
                            let node = scan_nodes_to_ix(&self.tree, next, ix);
                            // A non-empty `span` carries owned HTML bytes; an empty one means
                            // the HTML is taken from the item's source range.
                            self.tree[cur_ix].item.body = if !span.is_empty() {
                                let converted_string =
                                    String::from_utf8(span).expect("invalid utf8");
                                ItemBody::OwnedInlineHtml(
                                    self.allocs.allocate_cow(converted_string.into()),
                                )
                            } else {
                                ItemBody::InlineHtml
                            };
                            self.tree[cur_ix].item.end = ix;
                            self.tree[cur_ix].next = node;
                            prev = cur;
                            cur = node;
                            if let Some(node_ix) = cur {
                                let orig_start = self.tree[node_ix].item.start;
                                let new_start = max(orig_start, ix);
                                self.tree[node_ix].item.start = new_start;
                                if new_start > orig_start {
                                    if let ItemBody::Text { backslash_escaped } =
                                        &mut self.tree[node_ix].item.body
                                    {
                                        *backslash_escaped = false;
                                    }
                                }
                            }
                            continue;
                        }
                    }
                    // Neither an autolink nor inline HTML: the `<` is plain text.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                }
                ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
                    // An escaped delimiter is literal text.
                    if preceded_by_backslash {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: true,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }
                    // Count the run of directly adjacent `MaybeMath` nodes (each starting
                    // where the previous one ends); `open_end` is the last node of the run.
                    let mut open_count = 1usize;
                    let mut open_end = cur_ix;
                    {
                        let mut peek = self.tree[cur_ix].next;
                        while let Some(peek_ix) = peek {
                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                            {
                                open_count += 1;
                                open_end = peek_ix;
                                peek = self.tree[peek_ix].next;
                            } else {
                                break;
                            }
                        }
                    }

                    // Single-delimiter and multi-delimiter math are enabled by separate options.
                    let count_enabled = if open_count == 1 {
                        self.options.contains(Options::ENABLE_MATH_SINGLE_DOLLAR)
                    } else {
                        self.options.contains(Options::ENABLE_MATH_MULTI_DOLLAR)
                    };
                    if !count_enabled {
                        // Turn the whole opening run into text.
                        let mut text_ix = cur_ix;
                        loop {
                            self.tree[text_ix].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                            if text_ix == open_end {
                                break;
                            }
                            match self.tree[text_ix].next {
                                Some(next) => text_ix = next,
                                None => break,
                            }
                        }
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }

                    // Look ahead for a closing run with exactly `open_count` delimiters.
                    let mut scan = self.tree[open_end].next;
                    let mut close_ix = None;
                    while let Some(scan_ix) = scan {
                        if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
                            let mut run = 1usize;
                            let mut run_end = scan_ix;
                            let mut peek = self.tree[scan_ix].next;
                            while let Some(peek_ix) = peek {
                                if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                                    && self.tree[peek_ix].item.start == self.tree[run_end].item.end
                                {
                                    run += 1;
                                    run_end = peek_ix;
                                    peek = self.tree[peek_ix].next;
                                } else {
                                    break;
                                }
                            }
                            if run == open_count {
                                close_ix = Some(scan_ix);
                                break;
                            }
                            // Wrong length: skip the whole run and keep looking.
                            scan = self.tree[run_end].next;
                            continue;
                        }
                        scan = self.tree[scan_ix].next;
                    }

                    if let Some(scan_ix) = close_ix {
                        self.make_math_span(cur_ix, scan_ix);
                    } else {
                        // No closer: the opening run becomes text.
                        let mut fail_ix = cur_ix;
                        loop {
                            self.tree[fail_ix].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                            if fail_ix == open_end {
                                break;
                            }
                            if let Some(next) = self.tree[fail_ix].next {
                                fail_ix = next;
                            } else {
                                break;
                            }
                        }
                    }
                }
                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
                    // A preceding backslash uses up one delimiter of the run; if nothing is
                    // left the node is escaped text.
                    if preceded_by_backslash {
                        search_count -= 1;
                        if search_count == 0 {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: true,
                            };
                            prev = cur;
                            cur = self.tree[cur_ix].next;
                            continue;
                        }
                    }

                    if self.code_delims.is_populated() {
                        // Delimiters recorded by an earlier scan can be reused.
                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                        } else {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: preceded_by_backslash,
                            };
                        }
                    } else {
                        // Walk the siblings looking for a run of the same length, recording
                        // every other run seen along the way.
                        let mut scan = if search_count > 0 {
                            self.tree[cur_ix].next
                        } else {
                            None
                        };
                        while let Some(scan_ix) = scan {
                            if let ItemBody::MaybeCode(delim_count, _) =
                                self.tree[scan_ix].item.body
                            {
                                if search_count == delim_count {
                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                                    self.code_delims.clear();
                                    break;
                                } else {
                                    self.code_delims.insert(delim_count, scan_ix);
                                }
                            }
                            scan = self.tree[scan_ix].next;
                        }
                        // `scan` is `None` only when the walk ran off the end without a match.
                        if scan.is_none() {
                            self.tree[cur_ix].item.body = ItemBody::Text {
                                backslash_escaped: preceded_by_backslash,
                            };
                        }
                    }
                }
                ItemBody::MaybeLinkOpen => {
                    // Openers are text until a matching closer turns them into a link.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let link_open_doubled = self.tree[cur_ix]
                        .next
                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                        .unwrap_or(false);
                    // A doubled opener is also a wikilink candidate.
                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                        self.wikilink_stack.push(LinkStackEl {
                            node: cur_ix,
                            ty: LinkStackTy::Link,
                        });
                    }
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Link,
                    });
                }
                ItemBody::MaybeImage => {
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let link_open_doubled = self.tree[cur_ix]
                        .next
                        .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                        .unwrap_or(false);
                    if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                        self.wikilink_stack.push(LinkStackEl {
                            node: cur_ix,
                            ty: LinkStackTy::Image,
                        });
                    }
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Image,
                    });
                }
                ItemBody::MaybeLinkClose(could_be_ref) => {
                    // The closer is text unless one of the attempts below succeeds. Because of
                    // that, a bare `continue` further down re-visits this node as `Text`,
                    // which falls through the `_` arm and advances normally.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    let tos_link = self.link_stack.pop();
                    // A doubled closer with wikilinks enabled: try a wikilink first.
                    if self.options.contains(Options::ENABLE_WIKILINKS)
                        && self.tree[cur_ix]
                            .next
                            .map(|ix| {
                                matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
                            })
                            .unwrap_or(false)
                    {
                        if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
                            cur = self.tree[node].next;
                            continue;
                        }
                    }
                    if let Some(tos) = tos_link {
                        // Non-image links are not created when the parent is already a link.
                        if tos.ty != LinkStackTy::Image
                            && matches!(
                                self.tree[self.tree.peek_up().unwrap()].item.body,
                                ItemBody::Link(..)
                            )
                        {
                            continue;
                        }
                        if tos.ty == LinkStackTy::Disabled {
                            continue;
                        }
                        let next = self.tree[cur_ix].next;
                        if let Some((next_ix, url, title)) =
                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
                        {
                            // Inline link: the opener node becomes the link, the nodes between
                            // opener and closer become its children, and the chain resumes
                            // after the link's end.
                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
                            if let Some(prev_ix) = prev {
                                self.tree[prev_ix].next = None;
                            }
                            cur = Some(tos.node);
                            cur_ix = tos.node;
                            let link_ix =
                                self.allocs
                                    .allocate_link(LinkType::Inline, url, title, "".into());
                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
                                ItemBody::Image(link_ix)
                            } else {
                                ItemBody::Link(link_ix)
                            };
                            self.tree[cur_ix].child = self.tree[cur_ix].next;
                            self.tree[cur_ix].next = next_node;
                            self.tree[cur_ix].item.end = next_ix;
                            if let Some(next_node_ix) = next_node {
                                let orig_start = self.tree[next_node_ix].item.start;
                                let new_start = max(orig_start, next_ix);
                                self.tree[next_node_ix].item.start = new_start;
                                if new_start > orig_start {
                                    if let ItemBody::Text { backslash_escaped } =
                                        &mut self.tree[next_node_ix].item.body
                                    {
                                        *backslash_escaped = false;
                                    }
                                }
                            }

                            // Links may not contain other links: disable pending openers.
                            if tos.ty == LinkStackTy::Link {
                                self.disable_all_links();
                            }
                        } else {
                            // Not an inline link. First check whether the bracketed text is a
                            // footnote label with an existing definition.
                            let first_bracket_start = self.tree[tos.node].item.start;
                            let first_bracket_end = self.tree[cur_ix].item.end;
                            let first_bracket_text =
                                &self.text[first_bracket_start..first_bracket_end];
                            if let Some((_, ReferenceLabel::Footnote(footlabel))) =
                                scan_link_label(&self.tree, first_bracket_text, self.options)
                            {
                                if self.allocs.footdefs.contains(&footlabel) {
                                    let footref = self.allocs.allocate_cow(footlabel);
                                    if let Some(def) = self
                                        .allocs
                                        .footdefs
                                        .get_mut(self.allocs.cows[footref.0].to_owned())
                                    {
                                        def.use_count += 1;
                                    }
                                    // For an image opener the opener node is kept as a literal
                                    // `!` and the closer node carries the footnote reference.
                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
                                        self.tree[tos.node].next = Some(cur_ix);
                                        self.tree[tos.node].child = None;
                                        self.tree[tos.node].item.body =
                                            ItemBody::SynthesizeChar('!');
                                        self.tree[cur_ix].item.start =
                                            self.tree[tos.node].item.start + 1;
                                        self.tree[tos.node].item.end =
                                            self.tree[tos.node].item.start + 1;
                                        cur_ix
                                    } else {
                                        tos.node
                                    };
                                    self.tree[footnote_ix].next = next;
                                    self.tree[footnote_ix].child = None;
                                    self.tree[footnote_ix].item.body =
                                        ItemBody::FootnoteReference(footref);
                                    self.tree[footnote_ix].item.end = first_bracket_end;
                                    prev = Some(footnote_ix);
                                    cur = next;
                                    self.link_stack.clear();
                                    continue;
                                }
                            }
                            // Otherwise classify what follows the closer: a full reference
                            // label, a collapsed reference, or nothing usable (shortcut).
                            let scan_result =
                                scan_reference(&self.tree, block_text, next, self.options);
                            let (node_after_link, link_type) = match scan_result {
                                RefScan::LinkLabel(_, end_ix) => {
                                    // Locate the node holding the label's closing bracket.
                                    let reference_close_node = if let Some(node) =
                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
                                    {
                                        node
                                    } else {
                                        continue;
                                    };
                                    self.tree[reference_close_node].item.body =
                                        ItemBody::MaybeLinkClose(false);
                                    let next_node = self.tree[reference_close_node].next;

                                    (next_node, LinkType::Reference)
                                }
                                RefScan::Collapsed(next_node) => {
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next_node, LinkType::Collapsed)
                                }
                                RefScan::UnexpectedFootnote => continue,
                                RefScan::FailedInvalidLabel => continue,
                                RefScan::Failed => {
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next, LinkType::Shortcut)
                                }
                            };

                            // The label is either the explicit one found above or the
                            // bracketed link text itself, which must span exactly up to the
                            // closer's end.
                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
                                RefScan::LinkLabel(l, end_ix) => {
                                    Some((ReferenceLabel::Link(l), end_ix))
                                }
                                RefScan::Collapsed(..)
                                | RefScan::Failed
                                | RefScan::FailedInvalidLabel
                                | RefScan::UnexpectedFootnote => {
                                    let label_start = self.tree[tos.node].item.end - 1;
                                    let label_end = self.tree[cur_ix].item.end;
                                    scan_link_label(
                                        &self.tree,
                                        &self.text[label_start..label_end],
                                        self.options,
                                    )
                                    .map(|(ix, label)| (label, label_start + ix))
                                    .filter(|(_, end)| *end == label_end)
                                }
                            };

                            let id = match &label {
                                Some(
                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
                                ) => l.clone(),
                                None => "".into(),
                            };

                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
                                let footref = self.allocs.allocate_cow(l);
                                if let Some(def) = self
                                    .allocs
                                    .footdefs
                                    .get_mut(self.allocs.cows[footref.0].to_owned())
                                {
                                    def.use_count += 1;
                                }
                                if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
                                    // Same node reshaping as in the footnote case above.
                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
                                        self.tree[tos.node].next = Some(cur_ix);
                                        self.tree[tos.node].child = None;
                                        self.tree[tos.node].item.body =
                                            ItemBody::SynthesizeChar('!');
                                        self.tree[cur_ix].item.start =
                                            self.tree[tos.node].item.start + 1;
                                        self.tree[tos.node].item.end =
                                            self.tree[tos.node].item.start + 1;
                                        cur_ix
                                    } else {
                                        tos.node
                                    };
                                    self.tree[footnote_ix].next = next;
                                    self.tree[footnote_ix].child = None;
                                    self.tree[footnote_ix].item.body =
                                        ItemBody::FootnoteReference(footref);
                                    self.tree[footnote_ix].item.end = end;
                                    prev = Some(footnote_ix);
                                    cur = next;
                                    self.link_stack.clear();
                                    continue;
                                }
                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
                                // Resolve the label via the definitions or the broken-link
                                // callback; on failure the brackets simply stay text.
                                if let Some((def_link_type, url, title)) = self
                                    .fetch_link_type_url_title(
                                        link_label,
                                        (self.tree[tos.node].item.start)..end,
                                        link_type,
                                        callbacks,
                                    )
                                {
                                    let link_ix =
                                        self.allocs.allocate_link(def_link_type, url, title, id);
                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
                                    {
                                        ItemBody::Image(link_ix)
                                    } else {
                                        ItemBody::Link(link_ix)
                                    };
                                    let label_node = self.tree[tos.node].next;

                                    self.tree[tos.node].next = node_after_link;

                                    // If there is anything between opener and closer, it
                                    // becomes the link's children and is cut off from the
                                    // sibling chain.
                                    if label_node != cur {
                                        self.tree[tos.node].child = label_node;

                                        if let Some(prev_ix) = prev {
                                            self.tree[prev_ix].next = None;
                                        }
                                    }

                                    self.tree[tos.node].item.end = end;

                                    cur = Some(tos.node);
                                    cur_ix = tos.node;

                                    if tos.ty == LinkStackTy::Link {
                                        self.disable_all_links();
                                    }
                                }
                            }
                        }
                    }
                }
                _ => {}
            }
            prev = cur;
            cur = self.tree[cur_ix].next;
        }
        // Per-block scratch state must not leak into the next block.
        self.link_stack.clear();
        self.wikilink_stack.clear();
        self.code_delims.clear();
        self.math_delims.clear();
    }
1430
1431 fn handle_wikilink(
1437 &mut self,
1438 block_text: &'input str,
1439 cur_ix: TreeIndex,
1440 prev: Option<TreeIndex>,
1441 ) -> Option<TreeIndex> {
1442 let next_ix = self.tree[cur_ix].next.unwrap();
1443 if let Some(tos) = self.wikilink_stack.pop() {
1446 if tos.ty == LinkStackTy::Disabled {
1447 return None;
1448 }
1449 let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1451 return None;
1453 };
1454 let start_ix = self.tree[body_node].item.start;
1455 let end_ix = self.tree[cur_ix].item.start;
1456 let wikilink = match scan_wikilink_pipe(
1457 block_text,
1458 start_ix, end_ix - start_ix,
1460 ) {
1461 Some((rest, wikitext)) => {
1462 if wikitext.is_empty() {
1464 return None;
1465 }
1466 let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1468 if let Some(body_node) = body_node {
1469 self.tree[body_node].item.start = rest;
1472 Some((true, body_node, wikitext))
1473 } else {
1474 None
1475 }
1476 }
1477 None => {
1478 let wikitext = &block_text[start_ix..end_ix];
1479 if wikitext.is_empty() {
1481 return None;
1482 }
1483 let body_node = self.tree.create_node(Item {
1484 start: start_ix,
1485 end: end_ix,
1486 body: ItemBody::Text {
1487 backslash_escaped: false,
1488 },
1489 });
1490 Some((false, body_node, wikitext))
1491 }
1492 };
1493
1494 if let Some((has_pothole, body_node, wikiname)) = wikilink {
1495 let link_ix = self.allocs.allocate_link(
1496 LinkType::WikiLink { has_pothole },
1497 wikiname.into(),
1498 "".into(),
1499 "".into(),
1500 );
1501 if let Some(prev_ix) = prev {
1502 self.tree[prev_ix].next = None;
1503 }
1504 if tos.ty == LinkStackTy::Image {
1505 self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1506 } else {
1507 self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1508 }
1509 self.tree[tos.node].child = Some(body_node);
1510 self.tree[tos.node].next = self.tree[next_ix].next;
1511 self.tree[tos.node].item.end = end_ix + 2;
1512 self.disable_all_links();
1513 return Some(tos.node);
1514 }
1515 }
1516
1517 None
1518 }
1519
/// Resolves `*`/`_` delimiter runs, smart quotes and backslash hard breaks
/// in the sibling chain that begins at `start`.
///
/// Walks the chain via `next` links. `MaybeEmphasis` runs of `*`/`_` are
/// paired through `self.inline_stack` and rewritten in place into
/// `Emphasis`/`Strong` nodes; runs of `~`/`^` are stepped over untouched
/// (they are resolved by `resolve_tildes_carets_in_scope`). Any opener still
/// on the stack when the chain ends is turned into plain text.
fn handle_emphasis_in_scope(&mut self, start: Option<TreeIndex>) {
    // `prev` is the node whose `next` currently points at `cur`.
    let mut prev = None;
    let mut prev_ix: TreeIndex;
    let mut cur = start;

    // Pending smart-quote openers seen so far in this scope.
    let mut single_quote_open: Option<TreeIndex> = None;
    let mut double_quote_open: bool = false;

    while let Some(mut cur_ix) = cur {
        match self.tree[cur_ix].item.body {
            ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
                let run_length = count;
                // The delimiter byte is read from the source text at the run's start.
                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
                let both = can_open && can_close;
                if c == b'~' || c == b'^' {
                    // Step over the whole run (`count` consecutive nodes) without
                    // changing it.
                    prev_ix = cur_ix + count - 1;
                    prev = Some(prev_ix);
                    cur = self.tree[prev_ix].next;
                    continue;
                }
                if can_close {
                    while let Some(el) =
                        self.inline_stack
                            .find_match(&mut self.tree, c, run_length, count, both)
                    {
                        // Detach the content that precedes the closer: it becomes
                        // the tail of the new span's child chain.
                        if let Some(prev_ix) = prev {
                            self.tree[prev_ix].next = None;
                        }
                        // At most two delimiters are consumed per match.
                        let match_count = min(2, min(count, el.count));
                        // `end` tracks the last closing delimiter node consumed so far.
                        let mut end = cur_ix - 1;
                        // `start` is the first node inside the span being built;
                        // initially the node right after the opener run.
                        let mut start = el.start + el.count;

                        // Wrap from the inside out, consuming opener delimiters
                        // from the end of the opener run.
                        while start > el.start + el.count - match_count {
                            // Take two delimiters when at least two remain to be
                            // matched, otherwise one.
                            let inc = if start > el.start + el.count - match_count + 1 {
                                2
                            } else {
                                1
                            };
                            // NOTE(review): `c` cannot be `~` or `^` here because
                            // such runs `continue` above, so only the final two
                            // arms (Strong/Emphasis) appear reachable.
                            let ty = if c == b'~' {
                                if inc == 2 {
                                    if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                        ItemBody::Strikethrough
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
                                    ItemBody::Subscript
                                } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                    ItemBody::Strikethrough
                                } else {
                                    ItemBody::Text {
                                        backslash_escaped: false,
                                    }
                                }
                            } else if c == b'^' {
                                if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
                                    ItemBody::Superscript
                                } else {
                                    ItemBody::Text {
                                        backslash_escaped: false,
                                    }
                                }
                            } else if inc == 2 {
                                ItemBody::Strong
                            } else {
                                ItemBody::Emphasis
                            };

                            // Reuse the opener delimiter node as the span node.
                            let root = start - inc;
                            end = end + inc;
                            self.tree[root].item.body = ty;
                            self.tree[root].item.end = self.tree[end].item.end;
                            self.tree[root].child = Some(start);
                            self.tree[root].next = None;
                            start = root;
                        }

                        // The outermost span node becomes the new `prev`; link it
                        // to whatever follows the consumed closing delimiters.
                        prev_ix = el.start + el.count - match_count;
                        prev = Some(prev_ix);
                        cur = self.tree[cur_ix + match_count - 1].next;
                        self.tree[prev_ix].next = cur;

                        // Unused opener delimiters go back on the stack.
                        if el.count > match_count {
                            self.inline_stack.push(InlineEl {
                                start: el.start,
                                count: el.count - match_count,
                                run_length: el.run_length,
                                c: el.c,
                                both: el.both,
                            })
                        }
                        count -= match_count;
                        if count > 0 {
                            // Closing delimiters remain; keep matching from the
                            // next node.
                            cur_ix = cur.unwrap();
                        } else {
                            break;
                        }
                    }
                }
                if count > 0 {
                    if can_open {
                        // Remember the remaining delimiters as a potential opener.
                        self.inline_stack.push(InlineEl {
                            start: cur_ix,
                            run_length,
                            count,
                            c,
                            both,
                        });
                    } else {
                        // Neither matched nor able to open: plain text.
                        for i in 0..count {
                            self.tree[cur_ix + i].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                        }
                    }
                    prev_ix = cur_ix + count - 1;
                    prev = Some(prev_ix);
                    cur = self.tree[prev_ix].next;
                }
            }
            ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
                self.tree[cur_ix].item.body = match c {
                    b'\'' => {
                        // A closing quote with a remembered opener rewrites that
                        // opener to a left quote; this node itself always becomes
                        // a right quote.
                        if let (Some(open_ix), true) = (single_quote_open, can_close) {
                            self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
                            single_quote_open = None;
                        } else if can_open {
                            single_quote_open = Some(cur_ix);
                        }
                        ItemBody::SynthesizeChar('’')
                    }
                    _ => {
                        // Any other byte is handled as a double quote.
                        if can_close && double_quote_open {
                            double_quote_open = false;
                            ItemBody::SynthesizeChar('”')
                        } else {
                            if can_open && !double_quote_open {
                                double_quote_open = true;
                            }
                            ItemBody::SynthesizeChar('“')
                        }
                    }
                };
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
            ItemBody::HardBreak(true) => {
                // A `HardBreak(true)` with no following sibling is emitted as a
                // literal backslash instead of a break.
                if self.tree[cur_ix].next.is_none() {
                    self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
                }
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
            _ => {
                prev = cur;
                cur = self.tree[cur_ix].next;
            }
        }
    }
    // Unmatched openers become text and the lower bounds are reset.
    self.inline_stack.pop_all(&mut self.tree);
}
1702
1703 fn handle_tildes_carets_pass(&mut self) {
1711 let start = self.tree.cur();
1712 self.resolve_tildes_carets_in_scope(start);
1713 }
/// Resolves `~` and `^` delimiter runs in the sibling chain starting at
/// `start`, recursing into the children of already-built inline containers.
///
/// Uses a local stack of openers. A closer only pairs with the nearest
/// opener of the same byte and the same original run length. Matched runs
/// are rewritten in place into `Strikethrough`/`Subscript`/`Superscript`
/// nodes (or `Text` when the corresponding option is disabled); unmatched
/// delimiters become `Text`.
fn resolve_tildes_carets_in_scope(&mut self, start: Option<TreeIndex>) {
    let mut stack: Vec<InlineEl> = Vec::new();
    let mut cur = start;
    // Node whose `next` currently points at `cur`.
    let mut prev: Option<TreeIndex> = None;
    while let Some(mut cur_ix) = cur {
        match self.tree[cur_ix].item.body {
            ItemBody::MaybeEmphasis(count, can_open, can_close) => {
                let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
                if c != b'~' && c != b'^' {
                    // Other delimiter bytes are not handled by this pass.
                    prev = Some(cur_ix);
                    cur = self.tree[cur_ix].next;
                    continue;
                }
                let run_length = count;
                // Delimiters of this run that are still unmatched.
                let mut remaining = count;
                if can_close {
                    while remaining > 0 {
                        // Nearest opener with the same byte and run length.
                        let res = stack
                            .iter()
                            .enumerate()
                            .rfind(|(_, el)| el.c == c && el.run_length == run_length);
                        let Some((matching_ix, matching_el)) = res else {
                            break;
                        };
                        let matching_el = *matching_el;
                        // Cut the content off from the closer; it becomes the
                        // span's child chain.
                        if let Some(prev_ix) = prev {
                            self.tree[prev_ix].next = None;
                        }
                        // Openers above the match can no longer pair: make them
                        // text and drop them (together with the match itself).
                        for el in &stack[(matching_ix + 1)..] {
                            for i in 0..el.count {
                                self.tree[el.start + i].item.body = ItemBody::Text {
                                    backslash_escaped: false,
                                };
                            }
                        }
                        stack.truncate(matching_ix);
                        // At most two delimiters are consumed per match.
                        let match_count =
                            core::cmp::min(2, core::cmp::min(remaining, matching_el.count));
                        // Last closing delimiter node consumed so far.
                        let mut end = cur_ix - 1;
                        // First node inside the span being built.
                        let mut sub_start = matching_el.start + matching_el.count;
                        while sub_start > matching_el.start + matching_el.count - match_count {
                            let inc = if sub_start
                                > matching_el.start + matching_el.count - match_count + 1
                            {
                                2
                            } else {
                                1
                            };
                            // `~~` -> strikethrough; `~` -> subscript, falling back
                            // to strikethrough; `^` -> superscript. Disabled
                            // options yield plain text.
                            let ty = if c == b'~' {
                                if inc == 2 {
                                    if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                        ItemBody::Strikethrough
                                    } else {
                                        ItemBody::Text {
                                            backslash_escaped: false,
                                        }
                                    }
                                } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
                                    ItemBody::Subscript
                                } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
                                    ItemBody::Strikethrough
                                } else {
                                    ItemBody::Text {
                                        backslash_escaped: false,
                                    }
                                }
                            } else if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
                                ItemBody::Superscript
                            } else {
                                ItemBody::Text {
                                    backslash_escaped: false,
                                }
                            };
                            // Reuse the opener delimiter node as the span node.
                            let root = sub_start - inc;
                            end = end + inc;
                            self.tree[root].item.body = ty;
                            self.tree[root].item.end = self.tree[end].item.end;
                            self.tree[root].child = Some(sub_start);
                            self.tree[root].next = None;
                            sub_start = root;
                        }
                        // Link the outermost span node to whatever follows the
                        // consumed closing delimiters.
                        let new_prev_ix = matching_el.start + matching_el.count - match_count;
                        let new_cur = self.tree[cur_ix + match_count - 1].next;
                        self.tree[new_prev_ix].next = new_cur;
                        prev = Some(new_prev_ix);
                        // Leftover opener delimiters go back on the stack.
                        if matching_el.count > match_count {
                            stack.push(InlineEl {
                                start: matching_el.start,
                                count: matching_el.count - match_count,
                                run_length: matching_el.run_length,
                                c: matching_el.c,
                                both: matching_el.both,
                            });
                        }
                        remaining -= match_count;
                        if remaining > 0 {
                            let Some(next_cur) = new_cur else { break };
                            cur_ix = next_cur;
                        } else {
                            break;
                        }
                    }
                }
                if remaining > 0 {
                    if can_open {
                        stack.push(InlineEl {
                            start: cur_ix,
                            count: remaining,
                            run_length,
                            c,
                            both: can_open && can_close,
                        });
                    } else {
                        for i in 0..remaining {
                            self.tree[cur_ix + i].item.body = ItemBody::Text {
                                backslash_escaped: false,
                            };
                        }
                    }
                    let prev_ix = cur_ix + remaining - 1;
                    prev = Some(prev_ix);
                    cur = self.tree[prev_ix].next;
                } else {
                    // Fully consumed: `prev` was set to the new span node inside
                    // the matching loop, so continue after it.
                    cur = self.tree[prev.unwrap()].next;
                }
                continue;
            }
            ItemBody::Emphasis
            | ItemBody::Strong
            | ItemBody::Strikethrough
            | ItemBody::Subscript
            | ItemBody::Superscript
            | ItemBody::Link(_)
            | ItemBody::Image(_) => {
                // Containers met while walking get their own nested scope.
                let child = self.tree[cur_ix].child;
                self.resolve_tildes_carets_in_scope(child);
            }
            _ => {}
        }
        prev = Some(cur_ix);
        cur = self.tree[cur_ix].next;
    }
    // Whatever is still on the stack never found a closer.
    for el in stack {
        for i in 0..el.count {
            self.tree[el.start + i].item.body = ItemBody::Text {
                backslash_escaped: false,
            };
        }
    }
}
1867
1868 fn disable_all_links(&mut self) {
1869 self.link_stack.disable_all_links();
1870 self.wikilink_stack.disable_all_links();
1871 }
1872
1873 fn scan_inline_link(
1875 &self,
1876 underlying: &'input str,
1877 mut ix: usize,
1878 node: Option<TreeIndex>,
1879 ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1880 if underlying.as_bytes().get(ix) != Some(&b'(') {
1881 return None;
1882 }
1883 ix += 1;
1884
1885 let scan_separator = |ix: &mut usize| {
1886 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1887 if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1888 *ix += bl;
1889 *ix += skip_container_prefixes(
1890 &self.tree,
1891 &underlying.as_bytes()[*ix..],
1892 self.options,
1893 );
1894 }
1895 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1896 };
1897
1898 scan_separator(&mut ix);
1899
1900 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1901 let dest = unescape(dest, self.tree.is_in_table());
1902 ix += dest_length;
1903
1904 scan_separator(&mut ix);
1905
1906 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1907 ix += bytes_scanned;
1908 scan_separator(&mut ix);
1909 t
1910 } else {
1911 "".into()
1912 };
1913 if underlying.as_bytes().get(ix) != Some(&b')') {
1914 return None;
1915 }
1916 ix += 1;
1917
1918 Some((ix, dest, title))
1919 }
1920
1921 fn scan_link_title(
1923 &self,
1924 text: &'input str,
1925 start_ix: usize,
1926 node: Option<TreeIndex>,
1927 ) -> Option<(usize, CowStr<'input>)> {
1928 let bytes = text.as_bytes();
1929 let open = match bytes.get(start_ix) {
1930 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1931 _ => return None,
1932 };
1933 let close = if open == b'(' { b')' } else { open };
1934
1935 let mut title = String::new();
1936 let mut mark = start_ix + 1;
1937 let mut i = start_ix + 1;
1938
1939 while i < bytes.len() {
1940 let c = bytes[i];
1941
1942 if c == close {
1943 let cow = if title.is_empty() {
1944 (i - start_ix + 1, text[mark..i].into())
1945 } else {
1946 title.push_str(&text[mark..i]);
1947 (i - start_ix + 1, title.into())
1948 };
1949
1950 return Some(cow);
1951 }
1952 if c == open {
1953 return None;
1954 }
1955
1956 if c == b'\n' || c == b'\r' {
1957 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1958 if self.tree[node_ix].item.start > i {
1959 title.push_str(&text[mark..i]);
1960 title.push('\n');
1961 i = self.tree[node_ix].item.start;
1962 mark = i;
1963 continue;
1964 }
1965 }
1966 }
1967 if c == b'&' {
1968 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1969 title.push_str(&text[mark..i]);
1970 title.push_str(&value);
1971 i += n;
1972 mark = i;
1973 continue;
1974 }
1975 }
1976 if self.tree.is_in_table()
1977 && c == b'\\'
1978 && i + 2 < bytes.len()
1979 && bytes[i + 1] == b'\\'
1980 && bytes[i + 2] == b'|'
1981 {
1982 title.push_str(&text[mark..i]);
1985 i += 2;
1986 mark = i;
1987 }
1988 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1989 title.push_str(&text[mark..i]);
1990 i += 1;
1991 mark = i;
1992 }
1993
1994 i += 1;
1995 }
1996
1997 None
1998 }
1999
/// Turns the nodes from `open` through `close` into a single `Math` node.
///
/// Adjacent `MaybeMath` nodes that directly follow `open` (and are not
/// `close`) or directly follow `close` are treated as part of the respective
/// delimiter. The math content is the source text between the end of the
/// opening delimiter and the start of `close`, with container prefixes on
/// continuation lines removed and, inside tables, `\|` replaced by `|`.
/// The node is always created with `false` as the second `Math` field.
fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
    // Extend the opening delimiter over contiguous `MaybeMath` nodes.
    let mut open_end = open;
    {
        let mut peek = self.tree[open].next;
        while let Some(peek_ix) = peek {
            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                && peek_ix != close
            {
                open_end = peek_ix;
                peek = self.tree[peek_ix].next;
            } else {
                break;
            }
        }
    }
    // Extend the closing delimiter the same way.
    let mut close_end = close;
    {
        let mut peek = self.tree[close].next;
        while let Some(peek_ix) = peek {
            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                && self.tree[peek_ix].item.start == self.tree[close_end].item.end
            {
                close_end = peek_ix;
                peek = self.tree[peek_ix].next;
            } else {
                break;
            }
        }
    }

    let span_start = self.tree[open_end].item.end;
    let span_end = self.tree[close].item.start;

    // No room for content between the delimiters: demote the opener to text.
    if span_start > span_end {
        self.tree[open].item.body = ItemBody::Text {
            backslash_escaped: false,
        };
        return;
    }

    let spanned_text = &self.text[span_start..span_end];
    let spanned_bytes = spanned_text.as_bytes();
    // Only allocated once the content has to differ from the source slice.
    let mut buf: Option<String> = None;

    // `start_ix` marks the beginning of the slice not yet copied into `buf`.
    let mut start_ix = 0;
    let mut ix = 0;
    while ix < spanned_bytes.len() {
        let c = spanned_bytes[ix];
        if c == b'\r' || c == b'\n' {
            // Copy up to and including the line-ending byte, then skip the
            // container prefixes at the start of the next line.
            ix += 1;
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            let from = span_start + ix;
            let (scanned, leftover) = skip_container_prefixes_with_remaining(
                &self.tree,
                &self.text.as_bytes()[from..],
                self.options,
            );
            // Never skip past the end of the span.
            let scanned = scanned.min(spanned_bytes.len() - ix);
            ix += scanned;
            start_ix = ix;
            // Re-insert the spaces the prefix scan reports as remaining.
            for _ in 0..leftover {
                buf.push(' ');
            }
        } else if c == b'\\'
            && spanned_bytes.get(ix + 1) == Some(&b'|')
            && self.tree.is_in_table()
        {
            // Inside a table, `\|` contributes a bare pipe.
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push('|');
            ix += 2;
            start_ix = ix;
        } else {
            ix += 1;
        }
    }

    // Flush the tail into `buf` (if any) and inspect the resulting content.
    let (opening, closing, all_spaces) = {
        let s = if let Some(buf) = &mut buf {
            buf.push_str(&spanned_text[start_ix..]);
            &buf[..]
        } else {
            spanned_text
        };
        (
            matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
            matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
            s.bytes().all(|b| b == b' ' || b == b'\n'),
        )
    };

    // Strip one leading and one trailing space/newline when both are present
    // and the content is not made of spaces/newlines only.
    let cow: CowStr<'input> = if !all_spaces && opening && closing {
        if let Some(mut buf) = buf {
            if !buf.is_empty() {
                buf.remove(0);
                buf.pop();
            }
            buf.into()
        } else {
            spanned_text[1..(spanned_text.len() - 1).max(1)].into()
        }
    } else if let Some(buf) = buf {
        buf.into()
    } else {
        spanned_text.into()
    };

    // `open` becomes the math node and swallows everything through `close_end`.
    self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
    self.tree[open].item.end = self.tree[close_end].item.end;
    self.tree[open].next = self.tree[close_end].next;
}
2126
/// Turns the nodes from `open` through `close` into a `Code` node.
///
/// The code content is the source text between the end of `open` and the
/// start of `close`: each line ending is normalized to `\n`, container
/// prefixes on continuation lines are removed and, inside tables, `\|`
/// becomes `|`. When `preceding_backslash` is set, `open` is kept as a
/// one-byte backslash-escaped `Text` node and `close` becomes the `Code`
/// node; otherwise `open` becomes the `Code` node and absorbs `close`.
fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
    let span_start = self.tree[open].item.end;
    let span_end = self.tree[close].item.start;
    // Only allocated once the content has to differ from the source slice.
    let mut buf: Option<String> = None;

    let spanned_text = &self.text[span_start..span_end];
    let spanned_bytes = spanned_text.as_bytes();
    // `start_ix` marks the beginning of the slice not yet copied into `buf`.
    let mut start_ix = 0;
    let mut ix = 0;
    while ix < spanned_bytes.len() {
        let c = spanned_bytes[ix];
        if c == b'\r' || c == b'\n' {
            // Replace the line ending (`\r`, `\n` or `\r\n`) with a single `\n`.
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push('\n');
            ix += 1;
            if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
                ix += 1;
            }
            // Skip the container prefixes at the start of the next line.
            let from = span_start + ix;
            let (scanned, leftover) = skip_container_prefixes_with_remaining(
                &self.tree,
                &self.text.as_bytes()[from..],
                self.options,
            );
            // Never skip past the end of the span.
            let scanned = scanned.min(spanned_bytes.len() - ix);
            ix += scanned;
            start_ix = ix;
            // Re-insert the spaces the prefix scan reports as remaining.
            for _ in 0..leftover {
                buf.push(' ');
            }
        } else if c == b'\\'
            && spanned_bytes.get(ix + 1) == Some(&b'|')
            && self.tree.is_in_table()
        {
            // Inside a table, `\|` contributes a bare pipe.
            let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
            buf.push_str(&spanned_text[start_ix..ix]);
            buf.push('|');
            ix += 2;
            start_ix = ix;
        } else {
            ix += 1;
        }
    }

    // Flush the tail into `buf` (if any) and inspect the resulting content.
    let (opening, closing, all_spaces) = {
        let s = if let Some(buf) = &mut buf {
            buf.push_str(&spanned_text[start_ix..]);
            &buf[..]
        } else {
            spanned_text
        };
        (
            matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
            matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
            s.bytes().all(|b| b == b' ' || b == b'\n'),
        )
    };

    // Strip one leading and one trailing space/newline when both are present
    // and the content is not made of spaces/newlines only.
    let cow: CowStr<'input> = if !all_spaces && opening && closing {
        if let Some(mut buf) = buf {
            if !buf.is_empty() {
                buf.remove(0);
                buf.pop();
            }
            buf.into()
        } else {
            spanned_text[1..(spanned_text.len() - 1).max(1)].into()
        }
    } else if let Some(buf) = buf {
        buf.into()
    } else {
        spanned_text.into()
    };

    if preceding_backslash {
        // Keep the first byte of `open` as escaped text; the code node starts
        // right after it and lives in `close`.
        self.tree[open].item.body = ItemBody::Text {
            backslash_escaped: true,
        };
        self.tree[open].item.end = self.tree[open].item.start + 1;
        self.tree[open].next = Some(close);
        self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
        self.tree[close].item.start = self.tree[open].item.start + 1;
    } else {
        self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
        self.tree[open].item.end = self.tree[close].item.end;
        self.tree[open].next = self.tree[close].next;
    }

    // Discard recorded errors whose offset falls inside the code content
    // (`span_start..span_end`).
    if !self.mdx_errors.is_empty() {
        self.mdx_errors
            .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
    }
}
2237
2238 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
2242 let c = *bytes.get(ix)?;
2243 if c == b'!' {
2244 Some((
2245 vec![],
2246 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
2247 ))
2248 } else if c == b'?' {
2249 Some((
2250 vec![],
2251 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
2252 ))
2253 } else {
2254 let (span, i) = scan_html_block_inner(
2255 &bytes[(ix - 1)..],
2257 Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
2258 )?;
2259 Some((span, i + ix - 1))
2260 }
2261 }
2262}
2263
2264pub(crate) fn scan_containers(
2266 tree: &Tree<Item>,
2267 line_start: &mut LineStart<'_>,
2268 options: Options,
2269) -> usize {
2270 let mut i = 0;
2271 for &node_ix in tree.walk_spine() {
2272 match tree[node_ix].item.body {
2273 ItemBody::BlockQuote(..) => {
2274 let save = line_start.clone();
2275 if options.contains(Options::ENABLE_MDX) {
2280 line_start.scan_all_space();
2281 } else {
2282 let _ = line_start.scan_space(3);
2283 }
2284 if !line_start.scan_blockquote_marker() {
2285 *line_start = save;
2286 break;
2287 }
2288 }
2289 ItemBody::ListItem(indent, _) => {
2290 let save = line_start.clone();
2291 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2292 *line_start = save;
2293 break;
2294 }
2295 }
2296 ItemBody::DefinitionListDefinition(indent) => {
2297 let save = line_start.clone();
2298 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2299 *line_start = save;
2300 break;
2301 }
2302 }
2303 ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
2304 let save = line_start.clone();
2305 if !line_start.scan_space(4) && !line_start.is_at_eol() {
2306 *line_start = save;
2307 break;
2308 }
2309 }
2310 _ => (),
2311 }
2312 i += 1;
2313 }
2314 i
2315}
2316
2317pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
2318 let mut line_start = LineStart::new(bytes);
2319 let _ = scan_containers(tree, &mut line_start, options);
2320 line_start.bytes_scanned()
2321}
2322
2323fn skip_container_prefixes_with_remaining(
2330 tree: &Tree<Item>,
2331 bytes: &[u8],
2332 options: Options,
2333) -> (usize, usize) {
2334 let mut line_start = LineStart::new(bytes);
2335 let _ = scan_containers(tree, &mut line_start, options);
2336 (line_start.bytes_scanned(), line_start.remaining_space())
2337}
2338
2339impl Tree<Item> {
2340 pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
2341 if end > start {
2342 if let Some(ix) = self.cur() {
2343 if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
2344 {
2345 self[ix].item.end = end;
2346 return;
2347 }
2348 }
2349 self.append(Item {
2350 start,
2351 end,
2352 body: ItemBody::Text { backslash_escaped },
2353 });
2354 }
2355 }
2356 pub(crate) fn is_in_table(&self) -> bool {
2363 fn might_be_in_table(item: &Item) -> bool {
2364 item.body.is_inline()
2365 || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
2366 | ItemBody::TableCell)
2367 }
2368 for &ix in self.walk_spine().rev() {
2369 if matches!(self[ix].item.body, ItemBody::Table(_)) {
2370 return true;
2371 }
2372 if !might_be_in_table(&self[ix].item) {
2373 return false;
2374 }
2375 }
2376 false
2377 }
2378}
2379
/// A pending opening delimiter run, as stored on the inline stacks.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// Tree index of the first delimiter node of the run; the run occupies
    /// `count` consecutive indices starting here.
    start: TreeIndex,
    /// Number of delimiters of the run that are still unmatched.
    count: usize,
    /// Length of the run when it was first seen (unchanged by partial matches).
    run_length: usize,
    /// The delimiter byte.
    c: u8,
    /// Whether the run can both open and close.
    both: bool,
}
2393
/// Stack of pending opening delimiter runs plus per-category lower bounds
/// that limit how far down `find_match` searches.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// Pending openers, innermost last.
    stack: Vec<InlineEl>,
    /// Search lower bounds, indexed by the slot constants defined on
    /// `InlineStack` (`UNDERSCORE_NOT_BOTH`, `ASTERISK_BASE`, ...).
    lower_bounds: [usize; 10],
}
2403
impl InlineStack {
    // Slot layout of `lower_bounds`:
    //   0      `_` closers that cannot also open
    //   1      `*` closers that cannot also open
    //   2..=4  `*`, indexed by `count % 3`
    //   5      `~`
    //   6..=8  `_`, indexed by `count % 3`
    //   9      `^`
    const UNDERSCORE_NOT_BOTH: usize = 0;
    const ASTERISK_NOT_BOTH: usize = 1;
    const ASTERISK_BASE: usize = 2;
    const TILDES: usize = 5;
    const UNDERSCORE_BASE: usize = 6;
    const CIRCUMFLEXES: usize = 9;

    /// Empties the stack, turning every remaining delimiter node into plain
    /// text, and resets all lower bounds to zero.
    fn pop_all(&mut self, tree: &mut Tree<Item>) {
        for el in self.stack.drain(..) {
            for i in 0..el.count {
                tree[el.start + i].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
            }
        }
        self.lower_bounds = [0; 10];
    }

    /// Returns the stack index below which no opener needs to be considered
    /// for a closer with byte `c`, `count` delimiters and the given `both` flag.
    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
        if c == b'_' {
            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
            if both {
                mod3_lower
            } else {
                min(
                    mod3_lower,
                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
                )
            }
        } else if c == b'*' {
            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
            if both {
                mod3_lower
            } else {
                min(
                    mod3_lower,
                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
                )
            }
        } else if c == b'^' {
            self.lower_bounds[InlineStack::CIRCUMFLEXES]
        } else {
            // Every other byte shares the tilde slot.
            self.lower_bounds[InlineStack::TILDES]
        }
    }

    /// Records `new_bound` as the lower bound for the category selected by
    /// `c`, `count` and `both`.
    ///
    /// Note that `_` writes either the mod-3 slot or the not-both slot, while
    /// `*` always writes the mod-3 slot and additionally the not-both slot
    /// when `both` is false.
    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
        if c == b'_' {
            if both {
                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
            } else {
                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
            }
        } else if c == b'*' {
            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
            if !both {
                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
            }
        } else if c == b'^' {
            self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
        } else {
            self.lower_bounds[InlineStack::TILDES] = new_bound;
        }
    }

    /// Shortens the stack to `new_bound` entries and clamps every lower bound
    /// to the new length.
    fn truncate(&mut self, new_bound: usize) {
        self.stack.truncate(new_bound);
        for lower_bound in &mut self.lower_bounds {
            if *lower_bound > new_bound {
                *lower_bound = new_bound;
            }
        }
    }

    /// Finds the nearest opener that a closing run can pair with.
    ///
    /// `c` is the delimiter byte, `run_length` the closer's original run
    /// length, `current_count` its still-unmatched delimiter count and `both`
    /// whether it can also open. On success the matching opener is removed
    /// from the stack and returned; every opener above it is turned into text
    /// and removed as well. On failure the lower bound for this closer
    /// category is raised to the current stack length (except for `~`/`^`)
    /// and `None` is returned.
    fn find_match(
        &mut self,
        tree: &mut Tree<Item>,
        c: u8,
        run_length: usize,
        current_count: usize,
        both: bool,
    ) -> Option<InlineEl> {
        // Clamp in case the stored bound is past the current stack length.
        let lowerbound = min(
            self.stack.len(),
            self.get_lowerbound(c, current_count, both),
        );
        let res = self.stack[lowerbound..]
            .iter()
            .cloned()
            .enumerate()
            .rfind(|(_, el)| {
                // `~`/`^` only pair with openers of identical run length.
                if (c == b'~' || c == b'^') && run_length != el.run_length {
                    return false;
                }
                // Same byte, and when either side can both open and close the
                // combined count must not be a multiple of 3 unless the
                // closer's count itself is.
                el.c == c
                    && (!both && !el.both
                        || !(current_count + el.count).is_multiple_of(3)
                        || current_count.is_multiple_of(3))
            });

        if let Some((matching_ix, matching_el)) = res {
            // `matching_ix` is relative to the searched slice.
            let matching_ix = matching_ix + lowerbound;
            for el in &self.stack[(matching_ix + 1)..] {
                for i in 0..el.count {
                    tree[el.start + i].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                }
            }
            self.truncate(matching_ix);
            Some(matching_el)
        } else {
            if c != b'~' && c != b'^' {
                self.set_lowerbound(c, current_count, both, self.stack.len());
            }
            None
        }
    }

    /// Clamps the lower bound in slot `ix` to the current stack length.
    fn trim_lower_bound(&mut self, ix: usize) {
        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
    }

    /// Pushes an opener; for `~` and `^` the matching lower bound is first
    /// clamped so the new element stays within the search range.
    fn push(&mut self, el: InlineEl) {
        if el.c == b'~' {
            self.trim_lower_bound(InlineStack::TILDES);
        } else if el.c == b'^' {
            self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
        }
        self.stack.push(el)
    }
}
2574
/// Outcome of `scan_reference`.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    /// A link label was scanned; carries the label and the absolute text
    /// index just past it.
    LinkLabel(CowStr<'a>, usize),
    /// A collapsed reference (`[]`); carries the node after the closing
    /// bracket node, if any.
    Collapsed(Option<TreeIndex>),
    /// The label scanned as a footnote label rather than a link label.
    UnexpectedFootnote,
    /// No reference was found.
    Failed,
    /// The text starts with `[` but no valid label could be scanned.
    FailedInvalidLabel,
}
2589
2590fn scan_nodes_to_ix(
2593 tree: &Tree<Item>,
2594 mut node: Option<TreeIndex>,
2595 ix: usize,
2596) -> Option<TreeIndex> {
2597 while let Some(node_ix) = node {
2598 if tree[node_ix].item.end <= ix {
2599 node = tree[node_ix].next;
2600 } else {
2601 break;
2602 }
2603 }
2604 node
2605}
2606
2607fn scan_link_label<'text>(
2610 tree: &Tree<Item>,
2611 text: &'text str,
2612 options: Options,
2613) -> Option<(usize, ReferenceLabel<'text>)> {
2614 let bytes = text.as_bytes();
2615 if bytes.len() < 2 || bytes[0] != b'[' {
2616 return None;
2617 }
2618 let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
2619 if options.contains(Options::ENABLE_FOOTNOTES)
2620 && b'^' == bytes[1]
2621 && bytes.get(2) != Some(&b']')
2622 {
2623 let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
2625 if let Some((byte_index, cow)) =
2626 scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
2627 {
2628 return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
2629 }
2630 }
2631 let (byte_index, cow) =
2632 scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
2633 Some((byte_index + 1, ReferenceLabel::Link(cow)))
2634}
2635
2636fn scan_reference<'b>(
2637 tree: &Tree<Item>,
2638 text: &'b str,
2639 cur: Option<TreeIndex>,
2640 options: Options,
2641) -> RefScan<'b> {
2642 let cur_ix = match cur {
2643 None => return RefScan::Failed,
2644 Some(cur_ix) => cur_ix,
2645 };
2646 let start = tree[cur_ix].item.start;
2647 let tail = &text.as_bytes()[start..];
2648
2649 if tail.first() == Some(&b'[') && start > 0 {
2656 let src = text.as_bytes();
2657 let mut backslashes = 0usize;
2658 let mut j = start;
2659 while j > 0 && src[j - 1] == b'\\' {
2660 backslashes += 1;
2661 j -= 1;
2662 }
2663 if backslashes % 2 == 1 {
2664 return RefScan::Failed;
2665 }
2666 }
2667
2668 if tail.starts_with(b"[]") {
2669 let Some(closing_node) = tree[cur_ix].next else {
2674 return RefScan::Failed;
2675 };
2676 RefScan::Collapsed(tree[closing_node].next)
2677 } else {
2678 let label = scan_link_label(tree, &text[start..], options);
2679 match label {
2680 Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2681 Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2682 None => {
2683 if tail.starts_with(b"[") {
2688 RefScan::FailedInvalidLabel
2689 } else {
2690 RefScan::Failed
2691 }
2692 }
2693 }
2694 }
2695}
2696
/// Stack of pending link/image openers.
#[derive(Clone, Default)]
struct LinkStack {
    /// The pending openers, innermost last.
    inner: Vec<LinkStackEl>,
    /// Entries below this index have already been visited by
    /// `disable_all_links` and are skipped on later calls.
    disabled_ix: usize,
}
2702
2703impl LinkStack {
2704 fn push(&mut self, el: LinkStackEl) {
2705 self.inner.push(el);
2706 }
2707
2708 fn pop(&mut self) -> Option<LinkStackEl> {
2709 let el = self.inner.pop();
2710 self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2711 el
2712 }
2713
2714 fn clear(&mut self) {
2715 self.inner.clear();
2716 self.disabled_ix = 0;
2717 }
2718
2719 fn disable_all_links(&mut self) {
2720 for el in &mut self.inner[self.disabled_ix..] {
2721 if el.ty == LinkStackTy::Link {
2722 el.ty = LinkStackTy::Disabled;
2723 }
2724 }
2725 self.disabled_ix = self.inner.len();
2726 }
2727}
2728
/// One entry of a `LinkStack`.
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node of the opener.
    node: TreeIndex,
    /// Kind of opener, or `Disabled`.
    ty: LinkStackTy,
}
2734
/// Kind of a `LinkStack` entry.
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link opener.
    Link,
    /// An image opener.
    Image,
    /// A link opener that `LinkStack::disable_all_links` has switched off.
    Disabled,
}
2741
/// A link definition: destination, optional title and source span.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// The link destination.
    pub dest: CowStr<'a>,
    /// The link title, if one was given.
    pub title: Option<CowStr<'a>>,
    /// Byte range associated with the definition
    /// (presumably its location in the source text; verify against the producer).
    pub span: Range<usize>,
}
2749
2750impl<'a> LinkDef<'a> {
2751 pub fn into_static(self) -> LinkDef<'static> {
2752 LinkDef {
2753 dest: self.dest.into_static(),
2754 title: self.title.map(|s| s.into_static()),
2755 span: self.span,
2756 }
2757 }
2758}
2759
/// Bookkeeping data stored per footnote definition.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter (presumably the number of references to this footnote;
    /// the code that updates it is outside this section).
    pub use_count: usize,
}
2765
/// Tree indices of delimiter runs, grouped by run length, used to find the
/// closer that matches a given opener.
struct CodeDelims {
    /// Maps a run length to the queue of node indices with that length, in
    /// insertion order.
    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
    /// Set once `insert` has been called; the first inserted delimiter is not
    /// stored.
    seen_first: bool,
}
2772
2773impl CodeDelims {
2774 fn new() -> Self {
2775 Self {
2776 inner: Default::default(),
2777 seen_first: false,
2778 }
2779 }
2780
2781 fn insert(&mut self, count: usize, ix: TreeIndex) {
2782 if self.seen_first {
2783 self.inner.entry(count).or_default().push_back(ix);
2784 } else {
2785 self.seen_first = true;
2788 }
2789 }
2790
2791 fn is_populated(&self) -> bool {
2792 !self.inner.is_empty()
2793 }
2794
2795 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2796 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2797 if ix > open_ix {
2798 return Some(ix);
2799 }
2800 }
2801 None
2802 }
2803
2804 fn clear(&mut self) {
2805 self.inner.clear();
2806 self.seen_first = false;
2807 }
2808}
2809
/// Recorded math delimiters, grouped under a `u8` key.
struct MathDelims {
    /// Per-key queue of `(node index, bool, bool)` entries.
    // NOTE(review): the meaning of the key and of the two flags is not
    // visible in this section; confirm against the code that fills the map.
    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
2815
2816impl MathDelims {
2817 fn new() -> Self {
2818 Self {
2819 inner: Default::default(),
2820 }
2821 }
2822
2823 fn clear(&mut self) {
2824 self.inner.clear();
2825 }
2826}
2827
/// Typed index carried by `ItemBody::Link` / `ItemBody::Image`
/// (presumably into `Allocations::links`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

/// Typed index carried by string-bearing `ItemBody` variants such as `Code`
/// and `Math` (presumably into `Allocations::cows`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

/// Typed index (presumably into `Allocations::alignments`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

/// Typed index carried by `ItemBody::Heading`; wraps a `NonZeroUsize`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);

/// Typed index for JSX element data (presumably into
/// `Allocations::jsx_elements`); only present with the `mdx` feature.
#[cfg(feature = "mdx")]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct JsxElementIndex(usize);

/// Typed index carried by the directive `ItemBody` variants
/// (presumably into `Allocations::directives`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct DirectiveIndex(usize);
2847#[cfg(feature = "mdx")]
2849#[derive(Debug, Clone)]
2850pub(crate) enum JsxAttr<'a> {
2851 Boolean(CowStr<'a>),
2852 Literal(CowStr<'a>, CowStr<'a>),
2853 Expression(CowStr<'a>, CowStr<'a>),
2854 Spread(CowStr<'a>),
2855}
2856
#[cfg(feature = "mdx")]
impl<'a> JsxAttr<'a> {
    /// Converts the attribute into one that owns all of its strings and so
    /// has the `'static` lifetime. The variant is preserved.
    pub fn into_static(self) -> JsxAttr<'static> {
        match self {
            Self::Boolean(name) => JsxAttr::Boolean(name.into_static()),
            Self::Literal(name, value) => {
                JsxAttr::Literal(name.into_static(), value.into_static())
            }
            Self::Expression(name, value) => {
                JsxAttr::Expression(name.into_static(), value.into_static())
            }
            Self::Spread(value) => JsxAttr::Spread(value.into_static()),
        }
    }
}
2868
/// Data recorded for a JSX element (only with the `mdx` feature).
// NOTE(review): field descriptions are inferred from the field names;
// confirm against the JSX scanner.
#[cfg(feature = "mdx")]
#[derive(Debug, Clone)]
pub(crate) struct JsxElementData<'a> {
    /// Element name.
    pub name: CowStr<'a>,
    /// Attributes of the element.
    pub attrs: Vec<JsxAttr<'a>>,
    /// Presumably the raw source text of the tag.
    pub raw: CowStr<'a>,
    /// Presumably set for closing tags.
    pub is_closing: bool,
    /// Presumably set for self-closing tags.
    pub is_self_closing: bool,
}
2879
#[cfg(feature = "mdx")]
impl<'a> JsxElementData<'a> {
    /// Converts the element data into a version that owns all of its strings
    /// and so has the `'static` lifetime. Flags are carried over unchanged.
    pub fn into_static(self) -> JsxElementData<'static> {
        let JsxElementData {
            name,
            attrs,
            raw,
            is_closing,
            is_self_closing,
        } = self;
        let attrs = attrs.into_iter().map(|attr| attr.into_static()).collect();
        JsxElementData {
            name: name.into_static(),
            attrs,
            raw: raw.into_static(),
            is_closing,
            is_self_closing,
        }
    }
}
2892
/// Data recorded for a directive.
// NOTE(review): field descriptions are inferred from the field names;
// confirm against the directive scanner.
#[derive(Debug, Clone)]
pub(crate) struct DirectiveAttrData<'a> {
    /// Directive name.
    pub name: CowStr<'a>,
    /// Attribute key/value pairs.
    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    /// Presumably the source offset where the directive label starts.
    pub label_start: usize,
    /// Presumably the source offset where the directive label ends.
    pub label_end: usize,
    /// Meaning not visible in this section; TODO confirm with the producer.
    pub initial_size: u8,
}
2906
2907#[derive(Clone)]
2908pub(crate) struct Allocations<'a> {
2909 pub refdefs: RefDefs<'a>,
2910 pub refdefs_all: Vec<(LinkLabel<'a>, LinkDef<'a>)>,
2915 pub footdefs: FootnoteDefs<'a>,
2916 links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
2917 cows: Vec<CowStr<'a>>,
2918 alignments: Vec<Vec<Alignment>>,
2919 headings: Vec<HeadingAttributes<'a>>,
2920 #[cfg(feature = "mdx")]
2921 jsx_elements: Vec<JsxElementData<'a>>,
2922 directives: Vec<DirectiveAttrData<'a>>,
2923}
2924
2925#[derive(Clone)]
2927pub(crate) struct HeadingAttributes<'a> {
2928 pub id: Option<CowStr<'a>>,
2929 pub classes: Vec<CowStr<'a>>,
2930 pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
2931}
2932
2933#[derive(Clone, Default, Debug)]
2935pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2936
2937#[derive(Clone, Default, Debug)]
2939pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2940
2941impl<'input, 'b, 's> RefDefs<'input>
2942where
2943 's: 'b,
2944{
2945 pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2947 self.0.get(&UniCase::new(key.into()))
2948 }
2949
2950 pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2952 self.0.iter().map(|(k, v)| (k.as_ref(), v))
2953 }
2954}
2955
2956impl<'input, 'b, 's> FootnoteDefs<'input>
2957where
2958 's: 'b,
2959{
2960 pub fn contains(&'s self, key: &'b str) -> bool {
2962 self.0.contains_key(&UniCase::new(key.into()))
2963 }
2964 pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2966 self.0.get_mut(&UniCase::new(key))
2967 }
2968}
2969
2970impl<'a> Allocations<'a> {
2971 pub fn new() -> Self {
2972 Self {
2973 refdefs: RefDefs::default(),
2974 refdefs_all: Vec::new(),
2975 footdefs: FootnoteDefs::default(),
2976 links: Vec::with_capacity(128),
2977 cows: Vec::new(),
2978 alignments: Vec::new(),
2979 headings: Vec::new(),
2980 #[cfg(feature = "mdx")]
2981 jsx_elements: Vec::new(),
2982 directives: Vec::new(),
2983 }
2984 }
2985
2986 pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2987 let ix = self.cows.len();
2988 self.cows.push(cow);
2989 CowIndex(ix)
2990 }
2991
2992 pub fn allocate_link(
2993 &mut self,
2994 ty: LinkType,
2995 url: CowStr<'a>,
2996 title: CowStr<'a>,
2997 id: CowStr<'a>,
2998 ) -> LinkIndex {
2999 let ix = self.links.len();
3000 self.links.push((ty, url, title, id));
3001 LinkIndex(ix)
3002 }
3003
3004 pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
3005 let ix = self.alignments.len();
3006 self.alignments.push(alignment);
3007 AlignmentIndex(ix)
3008 }
3009
3010 pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
3011 let ix = self.headings.len();
3012 self.headings.push(attrs);
3013 let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
3016 HeadingIndex(ix_nonzero)
3017 }
3018
3019 pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
3020 core::mem::replace(&mut self.cows[ix.0], "".into())
3021 }
3022
3023 pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
3024 let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
3025 core::mem::replace(&mut self.links[ix.0], default_link)
3026 }
3027
3028 pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
3029 core::mem::take(&mut self.alignments[ix.0])
3030 }
3031
3032 #[cfg(feature = "mdx")]
3033 pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
3034 let ix = self.jsx_elements.len();
3035 self.jsx_elements.push(data);
3036 JsxElementIndex(ix)
3037 }
3038
3039 pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
3040 let ix = self.directives.len();
3041 self.directives.push(data);
3042 DirectiveIndex(ix)
3043 }
3044
3045 pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
3046 core::mem::replace(
3047 &mut self.directives[ix.0],
3048 DirectiveAttrData {
3049 name: "".into(),
3050 attributes: Vec::new(),
3051 label_start: 0,
3052 label_end: 0,
3053 initial_size: 0,
3054 },
3055 )
3056 }
3057
3058 pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
3059 &self.directives[ix.0]
3060 }
3061
3062 #[cfg(feature = "mdx")]
3063 pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
3064 core::mem::replace(
3065 &mut self.jsx_elements[ix.0],
3066 JsxElementData {
3067 name: "".into(),
3068 attrs: Vec::new(),
3069 raw: "".into(),
3070 is_closing: false,
3071 is_self_closing: false,
3072 },
3073 )
3074 }
3075}
3076
3077impl<'a> Index<CowIndex> for Allocations<'a> {
3078 type Output = CowStr<'a>;
3079
3080 fn index(&self, ix: CowIndex) -> &Self::Output {
3081 self.cows.index(ix.0)
3082 }
3083}
3084
3085impl<'a> Index<LinkIndex> for Allocations<'a> {
3086 type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
3087
3088 fn index(&self, ix: LinkIndex) -> &Self::Output {
3089 self.links.index(ix.0)
3090 }
3091}
3092
3093impl<'a> Index<AlignmentIndex> for Allocations<'a> {
3094 type Output = Vec<Alignment>;
3095
3096 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
3097 self.alignments.index(ix.0)
3098 }
3099}
3100
3101impl<'a> Index<HeadingIndex> for Allocations<'a> {
3102 type Output = HeadingAttributes<'a>;
3103
3104 fn index(&self, ix: HeadingIndex) -> &Self::Output {
3105 self.headings.index(ix.0.get() - 1)
3106 }
3107}
3108
/// Four `usize` markers, one per kind of inline HTML construct named by the
/// fields. All of them default to `0`.
// NOTE(review): presumably each marker records a source offset up to which a
// scan for that construct is already known to fail, letting the inline pass
// skip repeated scans — confirm against the inline HTML scanner.
#[derive(Default, Clone)]
pub(crate) struct HtmlScanGuard {
    /// Marker for CDATA sections.
    pub cdata: usize,
    /// Marker for processing instructions.
    pub processing: usize,
    /// Marker for declarations.
    pub declaration: usize,
    /// Marker for comments.
    pub comment: usize,
}
3121
3122pub trait ParserCallbacks<'input> {
3126 fn handle_broken_link(
3134 &mut self,
3135 #[allow(unused_variables)] link: BrokenLink<'input>,
3136 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3137 None
3138 }
3139}
3140
/// Adapter that lets an optional closure act as a [`ParserCallbacks`]
/// implementation.
///
/// With `Some(f)`, broken links are forwarded to `f`; with `None`, they are
/// left unresolved.
// `Debug` is deliberately not implemented: `F` is typically a closure, and
// closures do not implement `Debug`.
#[allow(missing_debug_implementations)]
pub struct BrokenLinkCallback<F>(Option<F>);
3146
3147impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
3148where
3149 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
3150{
3151 fn handle_broken_link(
3152 &mut self,
3153 link: BrokenLink<'input>,
3154 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3155 self.0.as_mut().and_then(|cb| cb(link))
3156 }
3157}
3158
3159impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
3160 fn handle_broken_link(
3161 &mut self,
3162 link: BrokenLink<'input>,
3163 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3164 (**self).handle_broken_link(link)
3165 }
3166}
3167
/// [`ParserCallbacks`] implementation that overrides nothing, so every hook
/// uses the trait's default behaviour.
// NOTE(review): the reason `Debug` is not derived for this unit struct is not
// visible from here; the allowance is kept as found.
#[allow(missing_debug_implementations)]
pub struct DefaultParserCallbacks;
3173
3174impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
3175
3176#[derive(Debug)]
3184pub struct OffsetIter<'a, CB> {
3185 parser: Parser<'a, CB>,
3186}
3187
3188impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
3189 pub fn reference_definitions(&self) -> &RefDefs<'_> {
3191 self.parser.reference_definitions()
3192 }
3193
3194 pub fn mdx_errors(&self) -> &[(usize, String)] {
3196 self.parser.mdx_errors()
3197 }
3198}
3199
3200impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
3201 type Item = (Event<'a>, Range<usize>);
3202
3203 fn next(&mut self) -> Option<Self::Item> {
3204 self.parser
3205 .inner
3206 .next_event_range(&mut self.parser.callbacks)
3207 }
3208}
3209
3210impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
3211 type Item = Event<'a>;
3212
3213 fn next(&mut self) -> Option<Event<'a>> {
3214 self.inner
3215 .next_event_range(&mut self.callbacks)
3216 .map(|(event, _range)| event)
3217 }
3218}
3219
3220impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
3221
3222impl<'input> ParserInner<'input> {
3223 fn next_event_range(
3224 &mut self,
3225 callbacks: &mut dyn ParserCallbacks<'input>,
3226 ) -> Option<(Event<'input>, Range<usize>)> {
3227 match self.tree.cur() {
3228 None => {
3229 let ix = self.tree.pop()?;
3230 let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
3231 self.tree.next_sibling(ix);
3233 return self.next_event_range(callbacks);
3234 } else {
3235 ix
3236 };
3237 let tag_end = body_to_tag_end(&self.tree[ix].item.body);
3238 self.tree.next_sibling(ix);
3239 let span = self.tree[ix].item.start..self.tree[ix].item.end;
3240 debug_assert!(span.start <= span.end);
3241 Some((Event::End(tag_end), span))
3242 }
3243 Some(cur_ix) => {
3244 let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
3245 self.tree.push();
3247 self.tree.cur().unwrap()
3248 } else {
3249 cur_ix
3250 };
3251 if self.tree[cur_ix].item.body.is_maybe_inline() {
3252 self.handle_inline(callbacks);
3253 }
3254
3255 let node = self.tree[cur_ix];
3256 let item = node.item;
3257 let event = item_to_event(item, self.text, &mut self.allocs);
3258 if let Event::Start(..) = event {
3259 self.tree.push();
3260 } else {
3261 self.tree.next_sibling(cur_ix);
3262 }
3263 debug_assert!(item.start <= item.end);
3264 Some((event, item.start..item.end))
3265 }
3266 }
3267 }
3268}
3269
3270fn body_to_tag_end(body: &ItemBody) -> TagEnd {
3271 match *body {
3272 ItemBody::Paragraph => TagEnd::Paragraph,
3273 ItemBody::Emphasis => TagEnd::Emphasis,
3274 ItemBody::Superscript => TagEnd::Superscript,
3275 ItemBody::Subscript => TagEnd::Subscript,
3276 ItemBody::Strong => TagEnd::Strong,
3277 ItemBody::Strikethrough => TagEnd::Strikethrough,
3278 ItemBody::Link(..) => TagEnd::Link,
3279 ItemBody::Image(..) => TagEnd::Image,
3280 ItemBody::Heading(level, _) => TagEnd::Heading(level),
3281 ItemBody::IndentCodeBlock(..) | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
3282 TagEnd::CodeBlock
3283 }
3284 ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
3285 ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
3286 ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
3287 ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
3288 ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
3289 ItemBody::List(_, c, _) => {
3290 let is_ordered = c == b'.' || c == b')';
3291 TagEnd::List(is_ordered)
3292 }
3293 ItemBody::ListItem(_, _) => TagEnd::Item,
3294 ItemBody::TableHead => TagEnd::TableHead,
3295 ItemBody::TableCell => TagEnd::TableCell,
3296 ItemBody::TableRow => TagEnd::TableRow,
3297 ItemBody::Table(..) => TagEnd::Table,
3298 ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
3299 ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
3300 ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
3301 ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
3302 ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
3303 #[cfg(feature = "mdx")]
3304 ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
3305 #[cfg(feature = "mdx")]
3306 ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
3307 _ => panic!("unexpected item body {:?}", body),
3308 }
3309}
3310
3311fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
3312 let tag = match item.body {
3313 ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
3314 ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
3315 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
3316 ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
3317 ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
3318 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
3319 ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
3320 ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
3321 ItemBody::SoftBreak => return Event::SoftBreak,
3322 ItemBody::HardBreak(_) => return Event::HardBreak,
3323 ItemBody::FootnoteReference(cow_ix) => {
3324 return Event::FootnoteReference(allocs.take_cow(cow_ix))
3325 }
3326 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
3327 ItemBody::Rule => return Event::Rule,
3328 ItemBody::Paragraph => Tag::Paragraph,
3329 ItemBody::Emphasis => Tag::Emphasis,
3330 ItemBody::Superscript => Tag::Superscript,
3331 ItemBody::Subscript => Tag::Subscript,
3332 ItemBody::Strong => Tag::Strong,
3333 ItemBody::Strikethrough => Tag::Strikethrough,
3334 ItemBody::Link(link_ix) => {
3335 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3336 Tag::Link {
3337 link_type,
3338 dest_url,
3339 title,
3340 id,
3341 }
3342 }
3343 ItemBody::Image(link_ix) => {
3344 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3345 Tag::Image {
3346 link_type,
3347 dest_url,
3348 title,
3349 id,
3350 }
3351 }
3352 ItemBody::Heading(level, Some(heading_ix)) => {
3353 let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
3354 Tag::Heading {
3355 level,
3356 id: id.clone(),
3357 classes: classes.clone(),
3358 attrs: attrs.clone(),
3359 }
3360 }
3361 ItemBody::Heading(level, None) => Tag::Heading {
3362 level,
3363 id: None,
3364 classes: Vec::new(),
3365 attrs: Vec::new(),
3366 },
3367 ItemBody::MathBlock(cow_ix) => {
3368 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3369 }
3370 ItemBody::FencedCodeBlock(cow_ix) => {
3371 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3372 }
3373 ItemBody::IndentCodeBlock(..) => Tag::CodeBlock(CodeBlockKind::Indented),
3374 ItemBody::ContainerDirective(_, dir_ix)
3375 | ItemBody::LeafDirective(dir_ix)
3376 | ItemBody::TextDirective(dir_ix) => {
3377 let kind = match item.body {
3378 ItemBody::ContainerDirective(..) => DirectiveKind::Container,
3379 ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
3380 _ => DirectiveKind::Text,
3381 };
3382 let dir = allocs.take_directive(dir_ix);
3383 Tag::Directive {
3384 kind,
3385 name: dir.name,
3386 attributes: dir.attributes,
3387 }
3388 }
3389 ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
3390 ItemBody::List(is_tight, c, listitem_start) => {
3391 if c == b'.' || c == b')' {
3392 Tag::List(Some(listitem_start), is_tight)
3393 } else {
3394 Tag::List(None, is_tight)
3395 }
3396 }
3397 ItemBody::ListItem(_, _) => Tag::Item,
3398 ItemBody::TableHead => Tag::TableHead,
3399 ItemBody::TableCell => Tag::TableCell,
3400 ItemBody::TableRow => Tag::TableRow,
3401 ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
3402 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
3403 ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
3404 ItemBody::Math(cow_ix, is_display) => {
3405 return if is_display {
3406 Event::DisplayMath(allocs.take_cow(cow_ix))
3407 } else {
3408 Event::InlineMath(allocs.take_cow(cow_ix))
3409 }
3410 }
3411 ItemBody::DefinitionList(_) => Tag::DefinitionList,
3412 ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
3413 ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
3414 #[cfg(feature = "mdx")]
3415 ItemBody::MdxJsxFlowElement(jsx_ix) => {
3416 let jsx = allocs.take_jsx_element(jsx_ix);
3417 Tag::MdxJsxFlowElement(jsx.raw)
3418 }
3419 #[cfg(feature = "mdx")]
3420 ItemBody::MdxJsxTextElement(jsx_ix) => {
3421 let jsx = allocs.take_jsx_element(jsx_ix);
3422 Tag::MdxJsxTextElement(jsx.raw)
3423 }
3424 #[cfg(feature = "mdx")]
3425 ItemBody::MdxFlowExpression(cow_ix) => {
3426 return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
3427 }
3428 #[cfg(feature = "mdx")]
3429 ItemBody::MdxTextExpression(cow_ix) => {
3430 return Event::MdxTextExpression(allocs.take_cow(cow_ix))
3431 }
3432 #[cfg(feature = "mdx")]
3433 ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
3434 _ => panic!("unexpected item body {:?}", item.body),
3435 };
3436
3437 Event::Start(tag)
3438}
3439
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::*;
    use crate::tree::Node;

    /// Builds a parser with the table, footnote, strikethrough, superscript,
    /// subscript and task-list extensions enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_SUPERSCRIPT);
        opts.insert(Options::ENABLE_SUBSCRIPT);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    /// Guards the in-memory size of a tree node on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = core::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    /// Guards the in-memory size of `ItemBody` on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = core::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    /// A lone `<` must parse (no panic) and yield exactly three events.
    #[test]
    fn single_open_fish_bracket() {
        assert_eq!(3, Parser::new("<").count());
    }

    /// A lone `#` must parse (no panic) and yield exactly two events.
    #[test]
    fn lone_hashtag() {
        assert_eq!(2, Parser::new("#").count());
    }

    /// Regression inputs; the test only checks that parsing completes.
    #[test]
    fn lots_of_backslashes() {
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    /// Offsets reported for a wikilink with display text inside a paragraph.
    #[test]
    fn issue_1030() {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_WIKILINKS);

        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);

        let offsets = parser
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect::<Vec<_>>();
        let expected_offsets = vec![
            (0..44),  // whole input
            (0..19),  // "For a new ferrari, "
            (19..43), // "[[Wikientry|click here]]"
            (31..41), // "click here"
            (19..43), // "[[Wikientry|click here]]" again
            (43..44), // "!"
            (0..44),  // whole input again
        ];
        assert_eq!(offsets, expected_offsets);
    }

    /// Regression input; the test only checks that parsing completes.
    #[test]
    fn issue_320() {
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    /// Regression inputs; the test only checks that parsing completes.
    #[test]
    fn issue_319() {
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    /// Regression inputs; the test only checks that parsing completes.
    #[test]
    fn issue_303() {
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    /// Regression inputs; the test only checks that parsing completes.
    #[test]
    fn issue_313() {
        parser_with_extensions("*]0[^\r\r*]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    /// Regression input; the test only checks that parsing completes.
    #[test]
    fn issue_311() {
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    /// Regression input containing multi-byte characters; the test only
    /// checks that parsing completes.
    #[test]
    fn issue_283() {
        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        parser_with_extensions(input).count();
    }

    /// Regression inputs; the test only checks that parsing completes.
    #[test]
    fn issue_289() {
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    /// Regression input; the test only checks that parsing completes.
    #[test]
    fn issue_306() {
        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
    }

    /// Regression input; the test only checks that parsing completes.
    #[test]
    fn issue_305() {
        parser_with_extensions("_6**6*_*").count();
    }

    /// Regression input; the test only checks that parsing completes.
    #[test]
    fn another_emphasis_panic() {
        parser_with_extensions("*__#_#__*").count();
    }

    /// Offsets reported for emphasis followed by plain text.
    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The range of a full reference link covers both bracket pairs.
    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .filter_map(|(ev, range)| match ev {
                    Event::Start(
                        Tag::Link {
                            link_type: LinkType::Reference,
                            ..
                        },
                        ..,
                    ) => Some(range),
                    _ => None,
                })
                .next()
                .unwrap();
        assert_eq!(5..30, range);
    }

    /// The range of a footnote reference covers `[^1]`.
    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .next()
            .unwrap();
        assert_eq!(12..16, range);
    }

    /// A `!` directly before a footnote reference stays separate text and
    /// does not shift the reference's range.
    #[test]
    fn footnote_offsets_exclamation() {
        // Remembers the last event seen before the footnote reference.
        let mut immediately_before_footnote = None;
        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => {
                    immediately_before_footnote = Some((ev, range));
                    None
                }
            })
            .next()
            .unwrap();
        assert_eq!(13..17, range);
        if let (Event::Text(exclamation), range_exclamation) =
            immediately_before_footnote.as_ref().unwrap()
        {
            assert_eq!("!", &exclamation[..]);
            assert_eq!(&(12..13), range_exclamation);
        } else {
            panic!("what came first, then? {immediately_before_footnote:?}");
        }
    }

    /// The fourth event (index 3) of a paragraph followed by a table spans
    /// the whole table.
    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    /// Span of the blank middle cell in the body row.
    #[test]
    fn table_cell_span() {
        // The blank cell holds two spaces: the expected span below is three
        // bytes long and starts right after the row's leading `a`, i.e. it
        // covers the `|` plus the two spaces and stops at the next `|`.
        let markdown = "a|b|c\n--|--|--\na|  |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            // Cells 0..=2 are the header row; cell 4 is the body row's middle cell.
            .nth(4)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 3),
            event_offset
        );
    }

    /// Offsets reported for an inline link surrounded by text.
    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// An empty ATX heading yields two events, both spanning the full line.
    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The broken-link callback fires once per broken link, not once per
    /// lookup attempt.
    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    /// The callback sees the reference and span of the broken link, and the
    /// `(url, title)` it returns end up in the emitted link tag.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title, id) in parser.filter_map(|event| match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }) => Some((link_type, dest_url, title, id)),
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
        // Make sure the loop body actually ran.
        assert!(link_tag_count > 0);
    }

    /// A fenced block reports `CodeBlockKind::Fenced` with its info string.
    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// A block indented by four spaces reports `CodeBlockKind::Indented`.
    #[test]
    fn code_block_kind_check_indented() {
        // Four spaces are required: with fewer, "```test" would open a fenced
        // block instead and no indented block would be reported.
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Reference definitions are available before and during iteration, and
    /// lookups ignore the label's letter case.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if let Some(_event) = parser.next() {
            // The key is a short-lived local, distinct from the input buffer.
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    /// Compile-time check that common closure / function shapes are accepted
    /// as broken-link callbacks for both `'static` and temporary inputs.
    #[test]
    #[allow(clippy::extra_unused_lifetimes)]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }

    /// Inline HTML that continues across a block-quote line is emitted as one
    /// owned string with the `> ` marker of the second line removed.
    #[test]
    fn inline_html_inside_blockquote() {
        let input = "> <foo\n> bar>";
        let events: Vec<_> = Parser::new(input).collect();
        let expected = [
            Event::Start(Tag::BlockQuote(None)),
            Event::Start(Tag::Paragraph),
            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
            Event::End(TagEnd::Paragraph),
            Event::End(TagEnd::BlockQuote(None)),
        ];
        assert_eq!(&events, &expected);
    }

    /// `has_pothole` is set only for wikilinks that use the `|` form.
    #[test]
    fn wikilink_has_pothole() {
        let input = "[[foo]] [[bar|baz]]";
        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
        let expected = [
            Event::Start(Tag::Paragraph),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: false },
                dest_url: CowStr::Borrowed("foo"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("foo")),
            Event::End(TagEnd::Link),
            Event::Text(CowStr::Borrowed(" ")),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: true },
                dest_url: CowStr::Borrowed("bar"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("baz")),
            Event::End(TagEnd::Link),
            Event::End(TagEnd::Paragraph),
        ];
        assert_eq!(&events, &expected);
    }

    /// Builds a parser with only the MDX extension enabled.
    #[cfg(feature = "mdx")]
    fn mdx_parser(text: &str) -> Parser<'_> {
        Parser::new_ext(text, Options::ENABLE_MDX)
    }

    /// A top-level `import` line becomes a single `MdxEsm` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_esm_import() {
        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
    }

    /// A top-level `export` line becomes a single `MdxEsm` event.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_esm_export() {
        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
    }

    /// A braced expression on its own line becomes a single
    /// `MdxFlowExpression` holding the text between the braces.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_flow_expression() {
        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
    }

    /// A self-closing JSX tag on its own line starts a flow element.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_jsx_flow_self_closing() {
        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
        assert!(!events.is_empty());
        assert!(
            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
        );
    }

    /// A fragment opener (`<>`) on its own line starts a flow element.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_jsx_flow_fragment() {
        let events: Vec<_> = mdx_parser("<>\n").collect();
        assert!(!events.is_empty());
        assert!(matches!(
            &events[0],
            Event::Start(Tag::MdxJsxFlowElement(_))
        ));
    }

    /// A braced expression inside a paragraph becomes `MdxTextExpression`.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_inline_expression() {
        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(
            has_expr,
            "Expected inline MDX expression, got: {:?}",
            events
        );
    }

    /// A JSX tag inside a paragraph starts a text element.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_inline_jsx() {
        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
    }

    /// With MDX enabled, ordinary HTML tag names are treated as JSX as well.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_all_tags_are_jsx() {
        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
    }

    /// Without the MDX option an `import` line is an ordinary paragraph.
    #[test]
    fn mdx_does_not_interfere_without_flag() {
        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
        assert!(events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
    }

    /// A braced expression inside a heading is still recognised.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_expression_in_heading() {
        let events: Vec<_> = mdx_parser("# {title}\n").collect();
        let has_heading = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
        assert!(has_heading, "Should have a heading");
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
        assert!(
            has_expr,
            "Heading should contain MdxTextExpression, got: {:?}",
            events
        );
    }

    /// Text and a braced expression can be mixed inside a heading.
    #[cfg(feature = "mdx")]
    #[test]
    fn mdx_expression_mixed_text_in_heading() {
        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
        let has_text = events
            .iter()
            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(has_text, "Should have text, got: {:?}", events);
        assert!(has_expr, "Should have expression, got: {:?}", events);
    }
}