1use alloc::{borrow::ToOwned, boxed::Box, collections::VecDeque, string::String, vec::Vec};
24use core::{
25 cmp::{max, min},
26 iter::FusedIterator,
27 num::NonZeroUsize,
28 ops::{Index, Range},
29};
30use rustc_hash::FxHashMap;
31use unicase::UniCase;
32
33use crate::{
34 firstpass::run_first_pass,
35 linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel},
36 mdx::*,
37 scanners::*,
38 strings::CowStr,
39 tree::{Tree, TreeIndex},
40 Alignment, BlockQuoteKind, CodeBlockKind, DirectiveKind, Event, HeadingLevel, LinkType,
41 MetadataBlockKind, Options, Tag, TagEnd,
42};
43
/// Presumably the maximum nesting depth of parentheses accepted when scanning
/// a link. NOTE(review): the constant is not used in this part of the file;
/// confirm the exact meaning against the scanner that consumes it.
pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
50
/// Payload of one node in the parse [`Tree`]: a span of the source text plus
/// the kind of construct found there.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    /// Byte offset into the source text at which the item starts.
    pub start: usize,
    /// Byte offset into the source text just past the item (used as an
    /// exclusive slice bound by the parser).
    pub end: usize,
    /// The kind of construct this item represents.
    pub body: ItemBody,
}
57
/// The kind of a tree [`Item`].
///
/// The `Maybe*` variants are tentative markers that the inline pass later
/// turns into a concrete inline item or into plain `Text`.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub(crate) enum ItemBody {
    // --- Tentative inline markers (see `is_maybe_inline`) ---
    /// Fields are read by the emphasis resolver as
    /// `(count, can_open, can_close)`.
    MaybeEmphasis(usize, bool, bool),
    /// Fields are read by the inline pass as
    /// `(preceded_by_backslash, brace_context)`.
    MaybeMath(bool, u8),
    /// Fields are read by the emphasis resolver as `(c, can_open, can_close)`.
    MaybeSmartQuote(u8, bool, bool),
    /// Fields are `(delimiter count, preceded_by_backslash)`.
    MaybeCode(usize, bool),
    /// Candidate for an autolink, inline HTML or, with MDX enabled, an inline
    /// JSX element.
    MaybeHtml,
    /// Candidate opening bracket of a link.
    MaybeLinkOpen,
    /// Candidate closing bracket; the flag is read as `could_be_ref`.
    MaybeLinkClose(bool),
    /// Candidate opening of an image.
    MaybeImage,

    // --- Resolved inline items ---
    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,
    Math(CowIndex, bool),
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    FootnoteReference(CowIndex),
    TaskListMarker(bool),
    InlineHtml,
    OwnedInlineHtml(CowIndex),
    SynthesizeText(CowIndex),
    SynthesizeChar(char),
    Html,
    Text {
        /// Set when the text was produced from a backslash-escaped marker.
        backslash_escaped: bool,
    },
    SoftBreak,
    HardBreak(bool),

    /// The default body; excluded from `is_block_level`.
    #[default]
    Root,

    // --- Remaining (non-inline) items ---
    Paragraph,
    TightParagraph,
    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>),
    FencedCodeBlock(CowIndex),
    MathBlock(CowIndex),
    IndentCodeBlock(bool),
    HtmlBlock(bool),
    BlockQuote(Option<BlockQuoteKind>),
    ContainerDirective(u8, DirectiveIndex),
    LeafDirective(DirectiveIndex),
    TextDirective(DirectiveIndex),
    List(bool, u8, u64),
    /// The first field is read by the inline pass as the item's `indent`.
    ListItem(usize, bool),
    FootnoteDefinition(CowIndex),
    MetadataBlock(MetadataBlockKind),

    DefinitionList(bool),
    MaybeDefinitionListTitle,
    DefinitionListTitle,
    DefinitionListDefinition(usize),

    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,

    MdxJsxFlowElement(JsxElementIndex),
    /// Produced by the inline pass from a `MaybeHtml` marker when MDX is
    /// enabled and an inline JSX tag is recognised.
    MdxJsxTextElement(JsxElementIndex),
    MdxFlowExpression(CowIndex),
    MdxTextExpression(CowIndex),
    MdxEsm(CowIndex),
}
149
150impl ItemBody {
151 pub(crate) fn is_maybe_inline(&self) -> bool {
152 use ItemBody::*;
153 matches!(
154 *self,
155 MaybeEmphasis(..)
156 | MaybeMath(..)
157 | MaybeSmartQuote(..)
158 | MaybeCode(..)
159 | MaybeHtml
160 | MaybeLinkOpen
161 | MaybeLinkClose(..)
162 | MaybeImage
163 )
164 }
165 pub(crate) fn is_block_level(&self) -> bool {
166 !self.is_inline() && !matches!(self, ItemBody::Root)
167 }
168 fn is_inline(&self) -> bool {
169 use ItemBody::*;
170 matches!(
171 *self,
172 MaybeEmphasis(..)
173 | MaybeMath(..)
174 | MaybeSmartQuote(..)
175 | MaybeCode(..)
176 | MaybeHtml
177 | MaybeLinkOpen
178 | MaybeLinkClose(..)
179 | MaybeImage
180 | Emphasis
181 | Strong
182 | Strikethrough
183 | Math(..)
184 | Code(..)
185 | Link(..)
186 | Image(..)
187 | FootnoteReference(..)
188 | TaskListMarker(..)
189 | InlineHtml
190 | OwnedInlineHtml(..)
191 | SynthesizeText(..)
192 | SynthesizeChar(..)
193 | Html
194 | Text { .. }
195 | SoftBreak
196 | HardBreak(..)
197 )
198 }
199}
200
/// Describes a reference-style link whose label has no matching reference
/// definition. It is handed to the parser callbacks, which may supply a
/// replacement URL and title.
#[derive(Debug)]
pub struct BrokenLink<'a> {
    /// Byte range in the source text covered by the link.
    pub span: core::ops::Range<usize>,
    /// The link type the parser inferred for the broken link.
    pub link_type: LinkType,
    /// The label that could not be resolved.
    pub reference: CowStr<'a>,
}
207
/// Markdown parser: pairs caller-supplied callbacks with the internal parser
/// state. `CB` defaults to [`DefaultParserCallbacks`].
pub struct Parser<'input, CB = DefaultParserCallbacks> {
    /// Callbacks consulted during parsing (e.g. for broken links).
    callbacks: CB,
    /// Parse tree and all state that does not depend on the callback type.
    inner: ParserInner<'input>,
}
213
/// Callback-independent parser state: the source text, the tree produced by
/// the first pass, and the scratch structures used while resolving inlines.
pub(crate) struct ParserInner<'input> {
    /// The complete source text being parsed.
    pub(crate) text: &'input str,
    /// Options the parser was created with.
    pub(crate) options: Options,
    /// Tree produced by the first pass; the inline pass rewrites it in place.
    pub(crate) tree: Tree<Item>,
    /// Side tables (reference definitions, footnote definitions, links,
    /// strings, ...) referenced by index from tree items.
    pub(crate) allocs: Allocations<'input>,
    // NOTE(review): presumably state for inline HTML scanning; it is not
    // touched directly in this part of the file.
    html_scan_guard: HtmlScanGuard,

    /// Remaining budget for reference-link expansion. Starts at
    /// `max(text.len(), 100_000)`; every resolved reference subtracts
    /// `url.len() + title.len()`, and once it reaches zero further lookups
    /// yield nothing.
    link_ref_expansion_limit: usize,

    /// MDX diagnostics as `(byte offset, message)` pairs.
    pub(crate) mdx_errors: Vec<(usize, String)>,

    /// Stack of open delimiter runs used while resolving emphasis.
    inline_stack: InlineStack,
    /// Stack of candidate link/image openers seen by the inline pass.
    link_stack: LinkStack,
    /// Stack of candidate wikilink openers (doubled opening brackets).
    wikilink_stack: LinkStack,
    /// Cache of code-span delimiters seen during a failed forward scan.
    code_delims: CodeDelims,
    // NOTE(review): only cleared in the visible code; presumably the math
    // counterpart of `code_delims`.
    math_delims: MathDelims,
}
251
252impl<'input, CB> core::fmt::Debug for Parser<'input, CB> {
253 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
254 f.debug_struct("Parser")
256 .field("text", &self.inner.text)
257 .field("options", &self.inner.options)
258 .field("callbacks", &..)
259 .finish()
260 }
261}
262
263impl<'a> BrokenLink<'a> {
264 pub fn into_static(self) -> BrokenLink<'static> {
268 BrokenLink {
269 span: self.span.clone(),
270 link_type: self.link_type,
271 reference: self.reference.into_string().into(),
272 }
273 }
274}
275
276impl<'input> Parser<'input, DefaultParserCallbacks> {
277 pub fn new(text: &'input str) -> Self {
279 Self::new_ext(text, Options::empty())
280 }
281
282 pub fn new_ext(text: &'input str, options: Options) -> Self {
284 Self::new_with_callbacks(text, options, DefaultParserCallbacks)
285 }
286}
287
288impl<'input, CB: ParserCallbacks<'input>> Parser<'input, CB> {
289 pub fn new_with_callbacks(text: &'input str, options: Options, callbacks: CB) -> Self {
314 let (mut tree, allocs, _firstpass_mdx_errors) = run_first_pass(text, options);
315 tree.reset();
316 let inline_stack = Default::default();
317 let link_stack = Default::default();
318 let wikilink_stack = Default::default();
319 let html_scan_guard = Default::default();
320 Parser {
321 callbacks,
322
323 inner: ParserInner {
324 text,
325 options,
326 tree,
327 allocs,
328 inline_stack,
329 link_stack,
330 wikilink_stack,
331 html_scan_guard,
332 link_ref_expansion_limit: text.len().max(100_000),
334 mdx_errors: Vec::new(),
335 code_delims: CodeDelims::new(),
336 math_delims: MathDelims::new(),
337 },
338 }
339 }
340
341 pub fn reference_definitions(&self) -> &RefDefs<'_> {
344 &self.inner.allocs.refdefs
345 }
346
347 pub fn mdx_errors(&self) -> &[(usize, String)] {
350 &self.inner.mdx_errors
351 }
352
353 pub fn into_offset_iter(self) -> OffsetIter<'input, CB> {
357 OffsetIter { parser: self }
358 }
359}
360
361impl<'input, F> Parser<'input, BrokenLinkCallback<F>> {
362 pub fn new_with_broken_link_callback(
371 text: &'input str,
372 options: Options,
373 broken_link_callback: Option<F>,
374 ) -> Self
375 where
376 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
377 {
378 Self::new_with_callbacks(text, options, BrokenLinkCallback(broken_link_callback))
379 }
380}
381
382impl<'input> ParserInner<'input> {
383 pub(crate) fn new(text: &'input str, options: Options) -> Self {
384 let (mut tree, allocs, firstpass_mdx_errors) = run_first_pass(text, options);
385 tree.reset();
386 ParserInner {
387 text,
388 options,
389 tree,
390 allocs,
391 inline_stack: Default::default(),
392 link_stack: Default::default(),
393 wikilink_stack: Default::default(),
394 html_scan_guard: Default::default(),
395 link_ref_expansion_limit: text.len().max(100_000),
396 mdx_errors: firstpass_mdx_errors,
397 code_delims: CodeDelims::new(),
398 math_delims: MathDelims::new(),
399 }
400 }
401
402 fn fetch_link_type_url_title(
421 &mut self,
422 link_label: CowStr<'input>,
423 span: Range<usize>,
424 link_type: LinkType,
425 callbacks: &mut dyn ParserCallbacks<'input>,
426 ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
427 if self.link_ref_expansion_limit == 0 {
428 return None;
429 }
430
431 let (link_type, url, title) = self
432 .allocs
433 .refdefs
434 .get(link_label.as_ref())
435 .map(|matching_def| {
436 let title = matching_def
438 .title
439 .as_ref()
440 .cloned()
441 .unwrap_or_else(|| "".into());
442 let url = matching_def.dest.clone();
443 (link_type, url, title)
444 })
445 .or_else(|| {
446 let broken_link = BrokenLink {
448 span,
449 link_type,
450 reference: link_label,
451 };
452
453 callbacks
454 .handle_broken_link(broken_link)
455 .map(|(url, title)| (link_type.to_unknown(), url, title))
456 })?;
457
458 self.link_ref_expansion_limit = self
462 .link_ref_expansion_limit
463 .saturating_sub(url.len() + title.len());
464
465 Some((link_type, url, title))
466 }
467
468 pub(crate) fn handle_inline(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
475 self.handle_inline_pass1(callbacks);
476 let st_enabled = self.options.contains(Options::ENABLE_STRIKETHROUGH)
492 || self.options.contains(Options::ENABLE_SUBSCRIPT)
493 || self.options.contains(Options::ENABLE_SUPERSCRIPT);
494 if !st_enabled {
495 self.handle_emphasis_pass();
496 return;
497 }
498 let strikethrough_first = matches!(
499 self.first_inline_marker_char(self.tree.cur()),
500 Some(b'~') | Some(b'^')
501 );
502 if strikethrough_first {
503 self.handle_tildes_carets_pass();
504 self.handle_emphasis_pass();
505 } else {
506 self.handle_emphasis_pass();
507 self.handle_tildes_carets_pass();
508 }
509 }
510
511 fn first_inline_marker_char(&self, start: Option<TreeIndex>) -> Option<u8> {
514 let mut cur = start;
515 while let Some(cur_ix) = cur {
516 if let ItemBody::MaybeEmphasis(_, _, _) = self.tree[cur_ix].item.body {
517 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
518 if matches!(c, b'*' | b'_' | b'~' | b'^') {
519 return Some(c);
520 }
521 }
522 cur = self.tree[cur_ix].next;
523 }
524 None
525 }
526
527 fn handle_emphasis_pass(&mut self) {
532 let start = self.tree.cur();
533 self.resolve_emphasis_recursive(start);
534 }
535
536 fn resolve_emphasis_recursive(&mut self, start: Option<TreeIndex>) {
537 let saved = core::mem::take(&mut self.inline_stack);
541 self.handle_emphasis_in_scope(start);
542 self.inline_stack = saved;
543
544 let mut cur = start;
545 while let Some(cur_ix) = cur {
546 let next = self.tree[cur_ix].next;
547 match self.tree[cur_ix].item.body {
548 ItemBody::Emphasis
549 | ItemBody::Strong
550 | ItemBody::Strikethrough
551 | ItemBody::Subscript
552 | ItemBody::Superscript
553 | ItemBody::Link(_)
554 | ItemBody::Image(_) => {
555 let child = self.tree[cur_ix].child;
556 self.resolve_emphasis_recursive(child);
557 }
558 _ => {}
559 }
560 cur = next;
561 }
562 }
563
/// First inline pass over the sibling chain that starts at the tree cursor.
///
/// Resolves every tentative marker except emphasis-like delimiter runs and
/// smart quotes, which fall through the catch-all arm untouched:
///
/// * `MaybeHtml` becomes an MDX JSX text element (MDX only), an autolink,
///   inline HTML, or plain text;
/// * `MaybeMath` becomes a math span or plain text;
/// * `MaybeCode` becomes a code span or plain text;
/// * `MaybeLinkOpen` / `MaybeImage` are pushed on the link stacks, and
///   `MaybeLinkClose` tries to form a wikilink, inline link, footnote
///   reference or reference link with the most recent opener.
///
/// `callbacks` is consulted (via `fetch_link_type_url_title`) for reference
/// links that have no matching definition.
///
/// # Panics
///
/// Panics if the tree has no parent node to take the block end from.
fn handle_inline_pass1(&mut self, callbacks: &mut dyn ParserCallbacks<'input>) {
    let mut cur = self.tree.cur();
    let mut prev = None;

    // Scanners below only ever see the text up to the end of the parent node.
    let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
    let block_text = &self.text[..block_end];

    while let Some(mut cur_ix) = cur {
        match self.tree[cur_ix].item.body {
            ItemBody::MaybeHtml => {
                if self.options.contains(Options::ENABLE_MDX) {
                    let start = self.tree[cur_ix].item.start;
                    let next_byte = block_text.as_bytes().get(start + 1).copied();

                    // A `!` right after the marker is reported as an error and
                    // the marker is kept as plain text.
                    if next_byte == Some(b'!') {
                        self.mdx_errors.push((
                            start,
                            "Unexpected character `!` (U+0021) before name, expected a \
                             character that can start a name, such as a letter, `$`, or `_` \
                             (note: to create a comment in MDX, use `{/* text */}`)"
                                .to_string(),
                        ));
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }

                    // A complete inline JSX tag: turn this node into the
                    // element and skip the sibling nodes the tag covers.
                    if let Some(total_len) =
                        scan_mdx_inline_jsx(&block_text.as_bytes()[start..])
                    {
                        let end = start + total_len;
                        let node = scan_nodes_to_ix(&self.tree, self.tree[cur_ix].next, end);
                        let raw = &block_text[start..end];
                        let col = crate::mdx::column_at(block_text.as_bytes(), start);
                        let jsx_data = crate::mdx::parse_jsx_tag_with_column(raw, col, 0);
                        let mut allocator = oxc_allocator::Allocator::default();
                        crate::mdx::validate_jsx_expressions(
                            &jsx_data.attrs,
                            start,
                            &mut allocator,
                            &mut self.mdx_errors,
                        );
                        let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
                        self.tree[cur_ix].item.body = ItemBody::MdxJsxTextElement(jsx_ix);
                        self.tree[cur_ix].item.end = end;
                        self.tree[cur_ix].next = node;
                        prev = cur;
                        cur = node;
                        // The following node may start inside the tag; clip it.
                        if let Some(node_ix) = cur {
                            self.tree[node_ix].item.start =
                                max(self.tree[node_ix].item.start, end);
                        }
                        continue;
                    }

                    // Not a JSX tag. Decide whether the marker is silently kept
                    // as text (`true`) or additionally reported as an MDX error
                    // (`false`).
                    let bytes_block = block_text.as_bytes();
                    let is_text_fallback = match next_byte {
                        Some(b' ' | b'\t') => true,
                        Some(b'\n' | b'\r') => {
                            // Number of block quotes on the spine above this node.
                            let bq_depth = self
                                .tree
                                .walk_spine()
                                .filter(|&&ix| {
                                    matches!(self.tree[ix].item.body, ItemBody::BlockQuote(..))
                                })
                                .count();
                            // Move `probe` to the next significant byte, skipping
                            // whitespace and, inside block quotes, up to
                            // `bq_depth` `>` markers per round.
                            let mut probe = start + 1;
                            loop {
                                while probe < bytes_block.len()
                                    && matches!(
                                        bytes_block[probe],
                                        b' ' | b'\t' | b'\n' | b'\r'
                                    )
                                {
                                    probe += 1;
                                }
                                if bq_depth == 0
                                    || probe >= bytes_block.len()
                                    || bytes_block[probe] != b'>'
                                {
                                    break;
                                }
                                let mut consumed = 0;
                                while consumed < bq_depth
                                    && probe < bytes_block.len()
                                    && bytes_block[probe] == b'>'
                                {
                                    probe += 1;
                                    if probe < bytes_block.len() && bytes_block[probe] == b' ' {
                                        probe += 1;
                                    }
                                    consumed += 1;
                                }
                            }
                            if probe >= bytes_block.len() || bytes_block[probe] == b'>' {
                                // Nothing significant follows, or a `>` does.
                                false
                            } else {
                                // Anything other than a run of `-` or `=` that
                                // extends to the end of its line makes this a
                                // text fallback.
                                // NOTE(review): presumably this detects a setext
                                // heading underline on the next line.
                                let underline_char = bytes_block[probe];
                                if !matches!(underline_char, b'-' | b'=') {
                                    true
                                } else {
                                    let mut q = probe;
                                    while q < bytes_block.len()
                                        && bytes_block[q] == underline_char
                                    {
                                        q += 1;
                                    }
                                    while q < bytes_block.len()
                                        && matches!(bytes_block[q], b' ' | b'\t')
                                    {
                                        q += 1;
                                    }
                                    let at_eol = q >= bytes_block.len()
                                        || matches!(bytes_block[q], b'\n' | b'\r');
                                    if !at_eol {
                                        true
                                    } else {
                                        // `ls` is the start of the line holding the
                                        // marker; check whether that line begins
                                        // with up to three spaces and a `>`.
                                        let mut ls = start;
                                        while ls > 0
                                            && !matches!(bytes_block[ls - 1], b'\n' | b'\r')
                                        {
                                            ls -= 1;
                                        }
                                        let mut k = ls;
                                        let mut sp = 0;
                                        while k < start && bytes_block[k] == b' ' && sp < 3 {
                                            k += 1;
                                            sp += 1;
                                        }
                                        if k < start && bytes_block[k] == b'>' {
                                            true
                                        } else {
                                            // `us` is the start of the underline's
                                            // line; `underline_col` counts its
                                            // leading spaces.
                                            let mut us = probe;
                                            while us > 0
                                                && !matches!(bytes_block[us - 1], b'\n' | b'\r')
                                            {
                                                us -= 1;
                                            }
                                            let mut underline_col = 0;
                                            let mut uk = us;
                                            while uk < probe && bytes_block[uk] == b' ' {
                                                uk += 1;
                                                underline_col += 1;
                                            }
                                            // First list item yielded by the spine
                                            // walk, if any.
                                            let listitem_indent = self
                                                .tree
                                                .walk_spine()
                                                .filter_map(|&ix| {
                                                    match self.tree[ix].item.body {
                                                        ItemBody::ListItem(indent, _) => {
                                                            Some(indent)
                                                        }
                                                        _ => None,
                                                    }
                                                })
                                                .next();
                                            let in_blockquote =
                                                self.tree.walk_spine().any(|&ix| {
                                                    matches!(
                                                        self.tree[ix].item.body,
                                                        ItemBody::BlockQuote(..)
                                                    )
                                                });
                                            // Inside a block quote: the underline
                                            // line has no leading space or carries
                                            // no `>` before the underline.
                                            let bq_lazy = if in_blockquote {
                                                underline_col < 1
                                                    || !bytes_block[us..probe].contains(&b'>')
                                            } else {
                                                false
                                            };
                                            // Text fallback when the underline is
                                            // indented less than the list item, or
                                            // the block-quote condition holds.
                                            matches!(listitem_indent, Some(i) if underline_col < i)
                                                || bq_lazy
                                        }
                                    }
                                }
                            }
                        }
                        _ => false,
                    };
                    if !is_text_fallback {
                        self.mdx_errors.push((
                            start,
                            "Unexpected character after `<`, expected a valid JSX tag \
                             (note: to create a link in MDX, use `[text](url)`)"
                                .to_string(),
                        ));
                    }

                    // Error or not, the marker itself ends up as plain text.
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: false,
                    };
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                    continue;
                }

                // Without MDX: try an autolink first, then inline HTML.
                let next = self.tree[cur_ix].next;
                let autolink = if let Some(next_ix) = next {
                    scan_autolink(block_text, self.tree[next_ix].item.start)
                } else {
                    None
                };

                if let Some((ix, uri, link_type)) = autolink {
                    let node = scan_nodes_to_ix(&self.tree, next, ix);
                    // The link's only child is a text node covering the span
                    // one byte inside each end of the autolink.
                    let text_node = self.tree.create_node(Item {
                        start: self.tree[cur_ix].item.start + 1,
                        end: ix - 1,
                        body: ItemBody::Text {
                            backslash_escaped: false,
                        },
                    });
                    let link_ix =
                        self.allocs
                            .allocate_link(link_type, uri, "".into(), "".into());
                    self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
                    self.tree[cur_ix].item.end = ix;
                    self.tree[cur_ix].next = node;
                    self.tree[cur_ix].child = Some(text_node);
                    prev = cur;
                    cur = node;
                    if let Some(node_ix) = cur {
                        // Clip the following node to start after the link; if it
                        // was actually shortened, clear its escaped flag.
                        let orig_start = self.tree[node_ix].item.start;
                        let new_start = max(orig_start, ix);
                        self.tree[node_ix].item.start = new_start;
                        if new_start > orig_start {
                            if let ItemBody::Text { backslash_escaped } =
                                &mut self.tree[node_ix].item.body
                            {
                                *backslash_escaped = false;
                            }
                        }
                    }
                    continue;
                } else {
                    let inline_html = next.and_then(|next_ix| {
                        self.scan_inline_html(
                            block_text.as_bytes(),
                            self.tree[next_ix].item.start,
                        )
                    });
                    if let Some((span, ix)) = inline_html {
                        let node = scan_nodes_to_ix(&self.tree, next, ix);
                        // A non-empty `span` holds rewritten HTML bytes that are
                        // stored as an owned string; otherwise the item simply
                        // refers to the source range.
                        self.tree[cur_ix].item.body = if !span.is_empty() {
                            let converted_string =
                                String::from_utf8(span).expect("invalid utf8");
                            ItemBody::OwnedInlineHtml(
                                self.allocs.allocate_cow(converted_string.into()),
                            )
                        } else {
                            ItemBody::InlineHtml
                        };
                        self.tree[cur_ix].item.end = ix;
                        self.tree[cur_ix].next = node;
                        prev = cur;
                        cur = node;
                        if let Some(node_ix) = cur {
                            // Same clipping as in the autolink case above.
                            let orig_start = self.tree[node_ix].item.start;
                            let new_start = max(orig_start, ix);
                            self.tree[node_ix].item.start = new_start;
                            if new_start > orig_start {
                                if let ItemBody::Text { backslash_escaped } =
                                    &mut self.tree[node_ix].item.body
                                {
                                    *backslash_escaped = false;
                                }
                            }
                        }
                        continue;
                    }
                }
                // Neither an autolink nor inline HTML: plain text.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
            }
            ItemBody::MaybeMath(preceded_by_backslash, _brace_context) => {
                if preceded_by_backslash {
                    self.tree[cur_ix].item.body = ItemBody::Text {
                        backslash_escaped: true,
                    };
                    prev = cur;
                    cur = self.tree[cur_ix].next;
                    continue;
                }
                // Count the opening run: consecutive `MaybeMath` nodes whose
                // spans are directly adjacent.
                let mut open_count = 1usize;
                let mut open_end = cur_ix;
                {
                    let mut peek = self.tree[cur_ix].next;
                    while let Some(peek_ix) = peek {
                        if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                            && self.tree[peek_ix].item.start == self.tree[open_end].item.end
                        {
                            open_count += 1;
                            open_end = peek_ix;
                            peek = self.tree[peek_ix].next;
                        } else {
                            break;
                        }
                    }
                }

                // Look for a later run of exactly the same length to close with.
                let mut scan = self.tree[open_end].next;
                let mut close_ix = None;
                while let Some(scan_ix) = scan {
                    if matches!(self.tree[scan_ix].item.body, ItemBody::MaybeMath(..)) {
                        let mut run = 1usize;
                        let mut run_end = scan_ix;
                        let mut peek = self.tree[scan_ix].next;
                        while let Some(peek_ix) = peek {
                            if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
                                && self.tree[peek_ix].item.start == self.tree[run_end].item.end
                            {
                                run += 1;
                                run_end = peek_ix;
                                peek = self.tree[peek_ix].next;
                            } else {
                                break;
                            }
                        }
                        if run == open_count {
                            close_ix = Some(scan_ix);
                            break;
                        }
                        // Run of a different length: skip it as a whole.
                        scan = self.tree[run_end].next;
                        continue;
                    }
                    scan = self.tree[scan_ix].next;
                }

                if let Some(scan_ix) = close_ix {
                    self.make_math_span(cur_ix, scan_ix);
                } else {
                    // No closer: every node of the opening run becomes text.
                    let mut fail_ix = cur_ix;
                    loop {
                        self.tree[fail_ix].item.body = ItemBody::Text {
                            backslash_escaped: false,
                        };
                        if fail_ix == open_end {
                            break;
                        }
                        if let Some(next) = self.tree[fail_ix].next {
                            fail_ix = next;
                        } else {
                            break;
                        }
                    }
                }
            }
            ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
                if preceded_by_backslash {
                    // One delimiter is consumed by the backslash; if none are
                    // left, the node is escaped text.
                    search_count -= 1;
                    if search_count == 0 {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: true,
                        };
                        prev = cur;
                        cur = self.tree[cur_ix].next;
                        continue;
                    }
                }

                if self.code_delims.is_populated() {
                    // A previous scan already recorded the later delimiters;
                    // look the closer up instead of scanning again.
                    if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
                        self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                    } else {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: preceded_by_backslash,
                        };
                    }
                } else {
                    // Scan forward for a delimiter of the same length, recording
                    // the non-matching ones in `code_delims` on the way.
                    let mut scan = if search_count > 0 {
                        self.tree[cur_ix].next
                    } else {
                        None
                    };
                    while let Some(scan_ix) = scan {
                        if let ItemBody::MaybeCode(delim_count, _) =
                            self.tree[scan_ix].item.body
                        {
                            if search_count == delim_count {
                                self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                                self.code_delims.clear();
                                break;
                            } else {
                                self.code_delims.insert(delim_count, scan_ix);
                            }
                        }
                        scan = self.tree[scan_ix].next;
                    }
                    // The scan ran off the end without a match.
                    if scan.is_none() {
                        self.tree[cur_ix].item.body = ItemBody::Text {
                            backslash_escaped: preceded_by_backslash,
                        };
                    }
                }
            }
            ItemBody::MaybeLinkOpen => {
                // Text until a matching closer turns it into a link.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let link_open_doubled = self.tree[cur_ix]
                    .next
                    .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                    .unwrap_or(false);
                if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                    self.wikilink_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Link,
                    });
                }
                self.link_stack.push(LinkStackEl {
                    node: cur_ix,
                    ty: LinkStackTy::Link,
                });
            }
            ItemBody::MaybeImage => {
                // Same as `MaybeLinkOpen`, but recorded as an image opener.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let link_open_doubled = self.tree[cur_ix]
                    .next
                    .map(|ix| self.tree[ix].item.body == ItemBody::MaybeLinkOpen)
                    .unwrap_or(false);
                if self.options.contains(Options::ENABLE_WIKILINKS) && link_open_doubled {
                    self.wikilink_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Image,
                    });
                }
                self.link_stack.push(LinkStackEl {
                    node: cur_ix,
                    ty: LinkStackTy::Image,
                });
            }
            ItemBody::MaybeLinkClose(could_be_ref) => {
                // The node is made text up front. Every bare `continue` below
                // therefore re-enters the loop on this same node, which then
                // falls through the catch-all arm and advances normally.
                self.tree[cur_ix].item.body = ItemBody::Text {
                    backslash_escaped: false,
                };
                let tos_link = self.link_stack.pop();
                // A doubled closing bracket may end a wikilink.
                if self.options.contains(Options::ENABLE_WIKILINKS)
                    && self.tree[cur_ix]
                        .next
                        .map(|ix| {
                            matches!(self.tree[ix].item.body, ItemBody::MaybeLinkClose(..))
                        })
                        .unwrap_or(false)
                {
                    if let Some(node) = self.handle_wikilink(block_text, cur_ix, prev) {
                        cur = self.tree[node].next;
                        continue;
                    }
                }
                if let Some(tos) = tos_link {
                    // A non-image opener directly inside a link node is not
                    // allowed to form another link.
                    if tos.ty != LinkStackTy::Image
                        && matches!(
                            self.tree[self.tree.peek_up().unwrap()].item.body,
                            ItemBody::Link(..)
                        )
                    {
                        continue;
                    }
                    if tos.ty == LinkStackTy::Disabled {
                        continue;
                    }
                    let next = self.tree[cur_ix].next;
                    if let Some((next_ix, url, title)) =
                        self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
                    {
                        // Inline link: the opener node becomes the link, and the
                        // nodes between opener and closer become its children.
                        let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
                        if let Some(prev_ix) = prev {
                            self.tree[prev_ix].next = None;
                        }
                        cur = Some(tos.node);
                        cur_ix = tos.node;
                        let link_ix =
                            self.allocs
                                .allocate_link(LinkType::Inline, url, title, "".into());
                        self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
                            ItemBody::Image(link_ix)
                        } else {
                            ItemBody::Link(link_ix)
                        };
                        self.tree[cur_ix].child = self.tree[cur_ix].next;
                        self.tree[cur_ix].next = next_node;
                        self.tree[cur_ix].item.end = next_ix;
                        if let Some(next_node_ix) = next_node {
                            // Clip the node after the link; if it was shortened,
                            // clear its escaped flag.
                            let orig_start = self.tree[next_node_ix].item.start;
                            let new_start = max(orig_start, next_ix);
                            self.tree[next_node_ix].item.start = new_start;
                            if new_start > orig_start {
                                if let ItemBody::Text { backslash_escaped } =
                                    &mut self.tree[next_node_ix].item.body
                                {
                                    *backslash_escaped = false;
                                }
                            }
                        }

                        // NOTE(review): presumably this stops the remaining
                        // openers from forming links around the new link.
                        if tos.ty == LinkStackTy::Link {
                            self.disable_all_links();
                        }
                    } else {
                        // Not an inline link. First check whether the bracketed
                        // text on its own is a footnote label with a definition.
                        let first_bracket_start = self.tree[tos.node].item.start;
                        let first_bracket_end = self.tree[cur_ix].item.end;
                        let first_bracket_text =
                            &self.text[first_bracket_start..first_bracket_end];
                        if let Some((_, ReferenceLabel::Footnote(footlabel))) =
                            scan_link_label(&self.tree, first_bracket_text, self.options)
                        {
                            if self.allocs.footdefs.contains(&footlabel) {
                                let footref = self.allocs.allocate_cow(footlabel);
                                if let Some(def) = self
                                    .allocs
                                    .footdefs
                                    .get_mut(self.allocs.cows[footref.0].to_owned())
                                {
                                    def.use_count += 1;
                                }
                                // For an image opener the `!` is kept as a
                                // synthesized character node and the closing
                                // node carries the footnote reference.
                                let footnote_ix = if tos.ty == LinkStackTy::Image {
                                    self.tree[tos.node].next = Some(cur_ix);
                                    self.tree[tos.node].child = None;
                                    self.tree[tos.node].item.body =
                                        ItemBody::SynthesizeChar('!');
                                    self.tree[cur_ix].item.start =
                                        self.tree[tos.node].item.start + 1;
                                    self.tree[tos.node].item.end =
                                        self.tree[tos.node].item.start + 1;
                                    cur_ix
                                } else {
                                    tos.node
                                };
                                self.tree[footnote_ix].next = next;
                                self.tree[footnote_ix].child = None;
                                self.tree[footnote_ix].item.body =
                                    ItemBody::FootnoteReference(footref);
                                self.tree[footnote_ix].item.end = first_bracket_end;
                                prev = Some(footnote_ix);
                                cur = next;
                                self.link_stack.clear();
                                continue;
                            }
                        }
                        // Otherwise look at what follows the closing bracket to
                        // classify the reference form.
                        let scan_result =
                            scan_reference(&self.tree, block_text, next, self.options);
                        let (node_after_link, link_type) = match scan_result {
                            RefScan::LinkLabel(_, end_ix) => {
                                // Full reference: find the node that closes the
                                // second label and mark it as a closer that
                                // cannot itself start a reference.
                                let reference_close_node = if let Some(node) =
                                    scan_nodes_to_ix(&self.tree, next, end_ix - 1)
                                {
                                    node
                                } else {
                                    continue;
                                };
                                self.tree[reference_close_node].item.body =
                                    ItemBody::MaybeLinkClose(false);
                                let next_node = self.tree[reference_close_node].next;

                                (next_node, LinkType::Reference)
                            }
                            RefScan::Collapsed(next_node) => {
                                if !could_be_ref {
                                    continue;
                                }
                                (next_node, LinkType::Collapsed)
                            }
                            RefScan::UnexpectedFootnote => continue,
                            RefScan::FailedInvalidLabel => continue,
                            RefScan::Failed => {
                                if !could_be_ref {
                                    continue;
                                }
                                (next, LinkType::Shortcut)
                            }
                        };

                        // Label to look up: the explicit second label for a full
                        // reference, otherwise the bracketed text itself, which
                        // must be consumed entirely by the label scanner.
                        let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
                            RefScan::LinkLabel(l, end_ix) => {
                                Some((ReferenceLabel::Link(l), end_ix))
                            }
                            RefScan::Collapsed(..)
                            | RefScan::Failed
                            | RefScan::FailedInvalidLabel
                            | RefScan::UnexpectedFootnote => {
                                let label_start = self.tree[tos.node].item.end - 1;
                                let label_end = self.tree[cur_ix].item.end;
                                scan_link_label(
                                    &self.tree,
                                    &self.text[label_start..label_end],
                                    self.options,
                                )
                                .map(|(ix, label)| (label, label_start + ix))
                                .filter(|(_, end)| *end == label_end)
                            }
                        };

                        // Stored with the link as its id; empty without a label.
                        let id = match &label {
                            Some(
                                (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
                            ) => l.clone(),
                            None => "".into(),
                        };

                        if let Some((ReferenceLabel::Footnote(l), end)) = label {
                            let footref = self.allocs.allocate_cow(l);
                            if let Some(def) = self
                                .allocs
                                .footdefs
                                .get_mut(self.allocs.cows[footref.0].to_owned())
                            {
                                def.use_count += 1;
                            }
                            if self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
                                // Same node surgery as the footnote case above.
                                let footnote_ix = if tos.ty == LinkStackTy::Image {
                                    self.tree[tos.node].next = Some(cur_ix);
                                    self.tree[tos.node].child = None;
                                    self.tree[tos.node].item.body =
                                        ItemBody::SynthesizeChar('!');
                                    self.tree[cur_ix].item.start =
                                        self.tree[tos.node].item.start + 1;
                                    self.tree[tos.node].item.end =
                                        self.tree[tos.node].item.start + 1;
                                    cur_ix
                                } else {
                                    tos.node
                                };
                                self.tree[footnote_ix].next = next;
                                self.tree[footnote_ix].child = None;
                                self.tree[footnote_ix].item.body =
                                    ItemBody::FootnoteReference(footref);
                                self.tree[footnote_ix].item.end = end;
                                prev = Some(footnote_ix);
                                cur = next;
                                self.link_stack.clear();
                                continue;
                            }
                        } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
                            if let Some((def_link_type, url, title)) = self
                                .fetch_link_type_url_title(
                                    link_label,
                                    (self.tree[tos.node].item.start)..end,
                                    link_type,
                                    callbacks,
                                )
                            {
                                let link_ix =
                                    self.allocs.allocate_link(def_link_type, url, title, id);
                                self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
                                {
                                    ItemBody::Image(link_ix)
                                } else {
                                    ItemBody::Link(link_ix)
                                };
                                let label_node = self.tree[tos.node].next;

                                self.tree[tos.node].next = node_after_link;

                                // Attach the link text as children unless the
                                // brackets are empty (the opener is directly
                                // followed by this closer).
                                if label_node != cur {
                                    self.tree[tos.node].child = label_node;

                                    // Cut the child chain before the closer.
                                    if let Some(prev_ix) = prev {
                                        self.tree[prev_ix].next = None;
                                    }
                                }

                                self.tree[tos.node].item.end = end;

                                // Continue after the link node.
                                cur = Some(tos.node);
                                cur_ix = tos.node;

                                if tos.ty == LinkStackTy::Link {
                                    self.disable_all_links();
                                }
                            }
                        }
                    }
                }
            }
            _ => {}
        }
        prev = cur;
        cur = self.tree[cur_ix].next;
    }
    // Scratch state is per block; reset it for the next one.
    self.link_stack.clear();
    self.wikilink_stack.clear();
    self.code_delims.clear();
    self.math_delims.clear();
}
1390
1391 fn handle_wikilink(
1397 &mut self,
1398 block_text: &'input str,
1399 cur_ix: TreeIndex,
1400 prev: Option<TreeIndex>,
1401 ) -> Option<TreeIndex> {
1402 let next_ix = self.tree[cur_ix].next.unwrap();
1403 if let Some(tos) = self.wikilink_stack.pop() {
1406 if tos.ty == LinkStackTy::Disabled {
1407 return None;
1408 }
1409 let Some(body_node) = self.tree[tos.node].next.and_then(|ix| self.tree[ix].next) else {
1411 return None;
1413 };
1414 let start_ix = self.tree[body_node].item.start;
1415 let end_ix = self.tree[cur_ix].item.start;
1416 let wikilink = match scan_wikilink_pipe(
1417 block_text,
1418 start_ix, end_ix - start_ix,
1420 ) {
1421 Some((rest, wikitext)) => {
1422 if wikitext.is_empty() {
1424 return None;
1425 }
1426 let body_node = scan_nodes_to_ix(&self.tree, Some(body_node), rest);
1428 if let Some(body_node) = body_node {
1429 self.tree[body_node].item.start = rest;
1432 Some((true, body_node, wikitext))
1433 } else {
1434 None
1435 }
1436 }
1437 None => {
1438 let wikitext = &block_text[start_ix..end_ix];
1439 if wikitext.is_empty() {
1441 return None;
1442 }
1443 let body_node = self.tree.create_node(Item {
1444 start: start_ix,
1445 end: end_ix,
1446 body: ItemBody::Text {
1447 backslash_escaped: false,
1448 },
1449 });
1450 Some((false, body_node, wikitext))
1451 }
1452 };
1453
1454 if let Some((has_pothole, body_node, wikiname)) = wikilink {
1455 let link_ix = self.allocs.allocate_link(
1456 LinkType::WikiLink { has_pothole },
1457 wikiname.into(),
1458 "".into(),
1459 "".into(),
1460 );
1461 if let Some(prev_ix) = prev {
1462 self.tree[prev_ix].next = None;
1463 }
1464 if tos.ty == LinkStackTy::Image {
1465 self.tree[tos.node].item.body = ItemBody::Image(link_ix);
1466 } else {
1467 self.tree[tos.node].item.body = ItemBody::Link(link_ix);
1468 }
1469 self.tree[tos.node].child = Some(body_node);
1470 self.tree[tos.node].next = self.tree[next_ix].next;
1471 self.tree[tos.node].item.end = end_ix + 2;
1472 self.disable_all_links();
1473 return Some(tos.node);
1474 }
1475 }
1476
1477 None
1478 }
1479
1480 fn handle_emphasis_in_scope(&mut self, start: Option<TreeIndex>) {
1481 let mut prev = None;
1482 let mut prev_ix: TreeIndex;
1483 let mut cur = start;
1484
1485 let mut single_quote_open: Option<TreeIndex> = None;
1486 let mut double_quote_open: bool = false;
1487
1488 while let Some(mut cur_ix) = cur {
1489 match self.tree[cur_ix].item.body {
1490 ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
1491 let run_length = count;
1492 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1493 let both = can_open && can_close;
1494 if c == b'~' || c == b'^' {
1502 prev_ix = cur_ix + count - 1;
1503 prev = Some(prev_ix);
1504 cur = self.tree[prev_ix].next;
1505 continue;
1506 }
1507 if can_close {
1508 while let Some(el) =
1509 self.inline_stack
1510 .find_match(&mut self.tree, c, run_length, count, both)
1511 {
1512 if let Some(prev_ix) = prev {
1514 self.tree[prev_ix].next = None;
1515 }
1516 let match_count = min(2, min(count, el.count));
1525 let mut end = cur_ix - 1;
1527 let mut start = el.start + el.count;
1528
1529 while start > el.start + el.count - match_count {
1531 let inc = if start > el.start + el.count - match_count + 1 {
1532 2
1533 } else {
1534 1
1535 };
1536 let ty = if c == b'~' {
1537 if inc == 2 {
1538 if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1539 ItemBody::Strikethrough
1540 } else {
1541 ItemBody::Text {
1542 backslash_escaped: false,
1543 }
1544 }
1545 } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1546 ItemBody::Subscript
1547 } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1548 ItemBody::Strikethrough
1549 } else {
1550 ItemBody::Text {
1551 backslash_escaped: false,
1552 }
1553 }
1554 } else if c == b'^' {
1555 if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1556 ItemBody::Superscript
1557 } else {
1558 ItemBody::Text {
1559 backslash_escaped: false,
1560 }
1561 }
1562 } else if inc == 2 {
1563 ItemBody::Strong
1564 } else {
1565 ItemBody::Emphasis
1566 };
1567
1568 let root = start - inc;
1569 end = end + inc;
1570 self.tree[root].item.body = ty;
1571 self.tree[root].item.end = self.tree[end].item.end;
1572 self.tree[root].child = Some(start);
1573 self.tree[root].next = None;
1574 start = root;
1575 }
1576
1577 prev_ix = el.start + el.count - match_count;
1579 prev = Some(prev_ix);
1580 cur = self.tree[cur_ix + match_count - 1].next;
1581 self.tree[prev_ix].next = cur;
1582
1583 if el.count > match_count {
1584 self.inline_stack.push(InlineEl {
1585 start: el.start,
1586 count: el.count - match_count,
1587 run_length: el.run_length,
1588 c: el.c,
1589 both: el.both,
1590 })
1591 }
1592 count -= match_count;
1593 if count > 0 {
1594 cur_ix = cur.unwrap();
1595 } else {
1596 break;
1597 }
1598 }
1599 }
1600 if count > 0 {
1601 if can_open {
1602 self.inline_stack.push(InlineEl {
1603 start: cur_ix,
1604 run_length,
1605 count,
1606 c,
1607 both,
1608 });
1609 } else {
1610 for i in 0..count {
1611 self.tree[cur_ix + i].item.body = ItemBody::Text {
1612 backslash_escaped: false,
1613 };
1614 }
1615 }
1616 prev_ix = cur_ix + count - 1;
1617 prev = Some(prev_ix);
1618 cur = self.tree[prev_ix].next;
1619 }
1620 }
1621 ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
1622 self.tree[cur_ix].item.body = match c {
1623 b'\'' => {
1624 if let (Some(open_ix), true) = (single_quote_open, can_close) {
1625 self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
1626 single_quote_open = None;
1627 } else if can_open {
1628 single_quote_open = Some(cur_ix);
1629 }
1630 ItemBody::SynthesizeChar('’')
1631 }
1632 _ => {
1633 if can_close && double_quote_open {
1634 double_quote_open = false;
1635 ItemBody::SynthesizeChar('”')
1636 } else {
1637 if can_open && !double_quote_open {
1638 double_quote_open = true;
1639 }
1640 ItemBody::SynthesizeChar('“')
1641 }
1642 }
1643 };
1644 prev = cur;
1645 cur = self.tree[cur_ix].next;
1646 }
1647 ItemBody::HardBreak(true) => {
1648 if self.tree[cur_ix].next.is_none() {
1649 self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
1650 }
1651 prev = cur;
1652 cur = self.tree[cur_ix].next;
1653 }
1654 _ => {
1655 prev = cur;
1656 cur = self.tree[cur_ix].next;
1657 }
1658 }
1659 }
1660 self.inline_stack.pop_all(&mut self.tree);
1661 }
1662
/// Runs the `~` / `^` delimiter resolution over the sibling chain that starts
/// at the tree's current node (`self.tree.cur()`).
fn handle_tildes_carets_pass(&mut self) {
    // Read the cursor first so the tree is not borrowed during the `&mut self` call.
    let start = self.tree.cur();
    self.resolve_tildes_carets_in_scope(start);
}
1674 fn resolve_tildes_carets_in_scope(&mut self, start: Option<TreeIndex>) {
1675 let mut stack: Vec<InlineEl> = Vec::new();
1676 let mut cur = start;
1677 let mut prev: Option<TreeIndex> = None;
1678 while let Some(mut cur_ix) = cur {
1679 match self.tree[cur_ix].item.body {
1680 ItemBody::MaybeEmphasis(count, can_open, can_close) => {
1681 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
1682 if c != b'~' && c != b'^' {
1683 prev = Some(cur_ix);
1684 cur = self.tree[cur_ix].next;
1685 continue;
1686 }
1687 let run_length = count;
1688 let mut remaining = count;
1689 if can_close {
1690 while remaining > 0 {
1691 let res = stack
1692 .iter()
1693 .enumerate()
1694 .rfind(|(_, el)| el.c == c && el.run_length == run_length);
1695 let Some((matching_ix, matching_el)) = res else {
1696 break;
1697 };
1698 let matching_el = *matching_el;
1699 if let Some(prev_ix) = prev {
1700 self.tree[prev_ix].next = None;
1701 }
1702 for el in &stack[(matching_ix + 1)..] {
1705 for i in 0..el.count {
1706 self.tree[el.start + i].item.body = ItemBody::Text {
1707 backslash_escaped: false,
1708 };
1709 }
1710 }
1711 stack.truncate(matching_ix);
1712 let match_count =
1713 core::cmp::min(2, core::cmp::min(remaining, matching_el.count));
1714 let mut end = cur_ix - 1;
1715 let mut sub_start = matching_el.start + matching_el.count;
1716 while sub_start > matching_el.start + matching_el.count - match_count {
1717 let inc = if sub_start
1718 > matching_el.start + matching_el.count - match_count + 1
1719 {
1720 2
1721 } else {
1722 1
1723 };
1724 let ty = if c == b'~' {
1725 if inc == 2 {
1726 if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1727 ItemBody::Strikethrough
1728 } else {
1729 ItemBody::Text {
1730 backslash_escaped: false,
1731 }
1732 }
1733 } else if self.options.contains(Options::ENABLE_SUBSCRIPT) {
1734 ItemBody::Subscript
1735 } else if self.options.contains(Options::ENABLE_STRIKETHROUGH) {
1736 ItemBody::Strikethrough
1737 } else {
1738 ItemBody::Text {
1739 backslash_escaped: false,
1740 }
1741 }
1742 } else if self.options.contains(Options::ENABLE_SUPERSCRIPT) {
1743 ItemBody::Superscript
1744 } else {
1745 ItemBody::Text {
1746 backslash_escaped: false,
1747 }
1748 };
1749 let root = sub_start - inc;
1750 end = end + inc;
1751 self.tree[root].item.body = ty;
1752 self.tree[root].item.end = self.tree[end].item.end;
1753 self.tree[root].child = Some(sub_start);
1754 self.tree[root].next = None;
1755 sub_start = root;
1756 }
1757 let new_prev_ix = matching_el.start + matching_el.count - match_count;
1758 let new_cur = self.tree[cur_ix + match_count - 1].next;
1759 self.tree[new_prev_ix].next = new_cur;
1760 prev = Some(new_prev_ix);
1761 if matching_el.count > match_count {
1762 stack.push(InlineEl {
1763 start: matching_el.start,
1764 count: matching_el.count - match_count,
1765 run_length: matching_el.run_length,
1766 c: matching_el.c,
1767 both: matching_el.both,
1768 });
1769 }
1770 remaining -= match_count;
1771 if remaining > 0 {
1772 let Some(next_cur) = new_cur else { break };
1773 cur_ix = next_cur;
1774 } else {
1775 break;
1776 }
1777 }
1778 }
1779 if remaining > 0 {
1780 if can_open {
1781 stack.push(InlineEl {
1782 start: cur_ix,
1783 count: remaining,
1784 run_length,
1785 c,
1786 both: can_open && can_close,
1787 });
1788 } else {
1789 for i in 0..remaining {
1790 self.tree[cur_ix + i].item.body = ItemBody::Text {
1791 backslash_escaped: false,
1792 };
1793 }
1794 }
1795 let prev_ix = cur_ix + remaining - 1;
1796 prev = Some(prev_ix);
1797 cur = self.tree[prev_ix].next;
1798 } else {
1799 cur = self.tree[prev.unwrap()].next;
1800 }
1801 continue;
1802 }
1803 ItemBody::Emphasis
1804 | ItemBody::Strong
1805 | ItemBody::Strikethrough
1806 | ItemBody::Subscript
1807 | ItemBody::Superscript
1808 | ItemBody::Link(_)
1809 | ItemBody::Image(_) => {
1810 let child = self.tree[cur_ix].child;
1811 self.resolve_tildes_carets_in_scope(child);
1812 }
1813 _ => {}
1814 }
1815 prev = Some(cur_ix);
1816 cur = self.tree[cur_ix].next;
1817 }
1818 for el in stack {
1820 for i in 0..el.count {
1821 self.tree[el.start + i].item.body = ItemBody::Text {
1822 backslash_escaped: false,
1823 };
1824 }
1825 }
1826 }
1827
/// Forwards `disable_all_links` to both bracket stacks: the regular link
/// stack and the wikilink stack.
fn disable_all_links(&mut self) {
    self.link_stack.disable_all_links();
    self.wikilink_stack.disable_all_links();
}
1832
1833 fn scan_inline_link(
1835 &self,
1836 underlying: &'input str,
1837 mut ix: usize,
1838 node: Option<TreeIndex>,
1839 ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
1840 if underlying.as_bytes().get(ix) != Some(&b'(') {
1841 return None;
1842 }
1843 ix += 1;
1844
1845 let scan_separator = |ix: &mut usize| {
1846 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1847 if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1848 *ix += bl;
1849 *ix += skip_container_prefixes(
1850 &self.tree,
1851 &underlying.as_bytes()[*ix..],
1852 self.options,
1853 );
1854 }
1855 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1856 };
1857
1858 scan_separator(&mut ix);
1859
1860 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1861 let dest = unescape(dest, self.tree.is_in_table());
1862 ix += dest_length;
1863
1864 scan_separator(&mut ix);
1865
1866 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1867 ix += bytes_scanned;
1868 scan_separator(&mut ix);
1869 t
1870 } else {
1871 "".into()
1872 };
1873 if underlying.as_bytes().get(ix) != Some(&b')') {
1874 return None;
1875 }
1876 ix += 1;
1877
1878 Some((ix, dest, title))
1879 }
1880
1881 fn scan_link_title(
1883 &self,
1884 text: &'input str,
1885 start_ix: usize,
1886 node: Option<TreeIndex>,
1887 ) -> Option<(usize, CowStr<'input>)> {
1888 let bytes = text.as_bytes();
1889 let open = match bytes.get(start_ix) {
1890 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1891 _ => return None,
1892 };
1893 let close = if open == b'(' { b')' } else { open };
1894
1895 let mut title = String::new();
1896 let mut mark = start_ix + 1;
1897 let mut i = start_ix + 1;
1898
1899 while i < bytes.len() {
1900 let c = bytes[i];
1901
1902 if c == close {
1903 let cow = if title.is_empty() {
1904 (i - start_ix + 1, text[mark..i].into())
1905 } else {
1906 title.push_str(&text[mark..i]);
1907 (i - start_ix + 1, title.into())
1908 };
1909
1910 return Some(cow);
1911 }
1912 if c == open {
1913 return None;
1914 }
1915
1916 if c == b'\n' || c == b'\r' {
1917 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1918 if self.tree[node_ix].item.start > i {
1919 title.push_str(&text[mark..i]);
1920 title.push('\n');
1921 i = self.tree[node_ix].item.start;
1922 mark = i;
1923 continue;
1924 }
1925 }
1926 }
1927 if c == b'&' {
1928 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1929 title.push_str(&text[mark..i]);
1930 title.push_str(&value);
1931 i += n;
1932 mark = i;
1933 continue;
1934 }
1935 }
1936 if self.tree.is_in_table()
1937 && c == b'\\'
1938 && i + 2 < bytes.len()
1939 && bytes[i + 1] == b'\\'
1940 && bytes[i + 2] == b'|'
1941 {
1942 title.push_str(&text[mark..i]);
1945 i += 2;
1946 mark = i;
1947 }
1948 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1949 title.push_str(&text[mark..i]);
1950 i += 1;
1951 mark = i;
1952 }
1953
1954 i += 1;
1955 }
1956
1957 None
1958 }
1959
1960 fn make_math_span(&mut self, open: TreeIndex, close: TreeIndex) {
1961 let mut open_end = open;
1963 {
1964 let mut peek = self.tree[open].next;
1965 while let Some(peek_ix) = peek {
1966 if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
1967 && self.tree[peek_ix].item.start == self.tree[open_end].item.end
1968 && peek_ix != close
1969 {
1970 open_end = peek_ix;
1971 peek = self.tree[peek_ix].next;
1972 } else {
1973 break;
1974 }
1975 }
1976 }
1977 let mut close_end = close;
1979 {
1980 let mut peek = self.tree[close].next;
1981 while let Some(peek_ix) = peek {
1982 if matches!(self.tree[peek_ix].item.body, ItemBody::MaybeMath(..))
1983 && self.tree[peek_ix].item.start == self.tree[close_end].item.end
1984 {
1985 close_end = peek_ix;
1986 peek = self.tree[peek_ix].next;
1987 } else {
1988 break;
1989 }
1990 }
1991 }
1992
1993 let span_start = self.tree[open_end].item.end;
1994 let span_end = self.tree[close].item.start;
1995
1996 if span_start > span_end {
1997 self.tree[open].item.body = ItemBody::Text {
1998 backslash_escaped: false,
1999 };
2000 return;
2001 }
2002
2003 let spanned_text = &self.text[span_start..span_end];
2004 let spanned_bytes = spanned_text.as_bytes();
2005 let mut buf: Option<String> = None;
2006
2007 let mut start_ix = 0;
2008 let mut ix = 0;
2009 while ix < spanned_bytes.len() {
2010 let c = spanned_bytes[ix];
2011 if c == b'\r' || c == b'\n' {
2012 ix += 1;
2013 let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
2014 buf.push_str(&spanned_text[start_ix..ix]);
2015 let from = span_start + ix;
2024 let (scanned, leftover) = skip_container_prefixes_with_remaining(
2025 &self.tree,
2026 &self.text.as_bytes()[from..],
2027 self.options,
2028 );
2029 let scanned = scanned.min(spanned_bytes.len() - ix);
2030 ix += scanned;
2031 start_ix = ix;
2032 for _ in 0..leftover {
2036 buf.push(' ');
2037 }
2038 } else if c == b'\\'
2039 && spanned_bytes.get(ix + 1) == Some(&b'|')
2040 && self.tree.is_in_table()
2041 {
2042 let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
2043 buf.push_str(&spanned_text[start_ix..ix]);
2044 buf.push('|');
2045 ix += 2;
2046 start_ix = ix;
2047 } else {
2048 ix += 1;
2049 }
2050 }
2051
2052 let (opening, closing, all_spaces) = {
2053 let s = if let Some(buf) = &mut buf {
2054 buf.push_str(&spanned_text[start_ix..]);
2055 &buf[..]
2056 } else {
2057 spanned_text
2058 };
2059 (
2060 matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
2061 matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
2062 s.bytes().all(|b| b == b' ' || b == b'\n'),
2063 )
2064 };
2065
2066 let cow: CowStr<'input> = if !all_spaces && opening && closing {
2067 if let Some(mut buf) = buf {
2068 if !buf.is_empty() {
2069 buf.remove(0);
2070 buf.pop();
2071 }
2072 buf.into()
2073 } else {
2074 spanned_text[1..(spanned_text.len() - 1).max(1)].into()
2075 }
2076 } else if let Some(buf) = buf {
2077 buf.into()
2078 } else {
2079 spanned_text.into()
2080 };
2081
2082 self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), false);
2083 self.tree[open].item.end = self.tree[close_end].item.end;
2084 self.tree[open].next = self.tree[close_end].next;
2085 }
2086
2087 fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2091 let span_start = self.tree[open].item.end;
2092 let span_end = self.tree[close].item.start;
2093 let mut buf: Option<String> = None;
2094
2095 let spanned_text = &self.text[span_start..span_end];
2096 let spanned_bytes = spanned_text.as_bytes();
2097 let mut start_ix = 0;
2098 let mut ix = 0;
2099 while ix < spanned_bytes.len() {
2100 let c = spanned_bytes[ix];
2101 if c == b'\r' || c == b'\n' {
2102 let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
2103 buf.push_str(&spanned_text[start_ix..ix]);
2104 buf.push('\n');
2105 ix += 1;
2106 if c == b'\r' && spanned_bytes.get(ix) == Some(&b'\n') {
2107 ix += 1;
2108 }
2109 let from = span_start + ix;
2118 let (scanned, leftover) = skip_container_prefixes_with_remaining(
2119 &self.tree,
2120 &self.text.as_bytes()[from..],
2121 self.options,
2122 );
2123 let scanned = scanned.min(spanned_bytes.len() - ix);
2124 ix += scanned;
2125 start_ix = ix;
2126 for _ in 0..leftover {
2130 buf.push(' ');
2131 }
2132 } else if c == b'\\'
2133 && spanned_bytes.get(ix + 1) == Some(&b'|')
2134 && self.tree.is_in_table()
2135 {
2136 let buf = buf.get_or_insert_with(|| String::with_capacity(spanned_bytes.len()));
2137 buf.push_str(&spanned_text[start_ix..ix]);
2138 buf.push('|');
2139 ix += 2;
2140 start_ix = ix;
2141 } else {
2142 ix += 1;
2143 }
2144 }
2145
2146 let (opening, closing, all_spaces) = {
2147 let s = if let Some(buf) = &mut buf {
2148 buf.push_str(&spanned_text[start_ix..]);
2149 &buf[..]
2150 } else {
2151 spanned_text
2152 };
2153 (
2154 matches!(s.as_bytes().first(), Some(b' ' | b'\n')),
2155 matches!(s.as_bytes().last(), Some(b' ' | b'\n')),
2156 s.bytes().all(|b| b == b' ' || b == b'\n'),
2157 )
2158 };
2159
2160 let cow: CowStr<'input> = if !all_spaces && opening && closing {
2161 if let Some(mut buf) = buf {
2162 if !buf.is_empty() {
2163 buf.remove(0);
2164 buf.pop();
2165 }
2166 buf.into()
2167 } else {
2168 spanned_text[1..(spanned_text.len() - 1).max(1)].into()
2169 }
2170 } else if let Some(buf) = buf {
2171 buf.into()
2172 } else {
2173 spanned_text.into()
2174 };
2175
2176 if preceding_backslash {
2177 self.tree[open].item.body = ItemBody::Text {
2178 backslash_escaped: true,
2179 };
2180 self.tree[open].item.end = self.tree[open].item.start + 1;
2181 self.tree[open].next = Some(close);
2182 self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2183 self.tree[close].item.start = self.tree[open].item.start + 1;
2184 } else {
2185 self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2186 self.tree[open].item.end = self.tree[close].item.end;
2187 self.tree[open].next = self.tree[close].next;
2188 }
2189
2190 if !self.mdx_errors.is_empty() {
2193 self.mdx_errors
2194 .retain(|(offset, _)| *offset < span_start || *offset >= span_end);
2195 }
2196 }
2197
2198 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
2202 let c = *bytes.get(ix)?;
2203 if c == b'!' {
2204 Some((
2205 vec![],
2206 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
2207 ))
2208 } else if c == b'?' {
2209 Some((
2210 vec![],
2211 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
2212 ))
2213 } else {
2214 let (span, i) = scan_html_block_inner(
2215 &bytes[(ix - 1)..],
2217 Some(&|bytes| skip_container_prefixes(&self.tree, bytes, self.options)),
2218 )?;
2219 Some((span, i + ix - 1))
2220 }
2221 }
2222}
2223
2224pub(crate) fn scan_containers(
2226 tree: &Tree<Item>,
2227 line_start: &mut LineStart<'_>,
2228 options: Options,
2229) -> usize {
2230 let mut i = 0;
2231 for &node_ix in tree.walk_spine() {
2232 match tree[node_ix].item.body {
2233 ItemBody::BlockQuote(..) => {
2234 let save = line_start.clone();
2235 if options.contains(Options::ENABLE_MDX) {
2240 line_start.scan_all_space();
2241 } else {
2242 let _ = line_start.scan_space(3);
2243 }
2244 if !line_start.scan_blockquote_marker() {
2245 *line_start = save;
2246 break;
2247 }
2248 }
2249 ItemBody::ListItem(indent, _) => {
2250 let save = line_start.clone();
2251 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2252 *line_start = save;
2253 break;
2254 }
2255 }
2256 ItemBody::DefinitionListDefinition(indent) => {
2257 let save = line_start.clone();
2258 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
2259 *line_start = save;
2260 break;
2261 }
2262 }
2263 ItemBody::FootnoteDefinition(..) if options.contains(Options::ENABLE_FOOTNOTES) => {
2264 let save = line_start.clone();
2265 if !line_start.scan_space(4) && !line_start.is_at_eol() {
2266 *line_start = save;
2267 break;
2268 }
2269 }
2270 _ => (),
2271 }
2272 i += 1;
2273 }
2274 i
2275}
2276
2277pub(crate) fn skip_container_prefixes(tree: &Tree<Item>, bytes: &[u8], options: Options) -> usize {
2278 let mut line_start = LineStart::new(bytes);
2279 let _ = scan_containers(tree, &mut line_start, options);
2280 line_start.bytes_scanned()
2281}
2282
2283fn skip_container_prefixes_with_remaining(
2290 tree: &Tree<Item>,
2291 bytes: &[u8],
2292 options: Options,
2293) -> (usize, usize) {
2294 let mut line_start = LineStart::new(bytes);
2295 let _ = scan_containers(tree, &mut line_start, options);
2296 (line_start.bytes_scanned(), line_start.remaining_space())
2297}
2298
2299impl Tree<Item> {
2300 pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
2301 if end > start {
2302 if let Some(ix) = self.cur() {
2303 if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
2304 {
2305 self[ix].item.end = end;
2306 return;
2307 }
2308 }
2309 self.append(Item {
2310 start,
2311 end,
2312 body: ItemBody::Text { backslash_escaped },
2313 });
2314 }
2315 }
2316 pub(crate) fn is_in_table(&self) -> bool {
2323 fn might_be_in_table(item: &Item) -> bool {
2324 item.body.is_inline()
2325 || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
2326 | ItemBody::TableCell)
2327 }
2328 for &ix in self.walk_spine().rev() {
2329 if matches!(self[ix].item.body, ItemBody::Table(_)) {
2330 return true;
2331 }
2332 if !might_be_in_table(&self[ix].item) {
2333 return false;
2334 }
2335 }
2336 false
2337 }
2338}
2339
/// A run of delimiter nodes that may still open an inline span.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    /// Tree index of the first delimiter node of the run; node `i` of the run
    /// is addressed as `start + i`.
    start: TreeIndex,
    /// Number of delimiter nodes of the run that are still unmatched.
    count: usize,
    /// Length the run had when it was first pushed; unchanged by partial matches.
    run_length: usize,
    /// The delimiter byte, read from the source text at the run's start.
    c: u8,
    /// Whether the run could both open and close a span.
    both: bool,
}
2353
/// Stack of pending opener runs, plus per-delimiter lower bounds that limit
/// how far down `find_match` searches.
#[derive(Debug, Clone, Default)]
struct InlineStack {
    /// Pending openers; the most recently pushed one is last.
    stack: Vec<InlineEl>,
    /// Indexed by the slot constants of `impl InlineStack`. `find_match` only
    /// looks at stack entries at or above the bound selected for the closer.
    lower_bounds: [usize; 10],
}
2363
2364impl InlineStack {
2365 const UNDERSCORE_NOT_BOTH: usize = 0;
2369 const ASTERISK_NOT_BOTH: usize = 1;
2370 const ASTERISK_BASE: usize = 2;
2371 const TILDES: usize = 5;
2372 const UNDERSCORE_BASE: usize = 6;
2373 const CIRCUMFLEXES: usize = 9;
2374
2375 fn pop_all(&mut self, tree: &mut Tree<Item>) {
2376 for el in self.stack.drain(..) {
2377 for i in 0..el.count {
2378 tree[el.start + i].item.body = ItemBody::Text {
2379 backslash_escaped: false,
2380 };
2381 }
2382 }
2383 self.lower_bounds = [0; 10];
2384 }
2385
2386 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
2387 if c == b'_' {
2388 let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
2389 if both {
2390 mod3_lower
2391 } else {
2392 min(
2393 mod3_lower,
2394 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
2395 )
2396 }
2397 } else if c == b'*' {
2398 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
2399 if both {
2400 mod3_lower
2401 } else {
2402 min(
2403 mod3_lower,
2404 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
2405 )
2406 }
2407 } else if c == b'^' {
2408 self.lower_bounds[InlineStack::CIRCUMFLEXES]
2409 } else {
2410 self.lower_bounds[InlineStack::TILDES]
2411 }
2412 }
2413
2414 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
2415 if c == b'_' {
2416 if both {
2417 self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
2418 } else {
2419 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
2420 }
2421 } else if c == b'*' {
2422 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
2423 if !both {
2424 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
2425 }
2426 } else if c == b'^' {
2427 self.lower_bounds[InlineStack::CIRCUMFLEXES] = new_bound;
2428 } else {
2429 self.lower_bounds[InlineStack::TILDES] = new_bound;
2430 }
2431 }
2432
2433 fn truncate(&mut self, new_bound: usize) {
2434 self.stack.truncate(new_bound);
2435 for lower_bound in &mut self.lower_bounds {
2436 if *lower_bound > new_bound {
2437 *lower_bound = new_bound;
2438 }
2439 }
2440 }
2441
2442 fn find_match(
2455 &mut self,
2456 tree: &mut Tree<Item>,
2457 c: u8,
2458 run_length: usize,
2459 current_count: usize,
2460 both: bool,
2461 ) -> Option<InlineEl> {
2462 let lowerbound = min(
2472 self.stack.len(),
2473 self.get_lowerbound(c, current_count, both),
2474 );
2475 let res = self.stack[lowerbound..]
2476 .iter()
2477 .cloned()
2478 .enumerate()
2479 .rfind(|(_, el)| {
2480 if (c == b'~' || c == b'^') && run_length != el.run_length {
2481 return false;
2482 }
2483 el.c == c
2488 && (!both && !el.both
2489 || !(current_count + el.count).is_multiple_of(3)
2490 || current_count.is_multiple_of(3))
2491 });
2492
2493 if let Some((matching_ix, matching_el)) = res {
2494 let matching_ix = matching_ix + lowerbound;
2495 for el in &self.stack[(matching_ix + 1)..] {
2496 for i in 0..el.count {
2497 tree[el.start + i].item.body = ItemBody::Text {
2498 backslash_escaped: false,
2499 };
2500 }
2501 }
2502 self.truncate(matching_ix);
2503 Some(matching_el)
2504 } else {
2505 if c != b'~' && c != b'^' {
2515 self.set_lowerbound(c, current_count, both, self.stack.len());
2516 }
2517 None
2518 }
2519 }
2520
2521 fn trim_lower_bound(&mut self, ix: usize) {
2522 self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
2523 }
2524
2525 fn push(&mut self, el: InlineEl) {
2526 if el.c == b'~' {
2527 self.trim_lower_bound(InlineStack::TILDES);
2528 } else if el.c == b'^' {
2529 self.trim_lower_bound(InlineStack::CIRCUMFLEXES);
2530 }
2531 self.stack.push(el)
2532 }
2533}
2534
/// Outcome of `scan_reference`.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    /// A link label was scanned: the label, and the byte offset in the text
    /// just past the scanned label.
    LinkLabel(CowStr<'a>, usize),
    /// The text at the node was `[]`; holds the node after the closing node,
    /// if any.
    Collapsed(Option<TreeIndex>),
    /// The label scanned as a footnote label rather than a link label.
    UnexpectedFootnote,
    /// Nothing usable: no node, an escaped `[`, a `[]` without a following
    /// node, or text that does not start with `[`.
    Failed,
    /// The text starts with `[` but no label could be scanned from it.
    FailedInvalidLabel,
}
2549
2550fn scan_nodes_to_ix(
2553 tree: &Tree<Item>,
2554 mut node: Option<TreeIndex>,
2555 ix: usize,
2556) -> Option<TreeIndex> {
2557 while let Some(node_ix) = node {
2558 if tree[node_ix].item.end <= ix {
2559 node = tree[node_ix].next;
2560 } else {
2561 break;
2562 }
2563 }
2564 node
2565}
2566
2567fn scan_link_label<'text>(
2570 tree: &Tree<Item>,
2571 text: &'text str,
2572 options: Options,
2573) -> Option<(usize, ReferenceLabel<'text>)> {
2574 let bytes = text.as_bytes();
2575 if bytes.len() < 2 || bytes[0] != b'[' {
2576 return None;
2577 }
2578 let linebreak_handler = |bytes: &[u8]| Some(skip_container_prefixes(tree, bytes, options));
2579 if options.contains(Options::ENABLE_FOOTNOTES)
2580 && b'^' == bytes[1]
2581 && bytes.get(2) != Some(&b']')
2582 {
2583 let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = &|_| None;
2585 if let Some((byte_index, cow)) =
2586 scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
2587 {
2588 return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
2589 }
2590 }
2591 let (byte_index, cow) =
2592 scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
2593 Some((byte_index + 1, ReferenceLabel::Link(cow)))
2594}
2595
2596fn scan_reference<'b>(
2597 tree: &Tree<Item>,
2598 text: &'b str,
2599 cur: Option<TreeIndex>,
2600 options: Options,
2601) -> RefScan<'b> {
2602 let cur_ix = match cur {
2603 None => return RefScan::Failed,
2604 Some(cur_ix) => cur_ix,
2605 };
2606 let start = tree[cur_ix].item.start;
2607 let tail = &text.as_bytes()[start..];
2608
2609 if tail.first() == Some(&b'[') && start > 0 {
2616 let src = text.as_bytes();
2617 let mut backslashes = 0usize;
2618 let mut j = start;
2619 while j > 0 && src[j - 1] == b'\\' {
2620 backslashes += 1;
2621 j -= 1;
2622 }
2623 if backslashes % 2 == 1 {
2624 return RefScan::Failed;
2625 }
2626 }
2627
2628 if tail.starts_with(b"[]") {
2629 let Some(closing_node) = tree[cur_ix].next else {
2634 return RefScan::Failed;
2635 };
2636 RefScan::Collapsed(tree[closing_node].next)
2637 } else {
2638 let label = scan_link_label(tree, &text[start..], options);
2639 match label {
2640 Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
2641 Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
2642 None => {
2643 if tail.starts_with(b"[") {
2648 RefScan::FailedInvalidLabel
2649 } else {
2650 RefScan::Failed
2651 }
2652 }
2653 }
2654 }
2655}
2656
/// Stack of pending bracket openers.
#[derive(Clone, Default)]
struct LinkStack {
    /// Pending entries; the most recently pushed one is last.
    inner: Vec<LinkStackEl>,
    /// Entries below this index have already been visited by
    /// `disable_all_links`, so later calls only rescan `inner[disabled_ix..]`.
    disabled_ix: usize,
}
2662
2663impl LinkStack {
2664 fn push(&mut self, el: LinkStackEl) {
2665 self.inner.push(el);
2666 }
2667
2668 fn pop(&mut self) -> Option<LinkStackEl> {
2669 let el = self.inner.pop();
2670 self.disabled_ix = core::cmp::min(self.disabled_ix, self.inner.len());
2671 el
2672 }
2673
2674 fn clear(&mut self) {
2675 self.inner.clear();
2676 self.disabled_ix = 0;
2677 }
2678
2679 fn disable_all_links(&mut self) {
2680 for el in &mut self.inner[self.disabled_ix..] {
2681 if el.ty == LinkStackTy::Link {
2682 el.ty = LinkStackTy::Disabled;
2683 }
2684 }
2685 self.disabled_ix = self.inner.len();
2686 }
2687}
2688
/// One entry of a `LinkStack`.
#[derive(Clone, Debug)]
struct LinkStackEl {
    /// Tree node this entry refers to.
    /// NOTE(review): presumably the node of the opening bracket — confirm at the push sites.
    node: TreeIndex,
    /// Kind of entry; see `LinkStackTy`.
    ty: LinkStackTy,
}
2694
/// Kind of a `LinkStackEl`.
#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    /// A link opener; `LinkStack::disable_all_links` may turn it into `Disabled`.
    Link,
    /// An image opener; never disabled by `LinkStack::disable_all_links`.
    Image,
    /// A former `Link` entry that has been switched off.
    Disabled,
}
2701
/// A link definition as stored in `RefDefs`.
#[derive(Clone, Debug)]
pub struct LinkDef<'a> {
    /// Link destination.
    pub dest: CowStr<'a>,
    /// Optional link title.
    pub title: Option<CowStr<'a>>,
    /// Range attached to the definition.
    /// NOTE(review): presumably the byte range of the definition in the source — confirm.
    pub span: Range<usize>,
}
2709
2710impl<'a> LinkDef<'a> {
2711 pub fn into_static(self) -> LinkDef<'static> {
2712 LinkDef {
2713 dest: self.dest.into_static(),
2714 title: self.title.map(|s| s.into_static()),
2715 span: self.span,
2716 }
2717 }
2718}
2719
/// Bookkeeping stored for each footnote definition in `FootnoteDefs`.
#[derive(Clone, Debug)]
pub struct FootnoteDef {
    /// Usage counter.
    /// NOTE(review): presumably the number of references to this footnote — confirm where it is incremented.
    pub use_count: usize,
}
2725
/// Cache of delimiter nodes, grouped by a `count` key, so that a closer for a
/// given opener can be found without rescanning.
/// NOTE(review): the name suggests backtick code-span delimiters keyed by run length — confirm at the call sites.
struct CodeDelims {
    /// Maps a delimiter count to the tree indices inserted for that count, in
    /// insertion order.
    inner: FxHashMap<usize, VecDeque<TreeIndex>>,
    /// Set by the first `insert` call, which stores nothing.
    seen_first: bool,
}
2732
2733impl CodeDelims {
2734 fn new() -> Self {
2735 Self {
2736 inner: Default::default(),
2737 seen_first: false,
2738 }
2739 }
2740
2741 fn insert(&mut self, count: usize, ix: TreeIndex) {
2742 if self.seen_first {
2743 self.inner.entry(count).or_default().push_back(ix);
2744 } else {
2745 self.seen_first = true;
2748 }
2749 }
2750
2751 fn is_populated(&self) -> bool {
2752 !self.inner.is_empty()
2753 }
2754
2755 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
2756 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
2757 if ix > open_ix {
2758 return Some(ix);
2759 }
2760 }
2761 None
2762 }
2763
2764 fn clear(&mut self) {
2765 self.inner.clear();
2766 self.seen_first = false;
2767 }
2768}
2769
/// Cache of math delimiter candidates.
/// NOTE(review): the meaning of the `u8` key and of the two `bool` flags is not visible in this part of the file — confirm at the use sites.
struct MathDelims {
    /// Per-key queues of `(tree index, flag, flag)` entries.
    inner: FxHashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
}
2775
impl MathDelims {
    /// Creates an empty cache.
    fn new() -> Self {
        Self {
            inner: Default::default(),
        }
    }

    /// Removes every stored entry.
    fn clear(&mut self) {
        self.inner.clear();
    }
}
2787
/// Handle returned by `Allocations::allocate_link`: the position of the entry
/// in the `links` vector.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);
2790
/// Handle returned by `Allocations::allocate_cow`: the position of the string
/// in the `cows` vector.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);
2793
/// Handle returned by `Allocations::allocate_alignment`: the position of the
/// entry in the `alignments` vector.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);
2796
/// Handle returned by `Allocations::allocate_heading`. Stores the position in
/// the `headings` vector plus one, so the value is never zero.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);
2799
/// Index handle for JSX element data.
/// NOTE(review): presumably a position in `Allocations::jsx_elements` — confirm against the allocator.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct JsxElementIndex(usize);
2802
/// Index handle for directive data.
/// NOTE(review): presumably a position in `Allocations::directives` — confirm against the allocator.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct DirectiveIndex(usize);
2805
/// One attribute of a JSX element.
/// NOTE(review): the field meanings below are inferred from the variant names and from the binding names in `into_static` — confirm against the JSX scanner.
#[derive(Debug, Clone)]
pub(crate) enum JsxAttr<'a> {
    /// Attribute given by name only.
    Boolean(CowStr<'a>),
    /// Name followed by a literal value.
    Literal(CowStr<'a>, CowStr<'a>),
    /// Name followed by an expression value.
    Expression(CowStr<'a>, CowStr<'a>),
    /// Spread attribute carrying a single value.
    Spread(CowStr<'a>),
}
2814
2815impl<'a> JsxAttr<'a> {
2816 pub fn into_static(self) -> JsxAttr<'static> {
2817 match self {
2818 JsxAttr::Boolean(n) => JsxAttr::Boolean(n.into_static()),
2819 JsxAttr::Literal(n, v) => JsxAttr::Literal(n.into_static(), v.into_static()),
2820 JsxAttr::Expression(n, v) => JsxAttr::Expression(n.into_static(), v.into_static()),
2821 JsxAttr::Spread(v) => JsxAttr::Spread(v.into_static()),
2822 }
2823 }
2824}
2825
/// Data recorded for one JSX element.
/// NOTE(review): the field descriptions are inferred from the field names — confirm against the JSX scanner.
#[derive(Debug, Clone)]
pub(crate) struct JsxElementData<'a> {
    /// Element name.
    pub name: CowStr<'a>,
    /// Attributes of the element.
    pub attrs: Vec<JsxAttr<'a>>,
    /// Raw text of the element.
    pub raw: CowStr<'a>,
    /// Whether this is a closing tag.
    pub is_closing: bool,
    /// Whether this is a self-closing tag.
    pub is_self_closing: bool,
}
2835
2836impl<'a> JsxElementData<'a> {
2837 pub fn into_static(self) -> JsxElementData<'static> {
2838 JsxElementData {
2839 name: self.name.into_static(),
2840 attrs: self.attrs.into_iter().map(|a| a.into_static()).collect(),
2841 raw: self.raw.into_static(),
2842 is_closing: self.is_closing,
2843 is_self_closing: self.is_self_closing,
2844 }
2845 }
2846}
2847
/// Data recorded for one directive.
/// NOTE(review): the field descriptions are inferred from the field names — confirm against the directive scanner.
#[derive(Debug, Clone)]
pub(crate) struct DirectiveAttrData<'a> {
    /// Directive name.
    pub name: CowStr<'a>,
    /// Attribute key/value pairs.
    pub attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    /// Start offset of the directive label.
    pub label_start: usize,
    /// End offset of the directive label.
    pub label_end: usize,
    /// Size recorded when the directive was opened.
    /// NOTE(review): unit not visible here — confirm.
    pub initial_size: u8,
}
2861
/// Side storage for data that tree items refer to by index rather than embed.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    /// Link reference definitions, keyed by label.
    pub refdefs: RefDefs<'a>,
    /// `(label, definition)` pairs kept as a list.
    /// NOTE(review): presumably every definition in source order, duplicates included — confirm.
    pub refdefs_all: Vec<(LinkLabel<'a>, LinkDef<'a>)>,
    /// Footnote definitions, keyed by label.
    pub footdefs: FootnoteDefs<'a>,
    /// `(type, url, title, id)` tuples; indexed by `LinkIndex`.
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
    /// Strings; indexed by `CowIndex`.
    cows: Vec<CowStr<'a>>,
    /// Alignment lists; indexed by `AlignmentIndex`.
    alignments: Vec<Vec<Alignment>>,
    /// Heading attributes; `HeadingIndex` holds the position plus one.
    headings: Vec<HeadingAttributes<'a>>,
    /// JSX element data.
    jsx_elements: Vec<JsxElementData<'a>>,
    /// Directive data.
    directives: Vec<DirectiveAttrData<'a>>,
}
2878
/// Attributes attached to a heading, as stored in `Allocations`.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// Optional heading id.
    pub id: Option<CowStr<'a>>,
    /// Class names.
    pub classes: Vec<CowStr<'a>>,
    /// Other attributes as `(name, optional value)` pairs.
    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
}
2886
/// Map from link labels to their link definitions.
#[derive(Clone, Default, Debug)]
pub struct RefDefs<'input>(pub(crate) FxHashMap<LinkLabel<'input>, LinkDef<'input>>);
2890
/// Map from footnote label to footnote definition.
///
/// Keys are `FootnoteLabel`s; lookups wrap the requested string in `UniCase`
/// before querying the map.
#[derive(Clone, Default, Debug)]
pub struct FootnoteDefs<'input>(pub(crate) FxHashMap<FootnoteLabel<'input>, FootnoteDef>);
2894
2895impl<'input, 'b, 's> RefDefs<'input>
2896where
2897 's: 'b,
2898{
2899 pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
2901 self.0.get(&UniCase::new(key.into()))
2902 }
2903
2904 pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
2906 self.0.iter().map(|(k, v)| (k.as_ref(), v))
2907 }
2908}
2909
2910impl<'input, 'b, 's> FootnoteDefs<'input>
2911where
2912 's: 'b,
2913{
2914 pub fn contains(&'s self, key: &'b str) -> bool {
2916 self.0.contains_key(&UniCase::new(key.into()))
2917 }
2918 pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
2920 self.0.get_mut(&UniCase::new(key))
2921 }
2922}
2923
2924impl<'a> Allocations<'a> {
2925 pub fn new() -> Self {
2926 Self {
2927 refdefs: RefDefs::default(),
2928 refdefs_all: Vec::new(),
2929 footdefs: FootnoteDefs::default(),
2930 links: Vec::with_capacity(128),
2931 cows: Vec::new(),
2932 alignments: Vec::new(),
2933 headings: Vec::new(),
2934 jsx_elements: Vec::new(),
2935 directives: Vec::new(),
2936 }
2937 }
2938
2939 pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
2940 let ix = self.cows.len();
2941 self.cows.push(cow);
2942 CowIndex(ix)
2943 }
2944
2945 pub fn allocate_link(
2946 &mut self,
2947 ty: LinkType,
2948 url: CowStr<'a>,
2949 title: CowStr<'a>,
2950 id: CowStr<'a>,
2951 ) -> LinkIndex {
2952 let ix = self.links.len();
2953 self.links.push((ty, url, title, id));
2954 LinkIndex(ix)
2955 }
2956
2957 pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
2958 let ix = self.alignments.len();
2959 self.alignments.push(alignment);
2960 AlignmentIndex(ix)
2961 }
2962
2963 pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
2964 let ix = self.headings.len();
2965 self.headings.push(attrs);
2966 let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
2969 HeadingIndex(ix_nonzero)
2970 }
2971
2972 pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
2973 core::mem::replace(&mut self.cows[ix.0], "".into())
2974 }
2975
2976 pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
2977 let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
2978 core::mem::replace(&mut self.links[ix.0], default_link)
2979 }
2980
2981 pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
2982 core::mem::take(&mut self.alignments[ix.0])
2983 }
2984
2985 pub fn allocate_jsx_element(&mut self, data: JsxElementData<'a>) -> JsxElementIndex {
2986 let ix = self.jsx_elements.len();
2987 self.jsx_elements.push(data);
2988 JsxElementIndex(ix)
2989 }
2990
2991 pub fn allocate_directive(&mut self, data: DirectiveAttrData<'a>) -> DirectiveIndex {
2992 let ix = self.directives.len();
2993 self.directives.push(data);
2994 DirectiveIndex(ix)
2995 }
2996
2997 pub fn take_directive(&mut self, ix: DirectiveIndex) -> DirectiveAttrData<'a> {
2998 core::mem::replace(
2999 &mut self.directives[ix.0],
3000 DirectiveAttrData {
3001 name: "".into(),
3002 attributes: Vec::new(),
3003 label_start: 0,
3004 label_end: 0,
3005 initial_size: 0,
3006 },
3007 )
3008 }
3009
3010 pub fn directive_ref(&self, ix: DirectiveIndex) -> &DirectiveAttrData<'a> {
3011 &self.directives[ix.0]
3012 }
3013
3014 pub fn take_jsx_element(&mut self, ix: JsxElementIndex) -> JsxElementData<'a> {
3015 core::mem::replace(
3016 &mut self.jsx_elements[ix.0],
3017 JsxElementData {
3018 name: "".into(),
3019 attrs: Vec::new(),
3020 raw: "".into(),
3021 is_closing: false,
3022 is_self_closing: false,
3023 },
3024 )
3025 }
3026}
3027
3028impl<'a> Index<CowIndex> for Allocations<'a> {
3029 type Output = CowStr<'a>;
3030
3031 fn index(&self, ix: CowIndex) -> &Self::Output {
3032 self.cows.index(ix.0)
3033 }
3034}
3035
3036impl<'a> Index<LinkIndex> for Allocations<'a> {
3037 type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
3038
3039 fn index(&self, ix: LinkIndex) -> &Self::Output {
3040 self.links.index(ix.0)
3041 }
3042}
3043
3044impl<'a> Index<AlignmentIndex> for Allocations<'a> {
3045 type Output = Vec<Alignment>;
3046
3047 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
3048 self.alignments.index(ix.0)
3049 }
3050}
3051
3052impl<'a> Index<HeadingIndex> for Allocations<'a> {
3053 type Output = HeadingAttributes<'a>;
3054
3055 fn index(&self, ix: HeadingIndex) -> &Self::Output {
3056 self.headings.index(ix.0.get() - 1)
3057 }
3058}
3059
/// Four independent `usize` values, one per kind of HTML construct named by
/// the field. All of them start at zero via `Default`.
///
/// NOTE(review): presumably each value records how far the inline scanner has
/// already searched for the end of that construct, so a failed scan is not
/// repeated — confirm at the use sites.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Value tracked for CDATA sections.
    pub cdata: usize,
    /// Value tracked for processing instructions.
    pub processing: usize,
    /// Value tracked for declarations.
    pub declaration: usize,
    /// Value tracked for comments.
    pub comment: usize,
}
3072
/// Hooks that the parser invokes while producing events.
pub trait ParserCallbacks<'input> {
    /// Called with a [`BrokenLink`]; may return a pair of strings to use for
    /// it. In this file's tests the pair is `(url, title)` and shows up as the
    /// `dest_url` and `title` of the resulting link.
    ///
    /// The default implementation ignores `link` and returns `None`.
    fn handle_broken_link(
        &mut self,
        #[allow(unused_variables)] link: BrokenLink<'input>,
    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
        None
    }
}
3091
/// Adapter that lets an optional closure act as [`ParserCallbacks`].
///
/// When the wrapped option is `None`, `handle_broken_link` returns `None`.
#[allow(missing_debug_implementations)]
pub struct BrokenLinkCallback<F>(Option<F>);
3097
3098impl<'input, F> ParserCallbacks<'input> for BrokenLinkCallback<F>
3099where
3100 F: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
3101{
3102 fn handle_broken_link(
3103 &mut self,
3104 link: BrokenLink<'input>,
3105 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3106 self.0.as_mut().and_then(|cb| cb(link))
3107 }
3108}
3109
3110impl<'input> ParserCallbacks<'input> for Box<dyn ParserCallbacks<'input>> {
3111 fn handle_broken_link(
3112 &mut self,
3113 link: BrokenLink<'input>,
3114 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
3115 (**self).handle_broken_link(link)
3116 }
3117}
3118
/// Callback type that relies solely on the default method bodies of
/// [`ParserCallbacks`].
#[allow(missing_debug_implementations)]
pub struct DefaultParserCallbacks;
3124
// Empty on purpose: every method keeps the trait's default body, so
// `handle_broken_link` returns `None`.
impl<'input> ParserCallbacks<'input> for DefaultParserCallbacks {}
3126
/// Iterator wrapper around a [`Parser`] that yields each event together with
/// the `Range<usize>` reported for it.
#[derive(Debug)]
pub struct OffsetIter<'a, CB> {
    /// The wrapped parser; all work is delegated to it.
    parser: Parser<'a, CB>,
}
3138
3139impl<'a, CB: ParserCallbacks<'a>> OffsetIter<'a, CB> {
3140 pub fn reference_definitions(&self) -> &RefDefs<'_> {
3142 self.parser.reference_definitions()
3143 }
3144
3145 pub fn mdx_errors(&self) -> &[(usize, String)] {
3147 self.parser.mdx_errors()
3148 }
3149}
3150
3151impl<'a, CB: ParserCallbacks<'a>> Iterator for OffsetIter<'a, CB> {
3152 type Item = (Event<'a>, Range<usize>);
3153
3154 fn next(&mut self) -> Option<Self::Item> {
3155 self.parser
3156 .inner
3157 .next_event_range(&mut self.parser.callbacks)
3158 }
3159}
3160
3161impl<'a, CB: ParserCallbacks<'a>> Iterator for Parser<'a, CB> {
3162 type Item = Event<'a>;
3163
3164 fn next(&mut self) -> Option<Event<'a>> {
3165 self.inner
3166 .next_event_range(&mut self.callbacks)
3167 .map(|(event, _range)| event)
3168 }
3169}
3170
// Marker impl: declares that once `next` has returned `None` it keeps
// returning `None`.
impl<'a, CB: ParserCallbacks<'a>> FusedIterator for Parser<'a, CB> {}
3172
impl<'input> ParserInner<'input> {
    /// Produces the next event together with the `start..end` range of the
    /// tree item it was built from.
    ///
    /// Returns `None` when there is no current node and `self.tree.pop()`
    /// has nothing left to return. `callbacks` is handed to `handle_inline`
    /// when the current item's body reports `is_maybe_inline()`.
    fn next_event_range(
        &mut self,
        callbacks: &mut dyn ParserCallbacks<'input>,
    ) -> Option<(Event<'input>, Range<usize>)> {
        match self.tree.cur() {
            // No current node: pop an item off the tree and emit its end
            // event. NOTE(review): `pop` presumably returns the enclosing
            // container — confirm in `crate::tree`.
            None => {
                let ix = self.tree.pop()?;
                let ix = if matches!(self.tree[ix].item.body, ItemBody::TightParagraph) {
                    // Tight paragraphs produce no end event: step past the
                    // item and try again.
                    self.tree.next_sibling(ix);
                    return self.next_event_range(callbacks);
                } else {
                    ix
                };
                // Compute the end tag before moving the cursor past the item.
                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
                self.tree.next_sibling(ix);
                let span = self.tree[ix].item.start..self.tree[ix].item.end;
                debug_assert!(span.start <= span.end);
                Some((Event::End(tag_end), span))
            }
            Some(cur_ix) => {
                let cur_ix = if matches!(self.tree[cur_ix].item.body, ItemBody::TightParagraph) {
                    // Tight paragraphs produce no start event either: descend
                    // and continue with whatever node is current afterwards.
                    // The `unwrap` panics if there is none.
                    // NOTE(review): presumably a tight paragraph always has a
                    // child — confirm.
                    self.tree.push();
                    self.tree.cur().unwrap()
                } else {
                    cur_ix
                };
                // Resolve pending inline candidates before reading the item,
                // because the item is copied out of the tree only afterwards.
                if self.tree[cur_ix].item.body.is_maybe_inline() {
                    self.handle_inline(callbacks);
                }

                let node = self.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.text, &mut self.allocs);
                // A start event means the item is a container: descend into
                // it. Any other event moves on to the next sibling.
                if let Event::Start(..) = event {
                    self.tree.push();
                } else {
                    self.tree.next_sibling(cur_ix);
                }
                debug_assert!(item.start <= item.end);
                Some((event, item.start..item.end))
            }
        }
    }
}
3220
3221fn body_to_tag_end(body: &ItemBody) -> TagEnd {
3222 match *body {
3223 ItemBody::Paragraph => TagEnd::Paragraph,
3224 ItemBody::Emphasis => TagEnd::Emphasis,
3225 ItemBody::Superscript => TagEnd::Superscript,
3226 ItemBody::Subscript => TagEnd::Subscript,
3227 ItemBody::Strong => TagEnd::Strong,
3228 ItemBody::Strikethrough => TagEnd::Strikethrough,
3229 ItemBody::Link(..) => TagEnd::Link,
3230 ItemBody::Image(..) => TagEnd::Image,
3231 ItemBody::Heading(level, _) => TagEnd::Heading(level),
3232 ItemBody::IndentCodeBlock(..) | ItemBody::FencedCodeBlock(..) | ItemBody::MathBlock(..) => {
3233 TagEnd::CodeBlock
3234 }
3235 ItemBody::ContainerDirective(..) => TagEnd::Directive(DirectiveKind::Container),
3236 ItemBody::LeafDirective(..) => TagEnd::Directive(DirectiveKind::Leaf),
3237 ItemBody::TextDirective(..) => TagEnd::Directive(DirectiveKind::Text),
3238 ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
3239 ItemBody::HtmlBlock(_) => TagEnd::HtmlBlock,
3240 ItemBody::List(_, c, _) => {
3241 let is_ordered = c == b'.' || c == b')';
3242 TagEnd::List(is_ordered)
3243 }
3244 ItemBody::ListItem(_, _) => TagEnd::Item,
3245 ItemBody::TableHead => TagEnd::TableHead,
3246 ItemBody::TableCell => TagEnd::TableCell,
3247 ItemBody::TableRow => TagEnd::TableRow,
3248 ItemBody::Table(..) => TagEnd::Table,
3249 ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
3250 ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
3251 ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
3252 ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
3253 ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
3254 ItemBody::MdxJsxFlowElement(..) => TagEnd::MdxJsxFlowElement,
3255 ItemBody::MdxJsxTextElement(..) => TagEnd::MdxJsxTextElement,
3256 _ => panic!("unexpected item body {:?}", body),
3257 }
3258}
3259
3260fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
3261 let tag = match item.body {
3262 ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
3263 ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
3264 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
3265 ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
3266 ItemBody::HtmlBlock(_) => Tag::HtmlBlock,
3267 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
3268 ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
3269 ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
3270 ItemBody::SoftBreak => return Event::SoftBreak,
3271 ItemBody::HardBreak(_) => return Event::HardBreak,
3272 ItemBody::FootnoteReference(cow_ix) => {
3273 return Event::FootnoteReference(allocs.take_cow(cow_ix))
3274 }
3275 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
3276 ItemBody::Rule => return Event::Rule,
3277 ItemBody::Paragraph => Tag::Paragraph,
3278 ItemBody::Emphasis => Tag::Emphasis,
3279 ItemBody::Superscript => Tag::Superscript,
3280 ItemBody::Subscript => Tag::Subscript,
3281 ItemBody::Strong => Tag::Strong,
3282 ItemBody::Strikethrough => Tag::Strikethrough,
3283 ItemBody::Link(link_ix) => {
3284 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3285 Tag::Link {
3286 link_type,
3287 dest_url,
3288 title,
3289 id,
3290 }
3291 }
3292 ItemBody::Image(link_ix) => {
3293 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
3294 Tag::Image {
3295 link_type,
3296 dest_url,
3297 title,
3298 id,
3299 }
3300 }
3301 ItemBody::Heading(level, Some(heading_ix)) => {
3302 let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
3303 Tag::Heading {
3304 level,
3305 id: id.clone(),
3306 classes: classes.clone(),
3307 attrs: attrs.clone(),
3308 }
3309 }
3310 ItemBody::Heading(level, None) => Tag::Heading {
3311 level,
3312 id: None,
3313 classes: Vec::new(),
3314 attrs: Vec::new(),
3315 },
3316 ItemBody::MathBlock(cow_ix) => {
3317 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3318 }
3319 ItemBody::FencedCodeBlock(cow_ix) => {
3320 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
3321 }
3322 ItemBody::IndentCodeBlock(..) => Tag::CodeBlock(CodeBlockKind::Indented),
3323 ItemBody::ContainerDirective(_, dir_ix)
3324 | ItemBody::LeafDirective(dir_ix)
3325 | ItemBody::TextDirective(dir_ix) => {
3326 let kind = match item.body {
3327 ItemBody::ContainerDirective(..) => DirectiveKind::Container,
3328 ItemBody::LeafDirective(..) => DirectiveKind::Leaf,
3329 _ => DirectiveKind::Text,
3330 };
3331 let dir = allocs.take_directive(dir_ix);
3332 Tag::Directive {
3333 kind,
3334 name: dir.name,
3335 attributes: dir.attributes,
3336 }
3337 }
3338 ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
3339 ItemBody::List(is_tight, c, listitem_start) => {
3340 if c == b'.' || c == b')' {
3341 Tag::List(Some(listitem_start), is_tight)
3342 } else {
3343 Tag::List(None, is_tight)
3344 }
3345 }
3346 ItemBody::ListItem(_, _) => Tag::Item,
3347 ItemBody::TableHead => Tag::TableHead,
3348 ItemBody::TableCell => Tag::TableCell,
3349 ItemBody::TableRow => Tag::TableRow,
3350 ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
3351 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
3352 ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
3353 ItemBody::Math(cow_ix, is_display) => {
3354 return if is_display {
3355 Event::DisplayMath(allocs.take_cow(cow_ix))
3356 } else {
3357 Event::InlineMath(allocs.take_cow(cow_ix))
3358 }
3359 }
3360 ItemBody::DefinitionList(_) => Tag::DefinitionList,
3361 ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
3362 ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
3363 ItemBody::MdxJsxFlowElement(jsx_ix) => {
3364 let jsx = allocs.take_jsx_element(jsx_ix);
3365 Tag::MdxJsxFlowElement(jsx.raw)
3366 }
3367 ItemBody::MdxJsxTextElement(jsx_ix) => {
3368 let jsx = allocs.take_jsx_element(jsx_ix);
3369 Tag::MdxJsxTextElement(jsx.raw)
3370 }
3371 ItemBody::MdxFlowExpression(cow_ix) => {
3372 return Event::MdxFlowExpression(allocs.take_cow(cow_ix))
3373 }
3374 ItemBody::MdxTextExpression(cow_ix) => {
3375 return Event::MdxTextExpression(allocs.take_cow(cow_ix))
3376 }
3377 ItemBody::MdxEsm(cow_ix) => return Event::MdxEsm(allocs.take_cow(cow_ix)),
3378 _ => panic!("unexpected item body {:?}", item.body),
3379 };
3380
3381 Event::Start(tag)
3382}
3383
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::*;
    use crate::tree::Node;

    /// Builds a parser with tables, footnotes, strikethrough, superscript,
    /// subscript and task lists enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_SUPERSCRIPT);
        opts.insert(Options::ENABLE_SUBSCRIPT);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    /// Pins the size of a tree node on 64-bit targets so accidental growth
    /// is noticed.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = core::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    /// Pins the size of `ItemBody` on 64-bit targets so accidental growth is
    /// noticed.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = core::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    /// A lone `<` yields exactly three events.
    #[test]
    fn single_open_fish_bracket() {
        assert_eq!(3, Parser::new("<").count());
    }

    /// A lone `#` yields exactly two events.
    #[test]
    fn lone_hashtag() {
        assert_eq!(2, Parser::new("#").count());
    }

    /// Regression test: these inputs only need to parse without panicking.
    #[test]
    fn lots_of_backslashes() {
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    /// Checks the event ranges reported around a wikilink with display text.
    #[test]
    fn issue_1030() {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_WIKILINKS);

        let parser = Parser::new_ext("For a new ferrari, [[Wikientry|click here]]!", opts);

        let offsets = parser
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect::<Vec<_>>();
        let expected_offsets = vec![
            (0..44),  // whole input
            (0..19),  // `For a new ferrari, `
            (19..43), // `[[Wikientry|click here]]`
            (31..41), // `click here`
            (19..43), // `[[Wikientry|click here]]` again
            (43..44), // `!`
            (0..44),  // whole input again
        ];
        assert_eq!(offsets, expected_offsets);
    }

    /// Regression test: this input only needs to parse without panicking.
    #[test]
    fn issue_320() {
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    /// Regression test: these inputs only need to parse without panicking.
    #[test]
    fn issue_319() {
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    /// Regression test: these inputs only need to parse without panicking.
    #[test]
    fn issue_303() {
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    /// Regression test: these inputs only need to parse without panicking.
    #[test]
    fn issue_313() {
        parser_with_extensions("*]0[^\r\r*]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    /// Regression test: this input only needs to parse without panicking.
    #[test]
    fn issue_311() {
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    /// Regression test with multi-byte characters: this input only needs to
    /// parse without panicking.
    #[test]
    fn issue_283() {
        let input = core::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        parser_with_extensions(input).count();
    }

    /// Regression test: these inputs only need to parse without panicking.
    #[test]
    fn issue_289() {
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    /// Regression test: this input only needs to parse without panicking.
    #[test]
    fn issue_306() {
        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
    }

    /// Regression test: this input only needs to parse without panicking.
    #[test]
    fn issue_305() {
        parser_with_extensions("_6**6*_*").count();
    }

    /// Regression test: this input only needs to parse without panicking.
    #[test]
    fn another_emphasis_panic() {
        parser_with_extensions("*__#_#__*").count();
    }

    /// Checks the ranges reported for a paragraph with emphasis and text.
    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The range of a reference link covers `[testing][Some reference]`.
    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .filter_map(|(ev, range)| match ev {
                    Event::Start(
                        Tag::Link {
                            link_type: LinkType::Reference,
                            ..
                        },
                        ..,
                    ) => Some(range),
                    _ => None,
                })
                .next()
                .unwrap();
        assert_eq!(5..30, range);
    }

    /// The range of a footnote reference covers `[^1]`.
    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .next()
            .unwrap();
        assert_eq!(12..16, range);
    }

    /// A `!` directly before a footnote reference stays a separate text event
    /// and is not part of the footnote's range.
    #[test]
    fn footnote_offsets_exclamation() {
        let mut immediately_before_footnote = None;
        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => {
                    // Remember the most recent non-footnote event.
                    immediately_before_footnote = Some((ev, range));
                    None
                }
            })
            .next()
            .unwrap();
        assert_eq!(13..17, range);
        if let (Event::Text(exclamation), range_exclamation) =
            immediately_before_footnote.as_ref().unwrap()
        {
            assert_eq!("!", &exclamation[..]);
            assert_eq!(&(12..13), range_exclamation);
        } else {
            panic!("what came first, then? {immediately_before_footnote:?}");
        }
    }

    /// The fourth event (index 3) spans the table, from byte 3 to the end of
    /// the input.
    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    /// Checks the range reported for the fifth table cell (index 4).
    #[test]
    fn table_cell_span() {
        let markdown = "a|b|c\n--|--|--\na| |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            .nth(4)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 3),
            event_offset
        );
    }

    /// Checks the ranges reported around an inline link.
    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// An empty heading followed by a newline reports `0..4` for both events.
    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// The broken-link callback runs the expected number of times per input.
    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    /// The `(url, title)` returned by the callback ends up in the link tag,
    /// and the callback sees the reference and span of the broken link.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title, id) in parser.filter_map(|event| match event {
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }) => Some((link_type, dest_url, title, id)),
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
            assert_eq!(id.as_ref(), "world");
        }
        assert!(link_tag_count > 0);
    }

    /// A fenced code block reports its info string as `Fenced("test")`.
    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Lines indented by four spaces form an indented code block, even when
    /// their content looks like a fence.
    #[test]
    fn code_block_kind_check_indented() {
        // The four-space indent is what makes this an indented code block;
        // with fewer than four spaces the backticks would open a fenced block
        // instead.
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Reference definitions can be looked up regardless of case, both before
    /// and after iteration has started.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if let Some(_event) = parser.next() {
            // The key is a short-lived `String`, not a `'static` literal.
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    /// Compile-time check: closures and functions with various lifetimes are
    /// accepted as broken-link callbacks.
    #[test]
    #[allow(clippy::extra_unused_lifetimes)]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));

        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }

    /// Inline HTML that continues on the next block-quote line is emitted
    /// without the `> ` prefix of that line.
    #[test]
    fn inline_html_inside_blockquote() {
        let input = "> <foo\n> bar>";
        let events: Vec<_> = Parser::new(input).collect();
        let expected = [
            Event::Start(Tag::BlockQuote(None)),
            Event::Start(Tag::Paragraph),
            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
            Event::End(TagEnd::Paragraph),
            Event::End(TagEnd::BlockQuote(None)),
        ];
        assert_eq!(&events, &expected);
    }

    /// `has_pothole` is `true` only for wikilinks with a `|` display text.
    #[test]
    fn wikilink_has_pothole() {
        let input = "[[foo]] [[bar|baz]]";
        let events: Vec<_> = Parser::new_ext(input, Options::ENABLE_WIKILINKS).collect();
        let expected = [
            Event::Start(Tag::Paragraph),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: false },
                dest_url: CowStr::Borrowed("foo"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("foo")),
            Event::End(TagEnd::Link),
            Event::Text(CowStr::Borrowed(" ")),
            Event::Start(Tag::Link {
                link_type: LinkType::WikiLink { has_pothole: true },
                dest_url: CowStr::Borrowed("bar"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("baz")),
            Event::End(TagEnd::Link),
            Event::End(TagEnd::Paragraph),
        ];
        assert_eq!(&events, &expected);
    }

    /// Builds a parser with only the MDX option enabled.
    fn mdx_parser(text: &str) -> Parser<'_> {
        Parser::new_ext(text, Options::ENABLE_MDX)
    }

    /// An `import` line becomes a single `MdxEsm` event.
    #[test]
    fn mdx_esm_import() {
        let events: Vec<_> = mdx_parser("import {Chart} from './chart.js'\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("import")));
    }

    /// An `export` line becomes a single `MdxEsm` event.
    #[test]
    fn mdx_esm_export() {
        let events: Vec<_> = mdx_parser("export const meta = {}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxEsm(s) if s.contains("export")));
    }

    /// A braced line becomes a single flow expression without the braces.
    #[test]
    fn mdx_flow_expression() {
        let events: Vec<_> = mdx_parser("{1 + 1}\n").collect();
        assert_eq!(events.len(), 1);
        assert!(matches!(&events[0], Event::MdxFlowExpression(s) if s.as_ref() == "1 + 1"));
    }

    /// A self-closing JSX tag on its own line starts a flow element.
    #[test]
    fn mdx_jsx_flow_self_closing() {
        let events: Vec<_> = mdx_parser("<Chart values={[1,2,3]} />\n").collect();
        assert!(!events.is_empty());
        assert!(
            matches!(&events[0], Event::Start(Tag::MdxJsxFlowElement(s)) if s.contains("Chart"))
        );
    }

    /// A fragment opener `<>` on its own line starts a flow element.
    #[test]
    fn mdx_jsx_flow_fragment() {
        let events: Vec<_> = mdx_parser("<>\n").collect();
        assert!(!events.is_empty());
        assert!(matches!(
            &events[0],
            Event::Start(Tag::MdxJsxFlowElement(_))
        ));
    }

    /// A braced expression inside a paragraph becomes a text expression.
    #[test]
    fn mdx_inline_expression() {
        let events: Vec<_> = mdx_parser("hello {name} world\n").collect();
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(
            has_expr,
            "Expected inline MDX expression, got: {:?}",
            events
        );
    }

    /// A JSX tag inside a paragraph becomes a text element.
    #[test]
    fn mdx_inline_jsx() {
        let events: Vec<_> = mdx_parser("hello <Badge /> world\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(s)) if s.contains("Badge")));
        assert!(has_jsx, "Expected inline MDX JSX, got: {:?}", events);
    }

    /// In MDX mode even ordinary HTML tags such as `<em>` are JSX elements.
    #[test]
    fn mdx_all_tags_are_jsx() {
        let events: Vec<_> = mdx_parser("hello <em>world</em>\n").collect();
        let has_jsx = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::MdxJsxTextElement(_))));
        assert!(has_jsx, "In MDX mode, <em> should be JSX: {:?}", events);
    }

    /// Without the MDX option an `import` line is an ordinary paragraph.
    #[test]
    fn mdx_does_not_interfere_without_flag() {
        let events: Vec<_> = Parser::new("import foo from 'bar'\n").collect();
        assert!(events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Paragraph))));
    }

    /// A heading whose only content is an expression still yields both the
    /// heading and the text expression.
    #[test]
    fn mdx_expression_in_heading() {
        let events: Vec<_> = mdx_parser("# {title}\n").collect();
        let has_heading = events
            .iter()
            .any(|e| matches!(e, Event::Start(Tag::Heading { .. })));
        assert!(has_heading, "Should have a heading");
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "title"));
        assert!(
            has_expr,
            "Heading should contain MdxTextExpression, got: {:?}",
            events
        );
    }

    /// A heading mixing plain text and an expression yields both.
    #[test]
    fn mdx_expression_mixed_text_in_heading() {
        let events: Vec<_> = mdx_parser("## Hello {name}\n").collect();
        let has_text = events
            .iter()
            .any(|e| matches!(e, Event::Text(s) if s.contains("Hello")));
        let has_expr = events
            .iter()
            .any(|e| matches!(e, Event::MdxTextExpression(s) if s.as_ref() == "name"));
        assert!(has_text, "Should have text, got: {:?}", events);
        assert!(has_expr, "Should have expression, got: {:?}", events);
    }
}