1#![allow(
31 dead_code,
32 reason = "the tree is a private parse-time representation with targeted unit coverage"
33)]
34
35use std::ops::Range;
36
37use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag};
38
39use crate::HeadingAttrs;
40use crate::heading::find_attr_trailer_range;
41use crate::refs::ReferenceTable;
42use mdwright_math::{MathRegion, MathSpan};
43
44#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
47pub(crate) struct NodeId(u32);
48
49impl NodeId {
50 #[must_use]
51 pub(crate) fn idx(self) -> usize {
52 self.0 as usize
53 }
54}
55
56#[derive(Clone, Debug)]
59pub(crate) struct Node {
60 pub(crate) kind: NodeKind,
61 pub(crate) raw_range: Range<usize>,
62 pub(crate) children: Range<u32>,
66 pub(crate) subtree_end: u32,
69}
70
71#[derive(Clone, Debug)]
81pub(crate) enum NodeKind {
82 Document,
84 Paragraph,
85 Heading {
86 level: u32,
87 setext: bool,
89 attrs: Option<Box<HeadingAttrs>>,
94 },
95 BlockQuote,
96 List {
97 ordered: bool,
98 start: u64,
100 tight: bool,
103 marker_byte: u8,
107 },
108 Item {
109 task: Option<bool>,
113 },
114 CodeBlock {
115 fenced: bool,
116 info: String,
117 body: String,
123 },
124 HtmlBlock {
125 body: String,
128 },
129 ThematicBreak,
130 Table {
131 alignments: Vec<TableAlign>,
132 },
133 TableHead,
134 TableRow,
135 TableCell,
136 FootnoteDefinition {
137 label: String,
138 },
139 DefinitionList,
145 DefinitionTerm,
148 DefinitionDescription,
151 Run,
154 CodeRun,
156 Emphasis,
157 Strong,
158 Strikethrough,
159 Link {
160 reference_label: Option<String>,
164 },
165 Image {
166 reference_label: Option<String>,
169 },
170 Autolink,
171 HtmlSpan,
173 FootnoteReference,
174 TaskListMarker(bool),
175 Math(Box<MathSpan>),
185
186 Unknown {
192 tag: &'static str,
193 },
194}
195
196#[derive(Copy, Clone, Debug, PartialEq, Eq)]
198pub enum TableAlign {
199 None,
200 Left,
201 Center,
202 Right,
203}
204
205impl TableAlign {
206 fn from_alignment(a: Alignment) -> Self {
207 match a {
208 Alignment::None => Self::None,
209 Alignment::Left => Self::Left,
210 Alignment::Center => Self::Center,
211 Alignment::Right => Self::Right,
212 }
213 }
214}
215
216#[derive(Debug)]
218pub(crate) struct Tree {
219 arena: Vec<Node>,
220 child_ids: Vec<NodeId>,
221 parents: Vec<Option<NodeId>>,
222}
223
224impl Tree {
225 #[must_use]
227 #[allow(clippy::unused_self)]
228 pub(crate) fn root(&self) -> NodeId {
229 NodeId(0)
230 }
231
232 #[must_use]
235 pub(crate) fn node(&self, id: NodeId) -> Option<&Node> {
236 self.arena.get(id.idx())
237 }
238
239 #[must_use]
244 pub(crate) fn raw_text<'a>(&self, source: &'a str, id: NodeId) -> &'a str {
245 self.node(id)
246 .and_then(|n| source.get(n.raw_range.clone()))
247 .unwrap_or("")
248 }
249
250 pub(crate) fn children(&self, id: NodeId) -> Children<'_> {
252 let range = self.node(id).map_or(0..0, |n| n.children.clone());
253 Children { tree: self, range }
254 }
255
256 #[must_use]
258 pub(crate) fn parent(&self, id: NodeId) -> Option<NodeId> {
259 self.parents.get(id.idx()).copied().flatten()
260 }
261
262 pub(crate) fn descendants(&self, id: NodeId) -> Descendants<'_> {
264 let start = id.idx().saturating_add(1);
265 let end = self.node(id).map_or(start, |n| n.subtree_end as usize);
266 Descendants {
267 tree: self,
268 next: start as u32,
269 end: end as u32,
270 }
271 }
272
273 #[must_use]
275 pub(crate) fn len(&self) -> usize {
276 self.arena.len()
277 }
278
279 #[must_use]
281 pub(crate) fn is_empty(&self) -> bool {
282 self.arena.len() <= 1
283 }
284
285 pub(crate) fn list_tightness_by_start(&self) -> Vec<(usize, bool)> {
286 self.descendants(self.root())
287 .filter_map(|id| {
288 let node = self.node(id)?;
289 match &node.kind {
290 NodeKind::List { tight, .. } => Some((node.raw_range.start, *tight)),
291 NodeKind::Document
292 | NodeKind::Paragraph
293 | NodeKind::Heading { .. }
294 | NodeKind::BlockQuote
295 | NodeKind::Item { .. }
296 | NodeKind::CodeBlock { .. }
297 | NodeKind::HtmlBlock { .. }
298 | NodeKind::ThematicBreak
299 | NodeKind::Table { .. }
300 | NodeKind::TableHead
301 | NodeKind::TableRow
302 | NodeKind::TableCell
303 | NodeKind::FootnoteDefinition { .. }
304 | NodeKind::DefinitionList
305 | NodeKind::DefinitionTerm
306 | NodeKind::DefinitionDescription
307 | NodeKind::Run
308 | NodeKind::CodeRun
309 | NodeKind::Emphasis
310 | NodeKind::Strong
311 | NodeKind::Strikethrough
312 | NodeKind::Link { .. }
313 | NodeKind::Image { .. }
314 | NodeKind::Autolink
315 | NodeKind::HtmlSpan
316 | NodeKind::FootnoteReference
317 | NodeKind::TaskListMarker(_)
318 | NodeKind::Math(_)
319 | NodeKind::Unknown { .. } => None,
320 }
321 })
322 .collect()
323 }
324
325 pub(crate) fn link_like_ranges(&self) -> Vec<Range<usize>> {
326 self.descendants(self.root())
327 .filter_map(|id| {
328 let node = self.node(id)?;
329 match &node.kind {
330 NodeKind::Link { .. } | NodeKind::Image { .. } | NodeKind::Autolink => Some(node.raw_range.clone()),
331 NodeKind::Document
332 | NodeKind::Paragraph
333 | NodeKind::Heading { .. }
334 | NodeKind::BlockQuote
335 | NodeKind::List { .. }
336 | NodeKind::Item { .. }
337 | NodeKind::CodeBlock { .. }
338 | NodeKind::HtmlBlock { .. }
339 | NodeKind::ThematicBreak
340 | NodeKind::Table { .. }
341 | NodeKind::TableHead
342 | NodeKind::TableRow
343 | NodeKind::TableCell
344 | NodeKind::FootnoteDefinition { .. }
345 | NodeKind::DefinitionList
346 | NodeKind::DefinitionTerm
347 | NodeKind::DefinitionDescription
348 | NodeKind::Run
349 | NodeKind::CodeRun
350 | NodeKind::Emphasis
351 | NodeKind::Strong
352 | NodeKind::Strikethrough
353 | NodeKind::HtmlSpan
354 | NodeKind::FootnoteReference
355 | NodeKind::TaskListMarker(_)
356 | NodeKind::Math(_)
357 | NodeKind::Unknown { .. } => None,
358 }
359 })
360 .collect()
361 }
362}
363
364pub(crate) struct Children<'t> {
367 tree: &'t Tree,
368 range: Range<u32>,
369}
370
371impl Iterator for Children<'_> {
372 type Item = NodeId;
373
374 fn next(&mut self) -> Option<Self::Item> {
375 let i = self.range.next()?;
376 self.tree.child_ids.get(i as usize).copied()
377 }
378}
379
380pub(crate) struct Descendants<'t> {
383 tree: &'t Tree,
384 next: u32,
385 end: u32,
386}
387
388impl Iterator for Descendants<'_> {
389 type Item = NodeId;
390
391 fn next(&mut self) -> Option<Self::Item> {
392 if self.next >= self.end {
393 return None;
394 }
395 let id = NodeId(self.next);
396 let _ = self.tree.node(id)?;
401 self.next = self.next.saturating_add(1);
402 Some(id)
403 }
404}
405
406pub(crate) struct TreeBuilder<'a> {
416 source: &'a str,
417 arena: Vec<Node>,
418 child_ids: Vec<NodeId>,
419 parents: Vec<Option<NodeId>>,
420 pending: Vec<NodeId>,
423 open: Vec<OpenFrame>,
424 inline_range: Option<Range<usize>>,
426 math_regions: &'a [MathRegion],
431 math_cursor: usize,
434 math_emitted_until: usize,
438}
439
440#[derive(Debug)]
441struct OpenFrame {
442 arena_id: NodeId,
443 pending_start: u32,
444 raw_start: usize,
445 body_accum: Option<String>,
450}
451
452impl<'a> TreeBuilder<'a> {
453 pub(crate) fn new(source: &'a str, math_regions: &'a [MathRegion]) -> Self {
454 let root = Node {
457 kind: NodeKind::Document,
458 raw_range: 0..source.len(),
459 children: 0..0,
460 subtree_end: 1,
461 };
462 Self {
463 source,
464 arena: vec![root],
465 child_ids: Vec::new(),
466 parents: vec![None],
467 pending: Vec::new(),
468 open: vec![OpenFrame {
469 arena_id: NodeId(0),
470 pending_start: 0,
471 raw_start: 0,
472 body_accum: None,
473 }],
474 inline_range: None,
475 math_regions,
476 math_cursor: 0,
477 math_emitted_until: 0,
478 }
479 }
480
481 pub(crate) fn arena_len(&self) -> usize {
485 self.arena.len()
486 }
487
488 #[allow(clippy::wildcard_enum_match_arm)]
489 pub(crate) fn handle(&mut self, event: &Event<'a>, range: Range<usize>) {
490 if self.math_emitted_until > range.start && range.end <= self.math_emitted_until {
499 return;
500 }
501 match event {
502 Event::Start(tag) => {
503 self.flush_inline_run();
504 let kind = self.kind_for_start(tag, &range);
505 let range = match &kind {
520 NodeKind::CodeBlock { fenced: false, .. } => widen_to_line_start(self.source, range),
521 NodeKind::HtmlBlock { .. } => widen_to_line_start_through_ws(self.source, range),
522 _ => range,
523 };
524 let body_accum =
525 matches!(&kind, NodeKind::CodeBlock { .. } | NodeKind::HtmlBlock { .. }).then(String::new);
526 self.open_container(kind, range, body_accum);
527 }
528 Event::End(end) => {
529 self.flush_inline_run();
530 self.close_container(range);
531 let _ = end;
532 }
533 Event::Text(cow) => {
534 if let Some(buf) = self.body_accum_mut() {
535 buf.push_str(cow);
536 return;
537 }
538 if range.start < self.math_emitted_until {
546 let trimmed = self.math_emitted_until..range.end;
547 if trimmed.is_empty() {
548 return;
549 }
550 if self.text_overlaps_math(&trimmed) {
551 self.splice_text_with_math(trimmed);
552 } else {
553 self.push_source_prose(trimmed);
554 }
555 return;
556 }
557 let raw_range = self.extend_for_backslash(range);
558 if self.text_overlaps_math(&raw_range) {
559 self.splice_text_with_math(raw_range);
560 } else {
561 let _ = cow;
562 self.push_inline_text(raw_range);
563 }
564 }
565 Event::Code(cow) => {
566 self.flush_inline_run();
567 let _ = cow;
568 self.push_leaf(NodeKind::CodeRun, range);
569 }
570 Event::Html(cow) => {
571 if let Some(buf) = self.body_accum_mut() {
572 buf.push_str(cow);
573 return;
574 }
575 self.flush_inline_run();
579 let _ = cow;
580 self.push_leaf(NodeKind::HtmlSpan, range);
581 }
582 Event::InlineHtml(cow) => {
583 self.flush_inline_run();
584 let _ = cow;
585 self.push_leaf(NodeKind::HtmlSpan, range);
586 }
587 Event::FootnoteReference(label) => {
588 self.flush_inline_run();
589 let _ = label;
590 self.push_leaf(NodeKind::FootnoteReference, range);
591 }
592 Event::SoftBreak => {
593 self.push_inline_break(range);
594 }
595 Event::HardBreak => {
596 self.push_inline_break(range);
597 }
598 Event::Rule => {
599 self.flush_inline_run();
600 self.push_leaf(NodeKind::ThematicBreak, range);
601 }
602 Event::TaskListMarker(checked) => {
603 self.flush_inline_run();
604 if let Some(frame) = self.open.last()
605 && let Some(node) = self.arena.get_mut(frame.arena_id.idx())
606 && let NodeKind::Item { ref mut task } = node.kind
607 {
608 *task = Some(*checked);
609 }
610 self.push_leaf(NodeKind::TaskListMarker(*checked), range);
611 }
612 Event::InlineMath(cow) | Event::DisplayMath(cow) => {
615 let raw_range = range;
616 let _ = cow;
617 self.push_inline_text(raw_range);
618 }
619 }
620 }
621
622 fn push_inline_text(&mut self, range: Range<usize>) {
624 self.extend_inline_range(&range);
625 }
626
627 fn text_overlaps_math(&mut self, range: &Range<usize>) -> bool {
631 while self.math_cursor < self.math_regions.len()
632 && self
633 .math_regions
634 .get(self.math_cursor)
635 .is_some_and(|r| r.range.end <= range.start)
636 {
637 self.math_cursor = self.math_cursor.saturating_add(1);
638 }
639 self.math_regions
640 .get(self.math_cursor)
641 .is_some_and(|r| r.range.start < range.end)
642 }
643
644 fn splice_text_with_math(&mut self, raw_range: Range<usize>) {
662 let mut cursor = raw_range.start.max(self.math_emitted_until);
663 while self.math_cursor < self.math_regions.len() {
664 let Some(region) = self.math_regions.get(self.math_cursor) else {
665 break;
666 };
667 if region.range.start >= raw_range.end {
668 break;
669 }
670 if region.range.end <= cursor {
671 self.math_cursor = self.math_cursor.saturating_add(1);
672 continue;
673 }
674 if cursor < region.range.start {
675 let chunk = cursor..region.range.start;
676 self.push_source_prose(chunk);
677 }
678 self.flush_inline_run();
679 let span = region.span().clone();
680 let region_range = region.range.clone();
681 tracing::trace!(?region_range, "math leaf");
682 self.push_leaf(NodeKind::Math(Box::new(span)), region_range.clone());
683 self.math_emitted_until = region_range.end;
684 cursor = region_range.end;
685 self.math_cursor = self.math_cursor.saturating_add(1);
686 }
687 if cursor < raw_range.end {
688 self.push_source_prose(cursor..raw_range.end);
689 }
690 }
691
692 fn push_source_prose(&mut self, range: Range<usize>) {
695 self.push_inline_text(range);
696 }
697
698 fn push_inline_break(&mut self, range: Range<usize>) {
700 self.extend_inline_range(&range);
701 }
702
703 fn extend_inline_range(&mut self, range: &Range<usize>) {
704 match &mut self.inline_range {
705 Some(r) => {
706 if range.start < r.start {
707 r.start = range.start;
708 }
709 if range.end > r.end {
710 r.end = range.end;
711 }
712 }
713 None => self.inline_range = Some(range.clone()),
714 }
715 }
716
717 fn flush_inline_run(&mut self) {
720 if let Some(range) = self.inline_range.take()
721 && !range.is_empty()
722 {
723 self.push_leaf(NodeKind::Run, range);
724 }
725 }
726
727 fn body_accum_mut(&mut self) -> Option<&mut String> {
730 self.open.last_mut().and_then(|f| f.body_accum.as_mut())
731 }
732
733 #[tracing::instrument(level = "debug", skip(self, refs))]
739 pub(crate) fn finalize(mut self, refs: &ReferenceTable) -> Tree {
740 self.flush_inline_run();
743 let doc_pending_start = self.open.pop().map_or(0u32, |f| f.pending_start);
747 let doc_children: Vec<NodeId> = self.pending.drain(doc_pending_start as usize..).collect();
748
749 downgrade_unresolved_links(&mut self.arena, refs);
754
755 let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
756 self.child_ids.extend(doc_children.iter().copied());
757 let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
758 let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
759
760 if let Some(root) = self.arena.get_mut(0) {
761 root.children = children_start..children_end;
762 root.subtree_end = subtree_end;
763 root.raw_range = 0..self.source.len();
764 }
765
766 Tree {
767 arena: self.arena,
768 child_ids: self.child_ids,
769 parents: self.parents,
770 }
771 }
772
773 fn alloc_node(&mut self, kind: NodeKind, raw_range: Range<usize>) -> NodeId {
774 let id = NodeId(u32::try_from(self.arena.len()).unwrap_or(u32::MAX));
775 let subtree_end = id.0.saturating_add(1);
776 self.arena.push(Node {
777 kind,
778 raw_range,
779 children: 0..0,
780 subtree_end,
781 });
782 let parent = self.open.last().map(|f| f.arena_id);
783 self.parents.push(parent);
784 self.pending.push(id);
786 id
787 }
788
789 fn open_container(&mut self, kind: NodeKind, range: Range<usize>, body_accum: Option<String>) {
790 let raw_start = range.start;
791 let id = self.alloc_node(kind, range);
792 let pending_start = u32::try_from(self.pending.len()).unwrap_or(u32::MAX);
793 self.open.push(OpenFrame {
794 arena_id: id,
795 pending_start,
796 raw_start,
797 body_accum,
798 });
799 }
800
801 fn close_container(&mut self, range: Range<usize>) {
802 let Some(frame) = self.open.pop() else {
803 return;
804 };
805 let pending_start = frame.pending_start as usize;
808 let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
809 self.child_ids.extend(self.pending.drain(pending_start..));
810 let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
811 let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
812
813 let raw_range = frame.raw_start..range.end;
814 let node_is_list = matches!(
815 self.arena.get(frame.arena_id.idx()).map(|n| &n.kind),
816 Some(NodeKind::List { .. })
817 );
818
819 if let Some(node) = self.arena.get_mut(frame.arena_id.idx()) {
823 node.children = children_start..children_end;
824 node.subtree_end = subtree_end;
825 node.raw_range = raw_range;
826 #[allow(clippy::wildcard_enum_match_arm)]
828 if let Some(body) = frame.body_accum {
829 match &mut node.kind {
830 NodeKind::CodeBlock { body: dst, .. } => *dst = body,
831 NodeKind::HtmlBlock { body: dst } => *dst = body,
832 _ => {}
833 }
834 }
835 }
836
837 if node_is_list {
838 let list_tight = list_has_no_direct_paragraph_items(&self.arena, &self.child_ids, frame.arena_id);
839 if let Some(node) = self.arena.get_mut(frame.arena_id.idx())
840 && let NodeKind::List { tight, .. } = &mut node.kind
841 {
842 *tight = list_tight;
843 }
844 }
845 }
846
847 fn push_leaf(&mut self, kind: NodeKind, range: Range<usize>) {
848 self.alloc_node(kind, range);
849 }
850
851 fn extend_for_backslash(&self, range: Range<usize>) -> Range<usize> {
854 if range.start > 0 {
855 let bytes = self.source.as_bytes();
856 if bytes.get(range.start.saturating_sub(1)) == Some(&b'\\') {
857 return range.start.saturating_sub(1)..range.end;
858 }
859 }
860 range
861 }
862
863 fn kind_for_start(&self, tag: &Tag<'a>, range: &Range<usize>) -> NodeKind {
864 match tag {
865 Tag::Paragraph => NodeKind::Paragraph,
866 Tag::Heading {
867 level,
868 id,
869 classes,
870 attrs,
871 } => {
872 let lvl = *level as u32;
873 let setext = first_non_whitespace_byte(self.source, range.start) != Some(b'#');
876 let parsed = build_heading_attrs(self.source, range, id.as_deref(), classes, attrs);
877 NodeKind::Heading {
878 level: lvl,
879 setext,
880 attrs: parsed.map(Box::new),
881 }
882 }
883 Tag::BlockQuote(_) => NodeKind::BlockQuote,
884 Tag::CodeBlock(kind) => {
885 let (fenced, info) = match kind {
886 CodeBlockKind::Fenced(s) => (true, s.to_string()),
887 CodeBlockKind::Indented => (false, String::new()),
888 };
889 NodeKind::CodeBlock {
890 fenced,
891 info,
892 body: String::new(),
893 }
894 }
895 Tag::HtmlBlock => NodeKind::HtmlBlock { body: String::new() },
896 Tag::List(start) => NodeKind::List {
897 ordered: start.is_some(),
898 start: start.unwrap_or(0),
899 tight: true,
900 marker_byte: derive_list_marker_byte(self.source, range.clone(), start.is_some()).unwrap_or(0),
901 },
902 Tag::Item => NodeKind::Item { task: None },
903 Tag::FootnoteDefinition(label) => NodeKind::FootnoteDefinition {
904 label: label.to_string(),
905 },
906 Tag::Table(aligns) => NodeKind::Table {
907 alignments: aligns.iter().copied().map(TableAlign::from_alignment).collect(),
908 },
909 Tag::TableHead => NodeKind::TableHead,
910 Tag::TableRow => NodeKind::TableRow,
911 Tag::TableCell => NodeKind::TableCell,
912 Tag::Emphasis => NodeKind::Emphasis,
913 Tag::Strong => NodeKind::Strong,
914 Tag::Strikethrough => NodeKind::Strikethrough,
915 Tag::Link {
916 link_type,
917 dest_url,
918 title,
919 id,
920 } => link_kind(*link_type, dest_url, title, id, false),
921 Tag::Image {
922 link_type,
923 dest_url,
924 title,
925 id,
926 } => link_kind(*link_type, dest_url, title, id, true),
927 Tag::Superscript => NodeKind::Unknown { tag: "Superscript" },
928 Tag::Subscript => NodeKind::Unknown { tag: "Subscript" },
929 Tag::DefinitionList => NodeKind::DefinitionList,
930 Tag::DefinitionListTitle => NodeKind::DefinitionTerm,
931 Tag::DefinitionListDefinition => NodeKind::DefinitionDescription,
932 Tag::MetadataBlock(_) => NodeKind::Unknown { tag: "MetadataBlock" },
933 }
934 }
935}
936
937#[allow(clippy::wildcard_enum_match_arm)] fn downgrade_unresolved_links(arena: &mut [Node], refs: &ReferenceTable) {
943 for node in arena.iter_mut() {
944 let (label_opt, is_image): (Option<&str>, bool) = match &node.kind {
945 NodeKind::Link { reference_label } => (reference_label.as_deref(), false),
946 NodeKind::Image { reference_label } => (reference_label.as_deref(), true),
947 _ => (None, false),
948 };
949 let Some(label) = label_opt else { continue };
950 if refs.resolve(label).is_some() {
951 continue;
952 }
953 let tag = if is_image { "Image" } else { "Link" };
954 node.kind = NodeKind::Unknown { tag };
955 node.children = 0..0;
961 }
962}
963
964fn link_kind(lt: LinkType, dest_url: &CowStr<'_>, title: &CowStr<'_>, id: &CowStr<'_>, is_image: bool) -> NodeKind {
965 let _ = (dest_url, title);
966 let reference_label = match lt {
967 LinkType::Autolink => {
968 return NodeKind::Autolink;
969 }
970 LinkType::Email => {
971 return NodeKind::Autolink;
972 }
973 LinkType::WikiLink { .. } => return NodeKind::Unknown { tag: "WikiLink" },
974 LinkType::Inline => None,
975 LinkType::Reference
976 | LinkType::ReferenceUnknown
977 | LinkType::Collapsed
978 | LinkType::CollapsedUnknown
979 | LinkType::Shortcut
980 | LinkType::ShortcutUnknown => Some(id.to_string()),
981 };
982 if is_image {
983 NodeKind::Image { reference_label }
984 } else {
985 NodeKind::Link { reference_label }
986 }
987}
988
989fn first_non_whitespace_byte(source: &str, start: usize) -> Option<u8> {
990 source
991 .as_bytes()
992 .get(start..)?
993 .iter()
994 .copied()
995 .find(|b| !matches!(b, b' ' | b'\t'))
996}
997
998fn derive_list_marker_byte(source: &str, range: Range<usize>, ordered: bool) -> Option<u8> {
1009 source.as_bytes().get(range)?.iter().copied().find(|b| {
1010 if ordered {
1011 b.is_ascii_digit()
1012 } else {
1013 matches!(b, b'-' | b'*' | b'+')
1014 }
1015 })
1016}
1017
1018fn widen_to_line_start(source: &str, range: Range<usize>) -> Range<usize> {
1024 let bytes = source.as_bytes();
1025 let mut start = range.start.min(bytes.len());
1026 while start > 0 && bytes.get(start.saturating_sub(1)).copied() != Some(b'\n') {
1027 start = start.saturating_sub(1);
1028 }
1029 start..range.end
1030}
1031
1032fn widen_to_line_start_through_ws(source: &str, range: Range<usize>) -> Range<usize> {
1039 let bytes = source.as_bytes();
1040 let mut start = range.start.min(bytes.len());
1041 while start > 0 {
1042 match bytes.get(start.saturating_sub(1)).copied() {
1043 Some(b' ' | b'\t') => start = start.saturating_sub(1),
1044 Some(b'\n') | None => break,
1045 Some(_) => return range, }
1047 }
1048 start..range.end
1049}
1050
1051fn build_heading_attrs(
1059 source: &str,
1060 range: &Range<usize>,
1061 id: Option<&str>,
1062 classes: &[CowStr<'_>],
1063 attrs: &[(CowStr<'_>, Option<CowStr<'_>>)],
1064) -> Option<HeadingAttrs> {
1065 if id.is_none() && classes.is_empty() && attrs.is_empty() {
1066 return None;
1067 }
1068 let raw = source.get(range.clone()).unwrap_or("");
1069 let trailer = find_attr_trailer_range(raw)
1070 .and_then(|r| raw.get(r))
1071 .unwrap_or("")
1072 .to_owned();
1073 Some(HeadingAttrs {
1074 id: id.map(str::to_owned),
1075 classes: classes.iter().map(|c| c.to_string()).collect(),
1076 attrs: attrs
1077 .iter()
1078 .map(|(k, v)| (k.to_string(), v.as_ref().map(|v| v.to_string())))
1079 .collect(),
1080 source_trailer: trailer,
1081 })
1082}
1083
1084fn list_has_no_direct_paragraph_items(arena: &[Node], child_ids: &[NodeId], list_id: NodeId) -> bool {
1085 let Some(list_node) = arena.get(list_id.idx()) else {
1086 return true;
1087 };
1088 for i in list_node.children.clone() {
1089 let Some(&item_id) = child_ids.get(i as usize) else {
1090 continue;
1091 };
1092 let Some(item_node) = arena.get(item_id.idx()) else {
1093 continue;
1094 };
1095 if !matches!(item_node.kind, NodeKind::Item { .. }) {
1096 continue;
1097 }
1098 if item_has_direct_paragraph(arena, child_ids, item_node) {
1099 return false;
1100 }
1101 }
1102 true
1103}
1104
1105fn item_has_direct_paragraph(arena: &[Node], child_ids: &[NodeId], item: &Node) -> bool {
1106 for j in item.children.clone() {
1107 let Some(&cid) = child_ids.get(j as usize) else {
1108 continue;
1109 };
1110 if matches!(arena.get(cid.idx()).map(|n| &n.kind), Some(NodeKind::Paragraph)) {
1111 return true;
1112 }
1113 }
1114 false
1115}
1116
1117#[cfg(test)]
1118#[allow(clippy::expect_used)]
1119mod tests {
1120 use super::*;
1121 use crate::ir::Ir;
1122
1123 #[test]
1124 fn empty_doc_has_root_only() {
1125 let ir = Ir::parse_str("");
1126 let tree = &ir.tree;
1127 assert_eq!(tree.root(), NodeId(0));
1128 assert!(tree.is_empty());
1129 assert!(matches!(
1130 tree.node(tree.root()).map(|n| &n.kind),
1131 Some(NodeKind::Document)
1132 ));
1133 }
1134
1135 #[test]
1136 fn paragraph_and_text_present() {
1137 let ir = Ir::parse_str("Hello world\n");
1138 let tree = &ir.tree;
1139 let kinds: Vec<&NodeKind> = tree
1140 .descendants(tree.root())
1141 .filter_map(|id| tree.node(id).map(|n| &n.kind))
1142 .collect();
1143 assert!(kinds.iter().any(|k| matches!(k, NodeKind::Paragraph)));
1144 assert!(kinds.iter().any(|k| matches!(k, NodeKind::Run)));
1145 }
1146
1147 #[test]
1148 fn raw_ranges_are_well_formed() {
1149 let src = "# Title\n\nA paragraph.\n\n- one\n- two\n";
1150 let ir = Ir::parse_str(src);
1151 let tree = &ir.tree;
1152 for id in tree.descendants(tree.root()) {
1153 let n = tree.node(id).expect("descendants only yields valid ids");
1154 assert!(n.raw_range.start <= n.raw_range.end);
1155 assert!(n.raw_range.end <= src.len());
1156 }
1157 }
1158
1159 #[test]
1160 fn raw_range_covers_leading_sigil_per_block_kind() {
1161 let src = "\
1167# Heading
1168> quote
1169- list item
1170```rust
1171let x = 1;
1172```
1173 indented
1174---
1175<!-- html block -->
1176";
1177 let ir = Ir::parse_str(src);
1178 let tree = &ir.tree;
1179 for id in tree.descendants(tree.root()) {
1180 let n = tree.node(id).expect("descendants yields valid ids");
1181 let raw = tree.raw_text(src, id);
1182 #[allow(clippy::wildcard_enum_match_arm)]
1183 match &n.kind {
1184 NodeKind::Heading { setext: false, .. } => {
1185 assert!(raw.starts_with('#'), "ATX heading missing `#`: {raw:?}");
1186 }
1187 NodeKind::BlockQuote => {
1188 assert!(raw.starts_with('>'), "blockquote missing `>`: {raw:?}");
1189 }
1190 NodeKind::List { .. } => {
1191 let first = raw.bytes().next().expect("non-empty list raw_text");
1192 assert!(
1193 matches!(first, b'-' | b'*' | b'+' | b'0'..=b'9'),
1194 "list missing bullet: {raw:?}",
1195 );
1196 }
1197 NodeKind::CodeBlock { fenced: true, .. } => {
1198 assert!(
1199 raw.starts_with("```") || raw.starts_with("~~~"),
1200 "fenced code block missing opening fence: {raw:?}",
1201 );
1202 assert!(
1203 raw.trim_end_matches('\n').ends_with("```") || raw.trim_end_matches('\n').ends_with("~~~"),
1204 "fenced code block missing closing fence: {raw:?}",
1205 );
1206 }
1207 NodeKind::CodeBlock { fenced: false, .. } => {
1208 assert!(
1209 raw.starts_with(" ") || raw.starts_with('\t'),
1210 "indented code block missing 4-space prefix: {raw:?}",
1211 );
1212 }
1213 NodeKind::HtmlBlock { .. } => {
1214 assert!(raw.starts_with('<'), "HTML block missing `<`: {raw:?}");
1215 }
1216 NodeKind::ThematicBreak => {
1217 let first = raw.bytes().next().expect("non-empty thematic break");
1218 assert!(
1219 matches!(first, b'-' | b'*' | b'_'),
1220 "thematic break missing marker: {raw:?}",
1221 );
1222 }
1223 _ => {}
1224 }
1225 }
1226 }
1227
1228 #[test]
1229 fn child_raw_range_is_contained_in_parent() {
1230 let src = "# H\n\n> quote with *em*\n\n- item one\n- item two\n";
1236 let ir = Ir::parse_str(src);
1237 let tree = &ir.tree;
1238 for id in tree.descendants(tree.root()) {
1239 if id == tree.root() {
1240 continue;
1241 }
1242 let Some(parent_id) = tree.parent(id) else {
1243 continue;
1244 };
1245 let child = tree.node(id).expect("valid child id");
1246 let parent = tree.node(parent_id).expect("valid parent id");
1247 assert!(
1248 parent.raw_range.start <= child.raw_range.start && child.raw_range.end <= parent.raw_range.end,
1249 "child {:?} {:?} outside parent {:?} {:?}",
1250 child.kind,
1251 child.raw_range,
1252 parent.kind,
1253 parent.raw_range,
1254 );
1255 }
1256 }
1257
1258 #[test]
1259 fn parent_chain_terminates_at_root() {
1260 let ir = Ir::parse_str("> a quote\n");
1261 let tree = &ir.tree;
1262 let last = NodeId(u32::try_from(tree.len().saturating_sub(1)).unwrap_or(0));
1263 let mut cur = last;
1264 let mut steps: u32 = 0;
1265 while let Some(p) = tree.parent(cur) {
1266 cur = p;
1267 steps = steps.saturating_add(1);
1268 assert!(steps < 32, "walk did not terminate");
1269 }
1270 assert_eq!(cur, tree.root());
1271 assert!(tree.parent(tree.root()).is_none());
1272 }
1273
1274 fn find_list_tight(tree: &Tree) -> Option<bool> {
1275 tree.descendants(tree.root())
1276 .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1277 Some(NodeKind::List { tight, .. }) => Some(*tight),
1278 _ => None,
1279 })
1280 }
1281
1282 #[test]
1283 fn tight_list_one_text_child() {
1284 let ir = Ir::parse_str("- one\n- two\n");
1285 assert_eq!(find_list_tight(&ir.tree), Some(true));
1286 }
1287
1288 #[test]
1289 fn loose_list_with_blank_line_between_items() {
1290 let ir = Ir::parse_str("- one\n\n- two\n");
1291 assert_eq!(find_list_tight(&ir.tree), Some(false));
1292 }
1293
1294 #[test]
1295 fn nested_blockquote_under_list() {
1296 let ir = Ir::parse_str("- item\n\n > quote\n");
1297 let tree = &ir.tree;
1298 let bq = tree
1299 .descendants(tree.root())
1300 .find(|&id| matches!(tree.node(id).map(|n| &n.kind), Some(NodeKind::BlockQuote)));
1301 assert!(bq.is_some(), "blockquote nested under list item");
1302 }
1303
1304 #[test]
1305 fn reference_link_records_label() {
1306 let src = "[foo][bar]\n\n[bar]: https://example.com\n";
1307 let ir = Ir::parse_str(src);
1308 let tree = &ir.tree;
1309 let label = tree
1310 .descendants(tree.root())
1311 .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1312 Some(NodeKind::Link { reference_label }) => reference_label.clone(),
1313 _ => None,
1314 })
1315 .expect("link present");
1316 assert_eq!(label, "bar");
1317 }
1318
1319 #[test]
1320 fn link_reference_definitions_appear_in_reference_table() {
1321 let src = "[a]: https://a.example\n[b]: https://b.example\n\n[a] and [b].\n";
1326 let ir = Ir::parse_str(src);
1327 let mut labels: Vec<String> = ir.refs.iter().map(|t| t.label_raw.clone()).collect();
1328 labels.sort();
1329 assert_eq!(labels, vec!["a".to_owned(), "b".to_owned()]);
1330 }
1331
1332 #[test]
1333 fn autolink_preserves_url() {
1334 let ir = Ir::parse_str("<https://example.com>\n");
1335 let tree = &ir.tree;
1336 let has_autolink = tree
1337 .descendants(tree.root())
1338 .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1339 Some(NodeKind::Autolink) => Some(true),
1340 _ => None,
1341 })
1342 .expect("autolink present");
1343 assert!(has_autolink);
1344 }
1345
1346 #[test]
1347 fn task_list_marker_sets_item_task() {
1348 let ir = Ir::parse_str("- [x] done\n- [ ] todo\n");
1349 let tree = &ir.tree;
1350 let items: Vec<Option<bool>> = tree
1351 .descendants(tree.root())
1352 .filter_map(|id| match tree.node(id).map(|n| &n.kind) {
1353 Some(NodeKind::Item { task }) => Some(*task),
1354 _ => None,
1355 })
1356 .collect();
1357 assert_eq!(items, vec![Some(true), Some(false)]);
1358 }
1359
1360 #[test]
1361 fn code_block_info_string() {
1362 let ir = Ir::parse_str("```rust\nfn x() {}\n```\n");
1363 let tree = &ir.tree;
1364 let info = tree
1365 .descendants(tree.root())
1366 .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1367 Some(NodeKind::CodeBlock { fenced: true, info, .. }) => Some(info.clone()),
1368 _ => None,
1369 })
1370 .expect("fenced code block");
1371 assert_eq!(info, "rust");
1372 }
1373}