#![allow(
dead_code,
reason = "the tree is a private parse-time representation with targeted unit coverage"
)]
use std::ops::Range;
use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag};
use crate::HeadingAttrs;
use crate::heading::find_attr_trailer_range;
use crate::refs::ReferenceTable;
use mdwright_math::{MathRegion, MathSpan};
/// Handle identifying one node by its position in the node arena.
///
/// Wraps a `u32`; the derived equality, hashing and ordering all compare
/// that raw value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct NodeId(u32);

impl NodeId {
    /// Returns the wrapped value widened to `usize`, ready for slice lookups.
    #[must_use]
    pub(crate) fn idx(self) -> usize {
        let Self(raw) = self;
        raw as usize
    }
}
/// One entry in the tree's node arena.
#[derive(Clone, Debug)]
pub(crate) struct Node {
/// What this node represents, plus any per-kind payload.
pub(crate) kind: NodeKind,
/// Byte range of the node within the source string (sliced by `Tree::raw_text`).
pub(crate) raw_range: Range<usize>,
/// Range of slots in the tree's `child_ids` table that hold this node's direct children.
pub(crate) children: Range<u32>,
/// Arena index at which `Tree::descendants` stops when walking this node's subtree.
pub(crate) subtree_end: u32,
}
/// The kind of a tree node, with per-kind payload where needed.
#[derive(Clone, Debug)]
pub(crate) enum NodeKind {
/// The root node; always present at arena slot 0.
Document,
Paragraph,
Heading {
/// Heading level as a number (the parser's level enum cast to `u32`).
level: u32,
/// `true` when the first non-blank byte at the heading's start is not `#`.
setext: bool,
/// Present only when the heading carries an id, classes or attributes.
attrs: Option<Box<HeadingAttrs>>,
},
BlockQuote,
List {
/// `true` when the parser reported a start number.
ordered: bool,
/// The reported start number, or 0 when there is none.
start: u64,
/// Recomputed when the list closes: `true` iff no item has a direct `Paragraph` child.
tight: bool,
/// First marker-like byte found in the list's source range; 0 when none was found.
marker_byte: u8,
},
Item {
/// Set to `Some(checked)` when a task-list marker event arrives for this item.
task: Option<bool>,
},
CodeBlock {
/// `true` for fenced blocks, `false` for indented ones.
fenced: bool,
/// Info string of a fenced block; empty for indented blocks.
info: String,
/// Concatenation of the text events received while the block was open.
body: String,
},
HtmlBlock {
/// Concatenation of the text/HTML events received while the block was open.
body: String,
},
ThematicBreak,
Table {
/// One entry per alignment reported by the parser for the table.
alignments: Vec<TableAlign>,
},
TableHead,
TableRow,
TableCell,
FootnoteDefinition {
label: String,
},
DefinitionList,
DefinitionTerm,
DefinitionDescription,
/// A contiguous stretch of inline text and line breaks, stored only as a source range.
Run,
/// An inline code span.
CodeRun,
Emphasis,
Strong,
Strikethrough,
Link {
/// `Some(label)` for reference-style links (all reference, collapsed and shortcut
/// link types); `None` for inline links.
reference_label: Option<String>,
},
Image {
/// Same convention as `Link::reference_label`.
reference_label: Option<String>,
},
/// An autolink or e-mail autolink.
Autolink,
/// An HTML event received outside an open code/HTML block.
HtmlSpan,
FootnoteReference,
/// Leaf emitted for a task-list checkbox; the payload is its checked state.
TaskListMarker(bool),
/// Leaf emitted for a math region found inside text.
Math(Box<MathSpan>),
/// Constructs the tree does not model; `tag` names the original construct
/// (for example `"Superscript"`, `"WikiLink"`, or `"Link"` for an unresolved reference).
Unknown {
tag: &'static str,
},
}
/// Column alignment of a table, mirroring the parser's `Alignment` variants one-to-one.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TableAlign {
/// No alignment reported for the column.
None,
Left,
Center,
Right,
}
impl TableAlign {
    /// Maps a parser [`Alignment`] onto the `TableAlign` variant of the same name.
    fn from_alignment(a: Alignment) -> Self {
        match a {
            Alignment::Left => Self::Left,
            Alignment::Center => Self::Center,
            Alignment::Right => Self::Right,
            Alignment::None => Self::None,
        }
    }
}
/// Finished parse tree: nodes live in a flat arena and are addressed by `NodeId`.
#[derive(Debug)]
pub(crate) struct Tree {
/// Every node of the tree; slot 0 is the document root.
arena: Vec<Node>,
/// Flat table of child ids; each node's `children` range indexes into it.
child_ids: Vec<NodeId>,
/// `parents[i]` is the parent of arena node `i`; `None` for the root.
parents: Vec<Option<NodeId>>,
}
impl Tree {
/// Returns the id of the document root, which occupies arena slot 0.
#[must_use]
#[allow(clippy::unused_self)]
pub(crate) fn root(&self) -> NodeId {
    const ROOT_SLOT: u32 = 0;
    NodeId(ROOT_SLOT)
}
/// Looks up the node stored under `id`; `None` when the id is out of range.
#[must_use]
pub(crate) fn node(&self, id: NodeId) -> Option<&Node> {
    let slot = id.idx();
    self.arena.get(slot)
}
/// Returns the slice of `source` covered by the node's `raw_range`.
///
/// Yields the empty string when `id` is unknown or the range does not
/// address a valid slice of `source`.
#[must_use]
pub(crate) fn raw_text<'a>(&self, source: &'a str, id: NodeId) -> &'a str {
    let Some(found) = self.node(id) else {
        return "";
    };
    source.get(found.raw_range.clone()).unwrap_or("")
}
/// Iterates over the direct children of `id`; empty when `id` is unknown.
pub(crate) fn children(&self, id: NodeId) -> Children<'_> {
    let range = match self.node(id) {
        Some(found) => found.children.clone(),
        None => 0..0,
    };
    Children { tree: self, range }
}
/// Returns the recorded parent of `id`; `None` for the root or an unknown id.
#[must_use]
pub(crate) fn parent(&self, id: NodeId) -> Option<NodeId> {
    let recorded = self.parents.get(id.idx())?;
    *recorded
}
/// Iterates over every node strictly below `id`, in arena order.
///
/// The walk covers arena slots `id + 1 .. subtree_end`. An unknown `id`
/// yields an empty iterator.
///
/// The bounds are computed in `u32` with saturating arithmetic. The earlier
/// version went through `usize` and narrowed with `as u32`, which wraps to 0
/// for `NodeId(u32::MAX)` on 64-bit targets and could restart the walk at the
/// root instead of yielding nothing.
pub(crate) fn descendants(&self, id: NodeId) -> Descendants<'_> {
    let next = id.0.saturating_add(1);
    // An unknown id gets `end == next`, i.e. an empty walk.
    let end = self.node(id).map_or(next, |n| n.subtree_end);
    Descendants { tree: self, next, end }
}
#[must_use]
pub(crate) fn len(&self) -> usize {
self.arena.len()
}
/// `true` when the arena holds at most one node, i.e. nothing besides the root.
#[must_use]
pub(crate) fn is_empty(&self) -> bool {
    matches!(self.arena.len(), 0 | 1)
}
/// Collects `(source start offset, tight flag)` for every list in the tree,
/// in arena order.
///
/// The match is spelled out exhaustively so that adding a `NodeKind`
/// variant forces a decision here.
pub(crate) fn list_tightness_by_start(&self) -> Vec<(usize, bool)> {
    let mut found = Vec::new();
    for id in self.descendants(self.root()) {
        let Some(node) = self.node(id) else {
            continue;
        };
        match &node.kind {
            NodeKind::List { tight, .. } => found.push((node.raw_range.start, *tight)),
            NodeKind::Document
            | NodeKind::Paragraph
            | NodeKind::Heading { .. }
            | NodeKind::BlockQuote
            | NodeKind::Item { .. }
            | NodeKind::CodeBlock { .. }
            | NodeKind::HtmlBlock { .. }
            | NodeKind::ThematicBreak
            | NodeKind::Table { .. }
            | NodeKind::TableHead
            | NodeKind::TableRow
            | NodeKind::TableCell
            | NodeKind::FootnoteDefinition { .. }
            | NodeKind::DefinitionList
            | NodeKind::DefinitionTerm
            | NodeKind::DefinitionDescription
            | NodeKind::Run
            | NodeKind::CodeRun
            | NodeKind::Emphasis
            | NodeKind::Strong
            | NodeKind::Strikethrough
            | NodeKind::Link { .. }
            | NodeKind::Image { .. }
            | NodeKind::Autolink
            | NodeKind::HtmlSpan
            | NodeKind::FootnoteReference
            | NodeKind::TaskListMarker(_)
            | NodeKind::Math(_)
            | NodeKind::Unknown { .. } => {}
        }
    }
    found
}
/// Collects the source ranges of every link, image and autolink node,
/// in arena order.
///
/// The match is spelled out exhaustively so that adding a `NodeKind`
/// variant forces a decision here.
pub(crate) fn link_like_ranges(&self) -> Vec<Range<usize>> {
    let mut ranges = Vec::new();
    for id in self.descendants(self.root()) {
        let Some(node) = self.node(id) else {
            continue;
        };
        match &node.kind {
            NodeKind::Link { .. } | NodeKind::Image { .. } | NodeKind::Autolink => {
                ranges.push(node.raw_range.clone());
            }
            NodeKind::Document
            | NodeKind::Paragraph
            | NodeKind::Heading { .. }
            | NodeKind::BlockQuote
            | NodeKind::List { .. }
            | NodeKind::Item { .. }
            | NodeKind::CodeBlock { .. }
            | NodeKind::HtmlBlock { .. }
            | NodeKind::ThematicBreak
            | NodeKind::Table { .. }
            | NodeKind::TableHead
            | NodeKind::TableRow
            | NodeKind::TableCell
            | NodeKind::FootnoteDefinition { .. }
            | NodeKind::DefinitionList
            | NodeKind::DefinitionTerm
            | NodeKind::DefinitionDescription
            | NodeKind::Run
            | NodeKind::CodeRun
            | NodeKind::Emphasis
            | NodeKind::Strong
            | NodeKind::Strikethrough
            | NodeKind::HtmlSpan
            | NodeKind::FootnoteReference
            | NodeKind::TaskListMarker(_)
            | NodeKind::Math(_)
            | NodeKind::Unknown { .. } => {}
        }
    }
    ranges
}
}
/// Iterator over the direct children of one node.
///
/// Walks a range of slots in the tree's `child_ids` table.
pub(crate) struct Children<'t> {
    tree: &'t Tree,
    range: Range<u32>,
}

impl Iterator for Children<'_> {
    type Item = NodeId;

    /// Yields the id stored in the next slot; ends when the range is
    /// exhausted or a slot falls outside the `child_ids` table.
    fn next(&mut self) -> Option<Self::Item> {
        let tree = self.tree;
        self.range
            .next()
            .and_then(|slot| tree.child_ids.get(slot as usize).copied())
    }
}
/// Iterator over a contiguous span of arena slots, `next..end`.
pub(crate) struct Descendants<'t> {
    tree: &'t Tree,
    next: u32,
    end: u32,
}

impl Iterator for Descendants<'_> {
    type Item = NodeId;

    /// Yields the id at the cursor and advances it; stops at `end` or as
    /// soon as the cursor no longer addresses an existing node.
    fn next(&mut self) -> Option<Self::Item> {
        let current = NodeId(self.next);
        let available = self.next < self.end && self.tree.node(current).is_some();
        if !available {
            return None;
        }
        self.next = self.next.saturating_add(1);
        Some(current)
    }
}
/// Incrementally builds a `Tree` from parser events fed to `handle`,
/// then hands the result over in `finalize`.
pub(crate) struct TreeBuilder<'a> {
/// The full source text the event ranges refer to.
source: &'a str,
/// Nodes allocated so far; slot 0 is the document root.
arena: Vec<Node>,
/// Flat child table; filled in whenever a container closes.
child_ids: Vec<NodeId>,
/// Parent of each arena node, recorded at allocation time.
parents: Vec<Option<NodeId>>,
/// Ids allocated but not yet moved into `child_ids`; drained when their container closes.
pending: Vec<NodeId>,
/// Stack of currently open containers; the bottom frame is the document.
open: Vec<OpenFrame>,
/// Union of the inline text/break ranges seen since the last flush; becomes a `Run` leaf.
inline_range: Option<Range<usize>>,
/// Math regions to splice into text.
/// NOTE(review): the forward-only cursor assumes these are sorted by position — confirm.
math_regions: &'a [MathRegion],
/// Index of the next math region still to be considered.
math_cursor: usize,
/// End offset of the most recently emitted math leaf (0 before any).
math_emitted_until: usize,
}
/// Bookkeeping for one container that has been opened but not yet closed.
#[derive(Debug)]
struct OpenFrame {
/// Arena node this frame is building.
arena_id: NodeId,
/// Length of `pending` right after the container's own node was allocated;
/// everything from this index on becomes the container's children.
pending_start: u32,
/// Start offset of the container's (possibly widened) source range.
raw_start: usize,
/// `Some` for code and HTML blocks: collects the body text delivered while open.
body_accum: Option<String>,
}
impl<'a> TreeBuilder<'a> {
/// Creates a builder whose arena already holds the document root and whose
/// open-frame stack already contains the matching document frame.
pub(crate) fn new(source: &'a str, math_regions: &'a [MathRegion]) -> Self {
    let document = Node {
        kind: NodeKind::Document,
        raw_range: 0..source.len(),
        children: 0..0,
        subtree_end: 1,
    };
    let document_frame = OpenFrame {
        arena_id: NodeId(0),
        pending_start: 0,
        raw_start: 0,
        body_accum: None,
    };
    Self {
        source,
        arena: vec![document],
        child_ids: Vec::new(),
        parents: vec![None],
        pending: Vec::new(),
        open: vec![document_frame],
        inline_range: None,
        math_regions,
        math_cursor: 0,
        math_emitted_until: 0,
    }
}
/// Number of nodes allocated so far, the document root included.
pub(crate) fn arena_len(&self) -> usize {
    let allocated = &self.arena;
    allocated.len()
}
// Feeds one parser event, together with its source byte range, into the builder.
//
// Containers (`Start`/`End`) push and pop frames; leaf events allocate leaf
// nodes; text and line breaks are merged into a pending inline range that is
// flushed as a single `Run` leaf before any other node is emitted.
#[allow(clippy::wildcard_enum_match_arm)]
pub(crate) fn handle(&mut self, event: &Event<'a>, range: Range<usize>) {
// Drop events that lie entirely inside source already covered by an emitted math leaf.
if self.math_emitted_until > range.start && range.end <= self.math_emitted_until {
return;
}
match event {
Event::Start(tag) => {
self.flush_inline_run();
let kind = self.kind_for_start(tag, &range);
// Indented code blocks and HTML blocks get their start pulled back towards the
// beginning of the line; every other kind keeps the parser's range.
let range = match &kind {
NodeKind::CodeBlock { fenced: false, .. } => widen_to_line_start(self.source, range),
NodeKind::HtmlBlock { .. } => widen_to_line_start_through_ws(self.source, range),
_ => range,
};
// Code and HTML blocks collect their body text in the frame instead of child nodes.
let body_accum =
matches!(&kind, NodeKind::CodeBlock { .. } | NodeKind::HtmlBlock { .. }).then(String::new);
self.open_container(kind, range, body_accum);
}
Event::End(end) => {
self.flush_inline_run();
// The frame on top of the stack is closed; the end tag itself is not inspected.
self.close_container(range);
let _ = end;
}
Event::Text(cow) => {
// Inside a code/HTML block the text is body content, not an inline run.
if let Some(buf) = self.body_accum_mut() {
buf.push_str(cow);
return;
}
// Text that begins inside an already-emitted math leaf: keep only the part after it.
if range.start < self.math_emitted_until {
let trimmed = self.math_emitted_until..range.end;
if trimmed.is_empty() {
return;
}
if self.text_overlaps_math(&trimmed) {
self.splice_text_with_math(trimmed);
} else {
self.push_source_prose(trimmed);
}
return;
}
// Include a backslash sitting directly before the text in the run's range.
let raw_range = self.extend_for_backslash(range);
if self.text_overlaps_math(&raw_range) {
self.splice_text_with_math(raw_range);
} else {
let _ = cow;
self.push_inline_text(raw_range);
}
}
Event::Code(cow) => {
self.flush_inline_run();
let _ = cow;
self.push_leaf(NodeKind::CodeRun, range);
}
Event::Html(cow) => {
// HTML delivered while an HTML (or code) block is open is appended to its body.
if let Some(buf) = self.body_accum_mut() {
buf.push_str(cow);
return;
}
self.flush_inline_run();
let _ = cow;
self.push_leaf(NodeKind::HtmlSpan, range);
}
Event::InlineHtml(cow) => {
self.flush_inline_run();
let _ = cow;
self.push_leaf(NodeKind::HtmlSpan, range);
}
Event::FootnoteReference(label) => {
self.flush_inline_run();
let _ = label;
self.push_leaf(NodeKind::FootnoteReference, range);
}
// Line breaks are folded into the surrounding inline run rather than becoming nodes.
Event::SoftBreak => {
self.push_inline_break(range);
}
Event::HardBreak => {
self.push_inline_break(range);
}
Event::Rule => {
self.flush_inline_run();
self.push_leaf(NodeKind::ThematicBreak, range);
}
Event::TaskListMarker(checked) => {
self.flush_inline_run();
// Record the checkbox state on the innermost open frame when that frame is an item.
if let Some(frame) = self.open.last()
&& let Some(node) = self.arena.get_mut(frame.arena_id.idx())
&& let NodeKind::Item { ref mut task } = node.kind
{
*task = Some(*checked);
}
self.push_leaf(NodeKind::TaskListMarker(*checked), range);
}
// Parser-level math events are treated as plain inline text here.
Event::InlineMath(cow) | Event::DisplayMath(cow) => {
let raw_range = range;
let _ = cow;
self.push_inline_text(raw_range);
}
}
}
/// Adds a stretch of inline text to the pending inline run.
fn push_inline_text(&mut self, range: Range<usize>) {
    let span = &range;
    self.extend_inline_range(span);
}
/// Reports whether the next relevant math region starts before `range` ends.
///
/// As a side effect, advances `math_cursor` past every region that ends at
/// or before `range.start`.
fn text_overlaps_math(&mut self, range: &Range<usize>) -> bool {
    while let Some(region) = self.math_regions.get(self.math_cursor) {
        if region.range.end > range.start {
            break;
        }
        self.math_cursor = self.math_cursor.saturating_add(1);
    }
    match self.math_regions.get(self.math_cursor) {
        Some(region) => region.range.start < range.end,
        None => false,
    }
}
/// Splits `raw_range` into prose and math: prose between regions goes to the
/// inline run, and every math region that starts before `raw_range.end`
/// becomes its own `Math` leaf.
///
/// Updates `math_cursor` and `math_emitted_until` as regions are consumed.
fn splice_text_with_math(&mut self, raw_range: Range<usize>) {
    let mut cursor = raw_range.start.max(self.math_emitted_until);
    while let Some(region) = self.math_regions.get(self.math_cursor) {
        let region_range = region.range.clone();
        if region_range.start >= raw_range.end {
            // This region belongs to later text; leave it for a later call.
            break;
        }
        if region_range.end <= cursor {
            // Already behind the cursor: skip it.
            self.math_cursor = self.math_cursor.saturating_add(1);
            continue;
        }
        if cursor < region_range.start {
            self.push_source_prose(cursor..region_range.start);
        }
        // Close the prose run so the math leaf comes after it in the arena.
        self.flush_inline_run();
        let span = region.span().clone();
        tracing::trace!(?region_range, "math leaf");
        self.push_leaf(NodeKind::Math(Box::new(span)), region_range.clone());
        self.math_emitted_until = region_range.end;
        cursor = region_range.end;
        self.math_cursor = self.math_cursor.saturating_add(1);
    }
    if cursor < raw_range.end {
        self.push_source_prose(cursor..raw_range.end);
    }
}
/// Adds a stretch of prose to the pending inline run; forwards to `push_inline_text`.
fn push_source_prose(&mut self, range: Range<usize>) {
    let prose = range;
    self.push_inline_text(prose);
}
/// Folds a soft or hard line break into the pending inline run.
fn push_inline_break(&mut self, range: Range<usize>) {
    let span = &range;
    self.extend_inline_range(span);
}
/// Grows the pending inline range so it also covers `range`, or starts a new
/// pending range when there is none yet.
fn extend_inline_range(&mut self, range: &Range<usize>) {
    if let Some(current) = self.inline_range.as_mut() {
        current.start = current.start.min(range.start);
        current.end = current.end.max(range.end);
    } else {
        self.inline_range = Some(range.clone());
    }
}
fn flush_inline_run(&mut self) {
if let Some(range) = self.inline_range.take()
&& !range.is_empty()
{
self.push_leaf(NodeKind::Run, range);
}
}
/// Returns the body buffer of the innermost open frame, if that frame has one.
fn body_accum_mut(&mut self) -> Option<&mut String> {
    let top = self.open.last_mut()?;
    top.body_accum.as_mut()
}
#[tracing::instrument(level = "debug", skip(self, refs))]
pub(crate) fn finalize(mut self, refs: &ReferenceTable) -> Tree {
self.flush_inline_run();
let doc_pending_start = self.open.pop().map_or(0u32, |f| f.pending_start);
let doc_children: Vec<NodeId> = self.pending.drain(doc_pending_start as usize..).collect();
downgrade_unresolved_links(&mut self.arena, refs);
let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
self.child_ids.extend(doc_children.iter().copied());
let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
if let Some(root) = self.arena.get_mut(0) {
root.children = children_start..children_end;
root.subtree_end = subtree_end;
root.raw_range = 0..self.source.len();
}
Tree {
arena: self.arena,
child_ids: self.child_ids,
parents: self.parents,
}
}
/// Appends a new node to the arena and returns its id.
///
/// The node starts with no children and a subtree that covers only itself.
/// Its parent is the innermost open frame, and its id is queued in `pending`
/// until that frame closes.
fn alloc_node(&mut self, kind: NodeKind, raw_range: Range<usize>) -> NodeId {
    let raw_id = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
    let parent = self.open.last().map(|frame| frame.arena_id);
    self.arena.push(Node {
        kind,
        raw_range,
        children: 0..0,
        subtree_end: raw_id.saturating_add(1),
    });
    self.parents.push(parent);
    let id = NodeId(raw_id);
    self.pending.push(id);
    id
}
/// Allocates a container node and pushes a frame for it onto the open stack.
///
/// `pending_start` is taken after the container's own id was queued, so it
/// marks where the container's children will begin.
fn open_container(&mut self, kind: NodeKind, range: Range<usize>, body_accum: Option<String>) {
    let raw_start = range.start;
    let arena_id = self.alloc_node(kind, range);
    let pending_start = u32::try_from(self.pending.len()).unwrap_or(u32::MAX);
    let frame = OpenFrame {
        arena_id,
        pending_start,
        raw_start,
        body_accum,
    };
    self.open.push(frame);
}
fn close_container(&mut self, range: Range<usize>) {
let Some(frame) = self.open.pop() else {
return;
};
let pending_start = frame.pending_start as usize;
let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
self.child_ids.extend(self.pending.drain(pending_start..));
let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
let raw_range = frame.raw_start..range.end;
let node_is_list = matches!(
self.arena.get(frame.arena_id.idx()).map(|n| &n.kind),
Some(NodeKind::List { .. })
);
if let Some(node) = self.arena.get_mut(frame.arena_id.idx()) {
node.children = children_start..children_end;
node.subtree_end = subtree_end;
node.raw_range = raw_range;
#[allow(clippy::wildcard_enum_match_arm)]
if let Some(body) = frame.body_accum {
match &mut node.kind {
NodeKind::CodeBlock { body: dst, .. } => *dst = body,
NodeKind::HtmlBlock { body: dst } => *dst = body,
_ => {}
}
}
}
if node_is_list {
let list_tight = list_has_no_direct_paragraph_items(&self.arena, &self.child_ids, frame.arena_id);
if let Some(node) = self.arena.get_mut(frame.arena_id.idx())
&& let NodeKind::List { tight, .. } = &mut node.kind
{
*tight = list_tight;
}
}
}
/// Allocates a childless node under the innermost open frame.
fn push_leaf(&mut self, kind: NodeKind, range: Range<usize>) {
    let _leaf = self.alloc_node(kind, range);
}
/// Extends `range` one byte to the left when the byte directly before it in
/// the source is a backslash; otherwise returns `range` unchanged.
fn extend_for_backslash(&self, range: Range<usize>) -> Range<usize> {
    let Some(before) = range.start.checked_sub(1) else {
        return range;
    };
    match self.source.as_bytes().get(before) {
        Some(b'\\') => before..range.end,
        Some(_) | None => range,
    }
}
/// Translates a parser start tag into the `NodeKind` of the container to open.
///
/// `range` is the tag's source range; it is consulted for headings (setext
/// detection and attribute trailer) and lists (marker byte).
fn kind_for_start(&self, tag: &Tag<'a>, range: &Range<usize>) -> NodeKind {
    match tag {
        // Block-level containers.
        Tag::Paragraph => NodeKind::Paragraph,
        Tag::BlockQuote(_) => NodeKind::BlockQuote,
        Tag::HtmlBlock => NodeKind::HtmlBlock { body: String::new() },
        Tag::Heading {
            level,
            id,
            classes,
            attrs,
        } => {
            // A heading whose first non-blank byte is not `#` is treated as setext.
            let opens_with_hash = first_non_whitespace_byte(self.source, range.start) == Some(b'#');
            let attrs = build_heading_attrs(self.source, range, id.as_deref(), classes, attrs).map(Box::new);
            NodeKind::Heading {
                level: *level as u32,
                setext: !opens_with_hash,
                attrs,
            }
        }
        Tag::CodeBlock(CodeBlockKind::Fenced(info)) => NodeKind::CodeBlock {
            fenced: true,
            info: info.to_string(),
            body: String::new(),
        },
        Tag::CodeBlock(CodeBlockKind::Indented) => NodeKind::CodeBlock {
            fenced: false,
            info: String::new(),
            body: String::new(),
        },
        Tag::List(start) => {
            let ordered = start.is_some();
            let marker_byte = derive_list_marker_byte(self.source, range.clone(), ordered).unwrap_or(0);
            NodeKind::List {
                ordered,
                start: start.unwrap_or(0),
                // Provisional; recomputed when the list is closed.
                tight: true,
                marker_byte,
            }
        }
        Tag::Item => NodeKind::Item { task: None },
        Tag::FootnoteDefinition(label) => NodeKind::FootnoteDefinition {
            label: label.to_string(),
        },
        Tag::DefinitionList => NodeKind::DefinitionList,
        Tag::DefinitionListTitle => NodeKind::DefinitionTerm,
        Tag::DefinitionListDefinition => NodeKind::DefinitionDescription,

        // Tables.
        Tag::Table(aligns) => {
            let alignments = aligns.iter().copied().map(TableAlign::from_alignment).collect();
            NodeKind::Table { alignments }
        }
        Tag::TableHead => NodeKind::TableHead,
        Tag::TableRow => NodeKind::TableRow,
        Tag::TableCell => NodeKind::TableCell,

        // Inline containers.
        Tag::Emphasis => NodeKind::Emphasis,
        Tag::Strong => NodeKind::Strong,
        Tag::Strikethrough => NodeKind::Strikethrough,
        Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        } => link_kind(*link_type, dest_url, title, id, false),
        Tag::Image {
            link_type,
            dest_url,
            title,
            id,
        } => link_kind(*link_type, dest_url, title, id, true),

        // Constructs the tree does not model.
        Tag::Superscript => NodeKind::Unknown { tag: "Superscript" },
        Tag::Subscript => NodeKind::Unknown { tag: "Subscript" },
        Tag::MetadataBlock(_) => NodeKind::Unknown { tag: "MetadataBlock" },
    }
}
}
#[allow(clippy::wildcard_enum_match_arm)] fn downgrade_unresolved_links(arena: &mut [Node], refs: &ReferenceTable) {
for node in arena.iter_mut() {
let (label_opt, is_image): (Option<&str>, bool) = match &node.kind {
NodeKind::Link { reference_label } => (reference_label.as_deref(), false),
NodeKind::Image { reference_label } => (reference_label.as_deref(), true),
_ => (None, false),
};
let Some(label) = label_opt else { continue };
if refs.resolve(label).is_some() {
continue;
}
let tag = if is_image { "Image" } else { "Link" };
node.kind = NodeKind::Unknown { tag };
node.children = 0..0;
}
}
/// Picks the node kind for a link or image start tag.
///
/// Autolinks and e-mail links become `Autolink`, wiki links become
/// `Unknown { tag: "WikiLink" }`; everything else becomes `Link` or `Image`
/// (per `is_image`). Reference-style link types carry `id` as their
/// reference label. `dest_url` and `title` are accepted but not used.
fn link_kind(lt: LinkType, dest_url: &CowStr<'_>, title: &CowStr<'_>, id: &CowStr<'_>, is_image: bool) -> NodeKind {
    let _ = (dest_url, title);
    let reference_label = match lt {
        LinkType::Autolink | LinkType::Email => return NodeKind::Autolink,
        LinkType::WikiLink { .. } => return NodeKind::Unknown { tag: "WikiLink" },
        LinkType::Inline => None,
        LinkType::Reference
        | LinkType::ReferenceUnknown
        | LinkType::Collapsed
        | LinkType::CollapsedUnknown
        | LinkType::Shortcut
        | LinkType::ShortcutUnknown => Some(id.to_string()),
    };
    match is_image {
        true => NodeKind::Image { reference_label },
        false => NodeKind::Link { reference_label },
    }
}
/// Returns the first byte at or after `start` that is neither a space nor a tab.
///
/// Returns `None` when `start` lies beyond the source or only spaces/tabs
/// remain. Newlines count as non-whitespace here.
fn first_non_whitespace_byte(source: &str, start: usize) -> Option<u8> {
    let tail = source.as_bytes().get(start..)?;
    for &byte in tail {
        if byte != b' ' && byte != b'\t' {
            return Some(byte);
        }
    }
    None
}
/// Scans the bytes of `source` within `range` for the first list-marker byte.
///
/// For ordered lists this is the first ASCII digit; for unordered lists the
/// first of `-`, `*` or `+`. Returns `None` when `range` does not fit inside
/// the source or no such byte occurs.
fn derive_list_marker_byte(source: &str, range: Range<usize>, ordered: bool) -> Option<u8> {
    let window = source.as_bytes().get(range)?;
    for &byte in window {
        let is_marker = if ordered {
            byte.is_ascii_digit()
        } else {
            byte == b'-' || byte == b'*' || byte == b'+'
        };
        if is_marker {
            return Some(byte);
        }
    }
    None
}
/// Moves the start of `range` back to the beginning of its line.
///
/// The new start is the byte after the closest preceding `\n`, or 0 when
/// there is none. A start beyond the source is first clamped to the source
/// length. The end of the range is left untouched.
fn widen_to_line_start(source: &str, range: Range<usize>) -> Range<usize> {
    let bytes = source.as_bytes();
    let clamped = range.start.min(bytes.len());
    let before = bytes.get(..clamped).unwrap_or_default();
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline.saturating_add(1));
    line_start..range.end
}
/// Moves the start of `range` back over leading spaces and tabs to the
/// beginning of its line — but only when nothing else precedes it on that line.
///
/// If any byte other than a space or tab sits between the line start and
/// `range.start`, the original `range` is returned unchanged. A start beyond
/// the source is clamped to the source length before scanning.
fn widen_to_line_start_through_ws(source: &str, range: Range<usize>) -> Range<usize> {
    let bytes = source.as_bytes();
    let clamped = range.start.min(bytes.len());
    let before = bytes.get(..clamped).unwrap_or_default();
    // Locate the last byte before the range that is not indentation.
    match before.iter().rposition(|&b| b != b' ' && b != b'\t') {
        // Only blanks (or nothing) back to the start of the source.
        None => 0..range.end,
        // Blanks back to a newline: the line begins right after it.
        Some(at) if before.get(at) == Some(&b'\n') => at.saturating_add(1)..range.end,
        // Other content shares the line: leave the range alone.
        Some(_) => range,
    }
}
/// Builds the `HeadingAttrs` for a heading, or `None` when the heading
/// declares no id, no classes and no attributes.
///
/// `source_trailer` is the slice of the heading's raw text selected by
/// `find_attr_trailer_range`, or the empty string when none is found.
fn build_heading_attrs(
    source: &str,
    range: &Range<usize>,
    id: Option<&str>,
    classes: &[CowStr<'_>],
    attrs: &[(CowStr<'_>, Option<CowStr<'_>>)],
) -> Option<HeadingAttrs> {
    let nothing_declared = id.is_none() && classes.is_empty() && attrs.is_empty();
    if nothing_declared {
        return None;
    }

    let raw = source.get(range.clone()).unwrap_or("");
    let source_trailer = match find_attr_trailer_range(raw) {
        Some(found) => raw.get(found).unwrap_or("").to_owned(),
        None => String::new(),
    };

    let id = id.map(str::to_owned);
    let classes = classes.iter().map(|class| class.to_string()).collect();
    let attrs = attrs
        .iter()
        .map(|(key, value)| (key.to_string(), value.as_ref().map(|v| v.to_string())))
        .collect();

    Some(HeadingAttrs {
        id,
        classes,
        attrs,
        source_trailer,
    })
}
/// Returns `true` when no `Item` child of the list has a `Paragraph` as a
/// direct child.
///
/// Unknown ids, dangling child slots and non-item children are ignored; an
/// unknown `list_id` yields `true`.
fn list_has_no_direct_paragraph_items(arena: &[Node], child_ids: &[NodeId], list_id: NodeId) -> bool {
    let Some(list_node) = arena.get(list_id.idx()) else {
        return true;
    };
    let has_loose_item = list_node
        .children
        .clone()
        .filter_map(|slot| child_ids.get(slot as usize))
        .filter_map(|item_id| arena.get(item_id.idx()))
        .filter(|candidate| matches!(candidate.kind, NodeKind::Item { .. }))
        .any(|item| item_has_direct_paragraph(arena, child_ids, item));
    !has_loose_item
}
/// Returns `true` when at least one direct child of `item` is a `Paragraph`.
///
/// Child slots that fall outside `child_ids` are skipped.
fn item_has_direct_paragraph(arena: &[Node], child_ids: &[NodeId], item: &Node) -> bool {
    item.children
        .clone()
        .filter_map(|slot| child_ids.get(slot as usize))
        .any(|child| matches!(arena.get(child.idx()).map(|n| &n.kind), Some(NodeKind::Paragraph)))
}
// Unit tests for the tree. Each test parses a small Markdown snippet through
// `Ir::parse_str` and inspects the resulting `ir.tree` (or `ir.refs`).
#[cfg(test)]
#[allow(clippy::expect_used)]
mod tests {
use super::*;
use crate::ir::Ir;
// An empty document still has a root node and reports itself as empty.
#[test]
fn empty_doc_has_root_only() {
let ir = Ir::parse_str("");
let tree = &ir.tree;
assert_eq!(tree.root(), NodeId(0));
assert!(tree.is_empty());
assert!(matches!(
tree.node(tree.root()).map(|n| &n.kind),
Some(NodeKind::Document)
));
}
// Plain text yields a paragraph containing an inline run.
#[test]
fn paragraph_and_text_present() {
let ir = Ir::parse_str("Hello world\n");
let tree = &ir.tree;
let kinds: Vec<&NodeKind> = tree
.descendants(tree.root())
.filter_map(|id| tree.node(id).map(|n| &n.kind))
.collect();
assert!(kinds.iter().any(|k| matches!(k, NodeKind::Paragraph)));
assert!(kinds.iter().any(|k| matches!(k, NodeKind::Run)));
}
// Every node's range is ordered and stays within the source.
#[test]
fn raw_ranges_are_well_formed() {
let src = "# Title\n\nA paragraph.\n\n- one\n- two\n";
let ir = Ir::parse_str(src);
let tree = &ir.tree;
for id in tree.descendants(tree.root()) {
let n = tree.node(id).expect("descendants only yields valid ids");
assert!(n.raw_range.start <= n.raw_range.end);
assert!(n.raw_range.end <= src.len());
}
}
// The raw text of each block kind begins with that block's leading marker.
#[test]
fn raw_range_covers_leading_sigil_per_block_kind() {
let src = "\
# Heading
> quote
- list item
```rust
let x = 1;
```
indented
---
<!-- html block -->
";
let ir = Ir::parse_str(src);
let tree = &ir.tree;
for id in tree.descendants(tree.root()) {
let n = tree.node(id).expect("descendants yields valid ids");
let raw = tree.raw_text(src, id);
#[allow(clippy::wildcard_enum_match_arm)]
match &n.kind {
NodeKind::Heading { setext: false, .. } => {
assert!(raw.starts_with('#'), "ATX heading missing `#`: {raw:?}");
}
NodeKind::BlockQuote => {
assert!(raw.starts_with('>'), "blockquote missing `>`: {raw:?}");
}
NodeKind::List { .. } => {
let first = raw.bytes().next().expect("non-empty list raw_text");
assert!(
matches!(first, b'-' | b'*' | b'+' | b'0'..=b'9'),
"list missing bullet: {raw:?}",
);
}
NodeKind::CodeBlock { fenced: true, .. } => {
assert!(
raw.starts_with("```") || raw.starts_with("~~~"),
"fenced code block missing opening fence: {raw:?}",
);
assert!(
raw.trim_end_matches('\n').ends_with("```") || raw.trim_end_matches('\n').ends_with("~~~"),
"fenced code block missing closing fence: {raw:?}",
);
}
NodeKind::CodeBlock { fenced: false, .. } => {
assert!(
raw.starts_with(" ") || raw.starts_with('\t'),
"indented code block missing 4-space prefix: {raw:?}",
);
}
NodeKind::HtmlBlock { .. } => {
assert!(raw.starts_with('<'), "HTML block missing `<`: {raw:?}");
}
NodeKind::ThematicBreak => {
let first = raw.bytes().next().expect("non-empty thematic break");
assert!(
matches!(first, b'-' | b'*' | b'_'),
"thematic break missing marker: {raw:?}",
);
}
_ => {}
}
}
}
// A child's source range never extends outside its parent's range.
#[test]
fn child_raw_range_is_contained_in_parent() {
let src = "# H\n\n> quote with *em*\n\n- item one\n- item two\n";
let ir = Ir::parse_str(src);
let tree = &ir.tree;
for id in tree.descendants(tree.root()) {
if id == tree.root() {
continue;
}
let Some(parent_id) = tree.parent(id) else {
continue;
};
let child = tree.node(id).expect("valid child id");
let parent = tree.node(parent_id).expect("valid parent id");
assert!(
parent.raw_range.start <= child.raw_range.start && child.raw_range.end <= parent.raw_range.end,
"child {:?} {:?} outside parent {:?} {:?}",
child.kind,
child.raw_range,
parent.kind,
parent.raw_range,
);
}
}
// Following parent links from the last node reaches the root, which has no parent.
#[test]
fn parent_chain_terminates_at_root() {
let ir = Ir::parse_str("> a quote\n");
let tree = &ir.tree;
let last = NodeId(u32::try_from(tree.len().saturating_sub(1)).unwrap_or(0));
let mut cur = last;
let mut steps: u32 = 0;
while let Some(p) = tree.parent(cur) {
cur = p;
steps = steps.saturating_add(1);
assert!(steps < 32, "walk did not terminate");
}
assert_eq!(cur, tree.root());
assert!(tree.parent(tree.root()).is_none());
}
// Helper: the `tight` flag of the first list found in arena order, if any.
fn find_list_tight(tree: &Tree) -> Option<bool> {
tree.descendants(tree.root())
.find_map(|id| match tree.node(id).map(|n| &n.kind) {
Some(NodeKind::List { tight, .. }) => Some(*tight),
_ => None,
})
}
// Items without blank lines between them form a tight list.
#[test]
fn tight_list_one_text_child() {
let ir = Ir::parse_str("- one\n- two\n");
assert_eq!(find_list_tight(&ir.tree), Some(true));
}
// A blank line between items makes the list loose.
#[test]
fn loose_list_with_blank_line_between_items() {
let ir = Ir::parse_str("- one\n\n- two\n");
assert_eq!(find_list_tight(&ir.tree), Some(false));
}
// A blockquote inside a list item shows up as a `BlockQuote` node.
#[test]
fn nested_blockquote_under_list() {
let ir = Ir::parse_str("- item\n\n > quote\n");
let tree = &ir.tree;
let bq = tree
.descendants(tree.root())
.find(|&id| matches!(tree.node(id).map(|n| &n.kind), Some(NodeKind::BlockQuote)));
assert!(bq.is_some(), "blockquote nested under list item");
}
// A full reference link keeps its reference label on the `Link` node.
#[test]
fn reference_link_records_label() {
let src = "[foo][bar]\n\n[bar]: https://example.com\n";
let ir = Ir::parse_str(src);
let tree = &ir.tree;
let label = tree
.descendants(tree.root())
.find_map(|id| match tree.node(id).map(|n| &n.kind) {
Some(NodeKind::Link { reference_label }) => reference_label.clone(),
_ => None,
})
.expect("link present");
assert_eq!(label, "bar");
}
// Link reference definitions are exposed through the IR's reference table.
#[test]
fn link_reference_definitions_appear_in_reference_table() {
let src = "[a]: https://a.example\n[b]: https://b.example\n\n[a] and [b].\n";
let ir = Ir::parse_str(src);
let mut labels: Vec<String> = ir.refs.iter().map(|t| t.label_raw.clone()).collect();
labels.sort();
assert_eq!(labels, vec!["a".to_owned(), "b".to_owned()]);
}
// An angle-bracket URL produces an `Autolink` node.
#[test]
fn autolink_preserves_url() {
let ir = Ir::parse_str("<https://example.com>\n");
let tree = &ir.tree;
let has_autolink = tree
.descendants(tree.root())
.find_map(|id| match tree.node(id).map(|n| &n.kind) {
Some(NodeKind::Autolink) => Some(true),
_ => None,
})
.expect("autolink present");
assert!(has_autolink);
}
// Task-list checkboxes are recorded on their enclosing items.
#[test]
fn task_list_marker_sets_item_task() {
let ir = Ir::parse_str("- [x] done\n- [ ] todo\n");
let tree = &ir.tree;
let items: Vec<Option<bool>> = tree
.descendants(tree.root())
.filter_map(|id| match tree.node(id).map(|n| &n.kind) {
Some(NodeKind::Item { task }) => Some(*task),
_ => None,
})
.collect();
assert_eq!(items, vec![Some(true), Some(false)]);
}
// The info string of a fenced code block is kept on the node.
#[test]
fn code_block_info_string() {
let ir = Ir::parse_str("```rust\nfn x() {}\n```\n");
let tree = &ir.tree;
let info = tree
.descendants(tree.root())
.find_map(|id| match tree.node(id).map(|n| &n.kind) {
Some(NodeKind::CodeBlock { fenced: true, info, .. }) => Some(info.clone()),
_ => None,
})
.expect("fenced code block");
assert_eq!(info, "rust");
}
}