Skip to main content

mdwright_document/
tree.rs

1//! Tree IR for structural document shape.
2//!
3//! The [`crate::ir::Ir`] flat IR is enough for `scan-and-emit-diagnostic`
4//! lint rules, but it does not retain container nesting: a paragraph
5//! that lives three list items deep inside a blockquote looks identical
6//! to a top-level paragraph in the flat IR. The tree IR is an owned arena of
7//! [`Node`] values rooted at a Document node, built during the *same*
8//! pulldown-cmark event walk as the flat IR: two IRs, one parse pass.
9//!
10//! ## Storage
11//!
12//! `Tree` holds three vectors:
13//!
14//! - `arena: Vec<Node>` in pre-order DFS. Each node carries
15//!   `subtree_end` so `descendants(id)` is the contiguous range
16//!   `arena[id+1 .. subtree_end]`: no recursion, no allocation.
17//! - `child_ids: Vec<NodeId>`: a parent's *direct* children are not
18//!   contiguous in the arena (between them sit each child's whole
19//!   subtree), so direct children are stored separately. Each node's
20//!   `children: Range<u32>` indexes into this table.
21//! - `parents: Vec<Option<NodeId>>`: parallel to `arena` for O(1)
22//!   parent lookup without bloating `Node`.
23//!
24//! ## Two IRs, one walk
25//!
26//! `Ir::parse` collects `pulldown_cmark::Parser` events once. The
27//! flat IR records lint-facing slices, then the tree builder consumes
28//! the same events after math regions are known.
29
30#![allow(
31    dead_code,
32    reason = "the tree is a private parse-time representation with targeted unit coverage"
33)]
34
35use std::ops::Range;
36
37use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag};
38
39use crate::HeadingAttrs;
40use crate::heading::find_attr_trailer_range;
41use crate::refs::ReferenceTable;
42use mdwright_math::{MathRegion, MathSpan};
43
44/// Index into [`Tree`]'s arena. Stable for the life of the tree;
45/// can only be obtained from `Tree` methods.
46#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
47pub(crate) struct NodeId(u32);
48
49impl NodeId {
50    #[must_use]
51    pub(crate) fn idx(self) -> usize {
52        self.0 as usize
53    }
54}
55
56/// One node in the document tree. A pure data carrier; behaviour
57/// lives in dedicated modules.
58#[derive(Clone, Debug)]
59pub(crate) struct Node {
60    pub(crate) kind: NodeKind,
61    pub(crate) raw_range: Range<usize>,
62    /// Range into the owning [`Tree`]'s child-id table. Iterate via
63    /// [`Tree::children`]; the field is exposed crate-internally so
64    /// the builder can fill it after seeing the matching End event.
65    pub(crate) children: Range<u32>,
66    /// Exclusive end of this node's subtree in the arena. Always
67    /// `>= self_id + 1`; equals `self_id + 1` for leaves.
68    pub(crate) subtree_end: u32,
69}
70
71/// All node kinds we recognise.
72///
73/// Container kinds (`Paragraph`, `Heading`, `BlockQuote`, `List`,
74/// `Item`, …) may carry direct children. Leaf kinds (`Text`, `Code`,
75/// `SoftBreak`, …)
76/// never do. The [`Unknown`](NodeKind::Unknown) variant is a forward-
77/// compatibility fallback for pulldown-cmark tags we don't model;
78/// the formatter falls back to verbatim source emission via
79/// [`Tree::raw_text`] for those.
80#[derive(Clone, Debug)]
81pub(crate) enum NodeKind {
82    /// The document root. Always at `NodeId(0)`.
83    Document,
84    Paragraph,
85    Heading {
86        level: u32,
87        /// `true` for setext (`Foo\n===`); `false` for ATX (`# Foo`).
88        setext: bool,
89        /// `Some` when the source carried an ATX `{#id .class key=val}`
90        /// trailer (pulldown `Tag::Heading::id|classes|attrs` non-empty);
91        /// `None` otherwise. Boxed so the enum's largest-variant size
92        /// is unaffected by the rare attribute case.
93        attrs: Option<Box<HeadingAttrs>>,
94    },
95    BlockQuote,
96    List {
97        ordered: bool,
98        /// Start index for ordered lists; `0` for unordered.
99        start: u64,
100        /// `true` iff no direct `Item` child contains a direct
101        /// [`Paragraph`](NodeKind::Paragraph) child. Computed at End.
102        tight: bool,
103        /// Marker byte of the *first* item (`-`, `*`, `+`, or the
104        /// first digit of an ordered marker). `0` if the list has no
105        /// items (a degenerate but parseable case).
106        marker_byte: u8,
107    },
108    Item {
109        /// `Some(checked)` for task list items; `None` otherwise.
110        /// Set when a [`TaskListMarker`](NodeKind::TaskListMarker)
111        /// child is seen inside the item.
112        task: Option<bool>,
113    },
114    CodeBlock {
115        fenced: bool,
116        info: String,
117        /// Body bytes the parser emitted inside this block. Pulldown
118        /// has already stripped any enclosing container's prefix
119        /// (list-continuation indent, blockquote `>` markers,
120        /// indented-code-block 4-space prefix), so this is the
121        /// minimum representation that survives re-nesting.
122        body: String,
123    },
124    HtmlBlock {
125        /// Body bytes the parser emitted inside this block. Same
126        /// prefix-stripped form as code-block bodies.
127        body: String,
128    },
129    ThematicBreak,
130    Table {
131        alignments: Vec<TableAlign>,
132    },
133    TableHead,
134    TableRow,
135    TableCell,
136    FootnoteDefinition {
137        label: String,
138    },
139    /// Container for a definition list (pulldown `Tag::DefinitionList`,
140    /// enabled by `Options::ENABLE_DEFINITION_LIST`). Direct children
141    /// are alternating [`DefinitionTerm`](NodeKind::DefinitionTerm) and
142    /// [`DefinitionDescription`](NodeKind::DefinitionDescription) nodes
143    /// in source order.
144    DefinitionList,
145    /// One term in a [`DefinitionList`](NodeKind::DefinitionList).
146    /// Children are inline.
147    DefinitionTerm,
148    /// One definition body in a [`DefinitionList`](NodeKind::DefinitionList).
149    /// Children are block-level (paragraphs, lists, code blocks, …).
150    DefinitionDescription,
151    // Inline:
152    /// A coalesced run of text + soft/hard breaks.
153    Run,
154    /// One inline code span.
155    CodeRun,
156    Emphasis,
157    Strong,
158    Strikethrough,
159    Link {
160        /// Raw reference label for reference-style links; `None` for
161        /// inline links. Used only to downgrade unresolved references
162        /// to raw-source nodes after the reference table is built.
163        reference_label: Option<String>,
164    },
165    Image {
166        /// Raw reference label for reference-style images; `None` for
167        /// inline images.
168        reference_label: Option<String>,
169    },
170    Autolink,
171    /// One inline HTML span.
172    HtmlSpan,
173    FootnoteReference,
174    TaskListMarker(bool),
175    /// One recognised math region.
176    ///
177    /// The `Box` is a deliberate layout choice: `MathSpan` is the
178    /// largest inline payload (≈56 B; `MathBody` carries a sorted
179    /// `Box<[Range<usize>]>` of transparent runs), and inlining it
180    /// here would grow every `Node` by the same margin even though
181    /// most trees have zero or one math leaf. Math leaves are rare
182    /// enough that the per-leaf heap allocation is not a hot-path
183    /// cost.
184    Math(Box<MathSpan>),
185
186    /// Forward-compatibility fallback. Pulldown-cmark may emit tags
187    /// we don't recognise (math when enabled, definition lists,
188    /// super/subscript, wiki links, metadata blocks). Rather than
189    /// panicking, the builder records an `Unknown` node with the raw
190    /// range; the formatter falls back to byte-verbatim emission.
191    Unknown {
192        tag: &'static str,
193    },
194}
195
196/// Column alignment for a recognised GFM table.
197#[derive(Copy, Clone, Debug, PartialEq, Eq)]
198pub enum TableAlign {
199    None,
200    Left,
201    Center,
202    Right,
203}
204
205impl TableAlign {
206    fn from_alignment(a: Alignment) -> Self {
207        match a {
208            Alignment::None => Self::None,
209            Alignment::Left => Self::Left,
210            Alignment::Center => Self::Center,
211            Alignment::Right => Self::Right,
212        }
213    }
214}
215
216/// An owned arena of [`Node`] values rooted at a Document node.
217#[derive(Debug)]
218pub(crate) struct Tree {
219    arena: Vec<Node>,
220    child_ids: Vec<NodeId>,
221    parents: Vec<Option<NodeId>>,
222}
223
224impl Tree {
225    /// The Document root. Always present.
226    #[must_use]
227    #[allow(clippy::unused_self)]
228    pub(crate) fn root(&self) -> NodeId {
229        NodeId(0)
230    }
231
232    /// Look up a node by id. Returns `None` for ids that did not come
233    /// from this tree.
234    #[must_use]
235    pub(crate) fn node(&self, id: NodeId) -> Option<&Node> {
236        self.arena.get(id.idx())
237    }
238
239    /// Source bytes covered by `id`. Empty string for ids that did not
240    /// come from this tree; otherwise always a valid slice.
241    ///
242    /// Caller passes the canonical source the tree was parsed from.
243    #[must_use]
244    pub(crate) fn raw_text<'a>(&self, source: &'a str, id: NodeId) -> &'a str {
245        self.node(id)
246            .and_then(|n| source.get(n.raw_range.clone()))
247            .unwrap_or("")
248    }
249
250    /// Direct children of `id` in source order.
251    pub(crate) fn children(&self, id: NodeId) -> Children<'_> {
252        let range = self.node(id).map_or(0..0, |n| n.children.clone());
253        Children { tree: self, range }
254    }
255
256    /// Parent of `id`, or `None` for the root and for unknown ids.
257    #[must_use]
258    pub(crate) fn parent(&self, id: NodeId) -> Option<NodeId> {
259        self.parents.get(id.idx()).copied().flatten()
260    }
261
262    /// Every descendant of `id` in pre-order (excluding `id` itself).
263    pub(crate) fn descendants(&self, id: NodeId) -> Descendants<'_> {
264        let start = id.idx().saturating_add(1);
265        let end = self.node(id).map_or(start, |n| n.subtree_end as usize);
266        Descendants {
267            tree: self,
268            next: start as u32,
269            end: end as u32,
270        }
271    }
272
273    /// Number of nodes in the tree. Includes the Document root.
274    #[must_use]
275    pub(crate) fn len(&self) -> usize {
276        self.arena.len()
277    }
278
279    /// `true` iff the tree has only the Document root.
280    #[must_use]
281    pub(crate) fn is_empty(&self) -> bool {
282        self.arena.len() <= 1
283    }
284
285    pub(crate) fn list_tightness_by_start(&self) -> Vec<(usize, bool)> {
286        self.descendants(self.root())
287            .filter_map(|id| {
288                let node = self.node(id)?;
289                match &node.kind {
290                    NodeKind::List { tight, .. } => Some((node.raw_range.start, *tight)),
291                    NodeKind::Document
292                    | NodeKind::Paragraph
293                    | NodeKind::Heading { .. }
294                    | NodeKind::BlockQuote
295                    | NodeKind::Item { .. }
296                    | NodeKind::CodeBlock { .. }
297                    | NodeKind::HtmlBlock { .. }
298                    | NodeKind::ThematicBreak
299                    | NodeKind::Table { .. }
300                    | NodeKind::TableHead
301                    | NodeKind::TableRow
302                    | NodeKind::TableCell
303                    | NodeKind::FootnoteDefinition { .. }
304                    | NodeKind::DefinitionList
305                    | NodeKind::DefinitionTerm
306                    | NodeKind::DefinitionDescription
307                    | NodeKind::Run
308                    | NodeKind::CodeRun
309                    | NodeKind::Emphasis
310                    | NodeKind::Strong
311                    | NodeKind::Strikethrough
312                    | NodeKind::Link { .. }
313                    | NodeKind::Image { .. }
314                    | NodeKind::Autolink
315                    | NodeKind::HtmlSpan
316                    | NodeKind::FootnoteReference
317                    | NodeKind::TaskListMarker(_)
318                    | NodeKind::Math(_)
319                    | NodeKind::Unknown { .. } => None,
320                }
321            })
322            .collect()
323    }
324
325    pub(crate) fn link_like_ranges(&self) -> Vec<Range<usize>> {
326        self.descendants(self.root())
327            .filter_map(|id| {
328                let node = self.node(id)?;
329                match &node.kind {
330                    NodeKind::Link { .. } | NodeKind::Image { .. } | NodeKind::Autolink => Some(node.raw_range.clone()),
331                    NodeKind::Document
332                    | NodeKind::Paragraph
333                    | NodeKind::Heading { .. }
334                    | NodeKind::BlockQuote
335                    | NodeKind::List { .. }
336                    | NodeKind::Item { .. }
337                    | NodeKind::CodeBlock { .. }
338                    | NodeKind::HtmlBlock { .. }
339                    | NodeKind::ThematicBreak
340                    | NodeKind::Table { .. }
341                    | NodeKind::TableHead
342                    | NodeKind::TableRow
343                    | NodeKind::TableCell
344                    | NodeKind::FootnoteDefinition { .. }
345                    | NodeKind::DefinitionList
346                    | NodeKind::DefinitionTerm
347                    | NodeKind::DefinitionDescription
348                    | NodeKind::Run
349                    | NodeKind::CodeRun
350                    | NodeKind::Emphasis
351                    | NodeKind::Strong
352                    | NodeKind::Strikethrough
353                    | NodeKind::HtmlSpan
354                    | NodeKind::FootnoteReference
355                    | NodeKind::TaskListMarker(_)
356                    | NodeKind::Math(_)
357                    | NodeKind::Unknown { .. } => None,
358                }
359            })
360            .collect()
361    }
362}
363
364/// Iterator over a node's direct children. Returned by
365/// [`Tree::children`].
366pub(crate) struct Children<'t> {
367    tree: &'t Tree,
368    range: Range<u32>,
369}
370
371impl Iterator for Children<'_> {
372    type Item = NodeId;
373
374    fn next(&mut self) -> Option<Self::Item> {
375        let i = self.range.next()?;
376        self.tree.child_ids.get(i as usize).copied()
377    }
378}
379
380/// Iterator over a node's descendants in pre-order. Returned by
381/// [`Tree::descendants`].
382pub(crate) struct Descendants<'t> {
383    tree: &'t Tree,
384    next: u32,
385    end: u32,
386}
387
388impl Iterator for Descendants<'_> {
389    type Item = NodeId;
390
391    fn next(&mut self) -> Option<Self::Item> {
392        if self.next >= self.end {
393            return None;
394        }
395        let id = NodeId(self.next);
396        // Need to advance, but the caller may not have all nodes
397        // built yet only inside the builder; outside, `arena` is
398        // complete. `tree.node` returning Some guarantees the
399        // `subtree_end` is valid.
400        let _ = self.tree.node(id)?;
401        self.next = self.next.saturating_add(1);
402        Some(id)
403    }
404}
405
406/// Walks the pulldown-cmark event stream and accumulates an arena
407/// tree. Lives inside [`crate::ir::Ir::parse`] alongside the flat-IR
408/// builder.
409///
410/// The builder records source ranges for structural nodes. It buffers
411/// `Event::Text` / `Event::SoftBreak` / `Event::HardBreak` into one
412/// `NodeKind::Run` until a non-text event ends the run. Block code and
413/// block HTML accumulate their bodies on the open frame so the closing
414/// event can stamp the container with a `body` field.
415pub(crate) struct TreeBuilder<'a> {
416    source: &'a str,
417    arena: Vec<Node>,
418    child_ids: Vec<NodeId>,
419    parents: Vec<Option<NodeId>>,
420    /// Scratch buffer; the tail beyond `open.last().pending_start` is
421    /// the current open frame's accumulated children.
422    pending: Vec<NodeId>,
423    open: Vec<OpenFrame>,
424    /// Source byte range covered by buffered inline events.
425    inline_range: Option<Range<usize>>,
426    /// Recognised math regions, sorted by `range.start`. The text
427    /// handler splices a [`NodeKind::Math`] leaf at each region and
428    /// drops subsequent pulldown events whose bytes were already
429    /// covered by the emitted leaf.
430    math_regions: &'a [MathRegion],
431    /// Monotonic index into [`Self::math_regions`]; advances past
432    /// regions whose end is before the next event we process.
433    math_cursor: usize,
434    /// Exclusive end byte of the most recently emitted math leaf.
435    /// Any pulldown event whose range ends at or before this byte was
436    /// already covered by the leaf and must be swallowed.
437    math_emitted_until: usize,
438}
439
440#[derive(Debug)]
441struct OpenFrame {
442    arena_id: NodeId,
443    pending_start: u32,
444    raw_start: usize,
445    /// `Some` for `CodeBlock` and `HtmlBlock`: the parser's text /
446    /// html payloads inside these containers append to this buffer
447    /// instead of going through the inline accumulator. The closing
448    /// event stamps the buffer onto the container's `body` field.
449    body_accum: Option<String>,
450}
451
452impl<'a> TreeBuilder<'a> {
453    pub(crate) fn new(source: &'a str, math_regions: &'a [MathRegion]) -> Self {
454        // Allocate the Document root at index 0 up front, then open
455        // a frame for it; `finalize` closes the frame.
456        let root = Node {
457            kind: NodeKind::Document,
458            raw_range: 0..source.len(),
459            children: 0..0,
460            subtree_end: 1,
461        };
462        Self {
463            source,
464            arena: vec![root],
465            child_ids: Vec::new(),
466            parents: vec![None],
467            pending: Vec::new(),
468            open: vec![OpenFrame {
469                arena_id: NodeId(0),
470                pending_start: 0,
471                raw_start: 0,
472                body_accum: None,
473            }],
474            inline_range: None,
475            math_regions,
476            math_cursor: 0,
477            math_emitted_until: 0,
478        }
479    }
480
481    /// Number of nodes allocated in the arena so far. Used only by
482    /// `Ir::parse` for trace output; not part of the builder's
483    /// formal interface.
484    pub(crate) fn arena_len(&self) -> usize {
485        self.arena.len()
486    }
487
488    #[allow(clippy::wildcard_enum_match_arm)]
489    pub(crate) fn handle(&mut self, event: &Event<'a>, range: Range<usize>) {
490        // Math-overlay swallow guard. Any event whose bytes were
491        // already covered by a previously-emitted `NodeKind::Math`
492        // leaf is dropped; its content is part of the math region's
493        // outer range and will be rendered via `MathSpan::pretty`.
494        // Multi-line display math (`$$\nx=1\n$$`) emits several
495        // `Event::Text` / `Event::SoftBreak` events whose ranges all
496        // fall inside the math region; this guard drops them after
497        // the first text event has emitted the leaf.
498        if self.math_emitted_until > range.start && range.end <= self.math_emitted_until {
499            return;
500        }
501        match event {
502            Event::Start(tag) => {
503                self.flush_inline_run();
504                let kind = self.kind_for_start(tag, &range);
505                // Pulldown's event range for an indented code block
506                // starts at the first content byte (after the 4-space
507                // / tab prefix). Walk back to the start of the line
508                // so `raw_text` includes the indent. The block's
509                // identity depends on it.
510                //
511                // HtmlBlock gets the same widening (limited to
512                // space/tab so we don't engulf preceding content)
513                // because pulldown's HTML render of `  <?` includes
514                // the leading whitespace as text inside the block
515                // container; emitting only the post-whitespace bytes
516                // drops content that pulldown re-renders identically
517                // when present. Fuzz-found
518                // `html-pi-leading-whitespace.in`.
519                let range = match &kind {
520                    NodeKind::CodeBlock { fenced: false, .. } => widen_to_line_start(self.source, range),
521                    NodeKind::HtmlBlock { .. } => widen_to_line_start_through_ws(self.source, range),
522                    _ => range,
523                };
524                let body_accum =
525                    matches!(&kind, NodeKind::CodeBlock { .. } | NodeKind::HtmlBlock { .. }).then(String::new);
526                self.open_container(kind, range, body_accum);
527            }
528            Event::End(end) => {
529                self.flush_inline_run();
530                self.close_container(range);
531                let _ = end;
532            }
533            Event::Text(cow) => {
534                if let Some(buf) = self.body_accum_mut() {
535                    buf.push_str(cow);
536                    return;
537                }
538                // If the event starts inside an already-emitted math
539                // region (because pulldown emitted the `\)` closer as
540                // a decoded `)` and `extend_for_backslash` would pull
541                // the range back into the leaf), trim the leading
542                // in-math portion and re-derive the trailing prose
543                // from source bytes; the decoded `cow` doesn't align
544                // with the trimmed range.
545                if range.start < self.math_emitted_until {
546                    let trimmed = self.math_emitted_until..range.end;
547                    if trimmed.is_empty() {
548                        return;
549                    }
550                    if self.text_overlaps_math(&trimmed) {
551                        self.splice_text_with_math(trimmed);
552                    } else {
553                        self.push_source_prose(trimmed);
554                    }
555                    return;
556                }
557                let raw_range = self.extend_for_backslash(range);
558                if self.text_overlaps_math(&raw_range) {
559                    self.splice_text_with_math(raw_range);
560                } else {
561                    let _ = cow;
562                    self.push_inline_text(raw_range);
563                }
564            }
565            Event::Code(cow) => {
566                self.flush_inline_run();
567                let _ = cow;
568                self.push_leaf(NodeKind::CodeRun, range);
569            }
570            Event::Html(cow) => {
571                if let Some(buf) = self.body_accum_mut() {
572                    buf.push_str(cow);
573                    return;
574                }
575                // Defensive: a block-level Html event outside an
576                // HtmlBlock container. Treat it as inline HTML so the
577                // bytes survive verbatim.
578                self.flush_inline_run();
579                let _ = cow;
580                self.push_leaf(NodeKind::HtmlSpan, range);
581            }
582            Event::InlineHtml(cow) => {
583                self.flush_inline_run();
584                let _ = cow;
585                self.push_leaf(NodeKind::HtmlSpan, range);
586            }
587            Event::FootnoteReference(label) => {
588                self.flush_inline_run();
589                let _ = label;
590                self.push_leaf(NodeKind::FootnoteReference, range);
591            }
592            Event::SoftBreak => {
593                self.push_inline_break(range);
594            }
595            Event::HardBreak => {
596                self.push_inline_break(range);
597            }
598            Event::Rule => {
599                self.flush_inline_run();
600                self.push_leaf(NodeKind::ThematicBreak, range);
601            }
602            Event::TaskListMarker(checked) => {
603                self.flush_inline_run();
604                if let Some(frame) = self.open.last()
605                    && let Some(node) = self.arena.get_mut(frame.arena_id.idx())
606                    && let NodeKind::Item { ref mut task } = node.kind
607                {
608                    *task = Some(*checked);
609                }
610                self.push_leaf(NodeKind::TaskListMarker(*checked), range);
611            }
612            // Math is not enabled in Options; if it ever appears,
613            // record the bytes inline as text.
614            Event::InlineMath(cow) | Event::DisplayMath(cow) => {
615                let raw_range = range;
616                let _ = cow;
617                self.push_inline_text(raw_range);
618            }
619        }
620    }
621
622    /// Push source text into the inline accumulator.
623    fn push_inline_text(&mut self, range: Range<usize>) {
624        self.extend_inline_range(&range);
625    }
626
627    /// `true` iff any math region overlaps `range`. Advances
628    /// `math_cursor` past regions entirely before `range.start` as a
629    /// side effect so the answer is `O(matching regions)` amortised.
630    fn text_overlaps_math(&mut self, range: &Range<usize>) -> bool {
631        while self.math_cursor < self.math_regions.len()
632            && self
633                .math_regions
634                .get(self.math_cursor)
635                .is_some_and(|r| r.range.end <= range.start)
636        {
637            self.math_cursor = self.math_cursor.saturating_add(1);
638        }
639        self.math_regions
640            .get(self.math_cursor)
641            .is_some_and(|r| r.range.start < range.end)
642    }
643
644    /// Splice `Event::Text` whose `raw_range` overlaps one or more
645    /// math regions. Prose chunks outside math become
646    /// source-byte runs; each math region flushes the current inline
647    /// run and emits a `NodeKind::Math` leaf. Pulldown's decoded
648    /// payload is dropped for the overlapping event because (a) for
649    /// dollar-form math the source bytes equal the decoded payload
650    /// (no decoding happens for `$`), and (b) for backslash-delimited
651    /// math (`\(` / `\[`) pulldown emits the decoded delimiter as a
652    /// separate, single-character event whose bytes lie fully inside
653    /// the math region. That event is dropped by the swallow guard
654    /// at the top of `handle`, never reaching this splice.
655    ///
656    /// `extend_for_backslash` can pull an event's start back to the
657    /// `\` byte of a delimiter pulldown already consumed (e.g., the
658    /// `\)` closing a `\(…\)` inline math); the cursor is clamped to
659    /// `math_emitted_until` and regions already past are skipped via
660    /// `math_cursor` so the leaf is not re-emitted.
661    fn splice_text_with_math(&mut self, raw_range: Range<usize>) {
662        let mut cursor = raw_range.start.max(self.math_emitted_until);
663        while self.math_cursor < self.math_regions.len() {
664            let Some(region) = self.math_regions.get(self.math_cursor) else {
665                break;
666            };
667            if region.range.start >= raw_range.end {
668                break;
669            }
670            if region.range.end <= cursor {
671                self.math_cursor = self.math_cursor.saturating_add(1);
672                continue;
673            }
674            if cursor < region.range.start {
675                let chunk = cursor..region.range.start;
676                self.push_source_prose(chunk);
677            }
678            self.flush_inline_run();
679            let span = region.span().clone();
680            let region_range = region.range.clone();
681            tracing::trace!(?region_range, "math leaf");
682            self.push_leaf(NodeKind::Math(Box::new(span)), region_range.clone());
683            self.math_emitted_until = region_range.end;
684            cursor = region_range.end;
685            self.math_cursor = self.math_cursor.saturating_add(1);
686        }
687        if cursor < raw_range.end {
688            self.push_source_prose(cursor..raw_range.end);
689        }
690    }
691
692    /// Push a prose chunk drawn directly from source bytes. Used by
693    /// the math splice for the prose segments around math regions.
694    fn push_source_prose(&mut self, range: Range<usize>) {
695        self.push_inline_text(range);
696    }
697
698    /// Push a break event into the inline accumulator.
699    fn push_inline_break(&mut self, range: Range<usize>) {
700        self.extend_inline_range(&range);
701    }
702
703    fn extend_inline_range(&mut self, range: &Range<usize>) {
704        match &mut self.inline_range {
705            Some(r) => {
706                if range.start < r.start {
707                    r.start = range.start;
708                }
709                if range.end > r.end {
710                    r.end = range.end;
711                }
712            }
713            None => self.inline_range = Some(range.clone()),
714        }
715    }
716
717    /// Flush any buffered inline events into a [`NodeKind::Run`] leaf
718    /// under the current open frame. No-op when the buffer is empty.
719    fn flush_inline_run(&mut self) {
720        if let Some(range) = self.inline_range.take()
721            && !range.is_empty()
722        {
723            self.push_leaf(NodeKind::Run, range);
724        }
725    }
726
727    /// `Some(&mut String)` when the current open frame is a
728    /// `CodeBlock` or `HtmlBlock` accumulating body bytes.
729    fn body_accum_mut(&mut self) -> Option<&mut String> {
730        self.open.last_mut().and_then(|f| f.body_accum.as_mut())
731    }
732
733    /// Downgrade unresolved reference-style links to raw source
734    /// emission, then seal the Document root. Reference definitions
735    /// themselves live in [`ReferenceTable`] (pulldown-cmark 0.13
736    /// does not emit events for them); the formatter reads that table
737    /// directly rather than via synthesised tree children.
738    #[tracing::instrument(level = "debug", skip(self, refs))]
739    pub(crate) fn finalize(mut self, refs: &ReferenceTable) -> Tree {
740        // Flush any inline events left in the buffer (the document's
741        // trailing run before the parser exhausted its events).
742        self.flush_inline_run();
743        // The Document frame is still open. Close it. `new` always
744        // pushed exactly one frame, so this pop must succeed; if it
745        // ever doesn't, fall through with no Document children.
746        let doc_pending_start = self.open.pop().map_or(0u32, |f| f.pending_start);
747        let doc_children: Vec<NodeId> = self.pending.drain(doc_pending_start as usize..).collect();
748
749        // Validate every reference-style Link / Image node against the
750        // table; unresolvable references downgrade to `Unknown` so the
751        // formatter emits the original source span verbatim (CM §4.7
752        // "leave as text").
753        downgrade_unresolved_links(&mut self.arena, refs);
754
755        let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
756        self.child_ids.extend(doc_children.iter().copied());
757        let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
758        let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
759
760        if let Some(root) = self.arena.get_mut(0) {
761            root.children = children_start..children_end;
762            root.subtree_end = subtree_end;
763            root.raw_range = 0..self.source.len();
764        }
765
766        Tree {
767            arena: self.arena,
768            child_ids: self.child_ids,
769            parents: self.parents,
770        }
771    }
772
773    fn alloc_node(&mut self, kind: NodeKind, raw_range: Range<usize>) -> NodeId {
774        let id = NodeId(u32::try_from(self.arena.len()).unwrap_or(u32::MAX));
775        let subtree_end = id.0.saturating_add(1);
776        self.arena.push(Node {
777            kind,
778            raw_range,
779            children: 0..0,
780            subtree_end,
781        });
782        let parent = self.open.last().map(|f| f.arena_id);
783        self.parents.push(parent);
784        // Stake this node as a child of the currently-open frame.
785        self.pending.push(id);
786        id
787    }
788
789    fn open_container(&mut self, kind: NodeKind, range: Range<usize>, body_accum: Option<String>) {
790        let raw_start = range.start;
791        let id = self.alloc_node(kind, range);
792        let pending_start = u32::try_from(self.pending.len()).unwrap_or(u32::MAX);
793        self.open.push(OpenFrame {
794            arena_id: id,
795            pending_start,
796            raw_start,
797            body_accum,
798        });
799    }
800
801    fn close_container(&mut self, range: Range<usize>) {
802        let Some(frame) = self.open.pop() else {
803            return;
804        };
805        // Drain this frame's direct children out of `pending` and
806        // record them contiguously in `child_ids`.
807        let pending_start = frame.pending_start as usize;
808        let children_start = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
809        self.child_ids.extend(self.pending.drain(pending_start..));
810        let children_end = u32::try_from(self.child_ids.len()).unwrap_or(u32::MAX);
811        let subtree_end = u32::try_from(self.arena.len()).unwrap_or(u32::MAX);
812
813        let raw_range = frame.raw_start..range.end;
814        let node_is_list = matches!(
815            self.arena.get(frame.arena_id.idx()).map(|n| &n.kind),
816            Some(NodeKind::List { .. })
817        );
818
819        // Stamp children / subtree_end / raw_range / body first so
820        // the arena reflects the final structure before deriving list
821        // tightness from direct item children.
822        if let Some(node) = self.arena.get_mut(frame.arena_id.idx()) {
823            node.children = children_start..children_end;
824            node.subtree_end = subtree_end;
825            node.raw_range = raw_range;
826            // Stamp the accumulated body onto CodeBlock / HtmlBlock.
827            #[allow(clippy::wildcard_enum_match_arm)]
828            if let Some(body) = frame.body_accum {
829                match &mut node.kind {
830                    NodeKind::CodeBlock { body: dst, .. } => *dst = body,
831                    NodeKind::HtmlBlock { body: dst } => *dst = body,
832                    _ => {}
833                }
834            }
835        }
836
837        if node_is_list {
838            let list_tight = list_has_no_direct_paragraph_items(&self.arena, &self.child_ids, frame.arena_id);
839            if let Some(node) = self.arena.get_mut(frame.arena_id.idx())
840                && let NodeKind::List { tight, .. } = &mut node.kind
841            {
842                *tight = list_tight;
843            }
844        }
845    }
846
847    fn push_leaf(&mut self, kind: NodeKind, range: Range<usize>) {
848        self.alloc_node(kind, range);
849    }
850
851    /// Reclaim a leading `\` that pulldown-cmark consumed as a
852    /// backslash escape. Mirrors `crate::ir::Builder::push_prose`.
853    fn extend_for_backslash(&self, range: Range<usize>) -> Range<usize> {
854        if range.start > 0 {
855            let bytes = self.source.as_bytes();
856            if bytes.get(range.start.saturating_sub(1)) == Some(&b'\\') {
857                return range.start.saturating_sub(1)..range.end;
858            }
859        }
860        range
861    }
862
863    fn kind_for_start(&self, tag: &Tag<'a>, range: &Range<usize>) -> NodeKind {
864        match tag {
865            Tag::Paragraph => NodeKind::Paragraph,
866            Tag::Heading {
867                level,
868                id,
869                classes,
870                attrs,
871            } => {
872                let lvl = *level as u32;
873                // ATX headings start with `#` after optional leading
874                // space; setext headings start with the heading text.
875                let setext = first_non_whitespace_byte(self.source, range.start) != Some(b'#');
876                let parsed = build_heading_attrs(self.source, range, id.as_deref(), classes, attrs);
877                NodeKind::Heading {
878                    level: lvl,
879                    setext,
880                    attrs: parsed.map(Box::new),
881                }
882            }
883            Tag::BlockQuote(_) => NodeKind::BlockQuote,
884            Tag::CodeBlock(kind) => {
885                let (fenced, info) = match kind {
886                    CodeBlockKind::Fenced(s) => (true, s.to_string()),
887                    CodeBlockKind::Indented => (false, String::new()),
888                };
889                NodeKind::CodeBlock {
890                    fenced,
891                    info,
892                    body: String::new(),
893                }
894            }
895            Tag::HtmlBlock => NodeKind::HtmlBlock { body: String::new() },
896            Tag::List(start) => NodeKind::List {
897                ordered: start.is_some(),
898                start: start.unwrap_or(0),
899                tight: true,
900                marker_byte: derive_list_marker_byte(self.source, range.clone(), start.is_some()).unwrap_or(0),
901            },
902            Tag::Item => NodeKind::Item { task: None },
903            Tag::FootnoteDefinition(label) => NodeKind::FootnoteDefinition {
904                label: label.to_string(),
905            },
906            Tag::Table(aligns) => NodeKind::Table {
907                alignments: aligns.iter().copied().map(TableAlign::from_alignment).collect(),
908            },
909            Tag::TableHead => NodeKind::TableHead,
910            Tag::TableRow => NodeKind::TableRow,
911            Tag::TableCell => NodeKind::TableCell,
912            Tag::Emphasis => NodeKind::Emphasis,
913            Tag::Strong => NodeKind::Strong,
914            Tag::Strikethrough => NodeKind::Strikethrough,
915            Tag::Link {
916                link_type,
917                dest_url,
918                title,
919                id,
920            } => link_kind(*link_type, dest_url, title, id, /* is_image= */ false),
921            Tag::Image {
922                link_type,
923                dest_url,
924                title,
925                id,
926            } => link_kind(*link_type, dest_url, title, id, /* is_image= */ true),
927            Tag::Superscript => NodeKind::Unknown { tag: "Superscript" },
928            Tag::Subscript => NodeKind::Unknown { tag: "Subscript" },
929            Tag::DefinitionList => NodeKind::DefinitionList,
930            Tag::DefinitionListTitle => NodeKind::DefinitionTerm,
931            Tag::DefinitionListDefinition => NodeKind::DefinitionDescription,
932            Tag::MetadataBlock(_) => NodeKind::Unknown { tag: "MetadataBlock" },
933        }
934    }
935}
936
937/// Replace every reference-style [`NodeKind::Link`] / [`NodeKind::Image`]
938/// whose label fails to resolve against `refs` with [`NodeKind::Unknown`].
939/// `Unknown` is the formatter's "emit verbatim source" fallback, which is
940/// exactly the behaviour CM §4.7 prescribes for an unresolvable reference.
941#[allow(clippy::wildcard_enum_match_arm)] // many irrelevant NodeKind variants
942fn downgrade_unresolved_links(arena: &mut [Node], refs: &ReferenceTable) {
943    for node in arena.iter_mut() {
944        let (label_opt, is_image): (Option<&str>, bool) = match &node.kind {
945            NodeKind::Link { reference_label } => (reference_label.as_deref(), false),
946            NodeKind::Image { reference_label } => (reference_label.as_deref(), true),
947            _ => (None, false),
948        };
949        let Some(label) = label_opt else { continue };
950        if refs.resolve(label).is_some() {
951            continue;
952        }
953        let tag = if is_image { "Image" } else { "Link" };
954        node.kind = NodeKind::Unknown { tag };
955        // Drop the subtree's structural children: `Unknown` is emitted
956        // verbatim from `raw_text`, so the children must not be
957        // rendered separately. Clearing the children range is enough;
958        // the arena entries linger but become unreachable from the
959        // root.
960        node.children = 0..0;
961    }
962}
963
964fn link_kind(lt: LinkType, dest_url: &CowStr<'_>, title: &CowStr<'_>, id: &CowStr<'_>, is_image: bool) -> NodeKind {
965    let _ = (dest_url, title);
966    let reference_label = match lt {
967        LinkType::Autolink => {
968            return NodeKind::Autolink;
969        }
970        LinkType::Email => {
971            return NodeKind::Autolink;
972        }
973        LinkType::WikiLink { .. } => return NodeKind::Unknown { tag: "WikiLink" },
974        LinkType::Inline => None,
975        LinkType::Reference
976        | LinkType::ReferenceUnknown
977        | LinkType::Collapsed
978        | LinkType::CollapsedUnknown
979        | LinkType::Shortcut
980        | LinkType::ShortcutUnknown => Some(id.to_string()),
981    };
982    if is_image {
983        NodeKind::Image { reference_label }
984    } else {
985        NodeKind::Link { reference_label }
986    }
987}
988
989fn first_non_whitespace_byte(source: &str, start: usize) -> Option<u8> {
990    source
991        .as_bytes()
992        .get(start..)?
993        .iter()
994        .copied()
995        .find(|b| !matches!(b, b' ' | b'\t'))
996}
997
998/// Find the *list marker byte* anywhere in the source range.
999///
1000/// `Tag::List`'s reported range can include a parent container's
1001/// marker bytes when the inner separator is a tab: `>\t-` makes
1002/// pulldown emit a List with range `0..3`, so a naive
1003/// "first non-whitespace byte from `range.start`" scan returns `>`,
1004/// not `-` (`fuzz_blockquote_tab_list_marker.in`). Scanning for the
1005/// first byte matching the *legal* marker set is unambiguous because
1006/// pulldown already decided this is a list, and parent container
1007/// prefixes (`>`, `|`) are never list markers.
1008fn derive_list_marker_byte(source: &str, range: Range<usize>, ordered: bool) -> Option<u8> {
1009    source.as_bytes().get(range)?.iter().copied().find(|b| {
1010        if ordered {
1011            b.is_ascii_digit()
1012        } else {
1013            matches!(b, b'-' | b'*' | b'+')
1014        }
1015    })
1016}
1017
1018/// Widen `range` so its start sits at the beginning of the line that
1019/// contains `range.start`. Used by [`Builder::handle`] for indented
1020/// code blocks: pulldown's event range starts at the first content
1021/// byte, which loses the 4-space / tab prefix the block's identity
1022/// depends on.
1023fn widen_to_line_start(source: &str, range: Range<usize>) -> Range<usize> {
1024    let bytes = source.as_bytes();
1025    let mut start = range.start.min(bytes.len());
1026    while start > 0 && bytes.get(start.saturating_sub(1)).copied() != Some(b'\n') {
1027        start = start.saturating_sub(1);
1028    }
1029    start..range.end
1030}
1031
1032/// Like [`widen_to_line_start`] but only consumes ASCII space / tab
1033/// bytes. Stops at any other content (a non-whitespace byte before
1034/// the line start means the range is genuinely mid-line and should
1035/// not be extended). Used for `HtmlBlock` whose CM §4.6 opener allows
1036/// 0–3 spaces of leading indent that pulldown's HTML render includes
1037/// as part of the block.
1038fn widen_to_line_start_through_ws(source: &str, range: Range<usize>) -> Range<usize> {
1039    let bytes = source.as_bytes();
1040    let mut start = range.start.min(bytes.len());
1041    while start > 0 {
1042        match bytes.get(start.saturating_sub(1)).copied() {
1043            Some(b' ' | b'\t') => start = start.saturating_sub(1),
1044            Some(b'\n') | None => break,
1045            Some(_) => return range, // non-whitespace before line start: don't widen
1046        }
1047    }
1048    start..range.end
1049}
1050
1051/// Build a [`HeadingAttrs`] from the parsed pulldown fields when the
1052/// heading carried a `{ #id .class key=val }` trailer. Returns `None`
1053/// when all three field slots are empty (the no-trailer case).
1054///
1055/// The `source_trailer` field is recovered from the heading's source
1056/// bytes so preserve-default formatting can round-trip the trailer
1057/// verbatim.
1058fn build_heading_attrs(
1059    source: &str,
1060    range: &Range<usize>,
1061    id: Option<&str>,
1062    classes: &[CowStr<'_>],
1063    attrs: &[(CowStr<'_>, Option<CowStr<'_>>)],
1064) -> Option<HeadingAttrs> {
1065    if id.is_none() && classes.is_empty() && attrs.is_empty() {
1066        return None;
1067    }
1068    let raw = source.get(range.clone()).unwrap_or("");
1069    let trailer = find_attr_trailer_range(raw)
1070        .and_then(|r| raw.get(r))
1071        .unwrap_or("")
1072        .to_owned();
1073    Some(HeadingAttrs {
1074        id: id.map(str::to_owned),
1075        classes: classes.iter().map(|c| c.to_string()).collect(),
1076        attrs: attrs
1077            .iter()
1078            .map(|(k, v)| (k.to_string(), v.as_ref().map(|v| v.to_string())))
1079            .collect(),
1080        source_trailer: trailer,
1081    })
1082}
1083
1084fn list_has_no_direct_paragraph_items(arena: &[Node], child_ids: &[NodeId], list_id: NodeId) -> bool {
1085    let Some(list_node) = arena.get(list_id.idx()) else {
1086        return true;
1087    };
1088    for i in list_node.children.clone() {
1089        let Some(&item_id) = child_ids.get(i as usize) else {
1090            continue;
1091        };
1092        let Some(item_node) = arena.get(item_id.idx()) else {
1093            continue;
1094        };
1095        if !matches!(item_node.kind, NodeKind::Item { .. }) {
1096            continue;
1097        }
1098        if item_has_direct_paragraph(arena, child_ids, item_node) {
1099            return false;
1100        }
1101    }
1102    true
1103}
1104
1105fn item_has_direct_paragraph(arena: &[Node], child_ids: &[NodeId], item: &Node) -> bool {
1106    for j in item.children.clone() {
1107        let Some(&cid) = child_ids.get(j as usize) else {
1108            continue;
1109        };
1110        if matches!(arena.get(cid.idx()).map(|n| &n.kind), Some(NodeKind::Paragraph)) {
1111            return true;
1112        }
1113    }
1114    false
1115}
1116
1117#[cfg(test)]
1118#[allow(clippy::expect_used)]
1119mod tests {
1120    use super::*;
1121    use crate::ir::Ir;
1122
1123    #[test]
1124    fn empty_doc_has_root_only() {
1125        let ir = Ir::parse_str("");
1126        let tree = &ir.tree;
1127        assert_eq!(tree.root(), NodeId(0));
1128        assert!(tree.is_empty());
1129        assert!(matches!(
1130            tree.node(tree.root()).map(|n| &n.kind),
1131            Some(NodeKind::Document)
1132        ));
1133    }
1134
1135    #[test]
1136    fn paragraph_and_text_present() {
1137        let ir = Ir::parse_str("Hello world\n");
1138        let tree = &ir.tree;
1139        let kinds: Vec<&NodeKind> = tree
1140            .descendants(tree.root())
1141            .filter_map(|id| tree.node(id).map(|n| &n.kind))
1142            .collect();
1143        assert!(kinds.iter().any(|k| matches!(k, NodeKind::Paragraph)));
1144        assert!(kinds.iter().any(|k| matches!(k, NodeKind::Run)));
1145    }
1146
1147    #[test]
1148    fn raw_ranges_are_well_formed() {
1149        let src = "# Title\n\nA paragraph.\n\n- one\n- two\n";
1150        let ir = Ir::parse_str(src);
1151        let tree = &ir.tree;
1152        for id in tree.descendants(tree.root()) {
1153            let n = tree.node(id).expect("descendants only yields valid ids");
1154            assert!(n.raw_range.start <= n.raw_range.end);
1155            assert!(n.raw_range.end <= src.len());
1156        }
1157    }
1158
1159    #[test]
1160    fn raw_range_covers_leading_sigil_per_block_kind() {
1161        // Verbatim emission relies on `raw_text(id)` containing every
1162        // block's full lexical extent, including the opening sigil
1163        // (`#`, `>`, `-`, fence markers, indented-code prefix). A
1164        // regression that drops the sigil would cause verbatim
1165        // emission to silently lose information.
1166        let src = "\
1167# Heading
1168> quote
1169- list item
1170```rust
1171let x = 1;
1172```
1173    indented
1174---
1175<!-- html block -->
1176";
1177        let ir = Ir::parse_str(src);
1178        let tree = &ir.tree;
1179        for id in tree.descendants(tree.root()) {
1180            let n = tree.node(id).expect("descendants yields valid ids");
1181            let raw = tree.raw_text(src, id);
1182            #[allow(clippy::wildcard_enum_match_arm)]
1183            match &n.kind {
1184                NodeKind::Heading { setext: false, .. } => {
1185                    assert!(raw.starts_with('#'), "ATX heading missing `#`: {raw:?}");
1186                }
1187                NodeKind::BlockQuote => {
1188                    assert!(raw.starts_with('>'), "blockquote missing `>`: {raw:?}");
1189                }
1190                NodeKind::List { .. } => {
1191                    let first = raw.bytes().next().expect("non-empty list raw_text");
1192                    assert!(
1193                        matches!(first, b'-' | b'*' | b'+' | b'0'..=b'9'),
1194                        "list missing bullet: {raw:?}",
1195                    );
1196                }
1197                NodeKind::CodeBlock { fenced: true, .. } => {
1198                    assert!(
1199                        raw.starts_with("```") || raw.starts_with("~~~"),
1200                        "fenced code block missing opening fence: {raw:?}",
1201                    );
1202                    assert!(
1203                        raw.trim_end_matches('\n').ends_with("```") || raw.trim_end_matches('\n').ends_with("~~~"),
1204                        "fenced code block missing closing fence: {raw:?}",
1205                    );
1206                }
1207                NodeKind::CodeBlock { fenced: false, .. } => {
1208                    assert!(
1209                        raw.starts_with("    ") || raw.starts_with('\t'),
1210                        "indented code block missing 4-space prefix: {raw:?}",
1211                    );
1212                }
1213                NodeKind::HtmlBlock { .. } => {
1214                    assert!(raw.starts_with('<'), "HTML block missing `<`: {raw:?}");
1215                }
1216                NodeKind::ThematicBreak => {
1217                    let first = raw.bytes().next().expect("non-empty thematic break");
1218                    assert!(
1219                        matches!(first, b'-' | b'*' | b'_'),
1220                        "thematic break missing marker: {raw:?}",
1221                    );
1222                }
1223                _ => {}
1224            }
1225        }
1226    }
1227
1228    #[test]
1229    fn child_raw_range_is_contained_in_parent() {
1230        // Containment is the structural form of "every node's source
1231        // bytes lie inside its parent's source bytes". Required for
1232        // verbatim emission to compose: a parent emitted verbatim
1233        // wholly covers all of its children, so nested re-emission
1234        // would double-print but never lose information.
1235        let src = "# H\n\n> quote with *em*\n\n- item one\n- item two\n";
1236        let ir = Ir::parse_str(src);
1237        let tree = &ir.tree;
1238        for id in tree.descendants(tree.root()) {
1239            if id == tree.root() {
1240                continue;
1241            }
1242            let Some(parent_id) = tree.parent(id) else {
1243                continue;
1244            };
1245            let child = tree.node(id).expect("valid child id");
1246            let parent = tree.node(parent_id).expect("valid parent id");
1247            assert!(
1248                parent.raw_range.start <= child.raw_range.start && child.raw_range.end <= parent.raw_range.end,
1249                "child {:?} {:?} outside parent {:?} {:?}",
1250                child.kind,
1251                child.raw_range,
1252                parent.kind,
1253                parent.raw_range,
1254            );
1255        }
1256    }
1257
1258    #[test]
1259    fn parent_chain_terminates_at_root() {
1260        let ir = Ir::parse_str("> a quote\n");
1261        let tree = &ir.tree;
1262        let last = NodeId(u32::try_from(tree.len().saturating_sub(1)).unwrap_or(0));
1263        let mut cur = last;
1264        let mut steps: u32 = 0;
1265        while let Some(p) = tree.parent(cur) {
1266            cur = p;
1267            steps = steps.saturating_add(1);
1268            assert!(steps < 32, "walk did not terminate");
1269        }
1270        assert_eq!(cur, tree.root());
1271        assert!(tree.parent(tree.root()).is_none());
1272    }
1273
1274    fn find_list_tight(tree: &Tree) -> Option<bool> {
1275        tree.descendants(tree.root())
1276            .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1277                Some(NodeKind::List { tight, .. }) => Some(*tight),
1278                _ => None,
1279            })
1280    }
1281
1282    #[test]
1283    fn tight_list_one_text_child() {
1284        let ir = Ir::parse_str("- one\n- two\n");
1285        assert_eq!(find_list_tight(&ir.tree), Some(true));
1286    }
1287
1288    #[test]
1289    fn loose_list_with_blank_line_between_items() {
1290        let ir = Ir::parse_str("- one\n\n- two\n");
1291        assert_eq!(find_list_tight(&ir.tree), Some(false));
1292    }
1293
1294    #[test]
1295    fn nested_blockquote_under_list() {
1296        let ir = Ir::parse_str("- item\n\n  > quote\n");
1297        let tree = &ir.tree;
1298        let bq = tree
1299            .descendants(tree.root())
1300            .find(|&id| matches!(tree.node(id).map(|n| &n.kind), Some(NodeKind::BlockQuote)));
1301        assert!(bq.is_some(), "blockquote nested under list item");
1302    }
1303
1304    #[test]
1305    fn reference_link_records_label() {
1306        let src = "[foo][bar]\n\n[bar]: https://example.com\n";
1307        let ir = Ir::parse_str(src);
1308        let tree = &ir.tree;
1309        let label = tree
1310            .descendants(tree.root())
1311            .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1312                Some(NodeKind::Link { reference_label }) => reference_label.clone(),
1313                _ => None,
1314            })
1315            .expect("link present");
1316        assert_eq!(label, "bar");
1317    }
1318
1319    #[test]
1320    fn link_reference_definitions_appear_in_reference_table() {
1321        // Defs only enter the table when at least one reference uses
1322        // them (the new pulldown-event-driven resolver dropped the
1323        // "emit unused defs verbatim" behaviour because unused defs
1324        // never affect HTML output anyway).
1325        let src = "[a]: https://a.example\n[b]: https://b.example\n\n[a] and [b].\n";
1326        let ir = Ir::parse_str(src);
1327        let mut labels: Vec<String> = ir.refs.iter().map(|t| t.label_raw.clone()).collect();
1328        labels.sort();
1329        assert_eq!(labels, vec!["a".to_owned(), "b".to_owned()]);
1330    }
1331
1332    #[test]
1333    fn autolink_preserves_url() {
1334        let ir = Ir::parse_str("<https://example.com>\n");
1335        let tree = &ir.tree;
1336        let has_autolink = tree
1337            .descendants(tree.root())
1338            .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1339                Some(NodeKind::Autolink) => Some(true),
1340                _ => None,
1341            })
1342            .expect("autolink present");
1343        assert!(has_autolink);
1344    }
1345
1346    #[test]
1347    fn task_list_marker_sets_item_task() {
1348        let ir = Ir::parse_str("- [x] done\n- [ ] todo\n");
1349        let tree = &ir.tree;
1350        let items: Vec<Option<bool>> = tree
1351            .descendants(tree.root())
1352            .filter_map(|id| match tree.node(id).map(|n| &n.kind) {
1353                Some(NodeKind::Item { task }) => Some(*task),
1354                _ => None,
1355            })
1356            .collect();
1357        assert_eq!(items, vec![Some(true), Some(false)]);
1358    }
1359
1360    #[test]
1361    fn code_block_info_string() {
1362        let ir = Ir::parse_str("```rust\nfn x() {}\n```\n");
1363        let tree = &ir.tree;
1364        let info = tree
1365            .descendants(tree.root())
1366            .find_map(|id| match tree.node(id).map(|n| &n.kind) {
1367                Some(NodeKind::CodeBlock { fenced: true, info, .. }) => Some(info.clone()),
1368                _ => None,
1369            })
1370            .expect("fenced code block");
1371        assert_eq!(info, "rust");
1372    }
1373}