Skip to main content

mdwright_document/
format_facts.rs

//! Source-coordinate facts used by formatter rewrite passes.
//!
//! The formatter needs byte ranges for recognised Markdown constructs,
//! but it should not know how `pulldown-cmark` exposes those ranges.
//! This module converts parser events into small domain records.
6
7#![allow(
8    clippy::wildcard_enum_match_arm,
9    reason = "document fact queries filter pulldown events and intentionally ignore unrelated variants"
10)]
11
12use std::ops::Range;
13
14use pulldown_cmark::{Event, Tag, TagEnd};
15
16use crate::gfm::AutolinkFact;
17use crate::heading::find_attr_trailer_range;
18use crate::ir::{CodeBlock, HtmlBlock};
19use crate::refs::NormalisedLabel;
20use crate::tree::{NodeKind, TableAlign, Tree};
21use crate::{Document, HeadingAttrs};
22use mdwright_math::MathRegion;
23
/// Structural owner kinds with source ranges.
///
/// Each variant names the block or container construct that a
/// [`StructuralSpan`] was recorded for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StructuralKind {
    /// A paragraph.
    Paragraph,
    /// A heading.
    Heading,
    /// A block quote.
    BlockQuote,
    /// A list (ordered or unordered).
    List,
    /// A single item of a list.
    ListItem,
    /// A definition list.
    DefinitionList,
    /// The description part of a definition-list entry.
    DefinitionDescription,
    /// A footnote definition.
    FootnoteDefinition,
    /// A thematic break (horizontal rule).
    ThematicBreak,
    /// A table.
    Table,
}

/// A recognised block/container range.
#[derive(Clone, Debug)]
pub struct StructuralSpan {
    /// Which construct the range belongs to.
    kind: StructuralKind,
    /// Source range reported by the parser for the construct.
    raw_range: Range<usize>,
}

impl StructuralSpan {
    /// Returns the kind of construct this span was recorded for.
    #[must_use]
    pub fn kind(&self) -> StructuralKind {
        let Self { kind, .. } = self;
        *kind
    }

    /// Returns a copy of the source range covered by the construct.
    #[must_use]
    pub fn raw_range(&self) -> Range<usize> {
        self.raw_range.start..self.raw_range.end
    }
}
57
/// Inline delimiter kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InlineDelimiterKind {
    /// Emphasis; its delimiter runs are one byte wide.
    Emphasis,
    /// Strong emphasis; its delimiter runs are two bytes wide.
    Strong,
}

/// Delimiter byte slots for one inline span.
#[derive(Clone, Debug)]
pub struct InlineDelimiterSlot {
    /// Index of this slot among the slots collected for the same kind.
    pair: usize,
    /// Whether the span is emphasis or strong emphasis.
    kind: InlineDelimiterKind,
    /// Start of the opening delimiter run.
    open_lo: usize,
    /// End (exclusive) of the opening delimiter run.
    open_hi: usize,
    /// Start of the closing delimiter run.
    close_lo: usize,
    /// End (exclusive) of the closing delimiter run.
    close_hi: usize,
}

impl InlineDelimiterSlot {
    /// Returns the index assigned to this opener/closer pair.
    #[must_use]
    pub fn pair(&self) -> usize {
        let Self { pair, .. } = self;
        *pair
    }

    /// Returns the delimiter kind of the span.
    #[must_use]
    pub fn kind(&self) -> InlineDelimiterKind {
        let Self { kind, .. } = self;
        *kind
    }

    /// Returns the byte range of the opening delimiter run.
    #[must_use]
    pub fn open_range(&self) -> Range<usize> {
        Range {
            start: self.open_lo,
            end: self.open_hi,
        }
    }

    /// Returns the byte range of the closing delimiter run.
    #[must_use]
    pub fn close_range(&self) -> Range<usize> {
        Range {
            start: self.close_lo,
            end: self.close_hi,
        }
    }
}
97
/// One unordered list item marker.
#[derive(Clone, Debug)]
pub struct UnorderedListMarkerSite {
    /// Byte offset of the bullet character (`-`, `*` or `+`).
    marker: usize,
}

impl UnorderedListMarkerSite {
    /// Returns the one-byte range that holds the bullet character.
    ///
    /// The end is computed with saturating arithmetic, so an offset of
    /// `usize::MAX` yields an empty range instead of overflowing.
    #[must_use]
    pub fn marker_range(&self) -> Range<usize> {
        let start = self.marker;
        let end = start.saturating_add(1);
        start..end
    }
}
110
/// One ordered list item marker.
#[derive(Clone, Debug)]
pub struct OrderedListMarkerSite {
    /// Start of the marker's digit run.
    marker_lo: usize,
    /// End (exclusive) of the marker's digit run.
    marker_hi: usize,
    /// Start number of the list that owns the item.
    start_number: u64,
    /// Zero-based position of the item among the recorded items of its list.
    ordinal: usize,
}

impl OrderedListMarkerSite {
    /// Returns the byte range of the marker's digits.
    #[must_use]
    pub fn marker_range(&self) -> Range<usize> {
        Range {
            start: self.marker_lo,
            end: self.marker_hi,
        }
    }

    /// Returns the start number of the enclosing ordered list.
    #[must_use]
    pub fn start_number(&self) -> u64 {
        let Self { start_number, .. } = self;
        *start_number
    }

    /// Returns the zero-based ordinal of this item within its list.
    #[must_use]
    pub fn ordinal(&self) -> usize {
        let Self { ordinal, .. } = self;
        *ordinal
    }
}
136
137/// An ATX heading attribute trailer.
138#[derive(Clone, Debug)]
139pub struct HeadingAttrSite {
140    attrs: HeadingAttrs,
141    trailer: Range<usize>,
142}
143
144impl HeadingAttrSite {
145    #[must_use]
146    pub fn attrs(&self) -> &HeadingAttrs {
147        &self.attrs
148    }
149
150    #[must_use]
151    pub fn trailer(&self) -> Range<usize> {
152        self.trailer.clone()
153    }
154}
155
/// An inline link or image destination byte slot.
#[derive(Clone, Debug)]
pub struct InlineLinkDestinationSlot {
    /// Source range occupied by the destination.
    range: Range<usize>,
}

impl InlineLinkDestinationSlot {
    /// Returns a copy of the destination's source range.
    #[must_use]
    pub fn range(&self) -> Range<usize> {
        self.range.start..self.range.end
    }
}
168
/// A link-reference definition destination byte range.
#[derive(Clone, Debug)]
pub struct ReferenceDefinitionSite {
    /// Source range of the line that holds the definition.
    raw_range: Range<usize>,
    /// Source range of the definition's destination.
    destination: Range<usize>,
}

impl ReferenceDefinitionSite {
    /// Returns a copy of the definition line's source range.
    #[must_use]
    pub fn raw_range(&self) -> Range<usize> {
        self.raw_range.start..self.raw_range.end
    }

    /// Returns a copy of the destination's source range.
    #[must_use]
    pub fn destination(&self) -> Range<usize> {
        self.destination.start..self.destination.end
    }
}
187
188/// One GFM table source range and its source rows.
189#[derive(Clone, Debug)]
190pub struct TableSite {
191    raw_range: Range<usize>,
192    alignments: Vec<TableAlign>,
193    rows: Vec<TableRowSite>,
194}
195
196impl TableSite {
197    #[must_use]
198    pub fn raw_range(&self) -> Range<usize> {
199        self.raw_range.clone()
200    }
201
202    #[must_use]
203    pub fn alignments(&self) -> &[TableAlign] {
204        &self.alignments
205    }
206
207    #[must_use]
208    pub fn rows(&self) -> &[TableRowSite] {
209        &self.rows
210    }
211}
212
/// One raw table line.
#[derive(Clone, Debug)]
pub struct TableRowSite {
    /// Source range of the line.
    raw_range: Range<usize>,
    /// Cells found on the line, left to right.
    cells: Vec<TableCellSite>,
}

impl TableRowSite {
    /// Returns a copy of the line's source range.
    #[must_use]
    pub fn raw_range(&self) -> Range<usize> {
        self.raw_range.start..self.raw_range.end
    }

    /// Borrows the cells of this row.
    #[must_use]
    pub fn cells(&self) -> &[TableCellSite] {
        self.cells.as_slice()
    }
}

/// One table cell's source range inside a raw table line.
#[derive(Clone, Debug)]
pub struct TableCellSite {
    /// Source range of the cell's raw text.
    raw_range: Range<usize>,
}

impl TableCellSite {
    /// Returns a copy of the cell's source range.
    #[must_use]
    pub fn raw_range(&self) -> Range<usize> {
        self.raw_range.start..self.raw_range.end
    }
}
244
245/// A paragraph range with the inline facts needed by the wrap pass.
246#[derive(Clone, Debug)]
247pub struct WrappableParagraph {
248    line_lo: usize,
249    line_hi: usize,
250    content_lo: usize,
251    content_hi: usize,
252    owner_kind: StructuralKind,
253    first_prefix: String,
254    cont_prefix: String,
255    list_four_space_cont_prefix: Option<String>,
256    atomics: Vec<Range<usize>>,
257    hard_breaks: Vec<ParagraphHardBreak>,
258}
259
260impl WrappableParagraph {
261    #[must_use]
262    pub fn line_range(&self) -> Range<usize> {
263        self.line_lo..self.line_hi
264    }
265
266    #[must_use]
267    pub fn content_range(&self) -> Range<usize> {
268        self.content_lo..self.content_hi
269    }
270
271    #[must_use]
272    pub fn owner_kind(&self) -> StructuralKind {
273        self.owner_kind
274    }
275
276    #[must_use]
277    pub fn first_prefix(&self) -> &str {
278        &self.first_prefix
279    }
280
281    #[must_use]
282    pub fn cont_prefix(&self) -> &str {
283        &self.cont_prefix
284    }
285
286    #[must_use]
287    pub fn list_four_space_cont_prefix(&self) -> Option<&str> {
288        self.list_four_space_cont_prefix.as_deref()
289    }
290
291    #[must_use]
292    pub fn atomics(&self) -> &[Range<usize>] {
293        &self.atomics
294    }
295
296    #[must_use]
297    pub fn hard_breaks(&self) -> &[ParagraphHardBreak] {
298        &self.hard_breaks
299    }
300}
301
/// One hard line break recorded inside a wrappable paragraph.
#[derive(Clone, Debug)]
pub struct ParagraphHardBreak {
    /// Offset at which the break marker starts.
    marker_lo: usize,
    /// Offset recorded for the break's newline.
    nl: usize,
    /// Marker text associated with the break.
    // NOTE(review): the concrete marker strings are chosen by the classifier
    // that builds this record, which is not visible here.
    marker: &'static str,
}

impl ParagraphHardBreak {
    /// Returns the offset at which the break marker starts.
    #[must_use]
    pub fn marker_start(&self) -> usize {
        let Self { marker_lo, .. } = self;
        *marker_lo
    }

    /// Returns the offset recorded for the break's newline.
    #[must_use]
    pub fn newline(&self) -> usize {
        let Self { nl, .. } = self;
        *nl
    }

    /// Returns the marker text associated with the break.
    #[must_use]
    pub fn marker(&self) -> &'static str {
        let Self { marker, .. } = self;
        marker
    }
}
325
326/// Cached source-coordinate facts consumed by formatter rewrite passes.
327#[derive(Clone, Debug, Default)]
328pub(crate) struct FormatFacts {
329    structural_spans: Vec<StructuralSpan>,
330    emphasis_delimiter_slots: Vec<InlineDelimiterSlot>,
331    strong_delimiter_slots: Vec<InlineDelimiterSlot>,
332    unordered_list_marker_sites: Vec<UnorderedListMarkerSite>,
333    ordered_list_marker_sites: Vec<OrderedListMarkerSite>,
334    thematic_break_ranges: Vec<Range<usize>>,
335    heading_attr_sites: Vec<HeadingAttrSite>,
336    inline_link_destination_slots: Vec<InlineLinkDestinationSlot>,
337    reference_definition_sites: Vec<ReferenceDefinitionSite>,
338    table_sites: Vec<TableSite>,
339    wrappable_paragraphs: Vec<WrappableParagraph>,
340}
341
342impl FormatFacts {
343    pub(crate) fn from_parts(
344        source: &str,
345        events: &[(Event<'_>, Range<usize>)],
346        autolinks: &[AutolinkFact],
347        math_regions: &[MathRegion],
348        code_blocks: &[CodeBlock],
349        html_blocks: &[HtmlBlock],
350        tree: &Tree,
351    ) -> Self {
352        Self {
353            structural_spans: structural_spans(events),
354            emphasis_delimiter_slots: inline_delimiter_slots(source, events, InlineDelimiterKind::Emphasis),
355            strong_delimiter_slots: inline_delimiter_slots(source, events, InlineDelimiterKind::Strong),
356            unordered_list_marker_sites: unordered_list_marker_sites(source, events),
357            ordered_list_marker_sites: ordered_list_marker_sites(source, events),
358            thematic_break_ranges: thematic_break_ranges(source, events),
359            heading_attr_sites: heading_attr_sites(source, events),
360            inline_link_destination_slots: inline_link_destination_slots(source, events),
361            reference_definition_sites: reference_definition_sites(source, code_blocks, html_blocks),
362            table_sites: table_sites(source, tree),
363            wrappable_paragraphs: wrappable_paragraphs(source, events, autolinks, math_regions),
364        }
365    }
366}
367
368impl Document {
369    /// Recognised block/container ranges used as rewrite owners.
370    #[must_use]
371    pub fn structural_spans(&self) -> &[StructuralSpan] {
372        &self.format_facts().structural_spans
373    }
374
375    /// Inline emphasis/strong delimiter slots.
376    #[must_use]
377    pub fn inline_delimiter_slots(&self, kind: InlineDelimiterKind) -> &[InlineDelimiterSlot] {
378        match kind {
379            InlineDelimiterKind::Emphasis => &self.format_facts().emphasis_delimiter_slots,
380            InlineDelimiterKind::Strong => &self.format_facts().strong_delimiter_slots,
381        }
382    }
383
384    /// Unordered list item marker sites.
385    #[must_use]
386    pub fn unordered_list_marker_sites(&self) -> &[UnorderedListMarkerSite] {
387        &self.format_facts().unordered_list_marker_sites
388    }
389
390    /// Ordered list item marker digit sites.
391    #[must_use]
392    pub fn ordered_list_marker_sites(&self) -> &[OrderedListMarkerSite] {
393        &self.format_facts().ordered_list_marker_sites
394    }
395
396    /// Thematic break source line ranges.
397    #[must_use]
398    pub fn thematic_break_ranges(&self) -> &[Range<usize>] {
399        &self.format_facts().thematic_break_ranges
400    }
401
402    /// Heading attribute trailer sites.
403    #[must_use]
404    pub fn heading_attr_sites(&self) -> &[HeadingAttrSite] {
405        &self.format_facts().heading_attr_sites
406    }
407
408    /// Inline link/image destination slots.
409    #[must_use]
410    pub fn inline_link_destination_slots(&self) -> &[InlineLinkDestinationSlot] {
411        &self.format_facts().inline_link_destination_slots
412    }
413
414    /// Reference-definition destination ranges.
415    #[must_use]
416    pub fn reference_definition_sites(&self) -> &[ReferenceDefinitionSite] {
417        &self.format_facts().reference_definition_sites
418    }
419
420    /// GFM table source rows and cell ranges.
421    #[must_use]
422    pub fn table_sites(&self) -> &[TableSite] {
423        &self.format_facts().table_sites
424    }
425
426    /// Paragraph ranges and inline atomics for the wrap pass.
427    #[must_use]
428    pub fn wrappable_paragraphs(&self) -> &[WrappableParagraph] {
429        &self.format_facts().wrappable_paragraphs
430    }
431}
432
433fn structural_spans(events: &[(Event<'_>, Range<usize>)]) -> Vec<StructuralSpan> {
434    let mut out = Vec::new();
435    for (event, range) in events {
436        match event {
437            Event::Start(Tag::Paragraph) => out.push(StructuralSpan {
438                kind: StructuralKind::Paragraph,
439                raw_range: range.clone(),
440            }),
441            Event::Start(Tag::Heading { .. }) => out.push(StructuralSpan {
442                kind: StructuralKind::Heading,
443                raw_range: range.clone(),
444            }),
445            Event::Start(Tag::BlockQuote(_)) => out.push(StructuralSpan {
446                kind: StructuralKind::BlockQuote,
447                raw_range: range.clone(),
448            }),
449            Event::Start(Tag::List(_)) => out.push(StructuralSpan {
450                kind: StructuralKind::List,
451                raw_range: range.clone(),
452            }),
453            Event::Start(Tag::Item) => out.push(StructuralSpan {
454                kind: StructuralKind::ListItem,
455                raw_range: range.clone(),
456            }),
457            Event::Start(Tag::FootnoteDefinition(_)) => out.push(StructuralSpan {
458                kind: StructuralKind::FootnoteDefinition,
459                raw_range: range.clone(),
460            }),
461            Event::Start(Tag::Table(_)) => out.push(StructuralSpan {
462                kind: StructuralKind::Table,
463                raw_range: range.clone(),
464            }),
465            Event::Start(Tag::DefinitionList) => out.push(StructuralSpan {
466                kind: StructuralKind::DefinitionList,
467                raw_range: range.clone(),
468            }),
469            Event::Start(Tag::DefinitionListDefinition) => out.push(StructuralSpan {
470                kind: StructuralKind::DefinitionDescription,
471                raw_range: range.clone(),
472            }),
473            Event::Rule => out.push(StructuralSpan {
474                kind: StructuralKind::ThematicBreak,
475                raw_range: range.clone(),
476            }),
477            _ => {}
478        }
479    }
480    out
481}
482
483fn inline_delimiter_slots(
484    source: &str,
485    events: &[(Event<'_>, Range<usize>)],
486    kind: InlineDelimiterKind,
487) -> Vec<InlineDelimiterSlot> {
488    let mut starts: Vec<usize> = Vec::new();
489    let mut slots: Vec<InlineDelimiterSlot> = Vec::new();
490    let delim_len = match kind {
491        InlineDelimiterKind::Emphasis => 1,
492        InlineDelimiterKind::Strong => 2,
493    };
494    let bytes = source.as_bytes();
495    for (ev, range) in events {
496        if delimiter_matches_start(ev, kind) {
497            starts.push(range.start);
498        } else if delimiter_matches_end(ev, kind) {
499            let Some(open_lo) = starts.pop() else { continue };
500            let close_hi = range.end;
501            if close_hi < delim_len {
502                continue;
503            }
504            let close_lo = close_hi.saturating_sub(delim_len);
505            let open_hi = open_lo.saturating_add(delim_len);
506            if open_hi > close_lo {
507                continue;
508            }
509            let Some(open) = bytes.get(open_lo..open_hi) else {
510                continue;
511            };
512            let Some(close) = bytes.get(close_lo..close_hi) else {
513                continue;
514            };
515            if !is_emphasis_delim_run(open) || !is_emphasis_delim_run(close) {
516                continue;
517            }
518            slots.push(InlineDelimiterSlot {
519                pair: slots.len(),
520                kind,
521                open_lo,
522                open_hi,
523                close_lo,
524                close_hi,
525            });
526        }
527    }
528    slots
529}
530
531fn unordered_list_marker_sites(source: &str, events: &[(Event<'_>, Range<usize>)]) -> Vec<UnorderedListMarkerSite> {
532    let bytes = source.as_bytes();
533    let mut stack: Vec<bool> = Vec::new();
534    let mut completed = Vec::new();
535    for (ev, range) in events {
536        match ev {
537            Event::Start(Tag::List(start)) => {
538                stack.push(start.is_none());
539            }
540            Event::End(TagEnd::List(_)) => {
541                stack.pop();
542            }
543            Event::Start(Tag::Item) => {
544                let Some(unordered) = stack.last().copied() else {
545                    continue;
546                };
547                if unordered && let Some(marker) = find_unordered_bullet(bytes, range.start, range.end) {
548                    completed.push(UnorderedListMarkerSite { marker });
549                }
550            }
551            _ => {}
552        }
553    }
554    completed
555}
556
557#[derive(Clone, Debug)]
558struct OrderedListFrame {
559    start_number: u64,
560    next_ordinal: usize,
561}
562
563fn ordered_list_marker_sites(source: &str, events: &[(Event<'_>, Range<usize>)]) -> Vec<OrderedListMarkerSite> {
564    let bytes = source.as_bytes();
565    let mut stack: Vec<Option<OrderedListFrame>> = Vec::new();
566    let mut completed = Vec::new();
567    for (ev, range) in events {
568        match ev {
569            Event::Start(Tag::List(start)) => {
570                stack.push(start.map(|start_number| OrderedListFrame {
571                    start_number,
572                    next_ordinal: 0,
573                }));
574            }
575            Event::End(TagEnd::List(_)) => {
576                stack.pop();
577            }
578            Event::Start(Tag::Item) => {
579                let Some(Some(frame)) = stack.last_mut() else {
580                    continue;
581                };
582                if let Some((marker_lo, marker_hi)) = find_ordered_marker_digits(bytes, range.start, range.end) {
583                    completed.push(OrderedListMarkerSite {
584                        marker_lo,
585                        marker_hi,
586                        start_number: frame.start_number,
587                        ordinal: frame.next_ordinal,
588                    });
589                    frame.next_ordinal = frame.next_ordinal.saturating_add(1);
590                }
591            }
592            _ => {}
593        }
594    }
595    completed
596}
597
598fn thematic_break_ranges(source: &str, events: &[(Event<'_>, Range<usize>)]) -> Vec<Range<usize>> {
599    let mut sites = Vec::new();
600    let bytes = source.as_bytes();
601    for (ev, range) in events {
602        if matches!(ev, Event::Rule) {
603            let mut hi = range.end.min(bytes.len());
604            while hi > range.start
605                && matches!(
606                    bytes.get(hi.saturating_sub(1)).copied(),
607                    Some(b' ' | b'\t' | 0x0c | b'\n' | b'\r')
608                )
609            {
610                hi = hi.saturating_sub(1);
611            }
612            sites.push(range.start..hi);
613        }
614    }
615    sites
616}
617
618fn heading_attr_sites(source: &str, events: &[(Event<'_>, Range<usize>)]) -> Vec<HeadingAttrSite> {
619    let mut sites = Vec::new();
620    for (ev, range) in events {
621        if let Event::Start(Tag::Heading { id, classes, attrs, .. }) = ev
622            && (id.is_some() || !classes.is_empty() || !attrs.is_empty())
623        {
624            let heading_attrs = HeadingAttrs {
625                id: id.as_ref().map(std::string::ToString::to_string),
626                classes: classes.iter().map(std::string::ToString::to_string).collect(),
627                attrs: attrs
628                    .iter()
629                    .map(|(k, v)| (k.to_string(), v.as_ref().map(std::string::ToString::to_string)))
630                    .collect(),
631                source_trailer: String::new(),
632            };
633            let Some(slice) = source.get(range.clone()) else {
634                continue;
635            };
636            if let Some(trailer) = find_attr_trailer_range(slice) {
637                sites.push(HeadingAttrSite {
638                    attrs: heading_attrs,
639                    trailer: range.start.saturating_add(trailer.start)..range.start.saturating_add(trailer.end),
640                });
641            }
642        }
643    }
644    sites
645}
646
647fn inline_link_destination_slots(source: &str, events: &[(Event<'_>, Range<usize>)]) -> Vec<InlineLinkDestinationSlot> {
648    let bytes = source.as_bytes();
649    let mut sites = Vec::new();
650    let mut link_stack = Vec::new();
651    for (ev, range) in events {
652        match ev {
653            Event::Start(Tag::Link { .. } | Tag::Image { .. }) => link_stack.push(range.start),
654            Event::End(TagEnd::Link | TagEnd::Image) => {
655                let Some(open) = link_stack.pop() else { continue };
656                if let Some((lo, hi)) = find_inline_dest_range(bytes, open, range.end) {
657                    sites.push(InlineLinkDestinationSlot { range: lo..hi });
658                }
659            }
660            _ => {}
661        }
662    }
663    sites
664}
665
666fn reference_definition_sites(
667    source: &str,
668    code_blocks: &[CodeBlock],
669    html_blocks: &[HtmlBlock],
670) -> Vec<ReferenceDefinitionSite> {
671    let excluded = excluded_block_ranges(code_blocks, html_blocks);
672    let mut seen = std::collections::HashSet::new();
673    let bytes = source.as_bytes();
674    let mut sites = Vec::new();
675    let mut line_start = 0usize;
676    while line_start <= bytes.len() {
677        let line_end = bytes
678            .get(line_start..)
679            .and_then(|tail| tail.iter().position(|&b| b == b'\n'))
680            .map_or(bytes.len(), |p| line_start.saturating_add(p));
681        if !range_start_is_excluded(line_start, &excluded)
682            && let Some(site) = parse_ref_def_line(bytes, line_start, line_end)
683            && let Some(norm) = NormalisedLabel::from_raw(&site.label)
684            && seen.insert(norm)
685        {
686            sites.push(ReferenceDefinitionSite {
687                raw_range: line_start..line_end,
688                destination: site.dest,
689            });
690        }
691        if line_end == bytes.len() {
692            break;
693        }
694        line_start = line_end.saturating_add(1);
695    }
696    sites
697}
698
699fn table_sites(source: &str, tree: &Tree) -> Vec<TableSite> {
700    let mut sites = Vec::new();
701    for id in tree.descendants(tree.root()) {
702        let Some(node) = tree.node(id) else { continue };
703        let NodeKind::Table { alignments } = &node.kind else {
704            continue;
705        };
706        let rows = table_rows(source, node.raw_range.clone());
707        if rows.len() >= 2 {
708            sites.push(TableSite {
709                raw_range: node.raw_range.clone(),
710                alignments: alignments.clone(),
711                rows,
712            });
713        }
714    }
715    sites
716}
717
718fn wrappable_paragraphs(
719    source: &str,
720    events: &[(Event<'_>, Range<usize>)],
721    autolinks: &[AutolinkFact],
722    math_regions: &[MathRegion],
723) -> Vec<WrappableParagraph> {
724    let mut paragraphs = Vec::new();
725    let bytes = source.as_bytes();
726    let mut current: Option<PartialParagraph> = None;
727    let mut paragraph_depth: u32 = 0;
728    let mut prose_container_depth: u32 = 0;
729
730    for (ev, range) in events {
731        match ev {
732            Event::Start(Tag::Paragraph) => {
733                if paragraph_depth == 0 {
734                    current = Some(PartialParagraph::new(range.clone()));
735                }
736                paragraph_depth = paragraph_depth.saturating_add(1);
737            }
738            Event::End(TagEnd::Paragraph) => {
739                paragraph_depth = paragraph_depth.saturating_sub(1);
740                if paragraph_depth == 0
741                    && let Some(p) = current.take()
742                    && let Some(finished) = p.finish(bytes, autolinks, math_regions)
743                {
744                    paragraphs.push(finished);
745                }
746            }
747            Event::Start(Tag::Item | Tag::DefinitionListDefinition | Tag::FootnoteDefinition(_)) => {
748                prose_container_depth = prose_container_depth.saturating_add(1);
749            }
750            Event::End(TagEnd::Item | TagEnd::DefinitionListDefinition | TagEnd::FootnoteDefinition) => {
751                prose_container_depth = prose_container_depth.saturating_sub(1);
752                if let Some(p) = current.take()
753                    && let Some(finished) = p.finish(bytes, autolinks, math_regions)
754                {
755                    paragraphs.push(finished);
756                }
757            }
758            Event::Start(
759                Tag::CodeBlock(_)
760                | Tag::HtmlBlock
761                | Tag::Heading { .. }
762                | Tag::BlockQuote(_)
763                | Tag::List(_)
764                | Tag::Table(_)
765                | Tag::DefinitionList
766                | Tag::DefinitionListTitle
767                | Tag::MetadataBlock(_),
768            ) => {
769                if let Some(p) = current.take()
770                    && let Some(finished) = p.finish(bytes, autolinks, math_regions)
771                {
772                    paragraphs.push(finished);
773                }
774            }
775            Event::Text(_) => {
776                if current.is_none() && paragraph_depth == 0 && prose_container_depth > 0 {
777                    current = Some(PartialParagraph::new(range.clone()));
778                }
779                if let Some(p) = current.as_mut()
780                    && range.end > p.content_hi
781                {
782                    p.content_hi = range.end;
783                }
784            }
785            Event::Code(_) | Event::InlineHtml(_) | Event::InlineMath(_) | Event::DisplayMath(_) => {
786                if current.is_none() && paragraph_depth == 0 && prose_container_depth > 0 {
787                    current = Some(PartialParagraph::new(range.clone()));
788                }
789                if let Some(p) = current.as_mut() {
790                    p.atomics.push(range.clone());
791                    if range.end > p.content_hi {
792                        p.content_hi = range.end;
793                    }
794                }
795            }
796            Event::SoftBreak => {
797                if let Some(p) = current.as_mut()
798                    && range.end > p.content_hi
799                {
800                    p.content_hi = range.end;
801                }
802            }
803            Event::Start(Tag::Link { .. } | Tag::Image { .. }) => {
804                if current.is_none() && paragraph_depth == 0 && prose_container_depth > 0 {
805                    current = Some(PartialParagraph::new(range.clone()));
806                }
807                if let Some(p) = current.as_mut() {
808                    p.link_stack.push(range.start);
809                    if range.end > p.content_hi {
810                        p.content_hi = range.end;
811                    }
812                }
813            }
814            Event::End(TagEnd::Link | TagEnd::Image) => {
815                if let Some(p) = current.as_mut() {
816                    if let Some(start) = p.link_stack.pop() {
817                        p.atomics.push(start..range.end);
818                    }
819                    if range.end > p.content_hi {
820                        p.content_hi = range.end;
821                    }
822                }
823            }
824            Event::Start(Tag::Emphasis | Tag::Strong | Tag::Strikethrough | Tag::Superscript | Tag::Subscript) => {
825                if current.is_none() && paragraph_depth == 0 && prose_container_depth > 0 {
826                    current = Some(PartialParagraph::new(range.clone()));
827                }
828                if let Some(p) = current.as_mut()
829                    && range.end > p.content_hi
830                {
831                    p.content_hi = range.end;
832                }
833            }
834            Event::End(
835                TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough | TagEnd::Superscript | TagEnd::Subscript,
836            ) => {
837                if let Some(p) = current.as_mut()
838                    && range.end > p.content_hi
839                {
840                    p.content_hi = range.end;
841                }
842            }
843            Event::HardBreak => {
844                if let Some(p) = current.as_mut() {
845                    if let Some(hb) = classify_hard_break(bytes, range.start, range.end) {
846                        p.hard_breaks.push(hb);
847                    }
848                    if range.end > p.content_hi {
849                        p.content_hi = range.end;
850                    }
851                }
852            }
853            _ => {}
854        }
855    }
856    paragraphs
857}
858
859fn excluded_block_ranges(code_blocks: &[CodeBlock], html_blocks: &[HtmlBlock]) -> Vec<Range<usize>> {
860    code_blocks
861        .iter()
862        .map(|b| b.raw_range.clone())
863        .chain(html_blocks.iter().map(|b| b.raw_range.clone()))
864        .collect()
865}
866
867fn delimiter_matches_start(ev: &Event<'_>, kind: InlineDelimiterKind) -> bool {
868    match kind {
869        InlineDelimiterKind::Emphasis => matches!(ev, Event::Start(Tag::Emphasis)),
870        InlineDelimiterKind::Strong => matches!(ev, Event::Start(Tag::Strong)),
871    }
872}
873
874fn delimiter_matches_end(ev: &Event<'_>, kind: InlineDelimiterKind) -> bool {
875    match kind {
876        InlineDelimiterKind::Emphasis => matches!(ev, Event::End(TagEnd::Emphasis)),
877        InlineDelimiterKind::Strong => matches!(ev, Event::End(TagEnd::Strong)),
878    }
879}
880
/// Reports whether `bytes` is a non-empty run made only of `*` and `_`.
fn is_emphasis_delim_run(bytes: &[u8]) -> bool {
    match bytes {
        [] => false,
        run => run.iter().all(|b| matches!(b, b'*' | b'_')),
    }
}
884
/// Finds the bullet character of an unordered list item.
///
/// Looks at `bytes[start..end]` (with `end` clamped to the slice length),
/// skips leading spaces and tabs, and returns the absolute offset of the
/// first remaining byte if it is `-`, `*` or `+`. Returns `None` for any
/// other byte, for an all-blank window, or for an empty/invalid window.
fn find_unordered_bullet(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    let window = bytes.get(start..end.min(bytes.len()))?;
    let indent = window.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
    let at_bullet = matches!(window.get(indent).copied(), Some(b'-' | b'*' | b'+'));
    at_bullet.then(|| start.saturating_add(indent))
}
900
901fn table_rows(source: &str, range: Range<usize>) -> Vec<TableRowSite> {
902    let mut rows = Vec::new();
903    let bytes = source.as_bytes();
904    let mut line_start = range.start.min(bytes.len());
905    let range_end = range.end.min(bytes.len());
906    while line_start < range_end {
907        let line_end = bytes
908            .get(line_start..range_end)
909            .and_then(|tail| tail.iter().position(|&b| b == b'\n'))
910            .map_or(range_end, |p| line_start.saturating_add(p));
911        let raw_end = if line_end > line_start && bytes.get(line_end.saturating_sub(1)) == Some(&b'\r') {
912            line_end.saturating_sub(1)
913        } else {
914            line_end
915        };
916        if let Some(row) = table_row(source, line_start..raw_end) {
917            rows.push(row);
918        }
919        if line_end == range_end {
920            break;
921        }
922        line_start = line_end.saturating_add(1);
923    }
924    rows
925}
926
927fn table_row(source: &str, range: Range<usize>) -> Option<TableRowSite> {
928    let bytes = source.as_bytes();
929    let line = bytes.get(range.clone())?;
930    let mut lo = range.start;
931    let mut hi = range.end;
932    while lo < hi && bytes.get(lo).is_some_and(u8::is_ascii_whitespace) {
933        lo = lo.saturating_add(1);
934    }
935    while hi > lo && bytes.get(hi.saturating_sub(1)).is_some_and(u8::is_ascii_whitespace) {
936        hi = hi.saturating_sub(1);
937    }
938    if lo < hi && bytes.get(lo) == Some(&b'|') {
939        lo = lo.saturating_add(1);
940    }
941    if hi > lo && bytes.get(hi.saturating_sub(1)) == Some(&b'|') {
942        hi = hi.saturating_sub(1);
943    }
944    let mut cells = Vec::new();
945    let mut cell_start = lo;
946    let mut i = lo;
947    let mut escaped = false;
948    while i < hi {
949        let Some(b) = bytes.get(i).copied() else {
950            break;
951        };
952        if b == b'|' && !escaped {
953            cells.push(TableCellSite {
954                raw_range: cell_start..i,
955            });
956            cell_start = i.saturating_add(1);
957        }
958        escaped = b == b'\\' && !escaped;
959        if b != b'\\' {
960            escaped = false;
961        }
962        i = i.saturating_add(1);
963    }
964    cells.push(TableCellSite {
965        raw_range: cell_start..hi,
966    });
967    if cells.is_empty() || !line.contains(&b'|') {
968        return None;
969    }
970    Some(TableRowSite {
971        raw_range: range,
972        cells,
973    })
974}
975
/// Locates the digit run of an ordered list marker.
///
/// Only `bytes[start..end]` is inspected (`end` is clamped to the buffer).
/// Leading spaces and tabs are skipped; the next byte must be an ASCII digit.
/// Returns the absolute `(lo, hi)` offsets of the digit run, or `None` when
/// the window is blank or starts with anything else.
fn find_ordered_marker_digits(bytes: &[u8], start: usize, end: usize) -> Option<(usize, usize)> {
    let limit = end.min(bytes.len());
    let window = bytes.get(start..limit)?;
    let indent = window.iter().take_while(|&&b| matches!(b, b' ' | b'\t')).count();
    let digits = window
        .get(indent..)?
        .iter()
        .take_while(|&&b| b.is_ascii_digit())
        .count();
    if digits == 0 {
        return None;
    }
    let digit_lo = start.saturating_add(indent);
    Some((digit_lo, digit_lo.saturating_add(digits)))
}
996
/// Locates the destination of an inline link or image within
/// `bytes[start..end]`.
///
/// The window must begin with `[` (or `![` for images). The link text is
/// skipped with bracket-depth tracking and backslash escapes, a `(` must
/// follow directly, and leading spaces, tabs and newlines inside the
/// parentheses are skipped. Two destination forms are recognised:
///
/// * `<...>`: runs to the first unescaped `>`; `\<`, `\>` and `\\` are
///   escapes, and a newline or a missing `>` rejects the link. The returned
///   range includes both angle brackets.
/// * bare: runs until whitespace or the `)` that closes the link, with
///   balanced nested parentheses and backslash escapes honoured.
///
/// Returns the absolute `(lo, hi)` byte offsets of the destination, or `None`
/// when the window is not an inline link or the destination is empty. Nothing
/// at or beyond `end` is read, and the returned range never extends past
/// `end`, even when the window stops right after a backslash.
fn find_inline_dest_range(bytes: &[u8], start: usize, end: usize) -> Option<(usize, usize)> {
    let end = end.min(bytes.len());
    // Shadow the buffer with the window's prefix so every lookup is confined.
    let bytes = bytes.get(..end)?;
    let bracket = if bytes.get(start).copied()? == b'!' {
        start.saturating_add(1)
    } else {
        start
    };
    if bytes.get(bracket).copied()? != b'[' {
        return None;
    }

    // Skip the link text up to its matching `]`.
    let mut depth: i32 = 1;
    let mut i = bracket.saturating_add(1);
    while i < end {
        let b = bytes.get(i).copied()?;
        match b {
            b'\\' => {
                i = i.saturating_add(2);
                continue;
            }
            b'[' => depth = depth.saturating_add(1),
            b']' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    break;
                }
            }
            _ => {}
        }
        i = i.saturating_add(1);
    }
    if depth != 0 || bytes.get(i).copied() != Some(b']') {
        return None;
    }
    let after_close = i.saturating_add(1);
    if bytes.get(after_close).copied() != Some(b'(') {
        return None;
    }

    let mut j = after_close.saturating_add(1);
    while matches!(bytes.get(j).copied(), Some(b' ' | b'\t' | b'\n')) {
        j = j.saturating_add(1);
    }
    let dest_lo = j;
    let dest_hi = if bytes.get(j).copied() == Some(b'<') {
        let mut k = j.saturating_add(1);
        loop {
            // Running off the window means the `<` was never closed.
            match bytes.get(k).copied()? {
                b'>' => break,
                b'\n' => return None,
                b'\\' if matches!(bytes.get(k.saturating_add(1)).copied(), Some(b'<' | b'>' | b'\\')) => {
                    k = k.saturating_add(2);
                }
                _ => k = k.saturating_add(1),
            }
        }
        k.saturating_add(1)
    } else {
        let mut depth: i32 = 0;
        let mut k = j;
        while let Some(b) = bytes.get(k).copied() {
            match b {
                b'\\' => {
                    k = k.saturating_add(2);
                    continue;
                }
                b'(' => depth = depth.saturating_add(1),
                b')' => {
                    if depth == 0 {
                        break;
                    }
                    depth = depth.saturating_sub(1);
                }
                b' ' | b'\t' | b'\n' => break,
                _ => {}
            }
            k = k.saturating_add(1);
        }
        // An escape at the very end of the window may have stepped past it.
        k.min(end)
    };
    if dest_hi <= dest_lo {
        return None;
    }
    Some((dest_lo, dest_hi))
}
1080
/// Reports whether `start` falls inside any of the half-open `excluded` ranges.
fn range_start_is_excluded(start: usize, excluded: &[Range<usize>]) -> bool {
    excluded.iter().any(|span| span.contains(&start))
}
1084
/// Pieces of a link reference definition line found by `parse_ref_def_line`.
struct RefDefSite {
    /// Raw label text between `[` and `]`, with backslash escapes left intact.
    label: String,
    /// Byte range of the destination, including `<` and `>` when the
    /// angle-bracket form is used.
    dest: Range<usize>,
}

/// Parses `[label]: destination` from `bytes[lo..hi]`.
///
/// Up to three leading spaces are allowed. The label runs to the first
/// unescaped `]` and must not contain a newline; a `:` must follow directly,
/// then optional spaces / tabs, then the destination on the same line:
///
/// * `<...>`: runs to the first unescaped `>` (`\<`, `\>` and `\\` are
///   escapes); a line ending or a missing `>` rejects the line.
/// * bare: runs until a space, tab or line ending, so a trailing `\r` / `\n`
///   inside the window never becomes part of the destination.
///
/// Returns `None` when the window is not a reference definition, the
/// destination is empty, or the label is not valid UTF-8. Nothing at or
/// beyond `hi` is read.
fn parse_ref_def_line(bytes: &[u8], lo: usize, hi: usize) -> Option<RefDefSite> {
    let hi = hi.min(bytes.len());
    // Shadow the buffer with the window's prefix so every lookup is confined.
    let bytes = bytes.get(..hi)?;

    let mut i = lo;
    let mut spaces = 0usize;
    while spaces < 3 && bytes.get(i).copied() == Some(b' ') {
        i = i.saturating_add(1);
        spaces = spaces.saturating_add(1);
    }
    if bytes.get(i).copied() != Some(b'[') {
        return None;
    }
    i = i.saturating_add(1);

    let label_lo = i;
    loop {
        // Running off the window means the label was never closed.
        match bytes.get(i).copied()? {
            b']' => break,
            b'\n' => return None,
            b'\\' => i = i.saturating_add(2),
            _ => i = i.saturating_add(1),
        }
    }
    let label_hi = i;
    i = i.saturating_add(1);
    if bytes.get(i).copied() != Some(b':') {
        return None;
    }
    i = i.saturating_add(1);
    while matches!(bytes.get(i).copied(), Some(b' ' | b'\t')) {
        i = i.saturating_add(1);
    }
    if i >= hi {
        return None;
    }

    let dest_lo = i;
    let dest_hi = if bytes.get(i).copied() == Some(b'<') {
        let mut k = i.saturating_add(1);
        loop {
            match bytes.get(k).copied()? {
                b'>' => break,
                b'\n' | b'\r' => return None,
                b'\\' if matches!(bytes.get(k.saturating_add(1)).copied(), Some(b'<' | b'>' | b'\\')) => {
                    k = k.saturating_add(2);
                }
                _ => k = k.saturating_add(1),
            }
        }
        k.saturating_add(1)
    } else {
        let len = bytes
            .get(i..)?
            .iter()
            .take_while(|&&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
            .count();
        i.saturating_add(len)
    };
    if dest_hi <= dest_lo {
        return None;
    }
    let label = std::str::from_utf8(bytes.get(label_lo..label_hi)?).ok()?.to_owned();
    Some(RefDefSite {
        label,
        dest: dest_lo..dest_hi,
    })
}
1152
1153struct PartialParagraph {
1154    content_lo: usize,
1155    content_hi: usize,
1156    atomics: Vec<Range<usize>>,
1157    hard_breaks: Vec<ParagraphHardBreak>,
1158    link_stack: Vec<usize>,
1159}
1160
1161impl PartialParagraph {
1162    fn new(range: Range<usize>) -> Self {
1163        Self {
1164            content_lo: range.start,
1165            content_hi: range.end,
1166            atomics: Vec::new(),
1167            hard_breaks: Vec::new(),
1168            link_stack: Vec::new(),
1169        }
1170    }
1171
1172    fn finish(
1173        mut self,
1174        bytes: &[u8],
1175        extra_atomics: &[crate::AutolinkFact],
1176        math_regions: &[MathRegion],
1177    ) -> Option<WrappableParagraph> {
1178        let (line_lo, first_prefix) = extract_first_prefix(bytes, self.content_lo)?;
1179        let line_hi = extract_line_hi(bytes, self.content_hi);
1180        if is_mkdocs_admonition_paragraph(bytes, line_lo, line_hi) {
1181            return None;
1182        }
1183        let cont_prefix = derive_continuation_prefix(&first_prefix)?;
1184        let list_four_space_cont_prefix = derive_list_four_space_continuation_prefix(&first_prefix);
1185        let owner_kind = paragraph_owner_kind(&first_prefix);
1186        for autolink in extra_atomics {
1187            let raw_range = autolink.raw_range();
1188            if raw_range.start >= self.content_lo && raw_range.end <= self.content_hi {
1189                self.atomics.push(raw_range);
1190            }
1191        }
1192        for region in math_regions {
1193            if region.range.start >= self.content_lo && region.range.end <= self.content_hi {
1194                self.atomics.push(region.range.clone());
1195            }
1196        }
1197        let mut atomics = self.atomics;
1198        atomics.sort_by_key(|r| r.start);
1199        let mut hard_breaks = self.hard_breaks;
1200        hard_breaks.sort_by_key(|h| h.nl);
1201        Some(WrappableParagraph {
1202            line_lo,
1203            line_hi,
1204            content_lo: self.content_lo,
1205            content_hi: self.content_hi,
1206            owner_kind,
1207            first_prefix,
1208            cont_prefix,
1209            list_four_space_cont_prefix,
1210            atomics,
1211            hard_breaks,
1212        })
1213    }
1214}
1215
1216fn paragraph_owner_kind(first_prefix: &str) -> StructuralKind {
1217    let trimmed = first_prefix.trim_start_matches([' ', '\t']);
1218    if trimmed.starts_with('>') {
1219        StructuralKind::BlockQuote
1220    } else if trimmed.starts_with("[^") {
1221        StructuralKind::FootnoteDefinition
1222    } else if trimmed.starts_with(':') {
1223        StructuralKind::DefinitionDescription
1224    } else if trimmed.starts_with(['-', '*', '+']) || trimmed.as_bytes().first().is_some_and(u8::is_ascii_digit) {
1225        StructuralKind::ListItem
1226    } else {
1227        StructuralKind::Paragraph
1228    }
1229}
1230
1231fn is_mkdocs_admonition_paragraph(bytes: &[u8], line_lo: usize, line_hi: usize) -> bool {
1232    let Some(line) = bytes.get(line_lo..line_hi) else {
1233        return false;
1234    };
1235    let first_line_end = line.iter().position(|&b| b == b'\n').unwrap_or(line.len());
1236    let Some(first_line) = line.get(..first_line_end) else {
1237        return false;
1238    };
1239    let indent = first_line.iter().take_while(|&&b| b == b' ').count();
1240    if indent > 3 {
1241        return false;
1242    }
1243    let marker = first_line.get(indent..).unwrap_or(&[]);
1244    is_admonition_marker(marker, b"!!!") || is_admonition_marker(marker, b"???")
1245}
1246
/// Reports whether `line` starts with `opener` followed by a space or tab,
/// optionally with a single `+` or `-` between the opener and that whitespace.
fn is_admonition_marker(line: &[u8], opener: &[u8]) -> bool {
    let Some(rest) = line.strip_prefix(opener) else {
        return false;
    };
    matches!(
        rest,
        [b' ' | b'\t', ..] | [b'+' | b'-', b' ' | b'\t', ..]
    )
}
1260
1261fn classify_hard_break(bytes: &[u8], start: usize, end: usize) -> Option<ParagraphHardBreak> {
1262    let slice = bytes.get(start..end)?;
1263    let nl_off = slice.iter().rposition(|&b| b == b'\n')?;
1264    let nl = start.saturating_add(nl_off);
1265    let before_nl = bytes.get(nl.checked_sub(1)?).copied()?;
1266    if before_nl == b'\\' {
1267        let two_back = nl.checked_sub(2).and_then(|i| bytes.get(i).copied());
1268        if matches!(two_back, Some(b'\\')) {
1269            return None;
1270        }
1271        return Some(ParagraphHardBreak {
1272            marker_lo: nl.saturating_sub(1),
1273            nl,
1274            marker: "\\",
1275        });
1276    }
1277    if before_nl == b' ' {
1278        let two_back = nl.checked_sub(2).and_then(|i| bytes.get(i).copied());
1279        if matches!(two_back, Some(b' ')) {
1280            return Some(ParagraphHardBreak {
1281                marker_lo: nl.saturating_sub(2),
1282                nl,
1283                marker: "  ",
1284            });
1285        }
1286    }
1287    None
1288}
1289
/// Returns the start of the line containing `content_lo` together with the
/// text between that line start and `content_lo`.
///
/// Yields `None` when `content_lo` lies beyond the buffer or the prefix is
/// not valid UTF-8.
fn extract_first_prefix(bytes: &[u8], content_lo: usize) -> Option<(usize, String)> {
    let head = bytes.get(..content_lo)?;
    // The piece after the last newline (or all of `head`) is the prefix.
    let prefix = head.rsplit(|&b| b == b'\n').next()?;
    let line_lo = content_lo.saturating_sub(prefix.len());
    let text = std::str::from_utf8(prefix).ok()?;
    Some((line_lo, text.to_owned()))
}
1300
/// Returns the end of the line containing `content_hi`, newline included.
///
/// `content_hi` is clamped to the buffer. If the byte just before it is
/// already a newline it is returned unchanged; otherwise the result is one
/// past the next newline, or the buffer length when none follows.
fn extract_line_hi(bytes: &[u8], content_hi: usize) -> usize {
    let total = bytes.len();
    let hi = content_hi.min(total);
    let previous = hi.checked_sub(1).and_then(|i| bytes.get(i)).copied();
    if previous == Some(b'\n') {
        return hi;
    }
    match bytes.get(hi..).and_then(|rest| rest.iter().position(|&b| b == b'\n')) {
        Some(offset) => hi.saturating_add(offset).saturating_add(1),
        None => total,
    }
}
1314
/// Builds the prefix for continuation lines from a paragraph's first-line
/// prefix.
///
/// The prefix is consumed left to right:
///
/// * `>` (plus one following space) is copied through;
/// * spaces and tabs are copied through;
/// * a bullet (`-`, `*`, `+`) plus one following space becomes spaces of the
///   same width;
/// * a digit run with an optional `.` / `)` and one optional space becomes
///   spaces of the same width;
/// * a footnote label `[^...]:` with its trailing spaces / tabs becomes four
///   spaces;
/// * `:` followed by at least one space / tab becomes one space per consumed
///   byte.
///
/// Returns `None` on any other byte, on an unclosed footnote label, and on a
/// `:` without whitespace after it.
fn derive_continuation_prefix(first: &str) -> Option<String> {
    let mut out = String::with_capacity(first.len());
    let mut rest = first.as_bytes();
    loop {
        rest = match rest {
            [] => return Some(out),
            [b'>', b' ', tail @ ..] => {
                out.push_str("> ");
                tail
            }
            [b'>', tail @ ..] => {
                out.push('>');
                tail
            }
            [b' ', tail @ ..] => {
                out.push(' ');
                tail
            }
            [b'\t', tail @ ..] => {
                out.push('\t');
                tail
            }
            [b'-' | b'*' | b'+', b' ', tail @ ..] => {
                out.push_str("  ");
                tail
            }
            [b'-' | b'*' | b'+', tail @ ..] => {
                out.push(' ');
                tail
            }
            [b'0'..=b'9', ..] => {
                let mut width = rest.iter().take_while(|&&b| b.is_ascii_digit()).count();
                if matches!(rest.get(width).copied(), Some(b'.' | b')')) {
                    width = width.saturating_add(1);
                }
                if rest.get(width).copied() == Some(b' ') {
                    width = width.saturating_add(1);
                }
                for _ in 0..width {
                    out.push(' ');
                }
                rest.get(width..)?
            }
            [b'[', b'^', tail @ ..] => {
                // The label ends at the first `]` immediately followed by `:`.
                let close = tail.windows(2).position(|pair| matches!(pair, [b']', b':']))?;
                let after = tail.get(close.saturating_add(2)..)?;
                let pad = after.iter().take_while(|&&b| matches!(b, b' ' | b'\t')).count();
                out.push_str("    ");
                after.get(pad..)?
            }
            [b':', tail @ ..] => {
                let pad = tail.iter().take_while(|&&b| matches!(b, b' ' | b'\t')).count();
                if pad == 0 {
                    return None;
                }
                // One space for the colon plus one per whitespace byte.
                for _ in 0..pad.saturating_add(1) {
                    out.push(' ');
                }
                tail.get(pad..)?
            }
            _ => return None,
        };
    }
}
1393
/// Builds the alternative continuation prefix that indents list content by
/// four spaces.
///
/// Block-quote markers (`>` plus one following space), spaces and tabs ahead
/// of the list marker are copied through. At the first bullet (`-`, `*`, `+`)
/// or digit run terminated by `.` / `)`, four spaces are appended and the
/// result is returned; whatever follows the marker is not inspected.
///
/// Returns `None` when the prefix holds no list marker, when a digit run has
/// no `.` / `)` after it, or when any other byte comes first.
fn derive_list_four_space_continuation_prefix(first: &str) -> Option<String> {
    let mut out = String::with_capacity(first.len().saturating_add(2));
    let mut rest = first.as_bytes();
    loop {
        rest = match rest {
            [b'>', b' ', tail @ ..] => {
                out.push_str("> ");
                tail
            }
            [b'>', tail @ ..] => {
                out.push('>');
                tail
            }
            [b' ', tail @ ..] => {
                out.push(' ');
                tail
            }
            [b'\t', tail @ ..] => {
                out.push('\t');
                tail
            }
            [b'-' | b'*' | b'+', ..] => break,
            [b'0'..=b'9', ..] => {
                let digits = rest.iter().take_while(|&&b| b.is_ascii_digit()).count();
                if !matches!(rest.get(digits).copied(), Some(b'.' | b')')) {
                    return None;
                }
                break;
            }
            // Covers both an exhausted prefix and an unrecognised byte.
            _ => return None,
        };
    }
    out.push_str("    ");
    Some(out)
}
1435
#[cfg(test)]
#[allow(
    clippy::expect_used,
    reason = "fact tests assert a specific recognised paragraph exists"
)]
mod tests {
    use super::*;

    /// A footnote definition paragraph continues under a four-space indent.
    #[test]
    fn footnote_definition_continuation_uses_four_space_indent() {
        let source = "[^long-label]: alpha beta gamma\n";
        let doc = Document::parse(source).expect("fixture parses");
        let first = doc
            .wrappable_paragraphs()
            .iter()
            .next()
            .expect("footnote definition paragraph");
        assert_eq!(first.cont_prefix, "    ");
    }

    /// A definition description continues under an indent as wide as `:   `.
    #[test]
    fn definition_list_continuation_uses_marker_width_indent() {
        let source = "term\n:   alpha beta gamma\n";
        let doc = Document::parse(source).expect("fixture parses");
        let first = doc
            .wrappable_paragraphs()
            .iter()
            .next()
            .expect("definition list paragraph");
        assert_eq!(first.cont_prefix, "    ");
    }

    /// A quoted list item exposes both the marker-width and the four-space prefix.
    #[test]
    fn list_paragraph_exposes_four_space_continuation_prefix() {
        let source = "> - alpha beta gamma\n";
        let doc = Document::parse(source).expect("fixture parses");
        let first = doc.wrappable_paragraphs().iter().next().expect("list paragraph");
        assert_eq!(first.list_four_space_cont_prefix.as_deref(), Some(">     "));
        assert_eq!(first.cont_prefix, ">   ");
    }

    /// Each nested bullet is reported at its own one-byte range.
    #[test]
    fn unordered_nested_list_marker_facts_are_marker_local() {
        let src = " *   * * *   * *   * *   *   * \\\\*";
        let expected = vec![1..2, 5..6, 7..8, 9..10, 13..14, 15..16, 19..20, 21..22, 25..26, 29..30];
        let doc = Document::parse(src).expect("fixture parses");
        let found: Vec<_> = doc
            .unordered_list_marker_sites()
            .iter()
            .map(UnorderedListMarkerSite::marker_range)
            .collect();
        assert_eq!(found, expected);
    }

    /// Ordered markers report the list's start number and their own ordinal.
    #[test]
    fn ordered_list_marker_facts_carry_list_start_and_ordinal() {
        let source = "3. alpha\n4. beta\n";
        let doc = Document::parse(source).expect("fixture parses");
        let found: Vec<_> = doc
            .ordered_list_marker_sites()
            .iter()
            .map(|marker| (marker.marker_range(), marker.start_number(), marker.ordinal()))
            .collect();
        assert_eq!(found, vec![(0..1, 3, 0), (9..10, 3, 1)]);
    }

    /// Pair indices are counted per delimiter kind, not across kinds.
    #[test]
    fn inline_delimiter_slots_are_pair_local() {
        let doc = Document::parse("__outer _inner___\n").expect("fixture parses");
        let describe = |kind: InlineDelimiterKind| -> Vec<_> {
            doc.inline_delimiter_slots(kind)
                .iter()
                .map(|slot| (slot.pair(), slot.kind(), slot.open_range(), slot.close_range()))
                .collect()
        };

        assert_eq!(
            describe(InlineDelimiterKind::Strong),
            vec![(0, InlineDelimiterKind::Strong, 0..2, 15..17)]
        );
        assert_eq!(
            describe(InlineDelimiterKind::Emphasis),
            vec![(0, InlineDelimiterKind::Emphasis, 8..9, 14..15)]
        );
    }

    /// Destinations of both links and images are reported, in source order.
    #[test]
    fn inline_link_destination_slots_include_links_and_images() {
        let source = "[x](https://example.com) ![alt](https://example.com/img)\n";
        let doc = Document::parse(source).expect("fixture parses");
        let found: Vec<_> = doc
            .inline_link_destination_slots()
            .iter()
            .map(InlineLinkDestinationSlot::range)
            .collect();

        assert_eq!(found, vec![4..23, 32..55]);
    }

    /// An escaped pipe stays inside its cell instead of splitting it.
    #[test]
    fn table_cell_facts_preserve_escaped_pipes_inside_cells() {
        let source = "| code | escaped |\n| --- | --- |\n| `a\\|b` | left\\|right |\n";
        let doc = Document::parse(source).expect("fixture parses");
        let table = doc.table_sites().first().expect("table fact");
        let body = table.rows().get(2).expect("body row");
        let ranges: Vec<_> = body.cells().iter().map(TableCellSite::raw_range).collect();

        assert_eq!(ranges.len(), 2);
        let left = ranges.first().expect("first cell");
        let right = ranges.get(1).expect("second cell");
        assert_eq!(doc.source().get(left.clone()), Some(" `a\\|b` "));
        assert_eq!(doc.source().get(right.clone()), Some(" left\\|right "));
    }
}