Skip to main content

carta_readers/commonmark/
mod.rs

1//! `CommonMark` reader.
2//!
3//! Parsing follows the spec's two-phase strategy: the block phase (`block`) consumes the input
4//! line by line into a tree of `IrBlock`s whose leaves still hold raw text, collecting link
5//! reference definitions; the inline phase (`inline`) then parses each leaf's text into inlines.
6//! The result is assembled into a [`Document`] (see `docs/plans/slice-1-commonmark-html.md`).
7
8mod attr;
9mod autolink;
10mod block;
11mod cursor;
12mod frontmatter;
13mod grid;
14mod html_block;
15mod identifiers;
16mod inline;
17mod scan;
18mod table;
19mod texttable;
20mod yaml;
21
22use std::collections::BTreeMap;
23
24use carta_ast::{Alignment, Attr, Block, Document, Format, Inline, ListAttributes};
25use carta_core::{Extensions, Reader, ReaderOptions, Result};
26
27pub(crate) use frontmatter::{parse_metadata_json, parse_metadata_yaml};
28
/// Parses `CommonMark` text into the document model.
///
/// The strict `CommonMark` preset is the empty extension set; `options.extensions` additionally
/// enables `strikeout`, `subscript`, `superscript`, `hard_line_breaks`, and `task_lists`
/// (see `plans/006-commonmark-easy-extensions.md`). `raw_html` is always honored, so toggling it has
/// no effect on the produced document.
///
/// That list is not exhaustive: the unit tests in this module also drive the reader with
/// `Footnotes`, `FencedDivs`, `ImplicitFigures`, `AutoIdentifiers`, `GfmAutoIdentifiers`,
/// `ImplicitHeaderReferences`, `PipeTables`, `GridTables`, `TableCaptions`, `TableAttributes`, and
/// `YamlMetadataBlock`. `options.greedy_paragraphs` is forwarded to both parsing phases; the tests
/// refer to it as the markdown dialect.
///
/// The reader is a field-less unit struct, so it is `Copy` and can be used as a plain value.
#[derive(Debug, Default, Clone, Copy)]
pub struct CommonmarkReader;
37
38impl Reader for CommonmarkReader {
39    fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document> {
40        let ext = options.extensions;
41        let normalized = normalize(input);
42        let frontmatter::FrontMatter { meta, body } = frontmatter::extract(&normalized, options)?;
43        let source = body.as_deref().unwrap_or(&normalized);
44        let (ir, refs, footnotes, examples) = block::parse(source, ext, options.greedy_paragraphs);
45        let blocks = inline::resolve_document(
46            &ir,
47            refs,
48            &footnotes,
49            &examples,
50            ext,
51            options.greedy_paragraphs,
52        );
53        Ok(Document {
54            meta: meta.into_iter().map(|(k, v)| (k.into(), v)).collect(),
55            blocks,
56            ..Document::default()
57        })
58    }
59}
60
/// A block whose leaf content is still raw, undifferentiated text awaiting the inline phase.
///
/// Container variants nest further `IrBlock`s; leaf variants carry `String`s that the inline phase
/// later parses.
#[derive(Debug, Clone)]
pub(crate) enum IrBlock {
    /// A paragraph rendered as `Para` (loose context).
    Para(String),
    /// A paragraph rendered as `Plain` (tight list item).
    Plain(String),
    /// A heading: an `i32` (presumably the heading level — confirm in `block`) and its raw text.
    Heading(i32, String),
    /// A code block: its attributes and its text.
    CodeBlock(Attr, String),
    /// A raw HTML block's text.
    RawHtml(String),
    /// A raw block in a named passthrough format (e.g. a fenced ```` ```{=latex} ```` block).
    RawBlock(Format, String),
    /// A thematic break; it carries no content.
    ThematicBreak,
    /// A fenced div: its attributes and the recursively-parsed block content.
    Div(Attr, Vec<IrBlock>),
    /// A block quote and its nested block content.
    BlockQuote(Vec<IrBlock>),
    /// A line block: one entry per source line, each still-raw text parsed into inlines in the
    /// inline phase. Division into lines and any preserved leading spaces are already baked into
    /// the strings.
    LineBlock(Vec<String>),
    /// A definition list: one entry per term. Each term's raw text is parsed into inlines in the
    /// inline phase; its definitions are block lists still in IR form (`Vec<IrBlock>`), with
    /// tight-vs-loose paragraph demotion already applied.
    DefinitionList(Vec<IrDefItem>),
    /// A bullet list: one inner `Vec<IrBlock>` per item (an item may be empty).
    BulletList(Vec<Vec<IrBlock>>),
    /// An ordered list: its list attributes, then one inner `Vec<IrBlock>` per item.
    OrderedList(ListAttributes, Vec<Vec<IrBlock>>),
    /// A pipe table: per-column alignments, the header row's cell texts, and the body rows' cell
    /// texts. Each cell's text is parsed into inlines in the inline phase. Any caption is attached
    /// after the block phase.
    Table {
        alignments: Vec<Alignment>,
        header: Vec<String>,
        rows: Vec<Vec<String>>,
        /// Raw caption text, if any; `None` until a caption is attached.
        caption: Option<String>,
        /// Attributes attached via the caption line when `table_attributes` is enabled.
        attr: Attr,
    },
    /// A grid table: column specs plus header and body rows of still-raw cell text, each cell parsed
    /// as block content in the inline phase. Any caption is attached after the block phase.
    GridTable(Box<grid::GridTable>),
    /// A dash-ruled table: column specs plus an optional header row and body rows of still-raw cell
    /// text, each cell parsed as inline content in the inline phase. Any caption is attached after
    /// the block phase.
    TextTable(Box<texttable::TextTable>),
}
106
/// One entry of a definition list: a term plus its definitions. The term holds raw text awaiting
/// the inline phase; each definition is its block content (paragraph demotion to `Plain` already
/// applied for tight entries).
#[derive(Debug, Clone)]
pub(crate) struct IrDefItem {
    /// Raw text of the term, parsed into inlines in the inline phase.
    pub term: String,
    /// One inner `Vec<IrBlock>` per definition attached to the term.
    pub definitions: Vec<Vec<IrBlock>>,
}
115
/// A resolved link reference definition: its destination URL and optional title.
#[derive(Debug, Clone)]
pub(crate) struct LinkDef {
    /// Destination of the link.
    pub url: String,
    /// Title of the link. The field is a plain `String`, so a definition without a title is
    /// presumably represented by the empty string — confirm where definitions are built.
    pub title: String,
}
122
/// Reference definitions keyed by their normalized label: the explicit `[label]: url` definitions,
/// plus the implicit definitions a heading contributes when `implicit_header_references` is on. A
/// heading's label is its source text normalized the same way, so both kinds resolve through one
/// lookup; an explicit definition, registered first, wins over a heading with the same label.
///
/// The block phase returns this map and the inline phase takes it by value.
pub(crate) type RefMap = BTreeMap<String, LinkDef>;
128
/// Footnote definitions, keyed by their normalized label; each value is the still-raw block content
/// gathered for that footnote, resolved into a `Note` at every matching reference.
///
/// The block phase returns this map and the inline phase borrows it.
pub(crate) type FootnoteDefs = BTreeMap<String, Vec<IrBlock>>;
132
/// Example-list item numbers, keyed by `@label`. The block phase walks every example list in
/// document order, assigning each distinct label the next number in a single shared sequence; a
/// later `@label` reference resolves to that number.
///
/// The block phase returns this map and the inline phase borrows it.
pub(crate) type ExampleMap = BTreeMap<String, i32>;
137
138/// Parse the text of a block-level metadata value into blocks, reusing the full block and inline
139/// pipeline. Front matter is not re-extracted, so a metadata value never recurses into another
140/// metadata block.
141pub(crate) fn parse_meta_blocks(
142    text: &str,
143    extensions: Extensions,
144    greedy_paragraphs: bool,
145) -> Vec<Block> {
146    let normalized = normalize(text);
147    let (ir, refs, footnotes, examples) = block::parse(&normalized, extensions, greedy_paragraphs);
148    inline::resolve_document(
149        &ir,
150        refs,
151        &footnotes,
152        &examples,
153        extensions,
154        greedy_paragraphs,
155    )
156}
157
158/// Parse the raw text of a table cell into block content, reusing the full block and inline
159/// pipeline. A tight cell — one with no internal blank line — demotes its top-level paragraphs to
160/// `Plain`; an empty cell carries no blocks.
161pub(crate) fn parse_table_cell(
162    text: &str,
163    tight: bool,
164    extensions: Extensions,
165    greedy_paragraphs: bool,
166) -> Vec<Block> {
167    if text.is_empty() {
168        return Vec::new();
169    }
170    let normalized = normalize(text);
171    let (mut ir, refs, footnotes, examples) =
172        block::parse(&normalized, extensions, greedy_paragraphs);
173    if tight {
174        block::demote_loose_paragraphs(&mut ir);
175    }
176    inline::resolve_document(
177        &ir,
178        refs,
179        &footnotes,
180        &examples,
181        extensions,
182        greedy_paragraphs,
183    )
184}
185
/// Distance in columns between tab stops; a tab is padded out to the next multiple of this.
const TAB_STOP: usize = 4;

/// Preprocess raw input: drop one leading UTF-8 BOM, fold `\r\n` and lone `\r` into `\n`, and
/// replace every tab with the spaces needed to reach the next tab stop.
///
/// The column counter advances by one per `char` and restarts after every line ending, so the
/// rest of the parser only ever sees spaces and `\n`.
fn normalize(input: &str) -> String {
    let text = input.strip_prefix('\u{feff}').unwrap_or(input);
    let mut normalized = String::with_capacity(text.len());
    let mut col: usize = 0;
    let mut rest = text.chars().peekable();
    while let Some(current) = rest.next() {
        if current == '\r' || current == '\n' {
            if current == '\r' {
                // A `\r\n` pair is one line ending: swallow the `\n` half.
                let _ = rest.next_if_eq(&'\n');
            }
            normalized.push('\n');
            col = 0;
        } else if current == '\t' {
            let pad = TAB_STOP - col % TAB_STOP;
            normalized.extend(std::iter::repeat(' ').take(pad));
            col += pad;
        } else {
            normalized.push(current);
            col += 1;
        }
    }
    normalized
}
226
/// Helper used by the inline phase to wrap parsed inlines back into AST blocks.
///
/// Wraps `inlines` in a [`Block::Para`].
pub(crate) fn para(inlines: Vec<Inline>) -> Block {
    Block::Para(inlines)
}
231
/// Wraps `inlines` in a [`Block::Plain`], the counterpart of [`para`].
pub(crate) fn plain(inlines: Vec<Inline>) -> Block {
    Block::Plain(inlines)
}
235
236#[cfg(test)]
237mod tests {
238    use super::CommonmarkReader;
239    use carta_ast::{
240        Alignment, Attr, Block, Document, Inline, ListNumberDelim, ListNumberStyle, Target,
241    };
242    use carta_core::{Extension, Extensions, Reader, ReaderOptions};
243
244    fn blocks(input: &str) -> Vec<Block> {
245        CommonmarkReader
246            .read(input, &ReaderOptions::default())
247            .expect("reader should not fail")
248            .blocks
249    }
250
251    fn blocks_with(input: &str, ext: Extension) -> Vec<Block> {
252        let mut extensions = Extensions::empty();
253        extensions.insert(ext);
254        let mut options = ReaderOptions::default();
255        options.extensions = extensions;
256        CommonmarkReader
257            .read(input, &options)
258            .expect("reader should not fail")
259            .blocks
260    }
261
262    fn blocks_with_many(input: &str, exts: &[Extension]) -> Vec<Block> {
263        let mut extensions = Extensions::empty();
264        for ext in exts {
265            extensions.insert(*ext);
266        }
267        let mut options = ReaderOptions::default();
268        options.extensions = extensions;
269        CommonmarkReader
270            .read(input, &options)
271            .expect("reader should not fail")
272            .blocks
273    }
274
275    /// The inlines of a single-paragraph document, for footnote assertions.
276    fn para_inlines(input: &str, ext: Extension) -> Vec<Inline> {
277        match blocks_with(input, ext).as_slice() {
278            [Block::Para(inlines)] => inlines.clone(),
279            other => panic!("expected a single paragraph, got {other:?}"),
280        }
281    }
282
283    #[test]
284    fn footnote_reference_resolves_to_a_note_and_lifts_the_definition() {
285        // The definition leaves the body, so only the referencing paragraph remains, and its
286        // reference becomes a note carrying the definition's blocks.
287        let inlines = para_inlines("text[^a]\n\n[^a]: body\n", Extension::Footnotes);
288        let note = inlines
289            .iter()
290            .find_map(|inline| match inline {
291                Inline::Note(blocks) => Some(blocks.clone()),
292                _ => None,
293            })
294            .expect("a note should be present");
295        assert!(matches!(note.as_slice(), [Block::Para(_)]));
296    }
297
298    #[test]
299    fn deeply_nested_containers_do_not_overflow_the_stack() {
300        // The block tree is built iteratively, but the passes that turn it into the document — header
301        // gathering, reference resolution, IR-to-AST lowering — recurse through it. A single line of
302        // thousands of `>` nested a block quote per marker, deep enough to overflow the stack when
303        // those walks descended; a nightly fuzz run hit exactly this on the commonmark and (embedded)
304        // ipynb targets. Capping container nesting keeps the tree shallow enough to walk safely.
305        //
306        // Run on a normal application stack: the test harness gives each test a much smaller thread
307        // stack (as little as 512 KiB on macOS), so exercise the guarantee the cap provides rather
308        // than the harness's thread limit.
309        std::thread::Builder::new()
310            .stack_size(8 * 1024 * 1024)
311            .spawn(|| {
312                let deep_quotes = ">".repeat(50_000);
313                assert!(
314                    CommonmarkReader
315                        .read(&deep_quotes, &ReaderOptions::default())
316                        .is_ok()
317                );
318            })
319            .unwrap()
320            .join()
321            .unwrap();
322    }
323
324    /// Read in the markdown dialect (greedy paragraphs) with the given extensions enabled.
325    fn read_markdown(input: &str, exts: &[Extension]) -> Document {
326        let mut extensions = Extensions::empty();
327        for ext in exts {
328            extensions.insert(*ext);
329        }
330        let mut options = ReaderOptions::default();
331        options.extensions = extensions;
332        options.greedy_paragraphs = true;
333        CommonmarkReader
334            .read(input, &options)
335            .expect("reader should not fail")
336    }
337
338    #[test]
339    fn grid_cell_inlines_honor_the_markdown_dialect() {
340        // A grid-table cell parses its content under the document's dialect: in the markdown dialect
341        // a superscript rejects an inner space, so `^a b^` stays literal rather than wrapping.
342        let input = "+-------+\n| ^a b^ |\n+-------+\n";
343        let doc = read_markdown(input, &[Extension::GridTables, Extension::Superscript]);
344        let table = match doc.blocks.as_slice() {
345            [Block::Table(table)] => table,
346            other => panic!("expected a single table, got {other:?}"),
347        };
348        let cell = table
349            .bodies
350            .first()
351            .and_then(|body| body.body.first())
352            .and_then(|row| row.cells.first())
353            .expect("a single body cell");
354        let inlines = match cell.content.as_slice() {
355            [Block::Plain(inlines)] => inlines,
356            other => panic!("expected a plain cell, got {other:?}"),
357        };
358        assert!(
359            inlines.iter().all(|i| !matches!(i, Inline::Superscript(_))),
360            "grid cell should not build a superscript around an inner space: {inlines:?}"
361        );
362    }
363
364    #[test]
365    fn metadata_values_honor_the_markdown_dialect() {
366        use carta_ast::MetaValue;
367        // A YAML metadata value parses under the document's dialect too: the superscript with an
368        // inner space stays literal and the code span trims its padding to `x`.
369        let input = "---\ntitle: ^a b^ `  x  `\n---\n\nbody\n";
370        let doc = read_markdown(
371            input,
372            &[Extension::YamlMetadataBlock, Extension::Superscript],
373        );
374        let inlines = match doc.meta.get("title") {
375            Some(MetaValue::MetaInlines(inlines)) => inlines,
376            other => panic!("expected inline metadata, got {other:?}"),
377        };
378        assert!(
379            inlines.iter().all(|i| !matches!(i, Inline::Superscript(_))),
380            "metadata should not build a superscript around an inner space: {inlines:?}"
381        );
382        assert!(
383            inlines
384                .iter()
385                .any(|i| matches!(i, Inline::Code(_, code) if code == "x")),
386            "metadata code span should trim to `x`: {inlines:?}"
387        );
388    }
389
390    #[test]
391    fn attribute_only_table_caption_carries_no_blocks() {
392        // A caption line that is nothing but a trailing attribute block: the block is split off onto
393        // the table's own attributes, leaving the caption text empty. An empty caption parses to no
394        // blocks at all, never a `Plain` wrapping an empty inline list.
395        let input = "| a | b |\n|---|---|\n| 1 | 2 |\n\n: {#tid}\n";
396        let blocks = blocks_with_many(
397            input,
398            &[
399                Extension::PipeTables,
400                Extension::TableCaptions,
401                Extension::TableAttributes,
402            ],
403        );
404        let table = match blocks.as_slice() {
405            [Block::Table(table)] => table,
406            other => panic!("expected a single table, got {other:?}"),
407        };
408        assert!(table.caption.long.is_empty());
409        assert_eq!(table.attr.id, "tid");
410    }
411
412    #[test]
413    fn undefined_footnote_reference_stays_literal() {
414        // With no matching definition the brackets are ordinary text and no note is produced.
415        let inlines = para_inlines("text[^missing]\n", Extension::Footnotes);
416        assert!(inlines.iter().all(|i| !matches!(i, Inline::Note(_))));
417        assert!(
418            inlines
419                .iter()
420                .any(|i| matches!(i, Inline::Str(s) if s.contains("[^missing]")))
421        );
422    }
423
424    #[test]
425    fn footnote_extension_off_produces_no_note() {
426        // Without the toggle `[^a]: body` is an ordinary link reference definition, so `[^a]`
427        // resolves to a link and no note is created.
428        let result = blocks("text[^a]\n\n[^a]: body\n");
429        let [Block::Para(inlines)] = result.as_slice() else {
430            panic!("expected a single paragraph, got {result:?}");
431        };
432        assert!(inlines.iter().any(|i| matches!(i, Inline::Link(..))));
433        assert!(inlines.iter().all(|i| !matches!(i, Inline::Note(_))));
434    }
435
436    #[test]
437    fn footnote_definition_spans_indented_continuation_blocks() {
438        let inlines = para_inlines(
439            "ref[^a]\n\n[^a]: first\n\n    second\n",
440            Extension::Footnotes,
441        );
442        let note = inlines
443            .iter()
444            .find_map(|inline| match inline {
445                Inline::Note(blocks) => Some(blocks.clone()),
446                _ => None,
447            })
448            .expect("a note should be present");
449        assert!(matches!(note.as_slice(), [Block::Para(_), Block::Para(_)]));
450    }
451
452    #[test]
453    fn nested_footnote_reference_inside_a_definition_does_not_nest() {
454        // A reference within a definition's own body collapses to an empty string rather than
455        // embedding a further note.
456        let inlines = para_inlines(
457            "ref[^a]\n\n[^a]: see [^b]\n\n[^b]: inner\n",
458            Extension::Footnotes,
459        );
460        let note = inlines
461            .iter()
462            .find_map(|inline| match inline {
463                Inline::Note(blocks) => Some(blocks.clone()),
464                _ => None,
465            })
466            .expect("a note should be present");
467        let Some(Block::Para(body)) = note.first() else {
468            panic!("note should hold a paragraph");
469        };
470        assert!(body.iter().all(|i| !matches!(i, Inline::Note(_))));
471    }
472
473    #[test]
474    fn footnote_labels_fold_case_and_whitespace() {
475        let inlines = para_inlines("ref[^A B]\n\n[^a   b]: body\n", Extension::Footnotes);
476        assert!(inlines.iter().any(|i| matches!(i, Inline::Note(_))));
477    }
478
479    #[test]
480    fn defined_footnote_reference_wins_over_a_following_inline_target() {
481        // A defined reference consumes nothing past `]`, so the `(url)` stays literal text.
482        let inlines = para_inlines("[^a](url)\n\n[^a]: body\n", Extension::Footnotes);
483        assert!(inlines.iter().any(|i| matches!(i, Inline::Note(_))));
484        assert!(
485            inlines
486                .iter()
487                .any(|i| matches!(i, Inline::Str(s) if s.contains("(url)")))
488        );
489    }
490
491    #[test]
492    fn empty_list_marker_below_an_unmatched_container_starts_a_list() {
493        // The paragraph that the `- ` could interrupt sits in the unmatched block quote, a level
494        // below where the marker opens, so the marker is not interrupting it: the quote closes and
495        // an empty bullet list begins rather than the `- ` continuing the paragraph lazily.
496        let result = blocks("> two\n- \n");
497        assert!(matches!(
498            result.as_slice(),
499            [Block::BlockQuote(_), Block::BulletList(items)] if items.as_slice() == [Vec::new()]
500        ));
501    }
502
503    #[test]
504    fn bare_marker_trailed_by_spaces_leaves_an_empty_item() {
505        // The whitespace after a contentless marker is not a non-blank line, so it leaves the item
506        // empty rather than opening an indented code block inside it.
507        assert!(matches!(
508            blocks("-     \n").as_slice(),
509            [Block::BulletList(items)] if items.as_slice() == [Vec::new()]
510        ));
511    }
512
513    #[test]
514    fn empty_list_marker_still_cannot_interrupt_a_same_level_paragraph() {
515        // At the same level the restriction holds: an empty marker is absorbed into the paragraph.
516        // (`*` is used rather than `-` so the line is not read as a setext heading underline.)
517        assert!(matches!(blocks("para\n* \n").as_slice(), [Block::Para(_)]));
518    }
519
520    #[test]
521    fn long_digit_run_is_not_an_ordered_list() {
522        // Regression (found by fuzzing): a digit run longer than nine is not an ordered-list
523        // marker, and computing its start value must not overflow.
524        let input = format!("{}*:*\n", "8".repeat(34));
525        assert!(matches!(blocks(&input).as_slice(), [Block::Para(_)]));
526    }
527
528    #[test]
529    fn ordered_list_start_caps_at_nine_digits() {
530        assert!(matches!(
531            blocks("999999999. a\n").as_slice(),
532            [Block::OrderedList(..)]
533        ));
534        assert!(matches!(
535            blocks("1234567890. a\n").as_slice(),
536            [Block::Para(_)]
537        ));
538    }
539
540    #[test]
541    fn fenced_div_bare_word_names_a_single_class() {
542        let result = blocks_with("::: warning\nbody\n:::\n", Extension::FencedDivs);
543        let [Block::Div(attr, children)] = result.as_slice() else {
544            panic!("expected a single div, got {result:?}");
545        };
546        assert!(attr.id.is_empty());
547        assert_eq!(attr.classes, ["warning"]);
548        assert!(attr.attributes.is_empty());
549        assert!(matches!(children.as_slice(), [Block::Para(_)]));
550    }
551
552    #[test]
553    fn fenced_div_brace_spec_carries_id_classes_and_pairs() {
554        let result = blocks_with("::: {#a .b .c k=v}\nbody\n:::\n", Extension::FencedDivs);
555        let [Block::Div(attr, _)] = result.as_slice() else {
556            panic!("expected a single div, got {result:?}");
557        };
558        assert_eq!(attr.id, "a");
559        assert_eq!(attr.classes, ["b", "c"]);
560        assert_eq!(attr.attributes, [("k".into(), "v".into())]);
561    }
562
563    #[test]
564    fn fenced_divs_nest_with_the_inner_closing_first() {
565        let result = blocks_with(
566            "::: outer\n::: inner\nx\n:::\ny\n:::\n",
567            Extension::FencedDivs,
568        );
569        let [Block::Div(outer, outer_children)] = result.as_slice() else {
570            panic!("expected a single outer div, got {result:?}");
571        };
572        assert_eq!(outer.classes, ["outer"]);
573        let [Block::Div(inner, _), Block::Para(_)] = outer_children.as_slice() else {
574            panic!("outer should hold an inner div then a paragraph, got {outer_children:?}");
575        };
576        assert_eq!(inner.classes, ["inner"]);
577    }
578
579    #[test]
580    fn a_shorter_colon_run_does_not_close_a_longer_fence() {
581        // The div opens with four colons, so a three-colon line inside it is ordinary text and the
582        // div runs to the matching four-colon close.
583        let result = blocks_with(
584            ":::: wide\n:::\nstill inside\n::::\n",
585            Extension::FencedDivs,
586        );
587        let [Block::Div(attr, children)] = result.as_slice() else {
588            panic!("expected a single div, got {result:?}");
589        };
590        assert_eq!(attr.classes, ["wide"]);
591        assert!(matches!(children.as_slice(), [Block::Para(_)]));
592    }
593
594    #[test]
595    fn fenced_div_syntax_without_the_extension_stays_text() {
596        // With the toggle off, the colon fences are ordinary paragraph text and no div is produced.
597        let result = blocks("::: warning\nbody\n:::\n");
598        assert!(result.iter().all(|b| !matches!(b, Block::Div(..))));
599    }
600
601    #[test]
602    fn blank_after_a_div_in_a_list_item_makes_the_list_loose() {
603        let result = blocks_with(
604            "- ::: note\n  inside\n  :::\n\n  after\n",
605            Extension::FencedDivs,
606        );
607        // The blank between the closed div and `after` is a gap inside the item, so the list is
608        // loose and the trailing paragraph stays `Para` rather than being demoted to `Plain`.
609        let [Block::BulletList(items)] = result.as_slice() else {
610            panic!("expected a single bullet list, got {result:?}");
611        };
612        let Some([Block::Div(..), tail]) = items.first().map(Vec::as_slice) else {
613            panic!("the item should hold a div then a trailing block, got {items:?}");
614        };
615        assert!(
616            matches!(tail, Block::Para(_)),
617            "loose list should keep the trailing paragraph as Para, got {tail:?}"
618        );
619    }
620
621    #[test]
622    fn blank_ending_a_nested_block_quote_makes_the_list_loose() {
623        // The blank line after the first item's block quote leaves that quote unmatched, so it
624        // ends there and the blank counts toward the list's looseness. A loose list keeps its item
625        // paragraphs as `Para` (a tight list would demote them to `Plain`).
626        let result = blocks("- item\n  > q\n\n- item2\n");
627        let [Block::BulletList(items)] = result.as_slice() else {
628            panic!("expected a single bullet list, got {result:?}");
629        };
630        let Some([first, ..]) = items.first().map(Vec::as_slice) else {
631            panic!("the first item should have content");
632        };
633        assert!(
634            matches!(first, Block::Para(_)),
635            "loose list should keep the item paragraph as Para, got {first:?}"
636        );
637    }
638
639    #[test]
640    fn image_only_paragraph_becomes_a_figure_captioned_by_its_alt_text() {
641        let result = blocks_with("![a gull](gull.png)\n", Extension::ImplicitFigures);
642        let [Block::Figure(attr, caption, body)] = result.as_slice() else {
643            panic!("expected a single figure, got {result:?}");
644        };
645        assert_eq!(*attr, Box::new(carta_ast::Attr::default()));
646        assert!(caption.short.is_none());
647        // The caption is a clone of the image's alt inlines wrapped in one `Plain`.
648        let [Block::Plain(caption_inlines)] = caption.long.as_slice() else {
649            panic!("caption should be a single Plain, got {:?}", caption.long);
650        };
651        assert!(matches!(
652            caption_inlines.as_slice(),
653            [Inline::Str(a), Inline::Space, Inline::Str(b)] if a == "a" && b == "gull"
654        ));
655        // The body is the original image, unchanged, inside a single `Plain`.
656        let [Block::Plain(image_inlines)] = body.as_slice() else {
657            panic!("body should be a single Plain, got {body:?}");
658        };
659        let [Inline::Image(_, alt, target)] = image_inlines.as_slice() else {
660            panic!("body should wrap an Image, got {image_inlines:?}");
661        };
662        assert_eq!(*caption_inlines, *alt, "alt is duplicated into the caption");
663        assert_eq!(target.url, "gull.png");
664    }
665
666    #[test]
667    fn an_empty_alt_image_stays_a_paragraph() {
668        // The decisive condition is a non-empty alt; a title does not change that.
669        let result = blocks_with("![](spacer.png \"t\")\n", Extension::ImplicitFigures);
670        let [Block::Para(inlines)] = result.as_slice() else {
671            panic!("expected a paragraph, got {result:?}");
672        };
673        assert!(matches!(inlines.as_slice(), [Inline::Image(_, alt, _)] if alt.is_empty()));
674    }
675
676    #[test]
677    fn the_image_title_is_not_used_as_the_caption() {
678        let result = blocks_with("![cap](c.png \"tooltip\")\n", Extension::ImplicitFigures);
679        let [Block::Figure(_, caption, _)] = result.as_slice() else {
680            panic!("expected a figure, got {result:?}");
681        };
682        let [Block::Plain(inlines)] = caption.long.as_slice() else {
683            panic!("caption should be a single Plain, got {:?}", caption.long);
684        };
685        assert!(matches!(inlines.as_slice(), [Inline::Str(s)] if s == "cap"));
686    }
687
688    #[test]
689    fn an_extra_inline_or_a_wrapper_keeps_the_paragraph() {
690        // A second inline disqualifies the paragraph.
691        assert!(matches!(
692            blocks_with("look at ![this](i.png)\n", Extension::ImplicitFigures).as_slice(),
693            [Block::Para(_)]
694        ));
695        // A link wrapping the image makes the link the sole inline, not the image.
696        let linked = blocks_with("[![a](i.png)](u)\n", Extension::ImplicitFigures);
697        let [Block::Para(inlines)] = linked.as_slice() else {
698            panic!("expected a paragraph, got {linked:?}");
699        };
700        assert!(matches!(inlines.as_slice(), [Inline::Link(..)]));
701    }
702
703    #[test]
704    fn implicit_figures_off_keeps_the_image_paragraph() {
705        assert!(matches!(
706            blocks("![a gull](gull.png)\n").as_slice(),
707            [Block::Para(_)]
708        ));
709    }
710
711    fn header_ids(blocks: &[Block]) -> Vec<String> {
712        blocks
713            .iter()
714            .filter_map(|b| match b {
715                Block::Header(_, attr, _) => Some(attr.id.to_string()),
716                _ => None,
717            })
718            .collect()
719    }
720
721    #[test]
722    fn gfm_auto_identifiers_slug_headers_and_count_duplicates() {
723        let result = blocks_with(
724            "# Foo & Bar\n\n# 1.2 Section\n\n# Foo & Bar\n",
725            Extension::GfmAutoIdentifiers,
726        );
727        // Punctuation drops without collapsing the gaps, dots vanish, leading digits stay, and a
728        // repeated slug is suffixed by its occurrence count.
729        assert_eq!(
730            header_ids(&result),
731            ["foo--bar", "12-section", "foo--bar-1"]
732        );
733    }
734
735    #[test]
736    fn auto_identifiers_strip_leading_runs_and_increment_until_unique() {
737        let result = blocks_with(
738            "# 1. Intro\n\n# Intro\n\n# Intro\n",
739            Extension::AutoIdentifiers,
740        );
741        // The leading non-letter run is stripped, then each repeat increments until the whole
742        // identifier is unused.
743        assert_eq!(header_ids(&result), ["intro", "intro-1", "intro-2"]);
744    }
745
746    #[test]
747    fn auto_identifiers_fall_back_to_section_for_empty_slugs() {
748        let result = blocks_with("# !!!\n\n# ???\n", Extension::AutoIdentifiers);
749        // Both headings reduce to nothing, so the fallback `section` applies and the second is
750        // disambiguated.
751        assert_eq!(header_ids(&result), ["section", "section-1"]);
752    }
753
754    #[test]
755    fn auto_identifiers_off_leaves_headers_unidentified() {
756        assert_eq!(header_ids(&blocks("# Hello World\n")), [""]);
757    }
758
    /// The extension pair the implicit-header-reference tests parse with: GFM-style automatic
    /// heading identifiers plus implicit header references.
    const HEADER_REFS: &[Extension] = &[
        Extension::GfmAutoIdentifiers,
        Extension::ImplicitHeaderReferences,
    ];
763
764    /// The link and image targets reached from every paragraph, in order.
765    fn reference_targets(blocks: &[Block]) -> Vec<String> {
766        fn collect(inlines: &[Inline], out: &mut Vec<String>) {
767            for inline in inlines {
768                match inline {
769                    Inline::Link(_, _, target) | Inline::Image(_, _, target) => {
770                        out.push(target.url.to_string());
771                    }
772                    _ => {}
773                }
774            }
775        }
776        let mut out = Vec::new();
777        for block in blocks {
778            if let Block::Para(inlines) = block {
779                collect(inlines, &mut out);
780            }
781        }
782        out
783    }
784
785    #[test]
786    fn implicit_header_references_resolve_a_shortcut_reference() {
787        let result = blocks_with_many("# Some Heading\n\n[Some Heading]\n", HEADER_REFS);
788        // The heading registers a definition keyed by its label, so the bare reference links to
789        // the heading's identifier.
790        assert_eq!(reference_targets(&result), ["#some-heading"]);
791    }
792
793    #[test]
794    fn implicit_header_references_match_full_collapsed_and_image_forms() {
795        let result = blocks_with_many(
796            "# Some Heading\n\n[text][Some Heading] [Some Heading][] ![Some Heading]\n",
797            HEADER_REFS,
798        );
799        // Full, collapsed, and image references all resolve to the same anchor.
800        assert_eq!(
801            reference_targets(&result),
802            ["#some-heading", "#some-heading", "#some-heading"]
803        );
804    }
805
806    #[test]
807    fn implicit_header_references_fold_case_and_collapse_whitespace() {
808        let result = blocks_with_many("# Some Heading\n\n[SOME    HEADING]\n", HEADER_REFS);
809        assert_eq!(reference_targets(&result), ["#some-heading"]);
810    }
811
812    #[test]
813    fn implicit_header_references_match_on_label_source_not_decoded_text() {
814        // The label is matched against the heading's literal source, so the marked-up form
815        // resolves while the same words without the emphasis markers do not.
816        let result = blocks_with_many(
817            "# Heading with *emphasis*\n\n[Heading with *emphasis*] [Heading with emphasis]\n",
818            HEADER_REFS,
819        );
820        assert_eq!(reference_targets(&result), ["#heading-with-emphasis"]);
821    }
822
823    #[test]
824    fn an_explicit_definition_outranks_an_implicit_header_reference() {
825        let result = blocks_with_many(
826            "# Linked Elsewhere\n\n[Linked Elsewhere]: https://example.com/x\n\n[Linked Elsewhere]\n",
827            HEADER_REFS,
828        );
829        // An explicit definition with the same label is registered first and keeps the link.
830        assert_eq!(reference_targets(&result), ["https://example.com/x"]);
831    }
832
833    #[test]
834    fn a_repeated_heading_is_reachable_only_through_the_first() {
835        let result = blocks_with_many("# Twice\n\n# Twice\n\n[Twice]\n", HEADER_REFS);
836        // The first heading keeps the bare identifier; the reference resolves to it, not the
837        // disambiguated second occurrence.
838        assert_eq!(reference_targets(&result), ["#twice"]);
839    }
840
841    #[test]
842    fn implicit_header_references_resolve_before_their_heading() {
843        let result = blocks_with_many("[Later Section]\n\n# Later Section\n", HEADER_REFS);
844        // A reference may precede the heading it points at.
845        assert_eq!(reference_targets(&result), ["#later-section"]);
846    }
847
848    #[test]
849    fn implicit_header_references_off_leaves_the_label_literal() {
850        let result = blocks_with(
851            "# Some Heading\n\n[Some Heading]\n",
852            Extension::GfmAutoIdentifiers,
853        );
854        assert!(reference_targets(&result).is_empty());
855        let [_, Block::Para(inlines)] = result.as_slice() else {
856            panic!("expected a heading then a paragraph, got {result:?}");
857        };
858        assert!(
859            inlines
860                .iter()
861                .any(|i| matches!(i, Inline::Str(s) if s.contains("[Some")))
862        );
863    }
864
865    #[test]
866    fn implicit_header_references_plain_heading_matches_an_ordinary_paragraph_parse() {
867        let result = blocks_with_many("# Simple title\n\nSimple title\n", HEADER_REFS);
868        let [
869            Block::Header(_, _, header_inlines),
870            Block::Para(para_inlines),
871        ] = result.as_slice()
872        else {
873            panic!("expected a heading then a paragraph, got {result:?}");
874        };
875        // The heading's content has no reference/citation/note trigger character, so its parse is
876        // reused from the pre-pass rather than reparsed; it still matches an ordinary parse of the
877        // same text.
878        assert_eq!(header_inlines, para_inlines);
879    }
880
881    fn cite_note_nums(blocks: &[Block]) -> Vec<i32> {
882        fn collect(inlines: &[Inline], out: &mut Vec<i32>) {
883            for inline in inlines {
884                if let Inline::Cite(citations, _) = inline {
885                    out.extend(citations.iter().map(|c| c.note_num));
886                }
887            }
888        }
889        let mut out = Vec::new();
890        for block in blocks {
891            match block {
892                Block::Header(_, _, inlines) | Block::Para(inlines) => collect(inlines, &mut out),
893                _ => {}
894            }
895        }
896        out
897    }
898
899    #[test]
900    fn implicit_header_references_heading_with_a_citation_is_not_cached() {
901        let result = blocks_with_many(
902            "# About @doe99\n\nSee @smith too.\n",
903            &[
904                Extension::GfmAutoIdentifiers,
905                Extension::ImplicitHeaderReferences,
906                Extension::Citations,
907            ],
908        );
909        // The heading's content contains `@`, so the pre-pass parse (built against a scratch
910        // citation count) is not reused; the body pass reparses it against the body's running
911        // count, so the heading's citation is numbered first and the paragraph's second.
912        assert_eq!(cite_note_nums(&result), [1, 2]);
913    }
914
915    #[test]
916    fn implicit_header_references_heading_with_a_footnote_resolves_in_the_body_pass() {
917        let result = blocks_with_many(
918            "# Title[^1]\n\n[^1]: the note body\n",
919            &[
920                Extension::GfmAutoIdentifiers,
921                Extension::ImplicitHeaderReferences,
922                Extension::Footnotes,
923            ],
924        );
925        let [Block::Header(_, _, inlines)] = result.as_slice() else {
926            panic!("expected a single heading, got {result:?}");
927        };
928        let note = inlines
929            .iter()
930            .find_map(|inline| match inline {
931                Inline::Note(blocks) => Some(blocks.clone()),
932                _ => None,
933            })
934            .expect("a note should be present");
935        // The heading's content contains `^`, so the pre-pass parse (built with no footnote bodies
936        // available) is not reused; the body pass sees the real footnote body.
937        assert!(matches!(note.as_slice(), [Block::Para(_)]));
938    }
939
940    #[test]
941    fn implicit_header_references_heading_referencing_a_later_heading_resolves_in_the_body_pass() {
942        let result = blocks_with_many("# See [Later Heading]\n\n# Later Heading\n", HEADER_REFS);
943        let [Block::Header(_, _, inlines), _] = result.as_slice() else {
944            panic!("expected two headings, got {result:?}");
945        };
946        // The heading's content contains `[`, so the pre-pass parse (built before the later
947        // heading had registered its own reference) is not reused; the body pass sees the full
948        // reference map and resolves the link.
949        assert!(matches!(inlines.as_slice(), [.., Inline::Link(..)]));
950    }
951
952    #[test]
953    fn implicit_header_references_duplicate_headings_both_resolve_and_get_disambiguated_ids() {
954        let result = blocks_with_many("# Dup\n\n# Dup\n", HEADER_REFS);
955        assert_eq!(header_ids(&result), ["dup", "dup-1"]);
956        let [Block::Header(_, _, first), Block::Header(_, _, second)] = result.as_slice() else {
957            panic!("expected two headings, got {result:?}");
958        };
959        // Both occurrences pop their own queued parse and resolve identically.
960        assert_eq!(first, second);
961    }
962
    /// Line blocks alone, for the tests that exercise line-block parsing in isolation.
    const LINE_BLOCKS: &[Extension] = &[Extension::LineBlocks];
    /// Line blocks together with pipe tables, for the tests where a bar-led line could be either.
    const LINE_BLOCKS_TABLES: &[Extension] = &[Extension::LineBlocks, Extension::PipeTables];
965
966    /// Plain-text rendering of one inline run, enough to assert a line block's entries.
967    fn flatten_inlines(inlines: &[Inline]) -> String {
968        let mut out = String::new();
969        for inline in inlines {
970            match inline {
971                Inline::Str(text) | Inline::Code(_, text) => out.push_str(text),
972                Inline::Space | Inline::SoftBreak | Inline::LineBreak => out.push(' '),
973                Inline::Emph(children)
974                | Inline::Strong(children)
975                | Inline::Link(_, children, _) => out.push_str(&flatten_inlines(children)),
976                _ => {}
977            }
978        }
979        out
980    }
981
982    /// The flattened text of every entry across all line blocks in a document.
983    fn line_block_entries(blocks: &[Block]) -> Vec<String> {
984        let mut entries = Vec::new();
985        for block in blocks {
986            if let Block::LineBlock(lines) = block {
987                entries.extend(lines.iter().map(|line| flatten_inlines(line)));
988            }
989        }
990        entries
991    }
992
993    #[test]
994    fn line_block_keeps_each_marked_line_as_its_own_entry() {
995        let blocks = blocks_with_many("| Line one\n| Line two\n", LINE_BLOCKS);
996        assert!(matches!(blocks.as_slice(), [Block::LineBlock(_)]));
997        assert_eq!(line_block_entries(&blocks), ["Line one", "Line two"]);
998    }
999
1000    #[test]
1001    fn line_block_preserves_leading_spaces_as_non_breaking() {
1002        let blocks = blocks_with_many("|   indented\n", LINE_BLOCKS);
1003        assert_eq!(line_block_entries(&blocks), ["\u{a0}\u{a0}indented"]);
1004    }
1005
1006    #[test]
1007    fn line_block_bar_alone_is_an_empty_entry() {
1008        let blocks = blocks_with_many("|\n| after\n", LINE_BLOCKS);
1009        assert_eq!(line_block_entries(&blocks), ["", "after"]);
1010    }
1011
1012    #[test]
1013    fn line_block_folds_an_indented_continuation_into_the_entry_above() {
1014        let blocks = blocks_with_many("| first part\n  second part\n", LINE_BLOCKS);
1015        assert_eq!(line_block_entries(&blocks), ["first part second part"]);
1016    }
1017
1018    #[test]
1019    fn line_block_collapses_internal_runs_and_drops_trailing_space() {
1020        let blocks = blocks_with_many("| a    b    c   \n", LINE_BLOCKS);
1021        assert_eq!(line_block_entries(&blocks), ["a b c"]);
1022    }
1023
1024    #[test]
1025    fn line_block_all_space_entry_collapses_to_empty() {
1026        let blocks = blocks_with_many("|    \n| x\n", LINE_BLOCKS);
1027        assert_eq!(line_block_entries(&blocks), ["", "x"]);
1028    }
1029
1030    #[test]
1031    fn a_bar_without_a_following_space_is_not_a_line_block() {
1032        let blocks = blocks_with_many("|nospace\n", LINE_BLOCKS);
1033        assert!(matches!(blocks.as_slice(), [Block::Para(_)]));
1034    }
1035
1036    #[test]
1037    fn a_line_block_does_not_interrupt_a_paragraph() {
1038        let blocks = blocks_with_many("ordinary text\n| still the paragraph\n", LINE_BLOCKS);
1039        assert!(matches!(blocks.as_slice(), [Block::Para(_)]));
1040        assert!(line_block_entries(&blocks).is_empty());
1041    }
1042
1043    #[test]
1044    fn a_blank_line_ends_a_line_block() {
1045        let blocks = blocks_with_many("| a\n\nplain\n", LINE_BLOCKS);
1046        assert!(matches!(
1047            blocks.as_slice(),
1048            [Block::LineBlock(_), Block::Para(_)]
1049        ));
1050    }
1051
1052    #[test]
1053    fn a_whitespace_only_line_continues_a_non_empty_entry() {
1054        // Unlike a wholly blank line, a line of only spaces folds into the entry above it (adding
1055        // nothing), so the block stays open and the next bar line is a second entry.
1056        let blocks = blocks_with_many("| a\n  \n| b\n", LINE_BLOCKS);
1057        assert!(matches!(blocks.as_slice(), [Block::LineBlock(_)]));
1058        assert_eq!(line_block_entries(&blocks), ["a", "b"]);
1059    }
1060
1061    #[test]
1062    fn a_continuation_under_an_empty_entry_ends_the_block() {
1063        // With no content to extend, a whitespace-led line closes the block and is reparsed.
1064        let blocks = blocks_with_many("| \n |\n", LINE_BLOCKS);
1065        assert!(matches!(
1066            blocks.as_slice(),
1067            [Block::LineBlock(_), Block::Para(_)]
1068        ));
1069        assert_eq!(line_block_entries(&blocks), [""]);
1070    }
1071
1072    #[test]
1073    fn a_delimiter_row_under_a_single_bar_line_makes_a_table() {
1074        let blocks = blocks_with_many("| a | b |\n|---|---|\n| 1 | 2 |\n", LINE_BLOCKS_TABLES);
1075        assert!(matches!(blocks.as_slice(), [Block::Table(_)]));
1076        assert!(line_block_entries(&blocks).is_empty());
1077    }
1078
1079    #[test]
1080    fn a_bar_line_with_no_delimiter_stays_a_line_block() {
1081        let blocks = blocks_with_many("| a | b |\nplain\n", LINE_BLOCKS_TABLES);
1082        assert!(matches!(
1083            blocks.as_slice(),
1084            [Block::LineBlock(_), Block::Para(_)]
1085        ));
1086    }
1087
1088    #[test]
1089    fn with_the_extension_off_a_bar_line_is_literal_paragraph_text() {
1090        let blocks = blocks("| a\n");
1091        let [Block::Para(inlines)] = blocks.as_slice() else {
1092            panic!("expected a single paragraph, got {blocks:?}");
1093        };
1094        assert!(matches!(inlines.first(), Some(Inline::Str(text)) if text == "|"));
1095    }
1096
1097    /// The (term-text, definitions) pairs of the first definition list in a document.
1098    fn definition_items(blocks: &[Block]) -> Vec<(String, Vec<Vec<Block>>)> {
1099        for block in blocks {
1100            if let Block::DefinitionList(items) = block {
1101                return items
1102                    .iter()
1103                    .map(|(term, defs)| (flatten_inlines(term), defs.clone()))
1104                    .collect();
1105            }
1106        }
1107        Vec::new()
1108    }
1109
1110    #[test]
1111    fn a_term_above_a_colon_line_becomes_one_tight_definition() {
1112        let items = definition_items(&blocks_with("apple\n: red\n", Extension::DefinitionLists));
1113        let [(term, defs)] = items.as_slice() else {
1114            panic!("expected one item, got {items:?}");
1115        };
1116        assert_eq!(term, "apple");
1117        assert!(matches!(defs.as_slice(), [one] if matches!(one.as_slice(), [Block::Plain(_)])));
1118    }
1119
1120    #[test]
1121    fn a_term_carries_several_definitions_under_colon_or_tilde_markers() {
1122        let items = definition_items(&blocks_with(
1123            "water\n: clear\n~ vital\n",
1124            Extension::DefinitionLists,
1125        ));
1126        let [(term, defs)] = items.as_slice() else {
1127            panic!("expected one item, got {items:?}");
1128        };
1129        assert_eq!(term, "water");
1130        assert_eq!(defs.len(), 2);
1131    }
1132
1133    #[test]
1134    fn consecutive_terms_join_one_list() {
1135        let items = definition_items(&blocks_with(
1136            "a\n: x\n\nb\n: y\n",
1137            Extension::DefinitionLists,
1138        ));
1139        let terms: Vec<&str> = items.iter().map(|(term, _)| term.as_str()).collect();
1140        assert_eq!(terms, ["a", "b"]);
1141    }
1142
1143    #[test]
1144    fn a_blank_line_before_the_marker_makes_the_definition_loose() {
1145        let items = definition_items(&blocks_with(
1146            "planet\n\n: orbits\n",
1147            Extension::DefinitionLists,
1148        ));
1149        let [(_, defs)] = items.as_slice() else {
1150            panic!("expected one item, got {items:?}");
1151        };
1152        assert!(matches!(defs.as_slice(), [one] if matches!(one.as_slice(), [Block::Para(_)])));
1153    }
1154
1155    #[test]
1156    fn an_indented_continuation_keeps_a_second_block_in_the_definition() {
1157        let items = definition_items(&blocks_with(
1158            "essay\n: first.\n\n  second.\n",
1159            Extension::DefinitionLists,
1160        ));
1161        let [(_, defs)] = items.as_slice() else {
1162            panic!("expected one item, got {items:?}");
1163        };
1164        let [blocks] = defs.as_slice() else {
1165            panic!("expected one definition, got {defs:?}");
1166        };
1167        assert_eq!(blocks.len(), 2);
1168    }
1169
1170    #[test]
1171    fn a_definition_holds_a_nested_block_when_indented_to_the_content_column() {
1172        let items = definition_items(&blocks_with(
1173            "shapes\n: items:\n\n    - circle\n    - square\n",
1174            Extension::DefinitionLists,
1175        ));
1176        let [(_, defs)] = items.as_slice() else {
1177            panic!("expected one item, got {items:?}");
1178        };
1179        let [blocks] = defs.as_slice() else {
1180            panic!("expected one definition, got {defs:?}");
1181        };
1182        assert!(matches!(
1183            blocks.as_slice(),
1184            [Block::Plain(_), Block::BulletList(_)]
1185        ));
1186    }
1187
1188    #[test]
1189    fn lines_above_the_marker_fold_into_one_term() {
1190        let items = definition_items(&blocks_with(
1191            "one\ntwo\n: both\n",
1192            Extension::DefinitionLists,
1193        ));
1194        let [(term, _)] = items.as_slice() else {
1195            panic!("expected one item, got {items:?}");
1196        };
1197        assert_eq!(term, "one two");
1198    }
1199
1200    #[test]
1201    fn an_unindented_line_lazily_continues_the_definition() {
1202        let items = definition_items(&blocks_with(
1203            "apple\n: red\norange\n",
1204            Extension::DefinitionLists,
1205        ));
1206        let [(_, defs)] = items.as_slice() else {
1207            panic!("expected one item, got {items:?}");
1208        };
1209        let [blocks] = defs.as_slice() else {
1210            panic!("expected one definition, got {defs:?}");
1211        };
1212        assert!(matches!(blocks.as_slice(), [Block::Plain(_)]));
1213    }
1214
1215    #[test]
1216    fn a_colon_without_a_following_space_is_not_a_marker() {
1217        let blocks = blocks_with("term\n:def\n", Extension::DefinitionLists);
1218        assert!(matches!(blocks.as_slice(), [Block::Para(_)]));
1219    }
1220
1221    #[test]
1222    fn an_empty_definition_yields_an_empty_block_list() {
1223        let blocks = blocks_with("T\n:\nmore\n", Extension::DefinitionLists);
1224        let items = definition_items(&blocks);
1225        let [(term, defs)] = items.as_slice() else {
1226            panic!("expected one item, got {items:?}");
1227        };
1228        assert_eq!(term, "T");
1229        assert!(matches!(defs.as_slice(), [one] if one.is_empty()));
1230        // The unindented line ends the list and stands as its own paragraph.
1231        assert!(matches!(
1232            blocks.as_slice(),
1233            [Block::DefinitionList(_), Block::Para(_)]
1234        ));
1235    }
1236
1237    #[test]
1238    fn an_empty_definition_absorbs_a_deferred_indented_block() {
1239        // A blank line does not close an as-yet-empty definition; the indented line that follows
1240        // becomes its body.
1241        let items = definition_items(&blocks_with(
1242            "T\n:\n\n    code\n",
1243            Extension::DefinitionLists,
1244        ));
1245        let [(_, defs)] = items.as_slice() else {
1246            panic!("expected one item, got {items:?}");
1247        };
1248        assert!(matches!(defs.as_slice(), [one] if matches!(one.as_slice(), [Block::Plain(_)])));
1249    }
1250
1251    #[test]
1252    fn with_the_extension_off_a_colon_line_is_literal_paragraph_text() {
1253        let blocks = blocks("apple\n: red\n");
1254        assert!(matches!(blocks.as_slice(), [Block::Para(_)]));
1255        assert!(definition_items(&blocks).is_empty());
1256    }
1257
1258    /// Each ordered list in `input` (parsed with fancy lists on) reduced to its
1259    /// `(start, style, delimiter, item count)`.
1260    fn ordered_lists(input: &str) -> Vec<(i32, ListNumberStyle, ListNumberDelim, usize)> {
1261        fn collect(
1262            blocks: &[Block],
1263            out: &mut Vec<(i32, ListNumberStyle, ListNumberDelim, usize)>,
1264        ) {
1265            for block in blocks {
1266                if let Block::OrderedList(attrs, items) = block {
1267                    out.push((attrs.start, attrs.style, attrs.delim, items.len()));
1268                    for item in items {
1269                        collect(item, out);
1270                    }
1271                }
1272            }
1273        }
1274        let mut out = Vec::new();
1275        collect(&blocks_with(input, Extension::FancyLists), &mut out);
1276        out
1277    }
1278
1279    #[test]
1280    fn lowercase_letters_form_an_alphabetic_list() {
1281        assert_eq!(
1282            ordered_lists("a. one\nb. two\nc. three\n"),
1283            [(1, ListNumberStyle::LowerAlpha, ListNumberDelim::Period, 3)]
1284        );
1285    }
1286
1287    #[test]
1288    fn an_alphabetic_list_starts_at_its_first_letter() {
1289        assert_eq!(
1290            ordered_lists("c. three\nd. four\n"),
1291            [(3, ListNumberStyle::LowerAlpha, ListNumberDelim::Period, 2)]
1292        );
1293    }
1294
1295    #[test]
1296    fn a_roman_run_is_a_roman_list() {
1297        assert_eq!(
1298            ordered_lists("i. one\nii. two\niii. three\niv. four\n"),
1299            [(1, ListNumberStyle::LowerRoman, ListNumberDelim::Period, 4)]
1300        );
1301    }
1302
1303    #[test]
1304    fn a_lone_i_opens_a_roman_list() {
1305        assert_eq!(
1306            ordered_lists("i. only\n"),
1307            [(1, ListNumberStyle::LowerRoman, ListNumberDelim::Period, 1)]
1308        );
1309    }
1310
1311    #[test]
1312    fn an_alphabetic_list_absorbs_a_following_i() {
1313        // `h. i. j.` is one alphabetic list: `i` continues it as the ninth letter rather than
1314        // restarting as a roman one.
1315        assert_eq!(
1316            ordered_lists("h. eight\ni. nine\nj. ten\n"),
1317            [(8, ListNumberStyle::LowerAlpha, ListNumberDelim::Period, 3)]
1318        );
1319    }
1320
1321    #[test]
1322    fn a_multi_letter_roman_does_not_continue_an_alphabetic_list() {
1323        assert_eq!(
1324            ordered_lists("a. one\nii. two\n"),
1325            [
1326                (1, ListNumberStyle::LowerAlpha, ListNumberDelim::Period, 1),
1327                (2, ListNumberStyle::LowerRoman, ListNumberDelim::Period, 1),
1328            ]
1329        );
1330    }
1331
1332    #[test]
1333    fn a_lone_i_after_a_list_reads_as_the_ninth_letter() {
1334        // Following another list, the ambiguous `i` resolves to the alphabetic reading.
1335        assert_eq!(
1336            ordered_lists("1. one\ni. two\n"),
1337            [
1338                (1, ListNumberStyle::Decimal, ListNumberDelim::Period, 1),
1339                (9, ListNumberStyle::LowerAlpha, ListNumberDelim::Period, 1),
1340            ]
1341        );
1342    }
1343
1344    #[test]
1345    fn parenthesized_and_single_paren_delimiters_are_distinguished() {
1346        assert_eq!(
1347            ordered_lists("(a) one\n"),
1348            [(
1349                1,
1350                ListNumberStyle::LowerAlpha,
1351                ListNumberDelim::TwoParens,
1352                1
1353            )]
1354        );
1355        assert_eq!(
1356            ordered_lists("a) one\n"),
1357            [(1, ListNumberStyle::LowerAlpha, ListNumberDelim::OneParen, 1)]
1358        );
1359    }
1360
1361    #[test]
1362    fn an_uppercase_letter_and_period_need_two_spaces() {
1363        // One space reads as an ordinary sentence; two spaces make it a list.
1364        assert!(matches!(
1365            blocks_with("B. Franklin\n", Extension::FancyLists).as_slice(),
1366            [Block::Para(_)]
1367        ));
1368        assert_eq!(
1369            ordered_lists("B.  item\n"),
1370            [(2, ListNumberStyle::UpperAlpha, ListNumberDelim::Period, 1)]
1371        );
1372    }
1373
1374    #[test]
1375    fn an_uppercase_letter_with_one_space_is_a_list_under_other_delimiters() {
1376        // The two-space rule guards only the period; a paren delimiter is unambiguous.
1377        assert_eq!(
1378            ordered_lists("B) item\n"),
1379            [(2, ListNumberStyle::UpperAlpha, ListNumberDelim::OneParen, 1)]
1380        );
1381    }
1382
1383    #[test]
1384    fn only_a_decimal_one_interrupts_a_paragraph() {
1385        assert!(matches!(
1386            blocks_with("text\na. item\n", Extension::FancyLists).as_slice(),
1387            [Block::Para(_)]
1388        ));
1389        assert!(matches!(
1390            blocks_with("text\n1. item\n", Extension::FancyLists).as_slice(),
1391            [Block::Para(_), Block::OrderedList(..)]
1392        ));
1393        assert!(matches!(
1394            blocks_with("text\n(1) item\n", Extension::FancyLists).as_slice(),
1395            [Block::Para(_), Block::OrderedList(..)]
1396        ));
1397    }
1398
1399    #[test]
1400    fn with_the_extension_off_a_letter_marker_is_paragraph_text() {
1401        assert!(matches!(blocks("a. one\n").as_slice(), [Block::Para(_)]));
1402    }
1403
1404    /// Every example list in `input` (parsed with example lists on) as (start, style, delim, item
1405    /// count), in document order, descendants included.
1406    fn example_lists(input: &str) -> Vec<(i32, ListNumberStyle, ListNumberDelim, usize)> {
1407        fn collect(
1408            blocks: &[Block],
1409            out: &mut Vec<(i32, ListNumberStyle, ListNumberDelim, usize)>,
1410        ) {
1411            for block in blocks {
1412                match block {
1413                    Block::OrderedList(attrs, items) => {
1414                        out.push((attrs.start, attrs.style, attrs.delim, items.len()));
1415                        for item in items {
1416                            collect(item, out);
1417                        }
1418                    }
1419                    Block::BulletList(items) => {
1420                        for item in items {
1421                            collect(item, out);
1422                        }
1423                    }
1424                    _ => {}
1425                }
1426            }
1427        }
1428        let mut out = Vec::new();
1429        collect(&blocks_with(input, Extension::ExampleLists), &mut out);
1430        out
1431    }
1432
1433    /// The flattened text of every top-level paragraph in `input` (example lists on), joined by a
1434    /// space — enough to observe how `@label` references resolve.
1435    fn example_text(input: &str) -> String {
1436        blocks_with(input, Extension::ExampleLists)
1437            .iter()
1438            .filter_map(|block| match block {
1439                Block::Para(inlines) => Some(flatten_inlines(inlines)),
1440                _ => None,
1441            })
1442            .collect::<Vec<_>>()
1443            .join(" ")
1444    }
1445
1446    #[test]
1447    fn the_three_example_markers_open_example_lists() {
1448        use ListNumberDelim::{OneParen, Period, TwoParens};
1449        use ListNumberStyle::Example;
1450        assert_eq!(
1451            example_lists("(@) one\n\n@. two\n\n@) three\n"),
1452            [
1453                (1, Example, TwoParens, 1),
1454                (2, Example, Period, 1),
1455                (3, Example, OneParen, 1),
1456            ]
1457        );
1458    }
1459
1460    #[test]
1461    fn a_reference_resolves_to_its_example_number() {
1462        assert_eq!(example_text("(@a) apple\n\nSee (@a).\n"), "See (1).");
1463    }
1464
1465    #[test]
1466    fn a_bare_reference_drops_the_parentheses() {
1467        assert_eq!(example_text("(@a) apple\n\nbare @a end\n"), "bare 1 end");
1468    }
1469
1470    #[test]
1471    fn the_counter_skips_ordinary_ordered_lists() {
1472        // A plain decimal list between two examples does not advance the example counter.
1473        assert_eq!(
1474            example_lists("(@a) x\n\n1. p\n2. q\n\n(@b) y\n"),
1475            [
1476                (1, ListNumberStyle::Example, ListNumberDelim::TwoParens, 1),
1477                (1, ListNumberStyle::Decimal, ListNumberDelim::Period, 2),
1478                (2, ListNumberStyle::Example, ListNumberDelim::TwoParens, 1),
1479            ]
1480        );
1481        assert_eq!(
1482            example_text("(@a) x\n\n1. p\n2. q\n\n(@b) y\n\nRefs (@a) and (@b)\n"),
1483            "Refs (1) and (2)"
1484        );
1485    }
1486
1487    #[test]
1488    fn a_repeated_label_reuses_its_number() {
1489        use ListNumberDelim::{OneParen, Period, TwoParens};
1490        use ListNumberStyle::Example;
1491        // The second `@a` neither takes a fresh number nor advances the counter, so the distinct
1492        // label `@b` is two, not three. Three delimiters keep the examples in separate lists.
1493        assert_eq!(
1494            example_lists("(@a) x\n\n@a. y\n\n@b) z\n"),
1495            [
1496                (1, Example, TwoParens, 1),
1497                (1, Example, Period, 1),
1498                (2, Example, OneParen, 1),
1499            ]
1500        );
1501        assert_eq!(
1502            example_text("(@a) x\n\n@a. y\n\n@b) z\n\nRef (@a) (@b)\n"),
1503            "Ref (1) (2)"
1504        );
1505    }
1506
1507    #[test]
1508    fn an_anonymous_example_advances_the_counter() {
1509        // The unreferenceable `(@)` takes number one, so the following labelled example is two.
1510        assert_eq!(
1511            example_lists("(@) x\n\n@a. y\n"),
1512            [
1513                (1, ListNumberStyle::Example, ListNumberDelim::TwoParens, 1),
1514                (2, ListNumberStyle::Example, ListNumberDelim::Period, 1),
1515            ]
1516        );
1517        assert_eq!(example_text("(@) x\n\n@a. y\n\nSee (@a)\n"), "See (2)");
1518    }
1519
1520    #[test]
1521    fn an_anonymous_reference_stays_literal() {
1522        assert_eq!(example_text("(@) x\n\nSee (@).\n"), "See (@).");
1523    }
1524
1525    #[test]
1526    fn an_undefined_reference_stays_literal() {
1527        assert_eq!(example_text("(@a) x\n\nSee (@b).\n"), "See (@b).");
1528    }
1529
1530    #[test]
1531    fn a_reference_resolves_within_emphasis_but_not_within_code() {
1532        // Emphasis content is parsed, so the reference resolves; a code span is verbatim.
1533        assert_eq!(example_text("(@a) x\n\n*em (@a)*\n"), "em (1)");
1534        assert_eq!(example_text("(@a) x\n\n`(@a)`\n"), "(@a)");
1535    }
1536
1537    #[test]
1538    fn the_counter_spans_nested_example_lists() {
1539        // Reading order crosses container boundaries: the example nested in a bullet is two.
1540        assert_eq!(
1541            example_text("(@a) x\n\n- bullet\n\n    (@b) nested\n\nRefs (@a) and (@b)\n"),
1542            "Refs (1) and (2)"
1543        );
1544    }
1545
1546    #[test]
1547    fn with_the_extension_off_an_example_marker_is_paragraph_text() {
1548        assert!(matches!(blocks("(@) one\n").as_slice(), [Block::Para(_)]));
1549        assert!(matches!(blocks("@a. one\n").as_slice(), [Block::Para(_)]));
1550    }
1551
1552    fn document(input: &str, exts: &[Extension]) -> carta_ast::Document {
1553        let mut options = ReaderOptions::default();
1554        options.extensions = Extensions::from_list(exts);
1555        CommonmarkReader
1556            .read(input, &options)
1557            .expect("reader should not fail")
1558    }
1559
1560    /// Parse with greedy paragraphs enabled (the markdown dialect) and the given extensions.
1561    fn greedy_blocks(input: &str, exts: &[Extension]) -> Vec<Block> {
1562        let mut options = ReaderOptions::default();
1563        options.extensions = Extensions::from_list(exts);
1564        options.greedy_paragraphs = true;
1565        CommonmarkReader
1566            .read(input, &options)
1567            .expect("reader should not fail")
1568            .blocks
1569    }
1570
1571    #[test]
1572    fn a_greedy_paragraph_folds_a_following_block_quote_heading_and_break() {
1573        // A block-quote, heading, or thematic-break line right under a paragraph continues it. The
1574        // block-quote and heading folds are gated on the `blank_before_*` toggles the markdown
1575        // dialect carries; the thematic break folds on the plain greedy flag.
1576        let toggles = &[
1577            Extension::BlankBeforeBlockquote,
1578            Extension::BlankBeforeHeader,
1579        ];
1580        for line in ["> quote", "# heading", "***"] {
1581            let input = format!("text\n{line}\n");
1582            assert!(
1583                matches!(greedy_blocks(&input, toggles).as_slice(), [Block::Para(_)]),
1584                "expected one paragraph for {input:?}"
1585            );
1586        }
1587    }
1588
1589    #[test]
1590    fn a_heading_or_block_quote_interrupts_without_its_blank_before_toggle() {
1591        // Without `blank_before_header` / `blank_before_blockquote`, the opener interrupts an open
1592        // paragraph as in strict CommonMark, even where paragraphs are otherwise greedy.
1593        assert!(matches!(
1594            greedy_blocks("text\n# heading\n", &[]).as_slice(),
1595            [Block::Para(_), Block::Header(_, _, _)]
1596        ));
1597        assert!(matches!(
1598            greedy_blocks("text\n> quote\n", &[]).as_slice(),
1599            [Block::Para(_), Block::BlockQuote(_)]
1600        ));
1601        // The thematic break is not toggle-gated, so it still folds into the greedy paragraph.
1602        assert!(matches!(
1603            greedy_blocks("text\n***\n", &[]).as_slice(),
1604            [Block::Para(_)]
1605        ));
1606    }
1607
1608    #[test]
1609    fn a_greedy_paragraph_is_not_interrupted_by_a_list_marker() {
1610        // At the top level a fresh list cannot interrupt a paragraph; the marker reads as text.
1611        assert!(matches!(
1612            greedy_blocks("text\n- item\n", &[]).as_slice(),
1613            [Block::Para(_)]
1614        ));
1615    }
1616
1617    #[test]
1618    fn lists_without_preceding_blankline_lets_a_fresh_list_interrupt_a_paragraph() {
1619        let ext = &[Extension::ListsWithoutPrecedingBlankline];
1620        // A bullet and a decimal marker both open a list directly under the paragraph.
1621        assert!(matches!(
1622            greedy_blocks("text\n- item\n", ext).as_slice(),
1623            [Block::Para(_), Block::BulletList(_)]
1624        ));
1625        assert!(matches!(
1626            greedy_blocks("text\n2. item\n", ext).as_slice(),
1627            [Block::Para(_), Block::OrderedList(_, _)]
1628        ));
1629    }
1630
1631    #[test]
1632    fn a_list_shaped_line_ends_a_paragraph_even_when_no_list_opens() {
1633        // With no enabled enumerator style for these shapes, the line still ends the paragraph and
1634        // becomes a fresh paragraph of its own rather than folding in.
1635        let ext = &[Extension::ListsWithoutPrecedingBlankline];
1636        for line in ["(5) item", "ii. item", "a) item"] {
1637            let input = format!("text\n{line}\n");
1638            assert!(
1639                matches!(
1640                    greedy_blocks(&input, ext).as_slice(),
1641                    [Block::Para(_), Block::Para(_)]
1642                ),
1643                "expected two paragraphs for {input:?}"
1644            );
1645        }
1646        // With `space_in_atx_header` off, a hash run glued to a marker opens a heading rather than
1647        // continuing the paragraph.
1648        assert!(
1649            matches!(
1650                greedy_blocks("text\n#) item\n", ext).as_slice(),
1651                [Block::Para(_), Block::Header(1, _, _)]
1652            ),
1653            "expected a paragraph then a heading for a glued hash marker"
1654        );
1655    }
1656
1657    #[test]
1658    fn definition_and_example_markers_end_a_greedy_paragraph() {
1659        // Under `lists_without_preceding_blankline`, a definition marker (`:`/`~`) with definition
1660        // lists off, and an example marker (`(@)`, `(@label)`) with example lists off, each end a
1661        // greedy paragraph and start a fresh one rather than folding in — even though no list opens.
1662        let ext = &[Extension::ListsWithoutPrecedingBlankline];
1663        for line in [": def", "~ def", "(@) item", "(@label) item"] {
1664            let input = format!("text\n{line}\n");
1665            assert!(
1666                matches!(
1667                    greedy_blocks(&input, ext).as_slice(),
1668                    [Block::Para(_), Block::Para(_)]
1669                ),
1670                "expected two paragraphs for {input:?}"
1671            );
1672        }
1673    }
1674
1675    #[test]
1676    fn a_definition_marker_opens_a_list_when_definition_lists_are_on() {
1677        // With definition lists enabled the same marker instead turns the paragraph into a term and
1678        // opens a definition list, so it does not split into two plain paragraphs.
1679        let ext = &[
1680            Extension::ListsWithoutPrecedingBlankline,
1681            Extension::DefinitionLists,
1682        ];
1683        assert!(matches!(
1684            greedy_blocks("text\n: def\n", ext).as_slice(),
1685            [Block::DefinitionList(_)]
1686        ));
1687    }
1688
1689    #[test]
1690    fn a_decimal_marker_closed_by_one_paren_stays_prose() {
1691        // `2)` is too easily ordinary prose, so it neither opens a list nor ends the paragraph; the
1692        // two lines fold into a single paragraph.
1693        let ext = &[Extension::ListsWithoutPrecedingBlankline];
1694        assert!(matches!(
1695            greedy_blocks("text\n2) still prose\n", ext).as_slice(),
1696            [Block::Para(_)]
1697        ));
1698    }
1699
1700    #[test]
1701    fn a_greedy_paragraph_folds_a_fenced_div_and_footnote_definition() {
1702        assert!(matches!(
1703            greedy_blocks("text\n::: note\nx\n:::\n", &[Extension::FencedDivs]).as_slice(),
1704            [Block::Para(_)]
1705        ));
1706        assert!(matches!(
1707            greedy_blocks("text\n[^1]: a note\n", &[Extension::Footnotes]).as_slice(),
1708            [Block::Para(_)]
1709        ));
1710    }
1711
1712    #[test]
1713    fn a_definition_marker_ends_an_open_footnote_definition() {
1714        // A footnote-definition marker folds into an ordinary paragraph, but when the paragraph it
1715        // would continue is itself a definition's body, the marker ends that definition and opens a
1716        // new one — so consecutive definitions, and a marker after a definition's continuation line,
1717        // stay separate rather than being swallowed.
1718        let blocks = greedy_blocks(
1719            "x[^1] y[^2]\n\n[^1]: one\n[^2]: two\n",
1720            &[Extension::Footnotes],
1721        );
1722        let notes: Vec<_> = blocks
1723            .iter()
1724            .flat_map(|block| match block {
1725                Block::Para(inlines) => inlines.clone(),
1726                _ => Vec::new(),
1727            })
1728            .filter(|inline| matches!(inline, Inline::Note(_)))
1729            .collect();
1730        assert_eq!(notes.len(), 2, "each definition resolves to its own note");
1731        for note in &notes {
1732            let Inline::Note(body) = note else { continue };
1733            let Some(Block::Para(para)) = body.first() else {
1734                panic!("a note holds a single-line paragraph");
1735            };
1736            assert_eq!(para.len(), 1, "no following definition is swallowed in");
1737        }
1738    }
1739
1740    #[test]
1741    fn a_closed_fenced_code_block_ends_a_greedy_paragraph() {
1742        // A backtick fence ends a greedy paragraph only once its character is enabled and it is
1743        // closed; the block then opens as its own sibling.
1744        assert!(matches!(
1745            greedy_blocks("text\n```\ncode\n```\n", &[Extension::BacktickCodeBlocks]).as_slice(),
1746            [Block::Para(_), Block::CodeBlock(_, _)]
1747        ));
1748    }
1749
1750    #[test]
1751    fn a_fence_without_its_character_enabled_folds_into_the_paragraph() {
1752        // With no `backtick_code_blocks`, the fence names no code block; the run of lines up to its
1753        // close stays paragraph text, where the matching backtick runs read as an inline code span.
1754        assert!(matches!(
1755            greedy_blocks("text\n```\ncode\n```\n", &[]).as_slice(),
1756            [Block::Para(_)]
1757        ));
1758    }
1759
1760    #[test]
1761    fn an_unclosed_fence_folds_into_the_paragraph() {
1762        // A fence with its character enabled but no closing fence opens nothing: it runs to the end
1763        // of its container, so the dialect keeps its lines as paragraph text instead.
1764        assert!(matches!(
1765            greedy_blocks("text\n```\ncode\n", &[Extension::BacktickCodeBlocks]).as_slice(),
1766            [Block::Para(_)]
1767        ));
1768    }
1769
1770    #[test]
1771    fn a_blank_line_lets_a_block_open_after_a_greedy_paragraph() {
1772        assert!(matches!(
1773            greedy_blocks("text\n\n# heading\n", &[]).as_slice(),
1774            [Block::Para(_), Block::Header(_, _, _)]
1775        ));
1776        assert!(matches!(
1777            greedy_blocks("text\n\n- item\n", &[]).as_slice(),
1778            [Block::Para(_), Block::BulletList(_)]
1779        ));
1780    }
1781
1782    #[test]
1783    fn sibling_list_items_are_not_folded_into_each_other() {
1784        // Greediness suppresses only a fresh list interrupting a paragraph, never the markers that
1785        // continue an open list.
1786        let blocks = greedy_blocks("- a\n- b\n", &[]);
1787        let [Block::BulletList(items)] = blocks.as_slice() else {
1788            panic!("expected a bullet list");
1789        };
1790        assert_eq!(items.len(), 2);
1791    }
1792
1793    #[test]
1794    fn a_sublist_opens_under_an_item_regardless_of_its_start_number() {
1795        // An indented ordered marker opens a sublist even when it does not start at one.
1796        let blocks = greedy_blocks("1. a\n   3. b\n", &[Extension::FancyLists]);
1797        let [Block::OrderedList(_, items)] = blocks.as_slice() else {
1798            panic!("expected an ordered list");
1799        };
1800        let [first] = items.as_slice() else {
1801            panic!("expected one outer item");
1802        };
1803        assert!(
1804            first
1805                .iter()
1806                .any(|block| matches!(block, Block::OrderedList(_, _))),
1807            "the item should contain a nested ordered list"
1808        );
1809    }
1810
1811    #[test]
1812    fn a_yaml_metadata_block_populates_meta_and_is_removed_from_the_body() {
1813        use carta_ast::MetaValue;
1814        let doc = document(
1815            "---\ntitle: A Note\nflag: true\nempty: ~\nrevision: 007\n---\n\nBody.\n",
1816            &[Extension::YamlMetadataBlock],
1817        );
1818        assert!(matches!(
1819            doc.meta.get("title"),
1820            Some(MetaValue::MetaInlines(_))
1821        ));
1822        assert_eq!(doc.meta.get("flag"), Some(&MetaValue::MetaBool(true)));
1823        assert_eq!(
1824            doc.meta.get("empty"),
1825            Some(&MetaValue::MetaString(carta_ast::Text::default()))
1826        );
1827        // An unquoted numeric scalar is canonicalized before it is parsed as inline markdown.
1828        assert_eq!(
1829            doc.meta.get("revision"),
1830            Some(&MetaValue::MetaInlines(vec![Inline::Str(
1831                "7".to_owned().into()
1832            )]))
1833        );
1834        assert!(matches!(doc.blocks.as_slice(), [Block::Para(_)]));
1835    }
1836
1837    #[test]
1838    fn a_yaml_block_without_a_closing_fence_is_not_metadata() {
1839        let doc = document(
1840            "---\ntitle: A Note\n\nBody.\n",
1841            &[Extension::YamlMetadataBlock],
1842        );
1843        assert!(doc.meta.is_empty());
1844    }
1845
1846    #[test]
1847    fn yaml_metadata_is_inert_without_the_extension() {
1848        let doc = document("---\nk: v\n---\n\nBody.\n", &[]);
1849        assert!(doc.meta.is_empty());
1850    }
1851
1852    #[test]
1853    fn a_title_block_sets_title_author_and_date() {
1854        use carta_ast::MetaValue;
1855        let doc = document(
1856            "% A Note\n% Ada; Grace\n% 2026\n\nBody.\n",
1857            &[Extension::PandocTitleBlock],
1858        );
1859        assert!(matches!(
1860            doc.meta.get("title"),
1861            Some(MetaValue::MetaInlines(_))
1862        ));
1863        match doc.meta.get("author") {
1864            Some(MetaValue::MetaList(authors)) => assert_eq!(authors.len(), 2),
1865            other => panic!("expected two authors, got {other:?}"),
1866        }
1867        assert!(matches!(
1868            doc.meta.get("date"),
1869            Some(MetaValue::MetaInlines(_))
1870        ));
1871        assert!(matches!(doc.blocks.as_slice(), [Block::Para(_)]));
1872    }
1873
1874    #[test]
1875    fn malformed_yaml_metadata_is_an_error() {
1876        let mut options = ReaderOptions::default();
1877        options.extensions = Extensions::from_list(&[Extension::YamlMetadataBlock]);
1878        let error = CommonmarkReader
1879            .read("---\nx: [\n---\n\nBody.\n", &options)
1880            .expect_err("malformed metadata should fail");
1881        assert!(matches!(error, carta_core::Error::InvalidMetadata(_)));
1882    }
1883
1884    /// The inline caption of the first block, or `None` when that block is not a table or carries no
1885    /// caption.
1886    fn caption_inlines(blocks: &[Block]) -> Option<&[Inline]> {
1887        let Block::Table(table) = blocks.first()? else {
1888            return None;
1889        };
1890        match table.caption.long.as_slice() {
1891            [Block::Plain(inlines)] => Some(inlines),
1892            _ => None,
1893        }
1894    }
1895
1896    #[test]
1897    fn a_pipe_table_takes_a_below_caption() {
1898        let doc = document(
1899            "| a | b |\n|---|---|\n| 1 | 2 |\n\nTable: A caption.\n",
1900            &[Extension::PipeTables, Extension::TableCaptions],
1901        );
1902        assert!(matches!(doc.blocks.as_slice(), [Block::Table(_)]));
1903        let inlines = caption_inlines(&doc.blocks).expect("captioned table");
1904        assert_eq!(inlines.first(), Some(&Inline::Str("A".to_owned().into())));
1905    }
1906
1907    #[test]
1908    fn an_indented_simple_table_header_aligns_against_its_own_column() {
1909        // The header sits two columns in from the ruling. Within each column the dashes are flush
1910        // with the header text on the right and reach past it on the left, so the columns are right-
1911        // or center-aligned — not the left alignment a header read at the ruling's margin would give.
1912        let doc = read_markdown(
1913            "  Right     Left     Center\n-------   ------   ----------\n     12     12        12\n",
1914            &[Extension::SimpleTables],
1915        );
1916        let aligns: Vec<Alignment> = match doc.blocks.as_slice() {
1917            [Block::Table(table)] => table
1918                .col_specs
1919                .iter()
1920                .map(|spec| spec.align.clone())
1921                .collect(),
1922            other => panic!("expected a single table, got {other:?}"),
1923        };
1924        assert_eq!(
1925            aligns,
1926            vec![
1927                Alignment::AlignRight,
1928                Alignment::AlignRight,
1929                Alignment::AlignCenter,
1930            ]
1931        );
1932    }
1933
1934    #[test]
1935    fn a_paragraph_interrupted_by_an_html_block_reads_tight() {
1936        // No blank line separates the paragraph from the div, so the div interrupts it as a block
1937        // and the paragraph reads tight — `Plain` rather than `Para`.
1938        let doc = read_markdown(
1939            "text before\n<div>\ninside\n</div>\n",
1940            &[Extension::MarkdownInHtmlBlocks, Extension::NativeDivs],
1941        );
1942        assert!(
1943            matches!(doc.blocks.as_slice(), [Block::Plain(_), Block::Div(..)]),
1944            "expected a tight paragraph then a div, got {:?}",
1945            doc.blocks
1946        );
1947
1948        // A blank line before the element leaves the paragraph loose, so it stays a full paragraph.
1949        let loose = read_markdown(
1950            "text before\n\n<div>\ninside\n</div>\n",
1951            &[Extension::MarkdownInHtmlBlocks, Extension::NativeDivs],
1952        );
1953        assert!(
1954            matches!(loose.blocks.as_slice(), [Block::Para(_), Block::Div(..)]),
1955            "expected a loose paragraph then a div, got {:?}",
1956            loose.blocks
1957        );
1958    }
1959
1960    #[test]
1961    fn a_simple_table_takes_an_above_caption() {
1962        let doc = document(
1963            "table: Above it.\n\nName   Age\n----   ---\nAnn    9\n",
1964            &[Extension::SimpleTables, Extension::TableCaptions],
1965        );
1966        assert!(matches!(doc.blocks.as_slice(), [Block::Table(_)]));
1967        assert!(caption_inlines(&doc.blocks).is_some());
1968    }
1969
1970    #[test]
1971    fn a_multiline_caption_folds_across_lines() {
1972        let doc = document(
1973            "| a | b |\n|---|---|\n| 1 | 2 |\n\nTable: First line\nsecond line.\n",
1974            &[Extension::PipeTables, Extension::TableCaptions],
1975        );
1976        let inlines = caption_inlines(&doc.blocks).expect("captioned table");
1977        assert!(inlines.contains(&Inline::SoftBreak));
1978    }
1979
1980    #[test]
1981    fn a_bare_colon_below_a_pipe_table_is_a_caption_not_a_definition() {
1982        // The `:` marker also opens a definition list; below a pipe table it is the table's caption,
1983        // so the table must survive rather than collapsing into a definition term.
1984        let doc = document(
1985            "| a | b |\n|---|---|\n| 1 | 2 |\n\n: A bare-colon caption.\n",
1986            &[
1987                Extension::PipeTables,
1988                Extension::TableCaptions,
1989                Extension::DefinitionLists,
1990            ],
1991        );
1992        assert!(matches!(doc.blocks.as_slice(), [Block::Table(_)]));
1993        assert!(caption_inlines(&doc.blocks).is_some());
1994    }
1995
1996    #[test]
1997    fn an_uppercase_table_marker_is_not_a_caption() {
1998        let doc = document(
1999            "| a | b |\n|---|---|\n| 1 | 2 |\n\nTABLE: not a caption\n",
2000            &[Extension::PipeTables, Extension::TableCaptions],
2001        );
2002        assert!(matches!(
2003            doc.blocks.as_slice(),
2004            [Block::Table(_), Block::Para(_)]
2005        ));
2006        assert!(caption_inlines(&doc.blocks).is_none());
2007    }
2008
2009    #[test]
2010    fn an_ordinary_definition_list_is_unaffected_by_caption_handling() {
2011        let doc = document(
2012            "Term\n\n: Its definition.\n",
2013            &[
2014                Extension::PipeTables,
2015                Extension::TableCaptions,
2016                Extension::DefinitionLists,
2017            ],
2018        );
2019        assert!(matches!(doc.blocks.as_slice(), [Block::DefinitionList(_)]));
2020    }
2021
2022    /// The inlines of a single-paragraph markdown-dialect document, for inline assertions.
2023    fn md_para(input: &str, exts: &[Extension]) -> Vec<Inline> {
2024        match read_markdown(input, exts).blocks.as_slice() {
2025            [Block::Para(inlines)] => inlines.clone(),
2026            other => panic!("expected a single paragraph, got {other:?}"),
2027        }
2028    }
2029
2030    // --- Gap 1: triple-emphasis nests strong on the outside, emphasis on the inside ---
2031
2032    #[test]
2033    fn markdown_nests_strong_outside_emph_for_a_triple_run() {
2034        let inlines = md_para("***both***\n", &[]);
2035        assert!(
2036            matches!(
2037                inlines.as_slice(),
2038                [Inline::Strong(inner)]
2039                    if matches!(inner.as_slice(), [Inline::Emph(text)]
2040                        if matches!(text.as_slice(), [Inline::Str(s)] if s == "both"))
2041            ),
2042            "expected Strong[Emph[both]], got {inlines:?}"
2043        );
2044    }
2045
2046    #[test]
2047    fn markdown_keeps_a_run_of_four_delimiters_literal() {
2048        // Four `*` open no emphasis in the markdown dialect; the run stays text.
2049        let inlines = md_para("****a****\n", &[]);
2050        assert!(
2051            matches!(inlines.as_slice(), [Inline::Str(s)] if s == "****a****"),
2052            "expected literal text, got {inlines:?}"
2053        );
2054    }
2055
2056    #[test]
2057    fn markdown_underscore_triple_run_also_nests_strong_outside() {
2058        let inlines = md_para("___both___\n", &[]);
2059        assert!(
2060            matches!(
2061                inlines.as_slice(),
2062                [Inline::Strong(inner)] if matches!(inner.as_slice(), [Inline::Emph(_)])
2063            ),
2064            "expected Strong[Emph[..]], got {inlines:?}"
2065        );
2066    }
2067
2068    // --- Gap 2: explicit angle autolinks carry a uri/email class ---
2069
2070    fn single_link(inlines: &[Inline]) -> Option<(&Attr, &Target)> {
2071        match inlines {
2072            [Inline::Link(attr, _, target)] => Some((attr, target)),
2073            _ => None,
2074        }
2075    }
2076
2077    #[test]
2078    fn markdown_uri_autolink_carries_the_uri_class() {
2079        let inlines = md_para("<http://example.com>\n", &[]);
2080        let (attr, target) = single_link(&inlines).expect("a single link");
2081        assert_eq!(attr.classes, vec!["uri".to_owned()]);
2082        assert_eq!(target.url, "http://example.com");
2083    }
2084
2085    #[test]
2086    fn markdown_email_autolink_carries_the_email_class_and_mailto_url() {
2087        let inlines = md_para("<a@b.com>\n", &[]);
2088        let (attr, target) = single_link(&inlines).expect("a single link");
2089        assert_eq!(attr.classes, vec!["email".to_owned()]);
2090        assert_eq!(target.url, "mailto:a@b.com");
2091    }
2092
2093    #[test]
2094    fn markdown_scheme_autolink_carries_the_uri_class() {
2095        for input in ["<ftp://x.y>\n", "<mailto:a@b.com>\n", "<tel:+123>\n"] {
2096            let inlines = md_para(input, &[]);
2097            let (attr, _) = single_link(&inlines).expect("a single link");
2098            assert_eq!(attr.classes, vec!["uri".to_owned()], "for {input:?}");
2099        }
2100    }
2101
2102    #[test]
2103    fn commonmark_angle_autolink_carries_no_class() {
2104        // In the strict CommonMark dialect the autolink class list is empty.
2105        let inlines = match blocks("<http://example.com>\n").as_slice() {
2106            [Block::Para(inlines)] => inlines.clone(),
2107            other => panic!("expected a paragraph, got {other:?}"),
2108        };
2109        let (attr, _) = single_link(&inlines).expect("a single link");
2110        assert!(
2111            attr.classes.is_empty(),
2112            "expected empty classes, got {attr:?}"
2113        );
2114    }
2115
2116    // --- Gap 5: balanced parentheses inside an inline link destination ---
2117
2118    #[test]
2119    fn markdown_link_destination_keeps_balanced_inner_parentheses() {
2120        let inlines = md_para("[c](/u (d))\n", &[]);
2121        let (_, target) = single_link(&inlines).expect("a single link");
2122        // The space is percent-encoded and the inner `(d)` is part of the destination.
2123        assert_eq!(target.url, "/u%20(d)");
2124        assert_eq!(target.title, "");
2125    }
2126
2127    #[test]
2128    fn markdown_link_destination_separates_a_trailing_title() {
2129        let inlines = md_para("[c](/u (d) \"t\")\n", &[]);
2130        let (_, target) = single_link(&inlines).expect("a single link");
2131        assert_eq!(target.url, "/u%20(d)");
2132        assert_eq!(target.title, "t");
2133    }
2134
2135    #[test]
2136    fn markdown_link_destination_keeps_nested_balanced_parentheses() {
2137        let inlines = md_para("[c](/u(a(b)c)d)\n", &[]);
2138        let (_, target) = single_link(&inlines).expect("a single link");
2139        assert_eq!(target.url, "/u(a(b)c)d");
2140    }
2141
2142    // --- Gap 6: tilde delimiter runs resolve to subscript or strikeout ---
2143
2144    #[test]
2145    fn markdown_single_tilde_pair_is_a_subscript() {
2146        let inlines = md_para("z ~x~\n", &[Extension::Subscript, Extension::Strikeout]);
2147        assert!(
2148            inlines.iter().any(|i| matches!(i, Inline::Subscript(_))),
2149            "expected a subscript, got {inlines:?}"
2150        );
2151    }
2152
2153    #[test]
2154    fn markdown_double_tilde_pair_is_a_strikeout() {
2155        let inlines = md_para("z ~~x~~\n", &[Extension::Subscript, Extension::Strikeout]);
2156        assert!(
2157            inlines.iter().any(|i| matches!(i, Inline::Strikeout(_))),
2158            "expected a strikeout, got {inlines:?}"
2159        );
2160    }
2161
2162    #[test]
2163    fn markdown_triple_tilde_run_collapses_to_a_single_subscript() {
2164        // The whole odd run is consumed into one subscript; no strikeout nests inside it.
2165        let inlines = md_para(
2166            "z ~~~triple~~~\n",
2167            &[Extension::Subscript, Extension::Strikeout],
2168        );
2169        let sub = inlines
2170            .iter()
2171            .find_map(|i| match i {
2172                Inline::Subscript(content) => Some(content.clone()),
2173                _ => None,
2174            })
2175            .expect("a subscript");
2176        assert!(
2177            matches!(sub.as_slice(), [Inline::Str(s)] if s == "triple"),
2178            "expected Subscript[triple], got {sub:?}"
2179        );
2180        assert!(
2181            !inlines.iter().any(|i| matches!(i, Inline::Strikeout(_))),
2182            "a triple-tilde run should not form a strikeout: {inlines:?}"
2183        );
2184    }
2185}