Skip to main content

carta_readers/
rst.rs

1//! reStructuredText reader.
2//!
3//! Parsing runs in two structural passes. The first pass scans the whole input for the explicit
4//! markup that defines document-global references — hyperlink targets, substitution definitions,
5//! footnotes, and citations — since a reference may resolve against a definition that appears later.
6//! The second pass walks the line structure block by block, building the document tree and resolving
7//! each reference against the collected definitions. Inline markup is parsed from the raw text of
8//! each leaf during the second pass.
9
10use std::collections::{BTreeMap, VecDeque};
11
12use carta_ast::{
13    Alignment, Attr, Block, Caption, Cell, ColSpec, ColWidth, Document, Format, Inline,
14    ListAttributes, ListNumberDelim, ListNumberStyle, MathType, QuoteType, Row, Table, TableBody,
15    TableFoot, TableHead, Target, Text,
16};
17use carta_core::{Extension, Extensions, Reader, ReaderOptions, Result};
18
19use crate::heading_ids::{IdRegistry, IdScheme};
20use crate::inline_text::trim_inline_ends;
21
/// Parses reStructuredText into the document model.
///
/// `auto_identifiers` (on by default) derives a slug identifier for each section header; with it
/// off, headers carry no identifier.
///
/// The reader is a field-less unit type: it holds no state of its own, and each call to `read`
/// builds a fresh parser for the input it is given.
#[derive(Debug, Default, Clone, Copy)]
pub struct RstReader;
28
29impl Reader for RstReader {
30    fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document> {
31        let lines = preprocess(input);
32        let defs = collect_definitions(&lines);
33        let mut parser = Parser {
34            defs: &defs,
35            ext: options.extensions,
36            heading_styles: Vec::new(),
37            ids: IdRegistry::default(),
38            auto_footnote: 0,
39            symbol_footnote: 0,
40            anonymous: 0,
41            custom_roles: BTreeMap::new(),
42            default_role: DEFAULT_ROLE.to_string(),
43            include_depth: 0,
44            active_substitutions: Vec::new(),
45            deferred: BTreeMap::new(),
46        };
47        let mut blocks = parser.blocks(&lines);
48        if let Some(div) = parser.citation_block() {
49            blocks.push(div);
50        }
51        parser.resolve_deferred(&mut blocks);
52        Ok(Document {
53            blocks,
54            ..Document::default()
55        })
56    }
57}
58
59// --- preprocessing -----------------------------------------------------------------------------
60
/// Width of a tab stop in columns: a tab advances to the next multiple of this value when tabs
/// are expanded to spaces.
const TAB_STOP: usize = 8;
62
/// A reserved first-class marker on a `Div` left by an empty `class` directive, signaling that the
/// directive's classes apply to the next sibling block. Carries a NUL so it cannot collide with a
/// class name drawn from the input.
///
/// The NUL (U+0000) is the very first character of the value.
const PENDING_CLASS: &str = "\u{0}pending-class";
67
/// Prefix marking a link destination that names an unresolved reference rather than a concrete URL.
/// A reference may point at a target or section that appears later in the document, so the link is
/// emitted carrying this marker plus the normalized name and resolved in a final pass once every
/// definition is known. The leading NUL keeps it from colliding with any real destination.
///
/// The value is `ref` enclosed between two NUL (U+0000) characters; the normalized name follows
/// directly after the closing NUL.
const REF_SENTINEL: &str = "\u{0}ref\u{0}";
73
74/// Mark a normalized reference name as an unresolved link destination, to be filled in once every
75/// definition in the document has been seen. A name's destination cannot be known at the reference
76/// site because it may be defined later (a forward reference) or redefined (the last definition
77/// wins); the marker carries the name through tree construction so a final pass can resolve it.
78fn defer_reference(name: &str) -> String {
79    format!("{REF_SENTINEL}{}", normalize_name(name))
80}
81
/// Recover the target name that an indirect destination refers to.
///
/// An indirect destination is another target's name followed by a single underscore (`other_` or
/// `` `other name`_ ``). The underscore is removed, then surrounding whitespace and backtick
/// quoting are stripped. Returns `None` when `url` does not end in an underscore, or ends in two
/// (an anonymous reference rather than an indirect name).
fn indirect_referent(url: &str) -> Option<String> {
    match url.strip_suffix('_') {
        Some(stem) if !stem.ends_with('_') => {
            let name = stem.trim().trim_matches('`').trim();
            Some(name.to_owned())
        }
        _ => None,
    }
}
93
94/// Percent-encode the characters a URL may not carry literally: whitespace and the delimiter set
95/// `<>|"{}[]^` plus the backtick. Each such character's UTF-8 bytes become `%XX` with uppercase
96/// hexadecimal digits; every other character passes through unchanged.
97fn escape_uri(url: &str) -> String {
98    let mut out = String::with_capacity(url.len());
99    for ch in url.chars() {
100        if ch.is_whitespace()
101            || matches!(
102                ch,
103                '<' | '>' | '|' | '"' | '{' | '}' | '[' | ']' | '^' | '`'
104            )
105        {
106            let mut buf = [0u8; 4];
107            for &byte in ch.encode_utf8(&mut buf).as_bytes() {
108                out.push('%');
109                out.push(hex_digit(byte >> 4));
110                out.push(hex_digit(byte & 0x0f));
111            }
112        } else {
113            out.push(ch);
114        }
115    }
116    out
117}
118
/// Map a nibble (`0..=15`) to its uppercase hexadecimal digit.
///
/// Callers only pass values in that range; larger values are not meaningful.
fn hex_digit(nibble: u8) -> char {
    let code = if nibble < 10 {
        b'0' + nibble
    } else {
        b'A' + (nibble - 10)
    };
    char::from(code)
}
127
128/// Normalize line endings, expand tabs to spaces on an eight-column grid, and split into lines with
129/// trailing whitespace removed.
130fn preprocess(input: &str) -> Vec<String> {
131    input
132        .replace("\r\n", "\n")
133        .replace('\r', "\n")
134        .split('\n')
135        .map(|line| expand_tabs(line).trim_end().to_string())
136        .collect()
137}
138
139fn expand_tabs(line: &str) -> String {
140    let mut out = String::with_capacity(line.len());
141    let mut col = 0;
142    for ch in line.chars() {
143        if ch == '\t' {
144            let next = (col / TAB_STOP + 1) * TAB_STOP;
145            while col < next {
146                out.push(' ');
147                col += 1;
148            }
149        } else {
150            out.push(ch);
151            col += 1;
152        }
153    }
154    out
155}
156
/// Whether `line` is empty or consists solely of whitespace.
fn is_blank(line: &str) -> bool {
    line.trim().is_empty()
}
160
/// The number of space characters at the start of `line`. Only U+0020 counts; any other
/// character, including other whitespace, ends the run.
fn indent_of(line: &str) -> usize {
    // A space is one byte, so the byte-length difference equals the number of spaces removed.
    line.len() - line.trim_start_matches(' ').len()
}
164
/// The line at index `i`, or the empty string when `i` is past the end of `lines`.
fn line_at(lines: &[String], i: usize) -> &str {
    match lines.get(i) {
        Some(line) => line.as_str(),
        None => "",
    }
}
168
/// Normalize a reference name for lookup: leading and trailing whitespace is dropped, every
/// internal whitespace run becomes a single space, and the result is lowercased.
fn normalize_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
176
/// Remove up to `count` leading spaces from `line`.
///
/// When the line has fewer than `count` leading spaces, only those spaces are removed, so content
/// that starts before the cut is never truncated.
fn dedent(line: &str, count: usize) -> String {
    let leading = line.bytes().take_while(|b| *b == b' ').count();
    let cut = leading.min(count);
    // `cut` only ever spans ASCII spaces, so it always lands on a character boundary.
    line.get(cut..).unwrap_or("").to_string()
}
190
191// --- adornments and markers --------------------------------------------------------------------
192
/// The punctuation characters that may form an adornment or transition line.
const ADORNMENT_CHARS: &str = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

/// The character a section adornment or transition line is made of.
///
/// After trimming surrounding whitespace, the line must be a non-empty run of one single
/// character drawn from `ADORNMENT_CHARS`; otherwise the result is `None`.
fn adornment_char(line: &str) -> Option<char> {
    let run = line.trim();
    let lead = run
        .chars()
        .next()
        .filter(|c| ADORNMENT_CHARS.contains(*c))?;
    run.chars().all(|c| c == lead).then_some(lead)
}
210
/// The characters accepted as a bullet-list marker.
const BULLETS: &str = "*+-\u{2022}\u{2023}\u{2043}";

/// The column at which a bullet item's content starts, or `None` when `line` does not open with a
/// bullet marker.
///
/// The marker must be the first character and be followed by a space or by the end of the line.
/// A bare marker reports column 1; otherwise the column lies past the marker and every space
/// after it.
fn bullet_content_col(line: &str) -> Option<usize> {
    let marker = line.chars().next().filter(|c| BULLETS.contains(*c))?;
    let after = line.get(marker.len_utf8()..)?;
    if after.is_empty() {
        return Some(1);
    }
    // Anything other than a space right after the marker makes this ordinary text.
    let body = after.strip_prefix(' ')?;
    let extra_spaces = body.len() - body.trim_start_matches(' ').len();
    Some(2 + extra_spaces)
}
226
/// The value of `text` read as a Roman numeral (case-insensitive), or `None` when it is not one.
///
/// The numeral is read right to left: a letter smaller than the largest letter seen so far is
/// subtracted, any other letter is added. The reading is lenient — non-canonical spellings such
/// as `iiii` are accepted — but every character must be one of `i v x l c d m`.
///
/// Returns `None` for the empty string, for any foreign character, when the total is not
/// positive, and when the total does not fit in an `i32`. The last case matters because the text
/// comes straight from the input: an absurdly long run of letters must be rejected rather than
/// overflow (a panic in debug builds, a wrapped bogus value in release builds).
fn roman_value(text: &str) -> Option<i32> {
    let mut total: i32 = 0;
    let mut prev = 0;
    for ch in text.chars().rev() {
        let value = match ch.to_ascii_lowercase() {
            'i' => 1,
            'v' => 5,
            'x' => 10,
            'l' => 50,
            'c' => 100,
            'd' => 500,
            'm' => 1000,
            _ => return None,
        };
        total = if value < prev {
            // A smaller letter to the left of a larger one is subtractive (`iv`, `xc`).
            total.checked_sub(value)?
        } else {
            prev = value;
            total.checked_add(value)?
        };
    }
    (total > 0).then_some(total)
}
250
251/// The parsed leading marker of an enumerated list item: its start value, numeral style, delimiter,
252/// and the column at which its content begins.
253fn enumerator(line: &str) -> Option<(i32, ListNumberStyle, ListNumberDelim, usize)> {
254    let bytes: Vec<char> = line.chars().collect();
255    let (two_parens, numeral_start) = match bytes.first() {
256        Some('(') => (true, 1),
257        _ => (false, 0),
258    };
259    let mut end = numeral_start;
260    while let Some(ch) = bytes.get(end) {
261        if ch.is_ascii_alphanumeric() || *ch == '#' {
262            end += 1;
263        } else {
264            break;
265        }
266    }
267    let numeral: String = bytes.get(numeral_start..end)?.iter().collect();
268    if numeral.is_empty() {
269        return None;
270    }
271    let (style, start) = classify_numeral(&numeral)?;
272    let delim = if two_parens {
273        if bytes.get(end) != Some(&')') {
274            return None;
275        }
276        end += 1;
277        ListNumberDelim::TwoParens
278    } else {
279        match bytes.get(end) {
280            Some('.') => {
281                end += 1;
282                ListNumberDelim::Period
283            }
284            Some(')') => {
285                end += 1;
286                ListNumberDelim::OneParen
287            }
288            _ => return None,
289        }
290    };
291    // An auto-numbered (`#`) enumerator carries no concrete style or delimiter.
292    let delim = if numeral == "#" {
293        ListNumberDelim::DefaultDelim
294    } else {
295        delim
296    };
297    // An enumerator must be followed by whitespace; a marker that ends the line is ordinary text.
298    match bytes.get(end) {
299        Some(' ') => {
300            let spaces = bytes
301                .get(end + 1..)?
302                .iter()
303                .take_while(|c| **c == ' ')
304                .count();
305            Some((start, style, delim, end + 1 + spaces))
306        }
307        _ => None,
308    }
309}
310
311fn classify_numeral(numeral: &str) -> Option<(ListNumberStyle, i32)> {
312    if numeral == "#" {
313        return Some((ListNumberStyle::DefaultStyle, 1));
314    }
315    if numeral.chars().all(|c| c.is_ascii_digit()) {
316        return numeral
317            .parse::<i32>()
318            .ok()
319            .map(|n| (ListNumberStyle::Decimal, n));
320    }
321    // A lone letter is ambiguous between alphabetic and Roman numbering; it defaults to alphabetic
322    // unless it is `i`/`I`, the only single letter taken as Roman. A multi-letter token that is a
323    // valid Roman numeral (`iv`, `xii`) is Roman.
324    let mut chars = numeral.chars();
325    let single = chars.next()?;
326    if chars.next().is_none() && single.is_ascii_alphabetic() && !matches!(single, 'i' | 'I') {
327        let ordinal = i32::from((single.to_ascii_lowercase() as u8) - b'a' + 1);
328        let style = if single.is_ascii_uppercase() {
329            ListNumberStyle::UpperAlpha
330        } else {
331            ListNumberStyle::LowerAlpha
332        };
333        return Some((style, ordinal));
334    }
335    if let Some(value) = roman_value(numeral) {
336        let style = if numeral.chars().all(|c| c.is_ascii_uppercase()) {
337            ListNumberStyle::UpperRoman
338        } else {
339            ListNumberStyle::LowerRoman
340        };
341        return Some((style, value));
342    }
343    None
344}
345
/// Whether `ch` is, ignoring ASCII case, one of the seven Roman-numeral letters.
fn is_roman_letter(ch: char) -> bool {
    "ivxlcdm".contains(ch.to_ascii_lowercase())
}
353
/// The numeral token at the start of `line`: the run of ASCII letters, digits, or `#` that follows
/// an optional opening parenthesis. Returns `None` when that run is empty.
///
/// Only the token is extracted — the delimiter after it is not checked here. It is used to
/// reinterpret an ambiguous single-letter enumerator against an already-established list style.
fn enum_numeral(line: &str) -> Option<String> {
    let body = line.strip_prefix('(').unwrap_or(line);
    let numeral: String = body
        .chars()
        .take_while(|c| c.is_ascii_alphanumeric() || *c == '#')
        .collect();
    (!numeral.is_empty()).then_some(numeral)
}
374
375/// Whether a single-letter `numeral` continues a list whose established `style` it does not match on
376/// its own: any letter (of the style's case) continues an alphabetic list; only a Roman-numeral
377/// letter continues a Roman list.
378fn letter_continues(numeral: &str, style: ListNumberStyle) -> bool {
379    let mut chars = numeral.chars();
380    let (Some(ch), None) = (chars.next(), chars.next()) else {
381        return false;
382    };
383    if !ch.is_ascii_alphabetic() {
384        return false;
385    }
386    let upper = ch.is_ascii_uppercase();
387    match style {
388        ListNumberStyle::UpperAlpha => upper,
389        ListNumberStyle::LowerAlpha => !upper,
390        ListNumberStyle::UpperRoman => upper && is_roman_letter(ch),
391        ListNumberStyle::LowerRoman => !upper && is_roman_letter(ch),
392        _ => false,
393    }
394}
395
396/// Whether the enumerator opening `line` can belong to a list whose first item established `style`
397/// and `delim`. An auto-numbered (`#`) item joins any list and vice versa; otherwise the delimiter
398/// must match and the style must match directly or by an ambiguous single letter adopting it.
399fn enum_compatible(line: &str, style: ListNumberStyle, delim: ListNumberDelim) -> bool {
400    let Some((_, s, d, _)) = enumerator(line) else {
401        return false;
402    };
403    let item_auto = s == ListNumberStyle::DefaultStyle && d == ListNumberDelim::DefaultDelim;
404    let list_auto =
405        style == ListNumberStyle::DefaultStyle && delim == ListNumberDelim::DefaultDelim;
406    let style_ok = style == s || enum_numeral(line).is_some_and(|n| letter_continues(&n, style));
407    item_auto || list_auto || (style_ok && delim == d)
408}
409
410/// Whether the enumerated-list item whose first line is `lines[idx]` (content column `col`) is a
411/// well-formed item rather than the opening of an ordinary wrapped paragraph. The line after the
412/// item's first line must be blank, indented into the item, or itself a matching sibling enumerator;
413/// an under-indented line of ordinary text means the construct is a paragraph, not a list.
414fn item_well_formed(
415    lines: &[String],
416    idx: usize,
417    col: usize,
418    style: ListNumberStyle,
419    delim: ListNumberDelim,
420) -> bool {
421    let next = line_at(lines, idx + 1);
422    if is_blank(next) || indent_of(next) >= col {
423        return true;
424    }
425    enum_compatible(next, style, delim)
426}
427
/// Parse a field marker `:name: value` at the start of `line`.
///
/// Returns the field name and the column at which the value begins. The line must open with a
/// colon that is not immediately followed by another colon. The name runs to the first later
/// colon that is followed by a space or ends the line, so a colon followed directly by other text
/// is part of the name. The value column skips the single space after the closing colon when
/// there is one.
fn field_marker(line: &str) -> Option<(String, usize)> {
    let chars: Vec<char> = line.chars().collect();
    // `::` at the start can never open a field: the name would be empty.
    if chars.first() != Some(&':') || chars.get(1) == Some(&':') {
        return None;
    }

    let close = (2..chars.len()).find(|&idx| {
        chars.get(idx) == Some(&':') && matches!(chars.get(idx + 1), None | Some(' '))
    })?;

    let name: String = chars.get(1..close)?.iter().collect();
    let value_col = if close + 1 < chars.len() {
        close + 2
    } else {
        close + 1
    };
    Some((name, value_col))
}
455
456/// An option-list marker: an option group (a comma-separated run of `-a`, `-fARG`, `-f ARG`,
457/// `--word`, `--word=ARG`, or `/S` options) that fully fills the line up to the first run of two
458/// or more spaces (or the end of line). Returns the option-group text and the column at which the
459/// description body begins. The group must consume its entire candidate span — a trailing token
460/// after a single-space gap (e.g. `-f FILE extra`) is ordinary prose, not an option list.
461fn option_marker(line: &str) -> Option<(String, usize)> {
462    let chars: Vec<char> = line.chars().collect();
463    let gap = chars.windows(2).position(|pair| pair == [' ', ' ']);
464    let candidate_end = gap.unwrap_or(chars.len());
465    let candidate: String = chars.get(..candidate_end)?.iter().collect();
466    let candidate = candidate.trim_end();
467    if !valid_option_group(candidate) {
468        return None;
469    }
470    let value_col = match gap {
471        Some(g) => {
472            let mut v = g;
473            while chars.get(v) == Some(&' ') {
474                v += 1;
475            }
476            v
477        }
478        None => candidate.chars().count(),
479    };
480    Some((candidate.to_string(), value_col))
481}
482
483/// Whether `text` is a complete, comma-separated group of option specifiers with nothing left over.
484fn valid_option_group(text: &str) -> bool {
485    let chars: Vec<char> = text.chars().collect();
486    if chars.is_empty() {
487        return false;
488    }
489    let mut i = 0;
490    loop {
491        let Some(next) = parse_one_option(&chars, i) else {
492            return false;
493        };
494        i = next;
495        if i == chars.len() {
496            return true;
497        }
498        // Options are joined by a comma and a single space.
499        if chars.get(i) == Some(&',') && chars.get(i + 1) == Some(&' ') {
500            i += 2;
501        } else {
502            return false;
503        }
504    }
505}
506
507/// Parse a single option specifier starting at `i`, returning the index just past it (and any
508/// argument). Recognizes long options (`--word`, `--word=ARG`, `--word ARG`), short options
509/// (`-a`, `-aARG`, `-a ARG`), and DOS-style options (`/S`, `/S ARG`). Returns `None` if no valid
510/// specifier begins at `i`.
511fn parse_one_option(chars: &[char], i: usize) -> Option<usize> {
512    match chars.get(i) {
513        Some('-') if chars.get(i + 1) == Some(&'-') => {
514            // Long option: a name of letters, digits, and hyphens.
515            let mut j = i + 2;
516            let name_start = j;
517            while chars
518                .get(j)
519                .is_some_and(|c| c.is_ascii_alphanumeric() || *c == '-')
520            {
521                j += 1;
522            }
523            if j == name_start {
524                return None;
525            }
526            parse_optional_arg(chars, j)
527        }
528        Some('-') => {
529            // Short option: a hyphen and exactly one alphanumeric character.
530            let ch = chars.get(i + 1)?;
531            if !ch.is_ascii_alphanumeric() {
532                return None;
533            }
534            parse_optional_arg(chars, i + 2)
535        }
536        Some('/') => {
537            // DOS/VMS-style option: a slash and exactly one alphanumeric character.
538            let ch = chars.get(i + 1)?;
539            if !ch.is_ascii_alphanumeric() {
540                return None;
541            }
542            parse_optional_arg(chars, i + 2)
543        }
544        _ => None,
545    }
546}
547
/// Parse the optional argument that may follow an option specifier ending at index `i`.
///
/// When the character at `i` is `=` or a single space, a non-empty argument must follow it;
/// an empty one yields `None`. Otherwise any characters attached directly at `i` (as in `-fARG`)
/// are taken as the argument, which may be empty. An argument ends at whitespace, a comma, or the
/// end of input. Returns the index just past the option and its argument.
fn parse_optional_arg(chars: &[char], i: usize) -> Option<usize> {
    // Length of the argument-character run starting at `from`.
    let run_from = |from: usize| {
        chars
            .iter()
            .skip(from)
            .take_while(|c| !c.is_whitespace() && **c != ',')
            .count()
    };

    match chars.get(i) {
        Some('=' | ' ') => {
            let len = run_from(i + 1);
            (len > 0).then_some(i + 1 + len)
        }
        _ => Some(i + run_from(i)),
    }
}
577
578// --- explicit markup blocks --------------------------------------------------------------------
579
580/// The extent of an explicit-markup block (a `..` or `__` construct, a directive, or a comment):
581/// the index one past its last content line. The block runs over its first line plus all following
582/// blank or further-indented lines, up to but not including the next line indented no more than the
583/// marker.
584fn explicit_extent(lines: &[String], start: usize, marker_indent: usize) -> usize {
585    let mut last_content = start;
586    let mut i = start + 1;
587    while let Some(line) = lines.get(i) {
588        if is_blank(line) {
589            i += 1;
590        } else if indent_of(line) > marker_indent {
591            last_content = i;
592            i += 1;
593        } else {
594            break;
595        }
596    }
597    last_content + 1
598}
599
600/// The body region of an explicit-markup block: the first line's text after `prefix_len` columns,
601/// followed by the continuation lines dedented by their shared minimum indentation. A leading empty
602/// first-line remainder is dropped.
603fn explicit_body(lines: &[String], start: usize, end: usize, prefix_len: usize) -> Vec<String> {
604    let mut body = Vec::new();
605    let first = line_at(lines, start);
606    let remainder: String = first.chars().skip(prefix_len).collect();
607    if !remainder.trim().is_empty() {
608        body.push(remainder.trim_start().to_string());
609    }
610    let continuation: Vec<&String> = (start + 1..end).filter_map(|i| lines.get(i)).collect();
611    let min_indent = continuation
612        .iter()
613        .filter(|l| !is_blank(l))
614        .map(|l| indent_of(l))
615        .min()
616        .unwrap_or(0);
617    for line in continuation {
618        if is_blank(line) {
619            body.push(String::new());
620        } else {
621            body.push(dedent(line, min_indent));
622        }
623    }
624    while body.last().is_some_and(std::string::String::is_empty) {
625        body.pop();
626    }
627    body
628}
629
/// A classified explicit-markup construct, by the first non-`..` token on its line.
enum Explicit {
    /// A named hyperlink target: the text after `..` starts with a single underscore.
    Target,
    /// An anonymous hyperlink target: a line that is `__` (optionally followed by a space and
    /// more text), or a `..` line whose text starts with `__`.
    AnonymousTarget,
    /// A footnote definition, carrying the bracketed label as written (a number, `#`, `#name`,
    /// or `*`).
    Footnote(String),
    /// A citation definition, carrying the bracketed label as written.
    Citation(String),
    /// A substitution definition: the text after `..` starts with `|`.
    Substitution,
    /// A directive, carrying its lowercased name.
    Directive(String),
    /// Anything else after `..`, including a bare `..` and a bracket construct without a usable
    /// label.
    Comment,
}
640
641fn classify_explicit(line: &str) -> Option<Explicit> {
642    let trimmed = line.trim_start();
643    if trimmed == "__" || trimmed.starts_with("__ ") {
644        return Some(Explicit::AnonymousTarget);
645    }
646    if trimmed != ".." && !trimmed.starts_with(".. ") {
647        return None;
648    }
649    let rest = trimmed.strip_prefix("..").unwrap_or("").trim_start();
650    if rest.is_empty() {
651        return Some(Explicit::Comment);
652    }
653    if rest.starts_with("__") {
654        return Some(Explicit::AnonymousTarget);
655    }
656    if rest.starts_with('_') {
657        return Some(Explicit::Target);
658    }
659    if let Some(after) = rest.strip_prefix('[') {
660        if let Some(close) = after.find(']') {
661            let label = after.get(..close).unwrap_or("");
662            if !label.is_empty() {
663                return Some(if is_citation_label(label) {
664                    Explicit::Citation(label.to_string())
665                } else {
666                    Explicit::Footnote(label.to_string())
667                });
668            }
669        }
670        return Some(Explicit::Comment);
671    }
672    if rest.starts_with('|') {
673        return Some(Explicit::Substitution);
674    }
675    if let Some(name) = directive_name(rest) {
676        return Some(Explicit::Directive(name));
677    }
678    Some(Explicit::Comment)
679}
680
/// Whether a bracket label names a citation rather than a footnote.
///
/// Footnote labels are all-digit labels, `*`, and anything starting with `#` (which covers both
/// `#` and `#name`). Every other label is a citation.
fn is_citation_label(label: &str) -> bool {
    let numeric = label.bytes().all(|b| b.is_ascii_digit());
    let footnote = numeric || label == "*" || label.starts_with('#');
    !footnote
}
688
/// Extract the lowercased directive name from text of the form `name::...`.
///
/// The name is everything before the first `::`. It must be non-empty and made up only of ASCII
/// letters, digits, and the characters `-`, `_`, `+`, `.`, `:`. Returns `None` when there is no
/// `::` or the name fails that check.
fn directive_name(rest: &str) -> Option<String> {
    let (name, _) = rest.split_once("::")?;
    let allowed = |c: char| c.is_ascii_alphanumeric() || "-_+.:".contains(c);
    if name.is_empty() || !name.chars().all(allowed) {
        return None;
    }
    Some(name.to_lowercase())
}
702
703// --- definitions (pass one) --------------------------------------------------------------------
704
/// The document-global definitions gathered by the first pass over the input, before the document
/// tree is built. Named hyperlink targets are not stored here; they are registered during tree
/// construction instead.
#[derive(Default)]
struct Definitions {
    /// Anonymous-target destinations, in document order.
    anonymous: Vec<String>,
    /// Normalized substitution name to its definition. A later definition of the same name
    /// replaces an earlier one.
    substitutions: BTreeMap<String, Substitution>,
    /// Labeled footnote bodies, keyed by the label as written (`1`, `#name`). A later definition
    /// with the same label replaces an earlier one.
    footnotes: BTreeMap<String, Vec<String>>,
    /// Auto-numbered (`#`) footnote bodies, in document order.
    auto_footnotes: Vec<Vec<String>>,
    /// Symbol (`*`) footnote bodies, in document order.
    symbol_footnotes: Vec<Vec<String>>,
    /// Citations: original label and body, in document order.
    citations: Vec<(String, Vec<String>)>,
}
720
/// The definition stored for a substitution name.
// NOTE(review): the code that builds these values is not visible in this part of the file; the
// variant descriptions below are inferred from names and payload types — confirm against
// `parse_substitution`.
#[derive(Clone)]
enum Substitution {
    /// Presumably a textual replacement, carrying the replacement text.
    Replace(String),
    /// Presumably an image substitution: a string (likely the image source), its attributes, and
    /// inline content (likely the alternative text).
    Image(String, Attr, Vec<Inline>),
}
726
/// A custom interpreted-text role declared by a `role` directive: an optional base role whose
/// formatting it inherits, the classes it adds, and the format or language its base needs (a `raw`
/// base takes a `:format:`, a `code` base a `:language:`).
#[derive(Clone, Default)]
struct RoleDef {
    /// The base role whose formatting this role inherits, if it declares one.
    base: Option<String>,
    /// The classes this role adds.
    classes: Vec<String>,
    /// The `:format:` option, used by a `raw` base.
    format: Option<String>,
    /// The `:language:` option, used by a `code` base.
    language: Option<String>,
}
737
/// The result of following a custom-role chain to the builtin role that renders it: the builtin
/// role name (empty for a plain baseless role), the classes accumulated along the chain, and the
/// format and language the chain declares.
#[derive(Default)]
struct RoleChain {
    /// The builtin role name the chain ends at; empty for a plain baseless role.
    base: String,
    /// The classes accumulated along the chain.
    classes: Vec<String>,
    /// The format the chain declares, if any.
    format: Option<String>,
    /// The language the chain declares, if any.
    language: Option<String>,
}
748
749/// Read and parse an included file, returning its blocks for splicing into the document. Returns
750/// `None` when the file cannot be read.
751fn included_blocks(path: &str, ext: Extensions, depth: usize) -> Option<Vec<Block>> {
752    let content = std::fs::read_to_string(path).ok()?;
753    let lines = preprocess(&content);
754    let defs = collect_definitions(&lines);
755    let mut parser = Parser {
756        defs: &defs,
757        ext,
758        heading_styles: Vec::new(),
759        ids: IdRegistry::default(),
760        auto_footnote: 0,
761        symbol_footnote: 0,
762        anonymous: 0,
763        custom_roles: BTreeMap::new(),
764        default_role: DEFAULT_ROLE.to_string(),
765        include_depth: depth,
766        active_substitutions: Vec::new(),
767        deferred: BTreeMap::new(),
768    };
769    let mut blocks = parser.blocks(&lines);
770    if let Some(div) = parser.citation_block() {
771        blocks.push(div);
772    }
773    parser.resolve_deferred(&mut blocks);
774    Some(blocks)
775}
776
777fn collect_definitions(lines: &[String]) -> Definitions {
778    let mut defs = Definitions::default();
779    let mut i = 0;
780    while i < lines.len() {
781        let line = line_at(lines, i);
782        if is_blank(line) {
783            i += 1;
784            continue;
785        }
786        let indent = indent_of(line);
787        let trimmed = line.trim_start();
788        if let Some(kind) = classify_explicit(trimmed) {
789            let end = explicit_extent(lines, i, indent);
790            record_definition(&mut defs, lines, i, end, indent, kind);
791            i = end;
792        } else {
793            i += 1;
794        }
795    }
796    defs
797}
798
799fn record_definition(
800    defs: &mut Definitions,
801    lines: &[String],
802    start: usize,
803    end: usize,
804    indent: usize,
805    kind: Explicit,
806) {
807    let first = line_at(lines, start).trim_start();
808    match kind {
809        Explicit::AnonymousTarget => {
810            let url = parse_anonymous(first, lines, start, end, indent);
811            defs.anonymous.push(url);
812        }
813        Explicit::Footnote(label) => {
814            let body = footnote_body(lines, start, end, indent);
815            if label == "#" {
816                defs.auto_footnotes.push(body);
817            } else if label == "*" {
818                defs.symbol_footnotes.push(body);
819            } else {
820                defs.footnotes.insert(label, body);
821            }
822        }
823        Explicit::Citation(label) => {
824            let body = footnote_body(lines, start, end, indent);
825            defs.citations.push((label, body));
826        }
827        Explicit::Substitution => {
828            if let Some((name, subst)) = parse_substitution(first, lines, start, end, indent) {
829                defs.substitutions.insert(normalize_name(&name), subst);
830            }
831        }
832        // Named hyperlink targets are registered in document order during tree construction so the
833        // last definition of a name wins; directives and comments define no reference. None of these
834        // contribute to the first pass.
835        Explicit::Target | Explicit::Directive(_) | Explicit::Comment => {}
836    }
837}
838
839/// Parse a hyperlink target `_name: url` (the URL may continue across lines, joined without spaces).
840fn parse_target(
841    first: &str,
842    lines: &[String],
843    start: usize,
844    end: usize,
845    indent: usize,
846) -> Option<(String, String)> {
847    let rest = first.strip_prefix("..").unwrap_or(first).trim_start();
848    let rest = rest.strip_prefix('_')?;
849    let (name, after) = split_target_name(rest)?;
850    let mut url = after.trim().to_string();
851    for i in start + 1..end {
852        let line = line_at(lines, i);
853        if !is_blank(line) && indent_of(line) > indent {
854            url.push_str(line.trim());
855        }
856    }
857    Some((name, url))
858}
859
860/// Split a target's name from its destination at the terminating colon, honoring a backtick-quoted
861/// phrase name.
862fn split_target_name(rest: &str) -> Option<(String, String)> {
863    if let Some(after) = rest.strip_prefix('`') {
864        let close = after.find('`')?;
865        let name = &after[..close];
866        let tail = after.get(close + 1..)?.trim_start();
867        let tail = tail.strip_prefix(':')?;
868        return Some((name.to_string(), tail.to_string()));
869    }
870    // The name runs up to the first colon that is unescaped and followed by a space or the end of
871    // the line; a backslash-escaped colon is part of the name.
872    let (colon, after_colon) = unescaped_terminator(rest)?;
873    let name = rest.get(..colon)?.replace("\\:", ":");
874    let after = rest.get(after_colon..).unwrap_or("");
875    Some((name, after.to_string()))
876}
877
/// Locate the colon that ends a target name.
///
/// The terminator is the first `:` that is not preceded by an escaping backslash and is followed
/// either by a space or by the end of the text. A backslash escapes exactly the one character after
/// it, whatever that character is.
///
/// Returns the byte offset of the colon and the byte offset just past it, or `None` when the text
/// holds no such colon.
fn unescaped_terminator(rest: &str) -> Option<(usize, usize)> {
    let mut cursor = rest.char_indices().peekable();
    while let Some((at, ch)) = cursor.next() {
        if ch == '\\' {
            // Consume the escaped character so it can never be read as a terminator.
            cursor.next();
        } else if ch == ':' && matches!(cursor.peek(), None | Some((_, ' '))) {
            return Some((at, at + ch.len_utf8()));
        }
    }
    None
}
905
906fn parse_anonymous(
907    first: &str,
908    lines: &[String],
909    start: usize,
910    end: usize,
911    indent: usize,
912) -> String {
913    let rest = first.strip_prefix("..").map_or(first, str::trim_start);
914    let rest = rest.trim_start_matches('_');
915    let rest = rest.trim_start_matches(':');
916    let mut url = rest.trim().to_string();
917    for i in start + 1..end {
918        let line = line_at(lines, i);
919        if !is_blank(line) && indent_of(line) > indent {
920            url.push_str(line.trim());
921        }
922    }
923    if indirect_referent(&url).is_some() {
924        url
925    } else {
926        escape_uri(&url)
927    }
928}
929
930/// The body region of a footnote or citation: the text after the `.. [label]` marker, plus the
931/// dedented continuation, which the second pass parses as block content.
932fn footnote_body(lines: &[String], start: usize, end: usize, indent: usize) -> Vec<String> {
933    let first = line_at(lines, start);
934    let trimmed = first.trim_start();
935    let prefix_len = indent + trimmed.find(']').map_or_else(|| trimmed.len(), |p| p + 1);
936    explicit_body(lines, start, end, prefix_len)
937}
938
939fn parse_substitution(
940    first: &str,
941    lines: &[String],
942    start: usize,
943    end: usize,
944    indent: usize,
945) -> Option<(String, Substitution)> {
946    let trimmed = first.strip_prefix("..").unwrap_or(first).trim_start();
947    let rest = trimmed.strip_prefix('|')?;
948    let close = rest.find('|')?;
949    let name = rest.get(..close)?.to_string();
950    let after = rest.get(close + 1..)?.trim_start();
951    let coloncolon = after.find("::")?;
952    let directive = after.get(..coloncolon)?.trim().to_lowercase();
953    let arg_remainder = after.get(coloncolon + 2..).unwrap_or("").trim_start();
954    let prefix_len = indent + (first.chars().count() - arg_remainder.chars().count());
955    let body = explicit_body(lines, start, end, prefix_len);
956    let (argument, options, _content) = split_directive(&body);
957    match directive.as_str() {
958        "replace" => Some((name, Substitution::Replace(argument))),
959        "image" => {
960            let (mut attr, mut alt, url) = image_parts(&argument, &options);
961            attr.classes = image_classes(&options)
962                .into_iter()
963                .map(Into::into)
964                .collect();
965            // A substitution image with no explicit alt text falls back to the substitution name.
966            if alt.is_empty() {
967                push_text(&mut alt, &name);
968            }
969            Some((name, Substitution::Image(url, attr, alt)))
970        }
971        "unicode" => Some((name, Substitution::Replace(unicode_chars(&argument)))),
972        "date" => Some((name, Substitution::Replace(format_date(argument.trim())))),
973        _ => Some((name, Substitution::Replace(String::new()))),
974    }
975}
976
977/// Decode the tokens of a `unicode::` substitution argument. A token written as a hexadecimal code
978/// point (`0x`, `x`, `u`, `\x`, `\u`, `U+`, or an `&#x…;` character reference) becomes its
979/// character; any other token, including a bare decimal number, stays as written. Tokens are joined
980/// with a single space, and a standalone `..` ends the text.
981fn unicode_chars(argument: &str) -> String {
982    let mut tokens = Vec::new();
983    for token in argument.split_whitespace() {
984        if token == ".." {
985            break;
986        }
987        tokens.push(decode_unicode_token(token));
988    }
989    tokens.join(" ")
990}
991
992fn decode_unicode_token(token: &str) -> String {
993    if let Some(rest) = token.strip_prefix("&#x")
994        && let Some(hex) = rest.strip_suffix(';')
995        && let Some(ch) = code_point(hex)
996    {
997        return ch.to_string();
998    }
999    let hex = token
1000        .strip_prefix("U+")
1001        .or_else(|| token.strip_prefix("0x"))
1002        .or_else(|| token.strip_prefix("\\u"))
1003        .or_else(|| token.strip_prefix("\\x"))
1004        .or_else(|| token.strip_prefix('x'))
1005        .or_else(|| token.strip_prefix('u'));
1006    if let Some(hex) = hex
1007        && let Some(ch) = code_point(hex)
1008    {
1009        return ch.to_string();
1010    }
1011    token.to_string()
1012}
1013
/// Convert a run of hexadecimal digits into the character with that code point.
///
/// Returns `None` when `hex` is empty, contains anything other than ASCII hex digits, overflows a
/// `u32`, or names a value that is not a valid `char` (a surrogate or a value past `U+10FFFF`).
fn code_point(hex: &str) -> Option<char> {
    let well_formed = !hex.is_empty() && hex.bytes().all(|byte| byte.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }
    let scalar = u32::from_str_radix(hex, 16).ok()?;
    char::from_u32(scalar)
}
1022
1023/// Render the current date with a strftime-style format string, defaulting to `%Y-%m-%d`. The date
1024/// is taken in UTC.
1025fn format_date(format: &str) -> String {
1026    let format = if format.is_empty() {
1027        "%Y-%m-%d"
1028    } else {
1029        format
1030    };
1031    let secs = std::time::SystemTime::now()
1032        .duration_since(std::time::UNIX_EPOCH)
1033        .ok()
1034        .and_then(|d| i64::try_from(d.as_secs()).ok())
1035        .unwrap_or(0);
1036    render_date(secs, format)
1037}
1038
/// Full English month names, indexed from zero: entry `0` is January and entry `11` is December.
const MONTH_NAMES: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];
/// Full English weekday names, indexed with Sunday as `0` through Saturday as `6`.
const WEEKDAY_NAMES: [&str; 7] = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
];
1062
1063/// Expand a strftime-style format against the civil date and time of day at `secs` seconds past the
1064/// epoch (UTC). Unrecognized `%`-codes are emitted verbatim; `%%` yields a single percent.
1065fn render_date(secs: i64, format: &str) -> String {
1066    let parts = DateParts::from_secs(secs);
1067    let mut out = String::new();
1068    let mut chars = format.chars();
1069    while let Some(c) = chars.next() {
1070        if c != '%' {
1071            out.push(c);
1072            continue;
1073        }
1074        match chars.next() {
1075            Some(spec) => {
1076                if let Some(value) = parts.field(spec) {
1077                    out.push_str(&value);
1078                } else {
1079                    out.push('%');
1080                    if spec != '%' {
1081                        out.push(spec);
1082                    }
1083                }
1084            }
1085            None => out.push('%'),
1086        }
1087    }
1088    out
1089}
1090
/// Format `n` zero-padded to a minimum width of two; wider values are printed in full.
fn pad2(n: i64) -> String {
    format!("{n:02}")
}
1094
/// Format `n` zero-padded to a minimum width of three; wider values are printed in full.
fn pad3(n: i64) -> String {
    format!("{n:03}")
}
1098
/// Format `n` right-aligned and space-padded to a minimum width of two.
fn space2(n: i64) -> String {
    format!("{n:2}")
}
1102
/// The number of ISO 8601 weeks in `year`: `53` for a long year, `52` otherwise.
///
/// A year is long when its 31 December falls on a Thursday, or when the previous year's
/// 31 December falls on a Wednesday (so that this year's 1 January is a Thursday).
fn iso_weeks_in_year(year: i64) -> i64 {
    /// Weekday of 31 December of `y` in the proleptic Gregorian calendar, with Sunday as `0`.
    fn last_day_weekday(y: i64) -> i64 {
        (y + y.div_euclid(4) - y.div_euclid(100) + y.div_euclid(400)).rem_euclid(7)
    }
    let ends_on_thursday = last_day_weekday(year) == 4;
    let starts_on_thursday = last_day_weekday(year - 1) == 3;
    if ends_on_thursday || starts_on_thursday {
        53
    } else {
        52
    }
}
1114
/// A moment broken down into its UTC calendar date and time of day.
struct DateParts {
    /// Calendar year.
    year: i64,
    /// Month of the year, 1-12.
    month: i64,
    /// Day of the month, 1-31.
    day: i64,
    /// Hour of the day on a 24-hour clock.
    hour: i64,
    /// Minute of the hour.
    minute: i64,
    /// Second of the minute.
    second: i64,
    /// Day of the week, from 0 for Sunday to 6 for Saturday.
    weekday: i64,
    /// Day of the year, 1-366.
    yday: i64,
}
1130
1131impl DateParts {
1132    fn from_secs(secs: i64) -> Self {
1133        let days = secs.div_euclid(86_400);
1134        let day_secs = secs.rem_euclid(86_400);
1135        let (year, month, day) = civil_from_days(days);
1136        Self {
1137            year,
1138            month,
1139            day,
1140            hour: day_secs / 3600,
1141            minute: day_secs / 60 % 60,
1142            second: day_secs % 60,
1143            // 1970-01-01 was a Thursday (index 4).
1144            weekday: (days.rem_euclid(7) + 4).rem_euclid(7),
1145            yday: days - days_from_civil(year, 1, 1) + 1,
1146        }
1147    }
1148
1149    /// ISO 8601 weekday: 1 = Monday … 7 = Sunday.
1150    fn iso_weekday(&self) -> i64 {
1151        if self.weekday == 0 { 7 } else { self.weekday }
1152    }
1153
1154    /// Hour on a 12-hour clock, 1-12.
1155    fn hour12(&self) -> i64 {
1156        let h = self.hour % 12;
1157        if h == 0 { 12 } else { h }
1158    }
1159
1160    fn meridiem(&self, upper: bool) -> &'static str {
1161        match (self.hour < 12, upper) {
1162            (true, true) => "AM",
1163            (true, false) => "am",
1164            (false, true) => "PM",
1165            (false, false) => "pm",
1166        }
1167    }
1168
1169    /// Week of the year counting from the first Sunday (`%U`), 00-53.
1170    fn week_from_sunday(&self) -> i64 {
1171        (self.yday - 1 + 7 - self.weekday) / 7
1172    }
1173
1174    /// Week of the year counting from the first Monday (`%W`), 00-53.
1175    fn week_from_monday(&self) -> i64 {
1176        (self.yday - 1 + 7 - (self.weekday + 6) % 7) / 7
1177    }
1178
1179    /// ISO 8601 (week-numbering-year, week-of-year), the latter 01-53.
1180    fn iso_week(&self) -> (i64, i64) {
1181        let week = (self.yday + 10 - self.iso_weekday()) / 7;
1182        if week < 1 {
1183            (self.year - 1, iso_weeks_in_year(self.year - 1))
1184        } else if week > iso_weeks_in_year(self.year) {
1185            (self.year + 1, 1)
1186        } else {
1187            (self.year, week)
1188        }
1189    }
1190
1191    /// The rendering of one strftime field, or `None` for an unrecognized code.
1192    fn field(&self, spec: char) -> Option<String> {
1193        let month_name = MONTH_NAMES
1194            .get(usize::try_from(self.month - 1).unwrap_or(0))
1195            .copied()
1196            .unwrap_or("");
1197        let weekday_name = WEEKDAY_NAMES
1198            .get(usize::try_from(self.weekday).unwrap_or(0))
1199            .copied()
1200            .unwrap_or("");
1201        Some(match spec {
1202            'Y' => self.year.to_string(),
1203            'y' => pad2(self.year.rem_euclid(100)),
1204            'C' => pad2(self.year.div_euclid(100)),
1205            'm' => pad2(self.month),
1206            'd' => pad2(self.day),
1207            'e' => space2(self.day),
1208            'H' => pad2(self.hour),
1209            'k' => space2(self.hour),
1210            'I' => pad2(self.hour12()),
1211            'l' => space2(self.hour12()),
1212            'M' => pad2(self.minute),
1213            'S' => pad2(self.second),
1214            'j' => pad3(self.yday),
1215            'p' => self.meridiem(true).to_string(),
1216            'P' => self.meridiem(false).to_string(),
1217            'u' => self.iso_weekday().to_string(),
1218            'w' => self.weekday.to_string(),
1219            'U' => pad2(self.week_from_sunday()),
1220            'W' => pad2(self.week_from_monday()),
1221            'V' => pad2(self.iso_week().1),
1222            'G' => self.iso_week().0.to_string(),
1223            'g' => pad2(self.iso_week().0.rem_euclid(100)),
1224            'B' => month_name.to_string(),
1225            'b' | 'h' => month_name.get(..3).unwrap_or(month_name).to_string(),
1226            'A' => weekday_name.to_string(),
1227            'a' => weekday_name.get(..3).unwrap_or(weekday_name).to_string(),
1228            'D' => format!(
1229                "{:02}/{:02}/{:02}",
1230                self.month,
1231                self.day,
1232                self.year.rem_euclid(100)
1233            ),
1234            'F' => format!("{}-{:02}-{:02}", self.year, self.month, self.day),
1235            'R' => format!("{:02}:{:02}", self.hour, self.minute),
1236            'T' => format!("{:02}:{:02}:{:02}", self.hour, self.minute, self.second),
1237            'r' => format!(
1238                "{:02}:{:02}:{:02} {}",
1239                self.hour12(),
1240                self.minute,
1241                self.second,
1242                self.meridiem(true)
1243            ),
1244            'n' => "\n".to_string(),
1245            't' => "\t".to_string(),
1246            _ => return None,
1247        })
1248    }
1249}
1250
/// Convert a day count measured from 1970-01-01 into its civil `(year, month, day)`, where `month`
/// is 1-12 and `day` is 1-31. Negative counts name dates before the epoch.
fn civil_from_days(days: i64) -> (i64, i64, i64) {
    const DAYS_PER_ERA: i64 = 146_097;
    // Re-base the count on 0000-03-01 so that each 400-year era begins on 1 March.
    let shifted = days + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Months are counted from March (0) through February (11) at this point.
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    // January and February belong to the civil year after the March-based one.
    let (month, year_carry) = if march_month < 10 {
        (march_month + 3, 0)
    } else {
        (march_month - 9, 1)
    };
    (year_of_era + era * 400 + year_carry, month, day)
}
1265
/// Convert a civil date into its day count measured from 1970-01-01, the inverse of
/// `civil_from_days`. `month` is 1-12 and `day` is the day of the month.
fn days_from_civil(year: i64, month: i64, day: i64) -> i64 {
    // Count years from March so that January and February close out the previous year.
    let (shifted_year, march_month) = if month > 2 {
        (year, month - 3)
    } else {
        (year - 1, month + 9)
    };
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400);
    let day_of_year = (153 * march_month + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    era * 146_097 + day_of_era - 719_468
}
1276
1277// --- block parsing (pass two) ------------------------------------------------------------------
1278
/// The role given to interpreted text that names no role of its own. It stays in force until a
/// `default-role` directive selects a different one.
const DEFAULT_ROLE: &str = "title-reference";
1282
1283struct Parser<'a> {
1284    defs: &'a Definitions,
1285    ext: Extensions,
1286    heading_styles: Vec<(char, bool)>,
1287    ids: IdRegistry,
1288    auto_footnote: usize,
1289    symbol_footnote: usize,
1290    anonymous: usize,
1291    /// Roles declared by `role` directives, keyed by role name.
1292    custom_roles: BTreeMap<String, RoleDef>,
1293    /// The role applied to interpreted text with no explicit role.
1294    default_role: String,
1295    /// How many nested `include` directives deep this parser is, bounding include recursion.
1296    include_depth: usize,
1297    /// The chain of substitution names currently being expanded, by normalized name. A
1298    /// substitution replacement is itself parsed as inline markup, so a definition that refers
1299    /// to itself — directly or through a cycle of other definitions — would recurse without
1300    /// bound and overflow the stack. RST forbids circular substitution references; a name already
1301    /// on this stack is left unexpanded instead of re-entered.
1302    active_substitutions: Vec<String>,
1303    /// Every hyperlink-target name discovered while building the tree — explicit targets, internal
1304    /// targets, section titles, and the labels of phrase references with an embedded destination —
1305    /// mapped to its destination. Filled in document order so a later definition supersedes an
1306    /// earlier one, and consulted by the final pass that resolves the references left deferred
1307    /// during tree construction.
1308    deferred: BTreeMap<String, String>,
1309}
1310
/// How deep a chain of nested `include` directives is followed. Includes beyond this depth are
/// ignored, which stops a cycle of files that include one another.
const MAX_INCLUDE_DEPTH: usize = 64;
1314
1315impl Parser<'_> {
1316    fn blocks(&mut self, lines: &[String]) -> Vec<Block> {
1317        let mut out = Vec::new();
1318        let mut pending_classes: Option<Vec<String>> = None;
1319        let mut pending_targets: Vec<String> = Vec::new();
1320        let mut i = 0;
1321        while i < lines.len() {
1322            let line = line_at(lines, i);
1323            if is_blank(line) {
1324                i += 1;
1325                continue;
1326            }
1327            // A hyperlink target registers its destination in document order. A target with no
1328            // destination is internal: a reference to it points at the identifier the target carries
1329            // onto the block that follows it. A destination naming another target (a trailing
1330            // underscore) is indirect and kept verbatim so the chain can be followed; any other
1331            // destination is a URL and is percent-encoded.
1332            if matches!(classify_explicit(line), Some(Explicit::Target)) {
1333                let indent = indent_of(line);
1334                let end = explicit_extent(lines, i, indent);
1335                if let Some((name, url)) = parse_target(line.trim_start(), lines, i, end, indent) {
1336                    if url.trim().is_empty() {
1337                        self.deferred
1338                            .insert(normalize_name(&name), format!("#{}", name.trim()));
1339                        pending_targets.push(name.trim().to_string());
1340                    } else {
1341                        let destination = if indirect_referent(&url).is_some() {
1342                            url
1343                        } else {
1344                            escape_uri(&url)
1345                        };
1346                        self.deferred.insert(normalize_name(&name), destination);
1347                    }
1348                    i = end;
1349                    continue;
1350                }
1351            }
1352            let before = out.len();
1353            let scanned_from = i;
1354            i = self.block_at(lines, i, &mut out);
1355            // Every block parser consumes at least the line it opens on; force progress so a
1356            // construct that yields nothing can never stall the scan on a single line.
1357            i = i.max(scanned_from + 1);
1358            // A preceding empty `class` directive wraps the block just produced.
1359            if let Some(classes) = pending_classes.take()
1360                && out.len() > before
1361            {
1362                let wrapped = out.split_off(before);
1363                out.push(class_div(classes, wrapped));
1364            }
1365            // Internal targets seen since the last block attach their identifiers to it.
1366            if !pending_targets.is_empty() && out.len() > before {
1367                let produced = out.split_off(before);
1368                out.extend(attach_targets(
1369                    produced,
1370                    std::mem::take(&mut pending_targets),
1371                ));
1372            }
1373            // An empty `class` directive leaves a marker whose classes wrap the next block.
1374            if let Some(Block::Div(attr, content)) = out.last()
1375                && content.is_empty()
1376                && attr.classes.first().map(Text::as_str) == Some(PENDING_CLASS)
1377            {
1378                pending_classes = Some(
1379                    attr.classes
1380                        .get(1..)
1381                        .unwrap_or(&[])
1382                        .iter()
1383                        .map(ToString::to_string)
1384                        .collect(),
1385                );
1386                out.pop();
1387            }
1388        }
1389        out
1390    }
1391
1392    /// Parse the block beginning at line `i`, appending it to `out`, and return the next line index.
1393    fn block_at(&mut self, lines: &[String], i: usize, out: &mut Vec<Block>) -> usize {
1394        let line = line_at(lines, i);
1395        let indent = indent_of(line);
1396
1397        if indent > 0 {
1398            return self.block_quote(lines, i, out);
1399        }
1400
1401        if let Some(c) = adornment_char(line) {
1402            // Overline section header: the overline and underline must be the same character and
1403            // length, and at least as long as the title between them. A shorter or mismatched run is
1404            // not a header and falls through (a single-column simple table opens the same way).
1405            let title = line_at(lines, i + 1);
1406            let under = line_at(lines, i + 2);
1407            let overline_len = line.trim().chars().count();
1408            if !is_blank(title)
1409                && adornment_char(title).is_none()
1410                && adornment_char(under) == Some(c)
1411                && overline_len == under.trim().chars().count()
1412                && overline_len >= title.trim().chars().count()
1413            {
1414                out.push(self.header(title.trim(), c, true));
1415                return i + 3;
1416            }
1417            if line.trim().chars().count() >= 4
1418                && (i + 1 >= lines.len() || is_blank(line_at(lines, i + 1)))
1419            {
1420                out.push(Block::HorizontalRule);
1421                return i + 1;
1422            }
1423        }
1424
1425        // Underline section header.
1426        let next = line_at(lines, i + 1);
1427        if let Some(c) = adornment_char(next)
1428            && next.trim().chars().count() >= line.trim().chars().count()
1429        {
1430            out.push(self.header(line.trim(), c, false));
1431            return i + 2;
1432        }
1433
1434        if line.starts_with('+')
1435            && let Some(next_i) = self.grid_table(lines, i, out)
1436        {
1437            return next_i;
1438        }
1439
1440        if is_simple_table_ruler(line)
1441            && let Some(next_i) = self.simple_table(lines, i, out)
1442        {
1443            return next_i;
1444        }
1445
1446        if bullet_content_col(line).is_some() {
1447            return self.bullet_list(lines, i, out);
1448        }
1449
1450        if let Some((_, style, delim, col)) = enumerator(line)
1451            && item_well_formed(lines, i, col, style, delim)
1452        {
1453            return self.ordered_list(lines, i, out);
1454        }
1455
1456        if field_marker(line).is_some() {
1457            return self.field_list(lines, i, out);
1458        }
1459
1460        if classify_explicit(line).is_some() {
1461            return self.explicit(lines, i, out);
1462        }
1463
1464        // A line block opens with `|` followed by a space or the end of the line, examined after any
1465        // leading indentation is dropped. The character after the pipe — not the line's second
1466        // character — decides this, so an indented or non-space-led pipe is not mistaken for one.
1467        if let Some(after_pipe) = line.trim_start().strip_prefix('|')
1468            && matches!(after_pipe.chars().next(), Some(' ') | None)
1469        {
1470            return self.line_block(lines, i, out);
1471        }
1472
1473        if option_marker(line).is_some() {
1474            return self.option_list(lines, i, out);
1475        }
1476
1477        // Definition list: a single-line term immediately followed by a more-indented definition.
1478        if !is_blank(next) && indent_of(next) > 0 {
1479            return self.definition_list(lines, i, out);
1480        }
1481
1482        self.paragraph(lines, i, out)
1483    }
1484
1485    fn header(&mut self, title: &str, adornment: char, overline: bool) -> Block {
1486        let level = self.heading_level(adornment, overline);
1487        let inlines = self.inlines(title);
1488        let plain = carta_ast::to_plain_text(&inlines);
1489        let id = match IdScheme::select(self.ext, false) {
1490            Some(scheme) => {
1491                let text = if self.ext.contains(Extension::AsciiIdentifiers) {
1492                    asciify(&plain)
1493                } else {
1494                    plain.clone()
1495                };
1496                // A title whose characters all drop out under the count-suffix scheme takes the
1497                // fallback identifier `section`, disambiguated like any other repeat.
1498                if matches!(scheme, IdScheme::Gfm) && carta_ast::slug_gfm(&text).is_empty() {
1499                    self.ids.assign(scheme, "section")
1500                } else {
1501                    self.ids.assign(scheme, &text)
1502                }
1503            }
1504            None => String::new(),
1505        };
1506        // Every section title is an implicit hyperlink target, referenceable by its text and
1507        // resolving to the section's identifier. A later section with the same title supersedes an
1508        // earlier one.
1509        if !plain.trim().is_empty() {
1510            self.deferred
1511                .insert(normalize_name(&plain), format!("#{id}"));
1512        }
1513        Block::Header(
1514            level,
1515            Box::new(Attr {
1516                id: id.into(),
1517                classes: Vec::new(),
1518                attributes: Vec::new(),
1519            }),
1520            inlines,
1521        )
1522    }
1523
1524    fn heading_level(&mut self, adornment: char, overline: bool) -> i32 {
1525        let key = (adornment, overline);
1526        let level = if let Some(pos) = self.heading_styles.iter().position(|s| *s == key) {
1527            pos + 1
1528        } else {
1529            self.heading_styles.push(key);
1530            self.heading_styles.len()
1531        };
1532        i32::try_from(level).unwrap_or(i32::MAX)
1533    }
1534
1535    fn block_quote(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1536        let base = indent_of(line_at(lines, start));
1537        let mut end = start;
1538        let mut i = start;
1539        while let Some(line) = lines.get(i) {
1540            if is_blank(line) {
1541                i += 1;
1542            } else if indent_of(line) >= base {
1543                end = i;
1544                i += 1;
1545            } else {
1546                break;
1547            }
1548        }
1549        let region: Vec<String> = (start..=end)
1550            .filter_map(|j| lines.get(j))
1551            .map(|l| {
1552                if is_blank(l) {
1553                    String::new()
1554                } else {
1555                    dedent(l, base)
1556                }
1557            })
1558            .collect();
1559        let inner = self.blocks(&region);
1560        out.push(Block::BlockQuote(inner));
1561        end + 1
1562    }
1563
1564    fn paragraph(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1565        let mut collected: Vec<&str> = Vec::new();
1566        let mut i = start;
1567        while let Some(line) = lines.get(i) {
1568            if is_blank(line) {
1569                break;
1570            }
1571            // A title underline below an earlier line ends the paragraph at that line.
1572            if i > start && adornment_char(line).is_some() {
1573                let prev = line_at(lines, i - 1).trim();
1574                if line.trim().chars().count() >= prev.chars().count() {
1575                    break;
1576                }
1577            }
1578            collected.push(line.trim());
1579            i += 1;
1580        }
1581        let text = collected.join("\n");
1582        let literal = text.trim_end().ends_with("::");
1583        if literal && let Some((code, next)) = Self::literal_block(lines, i) {
1584            let trimmed = minimize_colons(&text);
1585            if !trimmed.is_empty() {
1586                out.push(Block::Para(splice_lone_span(self.inlines(&trimmed))));
1587            }
1588            out.push(code);
1589            return next;
1590        }
1591        out.push(Block::Para(splice_lone_span(self.inlines(&text))));
1592        i
1593    }
1594
1595    /// The literal (code) block following a `::` paragraph, when an indented block follows.
1596    fn literal_block(lines: &[String], from: usize) -> Option<(Block, usize)> {
1597        let mut i = from;
1598        while lines.get(i).is_some_and(|l| is_blank(l)) {
1599            i += 1;
1600        }
1601        let line = lines.get(i)?;
1602        let base = indent_of(line);
1603        if base == 0 {
1604            // An unindented block whose every line opens with the same quoting character is a
1605            // quoted literal block; the quoting characters are kept verbatim.
1606            return Self::quoted_literal_block(lines, i);
1607        }
1608        let start = i;
1609        let mut end = i;
1610        while let Some(l) = lines.get(i) {
1611            if is_blank(l) {
1612                i += 1;
1613            } else if indent_of(l) >= base {
1614                end = i;
1615                i += 1;
1616            } else {
1617                break;
1618            }
1619        }
1620        let mut text_lines: Vec<String> = (start..=end)
1621            .filter_map(|j| lines.get(j))
1622            .map(|l| {
1623                if is_blank(l) {
1624                    String::new()
1625                } else {
1626                    dedent(l, base)
1627                }
1628            })
1629            .collect();
1630        while text_lines.last().is_some_and(std::string::String::is_empty) {
1631            text_lines.pop();
1632        }
1633        Some((
1634            Block::CodeBlock(Box::default(), text_lines.join("\n").into()),
1635            end + 1,
1636        ))
1637    }
1638
1639    /// A quoted literal block: an unindented run of lines that each begin with the same quoting
1640    /// character (one of the adornment characters). The lines, quoting characters included, are the
1641    /// code block's verbatim text.
1642    fn quoted_literal_block(lines: &[String], start: usize) -> Option<(Block, usize)> {
1643        let quote = line_at(lines, start).chars().next()?;
1644        if !ADORNMENT_CHARS.contains(quote) {
1645            return None;
1646        }
1647        let mut i = start;
1648        let mut text_lines: Vec<String> = Vec::new();
1649        while let Some(line) = lines.get(i) {
1650            if is_blank(line) || !line.starts_with(quote) {
1651                break;
1652            }
1653            text_lines.push(line.clone());
1654            i += 1;
1655        }
1656        if text_lines.is_empty() {
1657            return None;
1658        }
1659        Some((
1660            Block::CodeBlock(Box::default(), text_lines.join("\n").into()),
1661            i,
1662        ))
1663    }
1664
1665    fn line_block(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1666        let base = indent_of(line_at(lines, start));
1667        let mut entries: Vec<String> = Vec::new();
1668        let mut i = start;
1669        while let Some(line) = lines.get(i) {
1670            if is_blank(line) {
1671                break;
1672            }
1673            let trimmed = line.trim_start();
1674            if let Some(rest) = trimmed.strip_prefix('|') {
1675                if !matches!(rest.chars().next(), Some(' ') | None) {
1676                    break;
1677                }
1678                let rest = rest.strip_prefix(' ').unwrap_or(rest);
1679                // Indentation beyond the single separating space is preserved as non-breaking
1680                // spaces so it survives into the rendered line.
1681                let leading = rest.chars().take_while(|c| *c == ' ').count();
1682                let content = format!(
1683                    "{}{}",
1684                    "\u{a0}".repeat(leading),
1685                    rest.trim_start_matches(' ')
1686                );
1687                entries.push(content);
1688                i += 1;
1689            } else if !entries.is_empty() && indent_of(line) > base {
1690                // A further-indented line without its own `|` continues the preceding line,
1691                // joined to it by a single space.
1692                if let Some(last) = entries.last_mut() {
1693                    last.push(' ');
1694                    last.push_str(trimmed);
1695                }
1696                i += 1;
1697            } else {
1698                break;
1699            }
1700        }
1701        let parsed = entries.iter().map(|entry| self.inlines(entry)).collect();
1702        out.push(Block::LineBlock(parsed));
1703        i
1704    }
1705
1706    fn bullet_list(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1707        let mut items: Vec<Vec<Block>> = Vec::new();
1708        let mut i = start;
1709        while let Some(line) = lines.get(i) {
1710            if is_blank(line) {
1711                i += 1;
1712                continue;
1713            }
1714            if indent_of(line) != 0 {
1715                break;
1716            }
1717            let Some(col) = bullet_content_col(line) else {
1718                break;
1719            };
1720            let (region, next) = Self::item_region(lines, i, col);
1721            items.push(self.blocks(&region));
1722            i = next;
1723        }
1724        compactify(&mut items);
1725        out.push(Block::BulletList(items));
1726        i
1727    }
1728
1729    fn ordered_list(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1730        let Some((start_num, style, delim, _)) = enumerator(line_at(lines, start)) else {
1731            return self.paragraph(lines, start, out);
1732        };
1733        let mut items: Vec<Vec<Block>> = Vec::new();
1734        let mut i = start;
1735        while let Some(line) = lines.get(i) {
1736            if is_blank(line) {
1737                i += 1;
1738                continue;
1739            }
1740            if indent_of(line) != 0 {
1741                break;
1742            }
1743            let Some((_, _, _, col)) = enumerator(line) else {
1744                break;
1745            };
1746            // An auto-numbered (`#`) item joins whatever list is open and vice versa; otherwise the
1747            // delimiter must match and the style must match directly or by an ambiguous single
1748            // letter adopting the list's established style. A later item that is itself a run-on
1749            // paragraph (its continuation under-indented) ends the list before it.
1750            if !enum_compatible(line, style, delim)
1751                || !item_well_formed(lines, i, col, style, delim)
1752            {
1753                break;
1754            }
1755            let (region, next) = Self::item_region(lines, i, col);
1756            items.push(self.blocks(&region));
1757            i = next;
1758        }
1759        compactify(&mut items);
1760        out.push(Block::OrderedList(
1761            ListAttributes {
1762                start: start_num,
1763                style,
1764                delim,
1765            },
1766            items,
1767        ));
1768        i
1769    }
1770
1771    /// The dedented body region of a list item beginning at line `start`, whose content starts at
1772    /// column `col`.
1773    fn item_region(lines: &[String], start: usize, col: usize) -> (Vec<String>, usize) {
1774        let first: String = line_at(lines, start).chars().skip(col).collect();
1775        let mut region = vec![first];
1776        let mut end = start;
1777        let mut i = start + 1;
1778        while let Some(line) = lines.get(i) {
1779            if is_blank(line) {
1780                i += 1;
1781            } else if indent_of(line) >= col {
1782                end = i;
1783                i += 1;
1784            } else {
1785                break;
1786            }
1787        }
1788        for j in start + 1..=end {
1789            let line = line_at(lines, j);
1790            region.push(if is_blank(line) {
1791                String::new()
1792            } else {
1793                dedent(line, col)
1794            });
1795        }
1796        while region.last().is_some_and(std::string::String::is_empty) {
1797            region.pop();
1798        }
1799        (region, end + 1)
1800    }
1801
1802    fn field_list(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1803        let mut entries: Vec<(Vec<Inline>, Vec<Block>)> = Vec::new();
1804        let mut i = start;
1805        while let Some(line) = lines.get(i) {
1806            if is_blank(line) {
1807                i += 1;
1808                continue;
1809            }
1810            if indent_of(line) != 0 {
1811                break;
1812            }
1813            let Some((name, value_col)) = field_marker(line) else {
1814                break;
1815            };
1816            let end = explicit_extent(lines, i, indent_of(line));
1817            let body = explicit_body(lines, i, end, value_col);
1818            let term = self.inlines(&name);
1819            entries.push((term, self.blocks(&body)));
1820            i = end;
1821        }
1822        let mut defs: Vec<Vec<Block>> = entries.iter().map(|(_, blocks)| blocks.clone()).collect();
1823        compactify(&mut defs);
1824        let items = entries
1825            .into_iter()
1826            .zip(defs)
1827            .map(|((term, _), blocks)| (term, vec![blocks]))
1828            .collect();
1829        out.push(Block::DefinitionList(items));
1830        i
1831    }
1832
1833    fn definition_list(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1834        let mut items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = Vec::new();
1835        let mut i = start;
1836        while let Some(line) = lines.get(i) {
1837            if is_blank(line) {
1838                i += 1;
1839                continue;
1840            }
1841            if indent_of(line) != 0 {
1842                break;
1843            }
1844            let def = line_at(lines, i + 1);
1845            if is_blank(def) || indent_of(def) == 0 {
1846                break;
1847            }
1848            let term = self.inlines(line.trim());
1849            let col = indent_of(def);
1850            let (region, next) = Self::item_region(lines, i + 1, col);
1851            items.push((term, vec![self.blocks(&region)]));
1852            i = next;
1853        }
1854        out.push(Block::DefinitionList(items));
1855        i
1856    }
1857
1858    /// An option list: each item pairs an option group (`-a`, `--all=ARG`, `/S`, comma-joined
1859    /// variants) rendered as inline code with a description body. The body begins after the
1860    /// two-or-more-space gap that follows the option group, or on the following indented lines.
1861    fn option_list(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1862        let mut items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = Vec::new();
1863        let mut i = start;
1864        while let Some(line) = lines.get(i) {
1865            if is_blank(line) {
1866                i += 1;
1867                continue;
1868            }
1869            if indent_of(line) != 0 {
1870                break;
1871            }
1872            let Some((term, value_col)) = option_marker(line) else {
1873                break;
1874            };
1875            let end = explicit_extent(lines, i, 0);
1876            let body = explicit_body(lines, i, end, value_col);
1877            let term_inline = vec![Inline::Code(Box::default(), term.into())];
1878            items.push((term_inline, vec![self.blocks(&body)]));
1879            i = end;
1880        }
1881        out.push(Block::DefinitionList(items));
1882        i
1883    }
1884
1885    // --- explicit markup ---
1886
1887    fn explicit(&mut self, lines: &[String], start: usize, out: &mut Vec<Block>) -> usize {
1888        let line = line_at(lines, start);
1889        let indent = indent_of(line);
1890        let end = explicit_extent(lines, start, indent);
1891        if let Some(Explicit::Directive(name)) = classify_explicit(line) {
1892            self.directive(&name, lines, start, end, out);
1893        }
1894        end
1895    }
1896
1897    #[allow(clippy::too_many_lines)]
1898    fn directive(
1899        &mut self,
1900        name: &str,
1901        lines: &[String],
1902        start: usize,
1903        end: usize,
1904        out: &mut Vec<Block>,
1905    ) {
1906        let first = line_at(lines, start).trim_start();
1907        let after = first
1908            .strip_prefix("..")
1909            .unwrap_or(first)
1910            .trim_start()
1911            .strip_prefix(name)
1912            .and_then(|r| r.strip_prefix("::"))
1913            .unwrap_or("");
1914        let prefix_len = line_at(lines, start).len() - after.len();
1915        let body = explicit_body(lines, start, end, prefix_len);
1916        let (argument, options, content) = split_directive(&body);
1917
1918        match name {
1919            "raw" => {
1920                out.push(Block::RawBlock(
1921                    Format(argument.trim().into()),
1922                    content.join("\n").into(),
1923                ));
1924            }
1925            "code" | "code-block" | "sourcecode" => {
1926                let attr = code_attr(&argument, &options);
1927                let mut text = content.join("\n");
1928                while text.ends_with('\n') {
1929                    text.pop();
1930                }
1931                out.push(Block::CodeBlock(Box::new(attr), text.into()));
1932            }
1933            "math" => {
1934                let mut equations = Vec::new();
1935                if !argument.trim().is_empty() {
1936                    equations.push(argument.trim().to_string());
1937                }
1938                equations.extend(blank_separated(&content));
1939                let math: Vec<Inline> = equations
1940                    .into_iter()
1941                    .map(|eq| Inline::Math(MathType::DisplayMath, eq.into()))
1942                    .collect();
1943                let (id, classes, attributes) = common_options(&options);
1944                // Options (a `:label:`, `:nowrap:`, …) attach to the whole equation group through a
1945                // wrapping span; without them the equations stand on their own.
1946                let inlines = if id.is_empty() && classes.is_empty() && attributes.is_empty() {
1947                    math
1948                } else {
1949                    vec![Inline::Span(
1950                        Box::new(Attr {
1951                            id: id.into(),
1952                            classes: classes.into_iter().map(Into::into).collect(),
1953                            attributes: attributes
1954                                .into_iter()
1955                                .map(|(k, v)| (k.into(), v.into()))
1956                                .collect(),
1957                        }),
1958                        math,
1959                    )]
1960                };
1961                out.push(Block::Para(inlines));
1962            }
1963            "image" => {
1964                let (mut attr, mut alt, url) = image_parts(&argument, &options);
1965                attr.classes = image_classes(&options)
1966                    .into_iter()
1967                    .map(Into::into)
1968                    .collect();
1969                if alt.is_empty() {
1970                    alt = vec![Inline::Str("image".into())];
1971                }
1972                let image = Inline::Image(
1973                    Box::new(attr),
1974                    alt,
1975                    Box::new(Target {
1976                        url: url.into(),
1977                        title: carta_ast::Text::default(),
1978                    }),
1979                );
1980                out.push(Block::Para(vec![Self::wrap_target(image, &options)]));
1981            }
1982            "figure" => out.push(self.figure(&argument, &options, &content)),
1983            "note" | "warning" | "attention" | "caution" | "danger" | "error" | "hint"
1984            | "important" | "tip" => {
1985                let title = capitalize(name);
1986                let mut blocks = vec![Block::Div(
1987                    Box::new(Attr {
1988                        id: carta_ast::Text::default(),
1989                        classes: vec!["title".into()],
1990                        attributes: Vec::new(),
1991                    }),
1992                    vec![Block::Para(vec![Inline::Str(title.into())])],
1993                )];
1994                blocks.extend(self.blocks(&directive_content(&body)));
1995                out.push(options_div(name, &options, blocks));
1996            }
1997            "admonition" => {
1998                let mut blocks = Vec::new();
1999                if !argument.trim().is_empty() {
2000                    blocks.push(Block::Para(self.inlines(argument.trim())));
2001                }
2002                blocks.extend(self.blocks(&content));
2003                out.push(class_div(vec!["admonition".to_string()], blocks));
2004            }
2005            "topic" | "sidebar" => {
2006                let mut blocks = Vec::new();
2007                if !argument.trim().is_empty() {
2008                    // A sidebar's subtitle joins its title, separated by a colon; for a topic the
2009                    // title stands alone. Either way the subtitle is also kept as an attribute by
2010                    // the surrounding division.
2011                    let subtitle = options.iter().find(|(k, _)| k == "subtitle");
2012                    let title = match (name, subtitle) {
2013                        ("sidebar", Some((_, subtitle))) => {
2014                            format!("{}: {}", argument.trim(), subtitle.trim())
2015                        }
2016                        _ => argument.trim().to_string(),
2017                    };
2018                    blocks.push(Block::Para(vec![Inline::Strong(self.inlines(&title))]));
2019                }
2020                blocks.extend(self.blocks(&content));
2021                out.push(options_div(name, &options, blocks));
2022            }
2023            "rubric" => {
2024                out.push(Block::Para(vec![Inline::Strong(
2025                    self.inlines(argument.trim()),
2026                )]));
2027            }
2028            "container" => {
2029                let mut classes = vec!["container".to_string()];
2030                classes.extend(argument.split_whitespace().map(str::to_string));
2031                out.push(class_div(classes, self.blocks(&content)));
2032            }
2033            "epigraph" | "highlights" | "pull-quote" => {
2034                out.push(Block::BlockQuote(self.blocks(&content)));
2035            }
2036            "compound" => out.extend(self.blocks(&content)),
2037            "csv-table" => self.csv_table(&argument, &options, &content, out),
2038            "list-table" => self.list_table(&argument, &options, &content, out),
2039            "class" => {
2040                let classes: Vec<String> =
2041                    argument.split_whitespace().map(str::to_string).collect();
2042                if content.is_empty() {
2043                    // Apply the classes to the next sibling block via a marker the loop unwraps.
2044                    let mut marker = vec![PENDING_CLASS.to_string()];
2045                    marker.extend(classes);
2046                    out.push(class_div(marker, Vec::new()));
2047                } else {
2048                    out.push(class_div(classes, self.blocks(&content)));
2049                }
2050            }
2051            "line-block" => out.push(self.line_block_directive(&content)),
2052            "table" => self.table_directive(&argument, &options, &content, out),
2053            // A role definition configures inline interpretation; it produces no block of its own.
2054            "role" => self.register_role(&argument, &options),
2055            "default-role" => {
2056                let selected = argument.trim();
2057                self.default_role = if selected.is_empty() {
2058                    DEFAULT_ROLE.to_string()
2059                } else {
2060                    selected.to_string()
2061                };
2062            }
2063            // An include directive splices the parsed content of an external file in place. A file
2064            // that cannot be read contributes nothing.
2065            "include" => {
2066                if self.include_depth < MAX_INCLUDE_DEPTH
2067                    && let Some(blocks) =
2068                        included_blocks(argument.trim(), self.ext, self.include_depth + 1)
2069                {
2070                    out.extend(blocks);
2071                }
2072            }
2073            _ => {
2074                let mut blocks = Vec::new();
2075                if !argument.trim().is_empty() {
2076                    blocks.push(Block::Para(self.inlines(argument.trim())));
2077                }
2078                blocks.extend(self.blocks(&content));
2079                out.push(options_div(name, &options, blocks));
2080            }
2081        }
2082    }
2083
2084    /// Record a `role` directive: an `name(base)` argument names the role and the base role it
2085    /// inherits, while options supply the classes (`:class:`), the raw output format (`:format:`),
2086    /// and the highlighting language (`:language:`) the role carries.
2087    fn register_role(&mut self, argument: &str, options: &[(String, String)]) {
2088        let argument = argument.trim();
2089        let (name, base) = match argument.split_once('(') {
2090            Some((name, rest)) => (
2091                name.trim(),
2092                Some(rest.trim_end_matches(')').trim().to_string()),
2093            ),
2094            None => (argument, None),
2095        };
2096        if name.is_empty() {
2097            return;
2098        }
2099        let base = base.filter(|b| !b.is_empty());
2100        let classes = class_list(options, "class");
2101        let option_value = |key: &str| {
2102            options
2103                .iter()
2104                .find(|(k, _)| k == key)
2105                .map(|(_, v)| v.trim().to_string())
2106                .filter(|v| !v.is_empty())
2107        };
2108        self.custom_roles.insert(
2109            name.to_string(),
2110            RoleDef {
2111                base,
2112                classes,
2113                format: option_value("format"),
2114                language: option_value("language"),
2115            },
2116        );
2117    }
2118
2119    fn wrap_target(image: Inline, options: &[(String, String)]) -> Inline {
2120        if let Some((_, url)) = options.iter().find(|(k, _)| k == "target") {
2121            Inline::Link(
2122                Box::default(),
2123                vec![image],
2124                Box::new(Target {
2125                    url: url.clone().into(),
2126                    title: carta_ast::Text::default(),
2127                }),
2128            )
2129        } else {
2130            image
2131        }
2132    }
2133
2134    fn figure(
2135        &mut self,
2136        argument: &str,
2137        options: &[(String, String)],
2138        content: &[String],
2139    ) -> Block {
2140        let (img_attr, alt, url) = image_parts(argument, options);
2141        let inner = self.blocks(content);
2142        let mut caption = Caption::default();
2143        let mut caption_inlines = Vec::new();
2144        let mut iter = inner.into_iter();
2145        if let Some(first) = iter.next() {
2146            let plain = to_plain(first);
2147            if let Block::Plain(inlines) = &plain {
2148                caption_inlines.clone_from(inlines);
2149            }
2150            // The first body block is the caption proper; any further blocks are the legend, which
2151            // joins the caption rather than the figure body.
2152            caption.long = vec![plain];
2153            caption.long.extend(iter);
2154        }
2155        // The image description defaults to the figure's caption when no explicit alt is given.
2156        let description = if alt.is_empty() { caption_inlines } else { alt };
2157        let image = Inline::Image(
2158            Box::new(img_attr),
2159            description,
2160            Box::new(Target {
2161                url: url.into(),
2162                title: carta_ast::Text::default(),
2163            }),
2164        );
2165        let body = vec![Block::Plain(vec![image])];
2166        Block::Figure(Box::new(figure_attr(options)), Box::new(caption), body)
2167    }
2168
2169    /// A `line-block` directive: each body line becomes one line of the block, with a blank body line
2170    /// rendering as an empty line.
2171    fn line_block_directive(&mut self, content: &[String]) -> Block {
2172        let mut end = content.len();
2173        while end > 0 && content.get(end - 1).is_some_and(|l| l.trim().is_empty()) {
2174            end -= 1;
2175        }
2176        let lines = content
2177            .get(..end)
2178            .unwrap_or(&[])
2179            .iter()
2180            .map(|line| self.inlines(line.trim()))
2181            .collect();
2182        Block::LineBlock(lines)
2183    }
2184
2185    /// A `table` directive: its body is an ordinary table whose caption is taken from the directive's
2186    /// argument.
2187    fn table_directive(
2188        &mut self,
2189        argument: &str,
2190        _options: &[(String, String)],
2191        content: &[String],
2192        out: &mut Vec<Block>,
2193    ) {
2194        let mut blocks = self.blocks(content);
2195        let argument = argument.trim();
2196        if !argument.is_empty() {
2197            let caption = self.inlines(argument);
2198            if let Some(Block::Table(table)) =
2199                blocks.iter_mut().find(|b| matches!(b, Block::Table(_)))
2200            {
2201                table.caption = Caption {
2202                    short: None,
2203                    long: vec![Block::Plain(caption)],
2204                };
2205            }
2206        }
2207        out.extend(blocks);
2208    }
2209
2210    /// The trailing `citations` division gathering every citation definition, or `None` when the
2211    /// document defines no citations.
2212    fn citation_block(&mut self) -> Option<Block> {
2213        if self.defs.citations.is_empty() {
2214            return None;
2215        }
2216        let items = self
2217            .defs
2218            .citations
2219            .iter()
2220            .map(|(label, body)| {
2221                let term = vec![Inline::Span(
2222                    Box::new(Attr {
2223                        id: label.clone().into(),
2224                        classes: vec!["citation-label".into()],
2225                        attributes: Vec::new(),
2226                    }),
2227                    vec![Inline::Str(label.clone().into())],
2228                )];
2229                (term, vec![self.blocks(body)])
2230            })
2231            .collect();
2232        Some(Block::Div(
2233            Box::new(Attr {
2234                id: "citations".into(),
2235                classes: Vec::new(),
2236                attributes: Vec::new(),
2237            }),
2238            vec![Block::DefinitionList(items)],
2239        ))
2240    }
2241
2242    // --- table directives ---
2243
2244    /// A `csv-table` directive: its rows are comma-separated values, with an optional explicit
2245    /// `:header:` row and/or a count of leading `:header-rows:` taken from the data.
2246    fn csv_table(
2247        &mut self,
2248        argument: &str,
2249        options: &[(String, String)],
2250        content: &[String],
2251        out: &mut Vec<Block>,
2252    ) {
2253        let widths = directive_widths(options);
2254        let mut records = parse_csv(&content.join("\n"));
2255        let mut header_records: Vec<Vec<String>> = Vec::new();
2256        if let Some((_, header)) = options.iter().find(|(k, _)| k == "header") {
2257            header_records.extend(parse_csv(header));
2258        }
2259        let take = directive_count(options, "header-rows").min(records.len());
2260        header_records.extend(records.drain(..take));
2261        let num_cols = header_records
2262            .iter()
2263            .chain(records.iter())
2264            .map(Vec::len)
2265            .max()
2266            .unwrap_or(0);
2267        if num_cols == 0 {
2268            return;
2269        }
2270        let head_rows = header_records
2271            .iter()
2272            .map(|r| self.csv_row(r, num_cols))
2273            .collect();
2274        let body_rows = records.iter().map(|r| self.csv_row(r, num_cols)).collect();
2275        out.push(self.make_table(argument, widths.as_deref(), head_rows, body_rows, num_cols));
2276    }
2277
2278    fn csv_row(&mut self, fields: &[String], num_cols: usize) -> Vec<Cell> {
2279        (0..num_cols)
2280            .map(|i| {
2281                let content = match fields.get(i) {
2282                    Some(f) if !f.is_empty() => vec![Block::Plain(self.inlines(f))],
2283                    _ => Vec::new(),
2284                };
2285                Cell {
2286                    attr: Attr::default(),
2287                    align: Alignment::AlignDefault,
2288                    row_span: 1,
2289                    col_span: 1,
2290                    content,
2291                }
2292            })
2293            .collect()
2294    }
2295
2296    /// A `list-table` directive: a two-level bullet list where each outer item is a row and its
2297    /// nested bullet list supplies the row's cells.
2298    fn list_table(
2299        &mut self,
2300        argument: &str,
2301        options: &[(String, String)],
2302        content: &[String],
2303        out: &mut Vec<Block>,
2304    ) {
2305        let widths = directive_widths(options);
2306        let mut rows: Vec<Vec<Vec<Block>>> = Vec::new();
2307        for block in self.blocks(content) {
2308            if let Block::BulletList(items) = block {
2309                for item in items {
2310                    let mut cells = Vec::new();
2311                    for inner in item {
2312                        if let Block::BulletList(cell_items) = inner {
2313                            cells.extend(cell_items);
2314                        }
2315                    }
2316                    rows.push(cells);
2317                }
2318            }
2319        }
2320        let num_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
2321        if num_cols == 0 {
2322            return;
2323        }
2324        let take = directive_count(options, "header-rows").min(rows.len());
2325        let head_src: Vec<Vec<Vec<Block>>> = rows.drain(..take).collect();
2326        let head_rows = head_src
2327            .into_iter()
2328            .map(|r| list_row(r, num_cols))
2329            .collect();
2330        let body_rows = rows.into_iter().map(|r| list_row(r, num_cols)).collect();
2331        out.push(self.make_table(argument, widths.as_deref(), head_rows, body_rows, num_cols));
2332    }
2333
2334    /// Assemble a table from already-built header and body cell rows, a caption drawn from the
2335    /// directive argument, and either explicit column widths or the default.
2336    fn make_table(
2337        &mut self,
2338        caption: &str,
2339        widths: Option<&[f64]>,
2340        head_rows: Vec<Vec<Cell>>,
2341        body_rows: Vec<Vec<Cell>>,
2342        num_cols: usize,
2343    ) -> Block {
2344        let caption = if caption.trim().is_empty() {
2345            Caption::default()
2346        } else {
2347            Caption {
2348                short: None,
2349                long: vec![Block::Plain(self.inlines(caption.trim()))],
2350            }
2351        };
2352        let col_specs = (0..num_cols)
2353            .map(|i| ColSpec {
2354                align: Alignment::AlignDefault,
2355                width: match widths {
2356                    Some(w) if w.len() == num_cols => w
2357                        .get(i)
2358                        .copied()
2359                        .map_or(ColWidth::ColWidthDefault, ColWidth::ColWidth),
2360                    _ => ColWidth::ColWidthDefault,
2361                },
2362            })
2363            .collect();
2364        Block::Table(Box::new(Table {
2365            attr: Attr::default(),
2366            caption,
2367            col_specs,
2368            head: TableHead {
2369                attr: Attr::default(),
2370                rows: cells_to_rows(head_rows),
2371            },
2372            bodies: vec![TableBody {
2373                attr: Attr::default(),
2374                row_head_columns: 0,
2375                head: Vec::new(),
2376                body: cells_to_rows(body_rows),
2377            }],
2378            foot: TableFoot::default(),
2379        }))
2380    }
2381
2382    // --- grid tables ---
2383
2384    // Column widths are small character spans, far inside f64's exact-integer range.
2385    #[allow(clippy::cast_precision_loss)]
2386    // The grid parser walks the character matrix in one pass; splitting it would scatter the shared
2387    // cursor state across helpers without making the logic clearer.
2388    #[allow(clippy::too_many_lines)]
2389    fn grid_table(
2390        &mut self,
2391        lines: &[String],
2392        start: usize,
2393        out: &mut Vec<Block>,
2394    ) -> Option<usize> {
2395        // The table runs over consecutive lines that belong to the grid (a border or a `|`-led row).
2396        let mut end = start;
2397        while lines.get(end).is_some_and(|l| is_grid_line(l)) {
2398            end += 1;
2399        }
2400        if end - start < 3 {
2401            return None;
2402        }
2403        // A padded character matrix so every position can be addressed by (row, column).
2404        let width = (start..end)
2405            .filter_map(|i| lines.get(i))
2406            .map(|l| l.chars().count())
2407            .max()
2408            .unwrap_or(0);
2409        let block: Vec<Vec<char>> = (start..end)
2410            .filter_map(|i| lines.get(i))
2411            .map(|l| {
2412                let mut row: Vec<char> = l.chars().collect();
2413                row.resize(width, ' ');
2414                row
2415            })
2416            .collect();
2417
2418        let cells = scan_grid_cells(&block)?;
2419        if cells.is_empty() {
2420            return None;
2421        }
2422
2423        // The vertical and horizontal grid lines, as the distinct cell-edge positions.
2424        let mut col_edges: Vec<usize> = cells.iter().flat_map(|c| [c.left, c.right]).collect();
2425        col_edges.sort_unstable();
2426        col_edges.dedup();
2427        let mut row_edges: Vec<usize> = cells.iter().flat_map(|c| [c.top, c.bottom]).collect();
2428        row_edges.sort_unstable();
2429        row_edges.dedup();
2430        let col_index = |pos: usize| col_edges.iter().position(|e| *e == pos);
2431        let row_index = |pos: usize| row_edges.iter().position(|e| *e == pos);
2432        let num_cols = col_edges.len().checked_sub(1)?;
2433        let num_rows = row_edges.len().checked_sub(1)?;
2434        if num_cols == 0 || num_rows == 0 {
2435            return None;
2436        }
2437
2438        // Place each cell into a row/column grid, validating that the cells tile it exactly.
2439        let mut grid: Vec<Vec<Option<GridCell>>> = vec![vec![None; num_cols]; num_rows];
2440        let mut covered = vec![vec![false; num_cols]; num_rows];
2441        for cell in &cells {
2442            let r0 = row_index(cell.top)?;
2443            let r1 = row_index(cell.bottom)?;
2444            let c0 = col_index(cell.left)?;
2445            let c1 = col_index(cell.right)?;
2446            let text: String = (cell.top + 1..cell.bottom)
2447                .filter_map(|r| block.get(r))
2448                .map(|row| {
2449                    let seg: String = row
2450                        .get(cell.left + 1..cell.right)
2451                        .map_or_else(String::new, |s| s.iter().collect());
2452                    seg.trim_end().to_string()
2453                })
2454                .collect::<Vec<_>>()
2455                .join("\n");
2456            for r in r0..r1 {
2457                for c in c0..c1 {
2458                    if covered.get(r).and_then(|row| row.get(c)).copied() != Some(false) {
2459                        return None;
2460                    }
2461                    if let Some(slot) = covered.get_mut(r).and_then(|row| row.get_mut(c)) {
2462                        *slot = true;
2463                    }
2464                }
2465            }
2466            if let Some(slot) = grid.get_mut(r0).and_then(|row| row.get_mut(c0)) {
2467                *slot = Some(GridCell {
2468                    text,
2469                    row_span: r1 - r0,
2470                    col_span: c1 - c0,
2471                });
2472            }
2473        }
2474        if covered.iter().any(|row| row.iter().any(|c| !c)) {
2475            return None;
2476        }
2477
2478        // A `=` separator line marks the boundary between header rows and body rows.
2479        let header_rows = row_edges
2480            .iter()
2481            .position(|edge| block.get(*edge).is_some_and(|row| row.contains(&'=')))
2482            .unwrap_or(0);
2483
2484        let last = *col_edges.last()?;
2485        let first = *col_edges.first()?;
2486        let total = last.saturating_sub(first).saturating_sub(num_cols);
2487        let divisor = total.max(72) as f64;
2488        let col_specs: Vec<ColSpec> = (0..num_cols)
2489            .map(|i| {
2490                let lo = col_edges.get(i).copied().unwrap_or(0);
2491                let hi = col_edges.get(i + 1).copied().unwrap_or(lo);
2492                ColSpec {
2493                    align: Alignment::AlignDefault,
2494                    width: ColWidth::ColWidth(hi.saturating_sub(lo) as f64 / divisor),
2495                }
2496            })
2497            .collect();
2498
2499        let mut head_rows = Vec::new();
2500        let mut body_rows = Vec::new();
2501        for (r, row) in grid.iter().enumerate() {
2502            let built = self.grid_row(row);
2503            if r < header_rows {
2504                head_rows.push(built);
2505            } else {
2506                body_rows.push(built);
2507            }
2508        }
2509
2510        let table = Table {
2511            attr: Attr::default(),
2512            caption: Caption::default(),
2513            col_specs,
2514            head: TableHead {
2515                attr: Attr::default(),
2516                rows: head_rows,
2517            },
2518            bodies: vec![TableBody {
2519                attr: Attr::default(),
2520                row_head_columns: 0,
2521                head: Vec::new(),
2522                body: body_rows,
2523            }],
2524            foot: TableFoot::default(),
2525        };
2526        out.push(Block::Table(Box::new(table)));
2527        Some(end)
2528    }
2529
2530    /// Build one table row, emitting only the cells that originate in this row band; positions
2531    /// covered by a row- or column-spanning cell that began earlier carry no cell of their own.
2532    fn grid_row(&mut self, row: &[Option<GridCell>]) -> Row {
2533        let cells = row
2534            .iter()
2535            .filter_map(|slot| slot.as_ref())
2536            .map(|cell| {
2537                let row_span = i32::try_from(cell.row_span).unwrap_or(1);
2538                let col_span = i32::try_from(cell.col_span).unwrap_or(1);
2539                self.text_cell(&cell.text, row_span, col_span)
2540            })
2541            .collect();
2542        Row {
2543            attr: Attr::default(),
2544            cells,
2545        }
2546    }
2547
2548    /// Build a cell from its newline-joined text. The shared blank-edges/min-indent normalization is
2549    /// applied, the text is parsed as block content, and a lone paragraph is demoted to a plain block.
2550    fn text_cell(&mut self, text: &str, row_span: i32, col_span: i32) -> Cell {
2551        let raw: Vec<String> = text.split('\n').map(str::to_string).collect();
2552        let trimmed = trim_blank_edges(raw);
2553        let min_indent = trimmed
2554            .iter()
2555            .filter(|l| !is_blank(l))
2556            .map(|l| indent_of(l))
2557            .min()
2558            .unwrap_or(0);
2559        let region: Vec<String> = trimmed
2560            .iter()
2561            .map(|l| {
2562                if is_blank(l) {
2563                    String::new()
2564                } else {
2565                    dedent(l, min_indent)
2566                }
2567            })
2568            .collect();
2569        let mut content = self.blocks(&region);
2570        if let [Block::Para(_)] = content.as_slice()
2571            && let Some(Block::Para(inlines)) = content.pop()
2572        {
2573            content.push(Block::Plain(inlines));
2574        }
2575        Cell {
2576            attr: Attr::default(),
2577            align: Alignment::AlignDefault,
2578            row_span,
2579            col_span,
2580            content,
2581        }
2582    }
2583
2584    // --- simple tables ---
2585
2586    /// Parse a simple table beginning at its top border. Columns come from the `=` runs of the top
2587    /// border; the bottom border is the first `=` border followed by a blank line or the end of
2588    /// input, and any earlier interior `=` border separates the header rows from the body. Returns
2589    /// `None` (so the caller falls back to paragraph parsing) when no bottom border is found.
2590    fn simple_table(
2591        &mut self,
2592        lines: &[String],
2593        start: usize,
2594        out: &mut Vec<Block>,
2595    ) -> Option<usize> {
2596        let columns = simple_columns(line_at(lines, start))?;
2597        let mut header_end: Option<usize> = None;
2598        let mut bottom: Option<usize> = None;
2599        let mut i = start + 1;
2600        while let Some(line) = lines.get(i) {
2601            if is_equals_border(line) {
2602                let next_blank = lines.get(i + 1).is_none_or(|l| is_blank(l));
2603                if next_blank {
2604                    bottom = Some(i);
2605                    break;
2606                }
2607                if header_end.is_none() {
2608                    header_end = Some(i);
2609                }
2610            }
2611            i += 1;
2612        }
2613        let bottom = bottom?;
2614        let header_lines: Vec<String> = match header_end {
2615            Some(end) => (start + 1..end)
2616                .filter_map(|j| lines.get(j).cloned())
2617                .collect(),
2618            None => Vec::new(),
2619        };
2620        let body_start = header_end.map_or(start + 1, |end| end + 1);
2621        let body_lines: Vec<String> = (body_start..bottom)
2622            .filter_map(|j| lines.get(j).cloned())
2623            .collect();
2624
2625        let head_rows = self.simple_rows(&header_lines, &columns);
2626        let body_rows = self.simple_rows(&body_lines, &columns);
2627
2628        let col_specs: Vec<ColSpec> = columns
2629            .iter()
2630            .map(|_| ColSpec {
2631                align: Alignment::AlignDefault,
2632                width: ColWidth::ColWidthDefault,
2633            })
2634            .collect();
2635        let table = Table {
2636            attr: Attr::default(),
2637            caption: Caption::default(),
2638            col_specs,
2639            head: TableHead {
2640                attr: Attr::default(),
2641                rows: head_rows,
2642            },
2643            bodies: vec![TableBody {
2644                attr: Attr::default(),
2645                row_head_columns: 0,
2646                head: Vec::new(),
2647                body: body_rows,
2648            }],
2649            foot: TableFoot::default(),
2650        };
2651        out.push(Block::Table(Box::new(table)));
2652        Some(bottom + 1)
2653    }
2654
2655    /// Group a region's lines into table rows. A line whose first column carries text starts a new
2656    /// row; a text line with a blank first column continues the current one. A `-` underline ends the
2657    /// row above it, joining the columns its filled margins span.
2658    fn simple_rows(&mut self, lines: &[String], columns: &[(usize, usize)]) -> Vec<Row> {
2659        let mut rows = Vec::new();
2660        let mut current: Vec<String> = Vec::new();
2661        for line in lines {
2662            if let Some(groups) = span_underline_groups(line, columns) {
2663                if !current.is_empty() {
2664                    rows.push(self.simple_row(&current, columns, &groups));
2665                    current.clear();
2666                }
2667                continue;
2668            }
2669            if is_blank(line) {
2670                if !current.is_empty() {
2671                    current.push(String::new());
2672                }
2673                continue;
2674            }
2675            if !current.is_empty() && first_column_blank(line, columns) {
2676                current.push(line.clone());
2677            } else {
2678                if !current.is_empty() {
2679                    let groups = default_groups(columns.len());
2680                    rows.push(self.simple_row(&current, columns, &groups));
2681                    current.clear();
2682                }
2683                current.push(line.clone());
2684            }
2685        }
2686        if !current.is_empty() {
2687            let groups = default_groups(columns.len());
2688            rows.push(self.simple_row(&current, columns, &groups));
2689        }
2690        rows
2691    }
2692
2693    fn simple_row(
2694        &mut self,
2695        row_lines: &[String],
2696        columns: &[(usize, usize)],
2697        groups: &[(usize, usize)],
2698    ) -> Row {
2699        let last_col = columns.len().saturating_sub(1);
2700        let cells = groups
2701            .iter()
2702            .map(|(a, b)| {
2703                let lo = columns.get(*a).map_or(0, |c| c.0);
2704                let hi = if *b >= last_col {
2705                    usize::MAX
2706                } else {
2707                    columns.get(b + 1).map_or(usize::MAX, |c| c.0)
2708                };
2709                let text = row_lines
2710                    .iter()
2711                    .map(|line| {
2712                        let cs: Vec<char> = line.chars().collect();
2713                        let end = hi.min(cs.len());
2714                        let seg: String = cs
2715                            .get(lo..end)
2716                            .map(|s| s.iter().collect())
2717                            .unwrap_or_default();
2718                        seg.trim_end().to_string()
2719                    })
2720                    .collect::<Vec<_>>()
2721                    .join("\n");
2722                self.text_cell(&text, 1, i32::try_from(b - a + 1).unwrap_or(1))
2723            })
2724            .collect();
2725        Row {
2726            attr: Attr::default(),
2727            cells,
2728        }
2729    }
2730
2731    // --- inline parsing ---
2732
2733    fn inlines(&mut self, text: &str) -> Vec<Inline> {
2734        let mut out = self.inlines_no_trim(text);
2735        trim_inline_ends(&mut out);
2736        out
2737    }
2738
    /// Parse inline markup without trimming the leading and trailing whitespace nodes. Interpreted
    /// text keeps the spacing around its content, so role content is parsed through this entry.
    ///
    /// The text is scanned one character at a time. Plain characters accumulate in `pending` and
    /// are flushed through `push_text` whenever a construct is recognized, and once more at the
    /// end. At each position the recognizers are tried in a fixed order: backslash escape, inline
    /// target, simple reference, delimiter-led markup (`try_markup`), autolink, and, when the
    /// `smart` extension is enabled, typographic punctuation. The first one that matches consumes
    /// its span; a character none of them claims is kept as literal text.
    fn inlines_no_trim(&mut self, text: &str) -> Vec<Inline> {
        let chars: Vec<char> = text.chars().collect();
        let smart = self.ext.contains(Extension::Smart);
        let mut out = Vec::new();
        let mut pending = String::new();
        let mut pos = 0;
        while pos < chars.len() {
            // `pos` is in range here, so the `' '` fallback is never taken.
            let ch = chars.get(pos).copied().unwrap_or(' ');
            // The character just before `pos`, or `None` at the start of the text.
            let prev = pos.checked_sub(1).and_then(|p| chars.get(p)).copied();
            // A backslash escape: escaped whitespace is dropped along with the backslash, any
            // other escaped character is kept literally without it, and a backslash that ends the
            // text stays as itself.
            if ch == '\\' {
                match chars.get(pos + 1) {
                    Some(next) if next.is_whitespace() => pos += 2,
                    Some(next) => {
                        pending.push(*next);
                        pos += 2;
                    }
                    None => {
                        pending.push('\\');
                        pos += 1;
                    }
                }
                continue;
            }
            // An inline internal hyperlink target `_`name`` becomes a span carrying a slug
            // identifier so the location can be linked to.
            if ch == '_'
                && chars.get(pos + 1) == Some(&'`')
                && inline_start_ok(prev)
                && let Some((span, next)) = self.inline_target(&chars, pos)
            {
                push_text(&mut out, &pending);
                pending.clear();
                out.push(span);
                pos = next;
                continue;
            }
            // A trailing underscore closes a simple hyperlink reference whose name is the run of
            // name characters that has just accumulated.
            if ch == '_'
                && let Some((link, next)) = self.simple_reference(&chars, pos, &mut pending)
            {
                // `simple_reference` receives `pending` mutably; whatever text it leaves there is
                // flushed ahead of the link.
                push_text(&mut out, &pending);
                pending.clear();
                out.push(link);
                pos = next;
                continue;
            }
            if let Some((inline, drop_space, next)) = self.try_markup(&chars, pos) {
                push_text(&mut out, &pending);
                pending.clear();
                // The construct may ask for a space node directly before it to be removed.
                if drop_space && matches!(out.last(), Some(Inline::Space)) {
                    out.pop();
                }
                out.extend(inline);
                pos = next;
                continue;
            }
            // A bare URI or email address that begins at a word boundary is auto-linked.
            if autolink_boundary(prev)
                && let Some((link, next)) = autolink(&chars, pos)
            {
                push_text(&mut out, &pending);
                pending.clear();
                out.push(link);
                pos = next;
                continue;
            }
            // Typographic punctuation under the `smart` extension: paired quotes become quotation
            // nodes, a lone quote its apt curly glyph, hyphen runs en/em dashes, dot runs ellipses.
            if smart {
                match ch {
                    '"' | '\'' => {
                        if let Some((quoted, next)) = self.smart_quote(&chars, pos, ch) {
                            push_text(&mut out, &pending);
                            pending.clear();
                            out.push(quoted);
                            pos = next;
                            continue;
                        }
                        // No quoted run starts here: emit the single glyph chosen for this quote.
                        pending.push(quote_glyph(&chars, pos, ch));
                        pos += 1;
                        continue;
                    }
                    '-' => {
                        // The whole run of hyphens is folded and consumed at once.
                        let n = run_length(&chars, pos, '-');
                        pending.push_str(&fold_dashes(n));
                        pos += n;
                        continue;
                    }
                    '.' => {
                        // The whole run of dots is folded and consumed at once.
                        let n = run_length(&chars, pos, '.');
                        pending.push_str(&fold_ellipsis(n));
                        pos += n;
                        continue;
                    }
                    _ => {}
                }
            }
            // Nothing matched: the character is ordinary text.
            pending.push(ch);
            pos += 1;
        }
        // Flush the text that trails the last recognized construct.
        push_text(&mut out, &pending);
        out
    }
2845
2846    /// An inline internal hyperlink target (written `` _`name` `` in source): a span whose
2847    /// identifier is the slug of its text, marking a location elsewhere markup can link to.
2848    fn inline_target(&mut self, chars: &[char], pos: usize) -> Option<(Inline, usize)> {
2849        let (name, end) = find_close_literal(chars, pos + 2, "`")?;
2850        if name.trim().is_empty() {
2851            return None;
2852        }
2853        let inner = self.inlines(&name);
2854        let id = carta_ast::slug(&carta_ast::to_plain_text(&inner));
2855        Some((
2856            Inline::Span(
2857                Box::new(Attr {
2858                    id: id.into(),
2859                    classes: Vec::new(),
2860                    attributes: Vec::new(),
2861                }),
2862                inner,
2863            ),
2864            end,
2865        ))
2866    }
2867
2868    /// A quoted run opened by a straight quote: scan for a matching closer and, on success, parse the
2869    /// interior recursively into a quotation node. Returns `None` when the quote cannot open a run or
2870    /// has no closer, leaving the caller to fold it into a lone glyph.
2871    fn smart_quote(&mut self, chars: &[char], pos: usize, quote: char) -> Option<(Inline, usize)> {
2872        if !can_open_quote(chars, pos) {
2873            return None;
2874        }
2875        // A single quote against a preceding letter or digit is a word-internal apostrophe, never the
2876        // opener of a quoted run.
2877        if quote == '\'' {
2878            let before = pos.checked_sub(1).and_then(|p| chars.get(p)).copied();
2879            if before.is_some_and(char::is_alphanumeric) {
2880                return None;
2881            }
2882        }
2883        let mut j = pos + 1;
2884        while j < chars.len() {
2885            match chars.get(j).copied() {
2886                Some('\\') => j += 2,
2887                Some(c) if c == quote && can_close_quote(chars, j, quote) => {
2888                    let content: String = chars.get(pos + 1..j)?.iter().collect();
2889                    let inner = self.inlines(&content);
2890                    return Some((Inline::Quoted(quote_type(quote), inner), j + 1));
2891                }
2892                Some(_) => j += 1,
2893                None => break,
2894            }
2895        }
2896        None
2897    }
2898
2899    /// Attempt to parse inline markup at `pos`. On success returns the produced inlines, whether a
2900    /// directly preceding space should be dropped (footnotes), and the index past the construct.
2901    fn try_markup(&mut self, chars: &[char], pos: usize) -> Option<(Vec<Inline>, bool, usize)> {
2902        let ch = chars.get(pos).copied()?;
2903        let prev = pos.checked_sub(1).and_then(|p| chars.get(p)).copied();
2904        match ch {
2905            '`' => self.backtick(chars, pos, prev),
2906            '*' => Self::emphasis(chars, pos, prev),
2907            '|' => self.substitution(chars, pos, prev),
2908            '[' => self.note_reference(chars, pos, prev),
2909            ':' => self.role_prefix(chars, pos, prev),
2910            _ => None,
2911        }
2912    }
2913
2914    fn emphasis(
2915        chars: &[char],
2916        pos: usize,
2917        prev: Option<char>,
2918    ) -> Option<(Vec<Inline>, bool, usize)> {
2919        if !inline_start_ok(prev) {
2920            return None;
2921        }
2922        if chars.get(pos + 1) == Some(&'*') {
2923            if chars.get(pos + 2).is_none_or(|c| c.is_whitespace()) {
2924                return None;
2925            }
2926            let (inner, end) = Self::scan_strong(chars, pos)?;
2927            if quote_suppresses(prev, chars.get(end).copied()) {
2928                return None;
2929            }
2930            return Some((vec![Inline::Strong(inner)], false, end));
2931        }
2932        if chars.get(pos + 1).is_none_or(|c| c.is_whitespace()) {
2933            return None;
2934        }
2935        let (inner, end) = Self::scan_emphasis(chars, pos)?;
2936        if quote_suppresses(prev, chars.get(end).copied()) {
2937            return None;
2938        }
2939        Some((vec![Inline::Emph(inner)], false, end))
2940    }
2941
2942    /// Scan a strong span opened by `**` at `pos`. Its content is verbatim text in which a single `*`
2943    /// is an ordinary character; the first later run of two or more `*` closes the span. Returns the
2944    /// parsed content and the index past the closing delimiter, or `None` when no closer is found.
2945    fn scan_strong(chars: &[char], pos: usize) -> Option<(Vec<Inline>, usize)> {
2946        let mut pending = String::new();
2947        let mut i = pos + 2;
2948        while i < chars.len() {
2949            match chars.get(i).copied() {
2950                Some('\\') => {
2951                    pending.push('\\');
2952                    if let Some(&next) = chars.get(i + 1) {
2953                        pending.push(next);
2954                        i += 2;
2955                    } else {
2956                        i += 1;
2957                    }
2958                }
2959                Some('*') if run_length(chars, i, '*') >= 2 => {
2960                    return Some((literal_text(&pending), i + 2));
2961                }
2962                Some(c) => {
2963                    pending.push(c);
2964                    i += 1;
2965                }
2966                None => break,
2967            }
2968        }
2969        None
2970    }
2971
2972    /// Scan an emphasis span opened by a single `*` at `pos`. A later single `*`, or a `**` run that
2973    /// is followed by whitespace, closes the span (consuming one `*`); a `**` run followed by content
2974    /// is an inner strong start-string that is stripped, flushing the text gathered so far as its own
2975    /// segment. Returns the content segments and the index past the closing `*`, or `None` with no
2976    /// closer.
2977    fn scan_emphasis(chars: &[char], pos: usize) -> Option<(Vec<Inline>, usize)> {
2978        let mut result = Vec::new();
2979        let mut pending = String::new();
2980        let mut i = pos + 1;
2981        while i < chars.len() {
2982            match chars.get(i).copied() {
2983                Some('\\') => {
2984                    pending.push('\\');
2985                    if let Some(&next) = chars.get(i + 1) {
2986                        pending.push(next);
2987                        i += 2;
2988                    } else {
2989                        i += 1;
2990                    }
2991                }
2992                Some('*') => {
2993                    let run = run_length(chars, i, '*');
2994                    let after = chars.get(i + run).copied();
2995                    if run >= 2 && after.is_some_and(|c| !c.is_whitespace()) {
2996                        result.extend(literal_text(&pending));
2997                        pending.clear();
2998                        i += run;
2999                    } else {
3000                        result.extend(literal_text(&pending));
3001                        return Some((result, i + 1));
3002                    }
3003                }
3004                Some(c) => {
3005                    pending.push(c);
3006                    i += 1;
3007                }
3008                None => break,
3009            }
3010        }
3011        None
3012    }
3013
3014    fn backtick(
3015        &mut self,
3016        chars: &[char],
3017        pos: usize,
3018        prev: Option<char>,
3019    ) -> Option<(Vec<Inline>, bool, usize)> {
3020        // An inline literal is recognized wherever its delimiters appear, even mid-word; the other
3021        // backtick constructs require a boundary before their opening delimiter.
3022        if chars.get(pos + 1) == Some(&'`') {
3023            let (content, end) = find_close_literal(chars, pos + 2, "``")?;
3024            return Some((
3025                vec![Inline::Code(
3026                    Box::default(),
3027                    normalize_inline_literal(&content).into(),
3028                )],
3029                false,
3030                end,
3031            ));
3032        }
3033        if !inline_start_ok(prev) {
3034            return None;
3035        }
3036        let (content, mut end) = find_close_literal(chars, pos + 1, "`")?;
3037        // A trailing underscore turns interpreted text into a hyperlink reference.
3038        if chars.get(end) == Some(&'_') {
3039            let anonymous = chars.get(end + 1) == Some(&'_');
3040            end += if anonymous { 2 } else { 1 };
3041            if quote_suppresses(prev, chars.get(end).copied()) {
3042                return None;
3043            }
3044            return Some((vec![self.phrase_reference(&content, anonymous)], false, end));
3045        }
3046        // A trailing role applies to the interpreted text.
3047        if chars.get(end) == Some(&':')
3048            && let Some((role, role_end)) = parse_role(chars, end)
3049        {
3050            if quote_suppresses(prev, chars.get(role_end).copied()) {
3051                return None;
3052            }
3053            let inline = self.apply_role(&role, &content);
3054            return Some((vec![inline], false, role_end));
3055        }
3056        if quote_suppresses(prev, chars.get(end).copied()) {
3057            return None;
3058        }
3059        let role = self.default_role.clone();
3060        Some((vec![self.apply_role(&role, &content)], false, end))
3061    }
3062
3063    fn role_prefix(
3064        &mut self,
3065        chars: &[char],
3066        pos: usize,
3067        prev: Option<char>,
3068    ) -> Option<(Vec<Inline>, bool, usize)> {
3069        if !inline_start_ok(prev) {
3070            return None;
3071        }
3072        let (role, after) = parse_role(chars, pos)?;
3073        if chars.get(after) != Some(&'`') {
3074            return None;
3075        }
3076        let (content, end) = find_close_literal(chars, after + 1, "`")?;
3077        Some((vec![self.apply_role(&role, &content)], false, end))
3078    }
3079
3080    fn apply_role(&mut self, role: &str, content: &str) -> Inline {
3081        let chain = self.resolve_role(role);
3082        match chain.base.as_str() {
3083            "emphasis" => Inline::Emph(self.inlines_no_trim(content)),
3084            "strong" => Inline::Strong(self.inlines_no_trim(content)),
3085            "subscript" | "sub" => Inline::Subscript(self.inlines_no_trim(content)),
3086            "superscript" | "sup" => Inline::Superscript(self.inlines_no_trim(content)),
3087            "math" => Inline::Math(MathType::InlineMath, content.into()),
3088            // A raw role emits its content verbatim under the format its chain declares (empty when
3089            // none is given); the accumulated classes do not apply to raw inlines.
3090            "raw" => Inline::RawInline(
3091                Format(chain.format.unwrap_or_default().into()),
3092                content.into(),
3093            ),
3094            // A code/literal role's content is verbatim; a chain's classes lead, then the language.
3095            "literal" | "code" => {
3096                let mut classes = chain.classes;
3097                if let Some(language) = chain.language {
3098                    classes.push(language);
3099                }
3100                Inline::Code(Box::new(class_attr(classes)), content.into())
3101            }
3102            "title-reference" | "title" | "t" => {
3103                let mut classes = chain.classes;
3104                classes.push("title-ref".to_string());
3105                Inline::Span(Box::new(class_attr(classes)), self.inlines_no_trim(content))
3106            }
3107            // A chain that bottoms out in no base role (a plain custom role) wraps the content in a
3108            // span carrying its accumulated classes.
3109            "" => Inline::Span(
3110                Box::new(class_attr(chain.classes)),
3111                self.inlines_no_trim(content),
3112            ),
3113            // An unrecognized role keeps its content verbatim, tagged with the role name so the
3114            // information survives a round-trip.
3115            other => Inline::Code(
3116                Box::new(Attr {
3117                    id: carta_ast::Text::default(),
3118                    classes: vec!["interpreted-text".into()],
3119                    attributes: vec![("role".into(), other.into())],
3120                }),
3121                content.into(),
3122            ),
3123        }
3124    }
3125
3126    /// Follow a custom-role chain to the builtin role that supplies its rendering, accumulating the
3127    /// classes each role in the chain contributes (its `:class:` list, or its own name when it sets
3128    /// none) outermost-first, along with the first `:format:` and `:language:` the chain declares.
3129    /// `base` is the builtin role name, an unknown role name, or empty for a plain (baseless) role.
3130    fn resolve_role(&self, role: &str) -> RoleChain {
3131        let mut chain = RoleChain::default();
3132        let mut current = role.to_string();
3133        let mut seen = std::collections::BTreeSet::new();
3134        loop {
3135            if !seen.insert(current.clone()) {
3136                return chain;
3137            }
3138            let Some(def) = self.custom_roles.get(&current) else {
3139                chain.base = current;
3140                return chain;
3141            };
3142            if def.classes.is_empty() {
3143                chain.classes.push(current.clone());
3144            } else {
3145                chain.classes.extend(def.classes.iter().cloned());
3146            }
3147            if chain.format.is_none() {
3148                chain.format.clone_from(&def.format);
3149            }
3150            if chain.language.is_none() {
3151                chain.language.clone_from(&def.language);
3152            }
3153            match &def.base {
3154                Some(base) => current.clone_from(base),
3155                None => return chain,
3156            }
3157        }
3158    }
3159
3160    fn substitution(
3161        &mut self,
3162        chars: &[char],
3163        pos: usize,
3164        _prev: Option<char>,
3165    ) -> Option<(Vec<Inline>, bool, usize)> {
3166        if chars.get(pos + 1).is_some_and(|c| c.is_whitespace()) {
3167            return None;
3168        }
3169        let (name, mut end) = find_close_literal(chars, pos + 1, "|")?;
3170        // A trailing underscore turns the substitution into a hyperlink reference: the expansion
3171        // becomes the link text and the like-named target supplies the destination.
3172        let referenced = chars.get(end) == Some(&'_');
3173        if referenced {
3174            end += 1;
3175        }
3176        let key = normalize_name(&name);
3177        // A circular substitution reference (`|a|` expanding to text that references `|a|`, or a
3178        // longer cycle through other definitions) would recurse without bound: expanding a
3179        // replacement parses it as inline markup, which re-enters here. RST forbids such cycles,
3180        // so a name already being expanded is left as an unresolved placeholder rather than
3181        // re-entered.
3182        if self.active_substitutions.iter().any(|n| n == &key) {
3183            let mut display = Vec::new();
3184            push_text(&mut display, &format!("|{name}|"));
3185            return Some((
3186                vec![Inline::Link(
3187                    Box::default(),
3188                    display,
3189                    Box::new(Target {
3190                        url: format!("##SUBST##|{name}|").into(),
3191                        title: carta_ast::Text::default(),
3192                    }),
3193                )],
3194                false,
3195                end,
3196            ));
3197        }
3198        let expansion = match self.defs.substitutions.get(&key).cloned() {
3199            Some(Substitution::Replace(text)) => {
3200                self.active_substitutions.push(key.clone());
3201                let inlines = self.inlines(&text);
3202                self.active_substitutions.pop();
3203                // A replacement that expands to several inlines is kept together as one unit.
3204                match inlines.len() {
3205                    1 => inlines,
3206                    _ => vec![Inline::Span(Box::default(), inlines)],
3207                }
3208            }
3209            Some(Substitution::Image(url, attr, alt)) => vec![Inline::Image(
3210                Box::new(attr),
3211                alt,
3212                Box::new(Target {
3213                    url: url.into(),
3214                    title: carta_ast::Text::default(),
3215                }),
3216            )],
3217            None => {
3218                // An undefined substitution is preserved as a placeholder link whose visible text is
3219                // the reference as written and whose destination flags it as unresolved.
3220                let mut display = Vec::new();
3221                push_text(&mut display, &format!("|{name}|"));
3222                return Some((
3223                    vec![Inline::Link(
3224                        Box::default(),
3225                        display,
3226                        Box::new(Target {
3227                            url: format!("##SUBST##|{name}|").into(),
3228                            title: carta_ast::Text::default(),
3229                        }),
3230                    )],
3231                    false,
3232                    end,
3233                ));
3234            }
3235        };
3236        let result = if referenced {
3237            vec![Inline::Link(
3238                Box::default(),
3239                expansion,
3240                Box::new(Target {
3241                    url: defer_reference(&name).into(),
3242                    title: carta_ast::Text::default(),
3243                }),
3244            )]
3245        } else {
3246            expansion
3247        };
3248        Some((result, false, end))
3249    }
3250
3251    fn note_reference(
3252        &mut self,
3253        chars: &[char],
3254        pos: usize,
3255        prev: Option<char>,
3256    ) -> Option<(Vec<Inline>, bool, usize)> {
3257        if !inline_start_ok(prev) {
3258            return None;
3259        }
3260        let (label, after) = find_close_literal(chars, pos + 1, "]")?;
3261        if chars.get(after) != Some(&'_') {
3262            return None;
3263        }
3264        let end = after + 1;
3265        if !inline_end_ok(chars.get(end).copied()) {
3266            return None;
3267        }
3268        if is_citation_label(&label) {
3269            let url = format!("#{label}");
3270            let link = Inline::Link(
3271                Box::new(Attr {
3272                    id: carta_ast::Text::default(),
3273                    classes: vec!["citation".into()],
3274                    attributes: Vec::new(),
3275                }),
3276                vec![Inline::Str(format!("[{label}]").into())],
3277                Box::new(Target {
3278                    url: url.into(),
3279                    title: carta_ast::Text::default(),
3280                }),
3281            );
3282            return Some((vec![link], false, end));
3283        }
3284        let body = self.footnote_body_for(&label)?;
3285        let blocks = self.blocks(&body);
3286        Some((vec![Inline::Note(blocks)], true, end))
3287    }
3288
3289    fn footnote_body_for(&mut self, label: &str) -> Option<Vec<String>> {
3290        if label == "#" {
3291            let body = self.defs.auto_footnotes.get(self.auto_footnote)?.clone();
3292            self.auto_footnote += 1;
3293            Some(body)
3294        } else if label == "*" {
3295            let body = self
3296                .defs
3297                .symbol_footnotes
3298                .get(self.symbol_footnote)?
3299                .clone();
3300            self.symbol_footnote += 1;
3301            Some(body)
3302        } else {
3303            self.defs.footnotes.get(label).cloned()
3304        }
3305    }
3306
3307    fn phrase_reference(&mut self, text: &str, anonymous: bool) -> Inline {
3308        let (label, url) = split_embedded_uri(text);
3309        let display = if label.trim().is_empty() {
3310            url.clone().unwrap_or_default()
3311        } else {
3312            label.clone()
3313        };
3314        let target = match url {
3315            // An embedded destination may itself name another target (`<other_>`); such an indirect
3316            // destination is resolved through the reference table, otherwise it is a concrete URL.
3317            Some(url) => match indirect_referent(&url) {
3318                Some(referent) => defer_reference(&referent),
3319                None => url,
3320            },
3321            None if anonymous => self.next_anonymous(),
3322            None => defer_reference(&label),
3323        };
3324        // A named phrase reference with an embedded destination also defines the label as a target,
3325        // so that bare references to the same name resolve to it.
3326        if !anonymous && !label.trim().is_empty() && !target.starts_with(REF_SENTINEL) {
3327            self.deferred.insert(normalize_name(&label), target.clone());
3328        }
3329        Inline::Link(
3330            Box::default(),
3331            self.inlines(&display),
3332            Box::new(Target {
3333                url: target.into(),
3334                title: carta_ast::Text::default(),
3335            }),
3336        )
3337    }
3338
3339    /// Close a simple reference `name_` (or anonymous `name__`) whose name is the trailing run of
3340    /// name characters already accumulated in `pending`. The name is removed from `pending` and the
3341    /// link returned, with the index past the closing underscore(s).
3342    fn simple_reference(
3343        &mut self,
3344        chars: &[char],
3345        pos: usize,
3346        pending: &mut String,
3347    ) -> Option<(Inline, usize)> {
3348        let anonymous = chars.get(pos + 1) == Some(&'_');
3349        let after = pos + if anonymous { 2 } else { 1 };
3350        if !inline_end_ok(chars.get(after).copied()) {
3351            return None;
3352        }
3353        let (name, before_name) = trailing_reference_name(pending)?;
3354        // The reference name must begin at a word boundary; a name butting up against other text
3355        // (the trailing run of `__init__`, or the `b` in `a __b__ c`) is not a reference.
3356        if !inline_start_ok(before_name) {
3357            return None;
3358        }
3359        // A reference wrapped in matching quotes is suppressed: the quotes and underscore stay
3360        // literal text.
3361        if quote_suppresses(before_name, chars.get(after).copied()) {
3362            return None;
3363        }
3364        let keep = pending.len().saturating_sub(name.len());
3365        pending.truncate(keep);
3366        let url = if anonymous {
3367            self.next_anonymous()
3368        } else {
3369            defer_reference(&name)
3370        };
3371        let link = Inline::Link(
3372            Box::default(),
3373            vec![Inline::Str(name.into())],
3374            Box::new(Target {
3375                url: url.into(),
3376                title: carta_ast::Text::default(),
3377            }),
3378        );
3379        Some((link, after))
3380    }
3381
3382    /// Resolve a normalized reference name to its destination, following an indirect chain (a target
3383    /// whose destination is another target's name) to a concrete URL. Returns an empty string when
3384    /// the name is undefined or the chain forms a cycle.
3385    fn lookup_url(&self, name: &str) -> String {
3386        let mut current = name.to_string();
3387        let mut seen = std::collections::BTreeSet::new();
3388        while seen.insert(current.clone()) {
3389            let Some(url) = self.deferred.get(&current) else {
3390                return String::new();
3391            };
3392            let referent = indirect_referent(url)
3393                .map(|r| normalize_name(&r))
3394                .filter(|key| self.deferred.contains_key(key));
3395            match referent {
3396                Some(next) => current = next,
3397                None => return url.clone(),
3398            }
3399        }
3400        String::new()
3401    }
3402
3403    /// Fill in every link and image destination left deferred during tree construction, now that all
3404    /// targets, sections, and phrase-reference labels have been registered.
3405    fn resolve_deferred(&self, blocks: &mut [Block]) {
3406        for block in blocks {
3407            self.resolve_block(block);
3408        }
3409    }
3410
3411    fn resolve_block(&self, block: &mut Block) {
3412        match block {
3413            Block::Plain(inlines) | Block::Para(inlines) | Block::Header(_, _, inlines) => {
3414                self.resolve_inlines(inlines);
3415            }
3416            Block::LineBlock(lines) => {
3417                for line in lines {
3418                    self.resolve_inlines(line);
3419                }
3420            }
3421            Block::BlockQuote(children)
3422            | Block::Div(_, children)
3423            | Block::Figure(_, _, children) => self.resolve_deferred(children),
3424            Block::BulletList(items) | Block::OrderedList(_, items) => {
3425                for item in items {
3426                    self.resolve_deferred(item);
3427                }
3428            }
3429            Block::DefinitionList(items) => {
3430                for (term, definitions) in items {
3431                    self.resolve_inlines(term);
3432                    for definition in definitions {
3433                        self.resolve_deferred(definition);
3434                    }
3435                }
3436            }
3437            Block::Table(table) => self.resolve_table(table),
3438            _ => {}
3439        }
3440    }
3441
3442    fn resolve_table(&self, table: &mut carta_ast::Table) {
3443        self.resolve_caption(&mut table.caption);
3444        let body_rows = table
3445            .bodies
3446            .iter_mut()
3447            .flat_map(|body| body.head.iter_mut().chain(body.body.iter_mut()));
3448        let rows = table
3449            .head
3450            .rows
3451            .iter_mut()
3452            .chain(body_rows)
3453            .chain(table.foot.rows.iter_mut());
3454        for row in rows {
3455            for cell in &mut row.cells {
3456                self.resolve_deferred(&mut cell.content);
3457            }
3458        }
3459    }
3460
3461    fn resolve_caption(&self, caption: &mut carta_ast::Caption) {
3462        if let Some(short) = &mut caption.short {
3463            self.resolve_inlines(short);
3464        }
3465        self.resolve_deferred(&mut caption.long);
3466    }
3467
3468    fn resolve_inlines(&self, inlines: &mut [Inline]) {
3469        for inline in inlines {
3470            match inline {
3471                Inline::Link(_, children, target) | Inline::Image(_, children, target) => {
3472                    if let Some(name) = target.url.strip_prefix(REF_SENTINEL) {
3473                        target.url = self.lookup_url(name).into();
3474                    }
3475                    self.resolve_inlines(children);
3476                }
3477                Inline::Emph(children)
3478                | Inline::Underline(children)
3479                | Inline::Strong(children)
3480                | Inline::Strikeout(children)
3481                | Inline::Superscript(children)
3482                | Inline::Subscript(children)
3483                | Inline::SmallCaps(children)
3484                | Inline::Quoted(_, children)
3485                | Inline::Cite(_, children)
3486                | Inline::Span(_, children) => self.resolve_inlines(children),
3487                Inline::Note(blocks) => self.resolve_deferred(blocks),
3488                _ => {}
3489            }
3490        }
3491    }
3492
3493    fn next_anonymous(&mut self) -> String {
3494        let idx = self.anonymous;
3495        self.anonymous += 1;
3496        self.defs.anonymous.get(idx).cloned().unwrap_or_default()
3497    }
3498}
3499
3500// --- directive helpers -------------------------------------------------------------------------
3501
3502/// Split a directive body into its argument (first line), its options (the immediately following
3503/// `:key: value` lines), and its content (everything after the blank separator).
3504fn split_directive(body: &[String]) -> (String, Vec<(String, String)>, Vec<String>) {
3505    let mut idx = 0;
3506    let mut argument = String::new();
3507    if let Some(first) = body.first()
3508        && !first.is_empty()
3509        && option_line(first).is_none()
3510    {
3511        argument.clone_from(first);
3512        idx = 1;
3513    }
3514    let mut options = Vec::new();
3515    while let Some(line) = body.get(idx) {
3516        match option_line(line) {
3517            Some(option) => {
3518                options.push(option);
3519                idx += 1;
3520            }
3521            None => break,
3522        }
3523    }
3524    while body.get(idx).is_some_and(std::string::String::is_empty) {
3525        idx += 1;
3526    }
3527    let content = body.get(idx..).unwrap_or(&[]).to_vec();
3528    (argument, options, content)
3529}
3530
3531/// The block content of a directive whose first-line text is body content rather than an argument:
3532/// the body with any leading option lines (and the blank line that follows them) removed.
3533fn directive_content(body: &[String]) -> Vec<String> {
3534    let mut idx = 0;
3535    while body.get(idx).is_some_and(|l| option_line(l).is_some()) {
3536        idx += 1;
3537    }
3538    if idx > 0 {
3539        while body.get(idx).is_some_and(std::string::String::is_empty) {
3540            idx += 1;
3541        }
3542    }
3543    body.get(idx..).unwrap_or(&[]).to_vec()
3544}
3545
/// The normalized column widths from a `:widths:` option, each as a fraction of their sum.
///
/// The value is split on commas and whitespace. Tokens that do not parse as a number are ignored,
/// and so are values that are negative or not finite (`nan`, `inf`), since they cannot describe a
/// width. Returns `None` when the option is absent, empty, set to `auto`, or when the remaining
/// numbers do not add up to a positive, finite total.
fn directive_widths(options: &[(String, String)]) -> Option<Vec<f64>> {
    let value = options.iter().find(|(k, _)| k == "widths")?.1.trim();
    if value.is_empty() || value == "auto" {
        return None;
    }
    let nums: Vec<f64> = value
        .split(|c: char| c == ',' || c.is_whitespace())
        // Empty tokens fail to parse, so they are dropped here along with any other junk.
        .filter_map(|s| s.parse::<f64>().ok())
        // `parse` accepts `nan`, `inf`, and negative numbers, which would poison every fraction.
        .filter(|n| n.is_finite() && *n >= 0.0)
        .collect();
    let sum: f64 = nums.iter().sum();
    // An empty list sums to zero, so this also covers the case of no usable numbers.
    if sum <= 0.0 || !sum.is_finite() {
        return None;
    }
    Some(nums.iter().map(|n| n / sum).collect())
}
3564
/// Read the first option named `key` as a non-negative integer. The result is zero when there is
/// no such option or when its trimmed value does not parse.
fn directive_count(options: &[(String, String)], key: &str) -> usize {
    for (name, value) in options {
        if name == key {
            // Only the first occurrence counts, even when it fails to parse.
            return value.trim().parse().unwrap_or(0);
        }
    }
    0
}
3573
3574/// Wrap each row's cells in a table [`Row`].
3575fn cells_to_rows(rows: Vec<Vec<Cell>>) -> Vec<Row> {
3576    rows.into_iter()
3577        .map(|cells| Row {
3578            attr: Attr::default(),
3579            cells,
3580        })
3581        .collect()
3582}
3583
3584/// Build one `list-table` row, padding short rows with empty cells and demoting a lone paragraph
3585/// in a cell to a plain block.
3586fn list_row(cells: Vec<Vec<Block>>, num_cols: usize) -> Vec<Cell> {
3587    let mut row: Vec<Cell> = cells
3588        .into_iter()
3589        .map(|content| {
3590            let content = if let [Block::Para(_)] = content.as_slice() {
3591                content.into_iter().map(to_plain).collect()
3592            } else {
3593                content
3594            };
3595            Cell {
3596                attr: Attr::default(),
3597                align: Alignment::AlignDefault,
3598                row_span: 1,
3599                col_span: 1,
3600                content,
3601            }
3602        })
3603        .collect();
3604    while row.len() < num_cols {
3605        row.push(Cell {
3606            attr: Attr::default(),
3607            align: Alignment::AlignDefault,
3608            row_span: 1,
3609            col_span: 1,
3610            content: Vec::new(),
3611        });
3612    }
3613    row
3614}
3615
/// Parse comma-separated values into records of trimmed fields.
///
/// Fields may be double-quoted, with a doubled quote denoting a literal quote, and a quoted field
/// may span lines. Spaces and tabs before a field are skipped, and any text between a closing
/// quote and the next terminator is discarded. A separator that ends the input still introduces a
/// final empty field, so `a,b,` yields three fields whether or not a line break follows it.
/// Records consisting of a single empty field (blank lines) are dropped.
fn parse_csv(text: &str) -> Vec<Vec<String>> {
    let chars: Vec<char> = text.chars().collect();
    let mut records: Vec<Vec<String>> = Vec::new();
    let mut record: Vec<String> = Vec::new();
    // True while the last terminator consumed was a field separator, meaning one more field is
    // still owed to the current record.
    let mut field_pending = false;
    let mut i = 0;
    while i < chars.len() {
        while matches!(chars.get(i), Some(' ' | '\t')) {
            i += 1;
        }
        let mut field = String::new();
        if chars.get(i) == Some(&'"') {
            i += 1;
            loop {
                match chars.get(i) {
                    Some('"') if chars.get(i + 1) == Some(&'"') => {
                        field.push('"');
                        i += 2;
                    }
                    Some('"') => {
                        i += 1;
                        break;
                    }
                    Some(c) => {
                        field.push(*c);
                        i += 1;
                    }
                    // An unterminated quote runs to the end of the input.
                    None => break,
                }
            }
            while !matches!(chars.get(i), Some(',' | '\n') | None) {
                i += 1;
            }
        } else {
            while let Some(c) = chars.get(i).copied().filter(|c| !matches!(c, ',' | '\n')) {
                field.push(c);
                i += 1;
            }
        }
        record.push(field.trim().to_string());
        // The cursor now rests on a separator, a record break, or the end of the input.
        let terminator = chars.get(i);
        field_pending = terminator == Some(&',');
        if terminator == Some(&'\n') {
            records.push(std::mem::take(&mut record));
        }
        i += 1;
    }
    // The input ended right after a separator: supply the empty field it introduced.
    if field_pending {
        record.push(String::new());
    }
    if !record.is_empty() {
        records.push(record);
    }
    records.retain(|r| !(r.len() == 1 && r.first().is_some_and(String::is_empty)));
    records
}
3678
3679/// Parse a directive option line `:key: value`, returning the key and its trimmed value.
3680fn option_line(line: &str) -> Option<(String, String)> {
3681    let (name, col) = field_marker(line)?;
3682    let value: String = line.chars().skip(col).collect();
3683    Some((name, value.trim().to_string()))
3684}
3685
3686/// Build the attributes of a code block from its language argument and options.
3687fn code_attr(argument: &str, options: &[(String, String)]) -> Attr {
3688    let mut classes = Vec::new();
3689    let lang = argument.trim();
3690    if !lang.is_empty() {
3691        classes.push(lang.to_string());
3692    }
3693    let mut id = String::new();
3694    let mut attributes = Vec::new();
3695    for (key, value) in options {
3696        match key.as_str() {
3697            "name" => id.clone_from(value),
3698            "class" => classes.extend(value.split_whitespace().map(str::to_string)),
3699            // Line numbering is requested by a marker class; a non-empty value sets the first line.
3700            "number-lines" => {
3701                classes.push("numberLines".to_string());
3702                let start = value.trim();
3703                if !start.is_empty() {
3704                    attributes.push(("startFrom".to_string(), start.to_string()));
3705                }
3706            }
3707            other => attributes.push((other.to_string(), value.clone())),
3708        }
3709    }
3710    Attr {
3711        id: id.into(),
3712        classes: classes.into_iter().map(Into::into).collect(),
3713        attributes: attributes
3714            .into_iter()
3715            .map(|(k, v)| (k.into(), v.into()))
3716            .collect(),
3717    }
3718}
3719
3720/// Build the attributes, description, and destination of an image from its URI argument and options.
3721/// The returned classes are the plain `:class:` list; callers that render a standalone image fold
3722/// the alignment into them with [`image_classes`].
3723fn image_parts(argument: &str, options: &[(String, String)]) -> (Attr, Vec<Inline>, String) {
3724    let url = argument.split_whitespace().collect::<Vec<_>>().join("");
3725    let mut id = String::new();
3726    let mut description = Vec::new();
3727    for (key, value) in options {
3728        match key.as_str() {
3729            "alt" => description = vec![Inline::Str(value.clone().into())],
3730            "name" => id.clone_from(value),
3731            _ => {}
3732        }
3733    }
3734    (
3735        Attr {
3736            id: id.into(),
3737            classes: class_list(options, "class")
3738                .into_iter()
3739                .map(Into::into)
3740                .collect(),
3741            attributes: image_dimensions(options)
3742                .into_iter()
3743                .map(|(k, v)| (k.into(), v.into()))
3744                .collect(),
3745        },
3746        description,
3747        url,
3748    )
3749}
3750
3751/// The classes of a standalone image: the `:class:` list, repeated, with the alignment appended to
3752/// the last entry (or standing alone when there are no classes).
3753fn image_classes(options: &[(String, String)]) -> Vec<String> {
3754    let classes = class_list(options, "class");
3755    aligned_classes(classes.clone(), classes, &align_suffix(options))
3756}
3757
3758/// Build the attributes of a figure from its options: its `:figclass:` and `:class:` lists with the
3759/// alignment folded in. The figure's `:name:` identifies its image, not the figure itself.
3760fn figure_attr(options: &[(String, String)]) -> Attr {
3761    Attr {
3762        id: carta_ast::Text::default(),
3763        classes: aligned_classes(
3764            class_list(options, "figclass"),
3765            class_list(options, "class"),
3766            &align_suffix(options),
3767        )
3768        .into_iter()
3769        .map(Into::into)
3770        .collect(),
3771        attributes: Vec::new(),
3772    }
3773}
3774
/// Collect the whitespace-separated words of every option named `key`, in source order.
fn class_list(options: &[(String, String)], key: &str) -> Vec<String> {
    let mut words = Vec::new();
    for (name, value) in options {
        if name == key {
            words.extend(value.split_whitespace().map(str::to_string));
        }
    }
    words
}
3783
/// The class contributed by the first `:align:` option, written `align-<value>` with the value
/// trimmed. Empty when there is no such option or its value is blank.
fn align_suffix(options: &[(String, String)]) -> String {
    let declared = options
        .iter()
        .find(|(name, _)| name == "align")
        .map(|(_, value)| value.trim());
    match declared {
        Some(side) if !side.is_empty() => format!("align-{side}"),
        _ => String::new(),
    }
}
3793
/// Join two class lists, folding in an optional alignment class. An empty `align` leaves the
/// lists simply concatenated. Otherwise `align` is glued onto the end of the second list's last
/// class, or becomes a class of its own when the second list is empty.
fn aligned_classes(first: Vec<String>, second: Vec<String>, align: &str) -> Vec<String> {
    let mut tail = second;
    if !align.is_empty() {
        if let Some(last) = tail.last_mut() {
            last.push_str(align);
        } else {
            tail.push(align.to_string());
        }
    }
    let mut combined = first;
    combined.append(&mut tail);
    combined
}
3814
3815/// The `width`/`height` attributes of an image, each normalized and scaled by an `:scale:` option.
3816fn image_dimensions(options: &[(String, String)]) -> Vec<(String, String)> {
3817    let scale = options
3818        .iter()
3819        .find(|(k, _)| k == "scale")
3820        .and_then(|(_, v)| parse_scale(v));
3821    let mut attributes = Vec::new();
3822    for (key, value) in options {
3823        if key == "width" || key == "height" {
3824            attributes.push((key.clone(), normalize_dimension(value, scale)));
3825        }
3826    }
3827    attributes
3828}
3829
/// A length with the unit categories the output distinguishes: integral pixels, a percentage, or a
/// value in some other unit.
enum Dimension {
    /// A length in pixels; `parse_dimension` produces this for a value with no unit or a `px` unit.
    Pixel(f64),
    /// A percentage, holding the number written before the `%`.
    Percent(f64),
    /// A magnitude in any other unit, paired with the unit text as written.
    Other(f64, String),
}
3837
/// Parse a `:scale:` value into its numeric factor and a flag telling whether the value contains
/// a `%`. The factor is the leading run of digits and dots of the trimmed value; `None` when that
/// run does not parse as a number.
fn parse_scale(value: &str) -> Option<(f64, bool)> {
    let trimmed = value.trim();
    let numeric_len = trimmed
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(trimmed.len());
    let factor: f64 = trimmed[..numeric_len].parse().ok()?;
    Some((factor, trimmed.contains('%')))
}
3849
3850/// Normalize a dimension and apply a scale factor: pixels round to the nearest integer (ties to
3851/// even), percentages always carry a fractional part, and other units keep their shortest form.
3852fn normalize_dimension(value: &str, scale: Option<(f64, bool)>) -> String {
3853    let Some(dimension) = parse_dimension(value) else {
3854        return value.to_string();
3855    };
3856    let dimension = scale_dimension(dimension, scale);
3857    match dimension {
3858        Dimension::Pixel(pixels) => format!("{}px", pixels.round_ties_even()),
3859        Dimension::Percent(percent) => {
3860            let text = format!("{percent}");
3861            if text.contains('.') {
3862                format!("{text}%")
3863            } else {
3864                format!("{text}.0%")
3865            }
3866        }
3867        Dimension::Other(magnitude, unit) => format!("{magnitude}{unit}"),
3868    }
3869}
3870
3871fn parse_dimension(value: &str) -> Option<Dimension> {
3872    let value = value.trim();
3873    let split = value
3874        .char_indices()
3875        .find(|(_, c)| !(c.is_ascii_digit() || *c == '.'))
3876        .map_or(value.len(), |(index, _)| index);
3877    let magnitude: f64 = value.get(..split)?.parse().ok()?;
3878    let unit = value.get(split..).unwrap_or("").trim();
3879    Some(match unit {
3880        "" | "px" => Dimension::Pixel(magnitude.trunc()),
3881        "%" => Dimension::Percent(magnitude),
3882        other => Dimension::Other(magnitude, other.to_string()),
3883    })
3884}
3885
3886fn scale_dimension(dimension: Dimension, scale: Option<(f64, bool)>) -> Dimension {
3887    let Some((factor, percent)) = scale else {
3888        return dimension;
3889    };
3890    let divisor = if percent { 100.0 } else { 1.0 };
3891    let apply = |value: f64| value * factor / divisor;
3892    match dimension {
3893        Dimension::Pixel(value) => Dimension::Pixel(apply(value)),
3894        Dimension::Percent(value) => Dimension::Percent(apply(value)),
3895        Dimension::Other(value, unit) => Dimension::Other(apply(value), unit),
3896    }
3897}
3898
3899fn class_div(classes: Vec<String>, blocks: Vec<Block>) -> Block {
3900    Block::Div(
3901        Box::new(Attr {
3902            id: carta_ast::Text::default(),
3903            classes: classes.into_iter().map(Into::into).collect(),
3904            attributes: Vec::new(),
3905        }),
3906        blocks,
3907    )
3908}
3909
3910/// When a paragraph's only content is an attribute-free span — the shape a multi-inline substitution
3911/// expands to — the span dissolves into the paragraph, which carries its inlines directly.
3912fn splice_lone_span(mut inlines: Vec<Inline>) -> Vec<Inline> {
3913    let lone_plain_span = matches!(
3914        inlines.as_slice(),
3915        [Inline::Span(attr, _)]
3916            if attr.id.is_empty() && attr.classes.is_empty() && attr.attributes.is_empty()
3917    );
3918    if lone_plain_span && let Some(Inline::Span(_, inner)) = inlines.pop() {
3919        return inner;
3920    }
3921    inlines
3922}
3923
/// Normalize the text of an inline literal: every line break becomes a single space, interior
/// spacing is otherwise kept as written, and leading and trailing whitespace is removed.
fn normalize_inline_literal(content: &str) -> String {
    let folded: String = content
        .chars()
        .map(|c| if c == '\n' { ' ' } else { c })
        .collect();
    folded.trim().to_string()
}
3932
3933/// An attribute set carrying only classes, with no identifier or key-value attributes.
3934fn class_attr(classes: Vec<String>) -> Attr {
3935    Attr {
3936        id: carta_ast::Text::default(),
3937        classes: classes.into_iter().map(Into::into).collect(),
3938        attributes: Vec::new(),
3939    }
3940}
3941
3942fn attach_targets(mut blocks: Vec<Block>, mut targets: Vec<String>) -> Vec<Block> {
3943    // A run of internal targets in front of a section title all attach to that title: the last takes
3944    // the title's identifier, and the rest become empty spans appended to the title in reverse, each
3945    // carrying its name so it can still be linked to.
3946    if let [Block::Header(_, attr, inlines)] = blocks.as_mut_slice()
3947        && let Some(last) = targets.pop()
3948    {
3949        attr.id = last.into();
3950        for name in targets.into_iter().rev() {
3951            inlines.push(Inline::Span(
3952                Box::new(Attr {
3953                    id: name.into(),
3954                    classes: Vec::new(),
3955                    attributes: Vec::new(),
3956                }),
3957                Vec::new(),
3958            ));
3959        }
3960        return blocks;
3961    }
3962    for name in targets.into_iter().rev() {
3963        blocks = vec![Block::Div(
3964            Box::new(Attr {
3965                id: name.into(),
3966                classes: Vec::new(),
3967                attributes: Vec::new(),
3968            }),
3969            blocks,
3970        )];
3971    }
3972    blocks
3973}
3974
/// Partition a directive's options into three parts: the identifier set by `:name:` (the last one
/// wins; empty when absent), the classes listed by every `:class:` option (split on whitespace),
/// and all remaining options as key-value pairs. Classes and remaining options keep source order.
fn common_options(options: &[(String, String)]) -> (String, Vec<String>, Vec<(String, String)>) {
    let mut identifier = String::new();
    let mut extra_classes = Vec::new();
    let mut remaining = Vec::new();
    for (key, value) in options {
        if key == "name" {
            identifier.clone_from(value);
        } else if key == "class" {
            extra_classes.extend(value.split_whitespace().map(String::from));
        } else {
            remaining.push((key.clone(), value.clone()));
        }
    }
    (identifier, extra_classes, remaining)
}
3990
3991/// Wrap a directive's blocks in a division named for the directive, folding its common options into
3992/// the division's identifier, classes, and attributes. The directive name leads the class list.
3993fn options_div(name: &str, options: &[(String, String)], blocks: Vec<Block>) -> Block {
3994    let (id, extra, attributes) = common_options(options);
3995    let mut classes = vec![name.to_string()];
3996    classes.extend(extra);
3997    Block::Div(
3998        Box::new(Attr {
3999            id: id.into(),
4000            classes: classes.into_iter().map(Into::into).collect(),
4001            attributes: attributes
4002                .into_iter()
4003                .map(|(k, v)| (k.into(), v.into()))
4004                .collect(),
4005        }),
4006        blocks,
4007    )
4008}
4009
/// Split a directive body at its blank lines. Each run of consecutive non-blank lines becomes one
/// group: the lines joined with newlines, then trimmed at both ends. Runs of blank lines produce
/// no group.
fn blank_separated(lines: &[String]) -> Vec<String> {
    lines
        .split(|line| line.trim().is_empty())
        .filter(|run| !run.is_empty())
        .map(|run| run.join("\n").trim().to_string())
        .collect()
}
4030
/// Uppercase the first character of `text`, leaving the rest as written. The first character may
/// expand to several when uppercased; empty input yields an empty string.
fn capitalize(text: &str) -> String {
    let mut rest = text.chars();
    let Some(initial) = rest.next() else {
        return String::new();
    };
    let mut out = String::with_capacity(text.len());
    out.extend(initial.to_uppercase());
    out.push_str(rest.as_str());
    out
}
4038
4039/// Reduce text to ASCII for identifier derivation: an accented Latin letter maps to its base letter,
4040/// any remaining non-ASCII character is dropped, and ASCII characters pass through unchanged. The
4041/// caller's slug step then keeps only the identifier-valid characters.
4042fn asciify(text: &str) -> String {
4043    let mut out = String::with_capacity(text.len());
4044    for ch in text.chars() {
4045        if ch.is_ascii() {
4046            out.push(ch);
4047        } else if let Some(base) = ascii_base(ch) {
4048            out.push(base);
4049        }
4050    }
4051    out
4052}
4053
/// Map an accented Latin letter to its unaccented ASCII base, always in lowercase — an uppercase
/// accent reduces to the lowercase base too. Returns `None` for anything not in the table: plain
/// ASCII, ligatures, stroked letters, and non-Latin scripts.
// The table is two parallel blocks, uppercase then lowercase, each alphabetical by base letter, so
// it can be audited by eye. Arms that reduce to the same base letter are deliberately left
// unmerged, hence the lint allowance.
#[allow(clippy::match_same_arms)]
fn ascii_base(ch: char) -> Option<char> {
    Some(match ch {
        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' | 'Ā' | 'Ă' | 'Ą' => 'a',
        'Ç' | 'Ć' | 'Č' | 'Ĉ' | 'Ċ' => 'c',
        'Ď' | 'Ḋ' => 'd',
        'È' | 'É' | 'Ê' | 'Ë' | 'Ē' | 'Ĕ' | 'Ė' | 'Ę' | 'Ě' => 'e',
        'Ĝ' | 'Ğ' | 'Ġ' | 'Ģ' => 'g',
        'Ĥ' => 'h',
        'Ì' | 'Í' | 'Î' | 'Ï' | 'Ĩ' | 'Ī' | 'Ĭ' | 'Į' | 'İ' => 'i',
        'Ĵ' => 'j',
        'Ķ' => 'k',
        'Ĺ' | 'Ļ' | 'Ľ' => 'l',
        'Ñ' | 'Ń' | 'Ņ' | 'Ň' => 'n',
        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ō' | 'Ŏ' | 'Ő' => 'o',
        'Ŕ' | 'Ŗ' | 'Ř' => 'r',
        'Ś' | 'Ŝ' | 'Ş' | 'Š' => 's',
        'Ţ' | 'Ť' => 't',
        'Ù' | 'Ú' | 'Û' | 'Ü' | 'Ũ' | 'Ū' | 'Ŭ' | 'Ů' | 'Ű' | 'Ų' => 'u',
        'Ŵ' => 'w',
        'Ý' | 'Ŷ' | 'Ÿ' => 'y',
        'Ź' | 'Ż' | 'Ž' => 'z',
        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' | 'ā' | 'ă' | 'ą' => 'a',
        'ç' | 'ć' | 'č' | 'ĉ' | 'ċ' => 'c',
        'ď' | 'ḋ' => 'd',
        'è' | 'é' | 'ê' | 'ë' | 'ē' | 'ĕ' | 'ė' | 'ę' | 'ě' => 'e',
        'ĝ' | 'ğ' | 'ġ' | 'ģ' => 'g',
        'ĥ' => 'h',
        'ì' | 'í' | 'î' | 'ï' | 'ĩ' | 'ī' | 'ĭ' | 'į' | 'ı' => 'i',
        'ĵ' => 'j',
        'ķ' => 'k',
        'ĺ' | 'ļ' | 'ľ' => 'l',
        'ñ' | 'ń' | 'ņ' | 'ň' => 'n',
        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ō' | 'ŏ' | 'ő' => 'o',
        'ŕ' | 'ŗ' | 'ř' => 'r',
        'ś' | 'ŝ' | 'ş' | 'š' => 's',
        'ţ' | 'ť' => 't',
        'ù' | 'ú' | 'û' | 'ü' | 'ũ' | 'ū' | 'ŭ' | 'ů' | 'ű' | 'ų' => 'u',
        'ŵ' => 'w',
        'ý' | 'ŷ' | 'ÿ' => 'y',
        'ź' | 'ż' | 'ž' => 'z',
        _ => return None,
    })
}
4104
4105/// Demote a leading paragraph to a plain block, leaving any other block unchanged.
4106fn to_plain(block: Block) -> Block {
4107    match block {
4108        Block::Para(inlines) => Block::Plain(inlines),
4109        other => other,
4110    }
4111}
4112
4113// --- list looseness ----------------------------------------------------------------------------
4114
4115/// Tighten a list: when no item holds two or more paragraphs, each item's paragraphs become plain
4116/// blocks so the list renders compactly.
4117fn compactify(items: &mut [Vec<Block>]) {
4118    let loose = items
4119        .iter()
4120        .any(|item| item.iter().filter(|b| matches!(b, Block::Para(_))).count() >= 2);
4121    if loose {
4122        return;
4123    }
4124    for item in items.iter_mut() {
4125        for block in item.iter_mut() {
4126            if let Block::Para(inlines) = block {
4127                *block = Block::Plain(std::mem::take(inlines));
4128            }
4129        }
4130    }
4131}
4132
/// Trim the literal-block marker from a paragraph's text. Trailing whitespace is ignored when
/// looking for the marker. A trailing `::` is removed entirely when whitespace precedes it (or
/// when it is all the paragraph holds, which yields an empty string), and is replaced by a single
/// colon otherwise. Text that does not end in `::` is returned with only its trailing whitespace
/// removed.
fn minimize_colons(text: &str) -> String {
    let trimmed = text.trim_end();
    // Without the marker there is nothing to minimize; appending a colon here would invent
    // punctuation the author never wrote.
    let Some(body) = trimmed.strip_suffix("::") else {
        return trimmed.to_string();
    };
    if body.trim().is_empty() {
        return String::new();
    }
    if body.ends_with(char::is_whitespace) {
        body.trim_end().to_string()
    } else {
        format!("{body}:")
    }
}
4148
4149// --- inline text helpers -----------------------------------------------------------------------
4150
4151/// Append raw text to an inline sequence, splitting on the regular space into words and single
4152/// spaces, with embedded newlines becoming soft breaks and space runs collapsing. Other whitespace
4153/// (such as a non-breaking space) stays part of its surrounding word.
4154fn push_text(out: &mut Vec<Inline>, text: &str) {
4155    let mut word = String::new();
4156    for ch in text.chars() {
4157        if ch == '\n' {
4158            if !word.is_empty() {
4159                out.push(Inline::Str(std::mem::take(&mut word).into()));
4160            }
4161            out.push(Inline::SoftBreak);
4162        } else if ch == ' ' || ch == '\t' {
4163            if !word.is_empty() {
4164                out.push(Inline::Str(std::mem::take(&mut word).into()));
4165            }
4166            // Collapse a run of spaces to one node, but keep a leading space: callers that must not
4167            // begin or end with one trim their result, while interpreted-text content keeps it.
4168            if !matches!(out.last(), Some(Inline::Space | Inline::SoftBreak)) {
4169                out.push(Inline::Space);
4170            }
4171        } else {
4172            word.push(ch);
4173        }
4174    }
4175    if !word.is_empty() {
4176        out.push(Inline::Str(word.into()));
4177    }
4178}
4179
4180/// Convert verbatim text into inlines without interpreting further markup: emphasis and strong
4181/// spans do not nest, so their content is plain text with only backslash escapes resolved.
4182fn literal_text(text: &str) -> Vec<Inline> {
4183    let chars: Vec<char> = text.chars().collect();
4184    let mut resolved = String::new();
4185    let mut pos = 0;
4186    while let Some(&ch) = chars.get(pos) {
4187        if ch == '\\' {
4188            match chars.get(pos + 1) {
4189                Some(next) if next.is_whitespace() => pos += 2,
4190                Some(next) => {
4191                    resolved.push(*next);
4192                    pos += 2;
4193                }
4194                None => {
4195                    resolved.push('\\');
4196                    pos += 1;
4197                }
4198            }
4199            continue;
4200        }
4201        resolved.push(ch);
4202        pos += 1;
4203    }
4204    let mut out = Vec::new();
4205    push_text(&mut out, &resolved);
4206    trim_inline_ends(&mut out);
4207    out
4208}
4209
/// Whether markup is suppressed because it sits inside a matching pair of quoting characters:
/// `"…"`, `'…'`, or `<…>`. Both neighbours must be present and must pair up.
fn quote_suppresses(before: Option<char>, after: Option<char>) -> bool {
    let (Some(open), Some(close)) = (before, after) else {
        return false;
    };
    match open {
        '"' => close == '"',
        '\'' => close == '\'',
        '<' => close == '>',
        _ => false,
    }
}
4218
/// Find the simple-reference name that ends `pending`, together with the character just before it
/// (`None` when the name starts the text). A simple reference name is a run of alphanumerics joined
/// by single punctuation characters from `-_.:+`, each with an alphanumeric on both sides, so the
/// name starts and ends with an alphanumeric. Returns `None` when the text is empty or does not end
/// in an alphanumeric. The returned name is always a suffix of `pending`.
fn trailing_reference_name(pending: &str) -> Option<(String, Option<char>)> {
    let chars: Vec<char> = pending.chars().collect();
    if !chars.last()?.is_alphanumeric() {
        return None;
    }
    let char_before = |index: usize| index.checked_sub(1).and_then(|i| chars.get(i)).copied();
    // Walk backwards from the final character, extending the name's start as far as it reaches.
    let mut start = chars.len() - 1;
    while let Some(prev) = char_before(start) {
        if prev.is_alphanumeric() {
            start -= 1;
        } else if matches!(prev, '-' | '_' | '.' | ':' | '+')
            && char_before(start - 1).is_some_and(char::is_alphanumeric)
        {
            // A joiner is taken only together with the alphanumeric in front of it, so it can
            // neither lead the name nor sit next to another joiner.
            start -= 2;
        } else {
            break;
        }
    }
    let name: String = chars.get(start..)?.iter().collect();
    Some((name, char_before(start)))
}
4258
/// Whether the character before a markup start string lets markup begin there: the start of the
/// text, any whitespace, or one of the opening punctuation characters `- : / ' " < ( [ {`.
fn inline_start_ok(prev: Option<char>) -> bool {
    const OPENERS: &str = "-:/'\"<([{";
    prev.is_none_or(|c| c.is_whitespace() || OPENERS.contains(c))
}
4269
/// Whether the character after a markup end string lets markup end there: the end of the text, any
/// whitespace, or one of the closing punctuation characters `- . , : ; ! ? \ / ' " ) ] } >`.
fn inline_end_ok(next: Option<char>) -> bool {
    const CLOSERS: &str = "-.,:;!?\\/'\")]}>";
    next.is_none_or(|c| c.is_whitespace() || CLOSERS.contains(c))
}
4297
/// Whether `delim` occurs in `chars` starting exactly at index `at`. An empty delimiter matches at
/// every index, including one past the end of `chars`.
fn matches_at(chars: &[char], at: usize, delim: &[char]) -> bool {
    delim.is_empty() || chars.get(at..).is_some_and(|tail| tail.starts_with(delim))
}
4304
4305/// Find the closing delimiter of a verbatim span (inline literal, interpreted text, substitution,
4306/// bracketed label): the next occurrence of the delimiter. Returns the verbatim inner text and the
4307/// index past the closing delimiter.
4308fn find_close_literal(chars: &[char], start: usize, delim: &str) -> Option<(String, usize)> {
4309    let dchars: Vec<char> = delim.chars().collect();
4310    let mut i = start;
4311    while i < chars.len() {
4312        if matches_at(chars, i, &dchars) {
4313            let content: String = chars.get(start..i)?.iter().collect();
4314            return Some((content, i + dchars.len()));
4315        }
4316        i += 1;
4317    }
4318    None
4319}
4320
/// Parse a role token `:name:` whose opening colon is at `pos`. The name must be non-empty and made
/// only of alphanumerics and `- _ + .`. Returns the name and the index just past the closing colon,
/// or `None` when `pos` is not a colon, the name is empty, an invalid character appears, or the
/// closing colon is missing.
fn parse_role(chars: &[char], pos: usize) -> Option<(String, usize)> {
    if chars.get(pos) != Some(&':') {
        return None;
    }
    let body = chars.get(pos + 1..)?;
    let name_len = body
        .iter()
        .take_while(|&&c| c.is_alphanumeric() || matches!(c, '-' | '_' | '+' | '.'))
        .count();
    if name_len == 0 || body.get(name_len) != Some(&':') {
        return None;
    }
    let name: String = body.get(..name_len)?.iter().collect();
    // Skip the opening colon, the name, and the closing colon.
    Some((name, pos + name_len + 2))
}
4345
/// Split interpreted-text content into its display label and an optional embedded destination
/// `<uri>`. A destination is recognized only when the content, ignoring trailing whitespace, ends
/// in `>` and contains a `<`; it runs from the last `<` to that final `>`. Label and destination
/// are both trimmed. With no destination the content is returned exactly as given.
fn split_embedded_uri(text: &str) -> (String, Option<String>) {
    let embedded = text
        .trim_end()
        .strip_suffix('>')
        .and_then(|head| head.rsplit_once('<'));
    match embedded {
        Some((label, url)) => (label.trim().to_string(), Some(url.trim().to_string())),
        None => (text.to_string(), None),
    }
}
4361
4362// --- bare URI and email autolinking ------------------------------------------------------------
4363
/// Whether the character before a candidate autolink lets one begin there: the start of the text,
/// any whitespace, or an opening bracket `(`, `[`, or `{`. This keeps an address that is already
/// part of larger markup (an angle-bracket URI, a word fragment) from being linked twice.
fn autolink_boundary(prev: Option<char>) -> bool {
    prev.is_none_or(|c| c.is_whitespace() || matches!(c, '(' | '[' | '{'))
}
4373
4374/// Attempt to auto-link a bare URI or email address beginning at `pos`.
4375fn autolink(chars: &[char], pos: usize) -> Option<(Inline, usize)> {
4376    try_uri_autolink(chars, pos).or_else(|| try_email_autolink(chars, pos))
4377}
4378
4379/// Match a bare URI `scheme://…` whose scheme is registered, returning the link and the end index.
4380fn try_uri_autolink(chars: &[char], pos: usize) -> Option<(Inline, usize)> {
4381    if !chars.get(pos).is_some_and(char::is_ascii_alphabetic) {
4382        return None;
4383    }
4384    let mut k = pos;
4385    while chars
4386        .get(k)
4387        .is_some_and(|&c| c.is_ascii_alphanumeric() || matches!(c, '.' | '+' | '-'))
4388    {
4389        k += 1;
4390    }
4391    if !(chars.get(k) == Some(&':')
4392        && chars.get(k + 1) == Some(&'/')
4393        && chars.get(k + 2) == Some(&'/'))
4394    {
4395        return None;
4396    }
4397    let scheme: String = chars.get(pos..k)?.iter().collect::<String>().to_lowercase();
4398    if !crate::url_schemes::is_scheme(&scheme) {
4399        return None;
4400    }
4401    let content_start = k + 3;
4402    let scan_end = forward_scan(chars, pos);
4403    let end = trim_trailing(chars, content_start, scan_end);
4404    if end <= content_start {
4405        return None;
4406    }
4407    let url: String = chars.get(pos..end)?.iter().collect();
4408    // The link text shows the URL as written; the destination is percent-encoded.
4409    Some((
4410        Inline::Link(
4411            Box::default(),
4412            vec![Inline::Str(url.clone().into())],
4413            Box::new(Target {
4414                url: escape_uri(&url).into(),
4415                title: carta_ast::Text::default(),
4416            }),
4417        ),
4418        end,
4419    ))
4420}
4421
4422/// Match a bare email address `local@domain`, returning a `mailto:` link and the end index.
4423fn try_email_autolink(chars: &[char], pos: usize) -> Option<(Inline, usize)> {
4424    let mut i = pos;
4425    while chars.get(i).is_some_and(|&c| is_email_local(c)) {
4426        i += 1;
4427    }
4428    if i == pos || chars.get(i) != Some(&'@') {
4429        return None;
4430    }
4431    i += 1;
4432    let domain_start = i;
4433    let mut dots = 0usize;
4434    let mut end = i;
4435    loop {
4436        let label_start = i;
4437        if !chars.get(i).is_some_and(char::is_ascii_alphanumeric) {
4438            break;
4439        }
4440        while chars
4441            .get(i)
4442            .is_some_and(|&c| c.is_ascii_alphanumeric() || c == '-')
4443        {
4444            i += 1;
4445        }
4446        let mut label_end = i;
4447        while label_end > label_start && chars.get(label_end - 1) == Some(&'-') {
4448            label_end -= 1;
4449        }
4450        end = label_end;
4451        i = label_end;
4452        if chars.get(i) == Some(&'.') {
4453            dots += 1;
4454            i += 1;
4455        } else {
4456            break;
4457        }
4458    }
4459    if dots == 0 || end <= domain_start {
4460        return None;
4461    }
4462    let address: String = chars.get(pos..end)?.iter().collect();
4463    Some((
4464        Inline::Link(
4465            Box::default(),
4466            vec![Inline::Str(address.clone().into())],
4467            Box::new(Target {
4468                url: format!("mailto:{address}").into(),
4469                title: carta_ast::Text::default(),
4470            }),
4471        ),
4472        end,
4473    ))
4474}
4475
/// Whether a character may appear in an email address's local part: an ASCII alphanumeric or one
/// of ``. ! # $ % & ' * + / = ? ^ _ ` { | } ~ -``.
fn is_email_local(c: char) -> bool {
    const LOCAL_PUNCTUATION: &str = ".!#$%&'*+/=?^_`{|}~-";
    c.is_ascii_alphanumeric() || LOCAL_PUNCTUATION.contains(c)
}
4502
/// Walk a URL run forward from `from` to its raw extent, returning the index of the first
/// whitespace or angle bracket (`<` or `>`), or the end of `chars` when there is none. Brackets
/// other than angle brackets are taken in; whether a trailing one belongs to the URL is decided by
/// [`trim_trailing`] from the run's bracket balance.
fn forward_scan(chars: &[char], from: usize) -> usize {
    let run = chars.get(from..).unwrap_or_default();
    let extent = run
        .iter()
        .take_while(|&&c| !(c.is_whitespace() || matches!(c, '<' | '>')))
        .count();
    from + extent
}
4516
/// Count how often `target` occurs in `chars[min..end]`; a range that does not fit inside `chars`
/// counts as zero.
fn count_char(chars: &[char], min: usize, end: usize, target: char) -> usize {
    let Some(run) = chars.get(min..end) else {
        return 0;
    };
    run.iter().filter(|&&c| c == target).count()
}
4523
4524/// Drop trailing punctuation from a URL run, never below `min`. A trailing `;` takes a preceding
4525/// `&entity;` with it. A trailing closing bracket is dropped only when it is unbalanced within the
4526/// run — there are more of it than its matching opener — so a bracketed path stays whole.
4527fn trim_trailing(chars: &[char], min: usize, mut end: usize) -> usize {
4528    while end > min {
4529        match chars.get(end - 1) {
4530            Some('!' | '"' | '\'' | '*' | ',' | '.' | ':' | '?' | '_' | '~') => end -= 1,
4531            Some(&close @ (')' | ']' | '}')) => {
4532                let open = match close {
4533                    ')' => '(',
4534                    ']' => '[',
4535                    _ => '{',
4536                };
4537                if count_char(chars, min, end, close) > count_char(chars, min, end, open) {
4538                    end -= 1;
4539                } else {
4540                    break;
4541                }
4542            }
4543            Some(';') => {
4544                let mut j = end - 1;
4545                while j > min
4546                    && chars
4547                        .get(j - 1)
4548                        .is_some_and(|&c| c.is_ascii_alphanumeric() || c == '#')
4549                {
4550                    j -= 1;
4551                }
4552                end = if j > min && chars.get(j - 1) == Some(&'&') {
4553                    j - 1
4554                } else {
4555                    end - 1
4556                };
4557            }
4558            _ => break,
4559        }
4560    }
4561    end
4562}
4563
4564// --- typographic punctuation (smart) -----------------------------------------------------------
4565
/// The length of the unbroken run of `ch` that starts at `pos`; zero when `chars[pos]` is a
/// different character or `pos` lies outside `chars`.
fn run_length(chars: &[char], pos: usize, ch: char) -> usize {
    chars
        .get(pos..)
        .map_or(0, |tail| tail.iter().take_while(|&&c| c == ch).count())
}
4574
/// Fold a run of `n` hyphens into dashes: each full group of three becomes an em dash, and what is
/// left over becomes an en dash (two) or a plain hyphen (one).
fn fold_dashes(n: usize) -> String {
    let remainder = match n % 3 {
        2 => "\u{2013}",
        1 => "-",
        _ => "",
    };
    let mut folded = "\u{2014}".repeat(n / 3);
    folded.push_str(remainder);
    folded
}
4586
/// Fold a run of `n` dots: each full group of three becomes an ellipsis character, and the one or
/// two dots left over stay as plain dots after it.
fn fold_ellipsis(n: usize) -> String {
    ["\u{2026}".repeat(n / 3), ".".repeat(n % 3)].concat()
}
4593
4594/// The quote-node kind for a straight quote character.
4595fn quote_type(quote: char) -> QuoteType {
4596    if quote == '\'' {
4597        QuoteType::SingleQuote
4598    } else {
4599        QuoteType::DoubleQuote
4600    }
4601}
4602
4603/// The curly glyph a non-paired straight quote folds into: an apostrophe for `'`, and an opening or
4604/// closing double quote depending on which side it leans.
4605fn quote_glyph(chars: &[char], pos: usize, quote: char) -> char {
4606    if quote == '\'' {
4607        '\u{2019}'
4608    } else if can_open_quote(chars, pos) {
4609        '\u{201c}'
4610    } else {
4611        '\u{201d}'
4612    }
4613}
4614
/// Whether a character counts as punctuation for flanking: ASCII punctuation, or any other
/// character that is neither alphanumeric nor whitespace.
fn is_punct(c: char) -> bool {
    c.is_ascii_punctuation() || !(c.is_alphanumeric() || c.is_whitespace())
}
4620
/// Whether a neighbouring position counts as whitespace: a missing neighbour (the edge of the
/// text) does, as does any whitespace character.
fn is_ws_opt(opt: Option<char>) -> bool {
    match opt {
        None => true,
        Some(c) => c.is_whitespace(),
    }
}
4624
4625fn is_punct_opt(opt: Option<char>) -> bool {
4626    opt.is_some_and(is_punct)
4627}
4628
4629/// Whether the quote at `pos` leans against following content (may open a quoted run).
4630fn can_open_quote(chars: &[char], pos: usize) -> bool {
4631    let before = pos.checked_sub(1).and_then(|p| chars.get(p)).copied();
4632    let after = chars.get(pos + 1).copied();
4633    !is_ws_opt(after) && (!is_punct_opt(after) || is_ws_opt(before) || is_punct_opt(before))
4634}
4635
4636/// Whether the quote at `pos` leans against preceding content (may close a quoted run). A single
4637/// quote may not close against a following alphanumeric, so a word-internal apostrophe never ends a
4638/// quotation.
4639fn can_close_quote(chars: &[char], pos: usize, quote: char) -> bool {
4640    let before = pos.checked_sub(1).and_then(|p| chars.get(p)).copied();
4641    let after = chars.get(pos + 1).copied();
4642    let right_flanking =
4643        !is_ws_opt(before) && (!is_punct_opt(before) || is_ws_opt(after) || is_punct_opt(after));
4644    if !right_flanking {
4645        return false;
4646    }
4647    if quote == '\'' {
4648        !after.is_some_and(char::is_alphanumeric)
4649    } else {
4650        true
4651    }
4652}
4653
4654// --- grid table helpers ------------------------------------------------------------------------
4655
/// Whether a line can belong to a grid table: its first character is `+` (a border) or `|` (a row
/// of cells). Leading whitespace is not skipped.
fn is_grid_line(line: &str) -> bool {
    line.starts_with(['+', '|'])
}
4660
/// A cell rectangle traced out of a grid table, in (line, column) matrix coordinates: its corners
/// are the `+` at the top-left and the `+` at the bottom-right.
struct ScanCell {
    /// Line index of the cell's top border.
    top: usize,
    /// Column index of the cell's left border.
    left: usize,
    /// Line index of the cell's bottom border.
    bottom: usize,
    /// Column index of the cell's right border.
    right: usize,
}
4669
/// A placed grid-table cell: its raw interior text and its extent in row and column bands.
#[derive(Clone)]
struct GridCell {
    /// The cell's interior text, not yet parsed.
    text: String,
    /// How many row bands the cell covers.
    row_span: usize,
    /// How many column bands the cell covers.
    col_span: usize,
}
4677
/// The character at (`row`, `col`) of a character matrix, or `None` when the row does not exist or
/// is too short to reach the column.
fn grid_at(block: &[Vec<char>], row: usize, col: usize) -> Option<char> {
    block.get(row)?.get(col).copied()
}
4681
4682/// Trace every cell of a grid table out of its character matrix. From the top-left corner, each
4683/// cell rectangle is found by following its top edge right to a `+`, its right edge down to a `+`,
4684/// its bottom edge left to the starting column, and its left edge back up to the top — each edge
4685/// made solely of its border character (`-` across, `|` down), with `+` permitted where another
4686/// grid line crosses. The corners opposite each cell seed the search for its right and lower
4687/// neighbours. Returns `None` for a matrix that does not open with a corner.
4688fn scan_grid_cells(block: &[Vec<char>]) -> Option<Vec<ScanCell>> {
4689    let height = block.len();
4690    let width = block.first().map_or(0, Vec::len);
4691    if height < 2 || width < 2 || grid_at(block, 0, 0) != Some('+') {
4692        return None;
4693    }
4694    let bottom = height - 1;
4695    let right = width - 1;
4696    let mut cells = Vec::new();
4697    let mut visited = vec![vec![false; width]; height];
4698    let mut queue: VecDeque<(usize, usize)> = VecDeque::new();
4699    queue.push_back((0, 0));
4700    while let Some((top, left)) = queue.pop_front() {
4701        if top >= bottom || left >= right {
4702            continue;
4703        }
4704        if visited.get(top).and_then(|r| r.get(left)).copied() == Some(true) {
4705            continue;
4706        }
4707        if let Some(slot) = visited.get_mut(top).and_then(|r| r.get_mut(left)) {
4708            *slot = true;
4709        }
4710        let Some(cell) = trace_cell(block, top, left, bottom, right) else {
4711            continue;
4712        };
4713        queue.push_back((cell.top, cell.right));
4714        queue.push_back((cell.bottom, cell.left));
4715        cells.push(cell);
4716    }
4717    Some(cells)
4718}
4719
4720fn trace_cell(
4721    block: &[Vec<char>],
4722    top: usize,
4723    left: usize,
4724    bottom: usize,
4725    right: usize,
4726) -> Option<ScanCell> {
4727    for col in left + 1..=right {
4728        match grid_at(block, top, col) {
4729            Some('+') => {
4730                if let Some(b) = scan_cell_down(block, top, left, col, bottom) {
4731                    return Some(ScanCell {
4732                        top,
4733                        left,
4734                        bottom: b,
4735                        right: col,
4736                    });
4737                }
4738            }
4739            // A `-` extends a body border; `=` extends the header/body separator.
4740            Some('-' | '=') => {}
4741            _ => return None,
4742        }
4743    }
4744    None
4745}
4746
4747fn scan_cell_down(
4748    block: &[Vec<char>],
4749    top: usize,
4750    left: usize,
4751    right: usize,
4752    bottom: usize,
4753) -> Option<usize> {
4754    for row in top + 1..=bottom {
4755        match grid_at(block, row, right) {
4756            Some('+') => {
4757                if scan_cell_close(block, top, left, right, row) {
4758                    return Some(row);
4759                }
4760            }
4761            Some('|') => {}
4762            _ => return None,
4763        }
4764    }
4765    None
4766}
4767
4768/// Verify the bottom and left edges of a candidate cell: the bottom edge from `right` back to
4769/// `left` is `-` (or a `+` crossing) and reaches a `+` at the bottom-left corner, and the left edge
4770/// from `bottom` back to `top` is `|` (or a `+` crossing).
4771fn scan_cell_close(
4772    block: &[Vec<char>],
4773    top: usize,
4774    left: usize,
4775    right: usize,
4776    bottom: usize,
4777) -> bool {
4778    for col in left + 1..right {
4779        if !matches!(grid_at(block, bottom, col), Some('-' | '=' | '+')) {
4780            return false;
4781        }
4782    }
4783    if grid_at(block, bottom, left) != Some('+') {
4784        return false;
4785    }
4786    for row in top + 1..bottom {
4787        if !matches!(grid_at(block, row, left), Some('|' | '+')) {
4788            return false;
4789        }
4790    }
4791    true
4792}
4793
/// Whether a line looks like a simple-table ruler: after trimming surrounding whitespace it is
/// non-empty, begins with `=`, and holds nothing but `=` and spaces. A single run of `=` passes.
fn is_simple_table_ruler(line: &str) -> bool {
    let mut marks = line.trim().chars();
    // The first character must be `=` (which also rules out the empty line); the rest may mix
    // `=` runs with the spaces that separate them.
    marks.next() == Some('=') && marks.all(|mark| matches!(mark, '=' | ' '))
}
4799
/// The column extents of a simple table, read from the `=` runs of its top border.
///
/// Each entry is a `(start, end)` pair of character positions, `start` inclusive and `end`
/// exclusive. The result is `None` when the border holds anything other than `=` and spaces, or
/// when it holds no `=` run at all. One column is enough: a lone `=` run is rejected as a section
/// adornment or transition before the table parser is reached, and the parser still requires a
/// closing border to confirm a table.
fn simple_columns(border: &str) -> Option<Vec<(usize, usize)>> {
    let mut columns: Vec<(usize, usize)> = Vec::new();
    // Start position of the `=` run currently being read, if any.
    let mut open: Option<usize> = None;
    let mut length = 0;
    for (pos, mark) in border.chars().enumerate() {
        length = pos + 1;
        match mark {
            '=' => open = open.or(Some(pos)),
            ' ' => {
                if let Some(start) = open.take() {
                    columns.push((start, pos));
                }
            }
            _ => return None,
        }
    }
    // A run that reaches the end of the border closes at the border's length.
    if let Some(start) = open {
        columns.push((start, length));
    }
    if columns.is_empty() {
        None
    } else {
        Some(columns)
    }
}
4823
/// Whether a line is a `=` border: once surrounding whitespace is trimmed, something remains and
/// all of it is `=` or space.
fn is_equals_border(line: &str) -> bool {
    let body = line.trim();
    if body.is_empty() {
        return false;
    }
    !body.chars().any(|mark| mark != '=' && mark != ' ')
}
4829
/// Whether a line carries no text in its first column, which marks it as continuing the row above.
///
/// The span examined runs from the start of the first column (position 0 when `columns` is empty)
/// up to the start of the second column, or to the end of the line when there is no second column.
/// Positions past the end of the line count as blank.
fn first_column_blank(line: &str, columns: &[(usize, usize)]) -> bool {
    let chars: Vec<char> = line.chars().collect();
    let span_start = columns.first().map_or(0, |column| column.0);
    let span_end = columns.get(1).map_or(chars.len(), |column| column.0);
    // An inverted or empty span has nothing to inspect and so is trivially blank.
    let span_len = span_end.saturating_sub(span_start);
    chars
        .iter()
        .skip(span_start)
        .take(span_len)
        .all(|c| c.is_whitespace())
}
4837
/// The grouping of a row that no span underline touches: `count` groups, each beginning and ending
/// on the same column index.
fn default_groups(count: usize) -> Vec<(usize, usize)> {
    (0..count).zip(0..count).collect()
}
4842
/// The column groups that a `-` underline imposes on the row above it.
///
/// The line counts as an underline only when it consists solely of `-` and spaces and holds at
/// least one `-`; otherwise it is cell text and the result is `None`. For an underline, the margin
/// between each pair of neighbouring columns is inspected: a margin holding a `-` joins the two
/// columns into one span, and a margin without one ends the current group. Each returned pair is
/// the first and last column index of a group, both inclusive.
fn span_underline_groups(line: &str, columns: &[(usize, usize)]) -> Option<Vec<(usize, usize)>> {
    let marks: Vec<char> = line.chars().collect();
    let only_rule_marks = marks.iter().all(|&mark| mark == '-' || mark == ' ');
    if !only_rule_marks || !marks.contains(&'-') {
        return None;
    }
    let last_column = columns.len().saturating_sub(1);
    let mut groups = Vec::new();
    let mut first_in_group = 0;
    let neighbours = columns.iter().zip(columns.iter().skip(1));
    for (index, (lhs, rhs)) in neighbours.enumerate() {
        // The margin spans from the end of the left column to the start of the right one.
        let margin_len = rhs.0.saturating_sub(lhs.1);
        let joined = marks
            .iter()
            .skip(lhs.1)
            .take(margin_len)
            .any(|&mark| mark == '-');
        if !joined {
            groups.push((first_in_group, index));
            first_in_group = index + 1;
        }
    }
    // Whatever is still open runs through the final column.
    groups.push((first_in_group, last_column));
    Some(groups)
}
4867
4868fn trim_blank_edges(mut lines: Vec<String>) -> Vec<String> {
4869    while lines.first().is_some_and(|l| is_blank(l)) {
4870        lines.remove(0);
4871    }
4872    while lines.last().is_some_and(|l| is_blank(l)) {
4873        lines.pop();
4874    }
4875    lines
4876}
4877
4878#[cfg(test)]
4879mod tests {
4880    use super::*;
4881
4882    fn parse(input: &str) -> Vec<Block> {
4883        parse_ext(input, Extensions::default())
4884    }
4885
4886    fn parse_ext(input: &str, extensions: Extensions) -> Vec<Block> {
4887        let reader = RstReader;
4888        let mut options = ReaderOptions::default();
4889        options.extensions = extensions;
4890        reader
4891            .read(input, &options)
4892            .expect("reader does not fail")
4893            .blocks
4894    }
4895
4896    fn with_auto_ids() -> Extensions {
4897        let mut extensions = Extensions::default();
4898        extensions.insert(Extension::AutoIdentifiers);
4899        extensions
4900    }
4901
4902    #[test]
4903    fn leading_punctuation_before_name_does_not_underflow() {
4904        // Scanning a trailing reference name backwards must stop at the buffer start: a punctuation
4905        // character with nothing before it cannot extend the name, and the scan must not look past
4906        // index zero.
4907        let _ = parse("_C_\n");
4908        let _ = parse("_C");
4909        let _ = parse(":a");
4910    }
4911
4912    #[test]
4913    fn circular_substitution_does_not_overflow_the_stack() {
4914        // A substitution whose replacement references itself — directly, or through a cycle of
4915        // other definitions — would expand without bound, because a replacement is itself parsed
4916        // as inline markup. RST forbids circular references; the reader must leave such a name
4917        // unexpanded rather than recurse into a stack overflow. A nightly fuzz run hit this.
4918        let _ = parse(".. |a| replace:: |a|\n\n|a|\n");
4919        let _ = parse(".. |a| replace:: |b|\n.. |b| replace:: |a|\n\n|a|\n");
4920        // The reduced reproducer libFuzzer minimized from the crashing input.
4921        let bytes = [
4922            84u8, 46, 46, 32, 124, 124, 97, 112, 124, 0, 32, 10, 46, 46, 32, 124, 46, 46, 32, 124,
4923            117, 110, 105, 99, 111, 100, 101, 58, 58, 32, 124, 124, 124, 124, 95, 58, 58, 32, 124,
4924            124, 46, 124, 46, 9, 124, 1, 0, 46, 46, 32, 124, 117, 110, 32, 124, 46, 46, 32, 124,
4925            117, 110, 105, 99, 111, 100, 101, 58, 58, 32, 124, 124, 124, 124, 95, 58, 58, 32, 124,
4926            124, 46, 46, 124, 124, 1, 9, 0, 46, 46, 32, 124, 117, 110, 105, 99, 111, 100, 101, 58,
4927            44, 32, 124, 124, 46, 105, 99, 111, 100, 101, 58, 44, 32, 124, 124, 46, 124, 46, 9,
4928            124, 1, 0, 0, 114, 10, 9, 46, 116, 0,
4929        ];
4930        let _ = parse(std::str::from_utf8(&bytes).unwrap());
4931    }
4932
4933    #[test]
4934    fn pipe_not_followed_by_space_does_not_stall_the_scan() {
4935        // A `|` not followed by a space or end of line does not open a line block. An indented or
4936        // otherwise non-conforming pipe must fall through to ordinary block parsing, and the scan
4937        // must advance past its line rather than re-examine it without end.
4938        let _ = parse("\u{0b}\t|\u{0}");
4939        let _ = parse("   |x");
4940        let _ = parse("|x\n");
4941    }
4942
4943    #[test]
4944    fn paragraph_with_inline_markup() {
4945        let blocks = parse("A *word* and **two** and ``lit``.\n");
4946        assert_eq!(
4947            blocks,
4948            vec![Block::Para(vec![
4949                Inline::Str("A".into()),
4950                Inline::Space,
4951                Inline::Emph(vec![Inline::Str("word".into())]),
4952                Inline::Space,
4953                Inline::Str("and".into()),
4954                Inline::Space,
4955                Inline::Strong(vec![Inline::Str("two".into())]),
4956                Inline::Space,
4957                Inline::Str("and".into()),
4958                Inline::Space,
4959                Inline::Code(Box::default(), "lit".into()),
4960                Inline::Str(".".into()),
4961            ])]
4962        );
4963    }
4964
4965    #[test]
4966    fn underline_section_header_gets_slug_id() {
4967        let blocks = parse_ext("Title\n=====\n", with_auto_ids());
4968        assert_eq!(
4969            blocks,
4970            vec![Block::Header(
4971                1,
4972                Box::new(Attr {
4973                    id: "title".into(),
4974                    classes: Vec::new(),
4975                    attributes: Vec::new(),
4976                }),
4977                vec![Inline::Str("Title".into())],
4978            )]
4979        );
4980    }
4981
4982    #[test]
4983    fn header_levels_follow_first_seen_adornment_order() {
4984        let blocks = parse("A\n=\n\nB\n-\n\nC\n=\n");
4985        let levels: Vec<i32> = blocks
4986            .iter()
4987            .filter_map(|b| match b {
4988                Block::Header(level, _, _) => Some(*level),
4989                _ => None,
4990            })
4991            .collect();
4992        assert_eq!(levels, vec![1, 2, 1]);
4993    }
4994
4995    #[test]
4996    fn transition_is_a_horizontal_rule() {
4997        let blocks = parse("Above\n\n----\n\nBelow\n");
4998        assert_eq!(blocks.get(1), Some(&Block::HorizontalRule));
4999    }
5000
5001    #[test]
5002    fn bullet_list_is_tight() {
5003        let blocks = parse("- one\n- two\n");
5004        assert_eq!(
5005            blocks,
5006            vec![Block::BulletList(vec![
5007                vec![Block::Plain(vec![Inline::Str("one".into())])],
5008                vec![Block::Plain(vec![Inline::Str("two".into())])],
5009            ])]
5010        );
5011    }
5012
5013    #[test]
5014    fn enumerated_list_carries_style_and_start() {
5015        let blocks = parse("3. third\n4. fourth\n");
5016        match blocks.first() {
5017            Some(Block::OrderedList(attrs, items)) => {
5018                assert_eq!(attrs.start, 3);
5019                assert_eq!(attrs.style, ListNumberStyle::Decimal);
5020                assert_eq!(attrs.delim, ListNumberDelim::Period);
5021                assert_eq!(items.len(), 2);
5022            }
5023            other => panic!("expected ordered list, got {other:?}"),
5024        }
5025    }
5026
5027    #[test]
5028    fn literal_block_drops_marker_paragraph() {
5029        let blocks = parse("::\n\n    code line\n");
5030        assert_eq!(
5031            blocks,
5032            vec![Block::CodeBlock(Box::default(), "code line".into())]
5033        );
5034    }
5035
5036    #[test]
5037    fn literal_block_keeps_single_colon() {
5038        let blocks = parse("Example::\n\n    code\n");
5039        assert_eq!(
5040            blocks.first(),
5041            Some(&Block::Para(vec![Inline::Str("Example:".into())]))
5042        );
5043    }
5044
5045    #[test]
5046    fn field_list_becomes_definition_list() {
5047        let blocks = parse(":Author: Me\n");
5048        assert_eq!(
5049            blocks,
5050            vec![Block::DefinitionList(vec![(
5051                vec![Inline::Str("Author".into())],
5052                vec![vec![Block::Plain(vec![Inline::Str("Me".into())])]],
5053            )])]
5054        );
5055    }
5056
5057    #[test]
5058    fn named_target_resolves_reference() {
5059        let blocks = parse("See website_.\n\n.. _website: https://example.org\n");
5060        match blocks.first() {
5061            Some(Block::Para(inlines)) => {
5062                let link = inlines.iter().find(|i| matches!(i, Inline::Link(..)));
5063                assert_eq!(
5064                    link,
5065                    Some(&Inline::Link(
5066                        Box::default(),
5067                        vec![Inline::Str("website".into())],
5068                        Box::new(Target {
5069                            url: "https://example.org".into(),
5070                            title: carta_ast::Text::default(),
5071                        }),
5072                    ))
5073                );
5074            }
5075            other => panic!("expected paragraph, got {other:?}"),
5076        }
5077    }
5078
5079    #[test]
5080    fn footnote_reference_inlines_the_note() {
5081        let blocks = parse("Ref [1]_\n\n.. [1] The note.\n");
5082        match blocks.first() {
5083            Some(Block::Para(inlines)) => {
5084                assert!(inlines.iter().any(|i| matches!(i, Inline::Note(_))));
5085                // The space before the note marker is dropped.
5086                assert_eq!(inlines.first(), Some(&Inline::Str("Ref".into())));
5087                assert!(matches!(inlines.get(1), Some(Inline::Note(_))));
5088            }
5089            other => panic!("expected paragraph, got {other:?}"),
5090        }
5091    }
5092
5093    #[test]
5094    fn comment_produces_no_output() {
5095        let blocks = parse(".. This is a comment.\n");
5096        assert!(blocks.is_empty());
5097    }
5098
5099    #[test]
5100    fn interpreted_text_defaults_to_title_reference() {
5101        let blocks = parse("A `book title` here.\n");
5102        match blocks.first() {
5103            Some(Block::Para(inlines)) => {
5104                assert!(inlines.iter().any(|i| matches!(
5105                    i,
5106                    Inline::Span(attr, _) if attr.classes == vec!["title-ref".to_string()]
5107                )));
5108            }
5109            other => panic!("expected paragraph, got {other:?}"),
5110        }
5111    }
5112
5113    #[test]
5114    fn auto_identifiers_off_yields_no_id() {
5115        let blocks = parse_ext("Title\n=====\n", Extensions::empty());
5116        match blocks.first() {
5117            Some(Block::Header(_, attr, _)) => assert!(attr.id.is_empty()),
5118            other => panic!("expected header, got {other:?}"),
5119        }
5120    }
5121
5122    #[test]
5123    fn date_renders_strftime_fields_for_fixed_timestamps() {
5124        // Expected values follow the Gregorian calendar in UTC; each timestamp is seconds past the
5125        // epoch. The `date` directive's live form draws on the wall clock, so it is exercised here
5126        // against frozen moments to keep the assertions reproducible.
5127        let cases: &[(i64, &str, &str)] = &[
5128            // 2026-06-29 14:50:50 UTC, a Monday.
5129            (1_782_744_650, "%Y-%m-%d", "2026-06-29"),
5130            (1_782_744_650, "%j", "180"),
5131            (1_782_744_650, "%A %a", "Monday Mon"),
5132            (1_782_744_650, "%B %b %h", "June Jun Jun"),
5133            (1_782_744_650, "%u %w", "1 1"),
5134            (1_782_744_650, "%U %W", "26 26"),
5135            (1_782_744_650, "%V %G %g", "27 2026 26"),
5136            (1_782_744_650, "%I %l %p %P", "02  2 PM pm"),
5137            (1_782_744_650, "%C %y", "20 26"),
5138            (1_782_744_650, "%D", "06/29/26"),
5139            (1_782_744_650, "%F %T", "2026-06-29 14:50:50"),
5140            (1_782_744_650, "%R %k", "14:50 14"),
5141            (1_782_744_650, "%r", "02:50:50 PM"),
5142            (1_782_744_650, "%e", "29"),
5143            // 2024-02-29 00:00:00 UTC, a leap day on a Thursday.
5144            (1_709_164_800, "%Y-%m-%d", "2024-02-29"),
5145            (1_709_164_800, "%j", "060"),
5146            (1_709_164_800, "%A", "Thursday"),
5147            (1_709_164_800, "%U %W", "08 09"),
5148            (1_709_164_800, "%V %G %g", "09 2024 24"),
5149            (1_709_164_800, "%I %p", "12 AM"),
5150            (1_709_164_800, "%e", "29"),
5151            // 1970-01-01 00:00:00 UTC, the epoch, a Thursday.
5152            (0, "%Y-%m-%d", "1970-01-01"),
5153            (0, "%j", "001"),
5154            (0, "%A", "Thursday"),
5155            (0, "%U %W", "00 00"),
5156            (0, "%V %G %g", "01 1970 70"),
5157            (0, "%e", " 1"),
5158            // 2027-01-01 12:00:00 UTC: an ISO week that rolls back into the previous year.
5159            (1_798_804_800, "%V %G %g", "53 2026 26"),
5160            (1_798_804_800, "%A", "Friday"),
5161            (1_798_804_800, "%r", "12:00:00 PM"),
5162            // A literal percent, and an unrecognized code emitted verbatim.
5163            (0, "before %% after", "before % after"),
5164            (0, "%Q", "%Q"),
5165        ];
5166        for (secs, format, expected) in cases {
5167            assert_eq!(
5168                &render_date(*secs, format),
5169                expected,
5170                "render_date({secs}, {format:?})"
5171            );
5172        }
5173        // The empty format string falls back to an ISO date, whatever today happens to be.
5174        let today = format_date("");
5175        assert_eq!(today.len(), 10);
5176        assert_eq!(today.matches('-').count(), 2);
5177    }
5178
5179    #[test]
5180    fn include_directive_splices_referenced_file() {
5181        let path =
5182            std::env::temp_dir().join(format!("carta_rst_include_{}.rst", std::process::id()));
5183        std::fs::write(&path, "Pulled in **bold** text.\n").expect("write temp include");
5184        let source = format!("Before.\n\n.. include:: {}\n\nAfter.\n", path.display());
5185        let blocks = parse(&source);
5186        std::fs::remove_file(&path).ok();
5187
5188        let paragraphs: Vec<&Vec<Inline>> = blocks
5189            .iter()
5190            .filter_map(|block| match block {
5191                Block::Para(inlines) => Some(inlines),
5192                _ => None,
5193            })
5194            .collect();
5195        assert_eq!(paragraphs.len(), 3);
5196        let included = paragraphs.get(1).expect("the spliced include paragraph");
5197        assert!(
5198            included
5199                .iter()
5200                .any(|inline| matches!(inline, Inline::Strong(_)))
5201        );
5202    }
5203
5204    /// The attributes of the first image found in a paragraph or plain block.
5205    fn first_image_attr(blocks: &[Block]) -> Option<Attr> {
5206        for block in blocks {
5207            let (Block::Para(inlines) | Block::Plain(inlines)) = block else {
5208                continue;
5209            };
5210            for inline in inlines {
5211                if let Inline::Image(attr, _, _) = inline {
5212                    return Some(*attr.clone());
5213                }
5214            }
5215        }
5216        None
5217    }
5218
5219    fn image_width(source: &str) -> Option<String> {
5220        first_image_attr(&parse(source))?
5221            .attributes
5222            .into_iter()
5223            .find(|(key, _)| key == "width")
5224            .map(|(_, value)| value.to_string())
5225    }
5226
5227    #[test]
5228    fn image_directive_resolves_width_and_scale() {
5229        // A pixel width is truncated to an integer at parse time and rounds to even at the boundary
5230        // once a scale is applied.
5231        assert_eq!(
5232            image_width(".. image:: a.png\n   :width: 200px\n   :scale: 50%\n"),
5233            Some("100px".into())
5234        );
5235        assert_eq!(
5236            image_width(".. image:: a.png\n   :width: 201px\n   :scale: 50%\n"),
5237            Some("100px".into())
5238        );
5239        assert_eq!(
5240            image_width(".. image:: a.png\n   :width: 100.7px\n"),
5241            Some("100px".into())
5242        );
5243        // A percentage width keeps a single fractional digit.
5244        assert_eq!(
5245            image_width(".. image:: a.png\n   :width: 100%\n   :scale: 33\n"),
5246            Some("3300.0%".into())
5247        );
5248        // A physical unit scales and renders in the shortest form.
5249        assert_eq!(
5250            image_width(".. image:: a.png\n   :width: 2.5in\n   :scale: 50%\n"),
5251            Some("1.25in".into())
5252        );
5253        assert_eq!(
5254            image_width(".. image:: a.png\n   :width: 3cm\n"),
5255            Some("3cm".into())
5256        );
5257    }
5258
5259    #[test]
5260    fn image_directive_doubles_classes_and_appends_alignment() {
5261        let classes = |source: &str| first_image_attr(&parse(source)).expect("an image").classes;
5262        // Alignment alone becomes an `align-<value>` class.
5263        assert_eq!(
5264            classes(".. image:: a.png\n   :align: center\n"),
5265            vec!["align-center".to_string()]
5266        );
5267        // An explicit class list is doubled, with the alignment fused onto the final entry.
5268        assert_eq!(
5269            classes(".. image:: a.png\n   :class: foo\n   :align: center\n"),
5270            vec!["foo".to_string(), "fooalign-center".to_string()]
5271        );
5272        assert_eq!(
5273            classes(".. image:: a.png\n   :class: foo bar\n"),
5274            vec![
5275                "foo".to_string(),
5276                "bar".to_string(),
5277                "foo".to_string(),
5278                "bar".to_string()
5279            ]
5280        );
5281    }
5282
5283    #[test]
5284    fn substitution_image_carries_options() {
5285        let badge = parse("|i|\n\n.. |i| image:: a.png\n   :class: foo\n   :align: middle\n");
5286        assert_eq!(
5287            first_image_attr(&badge).expect("an image").classes,
5288            vec!["foo".to_string(), "fooalign-middle".to_string()]
5289        );
5290        assert_eq!(
5291            image_width("|i|\n\n.. |i| image:: a.png\n   :width: 200px\n   :scale: 50%\n"),
5292            Some("100px".into())
5293        );
5294    }
5295
5296    #[test]
5297    fn figure_directive_separates_figure_and_image_attributes() {
5298        // `:name:` identifies the inner image, `:align:` classes the figure; the figure id is empty.
5299        let blocks = parse(".. figure:: a.png\n   :name: first\n   :align: center\n\n   Cap\n");
5300        let (outer, body) = match blocks.first() {
5301            Some(Block::Figure(attr, _, body)) => (attr.clone(), body.clone()),
5302            other => panic!("expected a figure, got {other:?}"),
5303        };
5304        assert!(outer.id.is_empty());
5305        assert_eq!(outer.classes, vec!["align-center".to_string()]);
5306        let inner = first_image_attr(&body).expect("an inner image");
5307        assert_eq!(inner.id.as_str(), "first");
5308        assert!(inner.classes.is_empty());
5309
5310        // `:figclass:` and `:class:` both class the figure; only `:class:` reaches the inner image.
5311        let blocks = parse(".. figure:: a.png\n   :figclass: frame\n   :class: photo\n\n   Cap\n");
5312        let (outer, body) = match blocks.first() {
5313            Some(Block::Figure(attr, _, body)) => (attr.clone(), body.clone()),
5314            other => panic!("expected a figure, got {other:?}"),
5315        };
5316        assert_eq!(
5317            outer.classes,
5318            vec!["frame".to_string(), "photo".to_string()]
5319        );
5320        let inner = first_image_attr(&body).expect("an inner image");
5321        assert_eq!(inner.classes, vec!["photo".to_string()]);
5322    }
5323}