// badness 0.0.1 - Docs.rs

//! The formatter entry points and the CST → [`Ir`] lowering.
//!
//! Implemented rules:
//! - **Whitespace normalization**: trailing whitespace is trimmed, runs of 2+
//!   blank lines collapse to a single blank line, and the document ends with
//!   exactly one newline.
//! - **Environment indentation**: the body of `\begin{…} … \end{…}` is indented
//!   one step, nesting recursively, with `\begin`/`\end` flush. All indentation
//!   is computed by the printer, never preserved from input — so reformatting
//!   re-indents idempotently.
//! - **Group/argument indentation**: the body of a *multi-line* brace group
//!   `{…}` or optional-argument group `[…]` is indented one step, the same way
//!   (delimiters flush, body indented). Single-line groups are left inline;
//!   existing line breaks are respected.
//! - **Prose-argument reflow** (under [`WrapMode::Reflow`]): an argument the
//!   signature DB marks `prose` (a `\footnote`/`\caption` body, a sectioning
//!   title) is reflowed to the line width like a paragraph — joined when it fits,
//!   wrapped when it does not (see [`lower_command`] / [`lower_prose_group`]).
//!   Non-prose groups (`\newcommand` body, `\label`) are left as authored.
//!
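//! Everything else is emitted verbatim: paragraph structure, intra-line spacing,
//! and protected regions (`\verb`, verbatim bodies, comments) are preserved. The
//! one exception is [`WrapMode::Reflow`], under which paragraph prose is also
//! rewrapped to the line width (see [`lower_paragraph_reflow`]).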
//!
//! The mechanism flows entirely through the Wadler [`Ir`]: each maximal run of
//! `WHITESPACE`/`NEWLINE` trivia is replaced by a single break primitive
//! ([`Ir::hard_line`] for one newline, [`Ir::empty_line`] for a blank line),
//! whose printer (`super::printer`) defers indentation and so drops trailing
//! whitespace for free, and [`Ir::indent`] raises the indent inside environment
//! bodies.
//!
//! The lowering (`lower_node`) is the LaTeX-specific part that replaces arity's
//! R `ir_expr_node` dispatch; the surrounding `format`/`format_with_style`
//! framework mirrors arity's `src/formatter/core.rs`.

use std::iter::Peekable;

use crate::ast::{command_name, environment_name};
use crate::parser::parse;
use crate::semantic::{ArgKind, ArgSpec, Signatures, scan_definitions};
use crate::syntax::{SyntaxElement, SyntaxKind, SyntaxNode, SyntaxToken};

use super::context::FormatContext;
use super::ir::Ir;
use super::printer::Printer;
use super::style::{FormatStyle, WrapMode};

/// Why a document could not be formatted. The formatter only operates on a clean
/// parse: anything the parser flagged, or any `ERROR` token, is refused rather
/// than silently reshaped.
///
/// Both variants are produced in this module: [`FormatError::ParseErrors`] by
/// [`format_with_style`] and [`FormatError::UnsupportedConstruct`] by
/// [`validate_supported_tokens`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatError {
    /// The input parsed with `count` syntax error(s); the formatter only
    /// supports input the parser accepts without diagnostics.
    ParseErrors { count: usize },
    /// The CST contains an `ERROR` token the lowering does not handle. `kind` is
    /// the offending token's kind and `snippet` is that token's text.
    UnsupportedConstruct { kind: SyntaxKind, snippet: String },
}

impl std::fmt::Display for FormatError {
    /// Write a one-line, human-readable description of the failure to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnsupportedConstruct { kind, snippet } => f.write_fmt(format_args!(
                "unsupported construct for formatter: {kind:?} near {snippet:?}"
            )),
            Self::ParseErrors { count } => f.write_fmt(format_args!(
                "input contains {count} parser diagnostic(s); formatter only supports parseable input"
            )),
        }
    }
}

// Marker impl: the derived `Debug` and the hand-written `Display` satisfy the
// trait's requirements, and every provided method keeps its default.
impl std::error::Error for FormatError {}

/// Format `input` using [`FormatStyle::default`].
///
/// Thin convenience wrapper around [`format_with_style`]; it returns whatever
/// that function returns, including its [`FormatError`]s.
pub fn format(input: &str) -> Result<String, FormatError> {
    let style = FormatStyle::default();
    format_with_style(input, style)
}

/// Parse `input`, then format the resulting tree under `style`.
///
/// # Errors
///
/// Returns [`FormatError::ParseErrors`] (carrying the number of parser errors)
/// when the parse is not clean, and otherwise whatever [`format_node`] returns.
///
/// Note: badness's [`crate::parser::Parse`] carries `errors` + `syntax()`
/// (arity uses `diagnostics` + `cst`).
pub fn format_with_style(input: &str, style: FormatStyle) -> Result<String, FormatError> {
    let parsed = parse(input);
    match parsed.errors.len() {
        // Clean parse: hand the tree to the reparse-free entry point.
        0 => format_node(&parsed.syntax(), style),
        count => Err(FormatError::ParseErrors { count }),
    }
}

/// Format an already-parsed CST `root` under `style`, without reparsing.
///
/// This is the entry the language server uses with its salsa-cached tree
/// (`db.parsed_tree`). The caller owns the `ParseErrors` guard: this function
/// assumes the parse was clean and only enforces the `ERROR`-token invariant via
/// [`validate_supported_tokens`]. [`format_with_style`] is the parse-then-format
/// convenience wrapper.
///
/// The printed text has its trailing edge normalized: trailing ASCII spaces,
/// tabs, `\n` and `\r` are removed and exactly one `\n` is appended. Output that
/// is empty after trimming stays empty. Only those four ASCII characters are
/// trimmed, so trailing Unicode content (e.g. a non-breaking space) survives.
///
/// # Errors
///
/// Returns [`FormatError::UnsupportedConstruct`] if the tree holds an `ERROR`
/// token.
pub fn format_node(root: &SyntaxNode, style: FormatStyle) -> Result<String, FormatError> {
    validate_supported_tokens(root)?;

    let printed = format_root(root, FormatContext::new(style));
    let content = printed.trim_end_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
    if content.is_empty() {
        return Ok(String::new());
    }

    // Re-terminate the trimmed text with the single final newline.
    let mut normalized = String::with_capacity(content.len() + 1);
    normalized.push_str(content);
    normalized.push('\n');
    Ok(normalized)
}

/// Refuse any `ERROR` token. A clean parse should contain none, but the parser
/// can emit them on recovery; the formatter never reshapes around them.
fn validate_supported_tokens(root: &SyntaxNode) -> Result<(), FormatError> {
    for element in root.descendants_with_tokens() {
        let Some(token) = element.into_token() else {
            continue;
        };
        if token.kind() == SyntaxKind::ERROR {
            return Err(FormatError::UnsupportedConstruct {
                kind: token.kind(),
                snippet: token.text().to_string(),
            });
        }
    }
    Ok(())
}

/// Lower `root` to IR and print it with the style held by `ctx`.
///
/// The document's own `\newcommand`/`\newenvironment`/xparse definitions are
/// scanned once up front, so the lowering can resolve a locally-defined
/// environment's arity and not just the built-in DB's. The scan result is held
/// by value for the whole lowering; the [`Signatures`] overlay borrows it.
fn format_root(root: &SyntaxNode, ctx: FormatContext) -> String {
    let definitions = scan_definitions(root);
    let wrap = ctx.style().wrap;
    let signatures = Signatures::new(&definitions);

    let ir = lower_node(root, LowerCtx { wrap, signatures });
    Printer::new(ctx.style()).print(&ir)
}

/// The state threaded through every lowering call: the active [`WrapMode`] plus the
/// per-document [`Signatures`] overlay (scanned definitions over the built-in DB)
/// that [`lower_begin`] consults for environment arity. `Copy`, so it passes by
/// value like the bare `wrap` mode it replaced.
#[derive(Clone, Copy)]
struct LowerCtx<'a> {
    /// Wrap mode taken from the active style; [`WrapMode::Reflow`] enables
    /// paragraph and prose-argument reflow in [`lower_node`].
    wrap: WrapMode,
    /// Signature lookup for commands ([`command_has_prose_arg`],
    /// [`lower_command`]) and environments ([`lower_begin`]).
    signatures: Signatures<'a>,
}

/// Lower one CST node to IR, dispatching on its kind.
///
/// Special cases, checked in this order:
/// - a [`SyntaxKind::PARAGRAPH`] under [`WrapMode::Reflow`] is wrapped to the
///   line width ([`lower_paragraph_reflow`]);
/// - a [`SyntaxKind::ENVIRONMENT`] without a verbatim body gets its body
///   indented ([`lower_environment`]);
/// - a [`SyntaxKind::COMMAND`] with a prose argument, under reflow, goes through
///   [`lower_command`];
/// - a multi-line `GROUP` / `OPTIONAL` gets its body indented
///   ([`lower_bracketed`]).
///
/// Everything else lowers generically through [`lower_element_stream`]. The
/// [`LowerCtx`] (wrap mode + signature overlay) is passed down so it reaches
/// every nested paragraph, including environment and group bodies.
fn lower_node(node: &SyntaxNode, cx: LowerCtx<'_>) -> Ir {
    let kind = node.kind();
    let reflowing = cx.wrap == WrapMode::Reflow;

    if kind == SyntaxKind::PARAGRAPH && reflowing {
        lower_paragraph_reflow(node, cx)
    } else if kind == SyntaxKind::ENVIRONMENT && !has_verbatim_body(node) {
        lower_environment(node, cx)
    } else if kind == SyntaxKind::COMMAND && reflowing && command_has_prose_arg(node, cx) {
        lower_command(node, cx)
    } else if kind == SyntaxKind::GROUP && spans_multiple_lines(node) {
        lower_bracketed(node, SyntaxKind::L_BRACE, SyntaxKind::R_BRACE, cx)
    } else if kind == SyntaxKind::OPTIONAL && spans_multiple_lines(node) {
        lower_bracketed(node, SyntaxKind::L_BRACKET, SyntaxKind::R_BRACKET, cx)
    } else {
        Ir::concat(lower_element_stream(node.children_with_tokens(), cx))
    }
}

/// Lower a [`SyntaxKind::PARAGRAPH`] under [`WrapMode::Reflow`] by greedily
/// wrapping its prose to the line width.
///
/// All the work is done by [`reflow_elements`], applied to the paragraph's
/// direct children: adjacent non-whitespace elements glue into unbreakable
/// atoms (so `Hello,` and `\emph{x}` never split), inter-word whitespace — or a
/// lone newline, since a paragraph holds no blank lines — is a break
/// opportunity, and each run of atoms becomes an [`Ir::fill`].
///
/// An explicit `\\` line break (a [`SyntaxKind::LINE_BREAK`] node — the parser
/// groups `\\` with its `*` / `[len]` so the unit stays on one line), a `%`
/// comment, and a nested block whose IR carries a forced break each end the
/// current line; the resulting lines are joined by [`Ir::hard_line`].
fn lower_paragraph_reflow(node: &SyntaxNode, cx: LowerCtx<'_>) -> Ir {
    let children = node.children_with_tokens();
    reflow_elements(children, cx)
}

/// Greedily reflow a stream of inline elements to the line width, the shared core
/// of paragraph reflow ([`lower_paragraph_reflow`]) and prose-argument reflow
/// ([`lower_prose_group`]). Maximal runs of *adjacent* non-whitespace elements glue
/// into one unbreakable *atom* (so `Hello,` and `\emph{x}` never split); inter-word
/// whitespace or a lone newline is a break opportunity. A run of atoms lowers to an
/// [`Ir::fill`], which the printer wraps word-by-word.
///
/// Three things end a fill line rather than flow into it: an explicit `\\` line
/// break (a [`SyntaxKind::LINE_BREAK`] node), a `%` comment (which must terminate
/// its line), and a nested *block* (an environment or multi-line group whose IR
/// carries a forced break). Each commits the run-so-far as a fill, then a fresh run
/// continues after, the lines joined by [`Ir::hard_line`].
///
/// Unlike a `PARAGRAPH` (which holds no blank lines by construction), an argument
/// *group* body may contain blank-line paragraph breaks; a blank-line trivia run
/// ends the current line and separates the next with an [`Ir::empty_line`].
fn reflow_elements(elements: impl Iterator<Item = SyntaxElement>, cx: LowerCtx<'_>) -> Ir {
    // Glued pieces of the atom in progress.
    let mut atom: Vec<Ir> = Vec::new();
    // Atoms of the current fill run (the current logical line).
    let mut run: Vec<Ir> = Vec::new();
    // Completed lines (fills and blocks), interleaved with `seps` at the end.
    let mut lines: Vec<Ir> = Vec::new();
    // The separator *preceding* each committed line (`seps[0]` is unused). A blank
    // line in the source promotes the next separator to an [`Ir::empty_line`].
    let mut seps: Vec<Ir> = Vec::new();
    // The separator to record before the next committed line. Default: one break.
    let mut pending_sep: Ir = Ir::hard_line();

    /// Commit the atom in progress (if any) as one atom of the current run.
    fn flush_atom(atom: &mut Vec<Ir>, run: &mut Vec<Ir>) {
        if !atom.is_empty() {
            run.push(Ir::concat(atom.drain(..)));
        }
    }
    /// Commit `content` as the next logical line, recording the separator before
    /// it and resetting `pending_sep` to a single break.
    fn push_segment(content: Ir, lines: &mut Vec<Ir>, seps: &mut Vec<Ir>, pending_sep: &mut Ir) {
        seps.push(std::mem::replace(pending_sep, Ir::hard_line()));
        lines.push(content);
    }
    /// End the current logical line: flush the atom and, when non-empty, commit the
    /// run as a fill segment.
    fn end_line(
        atom: &mut Vec<Ir>,
        run: &mut Vec<Ir>,
        lines: &mut Vec<Ir>,
        seps: &mut Vec<Ir>,
        pending_sep: &mut Ir,
    ) {
        flush_atom(atom, run);
        if !run.is_empty() {
            push_segment(Ir::fill(run.drain(..)), lines, seps, pending_sep);
        }
    }

    let mut iter = elements.peekable();
    while let Some(element) = iter.next() {
        match element {
            // Whitespace / newline run: an atom boundary. A blank line additionally
            // ends the line and promotes the next separator to a blank line.
            SyntaxElement::Token(token) if is_collapsible_trivia(token.kind()) => {
                let (newlines, _) = consume_trivia_run(&token, &mut iter);
                if newlines >= 2 {
                    end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);
                    pending_sep = Ir::empty_line();
                } else {
                    flush_atom(&mut atom, &mut run);
                }
            }
            // A comment rides the end of the current line, then forces a break.
            SyntaxElement::Token(token) if token.kind() == SyntaxKind::COMMENT => {
                atom.push(Ir::verbatim(token.text()));
                end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);
            }
            // A token that carries its own newline — a `\`-at-end-of-line control
            // symbol, kept verbatim for losslessness — ends the line: emit the
            // part before the break as a flat atom and let the line break supply
            // the newline, so the result reparses to the same token (idempotent)
            // instead of leaving an unbreakable multi-line atom inside the fill.
            SyntaxElement::Token(token) if token.text().contains('\n') => {
                let before = token.text().split_once('\n').map(|(b, _)| b).unwrap_or("");
                if !before.is_empty() {
                    atom.push(Ir::verbatim(before));
                }
                end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);
            }
            // Any other token (WORD, `~`, `&`, `#`, `^`, `_`, brackets, `\verb`,
            // a bare control symbol) glues onto the current atom.
            SyntaxElement::Token(token) => atom.push(Ir::verbatim(token.text())),
            // An explicit `\\` line break (with its `*` / `[len]`, grouped by the
            // parser into one node) rides the end of the current line, then breaks.
            SyntaxElement::Node(child) if child.kind() == SyntaxKind::LINE_BREAK => {
                atom.push(lower_node(&child, cx));
                end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);
            }
            SyntaxElement::Node(child) => {
                let ir = lower_node(&child, cx);
                if ir.contains_forced_break() {
                    // A block amid prose: end the current line, then place the
                    // block on its own line(s); a fresh run continues after.
                    end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);
                    push_segment(ir, &mut lines, &mut seps, &mut pending_sep);
                } else {
                    atom.push(ir);
                }
            }
        }
    }
    end_line(&mut atom, &mut run, &mut lines, &mut seps, &mut pending_sep);

    // Interleave the recorded separators between committed lines.
    let mut result: Vec<Ir> = Vec::with_capacity(lines.len().saturating_mul(2));
    for (i, line) in lines.into_iter().enumerate() {
        if i > 0 {
            result.push(seps[i].clone());
        }
        result.push(line);
    }
    Ir::concat(result)
}

/// Lower a stream of sibling elements generically, one IR piece per step.
///
/// Child nodes recurse through [`lower_node`]. A `WHITESPACE`/`NEWLINE` token
/// starts a maximal trivia run, which is consumed whole and collapsed into a
/// single primitive by [`classify_trivia`]. Every other token — including
/// comments and the protected `\verb`/verbatim tokens — is emitted verbatim.
/// Because comments are not collapsible they *break* a trivia run, so the runs
/// on either side of a comment are classified independently.
fn lower_element_stream(
    elements: impl Iterator<Item = SyntaxElement>,
    cx: LowerCtx<'_>,
) -> Vec<Ir> {
    let mut stream = elements.peekable();
    let mut lowered = Vec::new();
    // Manual `next()` loop: a trivia run pulls extra elements from `stream`.
    while let Some(element) = stream.next() {
        let piece = match element {
            SyntaxElement::Token(token) => {
                if is_collapsible_trivia(token.kind()) {
                    let (newlines, indent) = consume_trivia_run(&token, &mut stream);
                    classify_trivia(newlines, indent)
                } else {
                    Ir::verbatim(token.text())
                }
            }
            SyntaxElement::Node(child) => lower_node(&child, cx),
        };
        lowered.push(piece);
    }
    lowered
}

/// Lower a `\begin{…} … \end{…}` environment, indenting its body one step.
///
/// A clean-parse environment is `[BEGIN, body…, END]`. The `BEGIN` child goes
/// through [`lower_begin`], the `END` child through [`lower_node`], and every
/// other child is body. The body is lowered generically, its own leading and
/// trailing breaks are trimmed (all indentation is owned by the printer, which
/// is what makes re-indentation idempotent), and it is then wrapped in
/// [`Ir::indent`] behind a leading [`Ir::hard_line`], followed by a `hard_line`
/// at the *outer* indent so `\end` sits flush with `\begin`. An empty body
/// keeps `\begin` and `\end` on their own lines.
///
/// Verbatim-like environments never reach here (their opaque `VERBATIM_BODY`
/// token would be corrupted by reflow); [`lower_node`] routes them to the
/// generic path, which emits the body verbatim.
fn lower_environment(node: &SyntaxNode, cx: LowerCtx<'_>) -> Ir {
    let mut header = Ir::Nil;
    let mut footer = Ir::Nil;
    let mut interior: Vec<SyntaxElement> = Vec::new();

    for element in node.children_with_tokens() {
        if let SyntaxElement::Node(child) = &element {
            let kind = child.kind();
            if kind == SyntaxKind::BEGIN {
                header = lower_begin(child, cx);
                continue;
            }
            if kind == SyntaxKind::END {
                footer = lower_node(child, cx);
                continue;
            }
        }
        interior.push(element);
    }

    let lowered = lower_element_stream(interior.into_iter(), cx);
    let body = trim_trailing_break(trim_leading_break(Ir::concat(lowered)));

    match body {
        // Empty body: keep `\begin` and `\end` on their own lines.
        Ir::Nil => Ir::concat([header, Ir::hard_line(), footer]),
        body => Ir::concat([
            header,
            Ir::indent(Ir::concat([Ir::hard_line(), body])),
            Ir::hard_line(),
            footer,
        ]),
    }
}

/// Lower a `\begin{name}` node, keeping the environment's *declared* argument
/// groups on the header line rather than letting a source line break push them
/// onto their own (indented) line: `\begin{tabular}\n{cc}` renders as a single
/// `\begin{tabular}{cc}` header.
///
/// The arity is the number of arguments in the signature that `cx.signatures`
/// returns for the environment name, so a document's own
/// `\newenvironment{thm}[1]…` is honored just like a built-in `tabular`. Children
/// are copied onto the header — with whitespace/newline trivia dropped — until
/// that many `GROUP`/`OPTIONAL` children have been seen. Anything after the last
/// declared argument (which the greedy parser may have over-attached) lowers
/// generically through [`lower_element_stream`].
///
/// The whole node takes the generic [`lower_node`] path instead when the arity
/// is zero (unknown environment, or one without arguments) or when the header
/// directly contains a comment token: gluing across a `%` comment would let it
/// swallow the next line.
fn lower_begin(begin: &SyntaxNode, cx: LowerCtx<'_>) -> Ir {
    let declared = environment_name(begin)
        .and_then(|name| cx.signatures.environment(&name))
        .map_or(0, |sig| sig.args.len());
    let carries_comment = begin.children_with_tokens().any(|element| {
        matches!(&element, SyntaxElement::Token(token) if token.kind() == SyntaxKind::COMMENT)
    });
    if carries_comment || declared == 0 {
        return lower_node(begin, cx);
    }

    let mut header: Vec<Ir> = Vec::new();
    let mut children = begin.children_with_tokens();
    let mut remaining = declared;
    while remaining > 0 {
        let Some(element) = children.next() else {
            break;
        };
        match element {
            SyntaxElement::Node(child) => {
                // Every node stays on the header line; only `{…}`/`[…]` groups
                // count towards the declared arity.
                let is_argument =
                    matches!(child.kind(), SyntaxKind::GROUP | SyntaxKind::OPTIONAL);
                header.push(lower_node(&child, cx));
                if is_argument {
                    remaining -= 1;
                }
            }
            SyntaxElement::Token(token) => {
                // Header breaks/whitespace are dropped so the arguments glue to
                // `\begin{name}`; any other token is kept verbatim.
                if !is_collapsible_trivia(token.kind()) {
                    header.push(Ir::verbatim(token.text()));
                }
            }
        }
    }

    // Whatever follows the last declared argument lowers generically; an
    // exhausted iterator contributes nothing.
    header.extend(lower_element_stream(children, cx));
    Ir::concat(header)
}

/// Lower a delimited group, indenting its body one step — the token-delimited
/// twin of [`lower_environment`]. `open`/`close` are `L_BRACE`/`R_BRACE` for a
/// brace group `{…}` and `L_BRACKET`/`R_BRACKET` for an optional-argument group
/// `[…]`. Only called for multi-line groups (see [`spans_multiple_lines`]);
/// single-line groups stay inline on the generic path.
///
/// Inside a group the parser emits body tokens directly (no `PARAGRAPH`
/// wrapping), so the only `open` token is the first child and the only `close`
/// token is the last. An `OPTIONAL` body may still contain a stray `[` (TeX does
/// not nest `[`), so only the first `open` token is taken as the opener; later
/// ones stay in the body.
///
/// The body's own edge breaks are trimmed before wrapping. A body that is empty
/// after trimming collapses to the bare delimiters, e.g. `{\n}` → `{}`.
fn lower_bracketed(node: &SyntaxNode, open: SyntaxKind, close: SyntaxKind, cx: LowerCtx<'_>) -> Ir {
    let mut opener = Ir::Nil;
    let mut closer = Ir::Nil;
    let mut interior: Vec<SyntaxElement> = Vec::new();

    for element in node.children_with_tokens() {
        if let SyntaxElement::Token(token) = &element {
            let kind = token.kind();
            if kind == open && matches!(opener, Ir::Nil) {
                opener = Ir::verbatim(token.text());
                continue;
            }
            if kind == close {
                closer = Ir::verbatim(token.text());
                continue;
            }
        }
        interior.push(element);
    }

    let lowered = lower_element_stream(interior.into_iter(), cx);
    let body = trim_trailing_break(trim_leading_break(Ir::concat(lowered)));

    match body {
        Ir::Nil => Ir::concat([opener, closer]),
        body => Ir::concat([
            opener,
            Ir::indent(Ir::concat([Ir::hard_line(), body])),
            Ir::hard_line(),
            closer,
        ]),
    }
}

/// Whether `command`'s signature marks any argument as reflowable prose.
///
/// This is the cheap guard in front of the [`lower_command`] path in
/// [`lower_node`]. It returns `false` when the command has no name, when
/// `cx.signatures` has no signature for that name, or when no argument spec has
/// `prose` set — so such commands lower generically.
fn command_has_prose_arg(command: &SyntaxNode, cx: LowerCtx<'_>) -> bool {
    let Some(name) = command_name(command) else {
        return false;
    };
    match cx.signatures.command(&name) {
        Some(sig) => sig.args.iter().any(|spec| spec.prose),
        None => false,
    }
}

/// Lower a `COMMAND` whose signature marks an argument as prose (the path gated
/// by [`command_has_prose_arg`]).
///
/// Each attached `{…}`/`[…]` child is matched to a signature slot with
/// [`match_arg_slot`]. The match is kind-aware, so an omitted optional does not
/// misalign positions: `\section{Title}` binds the `{title}` slot, not a leading
/// `[short]`. A group that fills a prose slot is reflowed by
/// [`lower_prose_group`]. Everything else — non-prose slots, groups with no
/// matching slot, other nodes, trivia and plain tokens — lowers exactly as the
/// generic path would.
fn lower_command(node: &SyntaxNode, cx: LowerCtx<'_>) -> Ir {
    let Some(sig) = command_name(node).and_then(|name| cx.signatures.command(&name)) else {
        // Defensive: the guard already proved a prose signature exists.
        return Ir::concat(lower_element_stream(node.children_with_tokens(), cx));
    };

    let mut pieces: Vec<Ir> = Vec::new();
    let mut next_slot = 0usize;
    let mut children = node.children_with_tokens().peekable();
    // Manual `next()` loop: a trivia run pulls extra elements from `children`.
    while let Some(element) = children.next() {
        let piece = match element {
            SyntaxElement::Token(token) => {
                if is_collapsible_trivia(token.kind()) {
                    let (newlines, indent) = consume_trivia_run(&token, &mut children);
                    classify_trivia(newlines, indent)
                } else {
                    Ir::verbatim(token.text())
                }
            }
            SyntaxElement::Node(child) => {
                // Argument groups carry their bracket-ness and delimiter kinds;
                // any other node has none and lowers generically.
                let delimiters = match child.kind() {
                    SyntaxKind::GROUP => Some((false, SyntaxKind::L_BRACE, SyntaxKind::R_BRACE)),
                    SyntaxKind::OPTIONAL => {
                        Some((true, SyntaxKind::L_BRACKET, SyntaxKind::R_BRACKET))
                    }
                    _ => None,
                };
                match delimiters {
                    // The guard runs once per argument group, advancing the slot
                    // cursor whether or not the slot turns out to be prose.
                    Some((is_bracket, open, close))
                        if match_arg_slot(&sig.args, &mut next_slot, is_bracket)
                            .is_some_and(|spec| spec.prose) =>
                    {
                        lower_prose_group(&child, open, close, cx)
                    }
                    _ => lower_node(&child, cx),
                }
            }
        };
        pieces.push(piece);
    }
    Ir::concat(pieces)
}

/// Match the next attached argument group to a signature slot.
///
/// `is_bracket` says whether the group is a `[…]` (otherwise it is a `{…}`);
/// `slot` is the cursor into `args` and is advanced past whatever is consumed.
/// Scanning from `*slot`:
/// - a slot of the same kind as the group is consumed and returned;
/// - a bracket (optional) slot in front of a brace group is treated as omitted
///   by the document: it is skipped and scanning continues, so a mandatory prose
///   slot still binds when the optional before it is absent;
/// - a brace slot in front of a bracket group stops the scan with `None`,
///   leaving that slot in place for a later brace group.
///
/// Returns `None` as well when the slots run out. Optional slots skipped on the
/// way stay consumed in that case.
fn match_arg_slot(args: &[ArgSpec], slot: &mut usize, is_bracket: bool) -> Option<ArgSpec> {
    loop {
        let spec = *args.get(*slot)?;
        let wants_bracket = matches!(spec.kind, ArgKind::Bracket);
        if !wants_bracket && is_bracket {
            // A required `{…}` slot but the group is a `[…]`: not this slot.
            return None;
        }
        // Either a match, or a declared optional the document omitted; both
        // consume the slot.
        *slot += 1;
        if wants_bracket == is_bracket {
            return Some(spec);
        }
    }
}

/// Lower a prose argument group: like [`lower_bracketed`], but the body is reflowed
/// to the line width ([`reflow_elements`]) and the whole thing is wrapped in a soft
/// [`Ir::group`] so it stays on one line when it fits (`\footnote{short}`) and
/// breaks the delimiters onto their own lines, indenting and word-wrapping the body,
/// when it does not. Empty bodies collapse to the bare delimiters.
fn lower_prose_group(
    node: &SyntaxNode,
    open: SyntaxKind,
    close: SyntaxKind,
    cx: LowerCtx<'_>,
) -> Ir {
    let mut open_ir = Ir::Nil;
    let mut close_ir = Ir::Nil;
    let mut body_elements: Vec<SyntaxElement> = Vec::new();
    for element in node.children_with_tokens() {
        match &element {
            SyntaxElement::Token(t) if t.kind() == open && matches!(open_ir, Ir::Nil) => {
                open_ir = Ir::verbatim(t.text());
            }
            SyntaxElement::Token(t) if t.kind() == close => {
                close_ir = Ir::verbatim(t.text());
            }
            _ => body_elements.push(element),
        }
    }

    let body = reflow_elements(body_elements.into_iter(), cx);
    if matches!(body, Ir::Nil) {
        Ir::concat([open_ir, close_ir])
    } else {
        Ir::group(Ir::concat([
            open_ir,
            Ir::indent(Ir::concat([Ir::soft_line(), body])),
            Ir::soft_line(),
            close_ir,
        ]))
    }
}

/// True if `node` directly contains a `NEWLINE` token — i.e. the group itself
/// spans multiple physical lines. Newlines inside a *nested* group/environment
/// belong to that child node, not to `node`, so this attributes line-spanning to
/// the group that physically owns the break — which keeps re-indentation stable.
fn spans_multiple_lines(node: &SyntaxNode) -> bool {
    node.children_with_tokens()
        .filter_map(|e| e.into_token())
        .any(|t| t.kind() == SyntaxKind::NEWLINE)
}

/// True if `node` directly contains a `VERBATIM_BODY` token — i.e. it is a
/// verbatim-like environment whose body must be emitted byte-for-byte.
fn has_verbatim_body(node: &SyntaxNode) -> bool {
    node.children_with_tokens()
        .filter_map(|e| e.into_token())
        .any(|t| t.kind() == SyntaxKind::VERBATIM_BODY)
}

/// Whether `kind` is trivia the formatter rewrites: only `WHITESPACE` and
/// `NEWLINE` qualify. Comments are preserved verbatim and so are *not*
/// collapsible.
fn is_collapsible_trivia(kind: SyntaxKind) -> bool {
    kind == SyntaxKind::WHITESPACE || kind == SyntaxKind::NEWLINE
}

/// Consume the maximal run of collapsible trivia that begins at `first`,
/// advancing `iter` past every following `WHITESPACE`/`NEWLINE` token.
///
/// Returns `(newlines, trailing_ws)`: the number of `NEWLINE` tokens in the run
/// and the whitespace that follows the *last* newline (the run's leading
/// indentation). Whitespace before a newline is trailing whitespace and is
/// dropped. For a run with no newline the whole run is whitespace and is
/// returned as `trailing_ws`.
fn consume_trivia_run(
    first: &SyntaxToken,
    iter: &mut Peekable<impl Iterator<Item = SyntaxElement>>,
) -> (usize, String) {
    let mut newlines = 0;
    let mut indent = String::new();
    absorb(first, &mut newlines, &mut indent);

    // `next_if` only advances while the upcoming element is collapsible trivia,
    // so the element that ends the run stays in the iterator for the caller.
    while let Some(SyntaxElement::Token(token)) = iter.next_if(|upcoming| {
        matches!(upcoming, SyntaxElement::Token(tok) if is_collapsible_trivia(tok.kind()))
    }) {
        absorb(&token, &mut newlines, &mut indent);
    }
    (newlines, indent)
}

/// Fold one trivia token into a run's summary: a `NEWLINE` bumps `newlines` and
/// discards the whitespace gathered so far (it was trailing whitespace on the
/// line just ended); any other token's text is appended to `trailing_ws`.
fn absorb(tok: &SyntaxToken, newlines: &mut usize, trailing_ws: &mut String) {
    match tok.kind() {
        SyntaxKind::NEWLINE => {
            *newlines += 1;
            trailing_ws.clear();
        }
        _ => trailing_ws.push_str(tok.text()),
    }
}

/// Turn a summarized trivia run into a single IR primitive.
///
/// - no newline → the inline whitespace (a genuine inter-word space), kept
///   verbatim;
/// - one newline → an [`Ir::hard_line`];
/// - two or more → a single [`Ir::empty_line`] (one blank line).
///
/// With at least one newline, `trailing_ws` is the whitespace that followed the
/// last newline, i.e. *indentation*. The printer owns and recreates that, so it
/// is dropped here — keeping it would double-indent on reformat.
fn classify_trivia(newlines: usize, trailing_ws: String) -> Ir {
    if newlines == 0 {
        Ir::verbatim(trailing_ws)
    } else if newlines == 1 {
        Ir::hard_line()
    } else {
        Ir::empty_line()
    }
}

/// Whether `ir` is something the indenter supplies itself and may therefore be
/// trimmed from a body edge: a forced line break ([`Ir::HardLine`],
/// [`Ir::EmptyLine`]), [`Ir::Nil`], or a non-force-break verbatim chunk made up
/// only of spaces and tabs (indentation).
///
/// A force-break verbatim, or one with any other character in it, is never
/// trimmable, so protected content such as a `VERBATIM_BODY` survives.
fn is_trimmable_break(ir: &Ir) -> bool {
    if let Ir::Verbatim { text, force_break } = ir {
        if *force_break {
            return false;
        }
        return !text.chars().any(|c| c != ' ' && c != '\t');
    }
    matches!(ir, Ir::HardLine | Ir::EmptyLine | Ir::Nil)
}

/// Drop leading break/indentation IR from `ir`.
///
/// A trimmable `ir` (see [`is_trimmable_break`]) becomes [`Ir::Nil`]. A
/// `Concat` is trimmed recursively from the front — the body's first break is
/// often buried inside the first paragraph: leading items that trim away to
/// `Nil` are dropped, the first item with something left is kept in its trimmed
/// form, and every item after it is kept untouched. Any other IR is returned
/// unchanged.
///
/// The scan is a single forward pass, so a long concat is not shifted
/// element-by-element for every dropped head.
fn trim_leading_break(ir: Ir) -> Ir {
    if is_trimmable_break(&ir) {
        return Ir::Nil;
    }
    match ir {
        Ir::Concat(items) => {
            let mut rest = items.iter().cloned();
            let mut kept: Vec<Ir> = Vec::new();
            // Consume items until the first one that survives trimming.
            for item in rest.by_ref() {
                let head = trim_leading_break(item);
                if !matches!(head, Ir::Nil) {
                    kept.push(head);
                    break;
                }
            }
            // Everything after the first surviving item is kept as-is.
            kept.extend(rest);
            Ir::concat(kept)
        }
        other => other,
    }
}

/// Drop trailing break/indentation IR from `ir` — the mirror image of
/// [`trim_leading_break`].
///
/// A trimmable `ir` (see [`is_trimmable_break`]) becomes [`Ir::Nil`]. A
/// `Concat` is trimmed recursively from the back: trailing items that trim away
/// to `Nil` are dropped, and the last item with something left is kept in its
/// trimmed form. Any other IR is returned unchanged.
fn trim_trailing_break(ir: Ir) -> Ir {
    if is_trimmable_break(&ir) {
        return Ir::Nil;
    }
    match ir {
        Ir::Concat(items) => {
            let mut kept: Vec<Ir> = items.iter().cloned().collect();
            loop {
                match kept.pop().map(trim_trailing_break) {
                    // Nothing left: every item trimmed away.
                    None => break,
                    // This tail vanished entirely; look at the one before it.
                    Some(Ir::Nil) => continue,
                    Some(tail) => {
                        kept.push(tail);
                        break;
                    }
                }
            }
            Ir::concat(kept)
        }
        other => other,
    }
}