Skip to main content

mdwright_latex/
layout.rs

//! Unicode terminal layout for parsed TeX math bodies.
//!
//! This is not a TeX box model. It is a conservative grid renderer:
//! each intermediate value has a display-cell width, a positive
//! height, a baseline row inside that height, and padded rows. The
//! constructors maintain those invariants so callers cannot observe
//! ragged multi-line output.
8
9use std::fmt;
10
11use unicode_width::UnicodeWidthStr;
12
13use crate::error::{LatexError, LatexErrorKind, SourceSpan};
14use crate::parser::{
15    Accent, AccentKind, Atom, Delimited, Delimiter, Environment, Fraction, Group, MathBody, Node, NodeKind,
16    ParseDiagnostic, ParseDiagnosticKind, Row, Script, ScriptArgument, ScriptBase, Sqrt, parse_math_body,
17};
18use crate::registry::{latex_symbol, unicode_sub_str, unicode_super_str};
19
/// Terminal-ready Unicode rendering of one TeX math body.
///
/// The block is stored as rows of text together with the index of the
/// baseline row and the display-cell width of the block.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct RenderedLatex {
    /// Rendered rows, in the order they are printed.
    lines: Vec<String>,
    /// Index into `lines` of the baseline row.
    baseline: usize,
    /// Width of the block in display cells.
    width: usize,
}
27
28impl RenderedLatex {
29    fn from_grid(grid: Grid) -> Self {
30        Self {
31            lines: grid.lines,
32            baseline: grid.baseline,
33            width: grid.width,
34        }
35    }
36
37    /// Rendered terminal lines.
38    #[must_use]
39    pub fn lines(&self) -> &[String] {
40        &self.lines
41    }
42
43    /// Baseline line index.
44    #[must_use]
45    pub const fn baseline(&self) -> usize {
46        self.baseline
47    }
48
49    /// Display-cell width.
50    #[must_use]
51    pub const fn width(&self) -> usize {
52        self.width
53    }
54
55    /// Materialise the rendered block as newline-separated text.
56    #[must_use]
57    pub fn as_text(&self) -> String {
58        self.lines.join("\n")
59    }
60}
61
62impl fmt::Display for RenderedLatex {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        f.write_str(&self.as_text())
65    }
66}
67
68/// Render one TeX math body as terminal-friendly Unicode.
69///
70/// # Errors
71///
72/// Returns [`LatexError`] when the body is malformed or contains a
73/// parsed construct that mdwright cannot render honestly as Unicode.
74pub fn render_unicode_math(source: &str) -> Result<RenderedLatex, LatexError> {
75    let body = parse_math_body(source).map_err(first_parse_error)?;
76    render_body(&body).map(RenderedLatex::from_grid)
77}
78
/// Intermediate layout value: a rectangular block of text rows.
///
/// `Grid::new` pads every row to `width` display cells and keeps
/// `baseline` inside the row range.
#[derive(Debug, Clone, Eq, PartialEq)]
struct Grid {
    /// Rows of the block, each padded to `width` display cells.
    lines: Vec<String>,
    /// Widest row, measured in display cells.
    width: usize,
    /// Index into `lines` of the baseline row.
    baseline: usize,
}
85
86impl Grid {
87    fn new(lines: Vec<String>, baseline: usize) -> Self {
88        let width = lines
89            .iter()
90            .map(|line| UnicodeWidthStr::width(line.as_str()))
91            .max()
92            .unwrap_or(0);
93        let mut padded = if lines.is_empty() { vec![String::new()] } else { lines };
94        for line in &mut padded {
95            *line = pad_to_width(line, width);
96        }
97        let baseline = baseline.min(padded.len().saturating_sub(1));
98        Self {
99            lines: padded,
100            width,
101            baseline,
102        }
103    }
104
105    fn text(text: impl Into<String>) -> Self {
106        Self::new(vec![text.into()], 0)
107    }
108
109    fn empty() -> Self {
110        Self::text("")
111    }
112
113    fn height(&self) -> usize {
114        self.lines.len()
115    }
116
117    fn hcat(&self, rhs: &Self) -> Self {
118        let baseline = self.baseline.max(rhs.baseline);
119        let self_below = self.height().saturating_sub(self.baseline.saturating_add(1));
120        let rhs_below = rhs.height().saturating_sub(rhs.baseline.saturating_add(1));
121        let below = self_below.max(rhs_below);
122        let height = baseline.saturating_add(1).saturating_add(below);
123        let mut lines = Vec::with_capacity(height);
124        for row in 0..height {
125            let lhs_line = self.line_at(row, baseline);
126            let rhs_line = rhs.line_at(row, baseline);
127            lines.push(format!("{lhs_line}{rhs_line}"));
128        }
129        Self::new(lines, baseline)
130    }
131
132    fn append_baseline_suffix(mut self, suffix: &str) -> Self {
133        if let Some(line) = self.lines.get_mut(self.baseline) {
134            line.push_str(suffix);
135        }
136        Self::new(self.lines, self.baseline)
137    }
138
139    fn line_at(&self, row: usize, target_baseline: usize) -> String {
140        let source_row = if row >= target_baseline {
141            self.baseline.checked_add(row.saturating_sub(target_baseline))
142        } else {
143            self.baseline.checked_sub(target_baseline.saturating_sub(row))
144        };
145        source_row
146            .and_then(|idx| self.lines.get(idx))
147            .map_or_else(|| " ".repeat(self.width), ToOwned::to_owned)
148    }
149
150    fn single_line_text(&self) -> Option<&str> {
151        (self.lines.len() == 1).then(|| self.lines.first().map(String::as_str))?
152    }
153}
154
155fn render_body(body: &MathBody<'_>) -> Result<Grid, LatexError> {
156    let parts = body.elements.iter().map(render_node).collect::<Result<Vec<_>, _>>()?;
157    Ok(hcat_all(&parts))
158}
159
160fn render_node(node: &Node<'_>) -> Result<Grid, LatexError> {
161    match &node.kind {
162        NodeKind::Atom(atom) => render_atom(*atom, node.span),
163        NodeKind::Group(group) => render_group(group),
164        NodeKind::Fraction(fraction) => render_fraction(fraction, node.span),
165        NodeKind::Sqrt(sqrt) => render_sqrt(sqrt, node.span),
166        NodeKind::Accent(accent) => render_accent(accent, node.span),
167        NodeKind::Script(script) => render_script(script, node.span),
168        NodeKind::Delimited(delimited) => render_delimited(delimited),
169        NodeKind::Environment(environment) => render_environment(environment, node.span),
170    }
171}
172
173fn render_atom(atom: Atom<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
174    match atom {
175        Atom::Identifier(text) | Atom::Number(text) | Atom::Punctuation(text) | Atom::UnicodeSymbol(text) => {
176            Ok(Grid::text(text))
177        }
178        Atom::ControlSymbol(text) => Ok(Grid::text(control_symbol_text(text))),
179        Atom::CommandSymbol(name) => latex_symbol(name)
180            .map(Grid::text)
181            .ok_or_else(|| unsupported(span, format!("unsupported TeX command `\\{name}`"))),
182        Atom::Delimiter(delimiter) => Ok(Grid::text(delimiter_text(delimiter))),
183    }
184}
185
186fn render_group(group: &Group<'_>) -> Result<Grid, LatexError> {
187    render_body(&group.body)
188}
189
190fn render_fraction(fraction: &Fraction<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
191    let numerator = render_group(&fraction.numerator)?;
192    let denominator = render_group(&fraction.denominator)?;
193    let width = numerator.width.max(denominator.width).max(1);
194    let mut lines = Vec::new();
195    lines.extend(numerator.lines.iter().map(|line| center(line, width)));
196    let baseline = lines.len();
197    lines.push("─".repeat(width));
198    lines.extend(denominator.lines.iter().map(|line| center(line, width)));
199    let grid = Grid::new(lines, baseline);
200    if grid.width == 0 {
201        Err(unsupported(span, "empty fraction cannot be rendered"))
202    } else {
203        Ok(grid)
204    }
205}
206
207fn render_sqrt(sqrt: &Sqrt<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
208    let radicand = render_group(&sqrt.body)?;
209    let root = match &sqrt.degree {
210        Some(degree) => {
211            let degree = render_group(degree)?;
212            let Some(text) = degree.single_line_text() else {
213                return Err(unsupported(span, "multi-line root degree cannot be rendered"));
214            };
215            let rendered = unicode_super_str(text)
216                .ok_or_else(|| unsupported(span, "root degree has no Unicode superscript form"))?;
217            format!("{rendered}√")
218        }
219        None => "√".to_owned(),
220    };
221    Ok(prefix_baseline(&root, &radicand))
222}
223
224fn render_accent(accent: &Accent<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
225    let body = render_group(&accent.body)?;
226    let Some(text) = body.single_line_text() else {
227        return Err(unsupported(span, "multi-line accent body cannot be rendered"));
228    };
229    let mark = match accent.accent {
230        AccentKind::Hat => '\u{302}',
231        AccentKind::Bar => '\u{305}',
232        AccentKind::Tilde => '\u{303}',
233        AccentKind::Vec => '\u{20d7}',
234    };
235    let mut out = String::new();
236    for ch in text.chars() {
237        out.push(ch);
238        if !ch.is_whitespace() {
239            out.push(mark);
240        }
241    }
242    Ok(Grid::text(out))
243}
244
245fn render_script(script: &Script<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
246    let mut base = render_script_base(&script.base)?;
247    if let Some(superscript) = &script.superscript {
248        let text = render_script_argument(superscript)?;
249        let rendered =
250            unicode_super_str(&text).ok_or_else(|| unsupported(span, format!("unsupported superscript {text:?}")))?;
251        base = base.append_baseline_suffix(&rendered);
252    }
253    if let Some(subscript) = &script.subscript {
254        let text = render_script_argument(subscript)?;
255        let rendered =
256            unicode_sub_str(&text).ok_or_else(|| unsupported(span, format!("unsupported subscript {text:?}")))?;
257        base = base.append_baseline_suffix(&rendered);
258    }
259    Ok(base)
260}
261
262fn render_script_base(base: &ScriptBase<'_>) -> Result<Grid, LatexError> {
263    match base {
264        ScriptBase::Atom(atom) => render_atom(*atom, SourceSpan::new(0, 0)),
265        ScriptBase::Group(group) => render_group(group),
266        ScriptBase::Fraction(fraction) => render_fraction(fraction, SourceSpan::new(0, 0)),
267        ScriptBase::Sqrt(sqrt) => render_sqrt(sqrt, SourceSpan::new(0, 0)),
268        ScriptBase::Accent(accent) => render_accent(accent, SourceSpan::new(0, 0)),
269        ScriptBase::Delimited(delimited) => render_delimited(delimited),
270    }
271}
272
273fn render_script_argument(argument: &ScriptArgument<'_>) -> Result<String, LatexError> {
274    let rendered = match argument {
275        ScriptArgument::Atom { atom, span } => render_atom(*atom, *span)?,
276        ScriptArgument::Group(group) => render_group(group)?,
277    };
278    rendered
279        .single_line_text()
280        .map(ToOwned::to_owned)
281        .ok_or_else(|| unsupported(argument.span(), "multi-line script cannot be rendered"))
282}
283
284fn render_delimited(delimited: &Delimited<'_>) -> Result<Grid, LatexError> {
285    let opener = Grid::text(delimiter_text(delimited.opener));
286    let body = render_body(&delimited.body)?;
287    let closer = Grid::text(delimiter_text(delimited.closer));
288    Ok(opener.hcat(&body).hcat(&closer))
289}
290
291fn render_environment(environment: &Environment<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
292    match environment.name {
293        "matrix" | "pmatrix" | "bmatrix" | "Bmatrix" | "vmatrix" | "Vmatrix" | "cases" | "array" => {
294            render_matrix_like(environment, span)
295        }
296        "aligned" | "split" => render_matrix_rows(&environment.rows, span),
297        name => Err(unsupported(span, format!("unsupported environment `{name}`"))),
298    }
299}
300
301fn render_matrix_like(environment: &Environment<'_>, span: SourceSpan) -> Result<Grid, LatexError> {
302    let rows = render_matrix_rows(&environment.rows, span)?;
303    let (left, right) = match environment.name {
304        "pmatrix" => ("(", ")"),
305        "bmatrix" => ("[", "]"),
306        "Bmatrix" => ("{", "}"),
307        "vmatrix" => ("|", "|"),
308        "Vmatrix" => ("‖", "‖"),
309        "cases" => ("{", ""),
310        _ => ("", ""),
311    };
312    Ok(wrap_rows(left, rows, right))
313}
314
315fn render_matrix_rows(rows: &[Row<'_>], span: SourceSpan) -> Result<Grid, LatexError> {
316    if rows.is_empty() {
317        return Ok(Grid::empty());
318    }
319    let rendered = rows.iter().map(render_row).collect::<Result<Vec<_>, _>>()?;
320    if rendered
321        .iter()
322        .flat_map(|row| row.iter())
323        .any(|cell| cell.height() != 1)
324    {
325        return Err(unsupported(span, "multi-line matrix cells cannot be rendered"));
326    }
327    let columns = rendered.iter().map(Vec::len).max().unwrap_or(0);
328    let mut widths = vec![0usize; columns];
329    for row in &rendered {
330        for (idx, cell) in row.iter().enumerate() {
331            if let Some(width) = widths.get_mut(idx) {
332                *width = (*width).max(cell.width);
333            }
334        }
335    }
336    let mut lines = Vec::with_capacity(rendered.len());
337    for row in rendered {
338        let mut parts = Vec::with_capacity(columns);
339        for idx in 0..columns {
340            let cell = row.get(idx).map_or_else(Grid::empty, Clone::clone);
341            let width = widths.get(idx).copied().unwrap_or(0);
342            parts.push(center(cell.single_line_text().unwrap_or(""), width));
343        }
344        lines.push(parts.join("  "));
345    }
346    let baseline = lines.len() / 2;
347    Ok(Grid::new(lines, baseline))
348}
349
350fn render_row(row: &Row<'_>) -> Result<Vec<Grid>, LatexError> {
351    row.cells
352        .iter()
353        .map(|cell| render_body(&cell.body))
354        .collect::<Result<Vec<_>, _>>()
355}
356
357fn hcat_all(parts: &[Grid]) -> Grid {
358    parts.iter().fold(Grid::empty(), |acc, part| acc.hcat(part))
359}
360
361fn prefix_baseline(prefix: &str, rhs: &Grid) -> Grid {
362    let prefix_width = UnicodeWidthStr::width(prefix);
363    let mut lines = Vec::with_capacity(rhs.lines.len());
364    for (idx, line) in rhs.lines.iter().enumerate() {
365        if idx == rhs.baseline {
366            lines.push(format!("{prefix}{line}"));
367        } else {
368            lines.push(format!("{}{line}", " ".repeat(prefix_width)));
369        }
370    }
371    Grid::new(lines, rhs.baseline)
372}
373
374fn wrap_rows(left: &str, body: Grid, right: &str) -> Grid {
375    if left.is_empty() && right.is_empty() {
376        return body;
377    }
378    let lines = body
379        .lines
380        .iter()
381        .map(|line| format!("{left}{line}{right}"))
382        .collect::<Vec<_>>();
383    Grid::new(lines, body.baseline)
384}
385
386fn delimiter_text(delimiter: Delimiter<'_>) -> &str {
387    match delimiter {
388        Delimiter::Source(".") => "",
389        Delimiter::Source(source) => source,
390    }
391}
392
/// Text to print for a control symbol: the source with one leading
/// backslash removed, or the source unchanged when it has none.
fn control_symbol_text(source: &str) -> &str {
    match source.strip_prefix('\\') {
        Some(rest) => rest,
        None => source,
    }
}
396
397fn pad_to_width(line: &str, width: usize) -> String {
398    let current = UnicodeWidthStr::width(line);
399    if current >= width {
400        line.to_owned()
401    } else {
402        format!("{line}{}", " ".repeat(width.saturating_sub(current)))
403    }
404}
405
406fn center(line: &str, width: usize) -> String {
407    let current = UnicodeWidthStr::width(line);
408    if current >= width {
409        return line.to_owned();
410    }
411    let pad = width.saturating_sub(current);
412    let left = pad / 2;
413    let right = pad.saturating_sub(left);
414    format!("{}{}{}", " ".repeat(left), line, " ".repeat(right))
415}
416
417fn first_parse_error(diagnostics: Vec<ParseDiagnostic>) -> LatexError {
418    diagnostics.into_iter().next().map_or_else(
419        || LatexError::new(LatexErrorKind::Syntax, SourceSpan::new(0, 0), "invalid TeX math"),
420        |diagnostic| parse_error(&diagnostic),
421    )
422}
423
424fn parse_error(diagnostic: &ParseDiagnostic) -> LatexError {
425    let kind = match diagnostic.kind() {
426        ParseDiagnosticKind::Lexical => LatexErrorKind::Lexical,
427        ParseDiagnosticKind::UnsupportedCommand | ParseDiagnosticKind::UnsupportedEnvironment => {
428            LatexErrorKind::Unsupported
429        }
430        ParseDiagnosticKind::UnexpectedToken
431        | ParseDiagnosticKind::MissingRequiredArgument
432        | ParseDiagnosticKind::UnbalancedGroup
433        | ParseDiagnosticKind::UnmatchedEnvironmentEnd
434        | ParseDiagnosticKind::ScriptWithoutBase
435        | ParseDiagnosticKind::DuplicateSubscript
436        | ParseDiagnosticKind::DuplicateSuperscript => LatexErrorKind::Syntax,
437    };
438    LatexError::new(kind, diagnostic.span(), diagnostic.message())
439}
440
441fn unsupported(span: SourceSpan, message: impl Into<String>) -> LatexError {
442    LatexError::new(LatexErrorKind::Unsupported, span, message)
443}
444
#[cfg(test)]
mod tests {
    #![allow(
        clippy::expect_used,
        clippy::indexing_slicing,
        clippy::literal_string_with_formatting_args,
        reason = "layout tests inspect concrete grid output"
    )]

    use super::*;

    /// Render `source` and return the block as newline-separated text.
    fn text(source: &str) -> String {
        let rendered = render_unicode_math(source).expect("math renders");
        rendered.as_text()
    }

    /// Owned copies of the expected rows, for comparison with
    /// `RenderedLatex::lines`.
    fn rows(expected: &[&str]) -> Vec<String> {
        expected.iter().map(|row| (*row).to_owned()).collect()
    }

    #[test]
    fn grid_constructor_normalises_ragged_lines() {
        // The out-of-range baseline must be clamped to the last row.
        let grid = Grid::new(vec!["x".to_owned(), "alpha".to_owned()], 9);

        assert_eq!(grid.width, 5);
        assert_eq!(grid.baseline, 1);
        assert_eq!(UnicodeWidthStr::width(grid.lines[0].as_str()), 5);
        assert_eq!(UnicodeWidthStr::width(grid.lines[1].as_str()), 5);
    }

    #[test]
    fn simple_symbols_and_scripts_render() {
        assert_eq!(text(r"\alpha_i"), "αᵢ");
        assert_eq!(text("x^{2}"), "x²");
        assert_eq!(text("x^{-1}"), "x⁻¹");
    }

    #[test]
    fn fractions_and_nested_fractions_render_with_stable_baselines() {
        let rendered = render_unicode_math(r"\frac{a}{b}").expect("fraction renders");
        assert_eq!(rendered.lines(), rows(&["a", "─", "b"]).as_slice());
        assert_eq!(rendered.baseline(), 1);
        assert_eq!(rendered.width(), 1);

        let nested = render_unicode_math(r"\frac{\frac{a}{b}}{c}").expect("nested fraction renders");
        assert_eq!(nested.lines(), rows(&["a", "─", "b", "─", "c"]).as_slice());
        assert_eq!(nested.baseline(), 3);
    }

    #[test]
    fn square_roots_and_root_degrees_render() {
        assert_eq!(text(r"\sqrt{x}"), "√x");
        assert_eq!(text(r"\sqrt[n]{x}"), "ⁿ√x");
    }

    #[test]
    fn accents_render_with_combining_marks() {
        assert_eq!(text(r"\hat{x}"), "x\u{302}");
        assert_eq!(text(r"\vec{v}"), "v\u{20d7}");
    }

    #[test]
    fn delimiters_render_around_body() {
        assert_eq!(text(r"\left( x \right)"), "(x)");
        assert_eq!(text(r"\left. x \right|"), "x|");
    }

    #[test]
    fn matrices_and_cases_render_as_grid_text() {
        let matrix = render_unicode_math(r"\begin{pmatrix}a & bb \\ c & d\end{pmatrix}").expect("matrix renders");
        assert_eq!(matrix.lines(), rows(&["(a  bb)", "(c  d )"]).as_slice());
        assert_eq!(matrix.baseline(), 1);

        let cases = render_unicode_math(r"\begin{cases}x & y \\ z & w\end{cases}").expect("cases renders");
        assert_eq!(cases.lines(), rows(&["{x  y", "{z  w"]).as_slice());
    }

    #[test]
    fn unsupported_constructs_return_typed_errors() {
        let unsupported_err = render_unicode_math(r"\color{red}{x}").expect_err("color is unsupported");
        assert_eq!(unsupported_err.kind(), &LatexErrorKind::Unsupported);

        let syntax_err = render_unicode_math(r"\frac{a}").expect_err("fraction is malformed");
        assert_eq!(syntax_err.kind(), &LatexErrorKind::Syntax);
    }
}