Skip to main content

mdwright_math/
normalise.rs

1//! Pure string-to-string math-body normalisations.
2//!
3//! Formatter canonicalisation applies these transforms to rendered
4//! output bytes.
5//!
//! Every transform in this module takes a body slice and returns an
//! owned `String`; the one exception is the [`body_braces_balanced`]
//! probe, which returns a `Result`. The opener / closer reconstruction
//! lives in the canonicalise caller — these helpers are pure body
//! transforms.
9//!
10//! Brace-balance probing is shared with [`super::scan`] so the
11//! lint rule, the scanner, and the canonicalise gate agree on what
12//! "balanced" means; see [`body_braces_balanced`].
13
14use unicode_width::UnicodeWidthStr;
15
/// Walk `body` and confirm `{` / `}` balance.
///
/// A backslash always consumes the byte that follows it. That makes
/// `\{` and `\}` escapes that do not count, and it also keeps the
/// row break `\\` together as one unit, so the brace in `\\{` or
/// `\\}` is counted as a real group delimiter rather than being
/// mistaken for an escaped brace.
///
/// # Errors
///
/// Returns the byte offset of the first offending byte: either an
/// unmatched `}` or the body start when the body ends mid-group.
pub fn body_braces_balanced(body: &str) -> Result<(), usize> {
    // Number of `{` groups currently open.
    let mut open_groups: usize = 0;
    let mut bytes = body.bytes().enumerate();
    while let Some((offset, byte)) = bytes.next() {
        match byte {
            b'\\' => {
                // Drop whatever the backslash introduces. Skipping one
                // byte is enough even for a multi-byte character: UTF-8
                // continuation bytes can never equal `\`, `{` or `}`.
                let _ = bytes.next();
            }
            b'{' => open_groups = open_groups.saturating_add(1),
            b'}' => match open_groups.checked_sub(1) {
                Some(remaining) => open_groups = remaining,
                // A closer with nothing open: report where it sits.
                None => return Err(offset),
            },
            _ => {}
        }
    }
    // Groups still open at the end are reported at the body start.
    if open_groups == 0 { Ok(()) } else { Err(0) }
}
46
47/// Lay out an aligning-environment body: split on `\\` rows, then on
48/// `&` cells, pad each cell to its column's max display width, emit
49/// with `" & "` separators and a `" \\\n"` row break.
50pub fn align_env_body(body: &str) -> String {
51    let raw_rows = split_rows(body);
52    if raw_rows.is_empty() {
53        return String::new();
54    }
55    let rows: Vec<Vec<String>> = raw_rows
56        .iter()
57        .map(|row| split_cells(row).into_iter().map(|c| c.trim().to_owned()).collect())
58        .collect();
59
60    let n_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
61    let mut widths: Vec<usize> = vec![0; n_cols];
62    for row in &rows {
63        for (j, cell) in row.iter().enumerate() {
64            let w = UnicodeWidthStr::width(cell.as_str());
65            if let Some(slot) = widths.get_mut(j)
66                && w > *slot
67            {
68                *slot = w;
69            }
70        }
71    }
72
73    let mut out = String::with_capacity(body.len());
74    for (i, row) in rows.iter().enumerate() {
75        if i > 0 {
76            out.push_str(" \\\\\n");
77        }
78        let last_j = row.len().saturating_sub(1);
79        for (j, cell) in row.iter().enumerate() {
80            if j > 0 {
81                out.push_str(" & ");
82            }
83            out.push_str(cell);
84            if j < last_j {
85                let w = UnicodeWidthStr::width(cell.as_str());
86                let pad = widths.get(j).copied().unwrap_or(0).saturating_sub(w);
87                for _ in 0..pad {
88                    out.push(' ');
89                }
90            }
91        }
92    }
93    out
94}
95
/// Cut an environment body into rows at `\\` separators.
///
/// A `\\` pair counts as a separator only when the byte just before it
/// is not itself a backslash. Each row has leading and trailing
/// newlines removed (other whitespace is kept). The text after the
/// final separator is included only when it is non-empty after that
/// trimming.
fn split_rows(body: &str) -> Vec<&str> {
    let raw = body.as_bytes();
    let mut rows: Vec<&str> = Vec::new();
    let mut row_start = 0usize;
    let mut cursor = 0usize;
    while cursor < raw.len() {
        let at_pair = matches!(
            raw.get(cursor..cursor.saturating_add(2)),
            Some([b'\\', b'\\'])
        );
        let after_backslash =
            cursor.checked_sub(1).and_then(|before| raw.get(before)) == Some(&b'\\');
        if at_pair && !after_backslash {
            rows.push(body.get(row_start..cursor).unwrap_or("").trim_matches('\n'));
            // Resume right after the two-byte separator.
            cursor = cursor.saturating_add(2);
            row_start = cursor;
        } else {
            cursor = cursor.saturating_add(1);
        }
    }
    let remainder = body.get(row_start..).unwrap_or("").trim_matches('\n');
    if !remainder.is_empty() {
        rows.push(remainder);
    }
    rows
}
122
/// Cut a row into cells at `&` column separators.
///
/// An `&` whose preceding byte is a backslash is not a separator and
/// stays inside its cell. Cells are returned untrimmed, and the result
/// always holds at least one cell (an empty row gives one empty cell).
fn split_cells(row: &str) -> Vec<&str> {
    let mut cells: Vec<&str> = Vec::new();
    let mut cell_start = 0usize;
    // True when the byte visited just before the current one was `\`.
    let mut after_backslash = false;
    for (idx, byte) in row.bytes().enumerate() {
        if byte == b'&' && !after_backslash {
            cells.push(row.get(cell_start..idx).unwrap_or(""));
            cell_start = idx.saturating_add(1);
        }
        after_backslash = byte == b'\\';
    }
    cells.push(row.get(cell_start..).unwrap_or(""));
    cells
}
140
#[cfg(test)]
#[allow(clippy::indexing_slicing)]
mod tests {
    use super::*;

    /// Nested groups that all close are accepted.
    #[test]
    fn body_braces_balanced_accepts_matched() {
        let verdict = body_braces_balanced("a{b{c}d}e");
        assert!(verdict.is_ok());
    }

    /// A stray closer and an unclosed opener are both rejected.
    #[test]
    fn body_braces_balanced_rejects_unmatched() {
        let stray_closer = body_braces_balanced("a}");
        assert!(stray_closer.is_err());
        let unclosed_opener = body_braces_balanced("a{b");
        assert!(unclosed_opener.is_err());
    }

    /// The first column is padded to the width of its widest cell.
    #[test]
    fn align_pads_columns() {
        let laid_out = align_env_body("x &= a + b \\\\ longvar &= cc");
        let rows: Vec<&str> = laid_out.lines().collect();
        assert_eq!(rows.len(), 2);
        let (top, bottom) = (rows[0], rows[1]);
        assert!(top.starts_with("x       "), "got {:?}", top);
        assert!(bottom.starts_with("longvar"), "got {:?}", bottom);
    }
}