mdwright_math/span.rs
1//! Delimiter classification, recogniser error types, and per-region
2//! tagged spans.
3//!
4//! The recogniser ([`super::scan::scan_math_regions`]) produces one
5//! [`super::MathRegion`] per recognised math region, each tagged with
6//! a [`MathSpan`] that records *which* delimiter family or environment
7//! introduced it plus the body byte range.
8//!
9//! Unmatched openers and brace-imbalanced bodies become [`MathError`]
10//! values so the lint rules `math/unbalanced-delim`,
11//! `math/unbalanced-env`, and `math/unbalanced-braces` can surface a
12//! useful diagnostic without aborting the scan.
13
14use std::borrow::Cow;
15use std::ops::Range;
16
17use super::env::EnvKind;
18
/// The primitive math delimiter families, inline and display alike.
///
/// There are exactly four: two backslash-escaped pairs and two
/// dollar-sign pairs.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnyDelim {
    /// Backslash-parenthesis pair: `\(` … `\)`.
    Paren,
    /// Backslash-bracket pair: `\[` … `\]`.
    Bracket,
    /// Single-dollar pair: `$` … `$`.
    Dollar,
    /// Double-dollar pair: `$$` … `$$`.
    Dollar2,
}
31
32impl AnyDelim {
33 pub const fn is_display(self) -> bool {
34 matches!(self, Self::Bracket | Self::Dollar2)
35 }
36
37 pub const fn open(self) -> &'static str {
38 match self {
39 Self::Paren => r"\(",
40 Self::Bracket => r"\[",
41 Self::Dollar => "$",
42 Self::Dollar2 => "$$",
43 }
44 }
45
46 pub const fn close(self) -> &'static str {
47 match self {
48 Self::Paren => r"\)",
49 Self::Bracket => r"\]",
50 Self::Dollar => "$",
51 Self::Dollar2 => "$$",
52 }
53 }
54}
55
/// Delimiter pair recorded on [`MathSpan::Inline`] regions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InlineDelim {
    /// Backslash-parenthesis pair: `\(` … `\)`.
    Paren,
    /// Single-dollar pair: `$` … `$`.
    Dollar,
}
64
/// Delimiter pair recorded on [`MathSpan::Display`] regions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DisplayDelim {
    /// Backslash-bracket pair: `\[` … `\]`.
    Bracket,
    /// Double-dollar pair: `$$` … `$$`.
    Dollar2,
}
73
74/// Per-region classification produced by the scanner.
75///
76/// Each variant carries the body as a [`MathBody`] — a hidden
77/// abstraction that yields clean math content regardless of where the
78/// math appeared in the source (top-level, blockquote, list item).
79/// Callers read the body through [`MathBody::as_str`].
80#[derive(Clone, Debug, PartialEq, Eq)]
81pub enum MathSpan {
82 Inline { delim: InlineDelim, body: MathBody },
83 Display { delim: DisplayDelim, body: MathBody },
84 Environment { env: EnvKind, body: MathBody },
85}
86
87impl MathSpan {
88 /// Body of this span. Provided so callers do not have to
89 /// destructure the enum to read the body.
90 pub fn body(&self) -> &MathBody {
91 match self {
92 Self::Inline { body, .. } | Self::Display { body, .. } | Self::Environment { body, .. } => body,
93 }
94 }
95}
96
/// Body of a math region, with container prefixes hidden from readers.
///
/// `range` is the outer byte range of the body in the source, i.e. the
/// bytes between the delimiters. `transparent` lists byte ranges
/// overlapping that body which consumers should treat as absent:
/// blockquote `>` markers and list-item continuation indentation that
/// the recogniser captured while scanning.
///
/// Callers can therefore consume math content without caring whether
/// the region was nested inside a container. Without a container the
/// content is served as a [`Cow::Borrowed`] slice; container-nested
/// math costs one `String` allocation per region.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MathBody {
    /// Source byte range of the body, delimiters excluded.
    range: Range<usize>,
    /// Sorted, non-overlapping ranges that intersect `range`. They are
    /// kept unclipped; `as_str` and `clean_offset_to_source` clip them
    /// against `range` each time they are used.
    transparent: Box<[Range<usize>]>,
}
117
118impl MathBody {
119 pub fn new(range: Range<usize>, transparent: Box<[Range<usize>]>) -> Self {
120 Self { range, transparent }
121 }
122
123 /// Source byte range of the delimiter-excluded math body.
124 #[must_use]
125 pub fn source_range(&self) -> Range<usize> {
126 self.range.clone()
127 }
128
129 /// Materialised body content with transparent runs removed.
130 /// Borrows the source slice when no runs intersect; allocates a
131 /// new `String` only when stripping is required.
132 pub fn as_str<'src>(&self, source: &'src str) -> Cow<'src, str> {
133 if self.transparent.is_empty() {
134 return Cow::Borrowed(source.get(self.range.clone()).unwrap_or(""));
135 }
136 let mut out = String::with_capacity(self.range.end.saturating_sub(self.range.start));
137 let mut cursor = self.range.start;
138 for run in &self.transparent {
139 let run_start = run.start.max(self.range.start);
140 let run_end = run.end.min(self.range.end);
141 if run_start >= run_end {
142 continue;
143 }
144 if cursor < run_start
145 && let Some(slice) = source.get(cursor..run_start)
146 {
147 out.push_str(slice);
148 }
149 cursor = run_end;
150 }
151 if cursor < self.range.end
152 && let Some(slice) = source.get(cursor..self.range.end)
153 {
154 out.push_str(slice);
155 }
156 Cow::Owned(out)
157 }
158
159 /// Map a byte offset inside the clean (stripped) body back to a
160 /// source-absolute byte. Walks the same prefix iteration
161 /// [`Self::as_str`] uses, so an offset produced by a check on the
162 /// clean body resolves to the correct source position even when
163 /// container prefixes have been stripped.
164 pub fn clean_offset_to_source(&self, clean_off: usize) -> usize {
165 if self.transparent.is_empty() {
166 return self.range.start.saturating_add(clean_off);
167 }
168 let mut consumed = 0usize;
169 let mut cursor = self.range.start;
170 for run in &self.transparent {
171 let run_start = run.start.max(self.range.start);
172 let run_end = run.end.min(self.range.end);
173 if run_start >= run_end {
174 continue;
175 }
176 let slice_len = run_start.saturating_sub(cursor);
177 if clean_off < consumed.saturating_add(slice_len) {
178 return cursor.saturating_add(clean_off.saturating_sub(consumed));
179 }
180 consumed = consumed.saturating_add(slice_len);
181 cursor = run_end;
182 }
183 cursor.saturating_add(clean_off.saturating_sub(consumed))
184 }
185}
186
187/// An unrecoverable shape the recogniser saw. The scanner never
188/// panics; it accumulates these and keeps scanning the rest of the
189/// document.
190//
191// The `Unbalanced` prefix is part of the user-facing diagnostic
192// vocabulary (it mirrors the rule names `math/unbalanced-delim`,
193// `math/unbalanced-env`, `math/unbalanced-braces`), so the
194// shared-prefix nudge does not apply here.
195#[allow(clippy::enum_variant_names)]
196#[derive(Clone, Debug)]
197pub enum MathError {
198 /// `\[`, `\(`, `$$`, or `$` with no matching close.
199 UnbalancedDelim {
200 delim: AnyDelim,
201 /// Byte range of the opening delimiter token.
202 range: Range<usize>,
203 },
204 /// `\begin{name}` with no matching `\end{name}` at the same depth.
205 UnbalancedEnv {
206 name: String,
207 /// Byte range covering `\begin{name}` itself.
208 range: Range<usize>,
209 },
210 /// `{` and `}` inside a recognised math body do not balance. The
211 /// region still scans because markers are balanced, but body
212 /// normalisation is skipped.
213 UnbalancedBraces {
214 /// Byte offset (absolute, into the source) of the offending
215 /// brace — either an unmatched `}` or the start of the body
216 /// when the document ends mid-group.
217 offset: usize,
218 /// Byte range of the math region whose body failed validation.
219 region: Range<usize>,
220 },
221}