Skip to main content

mdwright_math/
span.rs

1//! Delimiter classification, recogniser error types, and per-region
2//! tagged spans.
3//!
4//! The recogniser ([`super::scan::scan_math_regions`]) produces one
5//! [`super::MathRegion`] per recognised math region, each tagged with
6//! a [`MathSpan`] that records *which* delimiter family or environment
7//! introduced it plus the body byte range.
8//!
9//! Unmatched openers and brace-imbalanced bodies become [`MathError`]
10//! values so the lint rules `math/unbalanced-delim`,
11//! `math/unbalanced-env`, and `math/unbalanced-braces` can surface a
12//! useful diagnostic without aborting the scan.
13
14use std::borrow::Cow;
15use std::ops::Range;
16
17use super::env::EnvKind;
18
/// The four primitive delimiter families that can introduce a math
/// region.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnyDelim {
    /// Opens with `\(`, closes with `\)`.
    Paren,
    /// Opens with `\[`, closes with `\]`.
    Bracket,
    /// Opens and closes with a single `$`.
    Dollar,
    /// Opens and closes with `$$`.
    Dollar2,
}

impl AnyDelim {
    /// Returns `true` for the display-math families (`\[ … \]` and
    /// `$$ … $$`) and `false` for the inline ones.
    pub const fn is_display(self) -> bool {
        match self {
            Self::Bracket | Self::Dollar2 => true,
            Self::Paren | Self::Dollar => false,
        }
    }

    /// Literal opening token of this delimiter family.
    pub const fn open(self) -> &'static str {
        match self {
            Self::Dollar => "$",
            Self::Dollar2 => "$$",
            Self::Paren => r"\(",
            Self::Bracket => r"\[",
        }
    }

    /// Literal closing token of this delimiter family.
    pub const fn close(self) -> &'static str {
        match self {
            // The dollar families close with the same token they open with.
            Self::Dollar | Self::Dollar2 => self.open(),
            Self::Paren => r"\)",
            Self::Bracket => r"\]",
        }
    }
}
55
/// Delimiter pair of an inline math region; stored on
/// [`MathSpan::Inline`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InlineDelim {
    /// Opens with `\(`, closes with `\)`.
    Paren,
    /// Opens and closes with a single `$`.
    Dollar,
}
64
/// Delimiter pair of a display math region; stored on
/// [`MathSpan::Display`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DisplayDelim {
    /// Opens with `\[`, closes with `\]`.
    Bracket,
    /// Opens and closes with `$$`.
    Dollar2,
}
73
74/// Per-region classification produced by the scanner.
75///
76/// Each variant carries the body as a [`MathBody`] — a hidden
77/// abstraction that yields clean math content regardless of where the
78/// math appeared in the source (top-level, blockquote, list item).
79/// Callers read the body through [`MathBody::as_str`].
80#[derive(Clone, Debug, PartialEq, Eq)]
81pub enum MathSpan {
82    Inline { delim: InlineDelim, body: MathBody },
83    Display { delim: DisplayDelim, body: MathBody },
84    Environment { env: EnvKind, body: MathBody },
85}
86
87impl MathSpan {
88    /// Body of this span. Provided so callers do not have to
89    /// destructure the enum to read the body.
90    pub fn body(&self) -> &MathBody {
91        match self {
92            Self::Inline { body, .. } | Self::Display { body, .. } | Self::Environment { body, .. } => body,
93        }
94    }
95}
96
/// Math-body content with container prefixes hidden.
///
/// `range` is the outer body byte range (between the delimiters, in
/// source bytes). `transparent` lists byte ranges intersecting the
/// body that the consumer should treat as if they do not exist —
/// blockquote `>` markers and list-item continuation indentation
/// captured by the recogniser at scan time.
///
/// The abstraction lets callers consume math content without knowing
/// whether the region happened to be nested in a container. When no
/// transparent run actually falls inside the body the content is
/// returned as a [`Cow::Borrowed`] slice; otherwise one `String` is
/// allocated per call to [`MathBody::as_str`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MathBody {
    range: Range<usize>,
    /// Ranges to skip, expected to be sorted and non-overlapping.
    /// Stored unclipped — `as_str` and `clean_offset_to_source` clip
    /// against `range` on every use, and both tolerate nested or
    /// overlapping runs by never moving their cursor backwards.
    transparent: Box<[Range<usize>]>,
}

impl MathBody {
    /// Build a body from its source byte `range` and the `transparent`
    /// runs to hide. No validation is performed on either argument.
    pub fn new(range: Range<usize>, transparent: Box<[Range<usize>]>) -> Self {
        Self { range, transparent }
    }

    /// Source byte range of the delimiter-excluded math body.
    #[must_use]
    pub fn source_range(&self) -> Range<usize> {
        self.range.clone()
    }

    /// Clip `run` to the body range. Returns `None` when nothing of
    /// the run lies inside the body.
    fn clipped_run(&self, run: &Range<usize>) -> Option<Range<usize>> {
        let start = run.start.max(self.range.start);
        let end = run.end.min(self.range.end);
        if start < end { Some(start..end) } else { None }
    }

    /// Materialised body content with transparent runs removed.
    ///
    /// Borrows the source slice when no run intersects the body and
    /// allocates a new `String` only when stripping is required. A
    /// piece that cannot be sliced out of `source` (out of bounds or
    /// not on a `char` boundary) contributes nothing to the result.
    pub fn as_str<'src>(&self, source: &'src str) -> Cow<'src, str> {
        let mut runs = self
            .transparent
            .iter()
            .filter_map(|run| self.clipped_run(run))
            .peekable();
        if runs.peek().is_none() {
            // Nothing to strip: hand back the body slice itself.
            return Cow::Borrowed(source.get(self.range.clone()).unwrap_or(""));
        }

        let mut out = String::with_capacity(self.range.end.saturating_sub(self.range.start));
        let mut cursor = self.range.start;
        for run in runs {
            // `str::get` yields `None` for an inverted range, so a run
            // that starts behind the cursor copies nothing.
            if let Some(slice) = source.get(cursor..run.start) {
                out.push_str(slice);
            }
            // Never step backwards: a run nested inside an earlier one
            // must not re-expose bytes that were already stripped.
            cursor = cursor.max(run.end);
        }
        if let Some(slice) = source.get(cursor..self.range.end) {
            out.push_str(slice);
        }
        Cow::Owned(out)
    }

    /// Map a byte offset inside the clean (stripped) body back to a
    /// source-absolute byte.
    ///
    /// Walks the same clipped runs as [`Self::as_str`], so an offset
    /// produced by a check on the clean body resolves to the matching
    /// source position even when container prefixes were stripped. An
    /// offset that sits exactly where a transparent run was removed
    /// resolves to the first byte after that run. Offsets past the end
    /// of the clean body are extrapolated linearly (saturating at
    /// `usize::MAX`), not clamped.
    pub fn clean_offset_to_source(&self, clean_off: usize) -> usize {
        // Clean bytes accounted for so far, and the source position
        // that corresponds to that many clean bytes.
        let mut consumed = 0usize;
        let mut cursor = self.range.start;
        for run in self.transparent.iter().filter_map(|run| self.clipped_run(run)) {
            let visible_len = run.start.saturating_sub(cursor);
            let visible_end = consumed.saturating_add(visible_len);
            if clean_off < visible_end {
                // The offset lies in the visible stretch before this run.
                break;
            }
            consumed = visible_end;
            // Same monotonic cursor rule as `as_str`.
            cursor = cursor.max(run.end);
        }
        cursor.saturating_add(clean_off.saturating_sub(consumed))
    }
}
186
187/// An unrecoverable shape the recogniser saw. The scanner never
188/// panics; it accumulates these and keeps scanning the rest of the
189/// document.
190//
191// The `Unbalanced` prefix is part of the user-facing diagnostic
192// vocabulary (it mirrors the rule names `math/unbalanced-delim`,
193// `math/unbalanced-env`, `math/unbalanced-braces`), so the
194// shared-prefix nudge does not apply here.
195#[allow(clippy::enum_variant_names)]
196#[derive(Clone, Debug)]
197pub enum MathError {
198    /// `\[`, `\(`, `$$`, or `$` with no matching close.
199    UnbalancedDelim {
200        delim: AnyDelim,
201        /// Byte range of the opening delimiter token.
202        range: Range<usize>,
203    },
204    /// `\begin{name}` with no matching `\end{name}` at the same depth.
205    UnbalancedEnv {
206        name: String,
207        /// Byte range covering `\begin{name}` itself.
208        range: Range<usize>,
209    },
210    /// `{` and `}` inside a recognised math body do not balance. The
211    /// region still scans because markers are balanced, but body
212    /// normalisation is skipped.
213    UnbalancedBraces {
214        /// Byte offset (absolute, into the source) of the offending
215        /// brace — either an unmatched `}` or the start of the body
216        /// when the document ends mid-group.
217        offset: usize,
218        /// Byte range of the math region whose body failed validation.
219        region: Range<usize>,
220    },
221}