Skip to main content

panache_parser/parser/
math.rs

1//! In-tree TeX math content parser.
2//!
3//! Produces a lossless structural CST for the *content* between math
4//! delimiters (the delimiters themselves are owned by the host `INLINE_MATH` /
5//! `DISPLAY_MATH` nodes, see `parser/inlines/math.rs`). The returned subtree is
6//! rooted at [`SyntaxKind::MATH_CONTENT`] and is spliced directly into the host
7//! document tree, replacing the opaque content `TEXT` token.
8//!
9//! This is a *syntactic* parse, not a semantic one: TeX is a Turing-complete
10//! macro language, so we only capture structure that a formatter can safely act
11//! on — brace groups, `\begin`/`\end` environments, control sequences,
12//! alignment tabs (`&`), line breaks (`\\`), sub/superscript markers, comments,
13//! and whitespace. Everything else is an ordinary-atom run ([`MATH_TEXT`]).
14//!
15//! Two outputs, two channels — the same split YAML uses (see
16//! `parser/yaml/model.rs`) and that texlab uses for LaTeX:
17//!
18//! - the **CST is lossless and never fails** (`node.text() == content` for every
19//!   input; worst case is a single `MATH_TEXT` atom), and
20//! - **errors ride a side-channel** ([`MathParseReport::diagnostics`]) so the
21//!   linter (and by proxy the LSP) can surface unbalanced braces and mismatched
22//!   environments without the parser ever rejecting input.
23//!
24//! [`MATH_TEXT`]: SyntaxKind::MATH_TEXT
25
26use crate::parser::inlines::bookdown::try_parse_bookdown_equation_definition;
27use crate::syntax::SyntaxKind;
28use rowan::{GreenNode, GreenNodeBuilder};
29
/// A non-fatal problem found while parsing math content. Byte offsets are
/// relative to the math content string (the caller offsets them into host
/// document coordinates when surfacing through the linter/LSP).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MathDiagnostic {
    /// Stable machine-readable identifier. The parser in this module only
    /// ever passes one of the constants from [`diagnostic_codes`].
    pub code: &'static str,
    /// Fixed, human-readable description of the problem.
    pub message: &'static str,
    /// Byte offset (into the math content) where the flagged span starts.
    pub byte_start: usize,
    /// Byte offset one past the end of the flagged span (exclusive): a stray
    /// one-byte `}` at offset `p` is reported as `p..p + 1`.
    pub byte_end: usize,
}
40
/// The lossless CST plus any diagnostics gathered on the side-channel.
#[derive(Debug, Clone)]
pub struct MathParseReport {
    /// Green tree rooted at a [`SyntaxKind::MATH_CONTENT`] node; its text is
    /// the input content, byte for byte.
    pub green: GreenNode,
    /// Problems in the order the parser recorded them. Empty for well-formed
    /// input.
    pub diagnostics: Vec<MathDiagnostic>,
}
47
/// Stable diagnostic codes for math content. Mirrors `yaml::diagnostic_codes`.
///
/// These strings are what ends up in [`MathDiagnostic::code`](super::MathDiagnostic);
/// consumers can match on them instead of on the human-readable message.
pub mod diagnostic_codes {
    /// A `{` was never closed before the end of the math content.
    pub const UNCLOSED_GROUP: &str = "MATH_UNCLOSED_GROUP";
    /// A `}` appeared with no matching `{`.
    pub const UNEXPECTED_CLOSE_BRACE: &str = "MATH_UNEXPECTED_CLOSE_BRACE";
    /// A `\begin{env}` was never closed by a matching `\end{env}`.
    pub const UNCLOSED_ENVIRONMENT: &str = "MATH_UNCLOSED_ENVIRONMENT";
    /// A `\begin{a}` was closed by `\end{b}` with a different name.
    pub const MISMATCHED_ENVIRONMENT: &str = "MATH_MISMATCHED_ENVIRONMENT";
    /// An `\end` appeared with no open `\begin`.
    pub const UNEXPECTED_END: &str = "MATH_UNEXPECTED_END";
}
61
/// Flavor-/extension-dependent parsing options for math content. Default is
/// all-off (pure TeX). The math grammar itself is flavor-agnostic; only
/// constructs layered on top of TeX by a Markdown flavor live here.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct MathParseOptions {
    /// Recognize bookdown equation labels `(\#eq:label)` as a single
    /// [`SyntaxKind::MATH_EQUATION_LABEL`] token (gated on the
    /// `bookdown_equation_references` extension).
    ///
    /// Side effect while enabled: every `(` becomes a text-run boundary, so a
    /// `(` that does not start a label is emitted as its own one-character
    /// `MATH_TEXT` token instead of being merged into the surrounding run.
    pub bookdown_equation_labels: bool,
}
72
73/// Parse math content into a lossless `MATH_CONTENT` green node, discarding
74/// diagnostics. `content` is the raw text between (but excluding) the math
75/// delimiters.
76pub fn parse_math_content(content: &str, opts: MathParseOptions) -> GreenNode {
77    parse_math_report(content, opts).green
78}
79
80/// Parse math content into a lossless CST plus a side-channel of diagnostics.
81pub fn parse_math_report(content: &str, opts: MathParseOptions) -> MathParseReport {
82    let mut parser = MathParser {
83        input: content,
84        pos: 0,
85        builder: GreenNodeBuilder::new(),
86        diagnostics: Vec::new(),
87        opts,
88    };
89    parser.builder.start_node(SyntaxKind::MATH_CONTENT.into());
90    parser.parse_elements(Ctx::Top);
91    parser.builder.finish_node();
92    MathParseReport {
93        green: parser.builder.finish(),
94        diagnostics: parser.diagnostics,
95    }
96}
97
/// Parse context, controlling which delimiter ends the current element run.
///
/// Only the innermost context is consulted: a `}` seen while the context is
/// not `Group`, or an `\end` seen while it is not `Env`, is kept in the tree
/// and reported as stray rather than closing an outer construct.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Ctx {
    /// Top level of the math content.
    Top,
    /// Inside a `{ ... }` brace group; stops at the matching `}`.
    Group,
    /// Inside a `\begin{env} ... \end{env}` body; stops at `\end`.
    Env,
}
108
/// Single-use recursive-descent parser state for one math content string.
struct MathParser<'a> {
    /// The complete math content being parsed.
    input: &'a str,
    /// Byte offset of the cursor into `input`; everything before it has
    /// already been emitted as tokens.
    pos: usize,
    /// Accumulates the green tree as tokens and nodes are emitted.
    builder: GreenNodeBuilder<'static>,
    /// Side-channel of non-fatal problems found so far.
    diagnostics: Vec<MathDiagnostic>,
    /// Flavor-dependent switches (see [`MathParseOptions`]).
    opts: MathParseOptions,
}
116
117impl MathParser<'_> {
118    fn rest(&self) -> &str {
119        &self.input[self.pos..]
120    }
121
122    fn peek_char(&self) -> Option<char> {
123        self.rest().chars().next()
124    }
125
126    fn diagnose(&mut self, code: &'static str, message: &'static str, start: usize, end: usize) {
127        self.diagnostics.push(MathDiagnostic {
128            code,
129            message,
130            byte_start: start,
131            byte_end: end,
132        });
133    }
134
135    /// Emit a token of `len` bytes (from the current position) with `kind`.
136    fn bump_bytes(&mut self, len: usize, kind: SyntaxKind) {
137        let text = &self.input[self.pos..self.pos + len];
138        self.builder.token(kind.into(), text);
139        self.pos += len;
140    }
141
142    /// If the cursor is at a control word (`\` followed by ASCII letters or
143    /// `@`, matching TeX/texlab's control-word class), return that word
144    /// (without the backslash) without consuming anything.
145    fn peek_control_word(&self) -> Option<&str> {
146        let after = self.rest().strip_prefix('\\')?;
147        let len: usize = after
148            .bytes()
149            .take_while(|b| b.is_ascii_alphabetic() || *b == b'@')
150            .count();
151        if len == 0 { None } else { Some(&after[..len]) }
152    }
153
    /// Core dispatch loop: emit elements one after another until the input
    /// ends or the terminator belonging to `ctx` is reached.
    ///
    /// The terminator itself (`}` for [`Ctx::Group`], `\end` for
    /// [`Ctx::Env`]) is left unconsumed for the caller to handle. Arm order
    /// matters: guarded arms must precede the unguarded arm for the same
    /// character, and the catch-all text arm must stay last.
    fn parse_elements(&mut self, ctx: Ctx) {
        while let Some(c) = self.peek_char() {
            match c {
                // The enclosing `parse_group` consumes this brace.
                '}' if ctx == Ctx::Group => break,
                // A `}` seen while the innermost context is not a group is an
                // unmatched close: keep it as a faithful (stray) close token
                // and flag it on the side-channel.
                '}' => {
                    self.diagnose(
                        diagnostic_codes::UNEXPECTED_CLOSE_BRACE,
                        "unmatched closing brace `}`",
                        self.pos,
                        self.pos + 1,
                    );
                    self.bump_bytes(1, SyntaxKind::MATH_GROUP_CLOSE);
                }
                '\\' => {
                    // `\\` is checked first so it becomes a line break rather
                    // than a control symbol followed by a lone backslash.
                    if self.rest().starts_with("\\\\") {
                        self.bump_bytes(2, SyntaxKind::MATH_LINE_BREAK);
                    } else if let Some(word) = self.peek_control_word() {
                        match word {
                            "begin" => self.parse_environment(),
                            // The enclosing `parse_environment` consumes it.
                            "end" if ctx == Ctx::Env => break,
                            "end" => {
                                // Stray `\end` with no open `\begin` at this level.
                                // The flagged span is `\end` itself, not its name.
                                self.diagnose(
                                    diagnostic_codes::UNEXPECTED_END,
                                    "`\\end` without a matching `\\begin`",
                                    self.pos,
                                    self.pos + 1 + word.len(),
                                );
                                self.parse_control_word();
                            }
                            _ => self.parse_control_word(),
                        }
                    } else {
                        // `\` followed by a non-letter, or `\` at end of input.
                        self.parse_control_symbol();
                    }
                }
                '{' => self.parse_group(),
                // Bookdown equation label `(\#eq:label)`, only when enabled.
                // When off, `(` is not intercepted here and flows into an
                // ordinary atom run, so the CST is unchanged for plain math.
                '(' if self.opts.bookdown_equation_labels => match self.equation_label_len() {
                    Some(len) => self.bump_bytes(len, SyntaxKind::MATH_EQUATION_LABEL),
                    // A non-matching `(` is just one ordinary atom (it is a
                    // text-run boundary only while the extension is on).
                    None => self.bump_bytes(1, SyntaxKind::MATH_TEXT),
                },
                '&' => self.bump_bytes(1, SyntaxKind::MATH_ALIGN),
                '^' | '_' => self.bump_bytes(1, SyntaxKind::MATH_SCRIPT),
                // Operator atoms (`+ - * = < >`), one token per char. Class and
                // precedence are *not* assigned here: TeX itself coerces a
                // binary atom to ordinary by its neighbors (unary minus), so the
                // class is a property of list position, owned by the formatter.
                c if is_operator(c) => self.bump_bytes(1, SyntaxKind::MATH_OPERATOR),
                '%' => self.parse_comment(),
                ' ' | '\t' => self.parse_spaces(),
                '\n' => self.bump_bytes(1, SyntaxKind::MATH_NEWLINE),
                // `\r\n` is one newline token; a bare `\r` is also a newline.
                '\r' => {
                    let len = if self.rest().starts_with("\r\n") {
                        2
                    } else {
                        1
                    };
                    self.bump_bytes(len, SyntaxKind::MATH_NEWLINE);
                }
                // Anything not handled above starts an ordinary-atom run.
                _ => self.parse_text(),
            }
        }
    }
224
225    /// `\begin{env} ... \end{env}`. Matching is done by recursion plus the
226    /// `Env` context; name mismatches and missing `\end` are reported on the
227    /// side-channel but never abort the parse.
228    fn parse_environment(&mut self) {
229        let begin_start = self.pos;
230        self.builder.start_node(SyntaxKind::MATH_ENVIRONMENT.into());
231        self.parse_control_word(); // \begin
232        let begin_name = self.parse_environment_name();
233        self.parse_elements(Ctx::Env);
234        if self.peek_control_word() == Some("end") {
235            let end_start = self.pos;
236            self.parse_control_word(); // \end
237            let end_name = self.parse_environment_name();
238            if begin_name != end_name {
239                self.diagnose(
240                    diagnostic_codes::MISMATCHED_ENVIRONMENT,
241                    "`\\end` name does not match the open `\\begin`",
242                    end_start,
243                    self.pos,
244                );
245            }
246        } else {
247            self.diagnose(
248                diagnostic_codes::UNCLOSED_ENVIRONMENT,
249                "`\\begin` without a matching `\\end`",
250                begin_start,
251                self.pos,
252            );
253        }
254        self.builder.finish_node();
255    }
256
257    /// Parse the `{name}` group following `\begin` / `\end` (if present) and
258    /// return the inner name text for matching. Empty when absent.
259    fn parse_environment_name(&mut self) -> String {
260        if self.peek_char() != Some('{') {
261            return String::new();
262        }
263        let open = self.pos;
264        self.parse_group();
265        // Inner text = the group span minus its braces.
266        self.input[open..self.pos]
267            .trim_start_matches('{')
268            .trim_end_matches('}')
269            .to_string()
270    }
271
272    fn parse_group(&mut self) {
273        let open = self.pos;
274        self.builder.start_node(SyntaxKind::MATH_GROUP.into());
275        self.bump_bytes(1, SyntaxKind::MATH_GROUP_OPEN); // {
276        self.parse_elements(Ctx::Group);
277        if self.peek_char() == Some('}') {
278            self.bump_bytes(1, SyntaxKind::MATH_GROUP_CLOSE); // }
279        } else {
280            self.diagnose(
281                diagnostic_codes::UNCLOSED_GROUP,
282                "unclosed `{` group",
283                open,
284                open + 1,
285            );
286        }
287        self.builder.finish_node();
288    }
289
290    /// `\` + a run of control-word characters (e.g. `\alpha`, `\frac`, `\begin`).
291    fn parse_control_word(&mut self) {
292        let word_len = self.peek_control_word().map(str::len).unwrap_or(0);
293        self.bump_bytes(1 + word_len, SyntaxKind::MATH_COMMAND);
294    }
295
296    /// `\` + exactly one following character (e.g. `\%`, `\{`, `\,`), or a
297    /// lone trailing backslash at EOF.
298    fn parse_control_symbol(&mut self) {
299        let after = &self.input[self.pos + 1..];
300        let len = 1 + after.chars().next().map(char::len_utf8).unwrap_or(0);
301        self.bump_bytes(len, SyntaxKind::MATH_COMMAND);
302    }
303
304    /// `%` to (but not including) the end of the line.
305    fn parse_comment(&mut self) {
306        let len = self
307            .rest()
308            .find(['\n', '\r'])
309            .unwrap_or_else(|| self.rest().len());
310        self.bump_bytes(len, SyntaxKind::MATH_COMMENT);
311    }
312
313    fn parse_spaces(&mut self) {
314        let len = self
315            .rest()
316            .bytes()
317            .take_while(|&b| b == b' ' || b == b'\t')
318            .count();
319        self.bump_bytes(len, SyntaxKind::MATH_SPACE);
320    }
321
322    /// A run of ordinary atoms, up to the next structural character. While the
323    /// bookdown extension is on, `(` also bounds the run so every `(` reaches
324    /// the dispatcher's equation-label check.
325    fn parse_text(&mut self) {
326        let bookdown = self.opts.bookdown_equation_labels;
327        let len = self
328            .rest()
329            .find(|c: char| is_special(c) || (bookdown && c == '('))
330            .unwrap_or_else(|| self.rest().len());
331        debug_assert!(len > 0, "parse_text on a special char");
332        self.bump_bytes(len, SyntaxKind::MATH_TEXT);
333    }
334
335    /// If the cursor is at a bookdown equation label `(\#eq:label)`, return its
336    /// byte length. Reuses the shared bookdown definition parser so the
337    /// recognized span matches the rest of the codebase exactly.
338    fn equation_label_len(&self) -> Option<usize> {
339        try_parse_bookdown_equation_definition(self.rest()).map(|(len, _)| len)
340    }
341}
342
343/// Characters that terminate a [`SyntaxKind::MATH_TEXT`] run.
344fn is_special(c: char) -> bool {
345    is_operator(c)
346        || matches!(
347            c,
348            '\\' | '{' | '}' | '&' | '^' | '_' | '%' | ' ' | '\t' | '\n' | '\r'
349        )
350}
351
/// Whether `c` is an operator atom that gets its own
/// [`SyntaxKind::MATH_OPERATOR`] token instead of being part of an ordinary
/// text run. Covers the TeX mathbin core (`+ - *`) and mathrel core
/// (`= < >`); class, precedence and spacing are decided later by the
/// formatter.
fn is_operator(c: char) -> bool {
    const OPERATORS: [char; 6] = ['+', '-', '*', '=', '<', '>'];
    OPERATORS.contains(&c)
}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Parse `content` with default (pure TeX) options and return the root
    /// syntax node.
    fn node(content: &str) -> SyntaxNode {
        SyntaxNode::new_root(parse_math_content(content, MathParseOptions::default()))
    }

    /// Parse `content` with explicit options and return the root syntax node.
    fn node_with(content: &str, opts: MathParseOptions) -> SyntaxNode {
        SyntaxNode::new_root(parse_math_content(content, opts))
    }

    /// Kinds of all tokens in the tree for `content` (default options), in
    /// document order. Node kinds are not included.
    fn token_kinds(content: &str) -> Vec<SyntaxKind> {
        node(content)
            .descendants_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|tok| tok.kind())
            .collect()
    }

    /// Diagnostic codes reported for `content` (default options), in the
    /// order they were recorded.
    fn codes(content: &str) -> Vec<&'static str> {
        parse_math_report(content, MathParseOptions::default())
            .diagnostics
            .into_iter()
            .map(|d| d.code)
            .collect()
    }

    /// Losslessness is the hard invariant for every input.
    fn assert_lossless(content: &str) {
        assert_eq!(
            node(content).text().to_string(),
            content,
            "roundtrip: {content:?}"
        );
    }

    #[test]
    fn root_is_math_content() {
        assert_eq!(node("x").kind(), SyntaxKind::MATH_CONTENT);
    }

    #[test]
    fn plain_text_is_one_atom_run() {
        // A run with no structural or operator chars stays a single atom.
        assert_eq!(token_kinds("abc"), vec![SyntaxKind::MATH_TEXT]);
        assert_lossless("abc");
        // `/`, `.`, and parens are ordinary atoms, not operators.
        assert_eq!(token_kinds("f(x)/2.5"), vec![SyntaxKind::MATH_TEXT]);
        assert_lossless("f(x)/2.5");
    }

    #[test]
    fn operators_split_atom_runs() {
        // `+ - * = < >` each break the surrounding text into their own
        // MATH_OPERATOR token. Class/precedence is deferred to the formatter.
        assert_eq!(
            token_kinds("a+b=c"),
            vec![
                SyntaxKind::MATH_TEXT,     // a
                SyntaxKind::MATH_OPERATOR, // +
                SyntaxKind::MATH_TEXT,     // b
                SyntaxKind::MATH_OPERATOR, // =
                SyntaxKind::MATH_TEXT,     // c
            ]
        );
        assert_lossless("a+b=c");
    }

    #[test]
    fn each_operator_char_is_its_own_token() {
        for op in ["+", "-", "*", "=", "<", ">"] {
            assert_eq!(
                token_kinds(op),
                vec![SyntaxKind::MATH_OPERATOR],
                "operator {op:?}"
            );
            assert_lossless(op);
        }
        // Adjacent operators do not coalesce — one token per char.
        assert_eq!(
            token_kinds("a<=b"),
            vec![
                SyntaxKind::MATH_TEXT,
                SyntaxKind::MATH_OPERATOR, // <
                SyntaxKind::MATH_OPERATOR, // =
                SyntaxKind::MATH_TEXT,
            ]
        );
        // Unary vs binary minus is NOT distinguished here — both are operators.
        assert_eq!(
            token_kinds("-x"),
            vec![SyntaxKind::MATH_OPERATOR, SyntaxKind::MATH_TEXT]
        );
        assert_lossless("-x");
        // An escaped special stays a control symbol, never an operator.
        assert_eq!(token_kinds(r"\<"), vec![SyntaxKind::MATH_COMMAND]);
        assert_lossless(r"\<");
    }

    #[test]
    fn operators_inside_groups_and_scripts_are_lossless() {
        for content in [r"e^{-x}", r"10^{-3}", r"\frac{a+b}{c-d}", r"x_{i+1}"] {
            assert_lossless(content);
        }
    }

    #[test]
    fn control_word_and_symbol() {
        assert_eq!(
            token_kinds(r"\alpha\,"),
            vec![SyntaxKind::MATH_COMMAND, SyntaxKind::MATH_COMMAND]
        );
        assert_lossless(r"\alpha\,");
        // Escaped specials are control symbols, not structural markers.
        assert_eq!(token_kinds(r"\&\%\{\}"), vec![SyntaxKind::MATH_COMMAND; 4]);
        assert_lossless(r"\&\%\{\}");
    }

    #[test]
    fn brace_group_nests() {
        let tree = node(r"x^{2}");
        let group = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::MATH_GROUP)
            .expect("group");
        // The braces are tokens *inside* the group node, around its content.
        let kinds: Vec<_> = group.children_with_tokens().map(|el| el.kind()).collect();
        assert_eq!(
            kinds,
            vec![
                SyntaxKind::MATH_GROUP_OPEN,
                SyntaxKind::MATH_TEXT,
                SyntaxKind::MATH_GROUP_CLOSE
            ]
        );
        assert_lossless(r"x^{2}");
    }

    #[test]
    fn line_break_alignment_and_scripts() {
        assert_eq!(
            token_kinds(r"x &= 1 \\"),
            vec![
                SyntaxKind::MATH_TEXT,       // x
                SyntaxKind::MATH_SPACE,      // ' '
                SyntaxKind::MATH_ALIGN,      // &
                SyntaxKind::MATH_OPERATOR,   // =
                SyntaxKind::MATH_SPACE,      // ' '
                SyntaxKind::MATH_TEXT,       // 1
                SyntaxKind::MATH_SPACE,      // ' '
                SyntaxKind::MATH_LINE_BREAK, // \\
            ]
        );
        assert_lossless(r"x &= 1 \\");
        assert_eq!(
            token_kinds("x^2_i"),
            vec![
                SyntaxKind::MATH_TEXT,
                SyntaxKind::MATH_SCRIPT,
                SyntaxKind::MATH_TEXT,
                SyntaxKind::MATH_SCRIPT,
                SyntaxKind::MATH_TEXT,
            ]
        );
    }

    #[test]
    fn environment_wraps_body() {
        let content = "\\begin{aligned}\nx &= 1\n\\end{aligned}";
        let tree = node(content);
        let env = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::MATH_ENVIRONMENT)
            .expect("environment");
        assert_eq!(env.text().to_string(), content);
        // Direct command children of the environment: `\begin` and `\end`.
        let commands = env
            .children_with_tokens()
            .filter(|el| el.kind() == SyntaxKind::MATH_COMMAND)
            .count();
        assert_eq!(commands, 2);
        assert_lossless(content);
        assert!(
            codes(content).is_empty(),
            "well-formed env has no diagnostics"
        );
    }

    #[test]
    fn nested_environments() {
        let content = r"\begin{a}\begin{b}x\end{b}\end{a}";
        let envs = node(content)
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::MATH_ENVIRONMENT)
            .count();
        assert_eq!(envs, 2);
        assert_lossless(content);
        assert!(codes(content).is_empty());
    }

    #[test]
    fn comment_runs_to_end_of_line() {
        // The newline is its own token, not part of the comment.
        assert_eq!(
            token_kinds("a % tail\nb"),
            vec![
                SyntaxKind::MATH_TEXT,
                SyntaxKind::MATH_SPACE,
                SyntaxKind::MATH_COMMENT,
                SyntaxKind::MATH_NEWLINE,
                SyntaxKind::MATH_TEXT,
            ]
        );
        assert_lossless("a % tail\nb");
    }

    #[test]
    fn crlf_and_unicode_are_lossless() {
        assert_lossless("x &= 1\r\ny &= 2\r\n");
        assert_lossless(r"\alpha + \beta \neq \gamma_{\text{αβγ}}");
    }

    #[test]
    fn empty_content() {
        assert_eq!(node("").text().to_string(), "");
        assert!(token_kinds("").is_empty());
    }

    #[test]
    fn trailing_backslash() {
        // A lone `\` at end of input is still emitted as a command token.
        assert_eq!(
            token_kinds("a\\"),
            vec![SyntaxKind::MATH_TEXT, SyntaxKind::MATH_COMMAND]
        );
        assert_lossless("a\\");
    }

    // --- Diagnostics side-channel (lossless even when malformed) ---

    #[test]
    fn unclosed_group_is_lossless_and_diagnosed() {
        assert_lossless("{a");
        assert_eq!(codes("{a"), vec![diagnostic_codes::UNCLOSED_GROUP]);
    }

    #[test]
    fn stray_close_brace_is_lossless_and_diagnosed() {
        assert_lossless("a}b");
        assert_eq!(codes("a}b"), vec![diagnostic_codes::UNEXPECTED_CLOSE_BRACE]);
    }

    #[test]
    fn unclosed_environment_is_diagnosed() {
        let content = r"\begin{aligned} x &= 1";
        assert_lossless(content);
        assert_eq!(codes(content), vec![diagnostic_codes::UNCLOSED_ENVIRONMENT]);
    }

    #[test]
    fn mismatched_environment_is_diagnosed() {
        let content = r"\begin{aligned}x\end{matrix}";
        assert_lossless(content);
        assert_eq!(
            codes(content),
            vec![diagnostic_codes::MISMATCHED_ENVIRONMENT]
        );
    }

    #[test]
    fn stray_end_is_diagnosed() {
        let content = r"x \end{aligned}";
        assert_lossless(content);
        assert_eq!(codes(content), vec![diagnostic_codes::UNEXPECTED_END]);
    }

    #[test]
    fn well_formed_math_has_no_diagnostics() {
        assert!(codes(r"\frac{1}{2} + x^{2}").is_empty());
    }

    // --- Bookdown equation labels (gated on the extension) ---

    /// Options with the bookdown equation-label extension switched on.
    const BOOKDOWN: MathParseOptions = MathParseOptions {
        bookdown_equation_labels: true,
    };

    /// Same as `token_kinds`, but parsing with the given options instead of
    /// the defaults.
    fn label_kinds(content: &str, opts: MathParseOptions) -> Vec<SyntaxKind> {
        node_with(content, opts)
            .descendants_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|tok| tok.kind())
            .collect()
    }

    #[test]
    fn equation_label_recognized_when_enabled() {
        let kinds = label_kinds(r"a (\#eq:foo)", BOOKDOWN);
        assert!(kinds.contains(&SyntaxKind::MATH_EQUATION_LABEL));
        // The label is a single token spanning the whole `(\#eq:foo)`.
        let label = node_with(r"a (\#eq:foo)", BOOKDOWN)
            .descendants_with_tokens()
            .filter_map(|el| el.into_token())
            .find(|t| t.kind() == SyntaxKind::MATH_EQUATION_LABEL)
            .expect("label token");
        assert_eq!(label.text(), r"(\#eq:foo)");
    }

    #[test]
    fn equation_label_ignored_when_disabled() {
        // Default options: no label token, and plain math is byte-identical.
        let kinds = label_kinds(r"a (\#eq:foo)", MathParseOptions::default());
        assert!(!kinds.contains(&SyntaxKind::MATH_EQUATION_LABEL));
    }

    #[test]
    fn plain_parens_unchanged_when_disabled() {
        // `(` must not fragment ordinary atom runs while the extension is off.
        assert_eq!(token_kinds("f(x)"), vec![SyntaxKind::MATH_TEXT]);
    }

    #[test]
    fn label_parsing_is_lossless() {
        let content = "\\begin{align}\n  a (\\#eq:solveG)\n\\end{align}";
        assert_eq!(node_with(content, BOOKDOWN).text().to_string(), content);
    }
}
683}