Skip to main content

panache_parser/parser/inlines/
math.rs

//! Math parsing for both inline and display math.
//!
//! This module handles all math-related parsing:
//! - **Inline math**: `$...$`, `\(...\)`, `\\(...\\)` - single line only
//! - **Display math**: `$$...$$`, `\[...\]`, `\\[...\\]` - can span multiple lines
//!
//! Display math can appear both inline (within paragraphs) and as block-level elements.
//! Most parsing functions return an `Option<(usize, &str)>` tuple containing the length
//! consumed (in bytes) and the math content, allowing calling contexts to emit appropriate
//! nodes. The math-environment parser additionally returns the begin and end markers.
10
11use super::sink::InlineSink;
12use crate::parser::blocks::raw_blocks::{extract_environment_name, is_inline_math_environment};
13use crate::parser::math::{MathParseOptions, parse_math_content};
14use crate::parser::utils::tree_copy::copy_green_node;
15use crate::syntax::SyntaxKind;
16
17/// Emit the math content as a structural, lossless `MATH_CONTENT` subtree
18/// (brace groups, environments, control sequences, alignment, …) rather than an
19/// opaque `TEXT` token, so the formatter and linter can act on its structure.
20/// See [`crate::parser::math`]. Lossless: the subtree's text equals `content`.
21fn emit_math_content(builder: &mut impl InlineSink, content: &str, opts: MathParseOptions) {
22    copy_green_node(builder, &parse_math_content(content, opts));
23}
24
25/// Derive math-content parse options from the parser config. Keeps the
26/// flavor/extension → math-grammar mapping in one place.
27pub fn math_opts(config: &crate::options::ParserOptions) -> MathParseOptions {
28    MathParseOptions {
29        bookdown_equation_labels: config.extensions.bookdown_equation_references,
30    }
31}
32
/// Try to parse an inline math span (`$...$`) at the start of `text`.
///
/// Returns `Some((consumed, content))`, where `consumed` is the number of
/// **bytes** covered by the span (both `$` delimiters included) and `content`
/// is the slice between the delimiters. Returns `None` if `text` does not
/// start with inline math.
///
/// Per Pandoc spec (tex_math_dollars extension):
/// - Opening $ must have non-space character immediately to its right
/// - Closing $ must have non-space character immediately to its left
/// - Closing $ must not be followed immediately by a digit
///
/// Additional rules applied here:
/// - A `$` preceded by an odd number of backslashes is escaped (`\$`) and
///   cannot close the span. An even run (e.g. `\\$`) is made of escaped
///   backslashes, so the `$` is a real delimiter.
/// - The span cannot cross a newline.
/// - A candidate `$` that fails a rule is skipped and the search continues.
pub fn try_parse_inline_math(text: &str) -> Option<(usize, &str)> {
    // Must start with exactly one `$`; `$$` belongs to display math.
    if text.starts_with("$$") {
        return None;
    }
    let rest = text.strip_prefix('$')?;

    // Opening `$` must have a non-space character immediately to its right.
    if rest.is_empty() || rest.starts_with(char::is_whitespace) {
        return None;
    }

    for (pos, ch) in rest.char_indices() {
        match ch {
            // Dollar-delimited inline math cannot span multiple lines.
            '\n' => return None,
            '$' => {}
            _ => continue,
        }

        let before = &rest[..pos];

        // Only an odd-length backslash run escapes the dollar; in `\\$` the
        // backslashes escape each other and the `$` is a delimiter.
        let backslashes = before.bytes().rev().take_while(|&b| b == b'\\').count();
        if backslashes % 2 == 1 {
            continue;
        }

        // Closing `$` must have a non-space character immediately to its left.
        if before.is_empty() || before.ends_with(char::is_whitespace) {
            continue;
        }

        // Closing `$` must not be followed immediately by a digit (e.g. `$5`).
        if rest[pos + 1..].starts_with(|c: char| c.is_ascii_digit()) {
            continue;
        }

        // opening `$` + content + closing `$`
        return Some((1 + pos + 1, before));
    }

    // No matching close found.
    None
}
99
/// Try to parse GFM inline math (``$`...`$``) at the start of `text`.
/// Extension: tex_math_gfm
///
/// Returns `Some((consumed, content))`, where `consumed` is the byte length of
/// the whole span including both two-byte delimiters. Returns `None` if the
/// opener is missing, the content is empty, or no closer appears before the
/// first newline.
pub fn try_parse_gfm_inline_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix("$`")?;

    // The closer has to sit on the same line as the opener.
    let first_line = body.split('\n').next()?;

    // A closer directly after the opener would mean empty math: reject it.
    let close_at = first_line.find("`$").filter(|&at| at > 0)?;

    // $` + content + `$
    Some((2 + close_at + 2, &first_line[..close_at]))
}
131
/// Try to parse single backslash inline math (`\(...\)`) at the start of `text`.
/// Extension: tex_math_single_backslash
///
/// Returns `Some((consumed, content))`, where `consumed` is the byte length of
/// the span including both delimiters. Returns `None` if the opener is missing
/// or no `\)` appears before the first newline.
pub fn try_parse_single_backslash_inline_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\(")?;

    // Inline math can't span multiple lines: only search up to the first newline.
    let line_end = body.find('\n').unwrap_or(body.len());
    let close_at = body[..line_end].find(r"\)")?;

    // \( + content + \)
    Some((2 + close_at + 2, &body[..close_at]))
}
163
/// Try to parse double backslash inline math (`\\(...\\)`) at the start of `text`.
/// Extension: tex_math_double_backslash
///
/// Returns `Some((consumed, content))`, where `consumed` is the byte length of
/// the span including both three-byte delimiters. Returns `None` if the opener
/// is missing or no `\\)` appears before the first newline.
pub fn try_parse_double_backslash_inline_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\\(")?;

    // Inline math can't span multiple lines: only search up to the first newline.
    let line_end = body.find('\n').unwrap_or(body.len());
    let close_at = body[..line_end].find(r"\\)")?;

    // \\( + content + \\)
    Some((3 + close_at + 3, &body[..close_at]))
}
195
/// Try to parse display math (`$$...$$`) at the start of `text`.
///
/// Returns `Some((consumed, content))`, where `consumed` is the number of
/// **bytes** covered by the span (opening run, content, and the entire closing
/// run of `$`) and `content` is the slice between the delimiters. Returns
/// `None` if `text` does not start with `$$` or no closing run is found.
/// Display math can span multiple lines in inline contexts.
///
/// Per Pandoc spec (tex_math_dollars extension):
/// - Opening delimiter is at least $$
/// - Closing delimiter must have at least as many $ as opening
/// - Content can span multiple lines
///
/// A `$` preceded by an odd number of backslashes is escaped (`\$`) and does
/// not start a closing run. After an even run (e.g. the LaTeX line break `\\`)
/// the backslashes escape each other, so the `$` is a real delimiter.
pub fn try_parse_display_math(text: &str) -> Option<(usize, &str)> {
    // Must start with at least `$$`.
    if !text.starts_with("$$") {
        return None;
    }

    // The whole leading run of dollars is the opening delimiter.
    let opening_count = text.bytes().take_while(|&b| b == b'$').count();
    let rest = &text[opening_count..];
    let bytes = rest.as_bytes();

    // Jump from one `$` to the next; nothing else can close the span.
    let mut pos = 0;
    while let Some(offset) = rest[pos..].find('$') {
        let run_start = pos + offset;

        // Only an odd-length backslash run escapes the dollar.
        let backslashes = bytes[..run_start]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        if backslashes % 2 == 1 {
            // Skip just the escaped `$`; the dollars after it may still close.
            pos = run_start + 1;
            continue;
        }

        let closing_count = bytes[run_start..]
            .iter()
            .take_while(|&&b| b == b'$')
            .count();

        // Must have at least as many closing dollars as opening.
        if closing_count >= opening_count {
            let total_len = opening_count + run_start + closing_count;
            return Some((total_len, &rest[..run_start]));
        }

        // Not enough dollars: skip this run and keep searching.
        pos = run_start + closing_count;
    }

    // No matching close found.
    None
}
252
/// Try to parse single backslash display math (`\[...\]`) at the start of `text`.
/// Extension: tex_math_single_backslash
///
/// Returns `Some((consumed, content))`, where `consumed` is the byte length of
/// the span including both delimiters. Returns `None` if the opener or the
/// closer is missing.
///
/// Per Pandoc spec:
/// - Content can span multiple lines
/// - No escape handling needed (backslash is the delimiter)
pub fn try_parse_single_backslash_display_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\[")?;

    // The first `\]` anywhere in the remaining text closes the span.
    let close_at = body.find(r"\]")?;

    // \[ + content + \]
    Some((2 + close_at + 2, &body[..close_at]))
}
283
/// Try to parse double backslash display math (`\\[...\\]`) at the start of `text`.
/// Extension: tex_math_double_backslash
///
/// Returns `Some((consumed, content))`, where `consumed` is the byte length of
/// the span including both three-byte delimiters. Returns `None` if the opener
/// or the closer is missing.
///
/// Per Pandoc spec:
/// - Content can span multiple lines
/// - Double backslash is the delimiter
pub fn try_parse_double_backslash_display_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\\[")?;

    // The first `\\]` anywhere in the remaining text closes the span.
    let close_at = body.find(r"\\]")?;

    // \\[ + content + \\]
    Some((3 + close_at + 3, &body[..close_at]))
}
314
315/// Try to parse a LaTeX math environment (\begin{equation}...\end{equation})
316/// as display math. Returns (total_len, begin_marker, content, end_marker).
317pub fn try_parse_math_environment(text: &str) -> Option<(usize, &str, &str, &str)> {
318    let env_name = extract_environment_name(text)?;
319    if !is_inline_math_environment(env_name) {
320        return None;
321    }
322
323    let begin_marker_len = text.find('}')? + 1;
324    let begin_marker = &text[..begin_marker_len];
325    let end_marker = format!("\\end{{{}}}", env_name);
326
327    let after_begin = &text[begin_marker_len..];
328    let end_rel = after_begin.find(&end_marker)?;
329    let end_start = begin_marker_len + end_rel;
330    let end_marker_end = end_start + end_marker.len();
331
332    let mut end_line_end = end_marker_end;
333    while end_line_end < text.len() {
334        let ch = text[end_line_end..].chars().next()?;
335        if ch == '\n' || ch == '\r' {
336            break;
337        }
338        end_line_end += ch.len_utf8();
339    }
340
341    if end_line_end < text.len() {
342        if text[end_line_end..].starts_with("\r\n") {
343            end_line_end += 2;
344        } else {
345            end_line_end += 1;
346        }
347    }
348
349    let content = &text[begin_marker_len..end_start];
350    let end_marker_text = &text[end_start..end_line_end];
351    Some((end_line_end, begin_marker, content, end_marker_text))
352}
353
354/// Emit an inline math node to the builder.
355pub fn emit_inline_math(builder: &mut impl InlineSink, content: &str, opts: MathParseOptions) {
356    builder.start_node(SyntaxKind::INLINE_MATH.into());
357
358    // Opening $
359    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$");
360
361    // Math content
362    emit_math_content(builder, content, opts);
363
364    // Closing $
365    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$");
366
367    builder.finish_node();
368}
369
370/// Emit a GFM inline math node: $`...`$
371pub fn emit_gfm_inline_math(builder: &mut impl InlineSink, content: &str, opts: MathParseOptions) {
372    builder.start_node(SyntaxKind::INLINE_MATH.into());
373    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$`");
374    emit_math_content(builder, content, opts);
375    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "`$");
376    builder.finish_node();
377}
378
379/// Emit a single backslash inline math node: \(...\)
380pub fn emit_single_backslash_inline_math(
381    builder: &mut impl InlineSink,
382    content: &str,
383    opts: MathParseOptions,
384) {
385    builder.start_node(SyntaxKind::INLINE_MATH.into());
386
387    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\(");
388    emit_math_content(builder, content, opts);
389    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\)");
390
391    builder.finish_node();
392}
393
394/// Emit a double backslash inline math node: \\(...\\)
395pub fn emit_double_backslash_inline_math(
396    builder: &mut impl InlineSink,
397    content: &str,
398    opts: MathParseOptions,
399) {
400    builder.start_node(SyntaxKind::INLINE_MATH.into());
401
402    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\\(");
403    emit_math_content(builder, content, opts);
404    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\\)");
405
406    builder.finish_node();
407}
408
409/// Emit a display math node to the builder (when occurring inline in paragraph).
410pub fn emit_display_math(
411    builder: &mut impl InlineSink,
412    content: &str,
413    dollar_count: usize,
414    opts: MathParseOptions,
415) {
416    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
417
418    // Opening $$
419    let marker = "$".repeat(dollar_count);
420    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), &marker);
421
422    // Math content
423    emit_math_content(builder, content, opts);
424
425    // Closing $$
426    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), &marker);
427
428    builder.finish_node();
429}
430
431/// Emit a display math environment node using raw \begin...\end... markers.
432pub fn emit_display_math_environment(
433    builder: &mut impl InlineSink,
434    begin_marker: &str,
435    content: &str,
436    end_marker: &str,
437    opts: MathParseOptions,
438) {
439    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
440    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), begin_marker);
441    emit_math_content(builder, content, opts);
442    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), end_marker);
443    builder.finish_node();
444}
445
446/// Emit a single backslash display math node: \[...\]
447pub fn emit_single_backslash_display_math(
448    builder: &mut impl InlineSink,
449    content: &str,
450    opts: MathParseOptions,
451) {
452    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
453
454    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\[");
455    emit_math_content(builder, content, opts);
456    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\]");
457
458    builder.finish_node();
459}
460
461/// Emit a double backslash display math node: \\[...\\]
462pub fn emit_double_backslash_display_math(
463    builder: &mut impl InlineSink,
464    content: &str,
465    opts: MathParseOptions,
466) {
467    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
468
469    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\\[");
470    emit_math_content(builder, content, opts);
471    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\\]");
472
473    builder.finish_node();
474}
475
#[cfg(test)]
mod tests {
    //! Unit tests for the delimiter scanners. Expected lengths are byte
    //! counts of the whole span, delimiters included.

    use super::*;

    // ---- Inline math: $...$ ----

    #[test]
    fn test_parse_simple_inline_math() {
        assert_eq!(try_parse_inline_math("$x = y$"), Some((7, "x = y")));
    }

    #[test]
    fn test_parse_inline_math_with_spaces_inside() {
        // Interior spaces are fine; only the characters touching the
        // delimiters must be non-space.
        assert_eq!(try_parse_inline_math("$a + b$"), Some((7, "a + b")));
    }

    #[test]
    fn test_parse_inline_math_complex() {
        assert_eq!(
            try_parse_inline_math(r"$\frac{1}{2}$"),
            Some((13, r"\frac{1}{2}"))
        );
    }

    #[test]
    fn test_not_inline_math_display() {
        // A `$$` opener belongs to display math.
        assert_eq!(try_parse_inline_math("$$x = y$$"), None);
    }

    #[test]
    fn test_inline_math_no_close() {
        assert_eq!(try_parse_inline_math("$no close"), None);
    }

    #[test]
    fn test_inline_math_no_multiline() {
        assert_eq!(try_parse_inline_math("$x =\ny$"), None);
    }

    #[test]
    fn test_not_inline_math() {
        assert_eq!(try_parse_inline_math("no dollar"), None);
    }

    #[test]
    fn test_inline_math_with_trailing_text() {
        assert_eq!(try_parse_inline_math("$x$ and more"), Some((3, "x")));
    }

    #[test]
    fn test_spec_opening_must_have_non_space_right() {
        // Pandoc: the opening $ needs a non-space character right after it.
        assert_eq!(
            try_parse_inline_math("$ x$"),
            None,
            "Opening $ with space should not parse"
        );
    }

    #[test]
    fn test_spec_closing_must_have_non_space_left() {
        // Pandoc: the closing $ needs a non-space character right before it.
        assert_eq!(
            try_parse_inline_math("$x $"),
            None,
            "Closing $ with space should not parse"
        );
    }

    #[test]
    fn test_spec_closing_not_followed_by_digit() {
        // Pandoc: the closing $ may not be followed by a digit.
        assert_eq!(
            try_parse_inline_math("$x$5"),
            None,
            "Closing $ followed by digit should not parse"
        );
    }

    #[test]
    fn test_spec_dollar_amounts() {
        // Currency such as $20,000 is not math.
        assert_eq!(
            try_parse_inline_math("$20,000"),
            None,
            "Dollar amounts should not parse as math"
        );
    }

    #[test]
    fn test_valid_math_after_spec_checks() {
        // A bare $x$ still parses.
        assert_eq!(
            try_parse_inline_math("$x$"),
            Some((3, "x")),
            "Valid math should parse"
        );
    }

    #[test]
    fn test_math_followed_by_non_digit() {
        // $x$a parses because the follower is not a digit.
        assert_eq!(
            try_parse_inline_math("$x$a"),
            Some((3, "x")),
            "Math followed by non-digit should parse"
        );
    }

    // ---- Display math: $$...$$ ----

    #[test]
    fn test_parse_display_math_simple() {
        assert_eq!(try_parse_display_math("$$x = y$$"), Some((9, "x = y")));
    }

    #[test]
    fn test_parse_display_math_multiline() {
        assert_eq!(
            try_parse_display_math("$$\nx = y\n$$"),
            Some((11, "\nx = y\n"))
        );
    }

    #[test]
    fn test_parse_display_math_triple_dollars() {
        assert_eq!(try_parse_display_math("$$$x = y$$$"), Some((11, "x = y")));
    }

    #[test]
    fn test_parse_display_math_no_close() {
        assert_eq!(try_parse_display_math("$$no close"), None);
    }

    #[test]
    fn test_not_display_math() {
        assert_eq!(try_parse_display_math("$single dollar"), None);
    }

    #[test]
    fn test_display_math_with_trailing_text() {
        assert_eq!(
            try_parse_display_math("$$x = y$$ and more"),
            Some((9, "x = y"))
        );
    }

    // ---- Single backslash math: \(...\) and \[...\] ----

    #[test]
    fn test_single_backslash_inline_math() {
        assert_eq!(
            try_parse_single_backslash_inline_math(r"\(x^2\)"),
            Some((7, "x^2"))
        );
    }

    #[test]
    fn test_single_backslash_inline_math_complex() {
        assert_eq!(
            try_parse_single_backslash_inline_math(r"\(\frac{a}{b}\)"),
            Some((15, r"\frac{a}{b}"))
        );
    }

    #[test]
    fn test_single_backslash_inline_math_no_close() {
        assert_eq!(try_parse_single_backslash_inline_math(r"\(no close"), None);
    }

    #[test]
    fn test_single_backslash_inline_math_no_multiline() {
        assert_eq!(
            try_parse_single_backslash_inline_math("\\(x =\ny\\)"),
            None
        );
    }

    #[test]
    fn test_single_backslash_display_math() {
        assert_eq!(
            try_parse_single_backslash_display_math(r"\[E = mc^2\]"),
            Some((12, "E = mc^2"))
        );
    }

    #[test]
    fn test_single_backslash_display_math_multiline() {
        assert_eq!(
            try_parse_single_backslash_display_math("\\[\nx = y\n\\]"),
            Some((11, "\nx = y\n"))
        );
    }

    #[test]
    fn test_single_backslash_display_math_no_close() {
        assert_eq!(try_parse_single_backslash_display_math(r"\[no close"), None);
    }

    // ---- Double backslash math: \\(...\\) and \\[...\\] ----

    #[test]
    fn test_double_backslash_inline_math() {
        assert_eq!(
            try_parse_double_backslash_inline_math(r"\\(x^2\\)"),
            Some((9, "x^2"))
        );
    }

    #[test]
    fn test_double_backslash_inline_math_complex() {
        assert_eq!(
            try_parse_double_backslash_inline_math(r"\\(\alpha + \beta\\)"),
            Some((20, r"\alpha + \beta"))
        );
    }

    #[test]
    fn test_double_backslash_inline_math_no_close() {
        assert_eq!(try_parse_double_backslash_inline_math(r"\\(no close"), None);
    }

    #[test]
    fn test_double_backslash_inline_math_no_multiline() {
        assert_eq!(
            try_parse_double_backslash_inline_math("\\\\(x =\ny\\\\)"),
            None
        );
    }

    #[test]
    fn test_double_backslash_display_math() {
        assert_eq!(
            try_parse_double_backslash_display_math(r"\\[E = mc^2\\]"),
            Some((14, "E = mc^2"))
        );
    }

    #[test]
    fn test_double_backslash_display_math_multiline() {
        assert_eq!(
            try_parse_double_backslash_display_math("\\\\[\nx = y\n\\\\]"),
            Some((13, "\nx = y\n"))
        );
    }

    #[test]
    fn test_double_backslash_display_math_no_close() {
        assert_eq!(
            try_parse_double_backslash_display_math(r"\\[no close"),
            None
        );
    }

    // ---- Additional edge cases ----

    #[test]
    fn test_display_math_escaped_dollar() {
        // An escaped dollar inside the content must not close the span.
        assert_eq!(
            try_parse_display_math(r"$$a = \$100$$"),
            Some((13, r"a = \$100"))
        );
    }

    #[test]
    fn test_display_math_with_content_on_fence_line() {
        // Content may share a line with the opening delimiter.
        assert_eq!(
            try_parse_display_math("$$x = y\n$$"),
            Some((10, "x = y\n"))
        );
    }
}