Skip to main content

panache_parser/parser/inlines/
math.rs

1//! Math parsing for both inline and display math.
2//!
3//! This module handles all math-related parsing:
4//! - **Inline math**: `$...$`, `\(...\)`, `\\(...\\)` - single line only
5//! - **Display math**: `$$...$$`, `\[...\]`, `\\[...\\]` - can span multiple lines
6//!
7//! Display math can appear both inline (within paragraphs) and as block-level elements.
8//! The parsing functions return `Option<(usize, &str)>` tuples containing the length
9//! consumed and the math content, allowing calling contexts to emit appropriate nodes.
10
11use crate::parser::blocks::raw_blocks::{extract_environment_name, is_inline_math_environment};
12use crate::syntax::SyntaxKind;
13use rowan::GreenNodeBuilder;
14
/// Try to parse a `$...$` inline math span at the start of `text`.
///
/// Returns `Some((consumed, content))`, where `consumed` is the number of
/// bytes taken from `text` (both `$` delimiters included) and `content` is the
/// slice between the delimiters. Returns `None` if `text` does not start an
/// inline math span.
///
/// Per Pandoc spec (tex_math_dollars extension):
/// - Opening $ must have non-space character immediately to its right
/// - Closing $ must have non-space character immediately to its left
/// - Closing $ must not be followed immediately by a digit
///
/// A `$` preceded by an odd number of backslashes is escaped and never closes
/// the span; an even run (e.g. `\\$`) is escaped backslashes followed by a
/// real delimiter. A `$` that fails one of the closing rules is skipped and
/// the search continues. The span cannot cross a newline.
pub fn try_parse_inline_math(text: &str) -> Option<(usize, &str)> {
    // Must start with exactly one $ (two or more is display math).
    let rest = text.strip_prefix('$')?;
    if rest.starts_with('$') {
        return None;
    }

    // Opening $ must have non-space character immediately to its right
    if rest.is_empty() || rest.starts_with(char::is_whitespace) {
        return None;
    }

    for (pos, ch) in rest.char_indices() {
        match ch {
            // Dollar signs can't span multiple lines
            '\n' => return None,
            '$' => {
                let before = &rest[..pos];

                // Only an odd-length backslash run escapes the dollar; looking
                // at a single preceding byte would misread `\\$`.
                let backslashes = before.bytes().rev().take_while(|&b| b == b'\\').count();
                let escaped = backslashes % 2 == 1;

                // Closing $ must have non-space character immediately to its left
                let space_before = pos == 0 || before.ends_with(char::is_whitespace);

                // Closing $ must not be followed immediately by a digit
                // (`$` is one byte, so `pos + 1` is a char boundary).
                let digit_after = rest[pos + 1..]
                    .chars()
                    .next()
                    .is_some_and(|next| next.is_ascii_digit());

                if !(escaped || space_before || digit_after) {
                    // opening $ + content + closing $
                    return Some((1 + pos + 1, before));
                }
                // Otherwise this $ doesn't close the math; keep searching.
            }
            _ => {}
        }
    }

    // No matching close found
    None
}
81
/// Try to parse GFM inline math (`` $`...`$ ``) at the start of `text`.
/// Extension: tex_math_gfm
///
/// On success returns the total number of bytes consumed (both two-byte
/// delimiters included) together with the content slice. Returns `None` when
/// the opener is missing, no closer is found, the content is empty, or a
/// newline occurs before the closer.
pub fn try_parse_gfm_inline_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix("$`")?;

    match body.find("`$") {
        // The first closer decides: content must be non-empty and single-line.
        Some(end) if end > 0 && !body[..end].contains('\n') => {
            Some((2 + end + 2, &body[..end])) // $` + content + `$
        }
        _ => None,
    }
}
113
/// Try to parse single backslash inline math: \(...\)
/// Extension: tex_math_single_backslash
///
/// Returns the number of bytes consumed (delimiters included) and the content
/// between them. The content ends at the first `\)`; if a newline occurs
/// before that closer, or there is no closer, the result is `None`.
pub fn try_parse_single_backslash_inline_math(text: &str) -> Option<(usize, &str)> {
    const OPEN: &str = r"\(";
    const CLOSE: &str = r"\)";

    let body = text.strip_prefix(OPEN)?;
    let close_at = body.find(CLOSE)?;
    let math = &body[..close_at];

    // Can't span multiple lines
    if math.contains('\n') {
        return None;
    }

    Some((OPEN.len() + close_at + CLOSE.len(), math))
}
145
/// Try to parse double backslash inline math: \\(...\\)
/// Extension: tex_math_double_backslash
///
/// Returns the number of bytes consumed (both three-byte delimiters included)
/// and the content between them. The content runs up to the first `\\)` and
/// must not contain a newline; otherwise the result is `None`.
pub fn try_parse_double_backslash_inline_math(text: &str) -> Option<(usize, &str)> {
    let inner = text.strip_prefix(r"\\(")?;

    inner
        .find(r"\\)")
        .map(|end| (end, &inner[..end]))
        // Can't span multiple lines
        .filter(|(_, math)| !math.contains('\n'))
        // \\( + content + \\)
        .map(|(end, math)| (3 + end + 3, math))
}
177
/// Try to parse display math (`$$...$$`) at the start of `text`.
///
/// Returns `Some((consumed, content))`, where `consumed` is the number of
/// bytes taken from `text` (the opening run, the content, and the entire
/// closing run of `$`) and `content` is the slice between the two runs.
/// Display math can span multiple lines in inline contexts.
///
/// Per Pandoc spec (tex_math_dollars extension):
/// - Opening delimiter is at least $$
/// - Closing delimiter must have at least as many $ as opening
/// - Content can span multiple lines
///
/// A `$` preceded by an odd number of backslashes is escaped and cannot start
/// the closing run; an even run (e.g. `\\$$`) is escaped backslashes followed
/// by a real delimiter.
pub fn try_parse_display_math(text: &str) -> Option<(usize, &str)> {
    // Count opening dollar signs; fewer than two is not display math.
    let opening_count = text.bytes().take_while(|&b| b == b'$').count();
    if opening_count < 2 {
        return None;
    }

    let rest = &text[opening_count..];
    let bytes = rest.as_bytes();

    // Jump from one `$` to the next looking for a matching closing run.
    let mut pos = 0;
    while let Some(offset) = rest[pos..].find('$') {
        let run_start = pos + offset;

        // Only an odd-length backslash run escapes the dollar; looking at a
        // single preceding byte would misread `\\$$`.
        let backslashes = bytes[..run_start]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        if backslashes % 2 == 1 {
            // Escaped dollar: skip just this one and continue searching.
            pos = run_start + 1;
            continue;
        }

        // Count closing dollar signs
        let closing_count = bytes[run_start..]
            .iter()
            .take_while(|&&b| b == b'$')
            .count();

        // Must have at least as many closing dollars as opening
        if closing_count >= opening_count {
            let total_len = opening_count + run_start + closing_count;
            return Some((total_len, &rest[..run_start]));
        }

        // Not enough dollars, skip this run and continue
        pos = run_start + closing_count;
    }

    // No matching close found
    None
}
234
/// Try to parse single backslash display math: \[...\]
/// Extension: tex_math_single_backslash
///
/// Returns the number of bytes consumed (delimiters included) and the content
/// up to the first `\]`, or `None` if the opener or closer is missing.
///
/// Per Pandoc spec:
/// - Content can span multiple lines
/// - No escape handling needed (backslash is the delimiter)
pub fn try_parse_single_backslash_display_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\[")?;

    // Everything before the first closer is the math content.
    let (math, _after) = body.split_once(r"\]")?;

    Some((2 + math.len() + 2, math)) // \[ + content + \]
}
265
/// Try to parse double backslash display math: \\[...\\]
/// Extension: tex_math_double_backslash
///
/// Returns the number of bytes consumed (both three-byte delimiters included)
/// and the content up to the first `\\]`, or `None` if the opener or closer
/// is missing.
///
/// Per Pandoc spec:
/// - Content can span multiple lines
/// - Double backslash is the delimiter
pub fn try_parse_double_backslash_display_math(text: &str) -> Option<(usize, &str)> {
    let body = text.strip_prefix(r"\\[")?;

    body.find(r"\\]")
        .map(|end| (3 + end + 3, &body[..end])) // \\[ + content + \\]
}
296
297/// Try to parse a LaTeX math environment (\begin{equation}...\end{equation})
298/// as display math. Returns (total_len, begin_marker, content, end_marker).
299pub fn try_parse_math_environment(text: &str) -> Option<(usize, &str, &str, &str)> {
300    let env_name = extract_environment_name(text)?;
301    if !is_inline_math_environment(&env_name) {
302        return None;
303    }
304
305    let begin_marker_len = text.find('}')? + 1;
306    let begin_marker = &text[..begin_marker_len];
307    let end_marker = format!("\\end{{{}}}", env_name);
308
309    let after_begin = &text[begin_marker_len..];
310    let end_rel = after_begin.find(&end_marker)?;
311    let end_start = begin_marker_len + end_rel;
312    let end_marker_end = end_start + end_marker.len();
313
314    let mut end_line_end = end_marker_end;
315    while end_line_end < text.len() {
316        let ch = text[end_line_end..].chars().next()?;
317        if ch == '\n' || ch == '\r' {
318            break;
319        }
320        end_line_end += ch.len_utf8();
321    }
322
323    if end_line_end < text.len() {
324        if text[end_line_end..].starts_with("\r\n") {
325            end_line_end += 2;
326        } else {
327            end_line_end += 1;
328        }
329    }
330
331    let content = &text[begin_marker_len..end_start];
332    let end_marker_text = &text[end_start..end_line_end];
333    Some((end_line_end, begin_marker, content, end_marker_text))
334}
335
336/// Emit an inline math node to the builder.
337pub fn emit_inline_math(builder: &mut GreenNodeBuilder, content: &str) {
338    builder.start_node(SyntaxKind::INLINE_MATH.into());
339
340    // Opening $
341    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$");
342
343    // Math content
344    builder.token(SyntaxKind::TEXT.into(), content);
345
346    // Closing $
347    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$");
348
349    builder.finish_node();
350}
351
352/// Emit a GFM inline math node: $`...`$
353pub fn emit_gfm_inline_math(builder: &mut GreenNodeBuilder, content: &str) {
354    builder.start_node(SyntaxKind::INLINE_MATH.into());
355    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "$`");
356    builder.token(SyntaxKind::TEXT.into(), content);
357    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), "`$");
358    builder.finish_node();
359}
360
361/// Emit a single backslash inline math node: \(...\)
362pub fn emit_single_backslash_inline_math(builder: &mut GreenNodeBuilder, content: &str) {
363    builder.start_node(SyntaxKind::INLINE_MATH.into());
364
365    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\(");
366    builder.token(SyntaxKind::TEXT.into(), content);
367    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\)");
368
369    builder.finish_node();
370}
371
372/// Emit a double backslash inline math node: \\(...\\)
373pub fn emit_double_backslash_inline_math(builder: &mut GreenNodeBuilder, content: &str) {
374    builder.start_node(SyntaxKind::INLINE_MATH.into());
375
376    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\\(");
377    builder.token(SyntaxKind::TEXT.into(), content);
378    builder.token(SyntaxKind::INLINE_MATH_MARKER.into(), r"\\)");
379
380    builder.finish_node();
381}
382
383/// Emit a display math node to the builder (when occurring inline in paragraph).
384pub fn emit_display_math(builder: &mut GreenNodeBuilder, content: &str, dollar_count: usize) {
385    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
386
387    // Opening $$
388    let marker = "$".repeat(dollar_count);
389    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), &marker);
390
391    // Math content
392    builder.token(SyntaxKind::TEXT.into(), content);
393
394    // Closing $$
395    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), &marker);
396
397    builder.finish_node();
398}
399
400/// Emit a display math environment node using raw \begin...\end... markers.
401pub fn emit_display_math_environment(
402    builder: &mut GreenNodeBuilder,
403    begin_marker: &str,
404    content: &str,
405    end_marker: &str,
406) {
407    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
408    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), begin_marker);
409    builder.token(SyntaxKind::TEXT.into(), content);
410    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), end_marker);
411    builder.finish_node();
412}
413
414/// Emit a single backslash display math node: \[...\]
415pub fn emit_single_backslash_display_math(builder: &mut GreenNodeBuilder, content: &str) {
416    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
417
418    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\[");
419    builder.token(SyntaxKind::TEXT.into(), content);
420    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\]");
421
422    builder.finish_node();
423}
424
425/// Emit a double backslash display math node: \\[...\\]
426pub fn emit_double_backslash_display_math(builder: &mut GreenNodeBuilder, content: &str) {
427    builder.start_node(SyntaxKind::DISPLAY_MATH.into());
428
429    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\\[");
430    builder.token(SyntaxKind::TEXT.into(), content);
431    builder.token(SyntaxKind::DISPLAY_MATH_MARKER.into(), r"\\]");
432
433    builder.finish_node();
434}
435
// Unit tests for the delimiter-based math parsers in this module.
//
// Every parser returns `Some((consumed, content))`; the expected `consumed`
// values below are byte counts that include the opening and closing delimiters.
#[cfg(test)]
mod tests {
    use super::*;

    // Inline math tests ($...$)
    #[test]
    fn test_parse_simple_inline_math() {
        let result = try_parse_inline_math("$x = y$");
        // 1 ($) + 5 (content) + 1 ($)
        assert_eq!(result, Some((7, "x = y")));
    }

    #[test]
    fn test_parse_inline_math_with_spaces_inside() {
        // Spaces inside math are OK, just not immediately after opening or before closing
        let result = try_parse_inline_math("$a + b$");
        assert_eq!(result, Some((7, "a + b")));
    }

    #[test]
    fn test_parse_inline_math_complex() {
        // Backslashes and braces inside the content are passed through untouched
        let result = try_parse_inline_math(r"$\frac{1}{2}$");
        assert_eq!(result, Some((13, r"\frac{1}{2}")));
    }

    #[test]
    fn test_not_inline_math_display() {
        // $$ is display math, not inline
        let result = try_parse_inline_math("$$x = y$$");
        assert_eq!(result, None);
    }

    #[test]
    fn test_inline_math_no_close() {
        // An opening $ without any closing $ is not math
        let result = try_parse_inline_math("$no close");
        assert_eq!(result, None);
    }

    #[test]
    fn test_inline_math_no_multiline() {
        // A newline before the closing $ aborts the parse
        let result = try_parse_inline_math("$x =\ny$");
        assert_eq!(result, None);
    }

    #[test]
    fn test_not_inline_math() {
        // Input must start with $
        let result = try_parse_inline_math("no dollar");
        assert_eq!(result, None);
    }

    #[test]
    fn test_inline_math_with_trailing_text() {
        // Only the math span is consumed; trailing text is left for the caller
        let result = try_parse_inline_math("$x$ and more");
        assert_eq!(result, Some((3, "x")));
    }

    #[test]
    fn test_spec_opening_must_have_non_space_right() {
        // Per Pandoc spec: opening $ must have non-space immediately to right
        let result = try_parse_inline_math("$ x$");
        assert_eq!(result, None, "Opening $ with space should not parse");
    }

    #[test]
    fn test_spec_closing_must_have_non_space_left() {
        // Per Pandoc spec: closing $ must have non-space immediately to left
        let result = try_parse_inline_math("$x $");
        assert_eq!(result, None, "Closing $ with space should not parse");
    }

    #[test]
    fn test_spec_closing_not_followed_by_digit() {
        // Per Pandoc spec: closing $ must not be followed by digit
        let result = try_parse_inline_math("$x$5");
        assert_eq!(result, None, "Closing $ followed by digit should not parse");
    }

    #[test]
    fn test_spec_dollar_amounts() {
        // $20,000 should not parse as math
        let result = try_parse_inline_math("$20,000");
        assert_eq!(result, None, "Dollar amounts should not parse as math");
    }

    #[test]
    fn test_valid_math_after_spec_checks() {
        // $x$ alone should still parse
        let result = try_parse_inline_math("$x$");
        assert_eq!(result, Some((3, "x")), "Valid math should parse");
    }

    #[test]
    fn test_math_followed_by_non_digit() {
        // $x$a should parse (not followed by digit)
        let result = try_parse_inline_math("$x$a");
        assert_eq!(
            result,
            Some((3, "x")),
            "Math followed by non-digit should parse"
        );
    }

    // Display math tests
    #[test]
    fn test_parse_display_math_simple() {
        let result = try_parse_display_math("$$x = y$$");
        // 2 ($$) + 5 (content) + 2 ($$)
        assert_eq!(result, Some((9, "x = y")));
    }

    #[test]
    fn test_parse_display_math_multiline() {
        // Newlines are part of the returned content
        let result = try_parse_display_math("$$\nx = y\n$$");
        assert_eq!(result, Some((11, "\nx = y\n")));
    }

    #[test]
    fn test_parse_display_math_triple_dollars() {
        // A longer opening run needs a closing run at least as long
        let result = try_parse_display_math("$$$x = y$$$");
        assert_eq!(result, Some((11, "x = y")));
    }

    #[test]
    fn test_parse_display_math_no_close() {
        let result = try_parse_display_math("$$no close");
        assert_eq!(result, None);
    }

    #[test]
    fn test_not_display_math() {
        // A single $ does not open display math
        let result = try_parse_display_math("$single dollar");
        assert_eq!(result, None);
    }

    #[test]
    fn test_display_math_with_trailing_text() {
        // Only the math span is consumed; trailing text is left for the caller
        let result = try_parse_display_math("$$x = y$$ and more");
        assert_eq!(result, Some((9, "x = y")));
    }

    // Single backslash math tests
    #[test]
    fn test_single_backslash_inline_math() {
        let result = try_parse_single_backslash_inline_math(r"\(x^2\)");
        // 2 (\() + 3 (content) + 2 (\))
        assert_eq!(result, Some((7, "x^2")));
    }

    #[test]
    fn test_single_backslash_inline_math_complex() {
        // A backslash that does not start `\)` is ordinary content
        let result = try_parse_single_backslash_inline_math(r"\(\frac{a}{b}\)");
        assert_eq!(result, Some((15, r"\frac{a}{b}")));
    }

    #[test]
    fn test_single_backslash_inline_math_no_close() {
        let result = try_parse_single_backslash_inline_math(r"\(no close");
        assert_eq!(result, None);
    }

    #[test]
    fn test_single_backslash_inline_math_no_multiline() {
        // Inline forms may not span lines
        let result = try_parse_single_backslash_inline_math("\\(x =\ny\\)");
        assert_eq!(result, None);
    }

    #[test]
    fn test_single_backslash_display_math() {
        let result = try_parse_single_backslash_display_math(r"\[E = mc^2\]");
        assert_eq!(result, Some((12, "E = mc^2")));
    }

    #[test]
    fn test_single_backslash_display_math_multiline() {
        // Display forms may span lines
        let result = try_parse_single_backslash_display_math("\\[\nx = y\n\\]");
        assert_eq!(result, Some((11, "\nx = y\n")));
    }

    #[test]
    fn test_single_backslash_display_math_no_close() {
        let result = try_parse_single_backslash_display_math(r"\[no close");
        assert_eq!(result, None);
    }

    // Double backslash math tests
    #[test]
    fn test_double_backslash_inline_math() {
        let result = try_parse_double_backslash_inline_math(r"\\(x^2\\)");
        // 3 (\\() + 3 (content) + 3 (\\))
        assert_eq!(result, Some((9, "x^2")));
    }

    #[test]
    fn test_double_backslash_inline_math_complex() {
        // Single backslashes inside the content do not terminate the span
        let result = try_parse_double_backslash_inline_math(r"\\(\alpha + \beta\\)");
        assert_eq!(result, Some((20, r"\alpha + \beta")));
    }

    #[test]
    fn test_double_backslash_inline_math_no_close() {
        let result = try_parse_double_backslash_inline_math(r"\\(no close");
        assert_eq!(result, None);
    }

    #[test]
    fn test_double_backslash_inline_math_no_multiline() {
        // Inline forms may not span lines
        let result = try_parse_double_backslash_inline_math("\\\\(x =\ny\\\\)");
        assert_eq!(result, None);
    }

    #[test]
    fn test_double_backslash_display_math() {
        let result = try_parse_double_backslash_display_math(r"\\[E = mc^2\\]");
        assert_eq!(result, Some((14, "E = mc^2")));
    }

    #[test]
    fn test_double_backslash_display_math_multiline() {
        // Display forms may span lines
        let result = try_parse_double_backslash_display_math("\\\\[\nx = y\n\\\\]");
        assert_eq!(result, Some((13, "\nx = y\n")));
    }

    #[test]
    fn test_double_backslash_display_math_no_close() {
        let result = try_parse_double_backslash_display_math(r"\\[no close");
        assert_eq!(result, None);
    }

    // Additional edge case tests
    #[test]
    fn test_display_math_escaped_dollar() {
        // Escaped dollar should be skipped
        let result = try_parse_display_math(r"$$a = \$100$$");
        assert_eq!(result, Some((13, r"a = \$100")));
    }

    #[test]
    fn test_display_math_with_content_on_fence_line() {
        // Content can appear on same line as opening delimiter
        let result = try_parse_display_math("$$x = y\n$$");
        assert_eq!(result, Some((10, "x = y\n")));
    }
}