Skip to main content

panache_parser/parser/inlines/
superscript.rs

1//! Parsing for superscript (^text^)
2//!
3//! This is a Pandoc extension.
4//! Syntax: ^text^ produces superscript text.
5//!
6//! Rules:
7//! - Must have exactly 1 caret on each side
8//! - Content cannot be empty
9//! - Carets cannot have whitespace immediately inside
10//! - Must not be confused with ^[...] (inline footnotes)
11
12use super::core::parse_inline_text;
13use crate::options::ParserOptions;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
/// Attempt to recognise a superscript span (`^text^`) at the very start of `text`.
///
/// Returns `Some((total_len, inner_content))` on success, where `total_len` is the
/// number of bytes consumed (both carets included) and `inner_content` is the slice
/// between the carets. Anything following the closing caret is ignored.
///
/// Returns `None` when:
/// - `text` does not begin with `^`,
/// - the caret is directly followed by `[` (that form is an inline footnote),
/// - the caret is directly followed by ASCII whitespace,
/// - there is no later `^` to close the span,
/// - the content is empty or consists only of whitespace,
/// - the content ends with whitespace.
///
/// Whitespace in the interior of the content is accepted. Note that the leading
/// check only looks at ASCII whitespace (it inspects a single byte), whereas the
/// trailing check uses `char::is_whitespace` and therefore also rejects Unicode
/// whitespace.
pub fn try_parse_superscript(text: &str) -> Option<(usize, &str)> {
    // Everything after the opening caret; bail out if there is no opening caret.
    let after_open = text.strip_prefix('^')?;

    // Inspect the byte right behind the opening caret.
    match after_open.as_bytes().first().copied() {
        // `^[` introduces an inline footnote, not a superscript.
        Some(b'[') => return None,
        // Content may not begin with (ASCII) whitespace.
        Some(byte) if byte.is_ascii_whitespace() => return None,
        _ => {}
    }

    // The first caret after the opener closes the span. `^` is ASCII, so the
    // offset is always a valid char boundary for slicing.
    let close_offset = after_open.find('^')?;
    let content = &after_open[..close_offset];

    // Reject empty / whitespace-only content and content with trailing whitespace.
    let is_blank = content.trim().is_empty();
    if is_blank || content.ends_with(char::is_whitespace) {
        return None;
    }

    // Opening caret + content + closing caret.
    Some((close_offset + 2, content))
}
69
70/// Emit a superscript node with its content
71pub fn emit_superscript(builder: &mut GreenNodeBuilder, inner_text: &str, config: &ParserOptions) {
72    builder.start_node(SyntaxKind::SUPERSCRIPT.into());
73
74    // Opening marker
75    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
76    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
77    builder.finish_node();
78
79    // Parse inner content recursively for nested inline elements
80    parse_inline_text(builder, inner_text, config, false);
81
82    // Closing marker
83    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
84    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
85    builder.finish_node();
86
87    builder.finish_node();
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `input` parses as a superscript that consumes `len` bytes
    /// and yields `inner` as its content.
    #[track_caller]
    fn assert_parses(input: &str, len: usize, inner: &str) {
        assert_eq!(try_parse_superscript(input), Some((len, inner)));
    }

    /// Assert that `input` is not recognised as a superscript.
    #[track_caller]
    fn assert_rejected(input: &str) {
        assert_eq!(try_parse_superscript(input), None);
    }

    #[test]
    fn test_simple_superscript() {
        assert_parses("^2^", 3, "2");
        assert_parses("^nd^", 4, "nd");
    }

    #[test]
    fn test_superscript_with_multiple_chars() {
        assert_parses("^(tm)^", 6, "(tm)");
        assert_parses("^text^", 6, "text");
    }

    #[test]
    fn test_no_whitespace_inside_delimiters() {
        // Whitespace directly after the opening caret
        assert_rejected("^ text^");

        // Whitespace directly before the closing caret
        assert_rejected("^text ^");
    }

    #[test]
    fn test_empty_content() {
        assert_rejected("^^");
        assert_rejected("^ ^");
    }

    #[test]
    fn test_no_closing() {
        assert_rejected("^text");
        assert_rejected("^hello world");
    }

    #[test]
    fn test_not_confused_with_inline_footnote() {
        // `^[` starts an inline footnote and must not match as superscript
        assert_rejected("^[footnote]");
    }

    #[test]
    fn test_superscript_with_other_content_after() {
        // Only the superscript span itself is counted in the returned length
        assert_parses("^2^ text", 3, "2");
        assert_parses("^nd^ of the month", 4, "nd");
    }

    #[test]
    fn test_spaces_inside_are_ok() {
        assert_parses("^some text^", 11, "some text");
    }

    #[test]
    fn test_single_char() {
        assert_parses("^a^", 3, "a");
    }
}