Skip to main content

panache_parser/parser/inlines/
superscript.rs

1//! Parsing for superscript (^text^)
2//!
3//! This is a Pandoc extension.
4//! Syntax: ^text^ produces superscript text.
5//!
6//! Rules:
7//! - Must have exactly 1 caret on each side
8//! - Content cannot be empty
9//! - Carets cannot have whitespace immediately inside
10//! - Must not be confused with ^[...] (inline footnotes)
11
12use super::core::parse_inline_text;
13use crate::options::ParserOptions;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
/// Try to parse a superscript span (`^text^`) at the very start of `text`.
///
/// Returns `Some((total_len, inner_content))` on success, where `total_len`
/// is the byte length of the whole span including both carets and
/// `inner_content` is the slice between them with backslash escapes left
/// untouched. Returns `None` when:
///
/// - `text` does not start with `^`,
/// - the opening caret is followed by `[` (inline footnote) or whitespace,
/// - no unescaped closing `^` exists,
/// - the content is empty or ends with whitespace, or
/// - the content contains unescaped whitespace (see
///   `contains_unescaped_whitespace`).
///
/// A backslash escapes the byte that follows it, so `\^` inside the span does
/// not close it and `\ ` is an allowed space.
pub fn try_parse_superscript(text: &str) -> Option<(usize, &str)> {
    let bytes = text.as_bytes();

    // Must start with ^
    if bytes.first() != Some(&b'^') {
        return None;
    }

    // `^[` is an inline footnote, and content may not start with whitespace.
    if matches!(bytes.get(1), Some(&b) if b == b'[' || b.is_ascii_whitespace()) {
        return None;
    }

    // Find the closing ^, stepping over backslash-escaped bytes so that an
    // escaped caret (`\^`) stays part of the content instead of ending it.
    // Every byte we stop on is ASCII `^`, which is always a char boundary in
    // UTF-8, so slicing at `close` is safe even after skipping into the
    // middle of a multi-byte character.
    let mut pos = 1;
    let close = loop {
        match bytes.get(pos) {
            None => return None,
            Some(b'^') => break pos,
            Some(b'\\') => pos += 2,
            Some(_) => pos += 1,
        }
    };

    // Extract content between the delimiters
    let content = &text[1..close];

    // Content cannot be empty and cannot end with whitespace. (Content made
    // up solely of whitespace necessarily ends with whitespace.)
    if content.is_empty() || content.ends_with(char::is_whitespace) {
        return None;
    }

    // Pandoc rule: superscripted text cannot contain unescaped whitespace.
    // To include a space, source must escape it as `\ `. Verified against
    // `pandoc -f markdown` for `^x y^` → not a superscript, `^x\ y^` →
    // Superscript with NBSP-joined content.
    if contains_unescaped_whitespace(content) {
        return None;
    }

    Some((close + 1, content)) // +1 to include the closing ^
}

/// Reports whether `content` contains ASCII whitespace that is not preceded
/// by a backslash.
///
/// The scan is byte-wise, so only ASCII bytes are classified. Bytes >= 0x80
/// belong to multi-byte UTF-8 sequences and must not be reinterpreted as
/// `char`s: e.g. the continuation byte 0xA0 of `à` would otherwise look like
/// U+00A0 (no-break space) and wrongly reject `^à^`.
fn contains_unescaped_whitespace(content: &str) -> bool {
    let bytes = content.as_bytes();
    let mut i = 0;
    while let Some(&b) = bytes.get(i) {
        match b {
            // A backslash escapes the next byte; skip both.
            b'\\' => i += 2,
            _ if b.is_ascii() && char::from(b).is_whitespace() => return true,
            _ => i += 1,
        }
    }
    false
}
94
95/// Emit a superscript node with its content
96pub fn emit_superscript(builder: &mut GreenNodeBuilder, inner_text: &str, config: &ParserOptions) {
97    builder.start_node(SyntaxKind::SUPERSCRIPT.into());
98
99    // Opening marker
100    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
101    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
102    builder.finish_node();
103
104    // Parse inner content recursively for nested inline elements
105    parse_inline_text(builder, inner_text, config, false);
106
107    // Closing marker
108    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
109    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
110    builder.finish_node();
111
112    builder.finish_node();
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses as a superscript spanning `total_len`
    /// bytes with `inner` as its content.
    fn assert_parses(input: &str, total_len: usize, inner: &str) {
        assert_eq!(
            try_parse_superscript(input),
            Some((total_len, inner)),
            "input: {:?}",
            input
        );
    }

    /// Asserts that `input` is not recognised as a superscript.
    fn assert_rejected(input: &str) {
        assert_eq!(try_parse_superscript(input), None, "input: {:?}", input);
    }

    #[test]
    fn test_simple_superscript() {
        assert_parses("^2^", 3, "2");
        assert_parses("^nd^", 4, "nd");
    }

    #[test]
    fn test_superscript_with_multiple_chars() {
        assert_parses("^(tm)^", 6, "(tm)");
        assert_parses("^text^", 6, "text");
    }

    #[test]
    fn test_no_whitespace_inside_delimiters() {
        // Leading whitespace right after the opening caret
        assert_rejected("^ text^");

        // Trailing whitespace right before the closing caret
        assert_rejected("^text ^");
    }

    #[test]
    fn test_empty_content() {
        for input in ["^^", "^ ^"] {
            assert_rejected(input);
        }
    }

    #[test]
    fn test_no_closing() {
        for input in ["^text", "^hello world"] {
            assert_rejected(input);
        }
    }

    #[test]
    fn test_not_confused_with_inline_footnote() {
        // `^[` introduces an inline footnote, never a superscript
        assert_rejected("^[footnote]");
    }

    #[test]
    fn test_superscript_with_other_content_after() {
        assert_parses("^2^ text", 3, "2");
        assert_parses("^nd^ of the month", 4, "nd");
    }

    #[test]
    fn test_internal_whitespace_rejected() {
        // Unescaped internal whitespace is rejected, matching Pandoc;
        // a backslash-escaped space is allowed.
        assert_rejected("^some text^");
        assert_parses("^some\\ text^", 12, "some\\ text");
    }

    #[test]
    fn test_single_char() {
        assert_parses("^a^", 3, "a");
    }
}