Skip to main content

panache_parser/parser/inlines/
superscript.rs

1//! Parsing for superscript (^text^)
2//!
3//! This is a Pandoc extension.
4//! Syntax: ^text^ produces superscript text.
5//!
6//! Rules:
7//! - Must have exactly 1 caret on each side
8//! - Content cannot be empty
9//! - Carets cannot have whitespace immediately inside
10//! - Must not be confused with ^[...] (inline footnotes)
11
12use super::core::parse_inline_text;
13use crate::options::ParserOptions;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
/// Try to parse a superscript span (`^text^`) at the very start of `text`.
///
/// Returns `Some((total_len, inner_content))` on success, where `total_len`
/// is the number of bytes consumed (both carets included) and
/// `inner_content` is the slice between the carets, with any backslash
/// escapes left untouched.
///
/// Returns `None` when:
/// - `text` does not start with `^`, or starts with `^[` (inline footnote);
/// - there is no closing `^`;
/// - the content is empty;
/// - the content starts or ends with whitespace, or contains whitespace that
///   is not escaped with a backslash.
pub fn try_parse_superscript(text: &str) -> Option<(usize, &str)> {
    // Must start with `^`.
    let rest = text.strip_prefix('^')?;

    // `^[` opens an inline footnote, and content may not start with
    // whitespace. Both are decided by the byte right after the opener.
    if let Some(&first) = rest.as_bytes().first() {
        if first == b'[' || first.is_ascii_whitespace() {
            return None;
        }
    }

    // The first caret after the opener closes the span. `^` is ASCII, so the
    // byte offset returned by `find` is always a valid char boundary.
    let close = rest.find('^')?;
    let content = &rest[..close];

    // Content cannot be empty and cannot end with whitespace (escaped or
    // not). Whitespace-only content is covered by the trailing check.
    if content.is_empty() || content.ends_with(char::is_whitespace) {
        return None;
    }

    // Pandoc rule: superscripted text cannot contain unescaped whitespace.
    // To include a space, source must escape it as `\ `. Verified against
    // `pandoc -f markdown` for `^x y^` → not a superscript, `^x\ y^` →
    // Superscript with NBSP-joined content.
    if contains_unescaped_whitespace(content) {
        return None;
    }

    // Opening caret + content + closing caret.
    Some((close + 2, content))
}

/// Returns `true` if `content` contains a whitespace character that is not
/// immediately preceded by an (unescaped) backslash.
///
/// The scan works on `char`s rather than bytes: UTF-8 continuation bytes such
/// as `0xA0` or `0x85` must not be mistaken for NBSP/NEL, and an escape must
/// skip the whole escaped character even when it is multi-byte.
fn contains_unescaped_whitespace(content: &str) -> bool {
    let mut chars = content.chars();
    while let Some(c) = chars.next() {
        if c == '\\' {
            // A backslash escapes whatever character follows it (if any),
            // including whitespace and another backslash.
            chars.next();
        } else if c.is_whitespace() {
            return true;
        }
    }
    false
}
94
95/// Emit a superscript node with its content
96pub fn emit_superscript(
97    builder: &mut GreenNodeBuilder,
98    inner_text: &str,
99    config: &ParserOptions,
100    suppress_footnote_refs: bool,
101) {
102    builder.start_node(SyntaxKind::SUPERSCRIPT.into());
103
104    // Opening marker
105    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
106    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
107    builder.finish_node();
108
109    // Parse inner content recursively for nested inline elements
110    parse_inline_text(builder, inner_text, config, false, suppress_footnote_refs);
111
112    // Closing marker
113    builder.start_node(SyntaxKind::SUPERSCRIPT_MARKER.into());
114    builder.token(SyntaxKind::SUPERSCRIPT_MARKER.into(), "^");
115    builder.finish_node();
116
117    builder.finish_node();
118}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, total_len, inner)` case parses as expected.
    fn assert_parsed(cases: &[(&str, usize, &str)]) {
        for &(input, total_len, inner) in cases {
            assert_eq!(
                try_parse_superscript(input),
                Some((total_len, inner)),
                "input: {:?}",
                input
            );
        }
    }

    /// Asserts that none of `inputs` is recognised as a superscript.
    fn assert_rejected(inputs: &[&str]) {
        for &input in inputs {
            assert_eq!(try_parse_superscript(input), None, "input: {:?}", input);
        }
    }

    #[test]
    fn test_simple_superscript() {
        assert_parsed(&[("^2^", 3, "2"), ("^nd^", 4, "nd")]);
    }

    #[test]
    fn test_superscript_with_multiple_chars() {
        assert_parsed(&[("^(tm)^", 6, "(tm)"), ("^text^", 6, "text")]);
    }

    #[test]
    fn test_no_whitespace_inside_delimiters() {
        assert_rejected(&[
            // Leading whitespace right after the opening caret.
            "^ text^",
            // Trailing whitespace right before the closing caret.
            "^text ^",
        ]);
    }

    #[test]
    fn test_empty_content() {
        assert_rejected(&["^^", "^ ^"]);
    }

    #[test]
    fn test_no_closing() {
        assert_rejected(&["^text", "^hello world"]);
    }

    #[test]
    fn test_not_confused_with_inline_footnote() {
        // `^[` belongs to the inline-footnote syntax, not to superscript.
        assert_rejected(&["^[footnote]"]);
    }

    #[test]
    fn test_superscript_with_other_content_after() {
        assert_parsed(&[("^2^ text", 3, "2"), ("^nd^ of the month", 4, "nd")]);
    }

    #[test]
    fn test_internal_whitespace_rejected() {
        // Unescaped internal whitespace disqualifies the span, while a
        // backslash-escaped space is accepted.
        assert_rejected(&["^some text^"]);
        assert_parsed(&[("^some\\ text^", 12, "some\\ text")]);
    }

    #[test]
    fn test_single_char() {
        assert_parsed(&[("^a^", 3, "a")]);
    }
}