Skip to main content

panache_parser/parser/inlines/
subscript.rs

1//! Parsing for subscript (~text~)
2//!
3//! This is a Pandoc extension.
4//! Syntax: ~text~ produces subscript text.
5//!
6//! Rules:
7//! - Must have exactly 1 tilde on each side
8//! - Content cannot be empty
9//! - Tildes cannot have whitespace immediately inside
10//! - Must not be confused with ~~ (strikeout)
11
12use super::core::parse_inline_text;
13use crate::options::ParserOptions;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
/// Try to parse subscript (~text~) at the start of `text`.
///
/// Returns `Some((total_len, inner_content))` on success, where `total_len` is
/// the number of bytes consumed (both `~` delimiters included) and
/// `inner_content` is the slice between the delimiters.
///
/// Returns `None` when:
/// - `text` does not start with `~`
/// - the opening or the closing `~` is immediately followed by another `~`
///   (that is `~~`, the strikeout marker)
/// - no closing `~` is found
/// - the content is empty, or starts or ends with whitespace
///
/// Both edges of the content are checked with `char::is_whitespace`, so
/// non-ASCII whitespace right after the opening `~` is rejected exactly like
/// whitespace right before the closing `~`.
pub fn try_parse_subscript(text: &str) -> Option<(usize, &str)> {
    // Must start with ~
    let rest = text.strip_prefix('~')?;

    // The first ~ after the opener is the only closing candidate.
    let close = rest.find('~')?;
    let (content, tail) = rest.split_at(close);

    // `tail` begins with the closing ~; a second ~ right behind it is ~~.
    if tail.starts_with("~~") {
        return None;
    }

    // Empty content also covers an opening ~~ (the closer is found at offset 0).
    if content.is_empty()
        || content.starts_with(char::is_whitespace)
        || content.ends_with(char::is_whitespace)
    {
        return None;
    }

    // Opening ~ + content + closing ~ (each ~ is a single byte).
    Some((content.len() + 2, content))
}
73
74/// Emit a subscript node with its content
75pub fn emit_subscript(builder: &mut GreenNodeBuilder, inner_text: &str, config: &ParserOptions) {
76    builder.start_node(SyntaxKind::SUBSCRIPT.into());
77
78    // Opening marker
79    builder.start_node(SyntaxKind::SUBSCRIPT_MARKER.into());
80    builder.token(SyntaxKind::SUBSCRIPT_MARKER.into(), "~");
81    builder.finish_node();
82
83    // Parse inner content recursively for nested inline elements
84    parse_inline_text(builder, inner_text, config, false);
85
86    // Closing marker
87    builder.start_node(SyntaxKind::SUBSCRIPT_MARKER.into());
88    builder.token(SyntaxKind::SUBSCRIPT_MARKER.into(), "~");
89    builder.finish_node();
90
91    builder.finish_node();
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `input` parses as a subscript of `len` bytes with content `inner`.
    #[track_caller]
    fn assert_parses(input: &str, len: usize, inner: &str) {
        assert_eq!(try_parse_subscript(input), Some((len, inner)));
    }

    /// Assert that `input` is not recognised as a subscript.
    #[track_caller]
    fn assert_rejected(input: &str) {
        assert_eq!(try_parse_subscript(input), None);
    }

    #[test]
    fn test_simple_subscript() {
        assert_parses("~2~", 3, "2");
        assert_parses("~n~", 3, "n");
    }

    #[test]
    fn test_subscript_with_multiple_chars() {
        assert_parses("~text~", 6, "text");
        assert_parses("~i+1~", 5, "i+1");
    }

    #[test]
    fn test_no_whitespace_inside_delimiters() {
        // Whitespace right after the opening marker
        assert_rejected("~ text~");

        // Whitespace right before the closing marker
        assert_rejected("~text ~");
    }

    #[test]
    fn test_empty_content() {
        assert_rejected("~~");
        assert_rejected("~ ~");
    }

    #[test]
    fn test_no_closing() {
        assert_rejected("~text");
        assert_rejected("~hello world");
    }

    #[test]
    fn test_not_confused_with_strikeout() {
        // A doubled tilde is strikeout syntax, never a subscript
        assert_rejected("~~text~~");
    }

    #[test]
    fn test_subscript_with_other_content_after() {
        assert_parses("~2~ text", 3, "2");
        assert_parses("~n~ of sequence", 3, "n");
    }

    #[test]
    fn test_spaces_inside_are_ok() {
        assert_parses("~some text~", 11, "some text");
    }

    #[test]
    fn test_single_char() {
        assert_parses("~a~", 3, "a");
    }

    #[test]
    fn test_subscript_before_strikeout_marker() {
        // A later ~ that is not adjacent to the closing marker does not interfere
        assert_parses("~x~ ~", 3, "x");
    }
}
158}