Skip to main content

panache_parser/parser/inlines/
native_spans.rs

1//! Native span parsing for Pandoc's `native_spans` extension.
2//!
3//! Syntax: `<span class="foo">content</span>`
4//!
5//! When the `native_spans` extension is enabled, HTML `<span>` tags are
6//! treated as native Pandoc Span elements instead of raw HTML.
7
8use crate::options::ParserOptions;
9use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
12use super::core::parse_inline_text;
13
/// Try to parse a native HTML span located at the very start of `text`.
///
/// Native spans have the form `<span attrs...>content</span>`. The content may
/// itself contain nested `<span>` elements; the closing tag that balances the
/// outermost opening tag terminates the span.
///
/// Returns `Some((length, content, attributes))` on success, where
/// - `length` is the number of bytes consumed, including the closing `</span>`,
/// - `content` is the slice of `text` between the opening and closing tags, and
/// - `attributes` is the trimmed text between `<span` and the closing `>` of
///   the opening tag (empty when there are no attributes).
///
/// Returns `None` when `text` does not start with a `<span` tag (e.g.
/// `<spanner>`), when the opening tag is never closed with `>`, or when no
/// balancing `</span>` is found.
pub(crate) fn try_parse_native_span(text: &str) -> Option<(usize, &str, String)> {
    const OPEN_LEN: usize = 5; // "<span"
    const CLOSE: &[u8] = b"</span>";

    // All scanning is done on bytes: every delimiter we look for is ASCII, and
    // byte-slice comparisons never panic on UTF-8 character boundaries.
    let bytes = text.as_bytes();

    if !starts_with_span_open_tag(bytes) {
        // Not a span tag at all (could be `<spanx>` or plain text).
        return None;
    }

    // Scan the opening tag until its terminating `>`.
    let attr_start = OPEN_LEN;
    let mut pos = attr_start;
    while pos < bytes.len() && bytes[pos] != b'>' {
        let byte = bytes[pos];
        pos += 1;
        if byte == b'"' || byte == b'\'' {
            // Inside a quoted attribute value a `>` does not end the tag.
            while pos < bytes.len() && bytes[pos] != byte {
                // A backslash skips the following byte as well.
                pos += if bytes[pos] == b'\\' { 2 } else { 1 };
            }
            if pos < bytes.len() {
                pos += 1; // Step over the closing quote.
            }
        }
    }

    if pos >= bytes.len() {
        // The opening tag is never terminated by `>`.
        return None;
    }

    // `pos` sits on the ASCII `>` here, so both slice bounds are char boundaries.
    let attributes = text[attr_start..pos].trim().to_string();

    pos += 1; // Step over `>`.
    let content_start = pos;

    // Number of `<span` tags currently open, including the outer one.
    let mut depth = 1usize;

    while pos < bytes.len() {
        let rest = &bytes[pos..];
        if starts_with_span_open_tag(rest) {
            depth += 1;
            pos += OPEN_LEN;
        } else if rest.starts_with(CLOSE) {
            depth -= 1;
            if depth == 0 {
                // `pos` sits on the ASCII `<` of the balancing closing tag.
                let content = &text[content_start..pos];
                return Some((pos + CLOSE.len(), content, attributes));
            }
            pos += CLOSE.len();
        } else {
            // May step into the middle of a multi-byte character; that is fine
            // because only the byte slice is inspected at this offset.
            pos += 1;
        }
    }

    // Ran out of input before the outer span was closed.
    None
}

/// Returns `true` when `bytes` begins with `<span` immediately followed by
/// ASCII whitespace or `>`, i.e. an actual span tag rather than `<spanner>`.
fn starts_with_span_open_tag(bytes: &[u8]) -> bool {
    bytes.starts_with(b"<span")
        && matches!(
            bytes.get(5).copied(),
            Some(b' ' | b'\t' | b'\n' | b'\r' | b'>')
        )
}
112
113/// Emit a native span node to the builder.
114pub(crate) fn emit_native_span(
115    builder: &mut GreenNodeBuilder,
116    content: &str,
117    attributes: &str,
118    config: &ParserOptions,
119) {
120    builder.start_node(SyntaxKind::BRACKETED_SPAN.into());
121
122    // Opening tag
123    builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), "<span");
124    if !attributes.is_empty() {
125        // Add space before attributes
126        builder.token(SyntaxKind::WHITESPACE.into(), " ");
127        builder.token(SyntaxKind::SPAN_ATTRIBUTES.into(), attributes);
128    }
129    builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), ">");
130
131    // Parse the content recursively for inline markdown
132    builder.start_node(SyntaxKind::SPAN_CONTENT.into());
133    parse_inline_text(builder, content, config, false);
134    builder.finish_node();
135
136    // Closing tag
137    builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), "</span>");
138
139    builder.finish_node();
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    // Each expected tuple is (bytes consumed, span content, trimmed attributes).

    #[test]
    fn test_parse_simple_span() {
        let input = "<span>text</span>";
        assert_eq!(
            try_parse_native_span(input),
            Some((17, "text", String::new()))
        );
    }

    #[test]
    fn test_parse_span_with_class() {
        let input = r#"<span class="foo">text</span>"#;
        let attrs = r#"class="foo""#.to_string();
        assert_eq!(try_parse_native_span(input), Some((29, "text", attrs)));
    }

    #[test]
    fn test_parse_span_with_id() {
        let input = r#"<span id="bar">text</span>"#;
        let attrs = r#"id="bar""#.to_string();
        assert_eq!(try_parse_native_span(input), Some((26, "text", attrs)));
    }

    #[test]
    fn test_parse_span_with_multiple_attrs() {
        let input = r#"<span id="x" class="y z">text</span>"#;
        let attrs = r#"id="x" class="y z""#.to_string();
        assert_eq!(try_parse_native_span(input), Some((36, "text", attrs)));
    }

    #[test]
    fn test_parse_span_with_markdown() {
        // Markdown inside the span is returned verbatim; it is not parsed here.
        let input = "<span>*emphasis* and `code`</span>";
        assert_eq!(
            try_parse_native_span(input),
            Some((34, "*emphasis* and `code`", String::new()))
        );
    }

    #[test]
    fn test_parse_nested_spans() {
        // The inner span stays part of the outer span's content.
        let input = "<span>outer <span>inner</span> text</span>";
        assert_eq!(
            try_parse_native_span(input),
            Some((42, "outer <span>inner</span> text", String::new()))
        );
    }

    #[test]
    fn test_parse_span_with_newlines_in_content() {
        let input = "<span>line 1\nline 2</span>";
        assert_eq!(
            try_parse_native_span(input),
            Some((26, "line 1\nline 2", String::new()))
        );
    }

    #[test]
    fn test_not_span_no_closing_tag() {
        assert_eq!(try_parse_native_span("<span>text"), None);
    }

    #[test]
    fn test_not_span_wrong_tag() {
        assert_eq!(try_parse_native_span("<spanx>text</spanx>"), None);
    }

    #[test]
    fn test_not_span_no_space_after() {
        // <spanner> should not be parsed as <span>
        assert_eq!(try_parse_native_span("<spanner>text</spanner>"), None);
    }

    #[test]
    fn test_parse_span_with_quoted_attrs_containing_gt() {
        // A `>` inside a quoted attribute value must not end the opening tag.
        let input = r#"<span title="a > b">text</span>"#;
        let attrs = r#"title="a > b""#.to_string();
        assert_eq!(try_parse_native_span(input), Some((31, "text", attrs)));
    }

    #[test]
    fn test_parse_empty_span() {
        assert_eq!(
            try_parse_native_span("<span></span>"),
            Some((13, "", String::new()))
        );
    }

    #[test]
    fn test_parse_span_trailing_text() {
        // Only the span itself is consumed; " more" is left for the caller.
        let input = "<span>text</span> more";
        assert_eq!(
            try_parse_native_span(input),
            Some((17, "text", String::new()))
        );
    }
}