panache_parser/parser/inlines/
native_spans.rs1use crate::options::{Dialect, ParserOptions};
9use crate::parser::utils::attributes::{emit_html_attrs_node, emit_html_span_attributes_node};
10use crate::syntax::SyntaxKind;
11use rowan::GreenNodeBuilder;
12
13use super::core::parse_inline_text;
14
/// Attempts to recognise a native HTML span (`<span ...>content</span>`) at the
/// very start of `text`.
///
/// On success returns `(total_len, content, attributes)` where:
/// - `total_len` is the number of bytes consumed, up to and including the
///   matching `</span>`;
/// - `content` is the slice of `text` between the opening tag's `>` and the
///   matching `</span>`;
/// - `attributes` is the text between `<span` and the opening tag's `>`,
///   with surrounding whitespace trimmed.
///
/// Returns `None` when `text` does not begin with `<span` followed by
/// whitespace or `>`, when the opening tag is never closed by `>`, or when no
/// matching `</span>` is found.
pub(crate) fn try_parse_native_span(text: &str) -> Option<(usize, &str, String)> {
    const OPEN_PREFIX: &[u8] = b"<span";
    const CLOSE_TAG: &[u8] = b"</span>";

    /// True for a byte that may directly follow `<span` in a real span tag,
    /// which rules out longer tag names such as `<spanner>`.
    fn ends_tag_name(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'>')
    }

    let raw = text.as_bytes();
    let len = raw.len();

    if !raw.starts_with(OPEN_PREFIX) {
        return None;
    }
    let attrs_begin = OPEN_PREFIX.len();
    match raw.get(attrs_begin) {
        Some(&byte) if ends_tag_name(byte) => {}
        _ => return None,
    }

    // Walk to the `>` that ends the opening tag. A `>` inside a quoted
    // attribute value does not end the tag, and a backslash inside quotes
    // skips the byte that follows it.
    let mut cursor = attrs_begin;
    loop {
        match raw.get(cursor).copied() {
            // Ran off the end without seeing `>`: not a span.
            None => return None,
            Some(b'>') => break,
            Some(quote @ (b'"' | b'\'')) => {
                cursor += 1;
                while cursor < len && raw[cursor] != quote {
                    cursor += if raw[cursor] == b'\\' { 2 } else { 1 };
                }
                // Step over the closing quote when there is one.
                if cursor < len {
                    cursor += 1;
                }
            }
            Some(_) => cursor += 1,
        }
    }

    let attributes = text[attrs_begin..cursor].trim().to_string();

    // Scan the body, counting nested `<span` openers so that the `</span>`
    // we stop at is the one balancing the outermost tag.
    let content_start = cursor + 1;
    let mut cursor = content_start;
    let mut open_spans = 1usize;

    while cursor < len {
        let tail = &raw[cursor..];
        let opens_nested = tail.starts_with(OPEN_PREFIX)
            && tail
                .get(OPEN_PREFIX.len())
                .copied()
                .is_some_and(ends_tag_name);

        if opens_nested {
            open_spans += 1;
            cursor += OPEN_PREFIX.len();
        } else if tail.starts_with(CLOSE_TAG) {
            open_spans -= 1;
            if open_spans == 0 {
                return Some((
                    cursor + CLOSE_TAG.len(),
                    &text[content_start..cursor],
                    attributes,
                ));
            }
            cursor += CLOSE_TAG.len();
        } else {
            // Advance by a whole character so `cursor` stays on a UTF-8
            // boundary for the string slicing above.
            cursor += text[cursor..].chars().next().map_or(1, char::len_utf8);
        }
    }

    None
}
121
122pub(crate) fn emit_native_span(
132 builder: &mut GreenNodeBuilder,
133 raw: &str,
134 content: &str,
135 config: &ParserOptions,
136 suppress_footnote_refs: bool,
137) {
138 let close_tag = "</span>";
139 let open_tag_end = raw.len().saturating_sub(content.len() + close_tag.len());
140 let open_tag = &raw[..open_tag_end];
141
142 if config.dialect == Dialect::Pandoc {
143 builder.start_node(SyntaxKind::INLINE_HTML_SPAN.into());
144 emit_span_open_tag_tokens(builder, open_tag);
145 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
146 parse_inline_text(builder, content, config, false, suppress_footnote_refs);
147 builder.finish_node();
148 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
149 builder.finish_node();
150 return;
151 }
152
153 let attrs_text = open_tag
158 .strip_prefix("<span")
159 .and_then(|s| s.strip_suffix('>'))
160 .map(str::trim)
161 .unwrap_or("");
162 builder.start_node(SyntaxKind::BRACKETED_SPAN.into());
163 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), "<span");
164 if !attrs_text.is_empty() {
165 builder.token(SyntaxKind::WHITESPACE.into(), " ");
166 emit_html_span_attributes_node(builder, attrs_text);
167 }
168 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), ">");
169 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
170 parse_inline_text(builder, content, config, false, suppress_footnote_refs);
171 builder.finish_node();
172 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
173 builder.finish_node();
174}
175
176fn emit_span_open_tag_tokens(builder: &mut GreenNodeBuilder<'_>, open_tag: &str) {
184 let Some(rest) = open_tag.strip_prefix("<span") else {
185 builder.token(SyntaxKind::TEXT.into(), open_tag);
188 return;
189 };
190 builder.token(SyntaxKind::TEXT.into(), "<span");
191 let Some(inside) = rest.strip_suffix('>') else {
192 builder.token(SyntaxKind::TEXT.into(), rest);
193 return;
194 };
195 let bytes = inside.as_bytes();
196 let leading_ws_end = bytes
198 .iter()
199 .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
200 .unwrap_or(bytes.len());
201 let leading_ws = &inside[..leading_ws_end];
202 let after_leading = &inside[leading_ws_end..];
203 let trailing_ws_start = after_leading
204 .as_bytes()
205 .iter()
206 .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
207 .map(|i| i + 1)
208 .unwrap_or(0);
209 let attrs_text = &after_leading[..trailing_ws_start];
210 let trailing_ws = &after_leading[trailing_ws_start..];
211
212 if !leading_ws.is_empty() {
213 builder.token(SyntaxKind::WHITESPACE.into(), leading_ws);
214 }
215 if !attrs_text.is_empty() {
216 emit_html_attrs_node(builder, attrs_text);
217 }
218 if !trailing_ws.is_empty() {
219 builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
220 }
221 builder.token(SyntaxKind::TEXT.into(), ">");
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the value `try_parse_native_span` is expected to return for a
    /// successful parse: total byte length, inner content, attribute text.
    fn parsed<'a>(
        total_len: usize,
        content: &'a str,
        attrs: &str,
    ) -> Option<(usize, &'a str, String)> {
        Some((total_len, content, attrs.to_string()))
    }

    #[test]
    fn test_parse_simple_span() {
        let input = "<span>text</span>";
        assert_eq!(try_parse_native_span(input), parsed(17, "text", ""));
    }

    #[test]
    fn test_parse_span_with_class() {
        let input = r#"<span class="foo">text</span>"#;
        assert_eq!(
            try_parse_native_span(input),
            parsed(29, "text", r#"class="foo""#)
        );
    }

    #[test]
    fn test_parse_span_with_id() {
        let input = r#"<span id="bar">text</span>"#;
        assert_eq!(
            try_parse_native_span(input),
            parsed(26, "text", r#"id="bar""#)
        );
    }

    #[test]
    fn test_parse_span_with_multiple_attrs() {
        let input = r#"<span id="x" class="y z">text</span>"#;
        assert_eq!(
            try_parse_native_span(input),
            parsed(36, "text", r#"id="x" class="y z""#)
        );
    }

    #[test]
    fn test_parse_span_with_markdown() {
        // Inline markup inside the span is returned verbatim as content.
        let input = "<span>*emphasis* and `code`</span>";
        assert_eq!(
            try_parse_native_span(input),
            parsed(34, "*emphasis* and `code`", "")
        );
    }

    #[test]
    fn test_parse_nested_spans() {
        // The outer span ends at the `</span>` that balances it, not the first one.
        let input = "<span>outer <span>inner</span> text</span>";
        assert_eq!(
            try_parse_native_span(input),
            parsed(42, "outer <span>inner</span> text", "")
        );
    }

    #[test]
    fn test_parse_span_with_newlines_in_content() {
        let input = "<span>line 1\nline 2</span>";
        assert_eq!(
            try_parse_native_span(input),
            parsed(26, "line 1\nline 2", "")
        );
    }

    #[test]
    fn test_not_span_no_closing_tag() {
        assert_eq!(try_parse_native_span("<span>text"), None);
    }

    #[test]
    fn test_not_span_wrong_tag() {
        assert_eq!(try_parse_native_span("<spanx>text</spanx>"), None);
    }

    #[test]
    fn test_not_span_no_space_after() {
        // `<span` must be followed by whitespace or `>` to count as a span tag.
        assert_eq!(try_parse_native_span("<spanner>text</spanner>"), None);
    }

    #[test]
    fn test_parse_span_with_quoted_attrs_containing_gt() {
        // A `>` inside a quoted attribute value does not end the opening tag.
        let input = r#"<span title="a > b">text</span>"#;
        assert_eq!(
            try_parse_native_span(input),
            parsed(31, "text", r#"title="a > b""#)
        );
    }

    #[test]
    fn test_parse_empty_span() {
        assert_eq!(try_parse_native_span("<span></span>"), parsed(13, "", ""));
    }

    #[test]
    fn test_parse_span_trailing_text() {
        // Only the span itself is consumed; trailing text is left for the caller.
        let input = "<span>text</span> more";
        assert_eq!(try_parse_native_span(input), parsed(17, "text", ""));
    }

    #[test]
    fn test_parse_span_with_non_ascii_content() {
        // Lengths are byte counts, so multi-byte characters add more than one each.
        let input = r#"<span class="rtl">(شربنا من النيل)</span>"#;
        assert_eq!(
            try_parse_native_span(input),
            parsed(53, "(شربنا من النيل)", r#"class="rtl""#)
        );
    }
}