panache_parser/parser/inlines/
native_spans.rs1use crate::options::{Dialect, ParserOptions};
9use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
12use super::core::parse_inline_text;
13
/// Tries to parse a native HTML `<span ...>...</span>` element at the very start of `text`.
///
/// The opening tag must be the literal `<span` immediately followed by ASCII
/// whitespace (` `, `\t`, `\n`, `\r`) or `>`, so look-alikes such as
/// `<spanner>` are rejected. Nested `<span ...>` elements inside the content
/// are balanced against their own `</span>` closers.
///
/// Both the outer opening tag and any nested opening tag are scanned
/// quote-aware: a `>`, `<span` or `</span>` that appears inside a single- or
/// double-quoted attribute value is not treated as markup.
///
/// # Returns
///
/// `Some((total_len, content, attributes))` on success, where
///
/// * `total_len` is the number of bytes from the start of `text` through the
///   matching `</span>` (inclusive),
/// * `content` is the slice of `text` between the opening tag's `>` and the
///   matching `</span>`,
/// * `attributes` is the text between `<span` and the opening tag's `>`, with
///   surrounding whitespace trimmed.
///
/// `None` when `text` does not start with a span opening tag, when the
/// opening tag is never terminated by `>`, or when no matching `</span>` is
/// found.
pub(crate) fn try_parse_native_span(text: &str) -> Option<(usize, &str, String)> {
    const OPEN: &[u8] = b"<span";
    const CLOSE: &[u8] = b"</span>";

    let bytes = text.as_bytes();
    if !bytes.starts_with(OPEN) || !is_span_name_terminator(bytes, OPEN.len()) {
        return None;
    }

    // Attributes are everything between the tag name and the terminating `>`.
    let attr_start = OPEN.len();
    let open_end = find_open_tag_end(bytes, attr_start)?;
    let attributes = text[attr_start..open_end].trim().to_string();

    let content_start = open_end + 1;
    let mut pos = content_start;
    // Number of currently open `<span>` elements; the outer one counts as 1.
    let mut depth = 1usize;

    while pos < bytes.len() {
        let tail = &bytes[pos..];

        if tail.starts_with(OPEN) && is_span_name_terminator(bytes, pos + OPEN.len()) {
            depth += 1;
            let after_name = pos + OPEN.len();
            // Skip the whole nested opening tag so that quoted attribute
            // values cannot be mistaken for span markup. If the nested tag is
            // never terminated, only step over `<span` and keep scanning.
            pos = find_open_tag_end(bytes, after_name).map_or(after_name, |end| end + 1);
            continue;
        }

        if tail.starts_with(CLOSE) {
            depth -= 1;
            if depth == 0 {
                return Some((pos + CLOSE.len(), &text[content_start..pos], attributes));
            }
            pos += CLOSE.len();
            continue;
        }

        // Advance by one whole character so `pos` stays on a UTF-8 boundary.
        pos += text[pos..].chars().next().map_or(1, char::len_utf8);
    }

    None
}

/// Returns `true` when the byte at `idx` exists and may legally follow the
/// `span` tag name: ASCII whitespace or the closing `>`.
fn is_span_name_terminator(bytes: &[u8], idx: usize) -> bool {
    bytes
        .get(idx)
        .is_some_and(|b| matches!(b, b' ' | b'\t' | b'\n' | b'\r' | b'>'))
}

/// Finds the index of the `>` that terminates an opening tag, scanning from
/// `start` and skipping over single- or double-quoted attribute values.
///
/// Inside a quoted value a backslash skips the following byte. Returns `None`
/// when the input ends before an unquoted `>` is found.
fn find_open_tag_end(bytes: &[u8], start: usize) -> Option<usize> {
    let len = bytes.len();
    let mut pos = start;

    while pos < len && bytes[pos] != b'>' {
        let current = bytes[pos];
        pos += 1;
        if current != b'"' && current != b'\'' {
            continue;
        }

        // Inside a quoted value: run to the matching quote.
        while pos < len && bytes[pos] != current {
            // A backslash escape may step one past the end; the bounds
            // checks below and in the outer loop absorb that.
            pos += if bytes[pos] == b'\\' { 2 } else { 1 };
        }
        if pos < len {
            // Step over the closing quote.
            pos += 1;
        }
    }

    (pos < len).then_some(pos)
}
120
121pub(crate) fn emit_native_span(
131 builder: &mut GreenNodeBuilder,
132 raw: &str,
133 content: &str,
134 config: &ParserOptions,
135) {
136 let close_tag = "</span>";
137 let open_tag_end = raw.len().saturating_sub(content.len() + close_tag.len());
138 let open_tag = &raw[..open_tag_end];
139
140 if config.dialect == Dialect::Pandoc {
141 builder.start_node(SyntaxKind::INLINE_HTML_SPAN.into());
142 emit_span_open_tag_tokens(builder, open_tag);
143 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
144 parse_inline_text(builder, content, config, false);
145 builder.finish_node();
146 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
147 builder.finish_node();
148 return;
149 }
150
151 let attrs_text = open_tag
156 .strip_prefix("<span")
157 .and_then(|s| s.strip_suffix('>'))
158 .map(str::trim)
159 .unwrap_or("");
160 builder.start_node(SyntaxKind::BRACKETED_SPAN.into());
161 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), "<span");
162 if !attrs_text.is_empty() {
163 builder.token(SyntaxKind::WHITESPACE.into(), " ");
164 builder.token(SyntaxKind::SPAN_ATTRIBUTES.into(), attrs_text);
165 }
166 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), ">");
167 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
168 parse_inline_text(builder, content, config, false);
169 builder.finish_node();
170 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
171 builder.finish_node();
172}
173
174fn emit_span_open_tag_tokens(builder: &mut GreenNodeBuilder<'_>, open_tag: &str) {
182 let Some(rest) = open_tag.strip_prefix("<span") else {
183 builder.token(SyntaxKind::TEXT.into(), open_tag);
186 return;
187 };
188 builder.token(SyntaxKind::TEXT.into(), "<span");
189 let Some(inside) = rest.strip_suffix('>') else {
190 builder.token(SyntaxKind::TEXT.into(), rest);
191 return;
192 };
193 let bytes = inside.as_bytes();
194 let leading_ws_end = bytes
196 .iter()
197 .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
198 .unwrap_or(bytes.len());
199 let leading_ws = &inside[..leading_ws_end];
200 let after_leading = &inside[leading_ws_end..];
201 let trailing_ws_start = after_leading
202 .as_bytes()
203 .iter()
204 .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
205 .map(|i| i + 1)
206 .unwrap_or(0);
207 let attrs_text = &after_leading[..trailing_ws_start];
208 let trailing_ws = &after_leading[trailing_ws_start..];
209
210 if !leading_ws.is_empty() {
211 builder.token(SyntaxKind::WHITESPACE.into(), leading_ws);
212 }
213 if !attrs_text.is_empty() {
214 builder.start_node(SyntaxKind::HTML_ATTRS.into());
215 builder.token(SyntaxKind::TEXT.into(), attrs_text);
216 builder.finish_node();
217 }
218 if !trailing_ws.is_empty() {
219 builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
220 }
221 builder.token(SyntaxKind::TEXT.into(), ">");
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses as a span consuming `total_len` bytes with
    /// the given inner `content` and trimmed attribute text `attrs`.
    fn assert_span(input: &str, total_len: usize, content: &str, attrs: &str) {
        assert_eq!(
            try_parse_native_span(input),
            Some((total_len, content, attrs.to_string()))
        );
    }

    /// Asserts that `input` is not recognized as a native span.
    fn assert_not_span(input: &str) {
        assert_eq!(try_parse_native_span(input), None);
    }

    #[test]
    fn test_parse_simple_span() {
        assert_span("<span>text</span>", 17, "text", "");
    }

    #[test]
    fn test_parse_span_with_class() {
        assert_span(
            r#"<span class="foo">text</span>"#,
            29,
            "text",
            r#"class="foo""#,
        );
    }

    #[test]
    fn test_parse_span_with_id() {
        assert_span(r#"<span id="bar">text</span>"#, 26, "text", r#"id="bar""#);
    }

    #[test]
    fn test_parse_span_with_multiple_attrs() {
        assert_span(
            r#"<span id="x" class="y z">text</span>"#,
            36,
            "text",
            r#"id="x" class="y z""#,
        );
    }

    #[test]
    fn test_parse_span_with_markdown() {
        assert_span(
            "<span>*emphasis* and `code`</span>",
            34,
            "*emphasis* and `code`",
            "",
        );
    }

    #[test]
    fn test_parse_nested_spans() {
        assert_span(
            "<span>outer <span>inner</span> text</span>",
            42,
            "outer <span>inner</span> text",
            "",
        );
    }

    #[test]
    fn test_parse_span_with_newlines_in_content() {
        assert_span("<span>line 1\nline 2</span>", 26, "line 1\nline 2", "");
    }

    #[test]
    fn test_not_span_no_closing_tag() {
        assert_not_span("<span>text");
    }

    #[test]
    fn test_not_span_wrong_tag() {
        assert_not_span("<spanx>text</spanx>");
    }

    #[test]
    fn test_not_span_no_space_after() {
        assert_not_span("<spanner>text</spanner>");
    }

    #[test]
    fn test_parse_span_with_quoted_attrs_containing_gt() {
        assert_span(
            r#"<span title="a > b">text</span>"#,
            31,
            "text",
            r#"title="a > b""#,
        );
    }

    #[test]
    fn test_parse_empty_span() {
        assert_span("<span></span>", 13, "", "");
    }

    #[test]
    fn test_parse_span_trailing_text() {
        // Only the span itself is consumed; the trailing " more" is left over.
        assert_span("<span>text</span> more", 17, "text", "");
    }

    #[test]
    fn test_parse_span_with_non_ascii_content() {
        // Lengths are in bytes, so the Arabic text counts two bytes per letter.
        assert_span(
            r#"<span class="rtl">(شربنا من النيل)</span>"#,
            53,
            "(شربنا من النيل)",
            r#"class="rtl""#,
        );
    }
}