panache_parser/parser/inlines/
native_spans.rs1use crate::options::{Dialect, ParserOptions};
9use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
12use super::core::parse_inline_text;
13
14pub(crate) fn try_parse_native_span(text: &str) -> Option<(usize, &str, String)> {
20 let bytes = text.as_bytes();
21
22 if !text.starts_with("<span") {
24 return None;
25 }
26
27 let mut pos = 5; if pos >= text.len() {
31 return None;
32 }
33
34 let next_char = bytes[pos] as char;
35 if !matches!(next_char, ' ' | '\t' | '\n' | '\r' | '>') {
36 return None;
38 }
39
40 let attr_start = pos;
42 while pos < text.len() && bytes[pos] != b'>' {
43 if bytes[pos] == b'"' || bytes[pos] == b'\'' {
45 let quote = bytes[pos];
46 pos += 1;
47 while pos < text.len() && bytes[pos] != quote {
49 if bytes[pos] == b'\\' {
50 pos += 2; } else {
52 pos += 1;
53 }
54 }
55 if pos < text.len() {
56 pos += 1; }
58 } else {
59 pos += 1;
60 }
61 }
62
63 if pos >= text.len() {
64 return None;
66 }
67
68 let attributes = text[attr_start..pos].trim().to_string();
70
71 pos += 1;
73
74 let content_start = pos;
76 let mut depth = 1;
77
78 while pos < text.len() && depth > 0 {
79 if bytes
81 .get(pos..)
82 .is_some_and(|tail| tail.starts_with(b"<span"))
83 {
84 let check_pos = pos + 5;
86 if check_pos < text.len() {
87 let ch = bytes[check_pos] as char;
88 if matches!(ch, ' ' | '\t' | '\n' | '\r' | '>') {
89 depth += 1;
90 pos += 5;
91 continue;
92 }
93 }
94 }
95
96 if bytes
98 .get(pos..)
99 .is_some_and(|tail| tail.starts_with(b"</span>"))
100 {
101 depth -= 1;
102 if depth == 0 {
103 let content = &text[content_start..pos];
105 let total_len = pos + 7; return Some((total_len, content, attributes));
107 }
108 pos += 7;
109 continue;
110 }
111
112 pos += text[pos..].chars().next().map_or(1, char::len_utf8);
115 }
116
117 None
119}
120
121pub(crate) fn emit_native_span(
131 builder: &mut GreenNodeBuilder,
132 raw: &str,
133 content: &str,
134 config: &ParserOptions,
135 suppress_footnote_refs: bool,
136) {
137 let close_tag = "</span>";
138 let open_tag_end = raw.len().saturating_sub(content.len() + close_tag.len());
139 let open_tag = &raw[..open_tag_end];
140
141 if config.dialect == Dialect::Pandoc {
142 builder.start_node(SyntaxKind::INLINE_HTML_SPAN.into());
143 emit_span_open_tag_tokens(builder, open_tag);
144 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
145 parse_inline_text(builder, content, config, false, suppress_footnote_refs);
146 builder.finish_node();
147 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
148 builder.finish_node();
149 return;
150 }
151
152 let attrs_text = open_tag
157 .strip_prefix("<span")
158 .and_then(|s| s.strip_suffix('>'))
159 .map(str::trim)
160 .unwrap_or("");
161 builder.start_node(SyntaxKind::BRACKETED_SPAN.into());
162 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), "<span");
163 if !attrs_text.is_empty() {
164 builder.token(SyntaxKind::WHITESPACE.into(), " ");
165 builder.token(SyntaxKind::SPAN_ATTRIBUTES.into(), attrs_text);
166 }
167 builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), ">");
168 builder.start_node(SyntaxKind::SPAN_CONTENT.into());
169 parse_inline_text(builder, content, config, false, suppress_footnote_refs);
170 builder.finish_node();
171 builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), close_tag);
172 builder.finish_node();
173}
174
175fn emit_span_open_tag_tokens(builder: &mut GreenNodeBuilder<'_>, open_tag: &str) {
183 let Some(rest) = open_tag.strip_prefix("<span") else {
184 builder.token(SyntaxKind::TEXT.into(), open_tag);
187 return;
188 };
189 builder.token(SyntaxKind::TEXT.into(), "<span");
190 let Some(inside) = rest.strip_suffix('>') else {
191 builder.token(SyntaxKind::TEXT.into(), rest);
192 return;
193 };
194 let bytes = inside.as_bytes();
195 let leading_ws_end = bytes
197 .iter()
198 .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
199 .unwrap_or(bytes.len());
200 let leading_ws = &inside[..leading_ws_end];
201 let after_leading = &inside[leading_ws_end..];
202 let trailing_ws_start = after_leading
203 .as_bytes()
204 .iter()
205 .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
206 .map(|i| i + 1)
207 .unwrap_or(0);
208 let attrs_text = &after_leading[..trailing_ws_start];
209 let trailing_ws = &after_leading[trailing_ws_start..];
210
211 if !leading_ws.is_empty() {
212 builder.token(SyntaxKind::WHITESPACE.into(), leading_ws);
213 }
214 if !attrs_text.is_empty() {
215 builder.start_node(SyntaxKind::HTML_ATTRS.into());
216 builder.token(SyntaxKind::TEXT.into(), attrs_text);
217 builder.finish_node();
218 }
219 if !trailing_ws.is_empty() {
220 builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
221 }
222 builder.token(SyntaxKind::TEXT.into(), ">");
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses as a native span with the given total byte
    /// length, inner content, and trimmed attribute text.
    #[track_caller]
    fn assert_span(input: &str, total_len: usize, content: &str, attrs: &str) {
        assert_eq!(
            try_parse_native_span(input),
            Some((total_len, content, attrs.to_string()))
        );
    }

    /// Asserts that `input` is rejected by the native span parser.
    #[track_caller]
    fn assert_not_span(input: &str) {
        assert_eq!(try_parse_native_span(input), None);
    }

    #[test]
    fn test_parse_simple_span() {
        assert_span("<span>text</span>", 17, "text", "");
    }

    #[test]
    fn test_parse_span_with_class() {
        assert_span(r#"<span class="foo">text</span>"#, 29, "text", r#"class="foo""#);
    }

    #[test]
    fn test_parse_span_with_id() {
        assert_span(r#"<span id="bar">text</span>"#, 26, "text", r#"id="bar""#);
    }

    #[test]
    fn test_parse_span_with_multiple_attrs() {
        assert_span(
            r#"<span id="x" class="y z">text</span>"#,
            36,
            "text",
            r#"id="x" class="y z""#,
        );
    }

    #[test]
    fn test_parse_span_with_markdown() {
        // Markdown inside the span is returned verbatim, not interpreted.
        assert_span(
            "<span>*emphasis* and `code`</span>",
            34,
            "*emphasis* and `code`",
            "",
        );
    }

    #[test]
    fn test_parse_nested_spans() {
        // The inner span stays inside the outer span's content.
        assert_span(
            "<span>outer <span>inner</span> text</span>",
            42,
            "outer <span>inner</span> text",
            "",
        );
    }

    #[test]
    fn test_parse_span_with_newlines_in_content() {
        assert_span("<span>line 1\nline 2</span>", 26, "line 1\nline 2", "");
    }

    #[test]
    fn test_not_span_no_closing_tag() {
        assert_not_span("<span>text");
    }

    #[test]
    fn test_not_span_wrong_tag() {
        assert_not_span("<spanx>text</spanx>");
    }

    #[test]
    fn test_not_span_no_space_after() {
        // `<span` must be followed by whitespace or `>` to count as the tag.
        assert_not_span("<spanner>text</spanner>");
    }

    #[test]
    fn test_parse_span_with_quoted_attrs_containing_gt() {
        // A `>` inside a quoted attribute value does not end the opening tag.
        assert_span(
            r#"<span title="a > b">text</span>"#,
            31,
            "text",
            r#"title="a > b""#,
        );
    }

    #[test]
    fn test_parse_empty_span() {
        assert_span("<span></span>", 13, "", "");
    }

    #[test]
    fn test_parse_span_trailing_text() {
        // Only the span itself is consumed; trailing text is not counted.
        assert_span("<span>text</span> more", 17, "text", "");
    }

    #[test]
    fn test_parse_span_with_non_ascii_content() {
        // Lengths are in bytes, so multi-byte characters count more than once.
        assert_span(
            r#"<span class="rtl">(شربنا من النيل)</span>"#,
            53,
            "(شربنا من النيل)",
            r#"class="rtl""#,
        );
    }
}