use crate::options::{Dialect, ParserOptions};
use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;
use super::core::parse_inline_text;
/// Attempts to recognise a native HTML `<span ...>...</span>` at the very
/// start of `text`.
///
/// On success returns `(total_len, content, attributes)` where:
/// * `total_len` is the byte length from the leading `<` through the final
///   `>` of the matching `</span>`,
/// * `content` is the slice of `text` between the opening tag and the
///   matching closing tag (nested spans are left verbatim inside it),
/// * `attributes` is everything between `<span` and the closing `>` of the
///   opening tag, trimmed of surrounding whitespace.
///
/// Returns `None` when `text` does not start with `<span` followed by
/// whitespace or `>`, when the opening tag is never terminated, or when no
/// balancing `</span>` is found.
///
/// NOTE(review): inside quoted attribute values a backslash skips the next
/// byte; HTML itself does not define such escapes — confirm this is intended.
pub(crate) fn try_parse_native_span(text: &str) -> Option<(usize, &str, String)> {
    const OPEN: &[u8] = b"<span";
    const CLOSE: &[u8] = b"</span>";

    /// True for the bytes allowed directly after the `<span` tag name.
    fn ends_tag_name(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'>')
    }

    let src = text.as_bytes();
    let len = src.len();

    // Rejects other tags such as `<spanner>` and a bare trailing `<span`.
    if !src.starts_with(OPEN) || !src.get(OPEN.len()).copied().is_some_and(ends_tag_name) {
        return None;
    }

    // Locate the `>` that ends the opening tag, ignoring any `>` that sits
    // inside a single- or double-quoted attribute value.
    let attrs_from = OPEN.len();
    let mut cursor = attrs_from;
    loop {
        match src.get(cursor).copied() {
            // Ran off the end without seeing `>`.
            None => return None,
            Some(b'>') => break,
            Some(quote @ (b'"' | b'\'')) => {
                cursor += 1;
                while let Some(&byte) = src.get(cursor) {
                    if byte == quote {
                        // Step past the closing quote.
                        cursor += 1;
                        break;
                    }
                    // A backslash also consumes the byte that follows it.
                    cursor += if byte == b'\\' { 2 } else { 1 };
                }
            }
            Some(_) => cursor += 1,
        }
    }

    let attributes = text[attrs_from..cursor].trim().to_string();

    // Walk the body, counting nested `<span` openers until the matching
    // closer is reached.
    let body_from = cursor + 1;
    let mut cursor = body_from;
    let mut open_spans = 1usize;
    while cursor < len {
        let tail = &src[cursor..];

        if tail.starts_with(OPEN) && tail.get(OPEN.len()).copied().is_some_and(ends_tag_name) {
            open_spans += 1;
            cursor += OPEN.len();
            continue;
        }

        if tail.starts_with(CLOSE) {
            open_spans -= 1;
            if open_spans == 0 {
                return Some((cursor + CLOSE.len(), &text[body_from..cursor], attributes));
            }
            cursor += CLOSE.len();
            continue;
        }

        // Advance by one whole character so `cursor` stays on a UTF-8
        // boundary for the slicing above.
        cursor += text[cursor..].chars().next().map_or(1, char::len_utf8);
    }

    None
}
/// Emits the syntax-tree nodes for a native HTML span.
///
/// `raw` is the full span source and `content` the text between the tags;
/// the opening tag is taken to be whatever precedes `content` plus the
/// `</span>` closer at the end of `raw`.
///
/// * For `Dialect::Pandoc` an `INLINE_HTML_SPAN` node is produced and the
///   opening tag is tokenised by `emit_span_open_tag_tokens`.
/// * For every other dialect a `BRACKETED_SPAN` node is produced. Its
///   attributes are emitted trimmed and preceded by a single `" "`
///   whitespace token, whatever whitespace the source used.
///
/// In both cases `content` is parsed as inline text inside a `SPAN_CONTENT`
/// node, followed by the `</span>` token.
pub(crate) fn emit_native_span(
    builder: &mut GreenNodeBuilder,
    raw: &str,
    content: &str,
    config: &ParserOptions,
    suppress_footnote_refs: bool,
) {
    const CLOSE_TAG: &str = "</span>";

    // Everything before the content and the closer is the opening tag.
    let open_tag_len = raw.len().saturating_sub(content.len() + CLOSE_TAG.len());
    let open_tag = &raw[..open_tag_len];

    if config.dialect == Dialect::Pandoc {
        builder.start_node(SyntaxKind::INLINE_HTML_SPAN.into());
        emit_span_open_tag_tokens(builder, open_tag);
    } else {
        // Attribute text only exists when the tag has the expected
        // `<span` ... `>` shape; otherwise it is treated as empty.
        let attrs = match open_tag
            .strip_prefix("<span")
            .and_then(|tail| tail.strip_suffix('>'))
        {
            Some(inner) => inner.trim(),
            None => "",
        };

        builder.start_node(SyntaxKind::BRACKETED_SPAN.into());
        builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), "<span");
        if !attrs.is_empty() {
            builder.token(SyntaxKind::WHITESPACE.into(), " ");
            builder.token(SyntaxKind::SPAN_ATTRIBUTES.into(), attrs);
        }
        builder.token(SyntaxKind::SPAN_BRACKET_OPEN.into(), ">");
    }

    // Shared tail: parsed content, closing tag, then close the span node.
    builder.start_node(SyntaxKind::SPAN_CONTENT.into());
    parse_inline_text(builder, content, config, false, suppress_footnote_refs);
    builder.finish_node();
    builder.token(SyntaxKind::SPAN_BRACKET_CLOSE.into(), CLOSE_TAG);
    builder.finish_node();
}
/// Tokenises an opening `<span ...>` tag into `builder` without dropping any
/// of its text.
///
/// The emitted sequence is: `TEXT("<span")`, optional leading `WHITESPACE`,
/// an optional `HTML_ATTRS` node wrapping the attribute text as `TEXT`,
/// optional trailing `WHITESPACE`, and `TEXT(">")`.
///
/// If `open_tag` does not start with `<span` it is emitted whole as a single
/// `TEXT` token. If it starts with `<span` but does not end with `>`, the
/// remainder after `<span` is emitted as one `TEXT` token.
fn emit_span_open_tag_tokens(builder: &mut GreenNodeBuilder<'_>, open_tag: &str) {
    // The only characters treated as whitespace inside the tag.
    const TAG_WS: &[char] = &[' ', '\t', '\n', '\r'];

    let after_name = match open_tag.strip_prefix("<span") {
        Some(tail) => tail,
        None => {
            builder.token(SyntaxKind::TEXT.into(), open_tag);
            return;
        }
    };
    builder.token(SyntaxKind::TEXT.into(), "<span");

    let interior = match after_name.strip_suffix('>') {
        Some(inner) => inner,
        None => {
            builder.token(SyntaxKind::TEXT.into(), after_name);
            return;
        }
    };

    // Split the interior into leading whitespace, attributes, and trailing
    // whitespace.
    let sans_lead = interior.trim_start_matches(TAG_WS);
    let (lead, _) = interior.split_at(interior.len() - sans_lead.len());
    let attrs = sans_lead.trim_end_matches(TAG_WS);
    let trail = &sans_lead[attrs.len()..];

    if !lead.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), lead);
    }
    if !attrs.is_empty() {
        builder.start_node(SyntaxKind::HTML_ATTRS.into());
        builder.token(SyntaxKind::TEXT.into(), attrs);
        builder.finish_node();
    }
    if !trail.is_empty() {
        builder.token(SyntaxKind::WHITESPACE.into(), trail);
    }
    builder.token(SyntaxKind::TEXT.into(), ">");
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the value `try_parse_native_span` is expected to return for a
    /// successful parse.
    fn parsed<'a>(
        total_len: usize,
        content: &'a str,
        attrs: &str,
    ) -> Option<(usize, &'a str, String)> {
        Some((total_len, content, attrs.to_owned()))
    }

    // --- successful parses -------------------------------------------------

    #[test]
    fn test_parse_simple_span() {
        assert_eq!(
            try_parse_native_span("<span>text</span>"),
            parsed(17, "text", "")
        );
    }

    #[test]
    fn test_parse_span_with_class() {
        assert_eq!(
            try_parse_native_span(r#"<span class="foo">text</span>"#),
            parsed(29, "text", r#"class="foo""#)
        );
    }

    #[test]
    fn test_parse_span_with_id() {
        assert_eq!(
            try_parse_native_span(r#"<span id="bar">text</span>"#),
            parsed(26, "text", r#"id="bar""#)
        );
    }

    #[test]
    fn test_parse_span_with_multiple_attrs() {
        assert_eq!(
            try_parse_native_span(r#"<span id="x" class="y z">text</span>"#),
            parsed(36, "text", r#"id="x" class="y z""#)
        );
    }

    /// Markdown inside the span is returned untouched as content.
    #[test]
    fn test_parse_span_with_markdown() {
        assert_eq!(
            try_parse_native_span("<span>*emphasis* and `code`</span>"),
            parsed(34, "*emphasis* and `code`", "")
        );
    }

    /// The outer span only ends at the closer that balances nested openers.
    #[test]
    fn test_parse_nested_spans() {
        assert_eq!(
            try_parse_native_span("<span>outer <span>inner</span> text</span>"),
            parsed(42, "outer <span>inner</span> text", "")
        );
    }

    #[test]
    fn test_parse_span_with_newlines_in_content() {
        assert_eq!(
            try_parse_native_span("<span>line 1\nline 2</span>"),
            parsed(26, "line 1\nline 2", "")
        );
    }

    // --- rejections --------------------------------------------------------

    #[test]
    fn test_not_span_no_closing_tag() {
        assert_eq!(try_parse_native_span("<span>text"), None);
    }

    #[test]
    fn test_not_span_wrong_tag() {
        assert_eq!(try_parse_native_span("<spanx>text</spanx>"), None);
    }

    #[test]
    fn test_not_span_no_space_after() {
        assert_eq!(try_parse_native_span("<spanner>text</spanner>"), None);
    }

    // --- edge cases --------------------------------------------------------

    /// A `>` inside a quoted attribute value must not end the opening tag.
    #[test]
    fn test_parse_span_with_quoted_attrs_containing_gt() {
        assert_eq!(
            try_parse_native_span(r#"<span title="a > b">text</span>"#),
            parsed(31, "text", r#"title="a > b""#)
        );
    }

    #[test]
    fn test_parse_empty_span() {
        assert_eq!(try_parse_native_span("<span></span>"), parsed(13, "", ""));
    }

    /// Text after the closing tag is not counted in the reported length.
    #[test]
    fn test_parse_span_trailing_text() {
        assert_eq!(
            try_parse_native_span("<span>text</span> more"),
            parsed(17, "text", "")
        );
    }

    /// Lengths are byte lengths, so multi-byte content widens the total.
    #[test]
    fn test_parse_span_with_non_ascii_content() {
        assert_eq!(
            try_parse_native_span(r#"<span class="rtl">(شربنا من النيل)</span>"#),
            parsed(53, "(شربنا من النيل)", r#"class="rtl""#)
        );
    }
}