use rdx_ast::*;
use crate::source_map::SourceMap;
fn is_valid_crossref_target(target: &str) -> bool {
if target.is_empty() {
return false;
}
let bytes = target.as_bytes();
if !bytes[0].is_ascii_alphabetic() && bytes[0] != b'_' {
return false;
}
for &b in &bytes[1..] {
if !b.is_ascii_alphanumeric() && b != b'_' && b != b':' && b != b'.' && b != b'-' {
return false;
}
}
true
}
pub(crate) fn process_text(
text: &str,
start_offset: usize,
sm: &SourceMap,
in_code: bool,
) -> Vec<Node> {
if text.is_empty() {
return vec![];
}
if in_code {
return vec![Node::Text(TextNode {
value: text.to_string(),
position: sm.position(start_offset, start_offset + text.len()),
})];
}
let bytes = text.as_bytes();
let mut nodes = Vec::new();
let mut buf = String::new();
let mut buf_start = start_offset;
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'\\' && i + 1 < bytes.len() {
let next = bytes[i + 1];
if next == b'$' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "$".to_string(),
position: sm.position(start_offset + i, start_offset + i + 2),
}));
i += 2;
buf_start = start_offset + i;
continue;
} else if next == b'[' && i + 2 < bytes.len() && bytes[i + 2] == b'@' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "[@".to_string(),
position: sm.position(start_offset + i, start_offset + i + 3),
}));
i += 3;
buf_start = start_offset + i;
continue;
} else if next == b'{' && i + 2 < bytes.len() && bytes[i + 2] == b'@' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "{@".to_string(),
position: sm.position(start_offset + i, start_offset + i + 3),
}));
i += 3;
buf_start = start_offset + i;
continue;
} else if next == b'{' && i + 2 < bytes.len() && bytes[i + 2] == b'$' {
if let Some(close_rel) = text[i + 2..].find('}') {
let literal = &text[i + 1..i + 2 + close_rel + 1]; flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: literal.to_string(),
position: sm
.position(start_offset + i, start_offset + i + 2 + close_rel + 1),
}));
i = i + 2 + close_rel + 1;
buf_start = start_offset + i;
continue;
}
buf.push('\\');
i += 1;
continue;
} else if next == b'{' && i + 2 < bytes.len() && bytes[i + 2] == b'{' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "{{".to_string(),
position: sm.position(start_offset + i, start_offset + i + 3),
}));
i += 3;
buf_start = start_offset + i;
continue;
} else if next == b'}' && i + 2 < bytes.len() && bytes[i + 2] == b'}' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "}}".to_string(),
position: sm.position(start_offset + i, start_offset + i + 3),
}));
i += 3;
buf_start = start_offset + i;
continue;
} else if next == b'{' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "{".to_string(),
position: sm.position(start_offset + i, start_offset + i + 2),
}));
i += 2;
buf_start = start_offset + i;
continue;
} else if next == b'\\' {
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes.push(Node::Text(TextNode {
value: "\\".to_string(),
position: sm.position(start_offset + i, start_offset + i + 2),
}));
i += 2;
buf_start = start_offset + i;
continue;
}
buf.push('\\');
i += 1;
continue;
}
if bytes[i] == b'[' {
let mut j = i + 1;
let mut found_at = false;
while j < bytes.len() && bytes[j] != b']' && bytes[j] != b'\n' {
if bytes[j] == b'@' {
found_at = true;
}
j += 1;
}
if found_at && j < bytes.len() && bytes[j] == b']' {
let after_close = j + 1;
let is_link = after_close < bytes.len() && bytes[after_close] == b'(';
if !is_link {
let inner_start = if i + 1 < bytes.len() && bytes[i + 1] == b'@' {
i + 2 } else {
i + 1 };
let inner = &text[inner_start..j];
let cite_start = start_offset + i;
let cite_end = start_offset + after_close;
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
let keys = parse_citation_keys_with_prefix(
inner,
i + 1 < bytes.len() && bytes[i + 1] == b'@',
);
nodes.push(Node::Citation(CitationNode {
keys,
position: sm.position(cite_start, cite_end),
}));
i = after_close;
buf_start = start_offset + i;
continue;
}
}
}
if bytes[i] == b'{' && i + 1 < bytes.len() && bytes[i + 1] == b'$' {
if let Some(close_rel) = text[i..].find('}') {
let path = &text[i + 2..i + close_rel]; let var_start = start_offset + i;
let var_end = start_offset + i + close_rel + 1;
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
if crate::is_valid_variable_path(path) {
nodes.push(Node::Variable(VariableNode {
path: path.to_string(),
position: sm.position(var_start, var_end),
}));
} else {
nodes.push(Node::Error(ErrorNode {
message: format!("Invalid variable path: {}", path),
raw_content: text[i..i + close_rel + 1].to_string(),
position: sm.position(var_start, var_end),
}));
}
i = i + close_rel + 1;
buf_start = start_offset + i;
continue;
}
}
if bytes[i] == b'{'
&& i + 1 < bytes.len()
&& bytes[i + 1] == b'@'
&& let Some(close_rel) = text[i..].find('}')
{
let target = &text[i + 2..i + close_rel]; let ref_start = start_offset + i;
let ref_end = start_offset + i + close_rel + 1;
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
if is_valid_crossref_target(target) {
nodes.push(Node::CrossRef(CrossRefNode {
target: target.to_string(),
position: sm.position(ref_start, ref_end),
}));
} else {
nodes.push(Node::Error(ErrorNode {
message: format!("Invalid cross-reference target: {}", target),
raw_content: text[i..i + close_rel + 1].to_string(),
position: sm.position(ref_start, ref_end),
}));
}
i = i + close_rel + 1;
buf_start = start_offset + i;
continue;
}
if bytes[i] == b'$' && (i == 0 || bytes[i - 1] != b'{') {
if let Some(close_rel) = text[i + 1..].find('$') {
let math_content = &text[i + 1..i + 1 + close_rel];
if !math_content.is_empty()
&& !math_content.starts_with(' ')
&& !math_content.ends_with(' ')
{
let math_start = start_offset + i;
let math_end = start_offset + i + 1 + close_rel + 1;
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
let raw = math_content.to_string();
let tree = rdx_math::parse(&raw);
nodes.push(Node::MathInline(MathNode {
raw,
tree,
position: sm.position(math_start, math_end),
}));
i = i + 1 + close_rel + 1;
buf_start = start_offset + i;
continue;
}
}
}
let ch = text[i..].chars().next().unwrap();
buf.push(ch);
i += ch.len_utf8();
}
flush_text_buf(&mut nodes, &mut buf, buf_start, sm);
nodes
}
fn flush_text_buf(nodes: &mut Vec<Node>, buf: &mut String, buf_start: usize, sm: &SourceMap) {
if !buf.is_empty() {
let text = std::mem::take(buf);
let len = text.len();
nodes.push(Node::Text(TextNode {
value: text,
position: sm.position(buf_start, buf_start + len),
}));
}
}
fn parse_citation_keys_with_prefix(inner: &str, first_at_stripped: bool) -> Vec<CitationKey> {
let mut keys = Vec::new();
for (i, part) in inner.split(';').enumerate() {
let part = part.trim();
if part.is_empty() {
continue;
}
if let Some(at_pos) = part.find('@') {
let prefix_raw = part[..at_pos].trim();
let prefix = if prefix_raw.is_empty() {
None
} else {
Some(prefix_raw.to_string())
};
let after_at = &part[at_pos + 1..];
let (id, locator) = split_id_locator(after_at);
if !id.is_empty() {
keys.push(CitationKey {
id,
prefix,
locator,
});
}
} else if i == 0 && first_at_stripped {
let (id, locator) = split_id_locator(part);
if !id.is_empty() {
keys.push(CitationKey {
id,
prefix: None,
locator,
});
}
}
}
keys
}
fn split_id_locator(s: &str) -> (String, Option<String>) {
if let Some(comma_pos) = s.find(',') {
let id = s[..comma_pos].trim().to_string();
let loc = s[comma_pos + 1..].trim().to_string();
let locator = if loc.is_empty() { None } else { Some(loc) };
(id, locator)
} else {
(s.trim().to_string(), None)
}
}
#[cfg(test)]
mod tests {
use crate::parse;
use rdx_ast::*;
#[test]
fn variable_interpolation() {
let root = parse("Hello {$name}!\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_var = p
.children
.iter()
.any(|n| matches!(n, Node::Variable(v) if v.path == "name"));
assert!(has_var, "Should contain variable node: {:?}", p.children);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn variable_not_in_inline_code() {
let root = parse("`{$name}`\n");
match &root.children[0] {
Node::Paragraph(p) => match &p.children[0] {
Node::CodeInline(t) => assert_eq!(t.value, "{$name}"),
other => panic!("Expected code_inline, got {:?}", other),
},
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn variable_not_in_code_block() {
let root = parse("```\n{$variable}\n```\n");
fn find_code_block(nodes: &[Node]) -> Option<&CodeBlockNode> {
for node in nodes {
if let Node::CodeBlock(t) = node {
return Some(t);
}
if let Node::Paragraph(b) | Node::Blockquote(b) = node
&& let Some(t) = find_code_block(&b.children)
{
return Some(t);
}
}
None
}
let code = find_code_block(&root.children).expect("Should have code block");
assert_eq!(code.value, "{$variable}\n");
}
#[test]
fn escape_variable() {
let root = parse("\\{$path}\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_var = p.children.iter().any(|n| matches!(n, Node::Variable(_)));
assert!(!has_var, "Escaped variable should not be a Variable node");
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains("{$path}"),
"Should contain literal {{$path}}, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn escape_double_braces() {
let root = parse("\\{{ and \\}}\n");
match &root.children[0] {
Node::Paragraph(p) => {
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(text.contains("{{"), "got: {}", text);
assert!(text.contains("}}"), "got: {}", text);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn escape_single_brace() {
let root = parse("\\{not a var\n");
match &root.children[0] {
Node::Paragraph(p) => {
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(text.contains("{"), "got: {}", text);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn escape_backslash() {
let root = parse("A \\\\ B\n");
match &root.children[0] {
Node::Paragraph(p) => {
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains("\\"),
"Should contain literal backslash, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn backslash_before_non_special_passthrough() {
let root = parse("\\a\n");
match &root.children[0] {
Node::Paragraph(p) => {
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains("\\a"),
"Should pass through \\a as-is, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn invalid_variable_path_produces_error() {
let root = parse("Hello {$123invalid}!\n");
fn has_error(nodes: &[Node]) -> bool {
nodes.iter().any(|n| match n {
Node::Error(_) => true,
Node::Paragraph(b)
| Node::Heading(b)
| Node::List(b)
| Node::ListItem(b)
| Node::Blockquote(b)
| Node::Html(b)
| Node::Table(b)
| Node::TableRow(b)
| Node::TableCell(b)
| Node::Emphasis(b)
| Node::Strong(b)
| Node::Strikethrough(b) => has_error(&b.children),
Node::Link(l) => has_error(&l.children),
Node::Image(i) => has_error(&i.children),
Node::Component(c) => has_error(&c.children),
Node::FootnoteDefinition(f) => has_error(&f.children),
_ => false,
})
}
assert!(
has_error(&root.children),
"Should have error: {:?}",
root.children
);
}
#[test]
fn escape_dollar_sign() {
let root = parse("Price is \\$10.\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_math = p.children.iter().any(|n| matches!(n, Node::MathInline(_)));
assert!(
!has_math,
"Escaped $ should not produce math: {:?}",
p.children
);
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains('$'),
"Should contain literal $, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn escape_citation_bracket() {
let root = parse("Not a citation: \\[@smith2024].\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_citation = p.children.iter().any(|n| matches!(n, Node::Citation(_)));
assert!(
!has_citation,
"Escaped [@ should not produce citation: {:?}",
p.children
);
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains("[@"),
"Should contain literal [@, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn escape_crossref_brace() {
let root = parse("Not a ref: \\{@fig:arch}.\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_ref = p.children.iter().any(|n| matches!(n, Node::CrossRef(_)));
assert!(
!has_ref,
"Escaped {{@ should not produce cross-ref: {:?}",
p.children
);
let text: String = p
.children
.iter()
.filter_map(|n| {
if let Node::Text(t) = n {
Some(t.value.as_str())
} else {
None
}
})
.collect();
assert!(
text.contains("{@"),
"Should contain literal {{@, got: {}",
text
);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn citation_simple() {
let root = parse("See [@smith2024].\n");
match &root.children[0] {
Node::Paragraph(p) => {
let citation = p.children.iter().find_map(|n| {
if let Node::Citation(c) = n {
Some(c)
} else {
None
}
});
assert!(
citation.is_some(),
"Should have citation node: {:?}",
p.children
);
let c = citation.unwrap();
assert_eq!(c.keys.len(), 1);
assert_eq!(c.keys[0].id, "smith2024");
assert!(c.keys[0].prefix.is_none());
assert!(c.keys[0].locator.is_none());
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn citation_multiple_keys() {
let root = parse("Works [@a; @b].\n");
match &root.children[0] {
Node::Paragraph(p) => {
let citation = p.children.iter().find_map(|n| {
if let Node::Citation(c) = n {
Some(c)
} else {
None
}
});
assert!(citation.is_some(), "Should have citation: {:?}", p.children);
let c = citation.unwrap();
assert_eq!(c.keys.len(), 2);
assert_eq!(c.keys[0].id, "a");
assert_eq!(c.keys[1].id, "b");
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn citation_with_prefix_and_locator() {
let root = parse("As noted [see @smith2024, p. 42].\n");
match &root.children[0] {
Node::Paragraph(p) => {
let citation = p.children.iter().find_map(|n| {
if let Node::Citation(c) = n {
Some(c)
} else {
None
}
});
assert!(citation.is_some(), "Should have citation: {:?}", p.children);
let c = citation.unwrap();
assert_eq!(c.keys.len(), 1);
assert_eq!(c.keys[0].id, "smith2024");
assert_eq!(c.keys[0].prefix.as_deref(), Some("see"));
assert_eq!(c.keys[0].locator.as_deref(), Some("p. 42"));
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn citation_bracket_followed_by_paren_is_link() {
let root = parse("[@twitter](https://twitter.com)\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_citation = p.children.iter().any(|n| matches!(n, Node::Citation(_)));
let has_link = p.children.iter().any(|n| matches!(n, Node::Link(_)));
assert!(
!has_citation,
"[@...](url) should NOT be citation: {:?}",
p.children
);
assert!(has_link, "[@...](url) should be a link: {:?}", p.children);
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn cross_ref_simple() {
let root = parse("See {@fig:arch}.\n");
match &root.children[0] {
Node::Paragraph(p) => {
let xref = p.children.iter().find_map(|n| {
if let Node::CrossRef(r) = n {
Some(r)
} else {
None
}
});
assert!(xref.is_some(), "Should have cross-ref: {:?}", p.children);
assert_eq!(xref.unwrap().target, "fig:arch");
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn cross_ref_with_colon_dot() {
let root = parse("Equation {@eq:euler} is famous.\n");
match &root.children[0] {
Node::Paragraph(p) => {
let xref = p.children.iter().find_map(|n| {
if let Node::CrossRef(r) = n {
Some(r)
} else {
None
}
});
assert!(xref.is_some(), "Should have cross-ref: {:?}", p.children);
assert_eq!(xref.unwrap().target, "eq:euler");
}
other => panic!("Expected paragraph, got {:?}", other),
}
}
#[test]
fn inline_math_does_not_parse_variables_inside() {
let root = parse("Math $x + {$y}$ here.\n");
match &root.children[0] {
Node::Paragraph(p) => {
let has_variable = p.children.iter().any(|n| matches!(n, Node::Variable(_)));
let _ = has_variable; }
other => panic!("Expected paragraph, got {:?}", other),
}
}
}