use std::borrow::Cow;
use rowan::NodeOrToken;
use crate::ast::{AstNode, RoxygenBlock, RoxygenParagraph, RoxygenSection, RoxygenTag};
use crate::parser::parse;
use crate::parser::roxygen::{
MdArgPiece, is_fragile_for_md, is_known_rd_macro, is_two_arg_rd_macro, resolve_md_inline,
resolve_md_inline_pieces, split_table_row_cells,
};
use crate::roxygen::entities;
use crate::syntax::{SyntaxKind, SyntaxNode};
/// Projects every roxygen block found in `text` onto Rd s-expression atoms.
///
/// The text is parsed, each `RoxygenBlock` in the tree appends its section
/// atoms, and the atoms are returned sorted and joined with newlines.
pub fn project_to_rd(text: &str) -> String {
    let root = parse(text).cst;
    let mut atoms: Vec<String> = Vec::new();
    root.descendants()
        .filter_map(RoxygenBlock::cast)
        .for_each(|block| project_block(&block, &mut atoms));
    atoms.sort();
    atoms.join("\n")
}
/// One element of a section body; `serialize_inlines` turns a slice of these
/// into Rd s-expression atoms.
#[derive(Clone)]
enum Inline {
// Plain prose; adjacent text is coalesced into one run before serialization.
Text(String),
// An Rd macro node, rendered by `serialize_macro`.
Macro(SyntaxNode),
// Markdown code span content, rendered by `md_code_atom`.
MdCode(String),
// Markdown emphasis; rendered as `\strong` when `strong` is set, else `\emph`.
MdEmphasis {
strong: bool,
children: Vec<Inline>,
},
// A markdown list still in syntax-node form (`serialize_md_list`).
MdList(SyntaxNode),
// A markdown list whose items were rewritten into inlines
// (`serialize_md_list_resolved`).
MdListResolved {
ordered: bool,
items: Vec<Vec<Inline>>,
},
// A link kept as its raw source text and resolved by `resolve_md_link`.
MdLink(String),
// An inline link with an explicit URL.
MdInlineLink {
url: String,
display: Vec<Inline>,
},
// A reference link: display content plus a destination label.
MdRefLink {
dest: String,
display: Vec<Inline>,
},
// A shortcut link: display content only.
MdShortcutLink {
display: Vec<Inline>,
},
// An image kept as raw source text and resolved by `resolve_md_image`.
MdImage(String),
// A fenced code block node (`serialize_md_code_block`).
MdCodeBlock(SyntaxNode),
// An indented code block node (`serialize_md_indented_code`).
MdIndentedCode(SyntaxNode),
// Raw inline HTML, rendered by `html_inline_atom`.
MdHtml(String),
// An HTML block node (`serialize_md_html_block`).
MdHtmlBlock(SyntaxNode),
// A table node (`serialize_md_table`).
MdTable(SyntaxNode),
// A heading node; `emit_section_with_headings` splits section bodies on these.
MdHeading(SyntaxNode),
// A block quote node; its flattened text is appended to the current text run.
MdBlockQuote(SyntaxNode),
}
fn block_md(block: &RoxygenBlock) -> bool {
let mut md = false;
for section in block.sections() {
if let Some(tag) = section.tag()
&& tag.arg().is_none()
&& tag.text().is_none()
{
match tag.name().as_deref() {
Some("md") => md = true,
Some("noMd") => md = false,
_ => {}
}
}
}
md
}
/// Projects one roxygen block into Rd section atoms appended to `out`.
///
/// Tagged sections are bucketed by tag name: `@slot`/`@field` become items of
/// the "Slots"/"Fields" describe-sections, `@examples`/`@examplesIf` only set
/// a flag, and everything else is kept for `project_tag_section`. Untagged
/// paragraphs supply, in order, the implicit title, description and details
/// whenever no non-NULL explicit tag provides them.
fn project_block(block: &RoxygenBlock, out: &mut Vec<String>) {
    let md = block_md(block);
    let mut intro_paras: Vec<Vec<Inline>> = Vec::new();
    let mut tag_sections: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut slots: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut fields: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut has_examples = false;
    for section in block.sections() {
        if let Some(tag) = section.tag() {
            let name = tag.name().map(|n| n.to_string()).unwrap_or_default();
            let mut body = tag_inlines(&tag);
            for part in section_body_parts(&section) {
                // Parts are newline-separated, except that a part opening with
                // a block quote is attached without a separator.
                if !body.is_empty() && !matches!(part.first(), Some(Inline::MdBlockQuote(_))) {
                    body.push(Inline::Text("\n".to_string()));
                }
                body.extend(part);
            }
            match name.as_str() {
                "slot" | "field" => {
                    // Items whose raw source is not brace-balanced are dropped.
                    if !rd_complete(&section.syntax().text().to_string()) {
                        continue;
                    }
                    let arg = tag.arg().map(|t| t.text().to_string()).unwrap_or_default();
                    if name == "slot" {
                        slots.push((arg, body));
                    } else {
                        fields.push((arg, body));
                    }
                }
                // Outside markdown mode an unbalanced `@section` collapses to
                // a placeholder section.
                "section" if !md && !rd_complete(&section.syntax().text().to_string()) => {
                    out.push("(\\section (TEXT \"NA\"))".to_string());
                }
                "examples" | "examplesIf" => has_examples = true,
                _ => tag_sections.push((name, body)),
            }
        } else {
            intro_paras.extend(section_body_parts(&section));
        }
    }
    let has_explicit_title = tag_sections
        .iter()
        .any(|(n, b)| n == "title" && !is_null_section(b, md));
    let has_explicit_desc = tag_sections
        .iter()
        .any(|(n, b)| n == "description" && !is_null_section(b, md));
    let explicit_title_body = tag_sections
        .iter()
        .find(|(n, b)| n == "title" && !is_null_section(b, md))
        .map(|(_, b)| b.clone());
    // `cursor` counts how many intro paragraphs were consumed as the implicit
    // title/description; whatever is left becomes details.
    let mut cursor = 0usize;
    let intro_title = if has_explicit_title {
        None
    } else {
        intro_paras.get(cursor).inspect(|_| cursor += 1).cloned()
    };
    let intro_desc = if has_explicit_desc {
        None
    } else {
        intro_paras.get(cursor).inspect(|_| cursor += 1).cloned()
    };
    let intro_details = &intro_paras[cursor..];
    let merge_details = !intro_details.is_empty();
    if let Some(title) = &intro_title {
        push_section(out, "title", title, md, false);
    }
    // With neither an intro description nor an explicit one, the description
    // repeats the title (implicit first, explicit otherwise).
    let description = match intro_desc {
        Some(d) => Some(d),
        None if has_explicit_desc => None,
        None => intro_title.clone().or(explicit_title_body),
    };
    if let Some(description) = description {
        emit_section_with_headings(out, "description", &description, md, true);
    }
    if merge_details {
        // Leftover intro paragraphs and every explicit `@details` body are
        // merged into one details section.
        let mut body = join_paras(intro_details);
        for (_, ed) in tag_sections.iter().filter(|(n, _)| n == "details") {
            body.push(Inline::Text("\n".to_string()));
            body.extend(join_paras(std::slice::from_ref(ed)));
        }
        emit_section_with_headings(out, "details", &body, md, true);
    }
    for (name, body) in &tag_sections {
        if merge_details && name == "details" {
            continue;
        }
        project_tag_section(name, body, out, md);
    }
    if !slots.is_empty() {
        out.push(describe_section("Slots", &slots, md));
    }
    if !fields.is_empty() {
        out.push(describe_section("Fields", &fields, md));
    }
    if has_examples {
        out.push("(\\examples ...)".to_string());
    }
}
/// Builds a `\section` atom titled `title` whose body is a `\describe` list.
///
/// Each `(name, definition)` pair becomes an `\item` whose term is the name
/// rendered as `\code` and whose definition, when non-empty, follows as one
/// argument.
fn describe_section(title: &str, items: &[(String, Vec<Inline>)], md: bool) -> String {
    let rendered: Vec<String> = items
        .iter()
        .map(|(name, def)| {
            let code = rcode_atoms(name);
            let mut item = String::from("(\\item ");
            if code.is_empty() {
                item.push_str("(\\code)");
            } else {
                item.push_str("(\\code ");
                item.push_str(&code.join(" "));
                item.push(')');
            }
            let definition = grp_arg(&serialize_prose_with_linkrefs(def, md));
            if !definition.is_empty() {
                item.push(' ');
                item.push_str(&definition);
            }
            item.push(')');
            item
        })
        .collect();
    format!(
        "(\\section (TEXT {}) (\\describe {}))",
        encode_text(title),
        rendered.join(" ")
    )
}
/// Flattens paragraphs into one inline sequence, separating consecutive
/// paragraphs with a single `"\n"` text inline.
fn join_paras(paras: &[Vec<Inline>]) -> Vec<Inline> {
    let mut joined: Vec<Inline> = Vec::new();
    let mut remaining = paras.iter();
    if let Some(first) = remaining.next() {
        joined.extend_from_slice(first);
    }
    for para in remaining {
        joined.push(Inline::Text("\n".to_string()));
        joined.extend_from_slice(para);
    }
    joined
}
/// Emits the Rd atom(s) for one tagged section.
///
/// Sections listed in `NULL_SUPPRESSIBLE` are skipped when their body is the
/// literal `NULL`. `@rawRd` contributes its atoms unwrapped, `@return` maps to
/// `\value`, `@section` is split into title and content at the first colon,
/// and unknown tags produce nothing.
fn project_tag_section(name: &str, body: &[Inline], out: &mut Vec<String>, md: bool) {
    if NULL_SUPPRESSIBLE.contains(&name) && is_null_section(body, md) {
        return;
    }
    match name {
        "rawRd" => out.extend(serialize_inlines(body, md)),
        "description" | "details" => emit_section_with_headings(out, name, body, md, true),
        "return" => push_section(out, "value", body, md, false),
        "seealso" | "source" | "format" | "references" | "note" | "author" | "title" => {
            push_section(out, name, body, md, false)
        }
        "section" => {
            // In markdown mode link references are resolved before the split.
            let rewritten = if md { resolve_linkrefs(body) } else { None };
            let effective = rewritten.as_deref().unwrap_or(body);
            let (heading, content) = split_section_title(effective);
            let title_arg = grp_arg(&serialize_inlines(&heading, md));
            let mut content_atoms = serialize_inlines(&content, md);
            if md {
                for leaked in leaked_linkref_text(&inline_source_skeleton(effective)) {
                    append_rendered_text(&mut content_atoms, &leaked);
                }
            }
            let mut inner = title_arg;
            if !content_atoms.is_empty() {
                if !inner.is_empty() {
                    inner.push(' ');
                }
                inner.push_str(&grp_arg(&content_atoms));
            }
            out.push(format!("(\\section{})", prefix_space(&inner)));
        }
        _ => {}
    }
}
/// Tag names whose section `project_tag_section` drops entirely when the body
/// serializes to the single atom `(TEXT "NULL")`.
const NULL_SUPPRESSIBLE: &[&str] = &[
"description",
"details",
"return",
"seealso",
"source",
"format",
"references",
"note",
"author",
"title",
];
/// True when `body` serializes to exactly one atom and that atom is the text
/// `NULL`.
fn is_null_section(body: &[Inline], md: bool) -> bool {
    matches!(
        serialize_inlines(body, md).as_slice(),
        [only] if only.as_str() == "(TEXT \"NULL\")"
    )
}
/// Appends a `(\macro_name …)` atom for `body` to `out`.
///
/// The bare form `(\macro_name)` is emitted when the body has no atoms, or
/// when completeness is enforced and the rendered Rd is not brace-complete.
/// Completeness is always enforced outside markdown mode; in markdown mode
/// only when `drop_on_incomplete` is set.
fn push_section(
    out: &mut Vec<String>,
    macro_name: &str,
    body: &[Inline],
    md: bool,
    drop_on_incomplete: bool,
) {
    let atoms = serialize_prose_with_linkrefs(body, md);
    let enforce_complete = !md || drop_on_incomplete;
    let bare =
        (enforce_complete && !section_atoms_rd_complete(&atoms, md)) || atoms.is_empty();
    let rendered = if bare {
        format!("(\\{macro_name})")
    } else {
        format!("(\\{macro_name} {})", atoms.join(" "))
    };
    out.push(rendered);
}
/// One node of the heading tree built by `emit_section_with_headings`.
struct HeadingFrame {
// Heading level; the synthetic root frame uses 0.
level: usize,
// Resolved inlines of the heading title (empty for the root).
title: Vec<Inline>,
// Inlines that follow the heading, up to the next heading.
body: Vec<Inline>,
// Indices (into the shared frame vector) of directly nested headings.
children: Vec<usize>,
}
/// Emits `body` as the `macro_name` section, turning markdown headings into
/// nested `\subsection` atoms and level-1 headings into separate `\section`
/// atoms.
///
/// When `body` contains no `Inline::MdHeading` this is exactly `push_section`;
/// `drop_on_incomplete` is only consulted on that path.
fn emit_section_with_headings(
out: &mut Vec<String>,
macro_name: &str,
body: &[Inline],
md: bool,
drop_on_incomplete: bool,
) {
// Split the body at each heading: segment 0 holds what precedes the first
// heading, every later segment carries the heading node that opened it.
let mut segments: Vec<(Option<SyntaxNode>, Vec<Inline>)> = vec![(None, Vec::new())];
for inl in body {
if let Inline::MdHeading(node) = inl {
segments.push((Some(node.clone()), Vec::new()));
} else {
segments.last_mut().unwrap().1.push(inl.clone());
}
}
if segments.len() == 1 {
push_section(out, macro_name, body, md, drop_on_incomplete);
return;
}
// Frame 0 is a synthetic level-0 root owning the leading prose.
let mut frames: Vec<HeadingFrame> = vec![HeadingFrame {
level: 0,
title: Vec::new(),
body: std::mem::take(&mut segments[0].1),
children: Vec::new(),
}];
// `stack` holds the indices of the currently open frames, innermost last.
let mut stack = vec![0usize];
for (node, run) in segments.into_iter().skip(1) {
let node = node.expect("a non-leading segment always carries a heading");
let (level, title_text) = parse_md_heading(&node);
let title = resolve_macro_arg_inlines(&title_text);
// Close every open frame at the same or a deeper level. The root stays
// on the stack because `parse_md_heading` never returns a level below 1.
while frames[*stack.last().unwrap()].level >= level {
stack.pop();
}
let parent = *stack.last().unwrap();
let idx = frames.len();
frames.push(HeadingFrame {
level,
title,
body: run,
children: Vec::new(),
});
frames[parent].children.push(idx);
stack.push(idx);
}
// The macro itself carries the root prose plus the root's level >= 2
// headings as subsections; it is skipped when that yields no atoms.
let mut inner = serialize_prose_with_linkrefs(&frames[0].body, md);
for &c in &frames[0].children {
if frames[c].level >= 2 {
inner.push(render_heading_frame(&frames, c, md, "subsection"));
}
}
if !inner.is_empty() {
out.push(format!("(\\{macro_name} {})", inner.join(" ")));
}
// Level-1 headings become separate `\section` atoms after the macro.
for &c in &frames[0].children {
if frames[c].level == 1 {
out.push(render_heading_frame(&frames, c, md, "section"));
}
}
}
/// Renders the heading frame at `idx` as a `(\macro_name title body)` atom.
///
/// Nested frames are rendered recursively as `subsection` atoms and appended
/// to the body; title and body are each collapsed to a single argument.
fn render_heading_frame(frames: &[HeadingFrame], idx: usize, md: bool, macro_name: &str) -> String {
    let frame = &frames[idx];
    let title_arg = grp_arg(&serialize_inlines(&frame.title, md));
    let mut body_atoms = serialize_prose_with_linkrefs(&frame.body, md);
    body_atoms.extend(
        frame
            .children
            .iter()
            .map(|&child| render_heading_frame(frames, child, md, "subsection")),
    );
    let body_arg = grp_arg(&body_atoms);
    let inner = match (title_arg.is_empty(), body_arg.is_empty()) {
        (_, true) => title_arg,
        (true, false) => body_arg,
        (false, false) => format!("{title_arg} {body_arg}"),
    };
    format!("(\\{macro_name}{})", prefix_space(&inner))
}
/// Extracts `(level, title)` from a markdown heading node.
///
/// A multi-line node whose last line is a setext underline yields level 1
/// (`=`) or 2 (`-`) with the preceding lines trimmed and space-joined.
/// Otherwise the node is read as an ATX heading: the count of leading `#`
/// clamped to 1..=6, with any closing `#` run removed from the title.
fn parse_md_heading(node: &SyntaxNode) -> (usize, String) {
    let text = node.text().to_string();
    let stripped: Vec<&str> = text.split('\n').map(strip_marker).collect();
    if let [title_lines @ .., underline] = stripped.as_slice()
        && !title_lines.is_empty()
        && let Some(level) = setext_underline_level(underline)
    {
        let title = title_lines
            .iter()
            .map(|line| line.trim())
            .collect::<Vec<_>>()
            .join(" ");
        return (level, title);
    }
    let line = strip_marker(&text).trim_start();
    let hashes = line.bytes().take_while(|&b| b == b'#').count();
    let level = hashes.clamp(1, 6);
    let rest = line.get(level..).map_or("", str::trim);
    (level, strip_atx_closing(rest).to_string())
}
/// Returns the heading level of a setext underline: 1 for a line made only of
/// `=`, 2 for a line made only of `-` (surrounding whitespace ignored), and
/// `None` for anything else.
fn setext_underline_level(line: &str) -> Option<usize> {
    let trimmed = line.trim();
    let marker = *trimmed.as_bytes().first()?;
    let level = match marker {
        b'=' => 1,
        b'-' => 2,
        _ => return None,
    };
    trimmed.bytes().all(|b| b == marker).then_some(level)
}
/// Removes an ATX closing sequence from a heading title.
///
/// A trailing run of `#` is dropped only when it is the whole title or is
/// preceded by a space or tab; otherwise the (right-trimmed) input is kept.
fn strip_atx_closing(s: &str) -> &str {
    let trimmed = s.trim_end();
    let before = trimmed.trim_end_matches('#');
    if before.len() == trimmed.len() {
        // No trailing hashes at all.
        return trimmed;
    }
    let detached = before.is_empty() || before.ends_with([' ', '\t']);
    if detached { before.trim_end() } else { trimmed }
}
/// Renders `atoms` back to Rd text and reports whether that text is
/// brace-complete according to `rd_complete`.
fn section_atoms_rd_complete(atoms: &[String], md: bool) -> bool {
    let rendered = atoms.iter().fold(String::new(), |mut rd, atom| {
        sexpr_to_rd(atom, md, &mut rd);
        rd
    });
    rd_complete(&rendered)
}
/// Appends the Rd rendering of the single s-expression `atom` to `out`,
/// starting the scan at its first byte.
fn sexpr_to_rd(atom: &str, md: bool, out: &mut String) {
    let mut cursor = 0usize;
    render_sexpr(atom.as_bytes(), &mut cursor, md, out);
}
/// Renders the s-expression starting at `bytes[*i]` as Rd text into `out`,
/// leaving `*i` just past the expression.
///
/// * Leaf heads (`TEXT`, `RCODE`, `VERB`, `UNKNOWN`) emit their quoted payload;
///   `%` is backslash-escaped when `md` is set.
/// * `GRP` emits its children back to back.
/// * Any other head is emitted verbatim, followed by each child wrapped in
///   `{…}`.
///
/// Nothing happens when `bytes[*i]` is not `(`. A stray bare token in child
/// position (for example `...` in `(\examples ...)`) is skipped; previously it
/// was never consumed and the child loop spun forever.
fn render_sexpr(bytes: &[u8], i: &mut usize, md: bool, out: &mut String) {
    if bytes.get(*i) != Some(&b'(') {
        return;
    }
    *i += 1;
    let head_start = *i;
    while let Some(&c) = bytes.get(*i) {
        if c == b' ' || c == b')' {
            break;
        }
        *i += 1;
    }
    let head = &bytes[head_start..*i];
    let is_leaf = matches!(head, b"TEXT" | b"RCODE" | b"VERB" | b"UNKNOWN");
    let escape_percent = md;
    if is_leaf {
        skip_spaces(bytes, i);
        if bytes.get(*i) == Some(&b'"') {
            let text = read_quoted(bytes, i);
            append_leaf_text(&text, escape_percent, out);
        }
        // Consume through the closing parenthesis of the leaf.
        while let Some(&c) = bytes.get(*i) {
            *i += 1;
            if c == b')' {
                break;
            }
        }
        return;
    }
    let is_grp = head == b"GRP";
    if !is_grp {
        out.push_str(std::str::from_utf8(head).unwrap_or(""));
    }
    loop {
        skip_spaces(bytes, i);
        match bytes.get(*i) {
            None => break,
            Some(&b')') => {
                *i += 1;
                break;
            }
            Some(&b'(') => {
                if is_grp {
                    render_sexpr(bytes, i, md, out);
                } else {
                    out.push('{');
                    render_sexpr(bytes, i, md, out);
                    out.push('}');
                }
            }
            Some(_) => {
                // Bare token: not a nested expression, so the recursive call
                // would not advance. Skip to the next delimiter so the loop
                // always makes progress.
                while let Some(&c) = bytes.get(*i) {
                    if c == b' ' || c == b'(' || c == b')' {
                        break;
                    }
                    *i += 1;
                }
            }
        }
    }
}
/// Advances `*i` past any run of ASCII spaces starting at `bytes[*i]`.
/// A cursor at or beyond the end is left untouched.
fn skip_spaces(bytes: &[u8], i: &mut usize) {
    let rest = bytes.get(*i..).unwrap_or(&[]);
    *i += rest.iter().take_while(|&&b| b == b' ').count();
}
/// Reads a double-quoted string literal starting at `bytes[*i]` (which must be
/// the opening quote) and returns its unescaped content, leaving `*i` just
/// past the closing quote.
///
/// `\n` decodes to a newline; a backslash before any other character yields
/// that character; a trailing lone backslash is kept. An unterminated literal
/// runs to the end of input.
///
/// Escaped characters are decoded as whole UTF-8 sequences. The previous code
/// cast the byte after the backslash to `char`, which turned the lead byte of
/// a multi-byte character into a Latin-1 character and dropped the rest.
fn read_quoted(bytes: &[u8], i: &mut usize) -> String {
    // Copies the UTF-8 sequence starting at `bytes[*i]` into `out` and moves
    // the cursor past it; a malformed sequence is skipped.
    fn push_char(bytes: &[u8], i: &mut usize, out: &mut String) {
        let start = *i;
        *i += 1;
        while bytes.get(*i).is_some_and(|b| b & 0xC0 == 0x80) {
            *i += 1;
        }
        out.push_str(std::str::from_utf8(&bytes[start..*i]).unwrap_or(""));
    }

    *i += 1; // opening quote
    let mut out = String::new();
    while let Some(&c) = bytes.get(*i) {
        match c {
            b'"' => {
                *i += 1;
                break;
            }
            b'\\' => {
                *i += 1;
                match bytes.get(*i) {
                    Some(b'n') => {
                        out.push('\n');
                        *i += 1;
                    }
                    Some(_) => push_char(bytes, i, &mut out),
                    None => {
                        out.push('\\');
                        *i += 1;
                    }
                }
            }
            _ => push_char(bytes, i, &mut out),
        }
    }
    out
}
/// Appends `text` to `out`, writing every `%` as `\%` when `escape_percent`
/// is set and copying the text unchanged otherwise.
fn append_leaf_text(text: &str, escape_percent: bool, out: &mut String) {
    if !escape_percent {
        out.push_str(text);
        return;
    }
    let mut pieces = text.split('%');
    if let Some(first) = pieces.next() {
        out.push_str(first);
    }
    for piece in pieces {
        out.push_str("\\%");
        out.push_str(piece);
    }
}
/// Reports whether `s` is a complete piece of Rd text.
///
/// Braces are counted outside escapes and comments: a backslash escapes the
/// next character, and `%` starts a comment that runs to the end of the line.
/// The text is complete when the net brace count is zero and it does not end
/// on a dangling backslash.
fn rd_complete(s: &str) -> bool {
    let mut depth: i64 = 0;
    let mut escaped = false;
    let mut in_comment = false;
    for c in s.chars() {
        if escaped {
            escaped = false;
            continue;
        }
        if in_comment {
            in_comment = c != '\n';
            continue;
        }
        match c {
            '{' => depth += 1,
            '}' => depth -= 1,
            '\\' => escaped = true,
            '%' => in_comment = true,
            _ => {}
        }
    }
    depth == 0 && !escaped
}
/// Serializes `body` to atoms. In markdown mode link references are resolved
/// first, and any leaked link-reference text found in the resolved body's
/// source skeleton is appended afterwards.
fn serialize_prose_with_linkrefs(body: &[Inline], md: bool) -> Vec<String> {
    if !md {
        return serialize_inlines(body, md);
    }
    let rewritten = resolve_linkrefs(body);
    let effective = rewritten.as_deref().unwrap_or(body);
    let mut atoms = serialize_inlines(effective, md);
    for leaked in leaked_linkref_text(&inline_source_skeleton(effective)) {
        append_rendered_text(&mut atoms, &leaked);
    }
    atoms
}
fn resolve_linkrefs(body: &[Inline]) -> Option<Vec<Inline>> {
let mut urls: std::collections::HashMap<String, String> = std::collections::HashMap::new();
collect_user_linkrefs_tree(body, &mut urls);
let resolved = (!urls.is_empty())
.then(|| apply_user_linkrefs(body, &urls))
.flatten();
let b1 = resolved.as_deref().unwrap_or(body);
let undefined = demote_undefined_links(b1, &linkref_keys(b1));
let b2 = undefined.as_deref().unwrap_or(b1);
let demoted = demote_poisoned_links(b2);
if resolved.is_some() || undefined.is_some() || demoted.is_some() {
Some(demoted.unwrap_or_else(|| b2.to_vec()))
} else {
None
}
}
/// A piece of the pending text run accumulated by `serialize_inlines`.
enum RunSeg {
// Source text that `flush_run` still passes through `process_prose`.
Raw(String),
// Text that `flush_run` appends as-is, without prose processing.
Final(String),
}
/// Appends raw text to the run, extending the trailing `Raw` segment when
/// there is one and starting a new segment otherwise.
fn push_raw(run: &mut Vec<RunSeg>, s: &str) {
    if let Some(RunSeg::Raw(tail)) = run.last_mut() {
        tail.push_str(s);
        return;
    }
    run.push(RunSeg::Raw(s.to_string()));
}
/// Strips trailing POSIX whitespace from the end of the run.
///
/// Only trailing `Raw` segments are touched: a segment that becomes empty is
/// removed and trimming continues with the one before it. Trimming stops at
/// the first segment that keeps content, needs no trimming, or is `Final`.
fn trim_trailing_run_ws(run: &mut Vec<RunSeg>) {
    loop {
        let Some(RunSeg::Raw(tail)) = run.last_mut() else {
            return;
        };
        let keep = tail.trim_end_matches(is_posix_space).len();
        if keep == tail.len() {
            return;
        }
        tail.truncate(keep);
        if keep > 0 {
            return;
        }
        run.pop();
    }
}
/// Empties the pending run and turns it into a single text atom.
///
/// `Raw` segments go through `process_prose`, `Final` segments are used
/// verbatim. Returns `None` for an empty run, or when `text_atom` yields
/// nothing for the combined text.
fn flush_run(run: &mut Vec<RunSeg>, md: bool) -> Option<String> {
    if run.is_empty() {
        return None;
    }
    let combined: String = run
        .drain(..)
        .map(|seg| match seg {
            RunSeg::Raw(raw) => process_prose(&raw, md),
            RunSeg::Final(done) => done,
        })
        .collect();
    text_atom(&combined)
}
fn serialize_inlines(body: &[Inline], md: bool) -> Vec<String> {
let mut atoms: Vec<String> = Vec::new();
let mut run: Vec<RunSeg> = Vec::new();
for inl in body {
match inl {
Inline::Text(s) => push_raw(&mut run, s),
Inline::Macro(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(serialize_macro(node, md));
}
Inline::MdCode(content) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(md_code_atom(content));
}
Inline::MdEmphasis { strong, children } => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
let inner = serialize_inlines(children, md).join(" ");
let head = if *strong { "\\strong" } else { "\\emph" };
atoms.push(if inner.is_empty() {
format!("({head})")
} else {
format!("({head} {inner})")
});
}
Inline::MdList(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(serialize_md_list(node));
}
Inline::MdListResolved { ordered, items } => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(serialize_md_list_resolved(*ordered, items));
}
Inline::MdLink(raw) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(resolve_md_link(raw).unwrap_or_default());
}
Inline::MdInlineLink { url, display } => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(inline_link_node_atom(url, display, md));
}
Inline::MdRefLink { dest, display } => {
if link_display_is_droppable(display) {
continue;
}
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(ref_link_node_atom(display, dest));
}
Inline::MdShortcutLink { display } => {
if link_display_is_droppable(display) {
continue;
}
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(shortcut_link_node_atom(display));
}
Inline::MdImage(raw) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
if let Some(atom) = resolve_md_image(raw) {
atoms.push(atom);
}
}
Inline::MdCodeBlock(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.extend(serialize_md_code_block(node));
}
Inline::MdIndentedCode(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.extend(serialize_md_indented_code(node));
}
Inline::MdHtml(raw) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(html_inline_atom(raw));
}
Inline::MdHtmlBlock(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(serialize_md_html_block(node));
}
Inline::MdBlockQuote(node) => {
let flat = block_quote_flat_text(node);
if !flat.is_empty() {
trim_trailing_run_ws(&mut run);
run.push(RunSeg::Final(flat));
}
}
Inline::MdTable(node) => {
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms.push(serialize_md_table(node));
}
Inline::MdHeading(node) => {
let (_, title) = parse_md_heading(node);
for atom in serialize_inlines(&resolve_macro_arg_inlines(&title), md) {
if let Some(prose) = flush_run(&mut run, md) {
atoms.push(prose);
}
atoms.push(atom);
}
}
}
}
if let Some(atom) = flush_run(&mut run, md) {
atoms.push(atom);
}
atoms
}
/// Serializes one Rd macro node into an s-expression atom.
///
/// `\preformatted` is rendered from `preformatted_atoms`. In markdown mode,
/// inline text macros have their single argument re-resolved as markdown
/// inlines and structural macros are delegated to
/// `serialize_md_structural_macro`. Every other macro is rendered by walking
/// its children.
fn serialize_macro(node: &SyntaxNode, md: bool) -> String {
if macro_head(node).trim_start_matches('\\') == "preformatted" {
let atoms = preformatted_atoms(node);
return if atoms.is_empty() {
"(\\preformatted)".to_string()
} else {
format!("(\\preformatted {})", atoms.join(" "))
};
}
let head_full = macro_head(node);
let name = head_full.trim_start_matches('\\');
if md
&& is_md_inline_text_macro(name)
&& let Some(content) = macro_single_arg_content(node)
{
let atoms = serialize_inlines(&resolve_macro_arg_inlines(&content), md);
return if atoms.is_empty() {
format!("({head_full})")
} else {
format!("({head_full} {})", atoms.join(" "))
};
}
if md && is_md_structural_macro(name) {
return serialize_md_structural_macro(node, &head_full);
}
// Generic walk: `run` collects loose token text, `group` the atoms of the
// argument currently being read, `out_atoms` the finished arguments.
let mut head = String::new();
let mut structural = false;
let mut out_atoms: Vec<String> = Vec::new();
let mut group: Vec<String> = Vec::new();
let mut run = String::new();
// Converts the pending text into atoms: RCODE atoms when `code` is set,
// otherwise at most one TEXT atom.
let flush = |run: &mut String, group: &mut Vec<String>, code: bool| {
if code {
group.extend(rcode_atoms(run));
} else if let Some(atom) = text_atom(run) {
group.push(atom);
}
run.clear();
};
// Closes an argument: for two-argument macros a multi-atom argument is
// wrapped in GRP so it stays a single argument; otherwise atoms are appended
// flat.
let finalize = |group: &mut Vec<String>, out: &mut Vec<String>, structural: bool| {
if structural && group.len() > 1 {
out.push(format!("(GRP {})", group.join(" ")));
group.clear();
} else {
out.append(group);
}
};
for el in node.children_with_tokens() {
match el.kind() {
SyntaxKind::ROXYGEN_RD_MACRO_NAME => {
head = el
.as_token()
.map(|t| t.text().to_string())
.unwrap_or_default();
structural = is_two_arg_rd_macro(head.trim_start_matches('\\'));
}
SyntaxKind::ROXYGEN_RD_MACRO_VERB => {
flush(&mut run, &mut group, head == "\\code");
let raw = el
.as_token()
.map(|t| t.text().to_string())
.unwrap_or_default();
group.push(format!("(VERB {})", encode_text(&raw)));
}
SyntaxKind::ROXYGEN_RD_MACRO => {
flush(&mut run, &mut group, head == "\\code");
if let Some(n) = el.as_node() {
group.push(serialize_macro(n, md));
}
}
SyntaxKind::ROXYGEN_RD_MACRO_DELIM => {
// Only a closing brace ends an argument; other delimiters are ignored.
if el.as_token().is_some_and(|t| t.text() == "}") {
flush(&mut run, &mut group, head == "\\code");
finalize(&mut group, &mut out_atoms, structural);
}
}
SyntaxKind::ROXYGEN_RD_MACRO_OPT | SyntaxKind::ROXYGEN_MARKER => {}
_ => {
if let Some(t) = el.as_token() {
run.push_str(t.text());
}
}
}
}
// Flush whatever is still pending after the last child.
flush(&mut run, &mut group, head == "\\code");
finalize(&mut group, &mut out_atoms, structural);
if out_atoms.is_empty() {
// A macro without content keeps its head when it is a known Rd macro and
// is reported as UNKNOWN otherwise.
let name = head.trim_start_matches('\\');
if is_known_rd_macro(name) {
format!("({head})")
} else {
format!("(UNKNOWN {})", encode_text(&head))
}
} else {
format!("({head} {})", out_atoms.join(" "))
}
}
fn serialize_md_structural_macro(node: &SyntaxNode, head_full: &str) -> String {
let mut out_atoms: Vec<String> = Vec::new();
let mut pieces: Vec<MdArgPiece> = Vec::new();
let mut run = String::new();
let mut verb: Option<String> = None;
let flush = |run: &mut String, pieces: &mut Vec<MdArgPiece>| {
if !run.is_empty() {
pieces.push(MdArgPiece::Text(std::mem::take(run)));
}
};
for el in node.children_with_tokens() {
match el.kind() {
SyntaxKind::ROXYGEN_RD_MACRO_NAME => {}
SyntaxKind::ROXYGEN_RD_MACRO_VERB => {
let raw = el
.as_token()
.map(|t| t.text().to_string())
.unwrap_or_default();
verb = Some(format!("(VERB {})", encode_text(&raw)));
}
SyntaxKind::ROXYGEN_RD_MACRO => {
flush(&mut run, &mut pieces);
if let Some(n) = el.as_node() {
pieces.push(MdArgPiece::Macro(n.text().to_string()));
}
}
SyntaxKind::ROXYGEN_RD_MACRO_DELIM => {
if el.as_token().is_some_and(|t| t.text() == "}") {
flush(&mut run, &mut pieces);
if let Some(v) = verb.take() {
out_atoms.push(v);
} else {
let para = resolve_md_inline_pieces(&pieces);
let atoms = serialize_inlines(¶_to_inlines(¶), true);
match atoms.len() {
0 => {}
1 => out_atoms.push(atoms.into_iter().next().unwrap()),
_ => out_atoms.push(format!("(GRP {})", atoms.join(" "))),
}
}
pieces.clear();
}
}
SyntaxKind::ROXYGEN_RD_MACRO_OPT | SyntaxKind::ROXYGEN_MARKER => {}
_ => {
if let Some(t) = el.as_token() {
run.push_str(t.text());
}
}
}
}
if out_atoms.is_empty() {
format!("({head_full})")
} else {
format!("({head_full} {})", out_atoms.join(" "))
}
}
/// True for known, non-fragile, single-argument Rd macros whose argument is
/// treated as markdown inline text; list-like and special macros are
/// excluded.
fn is_md_inline_text_macro(name: &str) -> bool {
    const EXCLUDED: [&str; 6] = ["itemize", "enumerate", "describe", "Sexpr", "RdOpts", "enc"];
    if !is_known_rd_macro(name) || is_fragile_for_md(name) || is_two_arg_rd_macro(name) {
        return false;
    }
    !EXCLUDED.contains(&name)
}
/// True for known, non-fragile Rd macros that take two arguments.
fn is_md_structural_macro(name: &str) -> bool {
    if !is_known_rd_macro(name) || is_fragile_for_md(name) {
        return false;
    }
    is_two_arg_rd_macro(name)
}
/// Returns the source text of the macro's first brace-delimited argument, or
/// `None` when the macro has no opening brace.
///
/// Roxygen markers are skipped, nested nodes contribute their full text, and
/// anything after the first argument's closing brace is ignored.
fn macro_single_arg_content(node: &SyntaxNode) -> Option<String> {
    let mut content = String::new();
    let mut opened = false;
    let mut inside = false;
    for el in node.children_with_tokens() {
        let kind = el.kind();
        if kind == SyntaxKind::ROXYGEN_MARKER {
            continue;
        }
        if kind == SyntaxKind::ROXYGEN_RD_MACRO_DELIM {
            let delim = el
                .as_token()
                .map(|t| t.text().to_string())
                .unwrap_or_default();
            match delim.as_str() {
                "{" if !opened => {
                    opened = true;
                    inside = true;
                }
                "}" if inside => inside = false,
                _ => {}
            }
            continue;
        }
        if inside {
            match el {
                NodeOrToken::Node(n) => content.push_str(&n.text().to_string()),
                NodeOrToken::Token(t) => content.push_str(t.text()),
            }
        }
    }
    if opened { Some(content) } else { None }
}
/// Resolves `content` as markdown inline text and converts the resulting
/// paragraph node into inlines.
fn resolve_macro_arg_inlines(content: &str) -> Vec<Inline> {
    let paragraph = resolve_md_inline(content);
    para_to_inlines(&paragraph)
}
/// Converts the children of a paragraph node into inlines: markers are
/// dropped, newlines become a `SOFT_BREAK` text inline, and every other
/// element is handed to `push_inline`.
fn para_to_inlines(para: &SyntaxNode) -> Vec<Inline> {
    let mut inlines = Vec::new();
    for el in para.children_with_tokens() {
        let kind = el.kind();
        if kind == SyntaxKind::ROXYGEN_MARKER {
            continue;
        }
        if kind == SyntaxKind::NEWLINE {
            inlines.push(Inline::Text(SOFT_BREAK.to_string()));
            continue;
        }
        push_inline(&mut inlines, el);
    }
    inlines
}
/// Returns the text of the macro's name token (backslash included as it
/// appears in the source), or an empty string when there is none.
fn macro_head(node: &SyntaxNode) -> String {
    for el in node.children_with_tokens() {
        if el.kind() == SyntaxKind::ROXYGEN_RD_MACRO_NAME {
            return el
                .as_token()
                .map(|t| t.text().to_string())
                .unwrap_or_default();
        }
    }
    String::new()
}
/// Produces the verbatim atoms for a `\preformatted` macro.
///
/// The body is everything between the first `{` and the last `}` of the node
/// text; every line after the first has its roxygen marker stripped. Returns
/// no atoms when the braces are missing or out of order.
fn preformatted_atoms(node: &SyntaxNode) -> Vec<String> {
    let text = node.text().to_string();
    let (open, close) = match (text.find('{'), text.rfind('}')) {
        (Some(open), Some(close)) if close > open => (open, close),
        _ => return Vec::new(),
    };
    let mut lines = text[open + 1..close].split('\n');
    // `split` always yields at least one piece, so the first line exists.
    let mut body = lines.next().unwrap_or_default().to_string();
    for line in lines {
        body.push('\n');
        body.push_str(strip_marker(line));
    }
    verb_atoms(&body)
}
/// Splits a `@section` body into `(title, content)` at the first colon found
/// in a text inline.
///
/// Inlines before that text belong to the title; the text itself is cut at
/// the colon (the colon is discarded and empty halves are omitted); all
/// later inlines are content. Without a colon the whole body is the title.
fn split_section_title(body: &[Inline]) -> (Vec<Inline>, Vec<Inline>) {
    let split_at = body
        .iter()
        .position(|inl| matches!(inl, Inline::Text(t) if t.contains(':')));
    let Some(pos) = split_at else {
        return (body.to_vec(), Vec::new());
    };
    let mut title: Vec<Inline> = body[..pos].to_vec();
    let mut content: Vec<Inline> = Vec::new();
    if let Inline::Text(t) = &body[pos]
        && let Some((head, tail)) = t.split_once(':')
    {
        if !head.is_empty() {
            title.push(Inline::Text(head.to_string()));
        }
        if !tail.is_empty() {
            content.push(Inline::Text(tail.to_string()));
        }
    }
    content.extend_from_slice(&body[pos + 1..]);
    (title, content)
}
/// Collapses atoms into one macro argument: nothing for no atoms, the atom
/// itself for one, and a `(GRP …)` wrapper for several.
fn grp_arg(atoms: &[String]) -> String {
    if atoms.len() > 1 {
        return format!("(GRP {})", atoms.join(" "));
    }
    atoms.first().cloned().unwrap_or_default()
}
/// Returns `s` with one leading space, or an empty string when `s` is empty.
fn prefix_space(s: &str) -> String {
    let mut spaced = String::with_capacity(s.len() + 1);
    if !s.is_empty() {
        spaced.push(' ');
        spaced.push_str(s);
    }
    spaced
}
/// Wraps whitespace-normalized `body` in a `TEXT` atom, or returns `None`
/// when nothing is left after normalization.
fn text_atom(body: &str) -> Option<String> {
    let normalized = norm_ws(body);
    if normalized.is_empty() {
        None
    } else {
        Some(format!("(TEXT {})", encode_text(&normalized)))
    }
}
/// Prepares a raw prose run for emission.
///
/// Outside markdown mode only Rd comments are stripped. In markdown mode the
/// run goes through, in order: percent swallowing, backslash-run collapsing,
/// bracket unescaping and HTML entity decoding.
fn process_prose(run: &str, md: bool) -> String {
    if !md {
        return strip_rd_comments(run);
    }
    let swallowed = md_percent_swallow(run);
    let collapsed = collapse_md_backslash_runs(&swallowed);
    let unescaped = unescape_md_brackets(&collapsed);
    decode_html_entities(&unescaped)
}
/// Removes the backslash from every `\[` and `\]`; all other backslashes are
/// kept as they are.
fn unescape_md_brackets(run: &str) -> String {
    let mut out = String::with_capacity(run.len());
    let mut rest = run;
    while let Some(pos) = rest.find('\\') {
        out.push_str(&rest[..pos]);
        let after = &rest[pos + 1..];
        // Keep the backslash unless it escapes a bracket; the bracket itself
        // is copied along with the following chunk.
        if !after.starts_with(['[', ']']) {
            out.push('\\');
        }
        rest = after;
    }
    out.push_str(rest);
    out
}
fn md_percent_swallow(run: &str) -> String {
physical_lines(run)
.map(md_percent_swallow_line)
.collect::<Vec<_>>()
.join("\n")
}
/// Truncates `line` at the first `%` that is preceded by an odd number of
/// backslashes (the backslashes are kept); returns the line unchanged when
/// there is no such `%`.
fn md_percent_swallow_line(line: &str) -> &str {
    let bytes = line.as_bytes();
    let cut = bytes
        .iter()
        .enumerate()
        .filter(|&(_, &b)| b == b'%')
        .map(|(at, _)| at)
        .find(|&at| {
            let slashes = bytes[..at]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count();
            slashes % 2 == 1
        });
    match cut {
        Some(at) => &line[..at],
        None => line,
    }
}
/// Halves every run of backslashes (rounding up), except runs directly
/// followed by `[` or `]`, which are kept at full length.
fn collapse_md_backslash_runs(run: &str) -> String {
    let mut out = String::with_capacity(run.len());
    let mut rest = run;
    while let Some(start) = rest.find('\\') {
        out.push_str(&rest[..start]);
        let tail = &rest[start..];
        let run_len = tail.bytes().take_while(|&b| b == b'\\').count();
        let after = &tail[run_len..];
        let keep = if after.starts_with(['[', ']']) {
            run_len
        } else {
            run_len.div_ceil(2)
        };
        out.extend(std::iter::repeat('\\').take(keep));
        rest = after;
    }
    out.push_str(rest);
    out
}
/// Concatenates the skeleton fragment of every inline in `body`.
fn inline_source_skeleton(body: &[Inline]) -> String {
    body.iter()
        .fold(String::new(), |mut skeleton, inl| {
            skeleton.push_str(&inline_skeleton_fragment(inl));
            skeleton
        })
}
/// Returns the text that stands in for `inl` in the source skeleton built by
/// `inline_source_skeleton`.
///
/// `skeleton_len` uses the byte length of this fragment, so changing what a
/// variant contributes also shifts the offsets computed by the demotion walk.
fn inline_skeleton_fragment(inl: &Inline) -> Cow<'_, str> {
match inl {
// Text is the only variant that is borrowed rather than rebuilt.
Inline::Text(t) => Cow::Borrowed(t),
// Inline links keep their bracketed display text; the URL is replaced by
// one space.
Inline::MdInlineLink { display, .. } => {
Cow::Owned(format!("[{}] ", inline_plain_text(display)))
}
Inline::MdImage(raw) => match image_alt_text(raw) {
Some(alt) => Cow::Owned(format!("[{alt}] ")),
None => Cow::Borrowed(" "),
},
Inline::MdLink(raw) => match opaque_inline_link_display(raw) {
Some(display) => Cow::Owned(format!("[{display}] ")),
None => Cow::Borrowed(" "),
},
// Lists contribute one space before each item's fragments and one space
// after the last item.
Inline::MdList(node) => {
let mut s = String::new();
for item in node
.children()
.filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
{
s.push(' ');
for child in md_list_item_inlines(&item) {
s.push_str(&inline_skeleton_fragment(&child));
}
}
s.push(' ');
Cow::Owned(s)
}
Inline::MdListResolved { items, .. } => {
let mut s = String::new();
for item in items {
s.push(' ');
for child in item {
s.push_str(&inline_skeleton_fragment(child));
}
}
s.push(' ');
Cow::Owned(s)
}
// Every other inline collapses to a single space.
_ => Cow::Borrowed(" "),
}
}
/// For a raw link of the form `[text](…)`, returns `text`; returns `None` for
/// raw links starting with `<` and for links whose bracketed text is not
/// directly followed by `(`.
fn opaque_inline_link_display(raw: &str) -> Option<&str> {
    let bytes = raw.as_bytes();
    if let Some(b'<') = bytes.first() {
        return None;
    }
    let text_end = scan_delimited(bytes, 0, b'[', b']')?;
    match bytes.get(text_end) {
        Some(b'(') => Some(&raw[1..text_end - 1]),
        _ => None,
    }
}
/// Returns the alt text of a raw image: the content of the bracket pair that
/// starts at byte 1. `None` when that bracket pair cannot be scanned.
fn image_alt_text(raw: &str) -> Option<&str> {
    scan_delimited(raw.as_bytes(), 1, b'[', b']').map(|alt_end| &raw[2..alt_end - 1])
}
/// Returns the link-reference definitions that leak into the output as text.
///
/// Labels are collected from the double-escaped `source`; starting at the
/// first label for which `linkref_label_closes` is false, every label is
/// rendered as `[label]: R:<encoded label>` and unescaped. The result is
/// empty when all labels close.
fn leaked_linkref_text(source: &str) -> Vec<String> {
    let labels = md_linkref_labels(&double_escape_md(source));
    labels
        .iter()
        .skip_while(|label| linkref_label_closes(*label))
        .map(|label| cmark_unescape(&format!("[{label}]: R:{}", url_encode(label))))
        .collect()
}
/// Demotes links located after the first invalid link-reference offset in the
/// body's source skeleton back to text, then re-links any `[text](url)` spans
/// in the resulting text runs.
///
/// Returns `None` when the skeleton has no invalid link-reference offset.
/// NOTE(review): the "changed" flag filled in by the walk is not consulted
/// here, so a rewritten body is returned whenever a boundary exists.
fn demote_poisoned_links(body: &[Inline]) -> Option<Vec<Inline>> {
    let boundary = first_invalid_linkref_offset(&inline_source_skeleton(body))?;
    let mut cursor = 0;
    let mut any_demoted = false;
    let walked = demote_poisoned_walk(body, boundary, &mut cursor, &mut any_demoted);
    Some(relink_demoted_inline_links(walked))
}
/// Walks `body`, tracking each inline's start offset in the source skeleton,
/// and replaces demotable links that start after `boundary` with their source
/// text.
///
/// Lists are walked item by item; a list is rebuilt as `MdListResolved` only
/// when one of its items changed. `changed` is set whenever anything was
/// replaced, and `offset` is advanced by the skeleton length of everything
/// visited.
fn demote_poisoned_walk(
    body: &[Inline],
    boundary: usize,
    offset: &mut usize,
    changed: &mut bool,
) -> Vec<Inline> {
    let mut out = Vec::with_capacity(body.len());
    for inl in body {
        let rewritten = match inl {
            Inline::MdList(node) => {
                let items: Vec<Vec<Inline>> = node
                    .children()
                    .filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
                    .map(|item| md_list_item_inlines(&item))
                    .collect();
                let mut touched = false;
                let new_items = demote_poisoned_items(&items, boundary, offset, &mut touched);
                if touched {
                    *changed = true;
                    Inline::MdListResolved {
                        ordered: md_list_is_ordered(node),
                        items: new_items,
                    }
                } else {
                    inl.clone()
                }
            }
            Inline::MdListResolved { ordered, items } => {
                let mut touched = false;
                let new_items = demote_poisoned_items(items, boundary, offset, &mut touched);
                if touched {
                    *changed = true;
                    Inline::MdListResolved {
                        ordered: *ordered,
                        items: new_items,
                    }
                } else {
                    inl.clone()
                }
            }
            _ => {
                // The offset moves past this inline before the check, which
                // looks at where the inline started.
                let start = *offset;
                *offset += skeleton_len(inl);
                let demoted = if start > boundary {
                    demoted_link_source(inl)
                } else {
                    None
                };
                match demoted {
                    Some(text) => {
                        *changed = true;
                        Inline::Text(text)
                    }
                    None => inl.clone(),
                }
            }
        };
        out.push(rewritten);
    }
    out
}
/// Runs the demotion walk over every list item.
///
/// The offset advances by one before each item and by one more after the
/// last item, matching the spaces a list contributes to the skeleton.
fn demote_poisoned_items(
    items: &[Vec<Inline>],
    boundary: usize,
    offset: &mut usize,
    item_changed: &mut bool,
) -> Vec<Vec<Inline>> {
    let rewritten: Vec<Vec<Inline>> = items
        .iter()
        .map(|item| {
            *offset += 1;
            demote_poisoned_walk(item, boundary, offset, item_changed)
        })
        .collect();
    *offset += 1;
    rewritten
}
/// Merges consecutive text inlines and re-scans each merged run for inline
/// links via `relink_text_run`; non-text inlines pass through in order.
fn relink_demoted_inline_links(body: Vec<Inline>) -> Vec<Inline> {
    let mut out = Vec::with_capacity(body.len());
    let mut pending = String::new();
    for inl in body {
        if let Inline::Text(s) = inl {
            pending.push_str(&s);
            continue;
        }
        relink_text_run(&pending, &mut out);
        pending.clear();
        out.push(inl);
    }
    relink_text_run(&pending, &mut out);
    out
}
/// Splits the text run `s` into text and inline-link inlines appended to
/// `out`.
///
/// Every `[text](url)` span whose `[` is not directly preceded by a backslash
/// becomes an `MdInlineLink` with a plain-text display; the text between
/// spans is pushed as `Text`. An empty run pushes nothing.
fn relink_text_run(s: &str, out: &mut Vec<Inline>) {
if s.is_empty() {
return;
}
let bytes = s.as_bytes();
let mut i = 0;
// Start of the plain text not yet pushed to `out`.
let mut run_start = 0;
while i < bytes.len() {
// NOTE(review): the slicing below treats the value returned by
// `scan_delimited` as the index just past the closing delimiter — confirm
// against its definition.
if bytes[i] == b'['
&& !(i > 0 && bytes[i - 1] == b'\\')
&& let Some(text_end) = scan_delimited(bytes, i, b'[', b']')
&& bytes.get(text_end) == Some(&b'(')
&& let Some(url_end) = scan_delimited(bytes, text_end, b'(', b')')
{
// Push the plain text that precedes the link.
if run_start < i {
out.push(Inline::Text(s[run_start..i].to_string()));
}
let display = s[i + 1..text_end - 1].to_string();
let url = s[text_end + 1..url_end - 1].to_string();
out.push(Inline::MdInlineLink {
url,
display: vec![Inline::Text(display)],
});
// Resume scanning right after the link.
i = url_end;
run_start = i;
continue;
}
i += 1;
}
// Push whatever plain text follows the last link.
if run_start < s.len() {
out.push(Inline::Text(s[run_start..].to_string()));
}
}
/// Byte length of the skeleton fragment that `inl` contributes.
fn skeleton_len(inl: &Inline) -> usize {
    let fragment = inline_skeleton_fragment(inl);
    fragment.len()
}
/// Returns the markdown source text a link is demoted to, or `None` when the
/// inline is not a demotable link.
///
/// Shortcut links become `[label]`, reference links `[label][dest]`, and raw
/// links are returned unchanged when they are shortcut or reference links.
fn demoted_link_source(inl: &Inline) -> Option<String> {
    match inl {
        Inline::MdShortcutLink { display } => {
            let label = link_label_text(display);
            Some(format!("[{label}]"))
        }
        Inline::MdRefLink { dest, display } => {
            let label = link_label_text(display);
            Some(format!("[{label}][{dest}]"))
        }
        Inline::MdLink(raw) => opaque_link_is_shortcut_or_ref(raw).then(|| raw.clone()),
        _ => None,
    }
}
/// True when `raw` opens with a balanced `[...]` group that is not followed
/// by `(`; anything starting with `<` is rejected outright.
fn opaque_link_is_shortcut_or_ref(raw: &str) -> bool {
    let bytes = raw.as_bytes();
    if bytes.first() == Some(&b'<') {
        return false;
    }
    match scan_delimited(bytes, 0, b'[', b']') {
        Some(text_end) => bytes.get(text_end) != Some(&b'('),
        None => false,
    }
}
/// Collects the normalized labels that `md_linkref_scan` finds in the
/// skeleton text of `body`.
fn linkref_keys(body: &[Inline]) -> std::collections::HashSet<String> {
    let skeleton = linkref_source_skeleton(body);
    let mut keys = std::collections::HashSet::new();
    for (label, _) in md_linkref_scan(&skeleton) {
        keys.insert(normalize_linkref_label(&label));
    }
    keys
}
/// Concatenates the skeleton text of every inline in `body`.
fn linkref_source_skeleton(body: &[Inline]) -> String {
    body.iter().fold(String::new(), |mut skeleton, inl| {
        linkref_skeleton_push(inl, &mut skeleton);
        skeleton
    })
}
/// Appends the link-reference skeleton text of `inl` to `s`.
///
/// Text and opaque links are copied verbatim; shortcut and reference links are
/// re-bracketed; inline links and images keep only a bracketed label followed
/// by a space; emphasis and lists recurse into their children with space
/// separators; every other inline collapses to a single space.
fn linkref_skeleton_push(inl: &Inline, s: &mut String) {
    match inl {
        Inline::Text(literal) => s.push_str(literal),
        Inline::MdLink(raw) => s.push_str(raw),
        Inline::MdShortcutLink { display } => {
            let label = link_label_text(display);
            s.push('[');
            s.push_str(&label);
            s.push(']');
        }
        Inline::MdRefLink { dest, display } => {
            let label = link_label_text(display);
            s.push('[');
            s.push_str(&label);
            s.push_str("][");
            s.push_str(dest);
            s.push(']');
        }
        Inline::MdInlineLink { display, .. } => {
            let label = inline_plain_text(display);
            s.push('[');
            s.push_str(&label);
            s.push_str("] ");
        }
        Inline::MdImage(raw) => {
            if let Some(alt) = image_alt_text(raw) {
                s.push('[');
                s.push_str(alt);
                s.push_str("] ");
            } else {
                s.push(' ');
            }
        }
        Inline::MdEmphasis { children, .. } => {
            s.push(' ');
            children
                .iter()
                .for_each(|child| linkref_skeleton_push(child, s));
            s.push(' ');
        }
        Inline::MdList(node) => {
            let list_items = node
                .children()
                .filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM);
            for item in list_items {
                s.push(' ');
                for child in md_list_item_inlines(&item) {
                    linkref_skeleton_push(&child, s);
                }
            }
            s.push(' ');
        }
        Inline::MdListResolved { items, .. } => {
            for item in items {
                s.push(' ');
                item.iter()
                    .for_each(|child| linkref_skeleton_push(child, s));
            }
            s.push(' ');
        }
        _ => s.push(' '),
    }
}
/// Normalizes a link label: trims it, collapses every whitespace run to one
/// space, and lowercases the result.
fn normalize_linkref_label(label: &str) -> String {
    let mut collapsed = String::with_capacity(label.len());
    for word in label.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
/// Returns the reference label a link inline resolves against: the `dest` of
/// a reference link, the display text of a shortcut link, or whatever
/// `opaque_link_ref_label` extracts from an opaque link.
fn link_ref_label(inl: &Inline) -> Option<String> {
    match inl {
        Inline::MdLink(raw) => opaque_link_ref_label(raw),
        Inline::MdRefLink { dest, .. } => Some(dest.to_owned()),
        Inline::MdShortcutLink { display } => Some(link_label_text(display)),
        _ => None,
    }
}
/// Extracts the reference label from the raw text of an opaque link.
///
/// `<...>` autolinks and `[text](url)` inline links yield `None`;
/// `[text][ref]` yields `ref`; any other `[text]...` yields `text`.
fn opaque_link_ref_label(raw: &str) -> Option<String> {
    let bytes = raw.as_bytes();
    if bytes.first() == Some(&b'<') {
        return None;
    }
    let text_end = scan_delimited(bytes, 0, b'[', b']')?;
    match bytes.get(text_end).copied() {
        Some(b'(') => None,
        Some(b'[') => {
            let ref_end = scan_delimited(bytes, text_end, b'[', b']')?;
            Some(raw[text_end + 1..ref_end - 1].to_owned())
        }
        _ => Some(raw[1..text_end - 1].to_owned()),
    }
}
fn demote_undefined_links(
body: &[Inline],
keys: &std::collections::HashSet<String>,
) -> Option<Vec<Inline>> {
let mut changed = false;
let out: Vec<Inline> = body
.iter()
.map(|inl| {
if let Some(label) = link_ref_label(inl)
&& !keys.contains(&normalize_linkref_label(&label))
&& let Some(text) = demoted_link_source(inl)
{
changed = true;
return Inline::Text(text);
}
match demote_undefined_in_list(inl, keys) {
Some(resolved) => {
changed = true;
resolved
}
None => inl.clone(),
}
})
.collect();
changed.then_some(out)
}
/// Applies `demote_undefined_links` to every item of a list inline.
///
/// Returns a `MdListResolved` carrying the rewritten items when at least one
/// item changed, and `None` for non-list inlines or unchanged lists.
fn demote_undefined_in_list(
    inl: &Inline,
    keys: &std::collections::HashSet<String>,
) -> Option<Inline> {
    let (ordered, items): (bool, Vec<Vec<Inline>>) = match inl {
        Inline::MdListResolved { ordered, items } => (*ordered, items.clone()),
        Inline::MdList(node) => {
            let items = node
                .children()
                .filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
                .map(|item| md_list_item_inlines(&item))
                .collect();
            (md_list_is_ordered(node), items)
        }
        _ => return None,
    };
    let mut any_changed = false;
    let new_items: Vec<Vec<Inline>> = items
        .into_iter()
        .map(|item| match demote_undefined_links(&item, keys) {
            Some(rewritten) => {
                any_changed = true;
                rewritten
            }
            None => item,
        })
        .collect();
    any_changed.then_some(Inline::MdListResolved {
        ordered,
        items: new_items,
    })
}
/// Gathers link reference definitions from `body` and, recursively, from the
/// items of any list it contains.
///
/// A label already present in `urls` keeps its existing URL.
fn collect_user_linkrefs_tree(
    body: &[Inline],
    urls: &mut std::collections::HashMap<String, String>,
) {
    let (found_here, _) = collect_user_linkrefs(body);
    for (label, url) in found_here {
        urls.entry(label).or_insert(url);
    }
    for inl in body {
        if let Inline::MdList(node) = inl {
            let list_items = node
                .children()
                .filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM);
            for item in list_items {
                collect_user_linkrefs_tree(&md_list_item_inlines(&item), urls);
            }
        } else if let Inline::MdListResolved { items, .. } = inl {
            for item in items {
                collect_user_linkrefs_tree(item, urls);
            }
        }
    }
}
/// Rewrites reference-style links in `body` into inline links using `urls`,
/// and removes the inlines that make up link reference definitions.
///
/// Lists are processed recursively. Both unresolved (`MdList`) and already
/// resolved (`MdListResolved`) lists are descended into, matching what
/// `collect_user_linkrefs_tree` and `demote_undefined_in_list` do; previously
/// a resolved list was copied through untouched, so links inside it were
/// never rewritten.
///
/// Returns `None` when nothing was dropped or rewritten.
fn apply_user_linkrefs(
    body: &[Inline],
    urls: &std::collections::HashMap<String, String>,
) -> Option<Vec<Inline>> {
    let (_, dropped) = collect_user_linkrefs(body);
    let mut out = Vec::with_capacity(body.len());
    let mut changed = !dropped.is_empty();
    for (i, inl) in body.iter().enumerate() {
        // Inlines that belong to a definition line are removed from the output.
        if dropped.contains(&i) {
            continue;
        }
        if let Some(label) = link_ref_label(inl)
            && let Some(url) = urls.get(&normalize_linkref_label(&label))
            && let Some(display) = link_display_inlines(inl)
        {
            out.push(Inline::MdInlineLink {
                url: url.clone(),
                display,
            });
            changed = true;
            continue;
        }
        // Pull the items out of either list representation.
        let nested: Option<(bool, Vec<Vec<Inline>>)> = match inl {
            Inline::MdList(node) => Some((
                md_list_is_ordered(node),
                node.children()
                    .filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
                    .map(|item| md_list_item_inlines(&item))
                    .collect(),
            )),
            Inline::MdListResolved { ordered, items } => Some((*ordered, items.clone())),
            _ => None,
        };
        if let Some((ordered, items)) = nested {
            let mut item_changed = false;
            let new_items: Vec<Vec<Inline>> = items
                .into_iter()
                .map(|item| match apply_user_linkrefs(&item, urls) {
                    Some(rewritten) => {
                        item_changed = true;
                        rewritten
                    }
                    None => item,
                })
                .collect();
            if item_changed {
                out.push(Inline::MdListResolved {
                    ordered,
                    items: new_items,
                });
                changed = true;
                continue;
            }
        }
        out.push(inl.clone());
    }
    changed.then_some(out)
}
/// Scans `body` for link reference definitions.
///
/// Returns the label-to-URL map and the indices of the inlines that belong to
/// definitions. A definition run is only looked for at the start of `body` or
/// right after a text inline containing `'\n'`.
fn collect_user_linkrefs(
    body: &[Inline],
) -> (
    std::collections::HashMap<String, String>,
    std::collections::BTreeSet<usize>,
) {
    let mut urls = std::collections::HashMap::new();
    let mut dropped = std::collections::BTreeSet::new();
    let mut pos = 0;
    let mut at_block_start = true;
    while pos < body.len() {
        let run_end = if at_block_start {
            scan_linkref_run(body, pos, &mut urls, &mut dropped)
        } else {
            None
        };
        match run_end {
            Some(end) => {
                pos = end;
                at_block_start = false;
            }
            None => {
                at_block_start = matches!(&body[pos], Inline::Text(t) if t.contains('\n'));
                pos += 1;
            }
        }
    }
    (urls, dropped)
}
/// Consumes consecutive link reference definitions starting at `start`.
///
/// Each definition may be preceded by non-empty, whitespace-only text inlines
/// that contain no `'\n'`. Every consumed index is added to `dropped`, and the
/// first URL seen for a normalized label wins. Returns the index after the
/// last definition, or `None` when no definition matched.
fn scan_linkref_run(
    body: &[Inline],
    start: usize,
    urls: &mut std::collections::HashMap<String, String>,
    dropped: &mut std::collections::BTreeSet<usize>,
) -> Option<usize> {
    let mut cursor = start;
    let mut matched = false;
    loop {
        let blanks = body
            .iter()
            .skip(cursor)
            .take_while(|inl| {
                matches!(
                    inl,
                    Inline::Text(t)
                        if !t.is_empty()
                            && !t.contains('\n')
                            && t.chars().all(char::is_whitespace)
                )
            })
            .count();
        let Some((label, url, def_end)) = match_linkref_def(body, cursor + blanks) else {
            break;
        };
        urls.entry(normalize_linkref_label(&label)).or_insert(url);
        dropped.extend(cursor..def_end);
        matched = true;
        cursor = def_end;
    }
    matched.then_some(cursor)
}
/// Tries to read one `[label]: dest` definition whose label inline sits at
/// index `j`.
///
/// The text inlines after the label, up to the first one containing `'\n'`,
/// are joined and parsed as the destination. Returns the label, the URL and
/// the index just past the consumed inlines.
fn match_linkref_def(body: &[Inline], j: usize) -> Option<(String, String, usize)> {
    let label = linkref_def_label(body.get(j)?)?;
    let tail: Vec<&str> = body
        .iter()
        .skip(j + 1)
        .map_while(|inl| match inl {
            Inline::Text(t) if !t.contains('\n') => Some(t.as_str()),
            _ => None,
        })
        .collect();
    if tail.is_empty() {
        return None;
    }
    let k = j + 1 + tail.len();
    let (url, line_closed) = parse_linkref_def_dest(&tail.concat())?;
    // Unless the parser reported the line as closed, only a text inline or the
    // end of the body may follow.
    let next_is_text_or_end = matches!(body.get(k), None | Some(Inline::Text(_)));
    (line_closed || next_is_text_or_end).then_some((label, url, k))
}
/// Returns the label when `inl` can open a link reference definition: a
/// shortcut link, or an opaque link consisting of exactly one `[...]` group.
fn linkref_def_label(inl: &Inline) -> Option<String> {
    match inl {
        Inline::MdShortcutLink { display } => Some(inline_plain_text(display)),
        Inline::MdLink(raw) => {
            if !raw.starts_with('[') {
                return None;
            }
            let bytes = raw.as_bytes();
            match scan_delimited(bytes, 0, b'[', b']') {
                Some(end) if end == bytes.len() => Some(raw[1..end - 1].to_owned()),
                _ => None,
            }
        }
        _ => None,
    }
}
/// Parses the `: dest "title"` tail of a link reference definition.
///
/// Returns the entity-decoded destination plus a flag that is true when the
/// whitespace after the destination (or after the title) contains
/// `SOFT_BREAK`. Returns `None` when the leading `:` is missing, the
/// destination is empty, a `<` destination or a title is unterminated, or
/// non-whitespace text follows the title.
fn parse_linkref_def_dest(text: &str) -> Option<(String, bool)> {
    let rest = text.strip_prefix(':')?.trim_start();
    let (raw_url, after) = match rest.strip_prefix('<') {
        Some(bracketed) => {
            let close = bracketed.find('>')?;
            (&bracketed[..close], &bracketed[close + 1..])
        }
        None => {
            let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
            rest.split_at(end)
        }
    };
    if raw_url.is_empty() {
        return None;
    }
    let url = decode_html_entities(raw_url);
    let title_part = after.trim_start();
    if title_part.is_empty() {
        return Some((url, after.contains(SOFT_BREAK)));
    }
    // The title's opening delimiter decides which character closes it.
    let closer = match title_part.as_bytes()[0] {
        b'"' => '"',
        b'\'' => '\'',
        b'(' => ')',
        _ => return None,
    };
    let title_rest = &title_part[1..];
    let end = title_rest.find(closer)?;
    let residual = &title_rest[end + 1..];
    if residual.trim().is_empty() {
        Some((url, residual.contains(SOFT_BREAK)))
    } else {
        None
    }
}
fn decode_html_entities(s: &str) -> String {
if !s.contains('&') {
return s.to_string();
}
let mut out = String::with_capacity(s.len());
let mut i = 0;
while i < s.len() {
if s.as_bytes()[i] == b'&'
&& let Some(rel) = s[i + 1..].find(';')
&& decode_entity(&s[i + 1..i + 1 + rel], &mut out)
{
i += 1 + rel + 1;
continue;
}
let ch = s[i..].chars().next().unwrap();
out.push(ch);
i += ch.len_utf8();
}
out
}
/// Decodes one entity body (the text between `&` and `;`) into `out`.
///
/// Numeric references must be `#` followed by 1-7 decimal digits, or `#x` /
/// `#X` followed by 1-6 hexadecimal digits, as CommonMark requires. Code
/// point 0 and values that are not valid `char`s decode to U+FFFD. Any other
/// body is looked up in `entities::HTML5_ENTITIES`.
///
/// Returns `true` when something was appended to `out`, `false` when `body`
/// is not a recognized entity.
fn decode_entity(body: &str, out: &mut String) -> bool {
    if let Some(num) = body.strip_prefix('#') {
        let (digits, radix, max_len) = match num.strip_prefix(['x', 'X']) {
            Some(hex) => (hex, 16, 6),
            None => (num, 10, 7),
        };
        // The integer parsers alone would also accept a leading `+` and any
        // number of digits, so the digit run is validated first.
        let well_formed = (1..=max_len).contains(&digits.len())
            && digits.bytes().all(|b| char::from(b).is_digit(radix));
        if !well_formed {
            return false;
        }
        let Ok(code) = u32::from_str_radix(digits, radix) else {
            return false;
        };
        out.push(
            char::from_u32(code)
                .filter(|&c| c != '\0')
                .unwrap_or('\u{FFFD}'),
        );
        return true;
    }
    match entities::HTML5_ENTITIES.binary_search_by_key(&body, |&(name, _)| name) {
        Ok(idx) => {
            out.push_str(entities::HTML5_ENTITIES[idx].1);
            true
        }
        Err(_) => false,
    }
}
/// Returns the display inlines of a shortcut or reference link.
///
/// For an opaque link the bracketed text is returned as a single text inline,
/// unless the raw text starts with `<` or the `[...]` group is followed by
/// `(`. Every other inline yields `None`.
fn link_display_inlines(inl: &Inline) -> Option<Vec<Inline>> {
    match inl {
        Inline::MdShortcutLink { display } | Inline::MdRefLink { display, .. } => {
            Some(display.clone())
        }
        Inline::MdLink(raw) => {
            let bytes = raw.as_bytes();
            if bytes.first() == Some(&b'<') {
                return None;
            }
            let text_end = scan_delimited(bytes, 0, b'[', b']')?;
            if bytes.get(text_end) == Some(&b'(') {
                return None;
            }
            Some(vec![Inline::Text(raw[1..text_end - 1].to_string())])
        }
        _ => None,
    }
}
/// Doubles every backslash in `s`, then turns a doubled backslash directly in
/// front of `[` or `]` back into a single one.
fn double_escape_md(s: &str) -> String {
    let doubled = s.replace('\\', "\\\\");
    let open_restored = doubled.replace("\\\\[", "\\[");
    open_restored.replace("\\\\]", "\\]")
}
/// Labels of all link references found by `md_linkref_scan`, in scan order.
fn md_linkref_labels(text: &str) -> Vec<String> {
    let hits = md_linkref_scan(text);
    let mut labels = Vec::with_capacity(hits.len());
    for (label, _offset) in hits {
        labels.push(label);
    }
    labels
}
/// Finds `[label]` and `[text][label]` references in `text`.
///
/// Each hit is `(label, byte offset of the opening '[')`. A candidate is
/// skipped when its `[` directly follows `]` or a backslash, when a group is
/// empty or contains a nested bracket, or when the match is directly followed
/// by `[` or `{`.
fn md_linkref_scan(text: &str) -> Vec<(String, usize)> {
    let bytes = text.as_bytes();
    let mut found = Vec::new();
    let mut pos = 0;
    while let Some(&byte) = bytes.get(pos) {
        let after_blocker = pos > 0 && matches!(bytes[pos - 1], b']' | b'\\');
        if byte != b'[' || after_blocker {
            pos += 1;
            continue;
        }
        let Some((content, content_end)) = bracket_free_group(bytes, pos) else {
            pos += 1;
            continue;
        };
        // A second bracket group, when present, supplies the label instead.
        let (label, match_end) =
            bracket_free_group(bytes, content_end).unwrap_or((content, content_end));
        if matches!(bytes.get(match_end), Some(b'[' | b'{')) {
            pos += 1;
            continue;
        }
        found.push((String::from_utf8_lossy(label).into_owned(), pos));
        pos = match_end;
    }
    found
}
/// Offset of the first scanned link reference whose label ends with a
/// backslash, if any.
fn first_invalid_linkref_offset(skeleton: &str) -> Option<usize> {
    for (label, start) in md_linkref_scan(skeleton) {
        if label.ends_with('\\') {
            return Some(start);
        }
    }
    None
}
/// Matches a non-empty `[...]` group at `open` whose interior contains no
/// `[` or `]`.
///
/// Returns the interior bytes and the index just past the closing `]`.
fn bracket_free_group(bytes: &[u8], open: usize) -> Option<(&[u8], usize)> {
    if bytes.get(open) != Some(&b'[') {
        return None;
    }
    let start = open + 1;
    let inner_len = bytes[start..]
        .iter()
        .position(|b| matches!(b, b'[' | b']'))?;
    let close = start + inner_len;
    if bytes[close] == b']' && inner_len > 0 {
        Some((&bytes[start..close], close + 1))
    } else {
        None
    }
}
/// True when `label` ends with an even number (including zero) of backslashes.
fn linkref_label_closes(label: &str) -> bool {
    let trailing_backslashes = label.len() - label.trim_end_matches('\\').len();
    trailing_backslashes % 2 == 0
}
/// Percent-encodes `s` byte by byte.
///
/// ASCII letters, digits and the punctuation listed in `KEPT` pass through;
/// every other byte becomes `%XX` with uppercase hex digits.
fn url_encode(s: &str) -> String {
    const KEPT: &[u8] = b"!#$&'()*+,-./:;=?@[]_~";
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        if byte.is_ascii_alphanumeric() || KEPT.contains(&byte) {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
/// Drops the backslash from every backslash + ASCII punctuation pair; all
/// other characters, including lone backslashes, are kept.
fn cmark_unescape(s: &str) -> String {
    let mut unescaped = String::with_capacity(s.len());
    let mut iter = s.chars().peekable();
    while let Some(current) = iter.next() {
        if current == '\\' {
            if let Some(escaped) = iter.next_if(|next| next.is_ascii_punctuation()) {
                unescaped.push(escaped);
                continue;
            }
        }
        unescaped.push(current);
    }
    unescaped
}
fn append_rendered_text(atoms: &mut Vec<String>, extra: &str) {
if let Some(last) = atoms.last_mut()
&& let Some(text) = decode_text_atom(last)
&& let Some(merged) = text_atom(&format!("{text} {extra}"))
{
*last = merged;
return;
}
if let Some(atom) = text_atom(extra) {
atoms.push(atom);
}
}
/// Recovers the plain text from a `(TEXT "...")` atom.
///
/// `\n` decodes to a newline, a backslash before any other character yields
/// that character, and a trailing lone backslash is kept. Returns `None` when
/// `atom` lacks the `(TEXT "` prefix or the `")` suffix.
fn decode_text_atom(atom: &str) -> Option<String> {
    let inner = atom.strip_prefix("(TEXT \"")?.strip_suffix("\")")?;
    let mut decoded = String::with_capacity(inner.len());
    let mut after_backslash = false;
    for c in inner.chars() {
        if after_backslash {
            decoded.push(if c == 'n' { '\n' } else { c });
            after_backslash = false;
        } else if c == '\\' {
            after_backslash = true;
        } else {
            decoded.push(c);
        }
    }
    if after_backslash {
        decoded.push('\\');
    }
    Some(decoded)
}
fn strip_rd_comments(s: &str) -> String {
physical_lines(s)
.map(strip_rd_line_comment)
.collect::<Vec<_>>()
.join("\n")
}
/// Returns `line` truncated at its first unescaped `%`.
///
/// A backslash escapes the character that follows it, so `\%` does not start
/// a comment.
fn strip_rd_line_comment(line: &str) -> &str {
    let mut chars = line.char_indices();
    while let Some((idx, c)) = chars.next() {
        match c {
            // Consume the escaped character so it is never inspected.
            '\\' => {
                chars.next();
            }
            '%' => return &line[..idx],
            _ => {}
        }
    }
    line
}
fn rcode_atoms(body: &str) -> Vec<String> {
let mut atoms = Vec::new();
let mut rest = body;
while let Some(idx) = rest.find('\n') {
let (seg, tail) = rest.split_at(idx + 1);
atoms.push(format!("(RCODE {})", encode_text(seg)));
rest = tail;
}
if !rest.is_empty() {
atoms.push(format!("(RCODE {})", encode_text(rest)));
}
atoms
}
fn norm_ws(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let mut pending_space = false;
for c in s.chars() {
if is_posix_space(c) {
pending_space = true;
} else {
if pending_space && !out.is_empty() {
out.push(' ');
}
pending_space = false;
out.push(c);
}
}
out
}
/// True for space, tab, newline, vertical tab, form feed and carriage return.
fn is_posix_space(c: char) -> bool {
    " \t\n\x0b\x0c\r".contains(c)
}
/// Form feed (U+000C), the character pushed as text in place of a `NEWLINE`
/// token inside paragraphs, tags, emphasis and links.
const SOFT_BREAK: char = '\u{000C}';
/// Splits `run` at every `'\n'` and every `SOFT_BREAK`.
fn physical_lines(run: &str) -> impl Iterator<Item = &str> {
    run.split(|c: char| c == '\n' || c == SOFT_BREAK)
}
/// Wraps `s` in double quotes, escaping backslash and `"` with a backslash
/// and writing a newline as `\n`.
fn encode_text(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for c in s.chars() {
        if matches!(c, '\\' | '"') {
            quoted.push('\\');
            quoted.push(c);
        } else if c == '\n' {
            quoted.push_str("\\n");
        } else {
            quoted.push(c);
        }
    }
    quoted.push('"');
    quoted
}
/// Splits a section's children into groups of inlines.
///
/// A heading closes the current group and forms a group of its own. A
/// thematic break or a roxygen marker closes the current group. Paragraphs,
/// Rd macros and the markdown block nodes are appended to the current group,
/// separated by a single space unless the new node is a block quote.
fn section_body_parts(section: &RoxygenSection) -> Vec<Vec<Inline>> {
    let mut groups: Vec<Vec<Inline>> = Vec::new();
    let mut cur: Vec<Inline> = Vec::new();
    for el in section.syntax().children_with_tokens() {
        let kind = el.kind();
        match kind {
            SyntaxKind::ROXYGEN_MD_THEMATIC_BREAK | SyntaxKind::ROXYGEN_MARKER => {
                if !cur.is_empty() {
                    groups.push(std::mem::take(&mut cur));
                }
            }
            SyntaxKind::ROXYGEN_MD_HEADING => {
                let Some(node) = el.into_node() else { continue };
                if !cur.is_empty() {
                    groups.push(std::mem::take(&mut cur));
                }
                groups.push(vec![Inline::MdHeading(node)]);
            }
            SyntaxKind::ROXYGEN_PARAGRAPH
            | SyntaxKind::ROXYGEN_RD_MACRO
            | SyntaxKind::ROXYGEN_MD_LIST
            | SyntaxKind::ROXYGEN_MD_CODE_BLOCK
            | SyntaxKind::ROXYGEN_MD_INDENTED_CODE
            | SyntaxKind::ROXYGEN_MD_HTML_BLOCK
            | SyntaxKind::ROXYGEN_MD_BLOCK_QUOTE
            | SyntaxKind::ROXYGEN_MD_TABLE => {
                let Some(node) = el.into_node() else { continue };
                let inlines = match kind {
                    SyntaxKind::ROXYGEN_PARAGRAPH => match RoxygenParagraph::cast(node) {
                        Some(para) => paragraph_inlines(&para),
                        None => Vec::new(),
                    },
                    SyntaxKind::ROXYGEN_MD_LIST => vec![Inline::MdList(node)],
                    SyntaxKind::ROXYGEN_MD_CODE_BLOCK => vec![Inline::MdCodeBlock(node)],
                    SyntaxKind::ROXYGEN_MD_INDENTED_CODE => vec![Inline::MdIndentedCode(node)],
                    SyntaxKind::ROXYGEN_MD_HTML_BLOCK => vec![Inline::MdHtmlBlock(node)],
                    SyntaxKind::ROXYGEN_MD_BLOCK_QUOTE => vec![Inline::MdBlockQuote(node)],
                    SyntaxKind::ROXYGEN_MD_TABLE => vec![Inline::MdTable(node)],
                    _ => vec![Inline::Macro(node)],
                };
                let needs_separator =
                    !cur.is_empty() && kind != SyntaxKind::ROXYGEN_MD_BLOCK_QUOTE;
                if needs_separator {
                    cur.push(Inline::Text(" ".to_string()));
                }
                cur.extend(inlines);
            }
            _ => {}
        }
    }
    if !cur.is_empty() {
        groups.push(cur);
    }
    groups
}
/// Converts a paragraph's children to inlines: roxygen markers are skipped,
/// each `NEWLINE` becomes a `SOFT_BREAK` text, and everything else goes
/// through `push_inline`.
fn paragraph_inlines(para: &RoxygenParagraph) -> Vec<Inline> {
    let mut out = Vec::new();
    for el in para.syntax().children_with_tokens() {
        let kind = el.kind();
        if kind == SyntaxKind::ROXYGEN_MARKER {
            continue;
        }
        if kind == SyntaxKind::NEWLINE {
            out.push(Inline::Text(SOFT_BREAK.to_string()));
        } else {
            push_inline(&mut out, el);
        }
    }
    out
}
/// Converts a tag's children to inlines.
///
/// The `@`, tag name, tag argument and roxygen markers are skipped. Newlines
/// and whitespace are ignored until the first other element has been seen;
/// after that a `NEWLINE` becomes a `SOFT_BREAK` text and whitespace is kept.
fn tag_inlines(tag: &RoxygenTag) -> Vec<Inline> {
    let mut out = Vec::new();
    let mut seen_prose = false;
    for el in tag.syntax().children_with_tokens() {
        match el.kind() {
            SyntaxKind::ROXYGEN_AT
            | SyntaxKind::ROXYGEN_TAG_NAME
            | SyntaxKind::ROXYGEN_TAG_ARG
            | SyntaxKind::ROXYGEN_MARKER => {}
            SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE if !seen_prose => {}
            SyntaxKind::NEWLINE => out.push(Inline::Text(SOFT_BREAK.to_string())),
            SyntaxKind::WHITESPACE => push_inline(&mut out, el),
            _ => {
                seen_prose = true;
                push_inline(&mut out, el);
            }
        }
    }
    out
}
/// Converts one CST element into an `Inline` and appends it to `out`.
///
/// Nodes: Rd macros and markdown lists are stored as-is; emphasis/strong and
/// link nodes are rebuilt from their interior children; any other node becomes
/// plain text. Tokens: code spans, links, images and inline HTML get their own
/// variants (links and images only when they resolve); any other token becomes
/// plain text.
fn push_inline(out: &mut Vec<Inline>, el: NodeOrToken<SyntaxNode, crate::syntax::SyntaxToken>) {
match el {
NodeOrToken::Node(n) if n.kind() == SyntaxKind::ROXYGEN_RD_MACRO => {
out.push(Inline::Macro(n));
}
NodeOrToken::Node(n) if n.kind() == SyntaxKind::ROXYGEN_MD_LIST => {
out.push(Inline::MdList(n));
}
NodeOrToken::Node(n)
if matches!(
n.kind(),
SyntaxKind::ROXYGEN_MD_EMPH | SyntaxKind::ROXYGEN_MD_STRONG
) =>
{
let strong = n.kind() == SyntaxKind::ROXYGEN_MD_STRONG;
let kids: Vec<_> = n.children_with_tokens().collect();
// `take(len - 1).skip(1)` drops the first and last child (presumably the
// emphasis delimiters, confirm against the parser).
let interior = kids.len().saturating_sub(1);
let mut children = Vec::new();
for child in kids.into_iter().take(interior).skip(1) {
match child.kind() {
SyntaxKind::ROXYGEN_MARKER => {} SyntaxKind::NEWLINE => children.push(Inline::Text(SOFT_BREAK.to_string())),
_ => push_inline(&mut children, child),
}
}
out.push(Inline::MdEmphasis { strong, children });
}
NodeOrToken::Node(n) if n.kind() == SyntaxKind::ROXYGEN_MD_LINK => {
let kids: Vec<_> = n.children_with_tokens().collect();
// The text of the last child decides which kind of link this is.
let closer = kids.last().map(|c| c.to_string()).unwrap_or_default();
let interior = kids.len().saturating_sub(1);
let mut display = Vec::new();
for child in kids.into_iter().take(interior).skip(1) {
match child.kind() {
SyntaxKind::ROXYGEN_MARKER => {} SyntaxKind::NEWLINE => display.push(Inline::Text(SOFT_BREAK.to_string())),
_ => push_inline(&mut display, child),
}
}
// "]" is a shortcut link, "][dest]" a reference link; anything else is
// treated as an inline link whose URL comes from `inline_link_dest`.
if closer == "]" {
out.push(Inline::MdShortcutLink { display });
} else if let Some(dest) = closer.strip_prefix("][").and_then(|s| s.strip_suffix(']')) {
out.push(Inline::MdRefLink {
dest: dest.to_string(),
display,
});
} else {
out.push(Inline::MdInlineLink {
url: inline_link_dest(&closer),
display,
});
}
}
// Any other node is flattened to its source text.
NodeOrToken::Node(n) => out.push(Inline::Text(n.text().to_string())),
NodeOrToken::Token(t) if t.kind() == SyntaxKind::ROXYGEN_MD_CODE => {
out.push(Inline::MdCode(strip_code_span(t.text())));
}
// Link and image tokens that fail to resolve fall through to plain text.
NodeOrToken::Token(t)
if t.kind() == SyntaxKind::ROXYGEN_MD_LINK && resolve_md_link(t.text()).is_some() =>
{
out.push(Inline::MdLink(t.text().to_string()));
}
NodeOrToken::Token(t)
if t.kind() == SyntaxKind::ROXYGEN_MD_IMAGE && resolve_md_image(t.text()).is_some() =>
{
out.push(Inline::MdImage(t.text().to_string()));
}
NodeOrToken::Token(t) if t.kind() == SyntaxKind::ROXYGEN_MD_HTML => {
out.push(Inline::MdHtml(t.text().to_string()));
}
NodeOrToken::Token(t) => out.push(Inline::Text(t.text().to_string())),
}
}
/// Extracts the content of a backtick code span.
///
/// As many bytes as there are leading backticks are removed from both ends,
/// newlines become spaces, and one space is stripped from each side when the
/// content both starts and ends with a space and is not whitespace-only.
fn strip_code_span(text: &str) -> String {
    let ticks = text.bytes().take_while(|&b| b == b'`').count();
    let inner = match text.get(ticks..text.len() - ticks) {
        Some(middle) => middle.replace('\n', " "),
        None => String::new(),
    };
    let padded = inner.len() >= 2 && inner.starts_with(' ') && inner.ends_with(' ');
    if padded && !inner.trim().is_empty() {
        inner[1..inner.len() - 1].to_string()
    } else {
        inner
    }
}
/// Renders the raw text of a markdown link token as an atom.
///
/// `<...>` autolinks become a URL atom when they carry a URI scheme and a
/// `mailto:` href otherwise. `[text](url)` and `[text][ref]` must span the
/// whole token; `[text]` alone is a shortcut link. Anything else gives `None`.
fn resolve_md_link(raw: &str) -> Option<String> {
    let bytes = raw.as_bytes();
    if raw.starts_with('<') {
        let inner = raw.strip_prefix('<')?.strip_suffix('>')?;
        let atom = if autolink_has_uri_scheme(inner) {
            url_atom(inner)
        } else {
            href_atom(inner, &format!("mailto:{inner}"))
        };
        return Some(atom);
    }
    let text_end = scan_delimited(bytes, 0, b'[', b']')?;
    let text = &raw[1..text_end - 1];
    let Some(&next) = bytes.get(text_end) else {
        return Some(shortcut_link_atom(text));
    };
    let (open, close) = match next {
        b'(' => (b'(', b')'),
        b'[' => (b'[', b']'),
        _ => return None,
    };
    let tail_end = scan_delimited(bytes, text_end, open, close)?;
    if tail_end != bytes.len() {
        return None;
    }
    let tail = &raw[text_end + 1..tail_end - 1];
    Some(if next == b'(' {
        inline_link_atom(text, tail)
    } else {
        ref_link_atom(text, tail)
    })
}
/// Extracts `dest` from a `](dest)` closer; any other shape yields an empty
/// string.
fn inline_link_dest(close: &str) -> String {
    match close
        .strip_prefix("](")
        .and_then(|rest| rest.strip_suffix(')'))
    {
        Some(dest) => dest.to_owned(),
        None => String::new(),
    }
}
/// Renders an inline link node.
///
/// When the URL is empty or equals the plain display text after whitespace
/// normalization, the display text is rendered as a URL atom; otherwise an
/// href atom over the serialized display is produced.
fn inline_link_node_atom(url: &str, display: &[Inline], md: bool) -> String {
    let plain = inline_plain_text(display);
    let bare = url.is_empty() || norm_ws(url) == norm_ws(&plain);
    if bare {
        return url_atom(&plain);
    }
    let shown = grp_arg(&serialize_inlines(display, md));
    let verb = encode_text(url);
    format!("(\\href (VERB {verb}){})", prefix_space(&shown))
}
/// Renders a `[display][dest]` reference link node.
///
/// A display equal to `dest` (after whitespace normalization) is rendered as
/// a shortcut link to `dest`; a display containing an Rd macro goes through
/// `link_over_display`; otherwise a `\link` atom over the display text is
/// emitted, wrapped in `\code` when the display is a single code span.
fn ref_link_node_atom(display: &[Inline], dest: &str) -> String {
    let plain = inline_plain_text(display);
    if norm_ws(&plain) == norm_ws(dest) {
        return shortcut_link_atom(dest);
    }
    if display_has_macro(display) {
        return link_over_display(display);
    }
    let (inner, is_code) = if let [Inline::MdCode(content)] = display {
        (content.clone(), true)
    } else {
        (plain, false)
    };
    let link = format!("(\\link {})", text_atom(&inner).unwrap_or_default());
    code_wrap(link, is_code)
}
/// True when any top-level inline in `display` is an Rd macro.
fn display_has_macro(display: &[Inline]) -> bool {
    for inl in display {
        if let Inline::Macro(_) = inl {
            return true;
        }
    }
    false
}
/// Wraps the serialized display atoms, joined by single spaces, in a `\link`
/// atom.
fn link_over_display(display: &[Inline]) -> String {
    let atoms = serialize_inlines(display, true);
    format!("(\\link {})", atoms.join(" "))
}
/// Renders a `[display]` shortcut link node.
///
/// A single code span is re-wrapped in backticks and handed to
/// `shortcut_link_atom`; a display containing an Rd macro goes through
/// `link_over_display`; anything else uses the plain display text.
fn shortcut_link_node_atom(display: &[Inline]) -> String {
    if let [Inline::MdCode(content)] = display {
        return shortcut_link_atom(&format!("`{content}`"));
    }
    if display_has_macro(display) {
        link_over_display(display)
    } else {
        shortcut_link_atom(&inline_plain_text(display))
    }
}
/// False for a display that is exactly one code span. Otherwise true as soon
/// as the display holds something other than text or Rd macros, or a macro
/// whose argument has active markdown.
fn link_display_is_droppable(display: &[Inline]) -> bool {
    if let [Inline::MdCode(_)] = display {
        return false;
    }
    display.iter().any(|inl| match inl {
        Inline::Text(_) => false,
        Inline::Macro(n) => macro_arg_has_active_markdown(n),
        _ => true,
    })
}
/// True when `node` is a markdown-inline text macro whose single argument
/// resolves to inlines containing active markdown.
fn macro_arg_has_active_markdown(node: &SyntaxNode) -> bool {
    let head = macro_head(node);
    let name = head.trim_start_matches('\\');
    if !is_md_inline_text_macro(name) {
        return false;
    }
    match macro_single_arg_content(node) {
        Some(content) => inlines_have_active_markdown(&resolve_macro_arg_inlines(&content)),
        None => false,
    }
}
/// True when `inlines` holds anything besides plain text and Rd macros, or a
/// macro whose own argument has active markdown.
fn inlines_have_active_markdown(inlines: &[Inline]) -> bool {
    for inl in inlines {
        let active = match inl {
            Inline::Text(_) => false,
            Inline::Macro(n) => macro_arg_has_active_markdown(n),
            _ => true,
        };
        if active {
            return true;
        }
    }
    false
}
/// Concatenates the plain text of `inlines`.
///
/// Text and code spans contribute their content; emphasis, inline links and
/// shortcut links contribute the plain text of their children. All other
/// inlines contribute nothing.
fn inline_plain_text(inlines: &[Inline]) -> String {
    let mut plain = String::new();
    for inl in inlines {
        match inl {
            Inline::Text(t) | Inline::MdCode(t) => plain.push_str(t),
            Inline::MdEmphasis {
                children: nested, ..
            }
            | Inline::MdInlineLink {
                display: nested, ..
            }
            | Inline::MdShortcutLink { display: nested } => {
                plain.push_str(&inline_plain_text(nested));
            }
            _ => {}
        }
    }
    plain
}
/// Concatenates the label text of `inlines`.
///
/// Works like `inline_plain_text`, except that Rd macros contribute their
/// full source text instead of nothing.
fn link_label_text(inlines: &[Inline]) -> String {
    let mut label = String::new();
    for inl in inlines {
        match inl {
            Inline::Text(t) | Inline::MdCode(t) => label.push_str(t),
            Inline::MdEmphasis {
                children: nested, ..
            }
            | Inline::MdInlineLink {
                display: nested, ..
            }
            | Inline::MdShortcutLink { display: nested } => {
                label.push_str(&link_label_text(nested));
            }
            Inline::Macro(n) => label.push_str(&n.text().to_string()),
            _ => {}
        }
    }
    label
}
/// Renders `[text](url)`: a URL atom over `text` when `url` is empty or equals
/// `text` after whitespace normalization, an href atom otherwise.
fn inline_link_atom(text: &str, url: &str) -> String {
    let bare = url.is_empty() || norm_ws(url) == norm_ws(text);
    if bare {
        return url_atom(text);
    }
    href_atom(text, url)
}
/// True when `inner` starts with a URI scheme: an ASCII letter followed by
/// ASCII letters, digits, `+`, `.` or `-`, 2 to 32 characters in total, and
/// then a `:`.
fn autolink_has_uri_scheme(inner: &str) -> bool {
    let bytes = inner.as_bytes();
    match bytes.first() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }
    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || matches!(**b, b'+' | b'.' | b'-'))
        .count();
    (2..=32).contains(&scheme_len) && bytes.get(scheme_len) == Some(&b':')
}
/// Builds a `\url` atom carrying `url` as an encoded VERB argument.
fn url_atom(url: &str) -> String {
    let verb = encode_text(url);
    format!("(\\url (VERB {verb}))")
}
/// Builds a `\href` atom: the encoded URL first, then the display atom when
/// `link_display_atom` produces one.
fn href_atom(text: &str, url: &str) -> String {
    let url_part = format!("(VERB {})", encode_text(url));
    match link_display_atom(text) {
        Some(display) => format!("(\\href {url_part} {display})"),
        None => format!("(\\href {url_part})"),
    }
}
/// Renders link display text: a code atom when `text` is wrapped in
/// backticks, otherwise whatever `text_atom` yields for it.
fn link_display_atom(text: &str) -> Option<String> {
    match unwrap_code_span(text) {
        (inner, true) => Some(md_code_atom(inner)),
        (_, false) => text_atom(text),
    }
}
/// Renders `[text][dest]` from raw strings.
///
/// When the display (with surrounding backticks removed) equals `dest` after
/// whitespace normalization, the result is a shortcut link to `dest`;
/// otherwise a `\link` atom over the display, wrapped in `\code` when the
/// display was a code span.
fn ref_link_atom(text: &str, dest: &str) -> String {
    let (display, is_code) = unwrap_code_span(text);
    if norm_ws(display) == norm_ws(dest) {
        return shortcut_link_atom(dest);
    }
    let link = format!("(\\link {})", text_atom(display).unwrap_or_default());
    code_wrap(link, is_code)
}
/// Renders a `[dest]` shortcut link.
///
/// Surrounding backticks are removed first. The result is wrapped in `\code`
/// when backticks were present or the target ends with `()`. A `pkg::`
/// prefix is split off at the last `::`. A target ending in `-class` loses
/// that suffix and uses `\linkS4class` when no package prefix is present.
fn shortcut_link_atom(dest: &str) -> String {
    let (target, from_code_span) = unwrap_code_span(dest);
    let (pkg, fun) = target
        .rsplit_once("::")
        .map_or((None, target), |(p, f)| (Some(p), f));
    let is_s4 = target.ends_with("-class");
    let topic = if is_s4 {
        fun.strip_suffix("-class").unwrap_or(fun)
    } else {
        fun
    };
    let head = if is_s4 && pkg.is_none() {
        "\\linkS4class"
    } else {
        "\\link"
    };
    let display = pkg.map_or_else(|| topic.to_string(), |p| format!("{p}::{topic}"));
    let link = format!("({head} {})", text_atom(&display).unwrap_or_default());
    code_wrap(link, from_code_span || target.ends_with("()"))
}
/// Renders the raw text of a markdown image token as a figure atom.
///
/// The `[alt]` group is expected at byte 1 and must be followed by a
/// `(dest)` group that ends the token; otherwise `None` is returned.
fn resolve_md_image(raw: &str) -> Option<String> {
    let bytes = raw.as_bytes();
    let alt_end =
        scan_delimited(bytes, 1, b'[', b']').filter(|&end| bytes.get(end) == Some(&b'('))?;
    let dest_end =
        scan_delimited(bytes, alt_end, b'(', b')').filter(|&end| end == bytes.len())?;
    let (url, title) = split_image_dest(&raw[alt_end + 1..dest_end - 1]);
    Some(figure_atom(url, title))
}
/// Splits an image destination into `(url, title)`.
///
/// A `<...>` URL ends at the first `>`; otherwise the URL ends at the first
/// whitespace. The remainder is trimmed and passed through
/// `strip_title_delims`.
fn split_image_dest(dest: &str) -> (&str, &str) {
    let dest = dest.trim();
    let (url, rest) = if dest.starts_with('<') {
        dest.find('>')
            .map_or((dest, ""), |close| (&dest[1..close], &dest[close + 1..]))
    } else {
        dest.find(char::is_whitespace)
            .map_or((dest, ""), |space| dest.split_at(space))
    };
    (url, strip_title_delims(rest.trim()))
}
/// Removes one matching pair of title delimiters (`"…"`, `'…'` or `(…)`)
/// from `s`; any other string is returned unchanged.
fn strip_title_delims(s: &str) -> &str {
    if s.len() < 2 {
        return s;
    }
    for (open, close) in [('"', '"'), ('\'', '\''), ('(', ')')] {
        let unwrapped = s
            .strip_prefix(open)
            .and_then(|rest| rest.strip_suffix(close));
        if let Some(inner) = unwrapped {
            return inner;
        }
    }
    s
}
/// Builds a `\figure` atom for `url` with an optional title argument.
///
/// When `image_format` restricts the image to HTML or PDF output, the figure
/// is wrapped in a `\if` atom for that format.
fn figure_atom(url: &str, title: &str) -> String {
    let mut args = vec![format!("(VERB {})", encode_text(url))];
    if !title.is_empty() {
        args.push(format!("(VERB {})", encode_text(title)));
    }
    let figure = format!("(\\figure {})", args.join(" "));
    let only_for = match image_format(url) {
        ImageFormat::Html => Some("html"),
        ImageFormat::Pdf => Some("pdf"),
        ImageFormat::All => None,
    };
    match only_for {
        Some(format_name) => format!("(\\if (TEXT {}) {figure})", encode_text(format_name)),
        None => figure,
    }
}
/// Output formats an image is emitted for, as classified by `image_format`.
enum ImageFormat {
    /// `.svg` files.
    Html,
    /// `.pdf` files.
    Pdf,
    /// Every other URL, including `.jpg`, `.jpeg`, `.gif` and `.png`.
    All,
}
/// Classifies `url` by the text after its last `.`, compared
/// case-insensitively (ASCII): `svg` is HTML-only, `pdf` is PDF-only, and
/// everything else (including URLs without a dot) applies to all formats.
fn image_format(url: &str) -> ImageFormat {
    let lower = url.to_ascii_lowercase();
    let Some((_, extension)) = lower.rsplit_once('.') else {
        return ImageFormat::All;
    };
    match extension {
        "svg" => ImageFormat::Html,
        "pdf" => ImageFormat::Pdf,
        _ => ImageFormat::All,
    }
}
/// Wraps `inner` in a `\code` atom when `is_code` is set; otherwise returns
/// it untouched.
fn code_wrap(inner: String, is_code: bool) -> String {
    if !is_code {
        return inner;
    }
    format!("(\\code {inner})")
}
/// Removes one surrounding pair of backticks from `s`.
///
/// Returns the inner text and `true` when `s` is at least two bytes long and
/// both starts and ends with a backtick; otherwise `s` itself and `false`.
fn unwrap_code_span(s: &str) -> (&str, bool) {
    if s.len() >= 2 {
        let inner = s
            .strip_prefix('`')
            .and_then(|rest| rest.strip_suffix('`'));
        if let Some(inner) = inner {
            return (inner, true);
        }
    }
    (s, false)
}
/// Scans a balanced `open`…`close` group that must begin at `start`.
///
/// Nested groups are tracked by depth. Returns the index just past the
/// matching `close`, or `None` when `bytes[start]` is not `open` or the group
/// is never closed.
fn scan_delimited(bytes: &[u8], start: usize, open: u8, close: u8) -> Option<usize> {
    if bytes.get(start) != Some(&open) {
        return None;
    }
    let mut depth = 0usize;
    for (offset, &byte) in bytes[start..].iter().enumerate() {
        if byte == open {
            depth += 1;
        } else if byte == close {
            // `depth` is at least 1 here: the scan starts on `open` and
            // returns as soon as the depth drops back to zero.
            depth -= 1;
            if depth == 0 {
                return Some(start + offset + 1);
            }
        }
    }
    None
}
fn serialize_md_list(node: &SyntaxNode) -> String {
let head = if md_list_is_ordered(node) {
"\\enumerate"
} else {
"\\itemize"
};
let mut atoms: Vec<String> = Vec::new();
for item in node
.children()
.filter(|n| n.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
{
atoms.push("(\\item)".to_string());
atoms.extend(serialize_inlines(&md_list_item_inlines(&item), true));
}
if atoms.is_empty() {
format!("({head})")
} else {
format!("({head} {})", atoms.join(" "))
}
}
/// Serializes already-extracted list items as an `\enumerate` (when
/// `ordered`) or `\itemize` atom, with one `(\item)` marker before each
/// item's serialized inlines.
fn serialize_md_list_resolved(ordered: bool, items: &[Vec<Inline>]) -> String {
    let head = if ordered { "\\enumerate" } else { "\\itemize" };
    let atoms: Vec<String> = items
        .iter()
        .flat_map(|item| {
            std::iter::once("(\\item)".to_string()).chain(serialize_inlines(item, true))
        })
        .collect();
    if atoms.is_empty() {
        return format!("({head})");
    }
    format!("({head} {})", atoms.join(" "))
}
/// Serializes a fenced code block as three atoms: an HTML-only opening
/// `<div>` whose class is `sourceCode` plus the info string, the
/// `\preformatted` code, and an HTML-only closing `</div>`.
fn serialize_md_code_block(node: &SyntaxNode) -> Vec<String> {
    let (info, code) = md_code_block_parts(node);
    let mut class = String::from("sourceCode");
    if !info.is_empty() {
        class.push(' ');
        class.push_str(&info);
    }
    let html = encode_text("html");
    let open_div = encode_text(&format!("<div class=\"{class}\">"));
    let close_div = encode_text("</div>");
    vec![
        format!("(\\if (TEXT {html}) (\\out (VERB {open_div})))"),
        format!("(\\preformatted (VERB {}))", encode_text(&code)),
        format!("(\\if (TEXT {html}) (\\out (VERB {close_div})))"),
    ]
}
/// Serializes an indented code block.
///
/// Each line loses its roxygen marker and up to four leading spaces and is
/// terminated with `'\n'`. The code is emitted as per-line VERB atoms inside
/// `\preformatted`, between HTML-only `<div class="sourceCode">` and `</div>`
/// atoms.
fn serialize_md_indented_code(node: &SyntaxNode) -> Vec<String> {
    let text = node.text().to_string();
    let mut code = String::new();
    for line in text.split('\n') {
        let content = strip_marker(line);
        let indent = content
            .bytes()
            .take(4)
            .take_while(|&b| b == b' ')
            .count();
        code.push_str(&content[indent..]);
        code.push('\n');
    }
    let html = encode_text("html");
    let open_div = encode_text("<div class=\"sourceCode\">");
    let close_div = encode_text("</div>");
    vec![
        format!("(\\if (TEXT {html}) (\\out (VERB {open_div})))"),
        format!("(\\preformatted {})", verb_atoms(&code).join(" ")),
        format!("(\\if (TEXT {html}) (\\out (VERB {close_div})))"),
    ]
}
/// Serializes an HTML block as an HTML-only `\out` atom.
///
/// The body starts with a newline, followed by every line of the node with
/// its roxygen marker stripped and a `'\n'` appended.
fn serialize_md_html_block(node: &SyntaxNode) -> String {
    let text = node.text().to_string();
    let body = text.split('\n').fold(String::from("\n"), |mut acc, line| {
        acc.push_str(strip_marker(line));
        acc.push('\n');
        acc
    });
    let html = encode_text("html");
    let verbs = verb_atoms(&body).join(" ");
    format!("(\\if (TEXT {html}) (\\out {verbs}))")
}
/// Flattens a block quote to plain text.
///
/// Each line is stripped of its roxygen marker and `>` marker, resolved to
/// inlines, and its plain text is appended with `SOFT_BREAK` characters
/// removed. No separator is inserted between lines.
fn block_quote_flat_text(node: &SyntaxNode) -> String {
    let text = node.text().to_string();
    let mut flat = String::new();
    for line in text.split('\n') {
        let inner = strip_block_quote_marker(strip_marker(line));
        let plain = inline_plain_text(&resolve_macro_arg_inlines(inner));
        flat.extend(plain.chars().filter(|&ch| ch != SOFT_BREAK));
    }
    flat
}
/// Removes leading spaces, then one `>` if present, then one following space
/// if present.
fn strip_block_quote_marker(content: &str) -> &str {
    let unindented = content.trim_start_matches(' ');
    let after_marker = match unindented.strip_prefix('>') {
        Some(rest) => rest,
        None => unindented,
    };
    match after_marker.strip_prefix(' ') {
        Some(rest) => rest,
        None => after_marker,
    }
}
/// Column alignment parsed from a markdown table delimiter row.
#[derive(Clone, Copy)]
enum TableAlign {
    Left,
    Center,
    Right,
}
impl TableAlign {
    /// One-letter alignment code: `l`, `c` or `r`.
    fn code(self) -> char {
        match self {
            Self::Left => 'l',
            Self::Center => 'c',
            Self::Right => 'r',
        }
    }
}
/// Serializes a markdown table as a `\tabular` atom.
///
/// The second line supplies the column alignments and the column count. Every
/// other line is one row: cells are separated by `(\tab)`, missing cells are
/// left empty, surplus cells are ignored, and each row ends with `(\cr)`.
/// A table with fewer than two lines yields a bare `(\tabular)`.
fn serialize_md_table(node: &SyntaxNode) -> String {
    let text = node.text().to_string();
    let lines: Vec<&str> = text.split('\n').map(strip_marker).collect();
    let [header, delim, body @ ..] = lines.as_slice() else {
        return "(\\tabular)".to_string();
    };
    let aligns = parse_table_delim(*delim);
    let align_str: String = aligns.iter().map(|a| a.code()).collect();
    let mut grp: Vec<String> = Vec::new();
    for row in std::iter::once(*header).chain(body.iter().copied()) {
        let cells = split_table_row_cells(row);
        for col in 0..aligns.len() {
            if col > 0 {
                grp.push("(\\tab)".to_string());
            }
            let Some(cell) = cells.get(col) else { continue };
            let content = unescape_table_pipes(cell.trim());
            let inlines = resolve_macro_arg_inlines(&content);
            grp.extend(serialize_inlines(&inlines, true));
        }
        grp.push("(\\cr)".to_string());
    }
    format!(
        "(\\tabular (TEXT {}) (GRP {}))",
        encode_text(&align_str),
        grp.join(" ")
    )
}
/// Reads the alignment of each cell in a table delimiter row: `:--:` is
/// centered, `--:` is right-aligned, anything else is left-aligned.
fn parse_table_delim(line: &str) -> Vec<TableAlign> {
    let mut aligns = Vec::new();
    for cell in split_table_row_cells(line).iter() {
        let spec = cell.trim();
        let align = if !spec.ends_with(':') {
            TableAlign::Left
        } else if spec.starts_with(':') {
            TableAlign::Center
        } else {
            TableAlign::Right
        };
        aligns.push(align);
    }
    aligns
}
/// Turns every escaped pipe (`\|`) in a table cell back into a plain `|`.
///
/// All other text, including backslashes not directly followed by a pipe, is
/// left as it is.
fn unescape_table_pipes(cell: &str) -> String {
    // Cutting at each `\|` and re-joining with `|` drops just the backslash.
    let pieces: Vec<&str> = cell.split("\\|").collect();
    pieces.join("|")
}
fn verb_atoms(body: &str) -> Vec<String> {
let mut atoms = Vec::new();
let mut rest = body;
while let Some(idx) = rest.find('\n') {
let (seg, tail) = rest.split_at(idx + 1);
atoms.push(format!("(VERB {})", encode_text(seg)));
rest = tail;
}
if !rest.is_empty() {
atoms.push(format!("(VERB {})", encode_text(rest)));
}
atoms
}
/// Wraps `raw` in an `\if` atom whose condition is the encoded text `html`
/// and whose `\out` body carries `raw` as a single `VERB` leaf.
fn html_inline_atom(raw: &str) -> String {
    let condition = encode_text("html");
    let payload = encode_text(raw);
    format!("(\\if (TEXT {condition}) (\\out (VERB {payload})))")
}
/// Splits a fenced code block node into its `(info, code)` parts.
///
/// * `info` comes from the first line: the roxygen marker is stripped, then
///   leading backticks, then surrounding whitespace.
/// * `code` is built from the lines strictly between the first and the last
///   line; each has its roxygen marker stripped and is terminated with `\n`.
///   A block of two lines or fewer has empty code.
fn md_code_block_parts(node: &SyntaxNode) -> (String, String) {
    let source = node.text().to_string();
    let lines: Vec<&str> = source.split('\n').collect();

    let info = match lines.first() {
        Some(opening) => strip_marker(opening)
            .trim_start_matches('`')
            .trim()
            .to_string(),
        None => String::new(),
    };

    // Everything but the first and last line; an out-of-range request (blocks
    // that are too short) falls back to no interior lines.
    let interior: &[&str] = lines
        .get(1..lines.len().saturating_sub(1))
        .unwrap_or(&[]);
    let code: String = interior
        .iter()
        .flat_map(|line| [strip_marker(line), "\n"])
        .collect();

    (info, code)
}
/// Strips the roxygen comment marker from the start of `line`.
///
/// Removes, in order: leading whitespace, every leading `#`, at most one `'`,
/// and at most one space. Whatever follows is returned untouched, so any
/// further indentation of the content is preserved.
fn strip_marker(line: &str) -> &str {
    let mut rest = line.trim_start().trim_start_matches('#');
    if let Some(after_quote) = rest.strip_prefix('\'') {
        rest = after_quote;
    }
    if let Some(after_space) = rest.strip_prefix(' ') {
        rest = after_space;
    }
    rest
}
/// Reports whether a Markdown list node is an ordered list.
///
/// The decision is made by the first `ROXYGEN_MD_LIST_ITEM` child that carries
/// a `ROXYGEN_MD_LIST_MARKER` token: the list is ordered when that marker's
/// text starts with an ASCII digit. A list with no such marker is unordered.
fn md_list_is_ordered(node: &SyntaxNode) -> bool {
    for item in node.children() {
        if item.kind() != SyntaxKind::ROXYGEN_MD_LIST_ITEM {
            continue;
        }
        let marker = item
            .children_with_tokens()
            .filter_map(|element| element.into_token())
            .find(|token| token.kind() == SyntaxKind::ROXYGEN_MD_LIST_MARKER);
        let Some(marker) = marker else {
            continue;
        };
        return marker.text().starts_with(|c: char| c.is_ascii_digit());
    }
    false
}
/// Collects the inline content of one Markdown list item.
///
/// List-marker and roxygen-marker tokens are skipped, each `NEWLINE` becomes a
/// `SOFT_BREAK` text inline, and every other child is handed to `push_inline`.
fn md_list_item_inlines(item: &SyntaxNode) -> Vec<Inline> {
    let mut inlines = Vec::new();
    for element in item.children_with_tokens() {
        let kind = element.kind();
        if matches!(
            kind,
            SyntaxKind::ROXYGEN_MD_LIST_MARKER | SyntaxKind::ROXYGEN_MARKER
        ) {
            continue;
        }
        if kind == SyntaxKind::NEWLINE {
            inlines.push(Inline::Text(SOFT_BREAK.to_string()));
        } else {
            push_inline(&mut inlines, element);
        }
    }
    inlines
}
/// Renders a Markdown code span as an Rd atom.
///
/// Content that `code_span_is_r` accepts becomes `(\code (RCODE …))`;
/// everything else becomes `(\verb (VERB …))`.
fn md_code_atom(content: &str) -> String {
    let (macro_name, leaf) = if code_span_is_r(content) {
        ("code", "RCODE")
    } else {
        ("verb", "VERB")
    };
    format!("(\\{macro_name} ({leaf} {}))", encode_text(content))
}
/// Code-span contents that `code_span_is_r` accepts as R code outright,
/// without attempting to parse them: operators, opening brackets, and a few
/// keywords.
const SPECIAL_CODE: &[&str] = &[
"-", ":", "::", ":::", "!", "!=", "(", "[", "[[", "@", "*", "/", "&", "&&", "%*%", "%/%", "%%",
"%in%", "%o%", "%x%", "^", "+", "<", "<=", "=", "==", ">", ">=", "|", "||", "~", "$", "for",
"function", "if", "repeat", "while",
];
/// Decides whether the content of a Markdown code span counts as R code.
///
/// Returns `true` for any entry of `SPECIAL_CODE`. Otherwise the content is
/// parsed and must satisfy all of:
/// * the parse produced no diagnostics;
/// * the root has exactly one child that is not whitespace, a newline, or a
///   comment;
/// * `has_invalid_underscore_name` does not reject the tree.
fn code_span_is_r(code: &str) -> bool {
    if SPECIAL_CODE.iter().any(|special| *special == code) {
        return true;
    }

    let parsed = crate::parser::parse(code);
    if !parsed.diagnostics.is_empty() {
        return false;
    }

    let mut significant = parsed.cst.children_with_tokens().filter(|element| {
        !matches!(
            element.kind(),
            SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::COMMENT
        )
    });
    // Exactly one significant child: a first one exists and a second does not.
    let single_expression = significant.next().is_some() && significant.next().is_none();

    single_expression && !has_invalid_underscore_name(&parsed.cst)
}
/// Reports whether the tree contains an identifier with a disallowed leading
/// underscore.
///
/// Any `IDENT` token that starts with `_` and is longer than one byte is
/// rejected. A bare `_` is rejected too, unless a `PIPE` token occurs
/// somewhere in the tree (as in `x |> _$col`).
fn has_invalid_underscore_name(cst: &SyntaxNode) -> bool {
    let pipe_present = cst
        .descendants_with_tokens()
        .any(|element| element.kind() == SyntaxKind::PIPE);

    for element in cst.descendants_with_tokens() {
        let Some(token) = element.into_token() else {
            continue;
        };
        if token.kind() != SyntaxKind::IDENT {
            continue;
        }
        let name = token.text();
        if !name.starts_with('_') {
            continue;
        }
        if name.len() > 1 || !pipe_present {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn projects_plain_prose_sections() {
let src = "#' Add two numbers\n\
#' @param x,y Numbers to add.\n\
#' @return Their sum.\n\
#' @export\n\
add <- function(x, y) x + y\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Add two numbers\"))\n\
(\\title (TEXT \"Add two numbers\"))\n\
(\\value (TEXT \"Their sum.\"))"
);
}
#[test]
fn two_intro_paragraphs_split_title_and_description() {
let src = "#' Example dataset\n\
#'\n\
#' A longer description.\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"A longer description.\"))\n\
(\\title (TEXT \"Example dataset\"))"
);
}
#[test]
fn md_heading_hoists_section_and_nests_subsection() {
let src = "#' Title\n\
#'\n\
#' @md\n\
#' @details\n\
#' # First\n\
#' a\n\
#'\n\
#' ## Nested\n\
#' b\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\section (TEXT \"First\") (GRP (TEXT \"a\") (\\subsection (TEXT \"Nested\") (TEXT \"b\"))))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_subsection_without_level_one_stays_in_details() {
let src = "#' Title\n\
#'\n\
#' @md\n\
#' @details\n\
#' Lead.\n\
#'\n\
#' ## Sub\n\
#' body\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\details (TEXT \"Lead.\") (\\subsection (TEXT \"Sub\") (TEXT \"body\")))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_setext_heading_hoists_section() {
let src = "#' Title\n\
#'\n\
#' @md\n\
#' @details\n\
#' Big\n\
#' ===\n\
#'\n\
#' body\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\section (TEXT \"Big\") (TEXT \"body\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_setext_multiline_title_and_nesting() {
let src = "#' Title\n\
#'\n\
#' @md\n\
#' @details\n\
#' Top\n\
#' ===\n\
#' intro\n\
#' Sub\n\
#' ---\n\
#' deep\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\section (TEXT \"Top\") (\\subsection (TEXT \"intro Sub\") (TEXT \"deep\")))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_setext_single_dash_underline_hoists_subsection() {
let src = "#' Title\n\
#'\n\
#' @md\n\
#' @details\n\
#' Foo\n\
#' -\n\
#' bar\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\details (\\subsection (TEXT \"Foo\") (TEXT \"bar\")))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn three_intro_paragraphs_split_title_description_details() {
let src = "#' title\n\
#'\n\
#' description\n\
#'\n\
#' details\n\
#' @name a\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"description\"))\n\
(\\details (TEXT \"details\"))\n\
(\\title (TEXT \"title\"))"
);
}
#[test]
fn section_body_serializes_inline_macros_with_grp_wrap() {
let src = "#' Title\n\
#'\n\
#' Description.\n\
#' @section Foobar:\n\
#' With some \\strong{bold text}.\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description.\"))\n\
(\\section (TEXT \"Foobar\") (GRP (TEXT \"With some\") (\\strong (TEXT \"bold text\")) (TEXT \".\")))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn non_md_percent_is_an_rd_line_comment() {
let src = "#' Title here\n\
#'\n\
#' Desc with a %% comment to end of line\n\
#' @format %\n\
x <- list(a = 1, b = 2)\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Desc with a\"))\n\
(\\format)\n\
(\\title (TEXT \"Title here\"))"
);
}
#[test]
fn non_md_percent_comment_is_scoped_per_line() {
let src = "#' Title\n\
#' @details First detail line %% trailing comment\n\
#' second detail line stays\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\details (TEXT \"First detail line second detail line stays\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_mode_percent_survives() {
let src = "#' Title\n\
#' @md\n\
#' @format % and more\n\
x <- list(a = 1)\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\format (TEXT \"% and more\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn strip_rd_line_comment_honors_backslash_escape() {
assert_eq!(strip_rd_line_comment("a %% b"), "a ");
assert_eq!(strip_rd_line_comment("%"), "");
assert_eq!(strip_rd_line_comment("no comment here"), "no comment here");
assert_eq!(
strip_rd_line_comment("keep \\% literal"),
"keep \\% literal"
);
assert_eq!(
strip_rd_line_comment("keep \\% then % cut"),
"keep \\% then "
);
}
#[test]
fn block_macro_joins_its_paragraph_then_splits_at_blank_line() {
let src = "#' Title\n\
#'\n\
#' Description with some\n\
#' \\itemize{\n\
#' \\item itemized\n\
#' \\item list\n\
#' }\n\
#'\n\
#' And then another one:\n\
#' \\itemize{\n\
#' \\item item 1\n\
#' \\item item 2\n\
#' }\n\
foo <- function() {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description with some\") \
(\\itemize (\\item) (TEXT \"itemized\") (\\item) (TEXT \"list\")))\n\
(\\details (TEXT \"And then another one:\") \
(\\itemize (\\item) (TEXT \"item 1\") (\\item) (TEXT \"item 2\")))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn trailing_intro_details_merge_with_explicit_details_tag() {
let src = "#' Title\n\
#'\n\
#' Description\n\
#'\n\
#' Details1\n\
#'\n\
#' Details2\n\
#'\n\
#' @details Details3\n\
#'\n\
#' Details4\n\
foo <- function(x) {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description\"))\n\
(\\details (TEXT \"Details1 Details2 Details3 Details4\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn explicit_title_without_description_duplicates_into_description() {
let src = "#' @title a\n#' @name a\nNULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"a\"))\n(\\title (TEXT \"a\"))"
);
}
#[test]
fn null_tag_value_suppresses_section() {
let src = "#' Title\n\
#' @description NULL\n\
#' @details NULL\n\
#' @format NULL\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n(\\title (TEXT \"Title\"))"
);
}
#[test]
fn sameline_tag_value_folds_plain_continuation() {
let src = "#' Title\n\
#' @details First line\n\
#' second line.\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n\
(\\details (TEXT \"First line second line.\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn md_table_projects_to_tabular() {
let src = "#' T\n\
#' @md\n\
#' @details\n\
#' | a | b |\n\
#' | :-- | --: |\n\
#' | *x* | y |\n\
#' | solo |\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"T\"))\n\
(\\details (\\tabular (TEXT \"lr\") (GRP \
(TEXT \"a\") (\\tab) (TEXT \"b\") (\\cr) \
(\\emph (TEXT \"x\")) (\\tab) (TEXT \"y\") (\\cr) \
(TEXT \"solo\") (\\tab) (\\cr))))\n\
(\\title (TEXT \"T\"))"
);
}
#[test]
fn md_block_quote_flattens_to_plain_text() {
let src = "#' T\n\
#' @md\n\
#' @details\n\
#' > a *quote* with `code`\n\
#' > and [text](https://x.org)\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"a quote with codeand text\"))\n\
(\\title (TEXT \"T\"))"
);
}
#[test]
fn md_block_quote_glues_onto_adjacent_prose() {
let same_part = "#' T\n\
#' @md\n\
#' @details\n\
#' before\n\
#' > quoted line\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(same_part),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"beforequoted line\"))\n\
(\\title (TEXT \"T\"))"
);
let around = "#' T\n\
#' @md\n\
#' @details\n\
#' before\n\
#'\n\
#' > quoted\n\
#'\n\
#' after\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(around),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"beforequoted after\"))\n\
(\\title (TEXT \"T\"))"
);
let two_quotes = "#' T\n\
#' @md\n\
#' @details\n\
#' > q1\n\
#'\n\
#' > q2\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(two_quotes),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"q1q2\"))\n\
(\\title (TEXT \"T\"))"
);
}
#[test]
fn md_block_quote_lazy_continuation_folds_into_quote() {
let src = "#' T\n\
#' @md\n\
#' @details\n\
#' > quoted line one\n\
#' lazy continuation\n\
#'\n\
#' Separate paragraph.\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"quoted line onelazy continuation Separate paragraph.\"))\n\
(\\title (TEXT \"T\"))"
);
}
#[test]
fn md_thematic_break_renders_empty_and_coalesces() {
let src = "#' T\n\
#' @md\n\
#' @details\n\
#' Before.\n\
#'\n\
#' ---\n\
#'\n\
#' Foo\n\
#' ***\n\
#' bar\n\
#'\n\
#' ___\n\
#'\n\
#' After.\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"T\"))\n\
(\\details (TEXT \"Before. Foo bar After.\"))\n\
(\\title (TEXT \"T\"))"
);
}
#[test]
fn examples_body_is_a_placeholder() {
let src = "#' T\n#' @examples\n#' f(1)\n#' @name d\nNULL\n";
assert!(project_to_rd(src).contains("(\\examples ...)"));
}
#[test]
fn multiple_examples_tags_merge_into_one_section() {
let src = "#' @name a\n\
#' @title a\n\
#' @examples\n\
#' TRUE\n\
#' @examples\n\
#' FALSE\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"a\"))\n\
(\\examples ...)\n\
(\\title (TEXT \"a\"))"
);
}
#[test]
fn md_non_fragile_macro_arg_is_markdown_processed() {
let emph = "#' @md\n#' @title T\n#' @details A \\emph{*x*} b.\n#' @name x\nNULL\n";
assert!(
project_to_rd(emph)
.contains("(\\details (TEXT \"A\") (\\emph (\\emph (TEXT \"x\"))) (TEXT \"b.\"))"),
"{}",
project_to_rd(emph)
);
let multi = "#' @md\n#' @title T\n#' @details A \\emph{a *b* c} d.\n#' @name x\nNULL\n";
assert!(
project_to_rd(multi).contains(
"(\\details (TEXT \"A\") (\\emph (TEXT \"a\") (\\emph (TEXT \"b\")) (TEXT \"c\")) (TEXT \"d.\"))"
),
"{}",
project_to_rd(multi)
);
let strong = "#' @md\n#' @title T\n#' @details A \\strong{*x*} b.\n#' @name x\nNULL\n";
assert!(
project_to_rd(strong).contains("(\\strong (\\emph (TEXT \"x\")))"),
"{}",
project_to_rd(strong)
);
let code = "#' @md\n#' @title T\n#' @details A \\code{*x*} b.\n#' @name x\nNULL\n";
assert!(
project_to_rd(code).contains("(\\code (RCODE \"*x*\"))"),
"{}",
project_to_rd(code)
);
}
#[test]
fn md_structural_macro_args_are_markdown_processed() {
let item = "#' @md\n#' @title T\n#' @details\n#' \\describe{\n\
#' \\item{*term*}{a \\strong{bold} def}\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(item).contains(
"(\\describe (\\item (\\emph (TEXT \"term\")) \
(GRP (TEXT \"a\") (\\strong (TEXT \"bold\")) (TEXT \"def\"))))"
),
"{}",
project_to_rd(item)
);
let two = "#' @md\n#' @title T\n#' @details\n#' \\describe{\n\
#' \\item{*term*}{*def*}\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(two)
.contains("(\\describe (\\item (\\emph (TEXT \"term\")) (\\emph (TEXT \"def\"))))"),
"{}",
project_to_rd(two)
);
let frag = "#' @md\n#' @title T\n#' @details\n#' \\describe{\n\
#' \\item{x}{a \\code{*y*} b}\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(frag).contains(
"(\\item (TEXT \"x\") (GRP (TEXT \"a\") (\\code (RCODE \"*y*\")) (TEXT \"b\")))"
),
"{}",
project_to_rd(frag)
);
let href = "#' @md\n#' @title T\n#' @details See \\href{http://x.org}{*the* site}.\n\
#' @name x\nNULL\n";
assert!(
project_to_rd(href).contains(
"(\\href (VERB \"http://x.org\") (GRP (\\emph (TEXT \"the\")) (TEXT \"site\")))"
),
"{}",
project_to_rd(href)
);
let tab = "#' @md\n#' @title T\n#' @details\n#' \\tabular{ll}{\n\
#' *a* \\tab **b** \\cr\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(tab).contains(
"(\\tabular (TEXT \"ll\") (GRP (\\emph (TEXT \"a\")) (\\tab) (\\strong (TEXT \"b\")) (\\cr)))"
),
"{}",
project_to_rd(tab)
);
}
#[test]
fn md_structural_macro_arg_emphasis_spans_nested_macro() {
let item = "#' @md\n#' @title T\n#' @details\n#' \\describe{\n\
#' \\item{x}{*a \\strong{y} b*}\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(item).contains(
"(\\item (TEXT \"x\") (\\emph (TEXT \"a\") (\\strong (TEXT \"y\")) (TEXT \"b\")))"
),
"{}",
project_to_rd(item)
);
let tab = "#' @md\n#' @title T\n#' @details\n#' \\tabular{ll}{\n\
#' *a \\tab b* \\cr\n#' }\n#' @name x\nNULL\n";
assert!(
project_to_rd(tab).contains(
"(\\tabular (TEXT \"ll\") (GRP (\\emph (TEXT \"a\") (\\tab) (TEXT \"b\")) (\\cr)))"
),
"{}",
project_to_rd(tab)
);
}
#[test]
fn md_emphasis_span_abuts_an_inline_macro() {
let opens = "#' @md\n#' @title T\n#' @details a*\\code{x} y*\n#' @name x\nNULL\n";
assert!(
project_to_rd(opens)
.contains("(\\details (TEXT \"a\") (\\emph (\\code (RCODE \"x\")) (TEXT \"y\")))"),
"{}",
project_to_rd(opens)
);
let blocked = "#' @md\n#' @title T\n#' @details a*\\code{z}*b\n#' @name x\nNULL\n";
assert!(
project_to_rd(blocked)
.contains("(\\details (TEXT \"a*\") (\\code (RCODE \"z\")) (TEXT \"*b\"))"),
"{}",
project_to_rd(blocked)
);
}
#[test]
fn md_macro_arg_resolution_is_off_without_md() {
let src = "#' @title T\n#' @details A \\emph{*x*} b.\n#' @name x\nNULL\n";
assert!(
project_to_rd(src).contains("(\\emph (TEXT \"*x*\"))"),
"{}",
project_to_rd(src)
);
}
#[test]
fn md_link_display_with_active_markdown_macro_drops() {
let drop = "#' @md\n#' @title T\n#' @details See [a\\emph{*x*}] here.\n#' @name x\nNULL\n";
assert!(
project_to_rd(drop).contains("(\\details (TEXT \"See here.\"))"),
"{}",
project_to_rd(drop)
);
let keep_plain =
"#' @md\n#' @title T\n#' @details See [a\\emph{x}] here.\n#' @name x\nNULL\n";
assert!(
project_to_rd(keep_plain).contains("(\\link (TEXT \"a\") (\\emph (TEXT \"x\")))"),
"{}",
project_to_rd(keep_plain)
);
let keep_code =
"#' @md\n#' @title T\n#' @details See [a\\code{*x*}] here.\n#' @name x\nNULL\n";
assert!(
project_to_rd(keep_code).contains("(\\link (TEXT \"a\") (\\code (RCODE \"*x*\")))"),
"{}",
project_to_rd(keep_code)
);
let drop_nested = "#' @md\n#' @title T\n#' @details See [x \\emph{a \\strong{*y*}}] here.\n#' @name x\nNULL\n";
assert!(
project_to_rd(drop_nested).contains("(\\details (TEXT \"See here.\"))"),
"{}",
project_to_rd(drop_nested)
);
}
#[test]
fn md_nested_fragile_macro_stays_literal() {
let src =
"#' @md\n#' @title T\n#' @details A \\emph{a \\code{*x*} b} c.\n#' @name x\nNULL\n";
assert!(
project_to_rd(src)
.contains("(\\emph (TEXT \"a\") (\\code (RCODE \"*x*\")) (TEXT \"b\"))"),
"{}",
project_to_rd(src)
);
}
#[test]
fn projects_inline_rd_macros() {
let src = "#' T\n\
#'\n\
#' See \\code{\\link{add}} and \\emph{e}, plus \\url{http://x}\n\
#' and \\link[stats]{lm} end.\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"See\") (\\code (\\link (TEXT \"add\"))) \
(TEXT \"and\") (\\emph (TEXT \"e\")) (TEXT \", plus\") \
(\\url (VERB \"http://x\")) (TEXT \"and\") (\\link (TEXT \"lm\")) \
(TEXT \"end.\"))"
),
"got: {out}"
);
}
#[test]
fn code_macro_body_projects_as_rcode() {
let src = "#' T\n\
#'\n\
#' Some \\code{code} and \\verb{More code.}\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"Some\") (\\code (RCODE \"code\")) (TEXT \"and\") \
(\\verb (VERB \"More code.\")))"
),
"got: {out}"
);
}
#[test]
fn href_projects_verbatim_url_and_latexlike_text() {
let src = "#' T\n\
#'\n\
#' See \\href{http://a.com/x y}{click \\emph{here} now}.\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"See\") (\\href (VERB \"http://a.com/x y\") \
(GRP (TEXT \"click\") (\\emph (TEXT \"here\")) (TEXT \"now\"))) (TEXT \".\"))"
),
"got: {out}"
);
}
#[test]
fn inline_link_code_span_text_subrenders() {
let src = "#' Title\n\
#'\n\
#' Description, see [`code link text`][func].\n\
#' And also [`code as well`](https://external.com).\n\
#' @md\n\
foo <- function() {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description, see\") \
(\\code (\\link (TEXT \"code link text\"))) (TEXT \". And also\") \
(\\href (VERB \"https://external.com\") (\\verb (VERB \"code as well\"))) \
(TEXT \".\"))\n\
(\\title (TEXT \"Title\"))"
);
}
#[test]
fn non_plain_shortcut_links_are_dropped() {
let src = "#' @details\n\
#' A shortcut [*foo*] is dropped, but [a_b] and [`code`] survive \
while [`x` `y`] drops too.\n\
#' @md\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\details (TEXT \"A shortcut is dropped, but\") (\\link (TEXT \"a_b\")) \
(TEXT \"and\") (\\code (\\link (TEXT \"code\"))) (TEXT \"survive while drops too.\"))"
);
}
#[test]
fn non_plain_reference_links_are_dropped() {
let src = "#' @details\n\
#' A reference [*foo*][r1] is dropped, but [plain][r2] and \
[`code`][r3] survive while [`x` `y`][r4] drops too.\n\
#' @md\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\details (TEXT \"A reference is dropped, but\") (\\link (TEXT \"plain\")) \
(TEXT \"and\") (\\code (\\link (TEXT \"code\"))) (TEXT \"survive while drops too.\"))"
);
}
#[test]
fn link_display_droppable_boundary() {
assert!(!link_display_is_droppable(&[Inline::MdCode("x".into())]));
assert!(!link_display_is_droppable(&[Inline::Text("a_b".into())]));
assert!(link_display_is_droppable(&[Inline::MdEmphasis {
strong: false,
children: vec![Inline::Text("foo".into())],
}]));
assert!(link_display_is_droppable(&[
Inline::MdCode("x".into()),
Inline::Text(" ".into()),
Inline::MdCode("y".into()),
]));
assert!(link_display_is_droppable(&[Inline::MdLink(
"<https://e.org>".into()
)]));
}
#[test]
fn multiline_itemize_projects_nested() {
let src = "#' @details\n\
#' \\itemize{\n\
#' \\item one\n\
#' \\item two\n\
#' }\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\details (\\itemize (\\item) (TEXT \"one\") (\\item) (TEXT \"two\")))"
);
}
#[test]
fn multiline_describe_item_projects_two_args() {
let src = "#' T\n\
#' @format A frame:\n\
#' \\describe{\n\
#' \\item{a}{first}\n\
#' \\item{b}{second}\n\
#' }\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\describe (\\item (TEXT \"a\") (TEXT \"first\")) \
(\\item (TEXT \"b\") (TEXT \"second\")))"
),
"got: {out}"
);
}
#[test]
fn multiline_tabular_projects_format_and_grp_body() {
let src = "#' T\n\
#' @details\n\
#' \\tabular{rl}{\n\
#' a \\tab the first row \\cr\n\
#' b \\tab the second row \\cr\n\
#' }\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (\\tabular (TEXT \"rl\") \
(GRP (TEXT \"a\") (\\tab) (TEXT \"the first row\") (\\cr) \
(TEXT \"b\") (\\tab) (TEXT \"the second row\") (\\cr))))"
),
"got: {out}"
);
}
#[test]
fn md_inline_projects_emph_strong_and_code_vs_verb() {
let src = "#' T\n\
#' @details\n\
#' Text with *emphasis*, **strong** words, `inline code`, and `a + b` code.\n\
#' @md\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (TEXT \"Text with\") (\\emph (TEXT \"emphasis\")) (TEXT \",\") \
(\\strong (TEXT \"strong\")) (TEXT \"words,\") (\\verb (VERB \"inline code\")) \
(TEXT \", and\") (\\code (RCODE \"a + b\")) (TEXT \"code.\"))"
),
"got: {out}"
);
}
#[test]
fn underscore_leading_code_span_is_verb_not_code() {
assert!(!code_span_is_r("_"));
assert!(!code_span_is_r("_x"));
assert!(!code_span_is_r("_foo_"));
assert!(code_span_is_r("x |> _$col"));
assert!(code_span_is_r("a_b"));
}
#[test]
fn md_block_lists_project_itemize_and_enumerate() {
let src = "#' T\n\
#' @details\n\
#' Bullets:\n\
#'\n\
#' - first\n\
#' - second\n\
#'\n\
#' Numbered:\n\
#'\n\
#' 1. one\n\
#' 2. two\n\
#' @md\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (TEXT \"Bullets:\") \
(\\itemize (\\item) (TEXT \"first\") (\\item) (TEXT \"second\")) \
(TEXT \"Numbered:\") \
(\\enumerate (\\item) (TEXT \"one\") (\\item) (TEXT \"two\")))"
),
"got: {out}"
);
}
#[test]
fn slot_tags_aggregate_into_slots_section() {
let src = "#' Important class.\n\
#'\n\
#' @slot a slot a\n\
#' @slot b slot b\n\
setClass('test')\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\section (TEXT \"Slots\") (\\describe \
(\\item (\\code (RCODE \"a\")) (TEXT \"slot a\")) \
(\\item (\\code (RCODE \"b\")) (TEXT \"slot b\"))))"
),
"got: {out}"
);
}
#[test]
fn field_tags_aggregate_into_fields_section() {
let src = "#' Important class.\n\
#'\n\
#' @field a field a\n\
#' @field b field b\n\
setRefClass('test')\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\section (TEXT \"Fields\") (\\describe \
(\\item (\\code (RCODE \"a\")) (TEXT \"field a\")) \
(\\item (\\code (RCODE \"b\")) (TEXT \"field b\"))))"
),
"got: {out}"
);
}
#[test]
fn slot_with_unbalanced_brace_is_dropped() {
let src = "#' Important class.\n\
#'\n\
#' @slot a sl{ot a\n\
#' @slot b slot b\n\
setClass('test')\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\section (TEXT \"Slots\") (\\describe \
(\\item (\\code (RCODE \"b\")) (TEXT \"slot b\"))))"
),
"got: {out}"
);
assert!(!out.contains("slot a"), "dropped slot leaked: {out}");
}
#[test]
fn all_fields_unbalanced_drops_fields_section() {
let src = "#' Important class.\n\
#'\n\
#' @field a fi{eld a\n\
setRefClass('test')\n";
let out = project_to_rd(src);
assert!(
!out.contains("Fields"),
"Fields section should be absent: {out}"
);
}
#[test]
fn slot_with_percent_commented_brace_survives() {
let src = "#' Important class.\n\
#'\n\
#' @slot a desc %{\n\
setClass('test')\n";
let out = project_to_rd(src);
assert!(out.contains("Slots"), "Slots section should survive: {out}");
}
#[test]
fn section_with_unbalanced_brace_drops_to_na_md_off() {
let src = "#' @title T\n\
#' @section Heading:\n\
#' body with brace {\n\
#' @name x\n\
NULL\n";
let out = project_to_rd(src);
assert!(out.contains("(\\section (TEXT \"NA\"))"), "got: {out}");
assert!(!out.contains("Heading"), "dropped title leaked: {out}");
}
#[test]
fn section_with_percent_commented_brace_survives_md_off() {
let src = "#' @title T\n\
#' @section Heading:\n\
#' body %{\n\
#' @name x\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains("(\\section (TEXT \"Heading\") (TEXT \"body\"))"),
"got: {out}"
);
}
#[test]
fn section_unbalanced_brace_not_dropped_md_on() {
let src = "#' @md\n\
#' @title T\n\
#' @section Heading:\n\
#' body with brace {\n\
#' @name x\n\
NULL\n";
let out = project_to_rd(src);
assert!(
!out.contains("(\\section (TEXT \"NA\"))"),
"md-on @section must not drop to NA: {out}"
);
assert!(out.contains("Heading"), "title should survive: {out}");
}
#[test]
fn md_block_list_is_off_without_md_tag() {
let src = "#' T\n\
#' @details\n\
#' - first\n\
#' - second\n\
#' @name d\n\
NULL\n";
assert!(
project_to_rd(src).contains("(\\details (TEXT \"- first - second\"))"),
"got: {}",
project_to_rd(src)
);
}
#[test]
fn md_inline_is_off_without_md_tag() {
let src = "#' T\n\
#' @details\n\
#' Text with *emphasis* and `code` here.\n\
#' @name d\n\
NULL\n";
assert!(
project_to_rd(src)
.contains("(\\details (TEXT \"Text with *emphasis* and `code` here.\"))"),
"got: {}",
project_to_rd(src)
);
}
#[test]
fn norm_ws_collapses_ascii_but_preserves_unicode_whitespace() {
assert_eq!(norm_ws(" a \t\n b "), "a b");
assert_eq!(norm_ws("*\u{a0}a\u{a0}*"), "*\u{a0}a\u{a0}*");
assert_eq!(norm_ws("x\u{85}y"), "x\u{85}y");
}
#[test]
fn nbsp_cannot_flank_emphasis_stays_literal() {
let src = "#' @md\n\
#' @title T\n\
#' @details\n\
#' *\u{a0}a\u{a0}*\n\
#' @name spec\n\
NULL\n";
assert!(
project_to_rd(src).contains("(\\details (TEXT \"*\u{a0}a\u{a0}*\"))"),
"got: {}",
project_to_rd(src)
);
}
#[test]
fn unescape_md_brackets_consumes_one_backslash_before_a_bracket() {
assert_eq!(unescape_md_brackets(r"\[x\]"), "[x]");
assert_eq!(unescape_md_brackets(r"\\[x"), r"\[x");
assert_eq!(
unescape_md_brackets(r"foo \* \` \% bar"),
r"foo \* \` \% bar"
);
assert_eq!(unescape_md_brackets("a\\\n[b"), "a\\\n[b");
}
#[test]
fn collapse_md_backslash_runs_halves_a_run() {
assert_eq!(collapse_md_backslash_runs(r"a \ b"), r"a \ b");
assert_eq!(collapse_md_backslash_runs(r"a \\ b"), r"a \ b");
assert_eq!(collapse_md_backslash_runs(r"a \\\\ b"), r"a \\ b");
assert_eq!(collapse_md_backslash_runs(r"a \\\\\\ b"), r"a \\\ b");
assert_eq!(collapse_md_backslash_runs(r"\* \_ \%"), r"\* \_ \%");
assert_eq!(collapse_md_backslash_runs(r"\\[x"), r"\\[x");
assert_eq!(collapse_md_backslash_runs(r"a\\]b"), r"a\\]b");
}
#[test]
fn md_percent_swallow_is_parity_keyed() {
assert_eq!(md_percent_swallow("a % b"), "a % b");
assert_eq!(md_percent_swallow(r"a \% b"), "a \\");
assert_eq!(md_percent_swallow(r"a \\% b"), r"a \\% b");
assert_eq!(md_percent_swallow(r"a \\\% b"), "a \\\\\\");
assert_eq!(md_percent_swallow(r"a % b \% c"), "a % b \\");
assert_eq!(md_percent_swallow("a \\% b\nc"), "a \\\nc");
assert_eq!(
md_percent_swallow(&format!("a \\% b{SOFT_BREAK}c")),
"a \\\nc"
);
}
#[test]
fn strip_rd_comments_stops_at_soft_wrap() {
assert_eq!(strip_rd_comments("a % swallowed\nc"), "a \nc");
assert_eq!(
strip_rd_comments(&format!("a % swallowed{SOFT_BREAK}c")),
"a \nc"
);
}
#[test]
fn md_escaped_bracket_is_literal_with_the_backslash_consumed() {
let src = "#' @md\n\
#' @title T\n\
#' @details\n\
#' A \\[bracket](x) and \\[shortcut] stay literal.\n\
#' @name spec\n\
NULL\n";
assert!(
project_to_rd(src)
.contains("(\\details (TEXT \"A [bracket](x) and [shortcut] stay literal.\"))"),
"got: {}",
project_to_rd(src)
);
}
#[test]
fn shortcut_link_node_atom_resolves_text_and_code() {
assert_eq!(
shortcut_link_node_atom(&[Inline::Text("cross-line shortcut".to_string())]),
"(\\link (TEXT \"cross-line shortcut\"))"
);
assert_eq!(
shortcut_link_node_atom(&[Inline::MdCode("f".to_string())]),
"(\\code (\\link (TEXT \"f\")))"
);
}
#[test]
fn md_cross_line_shortcut_link_joins_into_one_link() {
let src = "#' @md\n\
#' @title T\n\
#' @details\n\
#' A [broken\n\
#' across lines] joins, but a stray a] stays.\n\
#' @name spec\n\
NULL\n";
assert!(
project_to_rd(src).contains(
"(\\details (TEXT \"A\") (\\link (TEXT \"broken across lines\")) \
(TEXT \"joins, but a stray a] stays.\"))"
),
"got: {}",
project_to_rd(src)
);
}
#[test]
fn double_escape_md_reverts_only_bracket_escapes() {
assert_eq!(double_escape_md("[text\\]"), "[text\\]");
assert_eq!(double_escape_md("a\\*b"), "a\\\\*b");
assert_eq!(double_escape_md("\\[x\\]"), "\\[x\\]");
assert_eq!(double_escape_md("[text\\\\]"), "[text\\\\\\]");
}
#[test]
fn url_encode_matches_r_urlencode() {
assert_eq!(url_encode("text\\"), "text%5C");
assert_eq!(url_encode("a b"), "a%20b");
assert_eq!(url_encode("a/b:c"), "a/b:c");
assert_eq!(url_encode("100%"), "100%25");
}
#[test]
fn cmark_unescape_drops_backslash_before_punctuation() {
assert_eq!(cmark_unescape("[text\\]: R:text%5C"), "[text]: R:text%5C");
assert_eq!(cmark_unescape("[text\\\\\\]"), "[text\\]");
assert_eq!(cmark_unescape("a\\b"), "a\\b");
}
#[test]
fn md_linkref_labels_ports_get_md_linkrefs() {
assert_eq!(md_linkref_labels("see [foo] now"), vec!["foo".to_string()]);
assert_eq!(md_linkref_labels("[text][ref]"), vec!["ref".to_string()]);
assert!(md_linkref_labels("\\[foo]").is_empty());
assert!(md_linkref_labels("[a]{x}").is_empty());
assert_eq!(md_linkref_labels("[text\\]"), vec!["text\\".to_string()]);
}
#[test]
fn linkref_label_closes_on_even_trailing_backslashes() {
assert!(linkref_label_closes("text")); assert!(!linkref_label_closes("text\\")); assert!(!linkref_label_closes("text\\\\\\")); assert!(linkref_label_closes("text\\\\")); }
#[test]
fn leaked_linkref_text_leaks_from_first_invalid_definition() {
assert_eq!(
leaked_linkref_text("see [text\\] here"),
vec!["[text]: R:text%5C".to_string()]
);
assert!(leaked_linkref_text("see [foo] here").is_empty());
assert_eq!(
leaked_linkref_text("a [one\\] b [two\\] c"),
vec!["[one]: R:one%5C".to_string(), "[two]: R:two%5C".to_string()]
);
assert!(leaked_linkref_text("an escaped \\[x\\] stays").is_empty());
assert_eq!(
leaked_linkref_text("a [one] b [two\\] c [three] d"),
vec![
"[two]: R:two%5C".to_string(),
"[three]: R:three".to_string()
]
);
}
#[test]
fn first_invalid_linkref_offset_finds_the_poison_bracket() {
assert_eq!(
first_invalid_linkref_offset("a [one] b [two\\] c"),
Some(10)
);
assert_eq!(first_invalid_linkref_offset("[foo] [bar]"), None);
assert_eq!(first_invalid_linkref_offset("[bad\\] tail"), Some(0));
}
#[test]
fn demoted_link_source_targets_only_definition_backed_links() {
assert_eq!(
demoted_link_source(&Inline::MdShortcutLink {
display: vec![Inline::Text("foo".to_string())]
}),
Some("[foo]".to_string())
);
assert_eq!(
demoted_link_source(&Inline::MdRefLink {
dest: "ref".to_string(),
display: vec![Inline::Text("disp".to_string())]
}),
Some("[disp][ref]".to_string())
);
assert_eq!(
demoted_link_source(&Inline::MdLink("[foo]".to_string())),
Some("[foo]".to_string())
);
assert_eq!(
demoted_link_source(&Inline::MdLink("[t][r]".to_string())),
Some("[t][r]".to_string())
);
assert_eq!(
demoted_link_source(&Inline::MdLink("[t](u)".to_string())),
None
);
assert_eq!(
demoted_link_source(&Inline::MdLink("<http://x>".to_string())),
None
);
assert_eq!(
demoted_link_source(&Inline::Text("plain".to_string())),
None
);
}
/// Pins the skeleton of a resolved inline link: the fragment is its bracketed
/// display text plus a trailing space, so a preceding escaped close bracket
/// also leaks the link's label.
#[test]
fn skeleton_exposes_inline_link_brackets_for_leaked_defs() {
    let inline_link = |url: &str, label: &str| Inline::MdInlineLink {
        url: url.to_string(),
        display: vec![Inline::Text(label.to_string())],
    };

    let link = inline_link("https://example.org", "after");
    let fragment = "[after] ";
    assert_eq!(inline_skeleton_fragment(&link), fragment);
    assert_eq!(skeleton_len(&link), fragment.len());

    // With `[stop\]` ahead of it, the link's label shows up among the leaked definitions.
    let poisoned = vec![Inline::Text("see [stop\\] then ".to_string()), link];
    let skeleton = inline_source_skeleton(&poisoned);
    assert_eq!(
        leaked_linkref_text(&skeleton),
        vec![
            "[stop]: R:stop%5C".to_string(),
            "[after]: R:after".to_string(),
        ]
    );

    // An inline link on its own leaks nothing.
    let clean = vec![inline_link("u", "x")];
    let clean_skeleton = inline_source_skeleton(&clean);
    assert!(leaked_linkref_text(&clean_skeleton).is_empty());
}
/// Pins the skeleton of a markdown image: the fragment is the bracketed alt
/// text plus a trailing space, the image is not a demotable link, and a
/// preceding escaped close bracket also leaks the alt text as a definition.
#[test]
fn skeleton_exposes_image_alt_for_leaked_defs() {
    // The fixture must be a real image whose alt text is `alt`; an empty string
    // cannot produce the `Some("alt")` / `"[alt] "` values asserted below.
    // NOTE(review): the destination is arbitrary here; confirm it matches the upstream fixture.
    let source = "![alt](https://example.org/logo.png)";
    let image = Inline::MdImage(source.to_string());
    assert_eq!(image_alt_text(source), Some("alt"));
    assert_eq!(inline_skeleton_fragment(&image), "[alt] ");
    assert_eq!(skeleton_len(&image), "[alt] ".len());
    assert_eq!(demoted_link_source(&image), None);

    let body = vec![Inline::Text("see [stop\\] then ".to_string()), image];
    assert_eq!(
        leaked_linkref_text(&inline_source_skeleton(&body)),
        vec!["[stop]: R:stop%5C".to_string(), "[alt]: R:alt".to_string()]
    );
}
/// Pins the skeleton of an unresolved (`MdLink`) inline link whose display
/// contains a nested bracket: only the inner `[b]` is reported as leaked.
#[test]
fn skeleton_exposes_opaque_inline_link_inner_bracket_for_leaked_defs() {
    let raw = "[a [b] c](https://example.org)";

    // Only the `[display](dest)` form yields a display slice.
    assert_eq!(opaque_inline_link_display(raw), Some("a [b] c"));
    for other in ["[shortcut]", "[text][ref]", "<https://example.org>"] {
        assert_eq!(opaque_inline_link_display(other), None);
    }

    let link = Inline::MdLink(raw.to_string());
    let fragment = "[a [b] c] ";
    assert_eq!(inline_skeleton_fragment(&link), fragment);
    assert_eq!(skeleton_len(&link), fragment.len());
    assert_eq!(demoted_link_source(&link), None);

    let body = vec![Inline::Text("see [stop\\] then ".to_string()), link];
    let skeleton = inline_source_skeleton(&body);
    assert_eq!(
        leaked_linkref_text(&skeleton),
        vec!["[stop]: R:stop%5C".to_string(), "[b]: R:b".to_string()]
    );
}
/// End-to-end check: `[before]` stays a link, while `[stop\]` and the later
/// `[after]` come out as literal text followed by their leaked definitions.
#[test]
fn projects_mixed_linkref_poisoning() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' See [before] then [stop\\] and [after].\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"See\") (\\link (TEXT \"before\")) ",
        "(TEXT \"then [stop] and [after]. [stop]: R:stop%5C [after]: R:after\"))",
    );
    // Project once and reuse the result for both the check and the failure message.
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// Pins which labels `linkref_keys` collects from a single text inline.
#[test]
fn linkref_keys_skips_a_label_after_a_closing_bracket() {
    let keys_of = |text: &str| {
        let inlines = [Inline::Text(text.to_owned())];
        linkref_keys(&inlines)
    };

    // `[b]` directly after a `]` is not collected...
    let stray = keys_of("a][b]");
    assert!(stray.is_empty());

    // ...unless a free-standing `[b]` also appears.
    let repeated = keys_of("a][b] and [b] here");
    assert!(repeated.contains("b"));

    // A full reference contributes its second label.
    let full_ref = keys_of("[text][ref]");
    assert!(full_ref.contains("ref"));

    // A bracket followed by `{` is not collected.
    let braced = keys_of("[a]{x}");
    assert!(braced.is_empty());
}
/// A stray `a][b]` with no other `[b]` in the block projects as plain text.
#[test]
fn projects_undefined_shortcut_after_bracket_as_literal() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' A stray a][b] here.\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = "(\\details (TEXT \"A stray a][b] here.\"))";
    assert_eq!(project_to_rd(src), expected);
}
/// For `[a [b] c][ref]`, the expected projection links only the inner `[b]`
/// and leaves the surrounding brackets as literal text.
#[test]
fn projects_undefined_ref_links_only_the_defined_inner_shortcut() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' A [a [b] c][ref] link.\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"A [a\") (\\link (TEXT \"b\")) ",
        "(TEXT \"c][ref] link.\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// When a free-standing `[b]` appears later in the text, the `[b]` in the
/// stray `a][b]` is expected to link as well.
#[test]
fn undefined_shortcut_links_when_defined_elsewhere() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' A stray a][b], later [b].\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"A stray a]\") (\\link (TEXT \"b\")) ",
        "(TEXT \", later\") (\\link (TEXT \"b\")) (TEXT \".\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// Same stray `a][b]` case, but inside a bullet item: it stays literal text.
#[test]
fn projects_undefined_shortcut_inside_a_list_item_as_literal() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' Top.\n",
        "#'\n",
        "#' - a stray a][b] keeps it\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Top.\") ",
        "(\\itemize (\\item) (TEXT \"a stray a][b] keeps it\")))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// A free-standing `[foo]` inside a bullet item is expected to become a link.
#[test]
fn projects_self_defined_shortcut_inside_a_list_item_as_link() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' Top.\n",
        "#'\n",
        "#' - see [foo] here\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Top.\") ",
        "(\\itemize (\\item) (TEXT \"see\") (\\link (TEXT \"foo\")) (TEXT \"here\")))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// An escaped close bracket in one bullet item: `[before]` ahead of the list
/// still links, `[foo]` in the next item is literal, and both leaked
/// definitions are appended after the list.
#[test]
fn projects_in_list_poisoning_demotes_a_later_in_list_shortcut() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' Pre [before] links.\n",
        "#'\n",
        "#' - an escaped close [stop\\] here\n",
        "#' - a shortcut [foo] after\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Pre\") (\\link (TEXT \"before\")) (TEXT \"links.\") ",
        "(\\itemize (\\item) (TEXT \"an escaped close [stop] here\") ",
        "(\\item) (TEXT \"a shortcut [foo] after\")) ",
        "(TEXT \"[stop]: R:stop%5C [foo]: R:foo\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// Three bullet items around an escaped close bracket: `[foo]` in the item
/// before it still links, `[bar]` in the item after it is literal and leaks.
#[test]
fn projects_in_list_candidate_before_the_boundary_survives() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' Top.\n",
        "#'\n",
        "#' - early [foo] survives\n",
        "#' - an escaped close [stop\\] here\n",
        "#' - [bar] dead\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Top.\") ",
        "(\\itemize (\\item) (TEXT \"early\") (\\link (TEXT \"foo\")) (TEXT \"survives\") ",
        "(\\item) (TEXT \"an escaped close [stop] here\") ",
        "(\\item) (TEXT \"[bar] dead\")) ",
        "(TEXT \"[stop]: R:stop%5C [bar]: R:bar\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// Pins `decode_html_entities`: named and numeric character references are
/// replaced by their characters, and anything that is not a recognised
/// reference passes through unchanged.
#[test]
fn decode_html_entities_resolves_named_and_numeric_refs() {
    // Inputs are written as entity references so the decoder has real work to
    // do; already-decoded inputs would only test identity.
    // Named references.
    assert_eq!(decode_html_entities("a&amp;b"), "a&b");
    assert_eq!(decode_html_entities("&lt;&gt;&quot;&#39;"), "<>\"'");
    // Numeric references.
    // NOTE(review): decimal form only; add a hexadecimal case (`&#x42;`) if the
    // decoder is meant to support it.
    assert_eq!(decode_html_entities("&#65;&#66;"), "AB");
    // Text without references is untouched.
    assert_eq!(decode_html_entities("plain"), "plain");
    // A bare `&` with no terminating `;` is not a reference.
    assert_eq!(decode_html_entities("a&b=1"), "a&b=1");
    // Unknown names are left as written.
    assert_eq!(decode_html_entities("&unknown;"), "&unknown;");
}
/// A definition whose destination sits on the line after `[ref]:` is expected
/// to resolve `[ref]` to an `\href`.
#[test]
fn parses_a_multiline_linkref_definition() {
    let src = concat!(
        "#' @md\n",
        "#' @details\n",
        "#' See [ref].\n",
        "#'\n",
        "#' [ref]:\n",
        "#' https://example.com\n",
        "#' @name x\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"See\") ",
        "(\\href (VERB \"https://example.com\") (TEXT \"ref\")) (TEXT \".\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// Pins `append_rendered_text`: text is merged into a trailing `TEXT` atom
/// (joined by a space) and otherwise pushed as a new `TEXT` atom.
#[test]
fn append_rendered_text_coalesces_into_trailing_text() {
    let leak = "[t]: R:t%5C";

    // Last atom is TEXT: the new text is folded into it.
    let mut after_text = vec![String::from("(TEXT \"prose.\")")];
    append_rendered_text(&mut after_text, leak);
    assert_eq!(after_text, ["(TEXT \"prose. [t]: R:t%5C\")"]);

    // Last atom is not TEXT: a fresh TEXT atom is appended.
    let mut after_link = vec![String::from("(\\link (TEXT \"x\"))")];
    append_rendered_text(&mut after_link, leak);
    assert_eq!(
        after_link,
        ["(\\link (TEXT \"x\"))", "(TEXT \"[t]: R:t%5C\")"]
    );
}
/// End-to-end check: `[text\]` projects as literal `[text]` followed by its
/// leaked definition text.
#[test]
fn projects_escaped_close_bracket_leaked_linkref() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' A link like [text\\] leaks.\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = "(\\details (TEXT \"A link like [text] leaks. [text]: R:text%5C\"))";
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// Pins `rd_complete` on small inputs, grouped by expected verdict.
#[test]
fn rd_complete_ports_the_brace_balance_check() {
    // Inputs the check accepts.
    let complete = [
        "a{b}",
        "a\\{b",
        "\\emph{x}",
        "a%{",
        "{%}\n}",
    ];
    for text in complete {
        assert!(rd_complete(text), "expected complete: {text:?}");
    }

    // Inputs the check rejects.
    let incomplete = [
        "a{b",
        "a}b",
        "\\emph{\\}",
        "a\\",
        "{%}",
    ];
    for text in incomplete {
        assert!(!rd_complete(text), "expected incomplete: {text:?}");
    }
}
/// Pins `section_atoms_rd_complete` on rendered atom lists: `%` inside text
/// or a URL is accepted, a text atom holding a lone backslash is rejected.
#[test]
fn section_atoms_rd_complete_reconstructs_braces() {
    let plain_and_emph = section_atoms_rd_complete(
        &["(TEXT \"foo\")".into(), "(\\emph (TEXT \"x\"))".into()],
        true,
    );
    assert!(plain_and_emph);

    let percent_in_text =
        section_atoms_rd_complete(&["(\\emph (TEXT \"a % b\"))".into()], true);
    assert!(percent_in_text);

    let percent_in_href = section_atoms_rd_complete(
        &["(\\href (VERB \"https://x/a%20b\") (TEXT \"link % text\"))".into()],
        true,
    );
    assert!(percent_in_href);

    let trailing_backslash = section_atoms_rd_complete(
        &["(TEXT \"foo\")".into(), "(\\emph (TEXT \"\\\\\"))".into()],
        true,
    );
    assert!(!trailing_backslash);
}
/// For each emphasis delimiter wrapped around an escaped delimiter, the
/// projected `\details` section is expected to be present but empty.
#[test]
fn projects_rdcomplete_failure_drops_the_section() {
    let delimiters = ["*\\**", "**\\***", "_\\__", "__\\___"];
    delimiters.into_iter().for_each(|delim| {
        let src =
            format!("#' @md\n#' @title T\n#' @details\n#' foo {delim}\n#' @name spec\nNULL\n");
        let out = project_to_rd(&src);
        // The bare `(\details)` form must appear, and no `(\details …` with content.
        let has_empty_details = out.contains("(\\details)");
        let has_details_body = out.contains("(\\details ");
        assert!(
            has_empty_details && !has_details_body,
            "delim {delim:?} got: {out}"
        );
    });
}
/// The same delimiter input under `@return` still produces a `\value` section.
#[test]
fn rdcomplete_drop_is_scoped_to_with_sections_tags() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @return foo *\\**\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let out = project_to_rd(src);
    let keeps_value = out.contains("(\\value");
    assert!(keeps_value, "got: {out}");
}
/// Reference links whose labels have URL definitions are expected to render
/// as `\href`, keeping emphasis and code markup in the display.
#[test]
fn url_defined_reference_links_render_href() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' See [*foo*][r1] and [plain][r2] and [`code`][r3].\n",
        "#'\n",
        "#' [r1]: https://example.com\n",
        "#' [r2]: https://example.org\n",
        "#' [r3]: https://example.net\n",
        "#' @name spec\n",
        "NULL\n",
    );
    // Full output, one section per line.
    let expected = concat!(
        "(\\description (TEXT \"T\"))\n",
        "(\\details (TEXT \"See\") ",
        "(\\href (VERB \"https://example.com\") (\\emph (TEXT \"foo\"))) (TEXT \"and\") ",
        "(\\href (VERB \"https://example.org\") (TEXT \"plain\")) (TEXT \"and\") ",
        "(\\href (VERB \"https://example.net\") (\\code (RCODE \"code\"))) (TEXT \".\"))\n",
        "(\\title (TEXT \"T\"))",
    );
    assert_eq!(project_to_rd(src), expected);
}
/// A shortcut `[r1]` with a URL definition is expected to render as `\href`
/// with the label as display text.
#[test]
fn url_defined_shortcut_link_renders_href() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' See [r1] here.\n",
        "#'\n",
        "#' [r1]: https://example.com\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"See\") ",
        "(\\href (VERB \"https://example.com\") (TEXT \"r1\")) (TEXT \"here.\"))",
    );
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// A `[r1]: url` line directly after paragraph text (no blank line between)
/// is expected to project as a link plus literal text, not as a definition.
#[test]
fn linkref_definition_cannot_interrupt_a_paragraph() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' Some prose with [r1] here.\n",
        "#' [r1]: https://example.com\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Some prose with\") (\\link (TEXT \"r1\")) (TEXT \"here.\") ",
        "(\\link (TEXT \"r1\")) (TEXT \": https://example.com\"))",
    );
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// A would-be definition line followed by an Rd macro is expected to project
/// as ordinary content: link, literal text, then the macro.
#[test]
fn linkref_definition_with_trailing_macro_is_not_a_definition() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details\n",
        "#' See [foo].\n",
        "#'\n",
        "#' [foo]: https://x.org \\emph{bar}\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"See\") (\\link (TEXT \"foo\")) (TEXT \".\") ",
        "(\\link (TEXT \"foo\")) (TEXT \": https://x.org\") (\\emph (TEXT \"bar\")))",
    );
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// `\b` inside a link's display text is expected to come out as an `UNKNOWN`
/// atom inside the `\link`, for both the shortcut and the reference form.
#[test]
fn backslash_word_in_link_display_renders_as_rd_macro() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details See [a\\b] and [a\\b][lbl] now.\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"See\") (\\link (TEXT \"a\") (UNKNOWN \"\\\\b\")) (TEXT \"and\") ",
        "(\\link (TEXT \"a\") (UNKNOWN \"\\\\b\")) (TEXT \"now.\"))",
    );
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// A shortcut whose display holds escaped emphasis markers is expected to
/// vanish from the output, leaving only the surrounding words.
#[test]
fn escaped_emphasis_in_link_display_drops_the_link() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details A [a\\*b\\*] gap.\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = "(\\details (TEXT \"A gap.\"))";
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// A shortcut whose display is a single macro containing markdown emphasis
/// (`\emph{*x*}`) is expected to vanish from the output.
#[test]
fn pure_macro_active_link_display_drops() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details A [\\emph{*x*}] gap.\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = "(\\details (TEXT \"A gap.\"))";
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
/// A shortcut whose display is a single macro with plain content
/// (`\emph{y}`, `\code{f}`) is expected to stay a `\link` wrapping that macro.
#[test]
fn pure_macro_inert_link_display_keeps() {
    let src = concat!(
        "#' @md\n",
        "#' @title T\n",
        "#' @details Keep [\\emph{y}] and [\\code{f}].\n",
        "#' @name spec\n",
        "NULL\n",
    );
    let expected = concat!(
        "(\\details (TEXT \"Keep\") (\\link (\\emph (TEXT \"y\"))) (TEXT \"and\") ",
        "(\\link (\\code (RCODE \"f\"))) (TEXT \".\"))",
    );
    let rd = project_to_rd(src);
    assert!(rd.contains(expected), "got: {}", rd);
}
}