use rowan::NodeOrToken;
use crate::ast::{AstNode, RoxygenBlock, RoxygenParagraph, RoxygenSection, RoxygenTag};
use crate::parser::parse;
use crate::parser::roxygen::{is_known_rd_macro, is_two_arg_rd_macro};
use crate::syntax::{SyntaxKind, SyntaxNode};
/// Parses `text` and renders every roxygen block found in the resulting CST.
///
/// The rendered section atoms of all blocks are sorted lexicographically and
/// joined with newlines.
pub fn project_to_rd(text: &str) -> String {
    let root = parse(text).cst;
    let mut rendered: Vec<String> = Vec::new();
    root.descendants()
        .filter_map(RoxygenBlock::cast)
        .for_each(|block| project_block(&block, &mut rendered));
    rendered.sort();
    rendered.join("\n")
}
/// One inline element of a roxygen body, collected before serialization.
#[derive(Clone)]
enum Inline {
/// Plain text; consecutive `Text` items are buffered into a single run by
/// `serialize_inlines`.
Text(String),
/// An Rd macro node, rendered by `serialize_macro`.
Macro(SyntaxNode),
/// A markdown emphasis / strong / code span together with its inner content.
Md(MdInline, String),
/// A markdown list node, rendered by `serialize_md_list`.
MdList(SyntaxNode),
/// Raw text of a markdown link token, resolved by `resolve_md_link`.
MdLink(String),
/// Raw text of a markdown image token, resolved by `resolve_md_image`.
MdImage(String),
/// A markdown code block node, rendered by `serialize_md_code_block`.
MdCodeBlock(SyntaxNode),
/// Raw text of an inline HTML token, rendered by `html_inline_atom`.
MdHtml(String),
}
/// The kind of markdown inline span carried by `Inline::Md`.
#[derive(Clone, Copy)]
enum MdInline {
/// Rendered as `\emph` by `serialize_md_inline`.
Emph,
/// Rendered as `\strong` by `serialize_md_inline`.
Strong,
/// Rendered through `md_code_atom` by `serialize_md_inline`.
Code,
}
/// Projects one roxygen block into Rd section atoms appended to `out`.
///
/// Untagged leading paragraphs supply the title, the description and the
/// details (in that order), unless an explicit non-`NULL` `@title` /
/// `@description` tag already claims that slot. `@slot` and `@field` tags are
/// gathered into "Slots" / "Fields" describe-sections, and any `@examples` /
/// `@examplesIf` tag yields a single placeholder atom.
fn project_block(block: &RoxygenBlock, out: &mut Vec<String>) {
    let mut intro_paras: Vec<Vec<Inline>> = Vec::new();
    let mut tag_sections: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut slots: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut fields: Vec<(String, Vec<Inline>)> = Vec::new();
    let mut has_examples = false;

    for section in block.sections() {
        if let Some(tag) = section.tag() {
            let name = tag.name().map(|n| n.to_string()).unwrap_or_default();
            // The tag line's own prose comes first, then every continuation
            // part of the section, separated by single spaces.
            let mut body = tag_inlines(&tag);
            for part in section_body_parts(&section) {
                if !body.is_empty() {
                    body.push(Inline::Text(" ".to_string()));
                }
                body.extend(part);
            }
            match name.as_str() {
                "slot" | "field" => {
                    let arg = tag.arg().map(|t| t.text().to_string()).unwrap_or_default();
                    if name == "slot" {
                        slots.push((arg, body));
                    } else {
                        fields.push((arg, body));
                    }
                }
                "examples" | "examplesIf" => has_examples = true,
                _ => tag_sections.push((name, body)),
            }
        } else {
            intro_paras.extend(section_body_parts(&section));
        }
    }

    // A tag whose body is the literal `NULL` does not count as explicit.
    let explicit_title_body = tag_sections
        .iter()
        .find(|(n, b)| n == "title" && !is_null_section(b))
        .map(|(_, b)| b.clone());
    let has_explicit_title = explicit_title_body.is_some();
    let has_explicit_desc = tag_sections
        .iter()
        .any(|(n, b)| n == "description" && !is_null_section(b));

    // Hand out intro paragraphs in order: title, description, then details.
    let mut remaining_intro = intro_paras.iter();
    let intro_title = if has_explicit_title {
        None
    } else {
        remaining_intro.next().cloned()
    };
    let intro_desc = if has_explicit_desc {
        None
    } else {
        remaining_intro.next().cloned()
    };
    let intro_details = remaining_intro.as_slice();
    let merge_details = !intro_details.is_empty();

    if let Some(title) = &intro_title {
        push_section(out, "title", title);
    }

    // Without any description, the title (intro or explicit) is reused as one.
    // An explicit `@description` is emitted later with the other tag sections.
    let description = match intro_desc {
        Some(d) => Some(d),
        None if has_explicit_desc => None,
        None => intro_title.clone().or(explicit_title_body),
    };
    if let Some(description) = description {
        push_section(out, "description", &description);
    }

    if merge_details {
        // Trailing intro paragraphs and explicit `@details` bodies form one
        // combined details section.
        let mut body = join_paras(intro_details);
        for (_, ed) in tag_sections.iter().filter(|(n, _)| n == "details") {
            body.push(Inline::Text(" ".to_string()));
            body.extend(join_paras(std::slice::from_ref(ed)));
        }
        push_section(out, "details", &body);
    }

    for (name, body) in &tag_sections {
        // Already folded into the merged details section above.
        if merge_details && name == "details" {
            continue;
        }
        project_tag_section(name, body, out);
    }

    if !slots.is_empty() {
        out.push(describe_section("Slots", &slots));
    }
    if !fields.is_empty() {
        out.push(describe_section("Fields", &fields));
    }
    if has_examples {
        out.push("(\\examples ...)".to_string());
    }
}
/// Renders `items` (name, definition) as a `\section` titled `title` whose
/// body is a `\describe` list with one `\item` per entry.
fn describe_section(title: &str, items: &[(String, Vec<Inline>)]) -> String {
    let entries: Vec<String> = items
        .iter()
        .map(|(name, def)| {
            // The item term is the name rendered as R code.
            let code = rcode_atoms(name);
            let term = match code.as_slice() {
                [] => "(\\code)".to_string(),
                atoms => format!("(\\code {})", atoms.join(" ")),
            };
            let mut pieces = vec![term];
            let definition = grp_arg(&serialize_inlines(def));
            if !definition.is_empty() {
                pieces.push(definition);
            }
            format!("(\\item {})", pieces.join(" "))
        })
        .collect();
    format!(
        "(\\section (TEXT {}) (\\describe {}))",
        encode_text(title),
        entries.join(" ")
    )
}
/// Concatenates paragraphs into one inline sequence, inserting a single-space
/// text item between consecutive paragraphs.
fn join_paras(paras: &[Vec<Inline>]) -> Vec<Inline> {
    let mut joined: Vec<Inline> = Vec::new();
    for (index, para) in paras.iter().enumerate() {
        if index > 0 {
            joined.push(Inline::Text(" ".to_string()));
        }
        joined.extend(para.iter().cloned());
    }
    joined
}
/// Emits the Rd section for a single tag, if the tag maps to one.
///
/// Tags listed in `NULL_SUPPRESSIBLE` whose body is the literal `NULL` emit
/// nothing; tags without a mapping are ignored.
fn project_tag_section(name: &str, body: &[Inline], out: &mut Vec<String>) {
    if NULL_SUPPRESSIBLE.contains(&name) && is_null_section(body) {
        return;
    }

    if name == "section" {
        // `@section Heading: content` becomes `\section{Heading}{content}`.
        let (heading, content) = split_section_title(body);
        let heading_atoms = serialize_inlines(&heading);
        let content_atoms = serialize_inlines(&content);
        let mut args = grp_arg(&heading_atoms);
        if !content_atoms.is_empty() {
            if !args.is_empty() {
                args.push(' ');
            }
            args.push_str(&grp_arg(&content_atoms));
        }
        out.push(format!("(\\section{})", prefix_space(&args)));
        return;
    }

    // Every other supported tag keeps its name as the macro name, except
    // `@return`, which is rendered as `\value`.
    let macro_name = match name {
        "return" => "value",
        "description" | "details" | "seealso" | "source" | "format" | "references" | "note"
        | "author" | "title" => name,
        _ => return,
    };
    push_section(out, macro_name, body);
}
/// Tag names for which `project_tag_section` emits nothing when the tag body
/// serializes to the single text atom `NULL`.
const NULL_SUPPRESSIBLE: &[&str] = &[
"description",
"details",
"return",
"seealso",
"source",
"format",
"references",
"note",
"author",
"title",
];
/// Returns `true` when `body` serializes to exactly one atom, the text `NULL`.
fn is_null_section(body: &[Inline]) -> bool {
    matches!(
        serialize_inlines(body).as_slice(),
        [only] if only == "(TEXT \"NULL\")"
    )
}
/// Appends `(\macro_name atoms...)` to `out`, or the bare `(\macro_name)` when
/// `body` serializes to no atoms.
fn push_section(out: &mut Vec<String>, macro_name: &str, body: &[Inline]) {
    let rendered = match serialize_inlines(body).as_slice() {
        [] => format!("(\\{macro_name})"),
        atoms => format!("(\\{macro_name} {})", atoms.join(" ")),
    };
    out.push(rendered);
}
/// Serializes inline items into a flat list of atoms.
///
/// Consecutive text items are buffered and emitted as one TEXT atom whenever a
/// non-text item (or the end of the input) is reached.
fn serialize_inlines(body: &[Inline]) -> Vec<String> {
    // Emits the buffered text (if it is not blank) and resets the buffer.
    fn flush_text(pending: &mut String, atoms: &mut Vec<String>) {
        atoms.extend(text_atom(pending));
        pending.clear();
    }

    let mut atoms: Vec<String> = Vec::new();
    let mut pending = String::new();
    for item in body {
        match item {
            Inline::Text(chunk) => pending.push_str(chunk),
            Inline::Macro(node) => {
                flush_text(&mut pending, &mut atoms);
                atoms.push(serialize_macro(node));
            }
            Inline::Md(kind, content) => {
                flush_text(&mut pending, &mut atoms);
                atoms.push(serialize_md_inline(*kind, content));
            }
            Inline::MdList(node) => {
                flush_text(&mut pending, &mut atoms);
                atoms.push(serialize_md_list(node));
            }
            Inline::MdLink(raw) => {
                flush_text(&mut pending, &mut atoms);
                atoms.push(resolve_md_link(raw).unwrap_or_default());
            }
            Inline::MdImage(raw) => {
                flush_text(&mut pending, &mut atoms);
                atoms.extend(resolve_md_image(raw));
            }
            Inline::MdCodeBlock(node) => {
                flush_text(&mut pending, &mut atoms);
                atoms.extend(serialize_md_code_block(node));
            }
            Inline::MdHtml(raw) => {
                flush_text(&mut pending, &mut atoms);
                atoms.push(html_inline_atom(raw));
            }
        }
    }
    flush_text(&mut pending, &mut atoms);
    atoms
}
/// Serializes one Rd macro node as `(head atoms...)`.
///
/// Walks the node's direct children: plain tokens accumulate in `run`, nested
/// macros and verbatim tokens become atoms in `group`, and each closing `}`
/// moves the current `group` into `out_atoms`. A macro that produced no atoms
/// is rendered as `(head)` when its name is known, otherwise as
/// `(UNKNOWN "head")`.
fn serialize_macro(node: &SyntaxNode) -> String {
let mut head = String::new();
let mut structural = false;
let mut out_atoms: Vec<String> = Vec::new();
let mut group: Vec<String> = Vec::new();
let mut run = String::new();
// Turns the buffered text into atoms: RCODE atoms when `code` is set,
// otherwise a single TEXT atom (skipped when blank). Always empties `run`.
let flush = |run: &mut String, group: &mut Vec<String>, code: bool| {
if code {
group.extend(rcode_atoms(run));
} else if let Some(atom) = text_atom(run) {
group.push(atom);
}
run.clear();
};
// Moves the atoms of the current argument into `out`. For structural macros
// an argument holding more than one atom is wrapped in a GRP; either branch
// leaves `group` empty.
let finalize = |group: &mut Vec<String>, out: &mut Vec<String>, structural: bool| {
if structural && group.len() > 1 {
out.push(format!("(GRP {})", group.join(" ")));
group.clear();
} else {
out.append(group);
}
};
for el in node.children_with_tokens() {
match el.kind() {
SyntaxKind::ROXYGEN_RD_MACRO_NAME => {
// The head keeps the token text as-is; only the two-arg lookup strips
// leading backslashes.
head = el
.as_token()
.map(|t| t.text().to_string())
.unwrap_or_default();
structural = is_two_arg_rd_macro(head.trim_start_matches('\\'));
}
SyntaxKind::ROXYGEN_RD_MACRO_VERB => {
flush(&mut run, &mut group, head == "\\code");
let raw = el
.as_token()
.map(|t| t.text().to_string())
.unwrap_or_default();
group.push(format!("(VERB {})", encode_text(&raw)));
}
SyntaxKind::ROXYGEN_RD_MACRO => {
// Nested macro: flush preceding text first so atom order is preserved.
flush(&mut run, &mut group, head == "\\code");
if let Some(n) = el.as_node() {
group.push(serialize_macro(n));
}
}
SyntaxKind::ROXYGEN_RD_MACRO_DELIM => {
// Only a closing brace ends an argument; other delimiters are skipped.
if el.as_token().is_some_and(|t| t.text() == "}") {
flush(&mut run, &mut group, head == "\\code");
finalize(&mut group, &mut out_atoms, structural);
}
}
SyntaxKind::ROXYGEN_RD_MACRO_OPT | SyntaxKind::ROXYGEN_MARKER => {}
_ => {
// Any other token is buffered as text; other child nodes are ignored.
if let Some(t) = el.as_token() {
run.push_str(t.text());
}
}
}
}
// Emit whatever is still buffered after the last child.
flush(&mut run, &mut group, head == "\\code");
finalize(&mut group, &mut out_atoms, structural);
if out_atoms.is_empty() {
let name = head.trim_start_matches('\\');
if is_known_rd_macro(name) {
format!("({head})")
} else {
format!("(UNKNOWN {})", encode_text(&head))
}
} else {
format!("({head} {})", out_atoms.join(" "))
}
}
/// Splits a `@section` body at the first `:` found in a text item.
///
/// Everything before the colon is the heading, everything after it is the
/// content; the colon itself is dropped. Without a colon the whole body is the
/// heading.
fn split_section_title(body: &[Inline]) -> (Vec<Inline>, Vec<Inline>) {
    let mut heading: Vec<Inline> = Vec::new();
    let mut rest: Vec<Inline> = Vec::new();
    let mut items = body.iter();
    for item in items.by_ref() {
        let colon = match item {
            Inline::Text(text) => text.find(':').map(|at| (text, at)),
            _ => None,
        };
        let Some((text, at)) = colon else {
            heading.push(item.clone());
            continue;
        };
        if at > 0 {
            heading.push(Inline::Text(text[..at].to_string()));
        }
        let tail = &text[at + 1..];
        if !tail.is_empty() {
            rest.push(Inline::Text(tail.to_string()));
        }
        break;
    }
    // Whatever follows the item containing the colon is content.
    rest.extend(items.cloned());
    (heading, rest)
}
/// Renders atoms as one macro argument: empty for no atoms, the atom itself
/// for one, and a `(GRP ...)` wrapper for several.
fn grp_arg(atoms: &[String]) -> String {
    if atoms.len() > 1 {
        format!("(GRP {})", atoms.join(" "))
    } else {
        atoms.first().cloned().unwrap_or_default()
    }
}
/// Returns `s` preceded by one space, or an empty string when `s` is empty.
fn prefix_space(s: &str) -> String {
    match s {
        "" => String::new(),
        _ => format!(" {s}"),
    }
}
/// Builds a TEXT atom from `body` after collapsing whitespace; returns `None`
/// when nothing but whitespace remains.
fn text_atom(body: &str) -> Option<String> {
    let collapsed = norm_ws(body);
    if collapsed.is_empty() {
        None
    } else {
        Some(format!("(TEXT {})", encode_text(&collapsed)))
    }
}
fn rcode_atoms(body: &str) -> Vec<String> {
let mut atoms = Vec::new();
let mut rest = body;
while let Some(idx) = rest.find('\n') {
let (seg, tail) = rest.split_at(idx + 1);
atoms.push(format!("(RCODE {})", encode_text(seg)));
rest = tail;
}
if !rest.is_empty() {
atoms.push(format!("(RCODE {})", encode_text(rest)));
}
atoms
}
/// Collapses every run of whitespace to a single space and drops leading and
/// trailing whitespace.
fn norm_ws(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Wraps `s` in double quotes, escaping backslashes and double quotes with a
/// backslash and writing newlines as `\n`.
fn encode_text(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        if matches!(ch, '\\' | '"') {
            quoted.push('\\');
            quoted.push(ch);
        } else if ch == '\n' {
            quoted.push_str("\\n");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('"');
    quoted
}
/// Groups a section's direct children into parts.
///
/// Paragraphs, Rd macros, markdown lists and markdown code blocks are appended
/// to the current part (space-separated); a marker child closes the current
/// part when it is non-empty. Every other child is ignored.
fn section_body_parts(section: &RoxygenSection) -> Vec<Vec<Inline>> {
    let mut parts: Vec<Vec<Inline>> = Vec::new();
    let mut pending: Vec<Inline> = Vec::new();
    for child in section.syntax().children_with_tokens() {
        let kind = child.kind();
        if kind == SyntaxKind::ROXYGEN_MARKER {
            if !pending.is_empty() {
                parts.push(std::mem::take(&mut pending));
            }
            continue;
        }
        let is_body_element = matches!(
            kind,
            SyntaxKind::ROXYGEN_PARAGRAPH
                | SyntaxKind::ROXYGEN_RD_MACRO
                | SyntaxKind::ROXYGEN_MD_LIST
                | SyntaxKind::ROXYGEN_MD_CODE_BLOCK
        );
        if !is_body_element {
            continue;
        }
        let Some(node) = child.into_node() else {
            continue;
        };
        let inlines = match node.kind() {
            SyntaxKind::ROXYGEN_PARAGRAPH => RoxygenParagraph::cast(node)
                .map(|para| paragraph_inlines(&para))
                .unwrap_or_default(),
            SyntaxKind::ROXYGEN_MD_LIST => vec![Inline::MdList(node)],
            SyntaxKind::ROXYGEN_MD_CODE_BLOCK => vec![Inline::MdCodeBlock(node)],
            _ => vec![Inline::Macro(node)],
        };
        if !pending.is_empty() {
            pending.push(Inline::Text(" ".to_string()));
        }
        pending.extend(inlines);
    }
    if !pending.is_empty() {
        parts.push(pending);
    }
    parts
}
/// Collects the inline items of a paragraph: markers are skipped, newlines
/// become a single space, and everything else goes through `push_inline`.
fn paragraph_inlines(para: &RoxygenParagraph) -> Vec<Inline> {
    let mut inlines = Vec::new();
    for child in para.syntax().children_with_tokens() {
        let kind = child.kind();
        if kind == SyntaxKind::ROXYGEN_MARKER {
            continue;
        }
        if kind == SyntaxKind::NEWLINE {
            inlines.push(Inline::Text(" ".to_string()));
        } else {
            push_inline(&mut inlines, child);
        }
    }
    inlines
}
/// Collects the inline items on a tag's own line.
///
/// The `@`, the tag name and the tag argument are skipped, as is any
/// whitespace that precedes the first remaining element.
fn tag_inlines(tag: &RoxygenTag) -> Vec<Inline> {
    let mut inlines = Vec::new();
    let mut prose_started = false;
    for child in tag.syntax().children_with_tokens() {
        let kind = child.kind();
        if matches!(
            kind,
            SyntaxKind::ROXYGEN_AT | SyntaxKind::ROXYGEN_TAG_NAME | SyntaxKind::ROXYGEN_TAG_ARG
        ) {
            continue;
        }
        if kind == SyntaxKind::WHITESPACE {
            if !prose_started {
                continue;
            }
        } else {
            prose_started = true;
        }
        push_inline(&mut inlines, child);
    }
    inlines
}
/// Converts one syntax element into an `Inline` and appends it to `out`.
///
/// Rd macro nodes stay nodes; markdown tokens become their dedicated variants
/// (links and images only when they resolve); anything else is plain text.
fn push_inline(out: &mut Vec<Inline>, el: NodeOrToken<SyntaxNode, crate::syntax::SyntaxToken>) {
    let inline = match el {
        NodeOrToken::Node(node) => {
            if node.kind() == SyntaxKind::ROXYGEN_RD_MACRO {
                Inline::Macro(node)
            } else {
                Inline::Text(node.text().to_string())
            }
        }
        NodeOrToken::Token(token) => {
            let raw = token.text();
            match token.kind() {
                SyntaxKind::ROXYGEN_MD_EMPH => Inline::Md(MdInline::Emph, strip_delim(raw, 1)),
                SyntaxKind::ROXYGEN_MD_STRONG => Inline::Md(MdInline::Strong, strip_delim(raw, 2)),
                SyntaxKind::ROXYGEN_MD_CODE => Inline::Md(MdInline::Code, strip_code_span(raw)),
                SyntaxKind::ROXYGEN_MD_LINK if resolve_md_link(raw).is_some() => {
                    Inline::MdLink(raw.to_string())
                }
                SyntaxKind::ROXYGEN_MD_IMAGE if resolve_md_image(raw).is_some() => {
                    Inline::MdImage(raw.to_string())
                }
                SyntaxKind::ROXYGEN_MD_HTML => Inline::MdHtml(raw.to_string()),
                // Unresolvable links/images and all other tokens stay literal.
                _ => Inline::Text(raw.to_string()),
            }
        }
    };
    out.push(inline);
}
/// Removes `n` bytes from each end of `text` and returns what is left.
///
/// Returns an empty string when `text` is too short to hold both delimiters or
/// when a cut would fall inside a multi-byte character. The end offset is
/// computed with `checked_sub` so that input shorter than `n` yields an empty
/// string instead of an arithmetic underflow.
fn strip_delim(text: &str, n: usize) -> String {
    text.len()
        .checked_sub(n)
        .and_then(|end| text.get(n..end))
        .unwrap_or("")
        .to_string()
}
/// Extracts the content of a backtick code span.
///
/// The leading backtick run is counted and that many bytes are removed from
/// both ends; newlines become spaces. When the content both starts and ends
/// with a space and is not blank, one space is removed from each side.
fn strip_code_span(text: &str) -> String {
    let fence = text.bytes().take_while(|&b| b == b'`').count();
    let content = text
        .get(fence..text.len() - fence)
        .unwrap_or("")
        .replace('\n', " ");
    let unpadded = content
        .strip_prefix(' ')
        .and_then(|rest| rest.strip_suffix(' '))
        .filter(|_| !content.trim().is_empty())
        .map(str::to_owned);
    unpadded.unwrap_or(content)
}
/// Renders a markdown inline span: emphasis and strong wrap a TEXT atom in
/// `\emph` / `\strong`, code is delegated to `md_code_atom`.
fn serialize_md_inline(kind: MdInline, content: &str) -> String {
    let wrap = |head: &str| format!("({head} {})", text_atom(content).unwrap_or_default());
    match kind {
        MdInline::Emph => wrap("\\emph"),
        MdInline::Strong => wrap("\\strong"),
        MdInline::Code => md_code_atom(content),
    }
}
fn resolve_md_link(raw: &str) -> Option<String> {
let bytes = raw.as_bytes();
if bytes.first() == Some(&b'<') {
return Some(url_atom(raw.strip_prefix('<')?.strip_suffix('>')?));
}
let text_end = scan_delimited(bytes, 0, b'[', b']')?;
let text = &raw[1..text_end - 1];
match bytes.get(text_end) {
Some(&b'(') => {
let url_end = scan_delimited(bytes, text_end, b'(', b')')?;
(url_end == bytes.len())
.then(|| inline_link_atom(text, &raw[text_end + 1..url_end - 1]))
}
Some(&b'[') => {
let ref_end = scan_delimited(bytes, text_end, b'[', b']')?;
(ref_end == bytes.len()).then(|| ref_link_atom(text, &raw[text_end + 1..ref_end - 1]))
}
None => Some(shortcut_link_atom(text)),
_ => None,
}
}
/// Renders `[text](url)`: a `\url` of the text when the URL is empty or equals
/// the text up to whitespace, otherwise a `\href`.
fn inline_link_atom(text: &str, url: &str) -> String {
    let text_is_the_url = url.is_empty() || norm_ws(url) == norm_ws(text);
    if !text_is_the_url {
        return href_atom(text, url);
    }
    url_atom(text)
}
/// Renders `url` as a `\url` macro with a verbatim argument.
fn url_atom(url: &str) -> String {
    let target = encode_text(url);
    format!("(\\url (VERB {target}))")
}
fn href_atom(text: &str, url: &str) -> String {
let mut atoms = vec![format!("(VERB {})", encode_text(url))];
if let Some(atom) = link_display_atom(text) {
atoms.push(atom);
}
format!("(\\href {})", atoms.join(" "))
}
/// Renders link display text: a backtick-wrapped text goes through
/// `md_code_atom`, anything else becomes a TEXT atom (or `None` when blank).
fn link_display_atom(text: &str) -> Option<String> {
    match unwrap_code_span(text) {
        (code, true) => Some(md_code_atom(code)),
        (_, false) => text_atom(text),
    }
}
/// Renders `[text][dest]`.
///
/// When the display text (without code-span backticks) equals the destination
/// up to whitespace, the link is rendered like a shortcut link to `dest`;
/// otherwise as a `\link` of the display text, code-wrapped if it was a span.
fn ref_link_atom(text: &str, dest: &str) -> String {
    let (display, is_code) = unwrap_code_span(text);
    if norm_ws(display) != norm_ws(dest) {
        let link = format!("(\\link {})", text_atom(display).unwrap_or_default());
        return code_wrap(link, is_code);
    }
    shortcut_link_atom(dest)
}
/// Renders a shortcut link `[dest]`.
///
/// A backtick-wrapped destination or one ending in `()` is wrapped in `\code`.
/// A `-class` suffix is dropped from the display text and, when no `pkg::`
/// prefix is present, selects `\linkS4class` instead of `\link`.
fn shortcut_link_atom(dest: &str) -> String {
    let (target, was_code_span) = unwrap_code_span(dest);
    let as_code = was_code_span || target.ends_with("()");
    let (pkg, fun) = match target.rsplit_once("::") {
        Some((pkg, fun)) => (Some(pkg), fun),
        None => (None, target),
    };
    let is_s4 = target.ends_with("-class");
    let topic = if is_s4 {
        fun.strip_suffix("-class").unwrap_or(fun)
    } else {
        fun
    };
    let head = match pkg {
        None if is_s4 => "\\linkS4class",
        _ => "\\link",
    };
    let display = pkg.map_or_else(|| topic.to_string(), |p| format!("{p}::{topic}"));
    let link = format!("({head} {})", text_atom(&display).unwrap_or_default());
    code_wrap(link, as_code)
}
/// Resolves the raw text of a markdown image token to a figure atom.
///
/// Expects a bracketed alt text starting at byte 1, immediately followed by a
/// parenthesised destination that ends the token; returns `None` otherwise.
fn resolve_md_image(raw: &str) -> Option<String> {
    let bytes = raw.as_bytes();
    let alt_end = scan_delimited(bytes, 1, b'[', b']')?;
    let dest_end = match bytes.get(alt_end) {
        Some(&b'(') => scan_delimited(bytes, alt_end, b'(', b')')?,
        _ => return None,
    };
    if dest_end != bytes.len() {
        return None;
    }
    let destination = &raw[alt_end + 1..dest_end - 1];
    let (url, title) = split_image_dest(destination);
    Some(figure_atom(url, title))
}
/// Splits an image destination into `(url, title)`.
///
/// A URL in angle brackets ends at the first `>`; otherwise it ends at the
/// first whitespace. The remainder, trimmed and stripped of one pair of title
/// delimiters, is the title.
fn split_image_dest(dest: &str) -> (&str, &str) {
    let trimmed = dest.trim();
    let (url, rest) = match trimmed.strip_prefix('<') {
        // An unterminated `<` leaves the whole destination as the URL.
        Some(bracketed) => bracketed.split_once('>').unwrap_or((trimmed, "")),
        None => match trimmed.find(char::is_whitespace) {
            Some(at) => trimmed.split_at(at),
            None => (trimmed, ""),
        },
    };
    (url, strip_title_delims(rest.trim()))
}
/// Removes one surrounding pair of `"…"`, `'…'` or `(…)` from `s`, if present;
/// otherwise returns `s` unchanged.
fn strip_title_delims(s: &str) -> &str {
    const PAIRS: [(char, char); 3] = [('"', '"'), ('\'', '\''), ('(', ')')];
    PAIRS
        .iter()
        .find_map(|&(open, close)| s.strip_prefix(open)?.strip_suffix(close))
        .unwrap_or(s)
}
/// Renders a `\figure` macro for `url` with an optional `title` argument,
/// wrapped in `\if{html}` or `\if{pdf}` when `image_format` restricts the
/// image to one output format.
fn figure_atom(url: &str, title: &str) -> String {
    let mut figure = format!("(\\figure (VERB {})", encode_text(url));
    if !title.is_empty() {
        figure.push_str(" (VERB ");
        figure.push_str(&encode_text(title));
        figure.push(')');
    }
    figure.push(')');
    let only_for = match image_format(url) {
        ImageFormat::Html => "html",
        ImageFormat::Pdf => "pdf",
        ImageFormat::All => return figure,
    };
    format!("(\\if (TEXT {}) {figure})", encode_text(only_for))
}
/// Output format restriction for an image, as decided by `image_format` and
/// consumed by `figure_atom`.
enum ImageFormat {
/// The figure is wrapped in an `\if` conditioned on `html`.
Html,
/// The figure is wrapped in an `\if` conditioned on `pdf`.
Pdf,
/// The figure is emitted without an `\if` wrapper.
All,
}
/// Classifies an image URL by its (case-insensitive) file extension.
///
/// Extensions accepted only by the HTML list map to `Html`, those accepted
/// only by the PDF list map to `Pdf`, and everything else maps to `All`.
fn image_format(url: &str) -> ImageFormat {
    const HTML_EXTS: [&str; 5] = ["jpg", "jpeg", "gif", "png", "svg"];
    const PDF_EXTS: [&str; 5] = ["jpg", "jpeg", "gif", "png", "pdf"];
    let lower = url.to_ascii_lowercase();
    // Text after the last dot; empty when the URL contains no dot.
    let ext = lower.rsplit_once('.').map_or("", |(_, ext)| ext);
    let html = HTML_EXTS.contains(&ext);
    let pdf = PDF_EXTS.contains(&ext);
    if html && !pdf {
        ImageFormat::Html
    } else if pdf && !html {
        ImageFormat::Pdf
    } else {
        ImageFormat::All
    }
}
/// Wraps `inner` in a `\code` macro when `is_code` is set; otherwise returns
/// it unchanged.
fn code_wrap(inner: String, is_code: bool) -> String {
    match is_code {
        true => format!("(\\code {inner})"),
        false => inner,
    }
}
/// Strips one surrounding pair of backticks from `s`.
///
/// Returns the inner text and `true` when `s` starts and ends with a backtick
/// (and is at least two bytes long); otherwise `s` itself and `false`.
fn unwrap_code_span(s: &str) -> (&str, bool) {
    match s.strip_prefix('`').and_then(|rest| rest.strip_suffix('`')) {
        Some(inner) => (inner, true),
        None => (s, false),
    }
}
/// Finds the end of a balanced `open`…`close` span that begins at `start`.
///
/// Returns the index just past the matching closing byte, or `None` when
/// `bytes[start]` is not `open` or the span is never closed. Nested pairs are
/// tracked with a depth counter.
fn scan_delimited(bytes: &[u8], start: usize, open: u8, close: u8) -> Option<usize> {
    if bytes.get(start).copied() != Some(open) {
        return None;
    }
    let mut depth = 0usize;
    bytes[start..]
        .iter()
        .position(|&byte| {
            if byte == open {
                depth += 1;
                false
            } else if byte == close {
                depth -= 1;
                depth == 0
            } else {
                false
            }
        })
        .map(|offset| start + offset + 1)
}
/// Renders a markdown list node as `\enumerate` or `\itemize`, emitting an
/// `(\item)` atom followed by the item's inline atoms for every list item.
fn serialize_md_list(node: &SyntaxNode) -> String {
    let head = match md_list_is_ordered(node) {
        true => "\\enumerate",
        false => "\\itemize",
    };
    let atoms: Vec<String> = node
        .children()
        .filter(|child| child.kind() == SyntaxKind::ROXYGEN_MD_LIST_ITEM)
        .flat_map(|item| {
            std::iter::once("(\\item)".to_string())
                .chain(serialize_inlines(&md_list_item_inlines(&item)))
        })
        .collect();
    match atoms.as_slice() {
        [] => format!("({head})"),
        items => format!("({head} {})", items.join(" ")),
    }
}
/// Renders a markdown code block as three atoms: an HTML-only opening `<div>`
/// carrying the `sourceCode` class (plus the info string, if any), the
/// `\preformatted` code, and an HTML-only closing `</div>`.
fn serialize_md_code_block(node: &SyntaxNode) -> Vec<String> {
    let (info, code) = md_code_block_parts(node);
    let mut class = String::from("sourceCode");
    if !info.is_empty() {
        class.push(' ');
        class.push_str(&info);
    }
    let html = encode_text("html");
    let html_only = |markup: &str| {
        format!(
            "(\\if (TEXT {html}) (\\out (VERB {})))",
            encode_text(markup)
        )
    };
    let open_div = html_only(&format!("<div class=\"{class}\">"));
    let preformatted = format!("(\\preformatted (VERB {}))", encode_text(&code));
    let close_div = html_only("</div>");
    vec![open_div, preformatted, close_div]
}
/// Renders raw inline HTML as an `\out` macro guarded by `\if{html}`.
fn html_inline_atom(raw: &str) -> String {
    let condition = encode_text("html");
    let markup = encode_text(raw);
    format!("(\\if (TEXT {condition}) (\\out (VERB {markup})))")
}
/// Splits a code block node's text into `(info, code)`.
///
/// The info string is the first line without its marker and leading
/// backticks, trimmed. The code is every line between the first and the last,
/// each stripped of its marker and terminated by a newline.
fn md_code_block_parts(node: &SyntaxNode) -> (String, String) {
    let text = node.text().to_string();
    let lines: Vec<&str> = text.split('\n').collect();
    let info = lines
        .first()
        .map(|first| strip_marker(first).trim_start_matches('`').trim().to_string())
        .unwrap_or_default();
    // Drop the first and last line; nothing is left with two lines or fewer.
    let interior = lines.len().saturating_sub(2);
    let mut code = String::new();
    for line in lines.iter().skip(1).take(interior) {
        code.push_str(strip_marker(line));
        code.push('\n');
    }
    (info, code)
}
/// Removes a leading comment marker from `line`: leading whitespace, any run
/// of `#`, one optional `'`, and one optional space, in that order.
fn strip_marker(line: &str) -> &str {
    let rest = line.trim_start().trim_start_matches('#');
    let rest = rest.strip_prefix('\'').unwrap_or(rest);
    rest.strip_prefix(' ').unwrap_or(rest)
}
/// Returns `true` when the first list-marker token found under `node` starts
/// with an ASCII digit.
fn md_list_is_ordered(node: &SyntaxNode) -> bool {
    let first_marker = node
        .descendants_with_tokens()
        .filter_map(|el| el.into_token())
        .find(|token| token.kind() == SyntaxKind::ROXYGEN_MD_LIST_MARKER);
    match first_marker {
        Some(marker) => marker
            .text()
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_digit()),
        None => false,
    }
}
/// Collects the inline items of a list item: list markers and comment markers
/// are skipped, newlines become a single space, and everything else goes
/// through `push_inline`.
fn md_list_item_inlines(item: &SyntaxNode) -> Vec<Inline> {
    let mut inlines = Vec::new();
    for child in item.children_with_tokens() {
        let kind = child.kind();
        if matches!(
            kind,
            SyntaxKind::ROXYGEN_MD_LIST_MARKER | SyntaxKind::ROXYGEN_MARKER
        ) {
            continue;
        }
        if kind == SyntaxKind::NEWLINE {
            inlines.push(Inline::Text(" ".to_string()));
        } else {
            push_inline(&mut inlines, child);
        }
    }
    inlines
}
/// Renders code-span content as `\code` with an RCODE atom when
/// `code_span_is_r` accepts it, otherwise as `\verb` with a VERB atom.
fn md_code_atom(content: &str) -> String {
    let (head, leaf) = match code_span_is_r(content) {
        true => ("\\code", "RCODE"),
        false => ("\\verb", "VERB"),
    };
    format!("({head} ({leaf} {}))", encode_text(content))
}
/// Code-span contents that `code_span_is_r` accepts as R code outright,
/// without running the parser on them.
const SPECIAL_CODE: &[&str] = &[
"-", ":", "::", ":::", "!", "!=", "(", "[", "[[", "@", "*", "/", "&", "&&", "%*%", "%/%", "%%",
"%in%", "%o%", "%x%", "^", "+", "<", "<=", "=", "==", ">", ">=", "|", "||", "~", "$", "for",
"function", "if", "repeat", "while",
];
/// Decides whether code-span content should be treated as R code.
///
/// Entries of `SPECIAL_CODE` are accepted directly. Anything else must parse
/// without diagnostics and produce exactly one top-level element once
/// whitespace, newlines and comments are ignored.
fn code_span_is_r(code: &str) -> bool {
    if SPECIAL_CODE.contains(&code) {
        return true;
    }
    let parsed = crate::parser::parse(code);
    if !parsed.diagnostics.is_empty() {
        return false;
    }
    let significant = parsed.cst.children_with_tokens().filter(|el| {
        !matches!(
            el.kind(),
            SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::COMMENT
        )
    });
    significant.count() == 1
}
// Tests for `project_to_rd`. Expected strings list sections in the order
// produced by the final lexicographic sort, not in source order.
#[cfg(test)]
mod tests {
use super::*;
// A single intro paragraph serves as both title and description.
#[test]
fn projects_plain_prose_sections() {
let src = "#' Add two numbers\n\
#' @param x,y Numbers to add.\n\
#' @return Their sum.\n\
#' @export\n\
add <- function(x, y) x + y\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Add two numbers\"))\n\
(\\title (TEXT \"Add two numbers\"))\n\
(\\value (TEXT \"Their sum.\"))"
);
}
// First intro paragraph is the title, second the description.
#[test]
fn two_intro_paragraphs_split_title_and_description() {
let src = "#' Example dataset\n\
#'\n\
#' A longer description.\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"A longer description.\"))\n\
(\\title (TEXT \"Example dataset\"))"
);
}
// A third intro paragraph becomes the details section.
#[test]
fn three_intro_paragraphs_split_title_description_details() {
let src = "#' title\n\
#'\n\
#' description\n\
#'\n\
#' details\n\
#' @name a\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"description\"))\n\
(\\details (TEXT \"details\"))\n\
(\\title (TEXT \"title\"))"
);
}
// `@section Heading:` splits at the colon; a multi-atom body is GRP-wrapped.
#[test]
fn section_body_serializes_inline_macros_with_grp_wrap() {
let src = "#' Title\n\
#'\n\
#' Description.\n\
#' @section Foobar:\n\
#' With some \\strong{bold text}.\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description.\"))\n\
(\\section (TEXT \"Foobar\") (GRP (TEXT \"With some\") (\\strong (TEXT \"bold text\")) (TEXT \".\")))\n\
(\\title (TEXT \"Title\"))"
);
}
// A block macro stays with the paragraph before it; a blank line starts a new part.
#[test]
fn block_macro_joins_its_paragraph_then_splits_at_blank_line() {
let src = "#' Title\n\
#'\n\
#' Description with some\n\
#' \\itemize{\n\
#' \\item itemized\n\
#' \\item list\n\
#' }\n\
#'\n\
#' And then another one:\n\
#' \\itemize{\n\
#' \\item item 1\n\
#' \\item item 2\n\
#' }\n\
foo <- function() {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description with some\") \
(\\itemize (\\item) (TEXT \"itemized\") (\\item) (TEXT \"list\")))\n\
(\\details (TEXT \"And then another one:\") \
(\\itemize (\\item) (TEXT \"item 1\") (\\item) (TEXT \"item 2\")))\n\
(\\title (TEXT \"Title\"))"
);
}
// Intro detail paragraphs and an explicit `@details` tag merge into one section.
#[test]
fn trailing_intro_details_merge_with_explicit_details_tag() {
let src = "#' Title\n\
#'\n\
#' Description\n\
#'\n\
#' Details1\n\
#'\n\
#' Details2\n\
#'\n\
#' @details Details3\n\
#'\n\
#' Details4\n\
foo <- function(x) {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description\"))\n\
(\\details (TEXT \"Details1 Details2 Details3 Details4\"))\n\
(\\title (TEXT \"Title\"))"
);
}
// An explicit `@title` with no description is reused as the description.
#[test]
fn explicit_title_without_description_duplicates_into_description() {
let src = "#' @title a\n#' @name a\nNULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"a\"))\n(\\title (TEXT \"a\"))"
);
}
// Tags whose body is the literal `NULL` produce no section.
#[test]
fn null_tag_value_suppresses_section() {
let src = "#' Title\n\
#' @description NULL\n\
#' @details NULL\n\
#' @format NULL\n\
#' @name d\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Title\"))\n(\\title (TEXT \"Title\"))"
);
}
// The examples body is not projected; only a placeholder atom is emitted.
#[test]
fn examples_body_is_a_placeholder() {
let src = "#' T\n#' @examples\n#' f(1)\n#' @name d\nNULL\n";
assert!(project_to_rd(src).contains("(\\examples ...)"));
}
// Several `@examples` tags still yield a single placeholder.
#[test]
fn multiple_examples_tags_merge_into_one_section() {
let src = "#' @name a\n\
#' @title a\n\
#' @examples\n\
#' TRUE\n\
#' @examples\n\
#' FALSE\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"a\"))\n\
(\\examples ...)\n\
(\\title (TEXT \"a\"))"
);
}
// Inline Rd macros (nested, verbatim, with an option) inside prose.
#[test]
fn projects_inline_rd_macros() {
let src = "#' T\n\
#'\n\
#' See \\code{\\link{add}} and \\emph{e}, plus \\url{http://x}\n\
#' and \\link[stats]{lm} end.\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"See\") (\\code (\\link (TEXT \"add\"))) \
(TEXT \"and\") (\\emph (TEXT \"e\")) (TEXT \", plus\") \
(\\url (VERB \"http://x\")) (TEXT \"and\") (\\link (TEXT \"lm\")) \
(TEXT \"end.\"))"
),
"got: {out}"
);
}
// `\code{}` bodies become RCODE atoms; `\verb{}` bodies become VERB atoms.
#[test]
fn code_macro_body_projects_as_rcode() {
let src = "#' T\n\
#'\n\
#' Some \\code{code} and \\verb{More code.}\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"Some\") (\\code (RCODE \"code\")) (TEXT \"and\") \
(\\verb (VERB \"More code.\")))"
),
"got: {out}"
);
}
// `\href` keeps its URL verbatim and GRP-wraps a multi-atom display text.
#[test]
fn href_projects_verbatim_url_and_latexlike_text() {
let src = "#' T\n\
#'\n\
#' See \\href{http://a.com/x y}{click \\emph{here} now}.\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\description (TEXT \"See\") (\\href (VERB \"http://a.com/x y\") \
(GRP (TEXT \"click\") (\\emph (TEXT \"here\")) (TEXT \"now\"))) (TEXT \".\"))"
),
"got: {out}"
);
}
// Markdown links whose text is a code span render that text as code.
#[test]
fn inline_link_code_span_text_subrenders() {
let src = "#' Title\n\
#'\n\
#' Description, see [`code link text`][func].\n\
#' And also [`code as well`](https://external.com).\n\
#' @md\n\
foo <- function() {}\n";
assert_eq!(
project_to_rd(src),
"(\\description (TEXT \"Description, see\") \
(\\code (\\link (TEXT \"code link text\"))) (TEXT \". And also\") \
(\\href (VERB \"https://external.com\") (\\verb (VERB \"code as well\"))) \
(TEXT \".\"))\n\
(\\title (TEXT \"Title\"))"
);
}
// A multi-line `\itemize` under `@details` projects as one nested macro.
#[test]
fn multiline_itemize_projects_nested() {
let src = "#' @details\n\
#' \\itemize{\n\
#' \\item one\n\
#' \\item two\n\
#' }\n\
#' @name x\n\
NULL\n";
assert_eq!(
project_to_rd(src),
"(\\details (\\itemize (\\item) (TEXT \"one\") (\\item) (TEXT \"two\")))"
);
}
// `\item{a}{b}` inside `\describe` keeps both arguments.
#[test]
fn multiline_describe_item_projects_two_args() {
let src = "#' T\n\
#' @format A frame:\n\
#' \\describe{\n\
#' \\item{a}{first}\n\
#' \\item{b}{second}\n\
#' }\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\describe (\\item (TEXT \"a\") (TEXT \"first\")) \
(\\item (TEXT \"b\") (TEXT \"second\")))"
),
"got: {out}"
);
}
// `\tabular` projects its format argument and a GRP-wrapped body.
#[test]
fn multiline_tabular_projects_format_and_grp_body() {
let src = "#' T\n\
#' @details\n\
#' \\tabular{rl}{\n\
#' a \\tab the first row \\cr\n\
#' b \\tab the second row \\cr\n\
#' }\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (\\tabular (TEXT \"rl\") \
(GRP (TEXT \"a\") (\\tab) (TEXT \"the first row\") (\\cr) \
(TEXT \"b\") (\\tab) (TEXT \"the second row\") (\\cr))))"
),
"got: {out}"
);
}
// With `@md`: emphasis, strong, and code spans (R code vs. verbatim).
#[test]
fn md_inline_projects_emph_strong_and_code_vs_verb() {
let src = "#' T\n\
#' @details\n\
#' Text with *emphasis*, **strong** words, `inline code`, and `a + b` code.\n\
#' @md\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (TEXT \"Text with\") (\\emph (TEXT \"emphasis\")) (TEXT \",\") \
(\\strong (TEXT \"strong\")) (TEXT \"words,\") (\\verb (VERB \"inline code\")) \
(TEXT \", and\") (\\code (RCODE \"a + b\")) (TEXT \"code.\"))"
),
"got: {out}"
);
}
// With `@md`: bullet lists become `\itemize`, numbered lists `\enumerate`.
#[test]
fn md_block_lists_project_itemize_and_enumerate() {
let src = "#' T\n\
#' @details\n\
#' Bullets:\n\
#'\n\
#' - first\n\
#' - second\n\
#'\n\
#' Numbered:\n\
#'\n\
#' 1. one\n\
#' 2. two\n\
#' @md\n\
#' @name d\n\
NULL\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\details (TEXT \"Bullets:\") \
(\\itemize (\\item) (TEXT \"first\") (\\item) (TEXT \"second\")) \
(TEXT \"Numbered:\") \
(\\enumerate (\\item) (TEXT \"one\") (\\item) (TEXT \"two\")))"
),
"got: {out}"
);
}
// `@slot` tags are gathered into a single "Slots" describe-section.
#[test]
fn slot_tags_aggregate_into_slots_section() {
let src = "#' Important class.\n\
#'\n\
#' @slot a slot a\n\
#' @slot b slot b\n\
setClass('test')\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\section (TEXT \"Slots\") (\\describe \
(\\item (\\code (RCODE \"a\")) (TEXT \"slot a\")) \
(\\item (\\code (RCODE \"b\")) (TEXT \"slot b\"))))"
),
"got: {out}"
);
}
// `@field` tags are gathered into a single "Fields" describe-section.
#[test]
fn field_tags_aggregate_into_fields_section() {
let src = "#' Important class.\n\
#'\n\
#' @field a field a\n\
#' @field b field b\n\
setRefClass('test')\n";
let out = project_to_rd(src);
assert!(
out.contains(
"(\\section (TEXT \"Fields\") (\\describe \
(\\item (\\code (RCODE \"a\")) (TEXT \"field a\")) \
(\\item (\\code (RCODE \"b\")) (TEXT \"field b\"))))"
),
"got: {out}"
);
}
// Without `@md`, list syntax stays literal text.
#[test]
fn md_block_list_is_off_without_md_tag() {
let src = "#' T\n\
#' @details\n\
#' - first\n\
#' - second\n\
#' @name d\n\
NULL\n";
assert!(
project_to_rd(src).contains("(\\details (TEXT \"- first - second\"))"),
"got: {}",
project_to_rd(src)
);
}
// Without `@md`, inline markdown stays literal text.
#[test]
fn md_inline_is_off_without_md_tag() {
let src = "#' T\n\
#' @details\n\
#' Text with *emphasis* and `code` here.\n\
#' @name d\n\
NULL\n";
assert!(
project_to_rd(src)
.contains("(\\details (TEXT \"Text with *emphasis* and `code` here.\"))"),
"got: {}",
project_to_rd(src)
);
}
}