use rowan::NodeOrToken;
use super::context::FormatContext;
use super::core::format_with_style;
use super::ir::Ir;
use super::style::FormatStyle;
use crate::ast::{AstNode, RoxygenTag};
use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
/// One entry of a roxygen block as seen by the formatter: either a single
/// marker-prefixed source line, or a placeholder for a whole standalone node.
#[derive(Clone, Default)]
struct PhysicalLine {
    /// The `ROXYGEN_MARKER` token that opened this line, if any.
    marker: Option<SyntaxToken>,
    /// The tag cast from a `ROXYGEN_TAG` node found on this line, if any.
    tag: Option<RoxygenTag>,
    /// Elements collected after the marker, in source order.
    elements: Vec<NodeOrToken<SyntaxNode, SyntaxToken>>,
    /// Set when this entry stands for a standalone node (multi-line Rd macro,
    /// markdown list or markdown code block) rather than a regular line.
    block_macro: Option<SyntaxNode>,
}

impl PhysicalLine {
    /// Borrows the marker token, if the line has one.
    fn marker(&self) -> Option<&SyntaxToken> {
        match &self.marker {
            Some(token) => Some(token),
            None => None,
        }
    }

    /// Returns an owned copy of the line's tag, if it has one.
    fn tag(&self) -> Option<RoxygenTag> {
        self.tag.as_ref().cloned()
    }

    /// A line is blank when it carries no tag and none of its elements is
    /// prose content.
    fn is_blank(&self) -> bool {
        if self.tag.is_some() {
            return false;
        }
        self.elements
            .iter()
            .all(|element| !element.kind().is_roxygen_prose_content())
    }
}
/// Splits a roxygen block into `PhysicalLine`s.
///
/// The block's sections and paragraphs are flattened first. Standalone nodes
/// (multi-line Rd macros, markdown lists, markdown code blocks) become their
/// own entry with `block_macro` set; every other element is attached to the
/// line opened by the most recent marker. Elements seen before any marker are
/// dropped.
fn physical_lines(block: &SyntaxNode) -> Vec<PhysicalLine> {
    let mut flat = Vec::new();
    collect_logical_elements(block, &mut flat);

    let mut out: Vec<PhysicalLine> = Vec::new();
    let mut pending = PhysicalLine::default();

    for element in flat {
        let kind = element.kind();

        // Nodes that span several source lines are emitted as a unit.
        let standalone = match kind {
            SyntaxKind::ROXYGEN_RD_MACRO => element.as_node().is_some_and(is_block_macro),
            SyntaxKind::ROXYGEN_MD_LIST | SyntaxKind::ROXYGEN_MD_CODE_BLOCK => true,
            _ => false,
        };
        if standalone {
            if pending.marker.is_some() || !pending.elements.is_empty() {
                out.push(std::mem::take(&mut pending));
            }
            out.push(PhysicalLine {
                block_macro: element.as_node().cloned(),
                ..PhysicalLine::default()
            });
            continue;
        }

        match kind {
            SyntaxKind::ROXYGEN_MARKER => {
                // A second marker without an intervening newline still
                // terminates the line in progress.
                if pending.marker.is_some() {
                    out.push(std::mem::take(&mut pending));
                }
                pending.marker = element.into_token();
            }
            SyntaxKind::NEWLINE => {
                if pending.marker.is_some() {
                    out.push(std::mem::take(&mut pending));
                }
            }
            // Nothing is collected until a marker has opened a line.
            _ if pending.marker.is_none() => {}
            SyntaxKind::ROXYGEN_TAG => {
                pending.tag = element.as_node().cloned().and_then(RoxygenTag::cast);
                pending.elements.push(element);
            }
            _ => pending.elements.push(element),
        }
    }

    if pending.marker.is_some() {
        out.push(pending);
    }
    out
}
/// Returns `true` for an Rd macro node that has a roxygen marker among its
/// direct children.
fn is_block_macro(node: &SyntaxNode) -> bool {
    if node.kind() != SyntaxKind::ROXYGEN_RD_MACRO {
        return false;
    }
    node.children_with_tokens()
        .any(|child| child.kind() == SyntaxKind::ROXYGEN_MARKER)
}
/// Emits a block macro line by line: the first line keeps its leading text
/// and loses trailing whitespace, later lines are trimmed on both sides.
fn emit_block_macro(items: &mut Vec<Ir>, node: &SyntaxNode) {
    let text = node.text().to_string();
    let mut segments = text.split('\n');
    // `split` always yields at least one segment, even for empty text.
    if let Some(first) = segments.next() {
        push_line(items, first.trim_end().to_string());
    }
    for segment in segments {
        push_line(items, segment.trim().to_string());
    }
}
fn emit_block_macro_examples(items: &mut Vec<Ir>, node: &SyntaxNode) {
for seg in node.text().to_string().split('\n') {
push_line(items, normalize_marker_text(seg));
}
}
/// Emits a markdown list node, passing every source line through
/// `normalize_marker_text`.
fn emit_md_list(items: &mut Vec<Ir>, node: &SyntaxNode) {
    let raw = node.text().to_string();
    let mut segments = raw.split('\n');
    while let Some(segment) = segments.next() {
        let normalized = normalize_marker_text(segment);
        push_line(items, normalized);
    }
}
fn emit_md_code_block(items: &mut Vec<Ir>, node: &SyntaxNode) {
for seg in node.text().to_string().split('\n') {
push_line(items, normalize_marker_text(seg));
}
}
/// Normalises one raw source line of a roxygen comment.
///
/// Surrounding whitespace is removed first. If the line then starts with one
/// or more `#` followed by `'`, the result is that marker, a single separator
/// space and the content. Lines without a roxygen marker are returned trimmed
/// and otherwise unchanged; a marker with no content is returned bare.
///
/// Only the separator space directly after the marker is normalised. Any
/// further leading whitespace belongs to the content (nested markdown list
/// items, indented code) and is preserved, because removing it would change
/// what the documentation means.
fn normalize_marker_text(raw: &str) -> String {
    let line = raw.trim();
    let after_hashes = line.trim_start_matches('#');
    // `#` and `'` are ASCII, so byte offsets below always fall on char boundaries.
    let hashes = line.len() - after_hashes.len();
    let body = match after_hashes.strip_prefix('\'') {
        Some(body) if hashes > 0 => body,
        _ => return line.to_string(),
    };
    let marker = &line[..hashes + 1];
    // Drop at most one separator space; deeper indentation is content.
    let content = body.strip_prefix(' ').unwrap_or(body);
    // `line` was trimmed, so `content` has no trailing whitespace and can only
    // be empty when nothing followed the marker.
    if content.is_empty() {
        marker.to_string()
    } else {
        format!("{marker} {content}")
    }
}
/// Appends the children of `node` to `out` in document order, descending into
/// section and paragraph nodes so that only their contents are recorded.
fn collect_logical_elements(
    node: &SyntaxNode,
    out: &mut Vec<NodeOrToken<SyntaxNode, SyntaxToken>>,
) {
    for child in node.children_with_tokens() {
        if let NodeOrToken::Node(inner) = &child {
            let kind = inner.kind();
            if kind == SyntaxKind::ROXYGEN_SECTION || kind == SyntaxKind::ROXYGEN_PARAGRAPH {
                // Pure grouping nodes: flatten them instead of recording them.
                collect_logical_elements(inner, out);
                continue;
            }
        }
        out.push(child);
    }
}
/// Builds the IR for one roxygen comment block.
///
/// The block is split into physical lines, and each line is either routed to
/// an accumulator (plain paragraph, tag unit, or examples body) or emitted on
/// its own after normalisation. All emitted lines are concatenated, separated
/// by hard line breaks (see `push_line`).
///
/// `indent` is multiplied by the style's `indent_width` to obtain the column
/// offset used when computing wrap budgets.
pub(super) fn ir_roxygen_block(node: &SyntaxNode, indent: usize, ctx: FormatContext) -> Ir {
let style = ctx.style();
let indent_cols = indent * style.indent_width;
let mut items: Vec<Ir> = Vec::new();
// Accumulators for content that is emitted only once its extent is known.
let mut para = Paragraph::default();
let mut tag_unit: Option<TagUnit> = None;
let mut example = ExampleBody::default();
// `in_examples` follows the most recent tag line; `in_fence` is true between
// an opening and a closing code-fence line.
let mut in_examples = false;
let mut in_fence = false;
let lw = style.line_width;
// Flushes every accumulator, in the fixed order paragraph, tag unit, examples.
macro_rules! flush_pending {
() => {{
para.flush(&mut items, indent_cols, lw);
flush_tag_unit(&mut tag_unit, &mut items, lw);
example.flush(&mut items, indent_cols, style);
}};
}
for line in physical_lines(node) {
// Standalone nodes bypass all line-level handling.
if let Some(macro_node) = &line.block_macro {
flush_pending!();
if macro_node.kind() == SyntaxKind::ROXYGEN_MD_LIST {
emit_md_list(&mut items, macro_node);
} else if macro_node.kind() == SyntaxKind::ROXYGEN_MD_CODE_BLOCK {
emit_md_code_block(&mut items, macro_node);
} else if in_examples {
emit_block_macro_examples(&mut items, macro_node);
} else {
emit_block_macro(&mut items, macro_node);
}
continue;
}
// Untagged lines following an examples tag are collected as example code.
if in_examples && line.tag().is_none() {
example.push_line(&line);
continue;
}
let content = content_text(&line);
let is_fence = is_fence_marker(&content);
// Inside a fence every line is emitted as-is (normalised), including the
// closing fence line, which also ends the fenced state.
if in_fence {
if is_fence {
in_fence = false;
}
flush_pending!();
emit_normalized(&mut items, &line);
continue;
}
// Opening fence line.
if is_fence {
in_fence = true;
flush_pending!();
emit_normalized(&mut items, &line);
continue;
}
// A tag line ends whatever was being accumulated and decides how the
// following lines are treated.
if let Some(tag) = line.tag() {
in_examples = tag.is_examples();
flush_pending!();
if in_examples || is_non_prose_tag(&tag) || !tag_has_prose(&tag) {
emit_tag_passthrough(&mut items, &line, &tag);
} else {
tag_unit = Some(TagUnit::new(&line, &tag, indent_cols));
}
continue;
}
// Blank and structured lines are never merged into surrounding prose.
let in_paragraph = tag_unit.is_some() || !para.lines.is_empty();
if line.is_blank() || is_structured(&content, in_paragraph) {
flush_pending!();
emit_normalized(&mut items, &line);
continue;
}
let marker = marker_text(&line);
// Prose directly after a tag continues that tag as long as the marker text
// is the same; otherwise the tag unit is closed here.
if let Some(unit) = tag_unit.as_mut() {
if unit.marker == marker {
unit.push_continuation(&line);
continue;
}
flush_tag_unit(&mut tag_unit, &mut items, lw);
}
// A change of marker text starts a new paragraph.
if para.marker.as_deref().is_some_and(|m| m != marker) {
para.flush(&mut items, indent_cols, lw);
}
if para.marker.is_none() {
para.marker = Some(marker);
}
para.push_line(&line);
}
// Emit whatever is still pending at the end of the block.
flush_pending!();
Ir::concat(items)
}
/// Accumulates consecutive prose lines that share one marker so they can be
/// re-wrapped together.
#[derive(Default)]
struct Paragraph {
    /// Marker text used for every emitted line of the paragraph.
    marker: Option<String>,
    /// Whitespace-separated chunks gathered from all lines so far.
    chunks: Vec<String>,
    /// The original lines, kept for the verbatim fallback.
    lines: Vec<PhysicalLine>,
}

impl Paragraph {
    /// Adds a line: its chunks are appended and the line itself is retained.
    fn push_line(&mut self, line: &PhysicalLine) {
        self.lines.push(line.clone());
        line_chunks(line, &mut self.chunks);
    }

    /// Resets the paragraph to its empty state.
    fn clear(&mut self) {
        *self = Self::default();
    }

    /// Emits the paragraph and resets it; does nothing when no line is held.
    ///
    /// The chunks are re-wrapped to `line_width` unless there are none, or one
    /// of them is unsafe at the start of a line. In that case the original
    /// lines are emitted normalised, one by one.
    fn flush(&mut self, items: &mut Vec<Ir>, indent_cols: usize, line_width: usize) {
        if self.lines.is_empty() {
            return;
        }

        let rewrappable =
            !self.chunks.is_empty() && !self.chunks.iter().any(|chunk| is_unsafe_line_start(chunk));

        if rewrappable {
            let marker = match &self.marker {
                Some(text) => text.clone(),
                None => "#'".to_string(),
            };
            // Columns taken by indentation, marker and the separator space.
            let used = indent_cols + marker.chars().count() + 1;
            let budget = line_width.saturating_sub(used).max(1);
            for text in wrap_chunks(&self.chunks, budget) {
                push_line(items, format!("{marker} {text}"));
            }
        } else {
            for line in &self.lines {
                emit_normalized(items, line);
            }
        }

        self.clear();
    }
}
/// A tag line plus the prose lines that continue it, wrapped as one unit.
struct TagUnit {
    /// Marker text of the tag line.
    marker: String,
    /// Column offset of the block, used for wrap budgets.
    indent_cols: usize,
    /// `@name`, optionally followed by the tag's argument.
    header: String,
    /// Whitespace-separated prose chunks of the tag and its continuations.
    chunks: Vec<String>,
    /// The original lines, kept for the verbatim fallback.
    lines: Vec<PhysicalLine>,
}

impl TagUnit {
    /// Starts a unit from a tag line, seeding the chunks with the tag's prose.
    fn new(line: &PhysicalLine, tag: &RoxygenTag, indent_cols: usize) -> Self {
        let mut chunks = Vec::new();
        tag_prose_chunks(tag, &mut chunks);
        let header = match tag_header(tag) {
            Some(text) => text,
            None => "@".to_string(),
        };
        Self {
            marker: marker_text(line),
            indent_cols,
            header,
            chunks,
            lines: vec![line.clone()],
        }
    }

    /// Adds a continuation line to the unit.
    fn push_continuation(&mut self, line: &PhysicalLine) {
        self.lines.push(line.clone());
        line_chunks(line, &mut self.chunks);
    }

    /// Emits the unit.
    ///
    /// If any chunk is unsafe at the start of a line, the original lines are
    /// emitted instead of being re-wrapped: the first through the tag
    /// pass-through when it carries a tag, the rest normalised.
    fn flush(self, items: &mut Vec<Ir>, line_width: usize) {
        let TagUnit {
            marker,
            indent_cols,
            header,
            chunks,
            lines,
        } = self;

        if chunks.iter().any(|chunk| is_unsafe_line_start(chunk)) {
            let mut remaining = lines.iter();
            if let Some(first) = remaining.next() {
                match first.tag() {
                    Some(tag) => emit_tag_passthrough(items, first, &tag),
                    None => emit_normalized(items, first),
                }
            }
            for line in remaining {
                emit_normalized(items, line);
            }
            return;
        }

        let marker_w = marker.chars().count();
        // First line: indent, marker, space, header, space, then prose.
        let first_start = indent_cols + marker_w + 1 + header.chars().count() + 1;
        // NOTE(review): continuation lines are budgeted as starting three
        // columns after the marker, but are emitted below with a single space
        // after it. Confirm whether a hanging indent was intended.
        let cont_start = indent_cols + marker_w + 3;
        let first_budget = line_width.saturating_sub(first_start).max(1);
        let cont_budget = line_width.saturating_sub(cont_start).max(1);

        let prose = wrap_chunks_hanging(&chunks, first_budget, cont_budget);
        // An empty first entry means no prose goes on the header line.
        let head = &prose[0];
        let first_line = if head.is_empty() {
            format!("{marker} {header}")
        } else {
            format!("{marker} {header} {}", head)
        };
        push_line(items, first_line);
        for cont in prose.iter().skip(1) {
            push_line(items, format!("{marker} {cont}"));
        }
    }
}
/// Takes the pending tag unit out of `unit`, if there is one, and emits it.
fn flush_tag_unit(unit: &mut Option<TagUnit>, items: &mut Vec<Ir>, line_width: usize) {
    let Some(pending) = unit.take() else {
        return;
    };
    pending.flush(items, line_width);
}
/// Accumulates the untagged lines that follow an examples tag so their code
/// can be formatted as one source text.
#[derive(Default)]
struct ExampleBody {
    /// Marker text of the first collected line; reused for every emitted line.
    marker: Option<String>,
    /// The collected lines, in source order.
    lines: Vec<PhysicalLine>,
}

impl ExampleBody {
    /// Adds a line, remembering the marker of the first line pushed.
    fn push_line(&mut self, line: &PhysicalLine) {
        if self.marker.is_none() {
            self.marker = Some(marker_text(line));
        }
        self.lines.push(line.clone());
    }

    /// Emits the collected lines and resets the body; does nothing when empty.
    ///
    /// Trailing blank lines are split off and emitted normalised after the
    /// body. The remaining lines are joined with `\n` and handed to
    /// `format_with_style` with a line width reduced by the indentation, the
    /// marker and one separator space. If formatting fails, or the body holds
    /// no code at all, the original lines are emitted normalised instead.
    fn flush(&mut self, items: &mut Vec<Ir>, indent_cols: usize, style: FormatStyle) {
        if self.lines.is_empty() {
            return;
        }
        let lines = std::mem::take(&mut self.lines);
        let marker = self.marker.take().unwrap_or_else(|| "#'".to_string());

        // Index one past the last non-blank line; 0 when every line is blank.
        let body_end = lines
            .iter()
            .rposition(|l| !l.is_blank())
            .map_or(0, |i| i + 1);
        let (body, trailing) = lines.split_at(body_end);
        let source = body.iter().map(content_text).collect::<Vec<_>>().join("\n");
        if source.trim().is_empty() {
            for line in &lines {
                emit_normalized(items, line);
            }
            return;
        }

        // Measure the marker in characters, as the paragraph and tag-unit
        // wrappers do, so all three compute their budgets the same way.
        let prefix = indent_cols + marker.chars().count() + 1;
        let budget = style.line_width.saturating_sub(prefix).max(1);
        let body_style = FormatStyle {
            line_width: budget,
            ..style
        };
        match format_with_style(&source, body_style) {
            Ok(formatted) => {
                for code in formatted.lines() {
                    // Empty code lines get a bare marker, without a trailing space.
                    if code.is_empty() {
                        push_line(items, marker.clone());
                    } else {
                        push_line(items, format!("{marker} {code}"));
                    }
                }
            }
            // Best effort: code that cannot be formatted is kept as written.
            Err(_) => {
                for line in body {
                    emit_normalized(items, line);
                }
            }
        }
        for line in trailing {
            emit_normalized(items, line);
        }
    }
}
/// Greedily packs `chunks` into lines, using `first_budget` characters for the
/// first line and `cont_budget` for every later one.
///
/// If the very first chunk is wider than `first_budget` and continuation lines
/// are wider than the first, the first line is left empty and the chunk starts
/// on a continuation line. The result always holds at least one (possibly
/// empty) line.
fn wrap_chunks_hanging(chunks: &[String], first_budget: usize, cont_budget: usize) -> Vec<String> {
    let mut done: Vec<String> = Vec::new();
    let mut open = String::new();
    let mut open_width = 0usize;
    let mut limit = first_budget;

    for word in chunks {
        let width = word.chars().count();

        if !open.is_empty() {
            if open_width + 1 + width <= limit {
                open.push(' ');
                open.push_str(word);
                open_width += 1 + width;
                continue;
            }
            // No room left: close the line and carry on with the wider budget.
            done.push(std::mem::take(&mut open));
            limit = cont_budget;
        } else if done.is_empty()
            && limit == first_budget
            && first_budget < width
            && first_budget < cont_budget
        {
            // The chunk only fits (or fits better) on a continuation line.
            done.push(String::new());
            limit = cont_budget;
        }

        open.push_str(word);
        open_width = width;
    }

    done.push(open);
    done
}
/// Greedily packs `chunks` into lines of at most `budget` characters, joining
/// chunks with single spaces. A chunk wider than the budget gets a line of its
/// own. The result always holds at least one (possibly empty) line.
fn wrap_chunks(chunks: &[String], budget: usize) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut line = String::new();
    let mut line_width = 0usize;

    for word in chunks {
        let width = word.chars().count();
        if !line.is_empty() {
            if line_width + 1 + width <= budget {
                line.push(' ');
                line.push_str(word);
                line_width += 1 + width;
                continue;
            }
            out.push(std::mem::take(&mut line));
        }
        line.push_str(word);
        line_width = width;
    }

    if out.is_empty() || !line.is_empty() {
        out.push(line);
    }
    out
}
/// Appends the whitespace-separated chunks of a line's content to `out`.
fn line_chunks(line: &PhysicalLine, out: &mut Vec<String>) {
    let elements = content_elements(line);
    chunk_elements(elements, out);
}
/// Splits a sequence of elements into chunks and appends them to `out`.
///
/// Only whitespace inside `ROXYGEN_TEXT` tokens separates chunks. The text of
/// any other token or node is appended to the chunk in progress unchanged, so
/// it is never split and stays attached to adjacent text.
fn chunk_elements<I>(elements: I, out: &mut Vec<String>)
where
    I: Iterator<Item = NodeOrToken<SyntaxNode, SyntaxToken>>,
{
    let mut word = String::new();

    for element in elements {
        let token = match element {
            NodeOrToken::Node(node) => {
                word.push_str(&node.text().to_string());
                continue;
            }
            NodeOrToken::Token(token) => token,
        };

        if token.kind() != SyntaxKind::ROXYGEN_TEXT {
            word.push_str(token.text());
            continue;
        }

        for ch in token.text().chars() {
            if !ch.is_whitespace() {
                word.push(ch);
            } else if !word.is_empty() {
                out.push(std::mem::take(&mut word));
            }
        }
    }

    if !word.is_empty() {
        out.push(word);
    }
}
fn content_elements(
line: &PhysicalLine,
) -> impl Iterator<Item = NodeOrToken<SyntaxNode, crate::syntax::SyntaxToken>> + '_ {
let mut seen_content = false;
line.elements
.iter()
.filter(move |el| match el.kind() {
SyntaxKind::WHITESPACE if !seen_content => false,
_ => {
seen_content = true;
true
}
})
.cloned()
}
/// Returns the text of a line's content elements, concatenated and trimmed on
/// both sides.
fn content_text(line: &PhysicalLine) -> String {
    let joined: String = content_elements(line)
        .map(|element| match element {
            NodeOrToken::Token(token) => token.text().to_string(),
            NodeOrToken::Node(node) => node.text().to_string(),
        })
        .collect();
    joined.trim().to_string()
}
/// Returns the text of the line's marker token, or `#'` when it has none.
fn marker_text(line: &PhysicalLine) -> String {
    match line.marker() {
        Some(token) => token.text().to_string(),
        None => "#'".to_string(),
    }
}
/// Emits a single line in its normalised form (see `normalize_roxygen_line`).
fn emit_normalized(items: &mut Vec<Ir>, line: &PhysicalLine) {
    let rendered = normalize_roxygen_line(line);
    push_line(items, rendered);
}
/// Emits a tag line as marker, header and the tag's prose taken verbatim,
/// without any re-wrapping. Falls back to the normalised line when no header
/// can be built for the tag.
fn emit_tag_passthrough(items: &mut Vec<Ir>, line: &PhysicalLine, tag: &RoxygenTag) {
    match tag_header(tag) {
        None => emit_normalized(items, line),
        Some(header) => {
            let marker = marker_text(line);
            let rest = tag_rest_verbatim(tag);
            let rendered = if rest.is_empty() {
                format!("{marker} {header}")
            } else {
                format!("{marker} {header} {rest}")
            };
            push_line(items, rendered);
        }
    }
}
/// Tag names (without the leading `@`) that `is_non_prose_tag` recognises.
/// Matching is by exact, case-sensitive string comparison.
const NON_PROSE_TAGS: &[&str] = &[
"usage",
"eval",
"evalRd",
"evalNamespace",
"section",
"export",
"exportClass",
"exportMethod",
"exportS3Method",
"exportPattern",
"import",
"importFrom",
"importClassesFrom",
"importMethodsFrom",
"rawNamespace",
"useDynLib",
"rdname",
"name",
"aliases",
"keywords",
"family",
"concept",
"docType",
"encoding",
"backref",
];
/// Returns `true` when the tag has a name and that name is listed in
/// `NON_PROSE_TAGS`.
fn is_non_prose_tag(tag: &RoxygenTag) -> bool {
    let name = tag.name();
    match name.as_deref() {
        Some(wanted) => NON_PROSE_TAGS.iter().any(|known| *known == wanted),
        None => false,
    }
}
/// Returns `true` when at least one direct child of the tag is prose content.
fn tag_has_prose(tag: &RoxygenTag) -> bool {
    for child in tag.syntax().children_with_tokens() {
        if is_tag_prose_kind(child.kind()) {
            return true;
        }
    }
    false
}
/// Returns `true` when `kind` counts as prose content inside a tag; delegates
/// to `SyntaxKind::is_roxygen_prose_content`.
fn is_tag_prose_kind(kind: SyntaxKind) -> bool {
kind.is_roxygen_prose_content()
}
fn tag_header(tag: &RoxygenTag) -> Option<String> {
let name = tag.name()?;
let mut header = String::from("@");
header.push_str(&name);
if let Some(arg) = tag.arg() {
header.push(' ');
header.push_str(arg.text());
}
Some(header)
}
/// Concatenates the text of the tag's prose children exactly as written and
/// trims the result on both sides.
fn tag_rest_verbatim(tag: &RoxygenTag) -> String {
    let joined: String = tag
        .syntax()
        .children_with_tokens()
        .filter(|child| is_tag_prose_kind(child.kind()))
        .map(|child| match child {
            NodeOrToken::Token(token) => token.text().to_string(),
            NodeOrToken::Node(node) => node.text().to_string(),
        })
        .collect();
    joined.trim().to_string()
}
/// Appends the chunks of the tag's prose children to `out`.
fn tag_prose_chunks(tag: &RoxygenTag, out: &mut Vec<String>) {
    chunk_elements(
        tag.syntax()
            .children_with_tokens()
            .filter(|child| is_tag_prose_kind(child.kind())),
        out,
    );
}
/// Appends one line of text to `items`, preceded by a hard line break unless
/// it is the first item.
fn push_line(items: &mut Vec<Ir>, line: String) {
    let needs_break = !items.is_empty();
    if needs_break {
        items.push(Ir::hard_line());
    }
    items.push(Ir::text(line));
}
/// Returns `true` when `content` begins with a code-fence opener: three
/// backticks or three tildes.
fn is_fence_marker(content: &str) -> bool {
    const FENCES: [&str; 2] = ["```", "~~~"];
    FENCES.iter().any(|fence| content.starts_with(*fence))
}
/// Returns `true` when a line's content must not be merged into surrounding
/// prose: it starts with a bullet or block-quote prefix, a `#`, a code fence
/// or an ordered list item, or it contains a `|` anywhere.
fn is_structured(content: &str, in_paragraph: bool) -> bool {
    const PREFIXES: [&str; 4] = ["- ", "* ", "+ ", "> "];

    if PREFIXES.iter().any(|prefix| content.starts_with(*prefix)) {
        return true;
    }
    if content.starts_with('#') || is_fence_marker(content) || content.contains('|') {
        return true;
    }
    starts_ordered_list_item(content, in_paragraph)
}
/// Returns `true` when `content` starts with an ordered-list marker followed
/// by a space. While inside a paragraph, only a marker numbered 1 counts.
fn starts_ordered_list_item(content: &str, in_paragraph: bool) -> bool {
    let Some((number, marker_len)) = ordered_marker(content) else {
        return false;
    };
    if content.as_bytes().get(marker_len) != Some(&b' ') {
        return false;
    }
    !in_paragraph || number == 1
}
/// Parses an ordered-list marker at the start of `s`: one to nine ASCII digits
/// followed by `.` or `)`.
///
/// Returns the parsed number together with the byte length of the marker
/// (digits plus delimiter), or `None` when `s` does not start with one.
fn ordered_marker(s: &str) -> Option<(u64, usize)> {
    let bytes = s.as_bytes();
    let digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
    if !(1..=9).contains(&digits) {
        return None;
    }
    let delimiter = *bytes.get(digits)?;
    if delimiter != b'.' && delimiter != b')' {
        return None;
    }
    let number = s[..digits].parse::<u64>().ok()?;
    Some((number, digits + 1))
}
/// Returns `true` for a chunk that is unsafe at the start of a line: a lone
/// `-`, `*`, `+` or `>`, anything starting with `#` or a code fence, or a bare
/// ordered-list marker numbered 1.
fn is_unsafe_line_start(chunk: &str) -> bool {
    match chunk {
        "-" | "*" | "+" | ">" => true,
        _ => {
            chunk.starts_with('#')
                || chunk.starts_with("```")
                || chunk.starts_with("~~~")
                || is_unsafe_ordered_marker(chunk)
        }
    }
}
/// Returns `true` when the whole chunk is an ordered-list marker whose number
/// is 1 (for example `1.` or `1)`).
fn is_unsafe_ordered_marker(chunk: &str) -> bool {
    match ordered_marker(chunk) {
        Some((1, marker_len)) => marker_len == chunk.len(),
        _ => false,
    }
}
/// Renders a line as its marker, one space and its content with trailing
/// whitespace removed; a line without content renders as the bare marker.
fn normalize_roxygen_line(line: &PhysicalLine) -> String {
    let mut rendered = marker_text(line);
    let body: String = content_elements(line)
        .map(|element| match element {
            NodeOrToken::Token(token) => token.text().to_string(),
            NodeOrToken::Node(node) => node.text().to_string(),
        })
        .collect();
    let body = body.trim_end();
    if !body.is_empty() {
        rendered.push(' ');
        rendered.push_str(body);
    }
    rendered
}