/// Appends a PHPDoc block (`/** ... */`) for `doc` to `out`.
///
/// Nothing is written when `doc` is empty. The text is sanitised for the PHPDoc target and
/// parsed into rustdoc sections. When an arguments, returns, errors or example section is
/// present the body is produced by `render_phpdoc_sections` (with `exception_class` passed as
/// the throws class); otherwise the sanitised text is used unchanged.
///
/// Every body line is prefixed with `indent` followed by ` * `, and `*/` sequences are defused
/// with `escape_phpdoc_line` so they cannot close the comment early.
pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
    if doc.is_empty() {
        return;
    }
    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
    let sections = parse_rustdoc_sections(&sanitized);
    let any_section = sections.arguments.is_some()
        || sections.returns.is_some()
        || sections.errors.is_some()
        || sections.example.is_some();
    let body = if any_section {
        render_phpdoc_sections(&sections, exception_class)
    } else {
        sanitized
    };
    out.push_str(indent);
    out.push_str("/**\n");
    for line in body.lines() {
        out.push_str(indent);
        out.push_str(" * ");
        out.push_str(&escape_phpdoc_line(line));
        out.push('\n');
    }
    out.push_str(indent);
    out.push_str(" */\n");
}
/// Rewrites every `*/` in `s` as `* /` so the text cannot terminate an enclosing
/// `/** ... */` comment.
fn escape_phpdoc_line(s: &str) -> String {
s.replace("*/", "* /")
}
/// Appends C# XML documentation (`/// ...` lines) for `doc` to `out`.
///
/// Nothing is written when `doc` is empty. The raw text is split into rustdoc sections first
/// and each section is then sanitised individually for the C# target, using the variant that
/// does not drop section headings. The example section is always discarded.
///
/// When no arguments, returns or errors section is present, only a `<summary>` element is
/// emitted. Otherwise the XML comes from `render_csharp_xml_sections`, with `exception_class`
/// forwarded to it. Every emitted line is prefixed with `indent` and `/// `.
pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
    /// Sanitises one section body for the C# target.
    fn sanitize(text: &str) -> String {
        sanitize_rust_idioms_keep_sections(text, DocTarget::CSharpDoc)
    }

    if doc.is_empty() {
        return;
    }
    let raw_sections = parse_rustdoc_sections(doc);
    let sections = RustdocSections {
        summary: sanitize(&raw_sections.summary),
        arguments: raw_sections.arguments.as_deref().map(sanitize),
        returns: raw_sections.returns.as_deref().map(sanitize),
        errors: raw_sections.errors.as_deref().map(sanitize),
        panics: raw_sections.panics.as_deref().map(sanitize),
        safety: raw_sections.safety.as_deref().map(sanitize),
        example: None,
    };
    // `example` is always `None` here, so it cannot contribute to this decision.
    let any_section =
        sections.arguments.is_some() || sections.returns.is_some() || sections.errors.is_some();
    if !any_section {
        out.push_str(indent);
        out.push_str("/// <summary>\n");
        for line in sections.summary.lines() {
            out.push_str(indent);
            out.push_str("/// ");
            out.push_str(line);
            out.push('\n');
        }
        out.push_str(indent);
        out.push_str("/// </summary>\n");
        return;
    }
    let rendered = render_csharp_xml_sections(&sections, exception_class);
    for line in rendered.lines() {
        out.push_str(indent);
        out.push_str("/// ");
        out.push_str(line);
        out.push('\n');
    }
}
pub fn emit_elixir_doc(out: &mut String, doc: &str) {
if doc.is_empty() {
return;
}
out.push_str("@doc \"\"\"\n");
for line in doc.lines() {
out.push_str(&escape_elixir_doc_line(line));
out.push('\n');
}
out.push_str("\"\"\"\n");
}
/// Appends `doc` to `out` as rustdoc comment lines: each line becomes `indent` + `/// ` + text.
///
/// Does nothing for an empty `doc`.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    doc.lines()
        .for_each(|text| out.extend([indent, "/// ", text, "\n"]));
}
/// Inserts a space before the third quote of every `"""` run in `s`, so the text cannot
/// contain the heredoc delimiter written by `emit_elixir_doc`.
fn escape_elixir_doc_line(s: &str) -> String {
s.replace("\"\"\"", "\"\" \"")
}
/// Appends `doc` to `out` as roxygen comment lines, each prefixed with `#' `.
///
/// Does nothing for an empty `doc`. No indentation is applied.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    if doc.is_empty() {
        return;
    }
    for text in doc.lines() {
        *out += "#' ";
        *out += text;
        out.push('\n');
    }
}
/// Appends `doc` to `out` as Swift `///` documentation lines, each prefixed with `indent`.
///
/// Does nothing for an empty `doc`.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    // Build the per-line prefix once and reuse it for every line.
    let prefix = format!("{indent}/// ");
    for text in doc.lines() {
        out.push_str(&prefix);
        out.push_str(text);
        out.push('\n');
    }
}
/// Appends a Javadoc block (`/** ... */`) for `doc` to `out`.
///
/// Does nothing for an empty `doc`. Each line is passed through `escape_javadoc_line` and
/// stripped of trailing whitespace. Lines that end up empty are written as a bare ` *` so no
/// trailing space is produced.
pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.extend([indent, "/**\n"]);
    for raw in doc.lines() {
        let escaped = escape_javadoc_line(raw);
        match escaped.trim_end() {
            "" => out.extend([indent, " *\n"]),
            text => out.extend([indent, " * ", text, "\n"]),
        }
    }
    out.extend([indent, " */\n"]);
}
/// Appends a KDoc block (`/** ... */`) for `doc` to `out`.
///
/// Does nothing for an empty `doc`. Trailing whitespace is removed from each line, and blank
/// lines are written as a bare ` *` with no trailing space.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.push_str(&format!("{indent}/**\n"));
    for text in doc.lines().map(str::trim_end) {
        out.push_str(indent);
        out.push_str(if text.is_empty() { " *" } else { " * " });
        out.push_str(text);
        out.push('\n');
    }
    out.push_str(&format!("{indent} */\n"));
}
/// Appends a KDoc comment for `doc` to `out`, using the single-line form when it fits.
///
/// Does nothing for an empty `doc`. A one-line doc is written as `/** text */` when `indent`,
/// the trimmed text and the delimiters together fit within 100 bytes. Anything else is written
/// as a multi-line block where blank lines become a bare ` *`.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    if doc.is_empty() {
        return;
    }
    let lines: Vec<&str> = doc.lines().collect();
    if let [only] = lines.as_slice() {
        let text = only.trim();
        // 4 bytes for the opening "/** " and 3 for the closing " */".
        if indent.len() + 4 + text.len() + 3 <= KTFMT_LINE_WIDTH {
            out.extend([indent, "/** ", text, " */\n"]);
            return;
        }
    }
    out.extend([indent, "/**\n"]);
    for text in lines.into_iter().map(str::trim_end) {
        if text.is_empty() {
            out.extend([indent, " *\n"]);
        } else {
            out.extend([indent, " * ", text, "\n"]);
        }
    }
    out.extend([indent, " */\n"]);
}
/// Appends `doc` to `out` as Dart `///` documentation lines, each prefixed with `indent`.
///
/// Does nothing for an empty `doc`.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.extend(doc.lines().flat_map(|text| [indent, "/// ", text, "\n"]));
}
/// Appends `doc` to `out` as Gleam `///` documentation lines, each prefixed with `indent`.
///
/// Does nothing for an empty `doc`.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    let mut remaining = doc.lines();
    while let Some(text) = remaining.next() {
        let rendered = [indent, "/// ", text, "\n"].concat();
        out.push_str(&rendered);
    }
}
/// Appends Doxygen-style `///` comment lines for `doc` to `out`.
///
/// Nothing is written when `doc` is empty or whitespace only. The text is parsed into rustdoc
/// sections. When an arguments, returns, errors, safety or example section exists, the body
/// comes from `render_doxygen_sections_with_notes`; otherwise only the summary is used.
///
/// Markdown links are then flattened by `strip_markdown_links`, the result is wrapped with
/// `word_wrap` at `DOXYGEN_WRAP_WIDTH`, and each line is written as `indent` + `/// ` + text.
pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
    if doc.trim().is_empty() {
        return;
    }
    let sections = parse_rustdoc_sections(doc);
    let any_section = sections.arguments.is_some()
        || sections.returns.is_some()
        || sections.errors.is_some()
        || sections.safety.is_some()
        || sections.example.is_some();
    let body = if any_section {
        render_doxygen_sections_with_notes(&sections)
    } else {
        sections.summary.clone()
    };
    let body = strip_markdown_links(&body);
    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
    for line in wrapped.lines() {
        out.push_str(indent);
        out.push_str("/// ");
        out.push_str(line);
        out.push('\n');
    }
}
/// Line width passed to `word_wrap` when `emit_c_doxygen` wraps its comment body.
const DOXYGEN_WRAP_WIDTH: usize = 100;
/// Renders parsed rustdoc sections as Doxygen text, one entry per line.
///
/// The output contains, in order and only when present: the summary, one `\param` line per
/// argument bullet, a `\return` line, the errors section as `\note`, the safety section as
/// `\note SAFETY:`, and the example wrapped in `\code` / `\endcode` with Markdown fence lines
/// removed. Entries are separated by a single newline.
fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str("\\param ");
            out.push_str(&name);
            if !desc.is_empty() {
                out.push(' ');
                out.push_str(&desc);
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("\\return ");
        out.push_str(ret.trim());
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("\\note ");
        out.push_str(err.trim());
    }
    if let Some(safety) = sections.safety.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("\\note SAFETY: ");
        out.push_str(safety.trim());
    }
    if let Some(example) = sections.example.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("\\code\n");
        for line in example.lines() {
            // Fence markers are replaced by the surrounding \code / \endcode pair.
            if line.trim_start().starts_with("```") {
                continue;
            }
            out.push_str(line);
            out.push('\n');
        }
        out.push_str("\\endcode");
    }
    out
}
/// Rewrites inline Markdown links `[text](url)` as `text (url)`.
///
/// A `[` only starts a link when the first `]` after it is immediately followed by `(` and a
/// closing `)` exists later in the string. Any other `[` is copied through unchanged.
///
/// All other text is copied as whole string slices, so multi-byte UTF-8 characters are
/// preserved (copying byte by byte as `char` would turn each byte into its own character).
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('[') {
        out.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];
        // Try to read `text](url)` from just after the '['.
        let link = after_open.find(']').and_then(|close| {
            let after_paren = after_open[close + 1..].strip_prefix('(')?;
            let url_len = after_paren.find(')')?;
            Some((
                &after_open[..close],
                &after_paren[..url_len],
                &after_paren[url_len + 1..],
            ))
        });
        match link {
            Some((text, url, tail)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = tail;
            }
            None => {
                // Not a link: keep the bracket and keep scanning right after it.
                out.push('[');
                rest = after_open;
            }
        }
    }
    out.push_str(rest);
    out
}
/// Wraps each line of `input` so that it is at most `width` bytes long where possible.
///
/// Lines are copied unchanged when they start a `\code` block, end one with `\endcode`, lie
/// inside such a block, start with a Markdown fence, or already fit within `width`. Any other
/// line is split on whitespace and re-joined with single spaces, starting a new output line
/// whenever adding the next word would exceed `width`. A single word longer than `width` is
/// kept whole on its own line. Trailing newlines are removed from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false;
    for raw in input.lines() {
        let trimmed = raw.trim_start();
        let verbatim = if trimmed.starts_with("\\code") {
            in_code = true;
            true
        } else if trimmed.starts_with("\\endcode") {
            in_code = false;
            true
        } else {
            in_code || trimmed.starts_with("```") || raw.len() <= width
        };
        if verbatim {
            out.push_str(raw);
            out.push('\n');
            continue;
        }
        let mut current = String::with_capacity(width);
        for word in raw.split_whitespace() {
            if !current.is_empty() {
                if current.len() + 1 + word.len() > width {
                    // The word does not fit: finish this output line and start a new one.
                    out.push_str(&current);
                    out.push('\n');
                    current.clear();
                } else {
                    current.push(' ');
                }
            }
            current.push_str(word);
        }
        if !current.is_empty() {
            out.push_str(&current);
            out.push('\n');
        }
    }
    out.trim_end_matches('\n').to_string()
}
/// Appends `doc` to `out` as Zig `///` documentation lines, each prefixed with `indent`.
///
/// Does nothing for an empty `doc`.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    let rendered: String = doc
        .lines()
        .map(|text| format!("{indent}/// {text}\n"))
        .collect();
    out.push_str(&rendered);
}
/// Appends YARD-style Ruby comment lines (`# ...`) for `doc` to `out`.
///
/// Nothing is written when `doc` is empty. When the parsed rustdoc contains an arguments,
/// returns, errors or example section, the body is produced by `render_yard_sections`;
/// otherwise `doc` is emitted verbatim. Each line is written as `indent` + `# ` + text.
pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    let sections = parse_rustdoc_sections(doc);
    let any_section = sections.arguments.is_some()
        || sections.returns.is_some()
        || sections.errors.is_some()
        || sections.example.is_some();
    let body = if any_section {
        render_yard_sections(&sections)
    } else {
        doc.to_string()
    };
    for line in body.lines() {
        out.push_str(indent);
        out.push_str("# ");
        out.push_str(line);
        out.push('\n');
    }
}
/// Renders parsed rustdoc sections as YARD tag text, one entry per line.
///
/// The output contains, in order and only when present: the summary, one `@param` line per
/// argument bullet, a `@return` line, an `@raise` line for the errors section, and an
/// `@example` block. The example is included only when `example_for_target` yields a body for
/// `"ruby"`. Entries are separated by a single newline.
pub fn render_yard_sections(sections: &RustdocSections) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str("@param ");
            out.push_str(&name);
            if !desc.is_empty() {
                out.push(' ');
                out.push_str(&desc);
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("@return ");
        out.push_str(ret.trim());
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("@raise ");
        out.push_str(err.trim());
    }
    if let Some(example) = sections.example.as_deref() {
        if let Some(body) = example_for_target(example, "ruby") {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str("@example\n");
            out.push_str(&body);
        }
    }
    out
}
/// Escapes one documentation line for use inside a Javadoc comment.
///
/// Backtick-delimited spans are converted to `{@code ...}`, with the span content passed
/// through `escape_javadoc_html_entities`. An unterminated backtick consumes the rest of the
/// line as code. Outside code spans, `<`, `>` and `&` are replaced by the HTML entities
/// `&lt;`, `&gt;` and `&amp;`.
fn escape_javadoc_line(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '`' => {
                // Collect up to the closing backtick; `take_while` also consumes that backtick.
                let code: String = chars.by_ref().take_while(|&c| c != '`').collect();
                result.push_str("{@code ");
                result.push_str(&escape_javadoc_html_entities(&code));
                result.push('}');
            }
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '&' => result.push_str("&amp;"),
            other => result.push(other),
        }
    }
    result
}
/// Replaces `<`, `>` and `&` in `s` with the HTML entities `&lt;`, `&gt;` and `&amp;`.
///
/// All other characters are copied unchanged.
fn escape_javadoc_html_entities(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '&' => out.push_str("&amp;"),
            other => out.push(other),
        }
    }
    out
}
/// A rustdoc comment split into its summary and recognised `# Heading` sections, as produced
/// by `parse_rustdoc_sections`. A section is `None` when it is absent or its body is empty.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RustdocSections {
/// Text that precedes the first recognised section heading.
pub summary: String,
/// Body of the `# Arguments` / `# Args` section.
pub arguments: Option<String>,
/// Body of the `# Returns` section.
pub returns: Option<String>,
/// Body of the `# Errors` section.
pub errors: Option<String>,
/// Body of the `# Panics` section.
pub panics: Option<String>,
/// Body of the `# Safety` section.
pub safety: Option<String>,
/// Body of the `# Example` / `# Examples` section.
pub example: Option<String>,
}
/// Splits a rustdoc comment into a summary and its recognised `# Heading` sections.
///
/// Recognised headings (case-insensitive): `Arguments`/`Args`, `Returns`, `Errors`, `Panics`,
/// `Safety`, `Example`/`Examples`. Heading lines are not part of any body. Headings inside
/// ``` fences are ignored, and unrecognised headings stay in the section being collected.
/// Bodies are trimmed; an empty body leaves its field unset. If a heading occurs more than
/// once, the last non-empty body replaces earlier ones.
///
/// Returns the default (all-empty) value when `doc` is empty or whitespace only.
pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
if doc.trim().is_empty() {
return RustdocSections::default();
}
let mut summary = String::new();
let mut arguments: Option<String> = None;
let mut returns: Option<String> = None;
let mut errors: Option<String> = None;
let mut panics: Option<String> = None;
let mut safety: Option<String> = None;
let mut example: Option<String> = None;
// Section currently being collected; `None` means the summary.
let mut current: Option<&'static str> = None;
let mut buf = String::new();
let mut in_fence = false;
// Moves the trimmed contents of `buf` into the slot named by `target` and leaves `buf`
// empty. All destinations are passed explicitly so the closure captures nothing.
let flush = |target: Option<&'static str>,
buf: &mut String,
summary: &mut String,
arguments: &mut Option<String>,
returns: &mut Option<String>,
errors: &mut Option<String>,
panics: &mut Option<String>,
safety: &mut Option<String>,
example: &mut Option<String>| {
let body = std::mem::take(buf).trim().to_string();
if body.is_empty() {
return;
}
match target {
None => {
if !summary.is_empty() {
summary.push('\n');
}
summary.push_str(&body);
}
Some("arguments") => *arguments = Some(body),
Some("returns") => *returns = Some(body),
Some("errors") => *errors = Some(body),
Some("panics") => *panics = Some(body),
Some("safety") => *safety = Some(body),
Some("example") => *example = Some(body),
_ => {}
}
};
for line in doc.lines() {
let trimmed = line.trim_start();
// Any line starting with ``` toggles the fence state and is kept in the body.
if trimmed.starts_with("```") {
in_fence = !in_fence;
buf.push_str(line);
buf.push('\n');
continue;
}
if !in_fence {
if let Some(rest) = trimmed.strip_prefix("# ") {
let head = rest.trim().to_ascii_lowercase();
let target = match head.as_str() {
"arguments" | "args" => Some("arguments"),
"returns" => Some("returns"),
"errors" => Some("errors"),
"panics" => Some("panics"),
"safety" => Some("safety"),
"example" | "examples" => Some("example"),
_ => None,
};
// A recognised heading closes the previous section and starts a new one.
if target.is_some() {
flush(
current,
&mut buf,
&mut summary,
&mut arguments,
&mut returns,
&mut errors,
&mut panics,
&mut safety,
&mut example,
);
current = target;
continue;
}
}
}
buf.push_str(line);
buf.push('\n');
}
// Store whatever was collected after the last heading.
flush(
current,
&mut buf,
&mut summary,
&mut arguments,
&mut returns,
&mut errors,
&mut panics,
&mut safety,
&mut example,
);
RustdocSections {
summary,
arguments,
returns,
errors,
panics,
safety,
example,
}
}
/// Parses the body of an arguments section into `(name, description)` pairs.
///
/// A line starting with `* ` or `- ` begins a new entry. Its text is split at the first
/// ` - `, otherwise at the first `: `, otherwise at the first space; with no separator the
/// whole text is the name and the description is empty. Surrounding backticks and asterisks
/// are removed from the name.
///
/// Any other non-blank line is appended to the previous entry's description, separated by a
/// space when that description is non-empty. Such lines are dropped when no entry exists yet.
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    const SEPARATORS: [&str; 3] = [" - ", ": ", " "];
    let mut entries: Vec<(String, String)> = Vec::new();
    for text in body.lines().map(str::trim) {
        let bullet = if text.starts_with("* ") || text.starts_with("- ") {
            Some(&text[2..])
        } else {
            None
        };
        match bullet {
            Some(item) => {
                let (raw_name, raw_desc) = SEPARATORS
                    .iter()
                    .find_map(|sep| item.split_once(*sep))
                    .unwrap_or((item, ""));
                let name = raw_name.trim().trim_matches('`').trim_matches('*');
                entries.push((name.to_string(), raw_desc.trim().to_string()));
            }
            None if text.is_empty() => {}
            None => {
                if let Some((_, desc)) = entries.last_mut() {
                    if !desc.is_empty() {
                        desc.push(' ');
                    }
                    desc.push_str(text);
                }
            }
        }
    }
    entries
}
/// Reports whether a code-fence info tag denotes Rust code.
///
/// True for the empty tag, `rust`, anything starting with `rust,`, the rustdoc attributes
/// `no_run`, `ignore`, `should_panic` and `compile_fail`, and any tag starting with `edition`.
fn is_rust_fence_tag(tag: &str) -> bool {
    match tag {
        "" | "rust" | "no_run" | "ignore" | "should_panic" | "compile_fail" => true,
        other => other.starts_with("rust,") || other.starts_with("edition"),
    }
}
/// Returns the language of the first code fence in `body`.
///
/// The language is the fence's info string up to the first comma, trimmed. The result is
/// `"rust"` when there is no fence, or when that tag is empty or accepted by
/// `is_rust_fence_tag`.
fn detect_first_fence_lang(body: &str) -> &str {
    let first_tag = body
        .lines()
        .find_map(|line| line.trim_start().strip_prefix("```"))
        .map(|info| info.split(',').next().unwrap_or("").trim());
    match first_tag {
        Some(tag) if !tag.is_empty() && !is_rust_fence_tag(tag) => tag,
        _ => "rust",
    }
}
/// Adapts an example section for documentation in `target_lang`.
///
/// Returns `None` when the example's first fence is Rust but the target is not. Otherwise
/// returns the trimmed example with its fence tags rewritten by `replace_fence_lang`.
pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
    let body = example.trim();
    let source_is_rust = detect_first_fence_lang(body) == "rust";
    if source_is_rust && target_lang != "rust" {
        return None;
    }
    Some(replace_fence_lang(body, target_lang))
}
/// Rewrites the language tag of every opening code fence in `body` to `lang_replacement`.
///
/// The fence's indentation and any attributes from the first comma onward are preserved
/// (```` ```rust,no_run ```` becomes ```` ```ts,no_run ```` for `"ts"`). Fence lines are
/// treated as alternately opening and closing a block. Closing fences are copied unchanged,
/// because a closing fence that carries an info string no longer closes the block.
/// Trailing newlines are removed from the result.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len());
    let mut in_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(rest) if !in_fence => {
                in_fence = true;
                let indent = &line[..line.len() - trimmed.len()];
                let attrs = rest.find(',').map_or("", |i| &rest[i..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(attrs);
            }
            Some(_) => {
                // Closing fence: must stay free of any language tag.
                in_fence = false;
                out.push_str(line);
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out.trim_end_matches('\n').to_string()
}
/// Renders parsed rustdoc sections as JSDoc text, one entry per line.
///
/// The output contains, in order and only when present: the summary, one parameter entry per
/// argument bullet, a returns entry, a throws entry for the errors section, and an `@example`
/// block. Parameter, returns and throws entries are rendered through the `doc_jsdoc_*.jinja`
/// templates. The example is included only when `example_for_target` yields a body for
/// `"typescript"`. Entries are separated by a single newline.
pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            if desc.is_empty() {
                out.push_str(&crate::template_env::render(
                    "doc_jsdoc_param.jinja",
                    minijinja::context! { name => &name },
                ));
            } else {
                out.push_str(&crate::template_env::render(
                    "doc_jsdoc_param_desc.jinja",
                    minijinja::context! { name => &name, desc => &desc },
                ));
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_jsdoc_returns.jinja",
            minijinja::context! { content => ret.trim() },
        ));
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_jsdoc_throws.jinja",
            minijinja::context! { content => err.trim() },
        ));
    }
    if let Some(example) = sections.example.as_deref() {
        if let Some(body) = example_for_target(example, "typescript") {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str("@example\n");
            out.push_str(&body);
        }
    }
    out
}
/// Renders parsed rustdoc sections as Javadoc text, one entry per line.
///
/// The output contains, in order and only when present: the summary, one parameter entry per
/// argument bullet, a return entry, and a throws entry for the errors section that receives
/// `throws_class`. Entries are rendered through the `doc_javadoc_*.jinja` templates and
/// separated by a single newline. The example section is not rendered.
pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            if desc.is_empty() {
                out.push_str(&crate::template_env::render(
                    "doc_javadoc_param.jinja",
                    minijinja::context! { name => &name },
                ));
            } else {
                out.push_str(&crate::template_env::render(
                    "doc_javadoc_param_desc.jinja",
                    minijinja::context! { name => &name, desc => &desc },
                ));
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_javadoc_return.jinja",
            minijinja::context! { content => ret.trim() },
        ));
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_javadoc_throws.jinja",
            minijinja::context! { throws_class => throws_class, content => err.trim() },
        ));
    }
    out
}
pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
let mut out = String::new();
out.push_str("<summary>\n");
let summary = if sections.summary.is_empty() {
""
} else {
sections.summary.as_str()
};
for line in summary.lines() {
out.push_str(line);
out.push('\n');
}
out.push_str("</summary>");
if let Some(args) = sections.arguments.as_deref() {
for (name, desc) in parse_arguments_bullets(args) {
out.push('\n');
if desc.is_empty() {
out.push_str(&crate::template_env::render(
"doc_csharp_param.jinja",
minijinja::context! { name => &name },
));
} else {
out.push_str(&crate::template_env::render(
"doc_csharp_param_desc.jinja",
minijinja::context! { name => &name, desc => &desc },
));
}
}
}
if let Some(ret) = sections.returns.as_deref() {
out.push('\n');
out.push_str(&crate::template_env::render(
"doc_csharp_returns.jinja",
minijinja::context! { content => ret.trim() },
));
}
if let Some(err) = sections.errors.as_deref() {
out.push('\n');
out.push_str(&crate::template_env::render(
"doc_csharp_exception.jinja",
minijinja::context! {
exception_class => exception_class,
content => err.trim(),
},
));
}
if let Some(example) = sections.example.as_deref() {
out.push('\n');
out.push_str("<example><code language=\"csharp\">\n");
for line in example.lines() {
let t = line.trim_start();
if t.starts_with("```") {
continue;
}
out.push_str(line);
out.push('\n');
}
out.push_str("</code></example>");
}
out
}
/// Renders parsed rustdoc sections as PHPDoc text, one entry per line.
///
/// The output contains, in order and only when present: the summary, one parameter entry per
/// argument bullet, a return entry, a throws entry for the errors section that receives
/// `throws_class`, and the example body. Parameter, return and throws entries are rendered
/// through the `doc_phpdoc_*.jinja` templates. The example is included only when
/// `example_for_target` yields a body for `"php"`. Entries are separated by a single newline.
pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            if desc.is_empty() {
                out.push_str(&crate::template_env::render(
                    "doc_phpdoc_param.jinja",
                    minijinja::context! { name => &name },
                ));
            } else {
                out.push_str(&crate::template_env::render(
                    "doc_phpdoc_param_desc.jinja",
                    minijinja::context! { name => &name, desc => &desc },
                ));
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_phpdoc_return.jinja",
            minijinja::context! { content => ret.trim() },
        ));
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_phpdoc_throws.jinja",
            minijinja::context! { throws_class => throws_class, content => err.trim() },
        ));
    }
    if let Some(example) = sections.example.as_deref() {
        if let Some(body) = example_for_target(example, "php") {
            if !out.is_empty() {
                out.push('\n');
            }
            out.push_str(&body);
        }
    }
    out
}
/// Renders parsed rustdoc sections as Doxygen text using the `doc_doxygen_*.jinja` templates.
///
/// The output contains, in order and only when present: the summary, one parameter entry per
/// argument bullet, a return entry, an errors entry, and the example wrapped in `\code` /
/// `\endcode` with Markdown fence lines removed. Entries are separated by a single newline.
pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
    let mut out = String::new();
    if !sections.summary.is_empty() {
        out.push_str(&sections.summary);
    }
    if let Some(args) = sections.arguments.as_deref() {
        for (name, desc) in parse_arguments_bullets(args) {
            if !out.is_empty() {
                out.push('\n');
            }
            if desc.is_empty() {
                out.push_str(&crate::template_env::render(
                    "doc_doxygen_param.jinja",
                    minijinja::context! { name => &name },
                ));
            } else {
                out.push_str(&crate::template_env::render(
                    "doc_doxygen_param_desc.jinja",
                    minijinja::context! { name => &name, desc => &desc },
                ));
            }
        }
    }
    if let Some(ret) = sections.returns.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_doxygen_return.jinja",
            minijinja::context! { content => ret.trim() },
        ));
    }
    if let Some(err) = sections.errors.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&crate::template_env::render(
            "doc_doxygen_errors.jinja",
            minijinja::context! { content => err.trim() },
        ));
    }
    if let Some(example) = sections.example.as_deref() {
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str("\\code\n");
        for line in example.lines() {
            // Fence markers are replaced by the surrounding \code / \endcode pair.
            if line.trim_start().starts_with("```") {
                continue;
            }
            out.push_str(line);
            out.push('\n');
        }
        out.push_str("\\endcode");
    }
    out
}
/// Returns the first paragraph of `doc` collapsed onto a single line.
///
/// Lines are taken up to (not including) the first blank line, each trimmed, and joined with
/// single spaces. The result is empty when `doc` is empty or starts with a blank line.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    let mut joined = String::new();
    for piece in doc.lines().map(str::trim) {
        if piece.is_empty() {
            break;
        }
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(piece);
    }
    joined
}
/// Documentation flavour that sanitised text is being produced for. The sanitising helpers
/// use it to choose target-specific replacements (for example the spelling of byte arrays,
/// maps and optional values).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
/// PHPDoc comments.
PhpDoc,
/// Javadoc comments.
JavaDoc,
/// TSDoc comments.
TsDoc,
/// JSDoc comments.
JsDoc,
/// C# XML documentation; sanitised output for this target is additionally XML-escaped.
CSharpDoc,
}
/// Sanitises rustdoc `text` for `target`. For the C# target, everything from the first
/// rustdoc section heading onward is dropped; see `sanitize_rust_idioms_inner`.
pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
sanitize_rust_idioms_inner(text, target, true)
}
/// Sanitises rustdoc `text` for `target` without dropping anything at rustdoc section
/// headings, whatever the target; see `sanitize_rust_idioms_inner`.
pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
sanitize_rust_idioms_inner(text, target, false)
}
/// Line-by-line sanitiser shared by `sanitize_rust_idioms` and
/// `sanitize_rust_idioms_keep_sections`.
///
/// - Rust code fences (tag accepted by `is_rust_fence_tag`) are removed with their contents.
/// - Other code fences are copied verbatim, including their contents.
/// - Lines consisting solely of a `#[...]` attribute are removed.
/// - All remaining lines go through `apply_prose_transforms`.
/// - When `drop_csharp_sections` is set and `target` is `CSharpDoc`, the first rustdoc section
///   heading outside a fence and every line after it are discarded.
/// - The output ends with a newline only if `text` did.
/// - For `CSharpDoc` the entire result is XML-escaped at the end.
fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
let mut out = String::with_capacity(text.len());
let mut in_rust_fence = false;
let mut in_other_fence = false;
let mut csharp_section_dropped = false;
for line in text.lines() {
// Once a section heading has been dropped, every later line is dropped as well.
if csharp_section_dropped {
continue;
}
let trimmed = line.trim_start();
if drop_csharp_sections
&& matches!(target, DocTarget::CSharpDoc)
&& !in_rust_fence
&& !in_other_fence
&& is_rustdoc_section_heading(trimmed)
{
csharp_section_dropped = true;
continue;
}
if let Some(rest) = trimmed.strip_prefix("```") {
// Closing fence of a Rust block: dropped like the rest of the block.
if in_rust_fence {
in_rust_fence = false;
// The match has no effect for any target.
match target {
DocTarget::TsDoc
| DocTarget::JsDoc
| DocTarget::CSharpDoc
| DocTarget::PhpDoc
| DocTarget::JavaDoc => {
}
}
continue;
}
// Closing fence of a non-Rust block: kept.
if in_other_fence {
in_other_fence = false;
out.push_str(line);
out.push('\n');
continue;
}
// Opening fence: the language is the info string up to the first comma.
let lang = rest.split(',').next().unwrap_or("").trim();
let is_rust = is_rust_fence_tag(lang);
if is_rust {
in_rust_fence = true;
// The match has no effect for any target.
match target {
DocTarget::TsDoc
| DocTarget::JsDoc
| DocTarget::CSharpDoc
| DocTarget::PhpDoc
| DocTarget::JavaDoc => {
}
}
continue;
}
in_other_fence = true;
out.push_str(line);
out.push('\n');
continue;
}
// Body of a Rust block: dropped for every target.
if in_rust_fence {
match target {
DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
}
}
continue;
}
// Body of a non-Rust block: copied without any prose transformation.
if in_other_fence {
out.push_str(line);
out.push('\n');
continue;
}
// A line that is nothing but an attribute such as `#[derive(...)]` is removed.
let stripped_indent = line.trim_start();
if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
continue;
}
let sanitized = apply_prose_transforms(line, target);
out.push_str(&sanitized);
out.push('\n');
}
// Every kept line was written with a trailing newline; remove the last one when the input
// did not end with one.
if out.ends_with('\n') && !text.ends_with('\n') {
out.pop();
}
if matches!(target, DocTarget::CSharpDoc) {
out = xml_escape_for_csharp(&out);
}
out
}
/// Reports whether `trimmed` is a recognised rustdoc section heading.
///
/// The line must start with `# ` followed (ignoring surrounding whitespace and ASCII case) by
/// one of: arguments, args, returns, errors, panics, safety, example, examples.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const HEADINGS: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];
    match trimmed.strip_prefix("# ") {
        Some(rest) => {
            let title = rest.trim();
            HEADINGS.iter().any(|known| title.eq_ignore_ascii_case(known))
        }
        None => false,
    }
}
/// Escapes `s` for inclusion in C# XML documentation: `&`, `<` and `>` are replaced by the
/// entities `&amp;`, `&lt;` and `&gt;`. All other characters are copied unchanged.
fn xml_escape_for_csharp(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(ch),
        }
    }
    out
}
/// Applies the prose-level rewrites to a single documentation line.
///
/// Intra-doc links, path separators and unwrap/expect calls are handled on the whole line
/// first. The line is then split into backtick code spans and prose. Code spans are re-emitted
/// unchanged between backticks; prose goes through `transform_prose_segment`.
fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
    let linked = replace_intradoc_links(line, target);
    let separated = replace_path_separator(&linked);
    let cleaned = strip_unwrap_expect(&separated);
    let mut rendered = String::with_capacity(cleaned.len());
    for segment in tokenize_backtick_spans(&cleaned) {
        match segment {
            (true, code) => {
                rendered.push('`');
                rendered.push_str(code);
                rendered.push('`');
            }
            (false, prose) => rendered.push_str(&transform_prose_segment(prose, target)),
        }
    }
    rendered
}
/// Splits `line` into alternating prose and backtick-delimited code spans.
///
/// Each element is `(is_code, text)`. Code spans exclude their backticks and may be empty;
/// empty prose pieces are omitted. An unterminated backtick is not a code span: the text from
/// that backtick to the end of the line is returned as a single prose piece.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut segments = Vec::new();
    let mut rest = line;
    while let Some(open) = rest.find('`') {
        if open > 0 {
            segments.push((false, &rest[..open]));
        }
        let after_open = &rest[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                segments.push((true, &after_open[..close]));
                rest = &after_open[close + 1..];
            }
            None => {
                // No closing backtick: keep the backtick itself as part of the prose.
                segments.push((false, &rest[open..]));
                rest = "";
            }
        }
    }
    if !rest.is_empty() {
        segments.push((false, rest));
    }
    segments
}
/// Rewrites Rust-specific idioms in a prose (non-code) fragment for `target`.
///
/// Steps, in order: strip inline `#[...]` attributes; remove the literals `pub fn `,
/// `crate::`, `&mut self` and `&self`; strip lifetimes and marker bounds; rewrite wrapper
/// types; then rewrite `Some(...)` calls, the `Some` keyword and the `None` keyword.
fn transform_prose_segment(text: &str, target: DocTarget) -> String {
    let without_attrs = strip_inline_attributes(text);
    let without_syntax = ["pub fn ", "crate::", "&mut self", "&self"]
        .iter()
        .fold(without_attrs, |acc, noise| acc.replace(*noise, ""));
    let without_bounds = strip_lifetime_and_bounds(&without_syntax);
    let with_types = replace_type_wrappers(&without_bounds, target);
    let with_some_calls = replace_some_calls(&with_types);
    let with_some_words = replace_some_keyword_in_prose(&with_some_calls);
    replace_none_keyword(&with_some_words, target)
}
/// Copies the character that starts at byte offset `i` of `s` into `out` and returns the byte
/// offset just past it.
///
/// Panics if `i` is not a character boundary inside `s` or there is no character at `i`.
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    let mut tail = s[i..].chars();
    let ch = tail.next().expect("valid UTF-8 position");
    out.push(ch);
    // Whatever the iterator has not consumed starts right after the copied character.
    s.len() - tail.as_str().len()
}
/// Converts rustdoc intra-doc links of the form [`path::Item`] into plain inline code
/// `path.Item`: the surrounding brackets are dropped and every `::` inside becomes `.`.
///
/// A "[`" with no matching "`]" later in the string is copied through unchanged.
/// `_target` is currently unused.
fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
let mut out = String::with_capacity(s.len());
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
// Possible link start: '[' immediately followed by a backtick.
if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
let search_start = i + 2;
let mut found = false;
let mut j = search_start;
// Scan forward for the closing backtick + ']' pair.
while j + 1 < bytes.len() {
if bytes[j] == b'`' && bytes[j + 1] == b']' {
let inner = &s[search_start..j];
let converted = inner.replace("::", ".");
out.push('`');
out.push_str(&converted);
out.push('`');
i = j + 2;
found = true;
break;
}
j += 1;
}
// No terminator: emit the '[' as ordinary text and continue after it.
if !found {
i = advance_char(s, &mut out, i);
}
} else {
i = advance_char(s, &mut out, i);
}
}
out
}
/// Removes inline Rust attributes (`#[...]`, including nested brackets) from `s`.
///
/// A `#[` whose brackets never balance before the end of the string is left in place.
fn strip_inline_attributes(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
let mut depth = 0usize;
// Start on the '[' itself so that `depth` is 1 before any ']' can be seen.
let mut j = i + 1;
while j < bytes.len() {
if bytes[j] == b'[' {
depth += 1;
} else if bytes[j] == b']' {
depth -= 1;
if depth == 0 {
// Matching bracket found: skip the whole attribute.
i = j + 1;
break;
}
}
j += 1;
}
// Unbalanced attribute: copy the '#' and keep scanning after it.
if depth != 0 {
i = advance_char(s, &mut out, i);
}
} else {
i = advance_char(s, &mut out, i);
}
}
out
}
/// Removes Rust marker bounds and the static lifetime from `s`.
///
/// In order: `Send + Sync`, `Sync + Send`, the standalone words `Send` and `Sync`, and
/// finally `'static`.
fn strip_lifetime_and_bounds(s: &str) -> String {
    let no_send_sync = regex_replace_all(s, r"Send\s*\+\s*Sync", "");
    let no_sync_send = regex_replace_all(&no_send_sync, r"Sync\s*\+\s*Send", "");
    let no_send = regex_replace_word_boundary(&no_sync_send, "Send", "");
    let no_sync = regex_replace_word_boundary(&no_send, "Sync", "");
    regex_replace_all(&no_sync, r"'\s*static\b", "")
}
/// Replaces every match of `pattern` in `s` with `replacement`.
///
/// No regex engine is involved. The three pattern strings listed below are dispatched to
/// hand-written matchers; any other `pattern` is treated as a literal substring.
fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
match pattern {
r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
r"'\s*static\b" => replace_static_lifetime(s, replacement),
// Fallback: plain substring replacement.
_ => s.replace(pattern, replacement),
}
}
/// Replaces every whole-word occurrence of `keyword` in `s` with `replacement`.
///
/// An occurrence counts as a whole word when the bytes directly before and after it are not
/// ASCII alphanumerics or `_` (the start and end of the string also qualify). `s` is returned
/// unchanged when `keyword` is empty or longer than `s`.
fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
let mut out = String::with_capacity(s.len());
let klen = keyword.len();
let bytes = s.as_bytes();
let kbytes = keyword.as_bytes();
if klen == 0 || klen > bytes.len() {
return s.to_string();
}
let mut i = 0;
// Only positions with at least `klen` bytes remaining can start a match.
while i + klen <= bytes.len() {
if &bytes[i..i + klen] == kbytes {
let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
let after_ok =
i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
if before_ok && after_ok {
out.push_str(replacement);
i += klen;
continue;
}
}
i = advance_char(s, &mut out, i);
}
// Copy the tail that is too short to contain the keyword.
if i < bytes.len() {
out.push_str(&s[i..]);
}
out
}
/// Replaces every occurrence of `a`, `op`, `b` in sequence — with any number of spaces
/// (including none) between them — with `replacement`.
///
/// Only the space character is skipped between the parts; other whitespace does not match.
/// Text that does not match is copied unchanged.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    // The needle parts do not change while scanning, so they are converted once up front.
    let a_chars: Vec<char> = a.chars().collect();
    let op_chars: Vec<char> = op.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    let total = chars.len();
    // Returns the first index at or after `idx` that does not hold a space.
    let skip_spaces = |mut idx: usize| {
        while idx < total && chars[idx] == ' ' {
            idx += 1;
        }
        idx
    };

    let mut out = String::with_capacity(s.len());
    let mut i = 0;
    while i < total {
        if chars[i..].starts_with(&a_chars) {
            let op_at = skip_spaces(i + a_chars.len());
            if chars[op_at..].starts_with(&op_chars) {
                let b_at = skip_spaces(op_at + op_chars.len());
                if chars[b_at..].starts_with(&b_chars) {
                    out.push_str(replacement);
                    i = b_at + b_chars.len();
                    continue;
                }
            }
        }
        out.push(chars[i]);
        i += 1;
    }
    out
}
/// Replaces every `'static` lifetime in `s` with `replacement`.
///
/// Spaces are allowed between the apostrophe and `static`. The match only counts when
/// `static` is not directly followed by an ASCII alphanumeric or `_`.
fn replace_static_lifetime(s: &str, replacement: &str) -> String {
let mut out = String::with_capacity(s.len());
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'\'' {
// Skip optional spaces after the apostrophe.
let mut j = i + 1;
while j < bytes.len() && bytes[j] == b' ' {
j += 1;
}
let keyword = b"static";
if bytes[j..].starts_with(keyword) {
let end = j + keyword.len();
// Equivalent of a trailing word boundary.
let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
if after_ok {
out.push_str(replacement);
i = end;
continue;
}
}
}
i = advance_char(s, &mut out, i);
}
out
}
/// Rewrites common Rust generic types in `s` into the spelling used for `target`.
///
/// The rewrites run in this order:
/// 1. `Vec<u8>` becomes the target's byte-buffer type.
/// 2. `HashMap<K, V>` becomes the target's map type.
/// 3. Any remaining `Vec<T>` becomes `T[]`.
/// 4. `Option<T>` becomes the target's optional spelling.
/// 5. For `CSharpDoc` only, `Result<T, E>` becomes `T`.
/// 6. `Arc`, `Box`, `Mutex`, `RwLock`, `Rc`, `Cell` and `RefCell` wrappers are replaced by
///    their inner type.
fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
let mut out = s.to_string();
let vec_u8_replacement = match target {
DocTarget::PhpDoc => "string",
DocTarget::JavaDoc => "byte[]",
DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
DocTarget::CSharpDoc => "byte[]",
};
// Must run before the generic `Vec<T>` rewrite below, which would otherwise yield `u8[]`.
out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
let map_replacement_fn = |k: &str, v: &str| match target {
DocTarget::PhpDoc => format!("array<{k}, {v}>"),
DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
DocTarget::CSharpDoc => format!("Dictionary<{k}, {v}>"),
};
out = replace_generic2(&out, "HashMap", &map_replacement_fn);
out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
let option_replacement_fn = |inner: &str| match target {
DocTarget::PhpDoc => format!("{inner}?"),
DocTarget::JavaDoc => format!("{inner} | null"),
DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
DocTarget::CSharpDoc => format!("{inner}?"),
};
out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
if matches!(target, DocTarget::CSharpDoc) {
out = replace_generic2(&out, "Result", &|t: &str, _e: &str| t.to_string());
}
// Smart pointers and cells are replaced by their inner type.
for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
}
out
}
/// Replaces each occurrence of the concrete generic `name<arg>` in `s` with
/// `replacement`.
///
/// A match is only accepted when it is not the tail of a longer identifier,
/// i.e. the byte immediately before it is not ASCII alphanumeric or `_`. This
/// mirrors the boundary rule used by the other `replace_generic*` helpers, so
/// `SmallVec<u8>` is left alone while `Vec<u8>` and `Option<Vec<u8>>` are
/// rewritten.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let pattern = format!("{name}<{arg}>");
    let bytes = s.as_bytes();
    // Width of the pattern's first char: how far to step past a rejected match.
    let step = pattern.chars().next().map_or(1, char::len_utf8);

    let mut out = String::with_capacity(s.len());
    let mut copied = 0; // start of the text not yet written to `out`
    let mut search = 0; // where the next search begins
    while let Some(offset) = s[search..].find(pattern.as_str()) {
        let at = search + offset;
        let at_boundary =
            at == 0 || !(bytes[at - 1].is_ascii_alphanumeric() || bytes[at - 1] == b'_');
        if at_boundary {
            out.push_str(&s[copied..at]);
            out.push_str(replacement);
            copied = at + pattern.len();
            search = copied;
        } else {
            // Part of a longer identifier: keep the text and look further on.
            search = at + step;
        }
    }
    out.push_str(&s[copied..]);
    out
}
/// Rewrites every single-parameter generic `name<...>` in `s` by passing the
/// text between the outermost angle brackets to `f` and emitting its result.
///
/// A candidate is accepted only when `name` is not the tail of a longer
/// identifier (the preceding byte is not ASCII alphanumeric or `_`) and a
/// matching closing `>` exists. Nested generics are balanced, and the `>` of a
/// `->` return arrow (as in `Box<dyn Fn(i32) -> String>`) is not counted as a
/// closing bracket. Text that does not form a complete generic is copied
/// unchanged through `advance_char`.
fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
where
    F: Fn(&str) -> String,
{
    /// Finds the `>` that closes a generic whose contents start at `from`.
    fn closing_angle(bytes: &[u8], from: usize) -> Option<usize> {
        let mut depth = 1usize;
        for (at, &b) in bytes.iter().enumerate().skip(from) {
            match b {
                b'<' => depth += 1,
                // `->` is a return arrow, not a bracket.
                b'>' if at > from && bytes[at - 1] == b'-' => {}
                b'>' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(at);
                    }
                }
                _ => {}
            }
        }
        None
    }

    let prefix = format!("{name}<");
    let pbytes = prefix.as_bytes();
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut i = 0;
    while i < bytes.len() {
        let starts_here = bytes[i..].starts_with(pbytes)
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if starts_here {
            let inner_start = i + pbytes.len();
            if let Some(close) = closing_angle(bytes, inner_start) {
                out.push_str(&f(&s[inner_start..close]));
                i = close + 1;
                continue;
            }
        }
        i = advance_char(s, &mut out, i);
    }
    out
}
/// Rewrites every two-parameter generic `name<A, B>` in `s` by passing the two
/// trimmed arguments to `f` and emitting its result.
///
/// A candidate is accepted only when `name` is not the tail of a longer
/// identifier (the preceding byte is not ASCII alphanumeric or `_`), a matching
/// closing `>` exists, and `split_on_comma_at_top_level` can divide the
/// contents into two parts. Nested generics are balanced, and the `>` of a
/// `->` return arrow is not counted as a closing bracket. Anything else is
/// copied unchanged through `advance_char`.
fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
where
    F: Fn(&str, &str) -> String,
{
    let prefix = format!("{name}<");
    let pbytes = prefix.as_bytes();
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut i = 0;
    while i < bytes.len() {
        let starts_here = bytes[i..].starts_with(pbytes)
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if starts_here {
            let inner_start = i + pbytes.len();

            // Locate the `>` that balances the opening bracket of `name<`.
            let mut depth = 1usize;
            let mut close = None;
            for (j, &b) in bytes.iter().enumerate().skip(inner_start) {
                match b {
                    b'<' => depth += 1,
                    // `->` is a return arrow, not a bracket.
                    b'>' if j > inner_start && bytes[j - 1] == b'-' => {}
                    b'>' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(j);
                            break;
                        }
                    }
                    _ => {}
                }
            }

            let parts = close.and_then(|end| {
                split_on_comma_at_top_level(&s[inner_start..end])
                    .map(|(first, second)| (first, second, end))
            });
            if let Some((first, second, end)) = parts {
                out.push_str(&f(first.trim(), second.trim()));
                i = end + 1;
                continue;
            }
        }
        i = advance_char(s, &mut out, i);
    }
    out
}
/// Splits `s` at the first comma that is not nested inside brackets.
///
/// Angle brackets, parentheses and square brackets all open a nesting level,
/// so the commas inside `Vec<(A, B)>`, `(A, B)` or `fn(A, B)` are skipped. The
/// `>` of a `->` return arrow is ignored rather than treated as a closing
/// bracket. Returns the text before and after the comma (the comma itself is
/// dropped, surrounding whitespace is kept), or `None` when there is no
/// top-level comma.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut depth = 0i32;
    let mut prev = None;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            // `->` is a return arrow, not a closing bracket.
            '>' if prev == Some('-') => {}
            '>' | ')' | ']' => depth -= 1,
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
        prev = Some(ch);
    }
    None
}
/// Rewrites every `Some(arg)` expression in `s` as `the value (arg)`.
///
/// `Some(` only counts when it is not the tail of a longer identifier (the
/// preceding byte is not ASCII alphanumeric or `_`) and its parenthesis is
/// balanced by a closing `)`; nested parentheses inside the argument are
/// allowed. Everything else is copied unchanged through `advance_char`.
fn replace_some_calls(s: &str) -> String {
    const OPEN: &[u8] = b"Some(";
    let src = s.as_bytes();
    let mut rendered = String::with_capacity(s.len());
    let mut pos = 0;
    while pos < src.len() {
        let starts_call = src[pos..].starts_with(OPEN)
            && (pos == 0 || !(src[pos - 1].is_ascii_alphanumeric() || src[pos - 1] == b'_'));
        if starts_call {
            let arg_from = pos + OPEN.len();
            // Walk forward until the parenthesis opened by `Some(` is closed.
            let mut open_parens = 1usize;
            let close = src[arg_from..]
                .iter()
                .position(|&b| {
                    match b {
                        b'(' => open_parens += 1,
                        b')' => open_parens -= 1,
                        _ => {}
                    }
                    open_parens == 0
                })
                .map(|offset| arg_from + offset);
            if let Some(close) = close {
                rendered.push_str("the value (");
                rendered.push_str(&s[arg_from..close]);
                rendered.push(')');
                pos = close + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut rendered, pos);
    }
    rendered
}
/// Drops the bare word `Some ` from prose when it directly precedes a
/// lowercase ASCII letter, e.g. `(Some values)` becomes `(values)`.
///
/// The word is kept when it is the tail of a longer identifier (the preceding
/// byte is ASCII alphanumeric or `_`) or when the following byte is not a
/// lowercase ASCII letter (`Some Title`, `Some(x)`). Inputs too short to hold
/// the keyword plus one more byte are returned unchanged.
fn replace_some_keyword_in_prose(s: &str) -> String {
    const KEYWORD: &[u8] = b"Some ";
    let src = s.as_bytes();
    if src.len() <= KEYWORD.len() {
        return s.to_owned();
    }
    // A candidate needs one byte after the keyword, so starts stop short of this.
    let last_start = src.len() - KEYWORD.len();
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let mut kept = String::with_capacity(s.len());
    let mut pos = 0;
    while pos < last_start {
        let drop_here = src[pos..].starts_with(KEYWORD)
            && (pos == 0 || !is_ident(src[pos - 1]))
            && src[pos + KEYWORD.len()].is_ascii_lowercase();
        pos = if drop_here {
            pos + KEYWORD.len()
        } else {
            advance_char(s, &mut kept, pos)
        };
    }
    // The trailing bytes can no longer start a match; copy them verbatim.
    if pos < src.len() {
        kept.push_str(&s[pos..]);
    }
    kept
}
/// Replaces the standalone word `None` in `s` with the target language's
/// "no value" keyword: `undefined` for TSDoc/JSDoc, `null` for PHPDoc, Javadoc
/// and C# docs.
///
/// `None` is only replaced when neither neighbouring byte is ASCII
/// alphanumeric or `_`, so identifiers such as `NoneType` are untouched.
fn replace_none_keyword(s: &str, target: DocTarget) -> String {
    const KEYWORD: &[u8] = b"None";
    let null_word = match target {
        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
    };
    let src = s.as_bytes();
    if src.len() < KEYWORD.len() {
        return s.to_owned();
    }
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let mut rendered = String::with_capacity(s.len());
    let mut pos = 0;
    while pos + KEYWORD.len() <= src.len() {
        let after = pos + KEYWORD.len();
        let standalone = src[pos..].starts_with(KEYWORD)
            && (pos == 0 || !is_ident(src[pos - 1]))
            && src.get(after).map_or(true, |&b| !is_ident(b));
        if standalone {
            rendered.push_str(null_word);
            pos = after;
        } else {
            pos = advance_char(s, &mut rendered, pos);
        }
    }
    // Fewer bytes remain than the keyword is long; copy them verbatim.
    if pos < src.len() {
        rendered.push_str(&s[pos..]);
    }
    rendered
}
/// Converts Rust path separators (`::`) in `s` into dots.
///
/// A `::` is rewritten when at least one of its neighbours is an identifier
/// byte (ASCII alphanumeric or `_`); a `::` with no identifier on either side
/// is copied through unchanged.
fn replace_path_separator(s: &str) -> String {
    let src = s.as_bytes();
    let is_ident = |b: Option<&u8>| matches!(b, Some(&c) if c.is_ascii_alphanumeric() || c == b'_');

    let mut rendered = String::with_capacity(s.len());
    let mut pos = 0;
    while pos < src.len() {
        if src[pos..].starts_with(b"::") {
            let ident_before = pos > 0 && is_ident(src.get(pos - 1));
            let ident_after = is_ident(src.get(pos + 2));
            if ident_before || ident_after {
                rendered.push('.');
                pos += 2;
                continue;
            }
        }
        pos = advance_char(s, &mut rendered, pos);
    }
    rendered
}
/// Removes `.unwrap()` calls and complete `.expect(...)` calls from `s`.
///
/// An `.expect(` is only removed when its parenthesis is balanced by a closing
/// `)`; nested parentheses inside the argument are allowed. An unterminated
/// `.expect(` and all other text are copied unchanged through `advance_char`.
fn strip_unwrap_expect(s: &str) -> String {
    const UNWRAP: &[u8] = b".unwrap()";
    const EXPECT_OPEN: &[u8] = b".expect(";
    let src = s.as_bytes();
    let mut kept = String::with_capacity(s.len());
    let mut pos = 0;
    while pos < src.len() {
        let rest = &src[pos..];
        if rest.starts_with(UNWRAP) {
            pos += UNWRAP.len();
            continue;
        }
        if rest.starts_with(EXPECT_OPEN) {
            // Find the `)` that closes the parenthesis opened by `.expect(`.
            let mut open_parens = 1usize;
            let close = rest[EXPECT_OPEN.len()..].iter().position(|&b| {
                match b {
                    b'(' => open_parens += 1,
                    b')' => open_parens -= 1,
                    _ => {}
                }
                open_parens == 0
            });
            if let Some(offset) = close {
                pos += EXPECT_OPEN.len() + offset + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut kept, pos);
    }
    kept
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_emit_phpdoc() {
let mut out = String::new();
emit_phpdoc(&mut out, "Simple documentation", " ", "TestException");
assert!(out.contains("/**"));
assert!(out.contains("Simple documentation"));
assert!(out.contains("*/"));
}
#[test]
fn test_phpdoc_escaping() {
let mut out = String::new();
emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
assert!(out.contains("Handle * / sequences"));
}
#[test]
fn test_emit_csharp_doc() {
let mut out = String::new();
emit_csharp_doc(&mut out, "C# documentation", " ", "TestException");
assert!(out.contains("<summary>"));
assert!(out.contains("C# documentation"));
assert!(out.contains("</summary>"));
}
/// XML-significant characters in the doc text must reach the emitted C# XML
/// doc comment as entities, not as raw `<`, `&` and `>`.
// NOTE(review): the expectation is restored to the entity-escaped form on the
// assumption that the raw text was an HTML-decoding artefact; confirm against
// the emitter's escaping.
#[test]
fn test_csharp_xml_escaping() {
    let mut out = String::new();
    emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
    assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
}
#[test]
fn test_emit_elixir_doc() {
let mut out = String::new();
emit_elixir_doc(&mut out, "Elixir documentation");
assert!(out.contains("@doc \"\"\""));
assert!(out.contains("Elixir documentation"));
assert!(out.contains("\"\"\""));
}
#[test]
fn test_elixir_heredoc_escaping() {
let mut out = String::new();
emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
assert!(out.contains("Handle \"\" \" sequences"));
}
#[test]
fn test_emit_roxygen() {
let mut out = String::new();
emit_roxygen(&mut out, "R documentation");
assert!(out.contains("#' R documentation"));
}
#[test]
fn test_emit_swift_doc() {
let mut out = String::new();
emit_swift_doc(&mut out, "Swift documentation", " ");
assert!(out.contains("/// Swift documentation"));
}
#[test]
fn test_emit_javadoc() {
let mut out = String::new();
emit_javadoc(&mut out, "Java documentation", " ");
assert!(out.contains("/**"));
assert!(out.contains("Java documentation"));
assert!(out.contains("*/"));
}
#[test]
fn test_emit_kdoc() {
let mut out = String::new();
emit_kdoc(&mut out, "Kotlin documentation", " ");
assert!(out.contains("/**"));
assert!(out.contains("Kotlin documentation"));
assert!(out.contains("*/"));
}
#[test]
fn test_emit_dartdoc() {
let mut out = String::new();
emit_dartdoc(&mut out, "Dart documentation", " ");
assert!(out.contains("/// Dart documentation"));
}
#[test]
fn test_emit_gleam_doc() {
let mut out = String::new();
emit_gleam_doc(&mut out, "Gleam documentation", " ");
assert!(out.contains("/// Gleam documentation"));
}
#[test]
fn test_emit_zig_doc() {
let mut out = String::new();
emit_zig_doc(&mut out, "Zig documentation", " ");
assert!(out.contains("/// Zig documentation"));
}
#[test]
fn test_empty_doc_skipped() {
let mut out = String::new();
emit_phpdoc(&mut out, "", "", "TestException");
emit_csharp_doc(&mut out, "", "", "TestException");
emit_elixir_doc(&mut out, "");
emit_roxygen(&mut out, "");
emit_kdoc(&mut out, "", "");
emit_dartdoc(&mut out, "", "");
emit_gleam_doc(&mut out, "", "");
emit_zig_doc(&mut out, "", "");
assert!(out.is_empty());
}
#[test]
fn test_doc_first_paragraph_joined_single_line() {
assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
}
#[test]
fn test_doc_first_paragraph_joined_wrapped_sentence() {
let doc = "Convert HTML to Markdown,\nreturning a result.";
assert_eq!(
doc_first_paragraph_joined(doc),
"Convert HTML to Markdown, returning a result."
);
}
#[test]
fn test_doc_first_paragraph_joined_stops_at_blank_line() {
let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
}
#[test]
fn test_doc_first_paragraph_joined_empty() {
assert_eq!(doc_first_paragraph_joined(""), "");
}
#[test]
fn test_parse_rustdoc_sections_basic() {
let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
let sections = parse_rustdoc_sections(doc);
assert_eq!(sections.summary, "Extracts text from a file.");
assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
assert!(sections.panics.is_none());
}
#[test]
fn test_parse_rustdoc_sections_example_with_fence() {
let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
let sections = parse_rustdoc_sections(doc);
assert_eq!(sections.summary, "Run the thing.");
assert!(sections.example.as_ref().unwrap().contains("```rust"));
assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
}
#[test]
fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
let sections = parse_rustdoc_sections(doc);
assert_eq!(sections.summary, "Summary.");
assert!(sections.example.as_ref().unwrap().contains("# install deps"));
}
#[test]
fn test_parse_arguments_bullets_dash_separator() {
let body = "* `path` - The file path.\n* `config` - Optional configuration.";
let pairs = parse_arguments_bullets(body);
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
}
#[test]
fn test_parse_arguments_bullets_continuation_line() {
let body = "* `path` - The file path,\n resolved relative to cwd.\n* `mode` - Open mode.";
let pairs = parse_arguments_bullets(body);
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
}
#[test]
fn test_replace_fence_lang_rust_to_typescript() {
let body = "```rust\nlet x = run();\n```";
let out = replace_fence_lang(body, "typescript");
assert!(out.starts_with("```typescript"));
assert!(out.contains("let x = run();"));
}
#[test]
fn test_replace_fence_lang_preserves_attrs() {
let body = "```rust,no_run\nlet x = run();\n```";
let out = replace_fence_lang(body, "typescript");
assert!(out.starts_with("```typescript,no_run"));
}
#[test]
fn test_replace_fence_lang_no_fence_unchanged() {
let body = "Plain prose with `inline code`.";
let out = replace_fence_lang(body, "typescript");
assert_eq!(out, "Plain prose with `inline code`.");
}
fn fixture_sections() -> RustdocSections {
let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
parse_rustdoc_sections(doc)
}
/// The JSDoc renderer maps each rustdoc section of the shared fixture to its
/// tag and leaves the Rust-fenced example out.
#[test]
fn test_render_jsdoc_sections() {
    let sections = fixture_sections();
    let out = render_jsdoc_sections(&sections);
    assert!(out.starts_with("Extracts text from a file."));
    assert!(out.contains("@param path - The file path."));
    assert!(out.contains("@param config - Optional configuration."));
    assert!(out.contains("@returns The extracted text and metadata."));
    assert!(out.contains("@throws Returns an error when the file is unreadable."));
    assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
    assert!(!out.contains("```typescript"));
    assert!(!out.contains("```rust"));
}
/// An example already fenced as TypeScript is kept in the JSDoc output.
#[test]
fn test_render_jsdoc_sections_preserves_typescript_example() {
    let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
    let sections = parse_rustdoc_sections(doc);
    let out = render_jsdoc_sections(&sections);
    assert!(out.contains("@example"), "TypeScript example must be preserved");
    assert!(out.contains("```typescript"));
}
/// The Javadoc renderer emits `@param`, `@return` and a `@throws` tag naming
/// the supplied exception class, after the summary line.
#[test]
fn test_render_javadoc_sections() {
    let sections = fixture_sections();
    let out = render_javadoc_sections(&sections, "KreuzbergRsException");
    assert!(out.contains("@param path The file path."));
    assert!(out.contains("@return The extracted text and metadata."));
    assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
    assert!(out.starts_with("Extracts text from a file."));
}
/// The C# renderer wraps each rustdoc section of the shared fixture in its XML
/// doc element, including the example as a `csharp` code block.
#[test]
fn test_render_csharp_xml_sections() {
    let sections = fixture_sections();
    let out = render_csharp_xml_sections(&sections, "KreuzbergException");
    assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
    assert!(out.contains("<param name=\"path\">The file path.</param>"));
    assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
    assert!(out.contains("<exception cref=\"KreuzbergException\">"));
    assert!(out.contains("<example><code language=\"csharp\">"));
    assert!(out.contains("let result = extract"));
}
/// The PHPDoc renderer emits `@param`, `@return` and `@throws` tags for the
/// shared fixture and leaves the Rust-fenced example out.
#[test]
fn test_render_phpdoc_sections() {
    let sections = fixture_sections();
    let out = render_phpdoc_sections(&sections, "KreuzbergException");
    assert!(out.contains("@param mixed $path The file path."));
    assert!(out.contains("@return The extracted text and metadata."));
    assert!(out.contains("@throws KreuzbergException"));
    assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
    assert!(!out.contains("```rust"));
}
/// An example already fenced as PHP is kept in the PHPDoc output.
#[test]
fn test_render_phpdoc_sections_preserves_php_example() {
    let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
    let sections = parse_rustdoc_sections(doc);
    let out = render_phpdoc_sections(&sections, "MyException");
    assert!(out.contains("```php"), "PHP example must be preserved");
}
/// The Doxygen renderer emits `\param` and `\return` commands and wraps the
/// example in `\code` / `\endcode`.
#[test]
fn test_render_doxygen_sections() {
    let sections = fixture_sections();
    let out = render_doxygen_sections(&sections);
    assert!(out.contains("\\param path The file path."));
    assert!(out.contains("\\return The extracted text and metadata."));
    assert!(out.contains("\\code"));
    assert!(out.contains("\\endcode"));
}
#[test]
fn test_emit_yard_doc_simple() {
let mut out = String::new();
emit_yard_doc(&mut out, "Simple Ruby documentation", " ");
assert!(out.contains("# Simple Ruby documentation"));
}
#[test]
fn test_emit_yard_doc_empty() {
let mut out = String::new();
emit_yard_doc(&mut out, "", " ");
assert!(out.is_empty());
}
#[test]
fn test_emit_yard_doc_with_sections() {
let mut out = String::new();
let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
emit_yard_doc(&mut out, doc, " ");
assert!(out.contains("# Extracts text from a file."));
assert!(out.contains("# @param path The file path."));
assert!(out.contains("# @return The extracted text."));
assert!(out.contains("# @raise Returns error on failure."));
}
#[test]
fn test_emit_c_doxygen_simple_prose() {
let mut out = String::new();
emit_c_doxygen(&mut out, "Free a string.", "");
assert!(out.contains("/// Free a string."), "got: {out}");
}
#[test]
fn test_emit_c_doxygen_with_sections() {
let mut out = String::new();
let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
emit_c_doxygen(&mut out, doc, "");
assert!(out.contains("/// Extract content from a file."));
assert!(out.contains("/// \\param path Path to the file."));
assert!(out.contains("/// \\param mode Read mode."));
assert!(out.contains("/// \\return A newly allocated string the caller owns."));
assert!(out.contains("/// \\note Returns null when the file is unreadable."));
}
#[test]
fn test_emit_c_doxygen_safety_section_maps_to_note() {
let mut out = String::new();
let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
emit_c_doxygen(&mut out, doc, "");
assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
}
#[test]
fn test_emit_c_doxygen_example_renders_code_fence() {
let mut out = String::new();
let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
emit_c_doxygen(&mut out, doc, "");
assert!(out.contains("/// \\code"));
assert!(out.contains("/// \\endcode"));
assert!(out.contains("let x = run();"));
}
#[test]
fn test_emit_c_doxygen_strips_markdown_links() {
let mut out = String::new();
let doc = "See [the docs](https://example.com/x) for details.";
emit_c_doxygen(&mut out, doc, "");
assert!(
out.contains("the docs (https://example.com/x)"),
"expected flattened link, got: {out}"
);
assert!(!out.contains("](https://"));
}
#[test]
fn test_emit_c_doxygen_word_wraps_long_lines() {
let mut out = String::new();
let long = "a ".repeat(80);
emit_c_doxygen(&mut out, long.trim(), "");
for line in out.lines() {
let body = line.trim_start_matches("/// ");
assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
}
}
#[test]
fn test_emit_c_doxygen_empty_input_is_noop() {
let mut out = String::new();
emit_c_doxygen(&mut out, "", "");
emit_c_doxygen(&mut out, " \n\t ", "");
assert!(out.is_empty());
}
#[test]
fn test_emit_c_doxygen_indent_applied() {
let mut out = String::new();
emit_c_doxygen(&mut out, "Hello.", " ");
assert!(out.starts_with(" /// Hello."));
}
/// The YARD renderer emits `@param`, `@return` and `@raise` tags for the
/// shared fixture and leaves the Rust-fenced example out.
#[test]
fn test_render_yard_sections() {
    let sections = fixture_sections();
    let out = render_yard_sections(&sections);
    assert!(out.contains("@param path The file path."));
    assert!(out.contains("@return The extracted text and metadata."));
    assert!(out.contains("@raise Returns an error when the file is unreadable."));
    assert!(!out.contains("@example"), "Rust example must not appear in YARD");
    assert!(!out.contains("```ruby"));
    assert!(!out.contains("```rust"));
}
/// An example already fenced as Ruby is kept in the YARD output.
#[test]
fn test_render_yard_sections_preserves_ruby_example() {
    let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
    let sections = parse_rustdoc_sections(doc);
    let out = render_yard_sections(&sections);
    assert!(out.contains("@example"), "Ruby example must be preserved");
    assert!(out.contains("```ruby"));
}
#[test]
fn example_for_target_rust_fenced_suppressed_for_php() {
let example = "```rust\nlet x = 1;\n```";
assert_eq!(
example_for_target(example, "php"),
None,
"rust-fenced example must be omitted for PHP target"
);
}
#[test]
fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
let example = "```\nlet x = 1;\n```";
assert_eq!(
example_for_target(example, "ruby"),
None,
"bare fence is treated as Rust and must be omitted for Ruby target"
);
}
#[test]
fn example_for_target_php_example_preserved_for_php() {
let example = "```php\n$x = 1;\n```";
let result = example_for_target(example, "php");
assert!(result.is_some(), "PHP example must be preserved for PHP target");
assert!(result.unwrap().contains("```php"));
}
#[test]
fn example_for_target_ruby_example_preserved_for_ruby() {
let example = "```ruby\nputs :hi\n```";
let result = example_for_target(example, "ruby");
assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
assert!(result.unwrap().contains("```ruby"));
}
/// A Rust-fenced example yields no fenced block in PHPDoc output, while the
/// other sections are still rendered.
#[test]
fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
    let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
    let sections = parse_rustdoc_sections(doc);
    let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
    assert!(!out.contains("```php"), "no PHP @example block for Rust source");
    assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
    assert!(out.contains("@param"), "other sections must still be emitted");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
let mut out = String::new();
emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
assert_eq!(
out, "/** Simple doc. */\n",
"short single-line comment should collapse to canonical format"
);
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
let mut out = String::new();
emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", " ");
assert_eq!(out, " /** Text node (most frequent - 100+ per document) */\n");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
let mut out = String::new();
let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
assert!(out.contains("/**\n"), "long comment should start with newline");
assert!(out.contains(" * "), "long comment should use multi-line format");
assert!(out.contains(" */\n"), "long comment should end with newline");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
let mut out = String::new();
let doc = "First line.\n\nSecond paragraph.";
emit_kdoc_ktfmt_canonical(&mut out, doc, "");
assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
assert!(out.contains(" * First line."), "first paragraph preserved");
assert!(out.contains(" *\n"), "blank line preserved");
assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
let mut out = String::new();
emit_kdoc_ktfmt_canonical(&mut out, "", "");
assert!(out.is_empty(), "empty doc should produce no output");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
let mut out = String::new();
let content = "a".repeat(93);
emit_kdoc_ktfmt_canonical(&mut out, &content, "");
let line = out.lines().next().unwrap();
assert_eq!(
line.len(),
100,
"should fit exactly at 100 chars and use single-line format"
);
assert!(out.starts_with("/**"), "should use single-line format");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
let mut out = String::new();
let content = "a".repeat(94);
emit_kdoc_ktfmt_canonical(&mut out, &content, "");
assert!(
out.contains("/**\n"),
"should use multi-line format when exceeding 100 chars"
);
assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
let mut out = String::new();
let content = "a".repeat(89);
emit_kdoc_ktfmt_canonical(&mut out, &content, " ");
let line = out.lines().next().unwrap();
assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
assert!(line.starts_with(" /** "), "should include indent");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
let mut out = String::new();
emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", " ");
assert!(out.starts_with(" /** "), "should preserve 4-space indent");
assert!(out.contains(" */\n"), "should end with newline");
let line_count = out.lines().count();
assert_eq!(line_count, 1, "should be single-line format");
}
#[test]
fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
let mut out = String::new();
let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
emit_kdoc_ktfmt_canonical(&mut out, doc, " ");
let line_count = out.lines().count();
assert_eq!(line_count, 1, "should be single-line format");
assert!(out.starts_with(" /** "), "should have correct indent");
}
#[test]
fn sanitize_intradoc_link_with_path_separator_java() {
let input = "See [`ConversionOptions::builder()`] for details.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
}
#[test]
fn sanitize_intradoc_link_simple_type_php() {
let input = "Returns a [`ConversionResult`].";
let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
assert!(out.contains("`ConversionResult`"), "got: {out}");
assert!(!out.contains("[`"), "got: {out}");
}
#[test]
fn sanitize_none_to_null_javadoc() {
let input = "Returns None when no value is found.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("null"), "got: {out}");
assert!(!out.contains("None"), "got: {out}");
}
#[test]
fn sanitize_none_to_undefined_tsdoc() {
let input = "Returns None if absent.";
let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
assert!(out.contains("undefined"), "got: {out}");
assert!(!out.contains("None"), "got: {out}");
}
#[test]
fn sanitize_some_x_to_the_value_x() {
let input = "Pass Some(value) to enable.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("the value (value)"), "got: {out}");
assert!(!out.contains("Some("), "got: {out}");
}
#[test]
fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
let input =
"Only specified fields (Some values) will override existing options; None values leave the previous";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(
out.contains("(values)"),
"bare `Some ` before lowercase noun must be stripped; got: {out}"
);
assert!(
out.contains("null values"),
"bare `None ` must also be replaced; got: {out}"
);
assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
}
#[test]
fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
let cases = [
"SomeType lives on.",
"Some.method() returns Self.",
"Some Title",
"Some(x) is a value.",
];
for case in cases {
let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
if case.starts_with("Some(") {
assert!(out.contains("the value (x)"), "got: {out}");
} else {
assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
}
}
}
#[test]
fn sanitize_option_t_to_nullable_php() {
let input = "The result is Option<String>.";
let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
assert!(out.contains("String?"), "got: {out}");
assert!(!out.contains("Option<"), "got: {out}");
}
#[test]
fn sanitize_option_t_to_or_null_java() {
let input = "The result is Option<String>.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("String | null"), "got: {out}");
}
#[test]
fn sanitize_option_t_to_or_undefined_tsdoc() {
let input = "The result is Option<String>.";
let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
assert!(out.contains("String | undefined"), "got: {out}");
}
#[test]
fn sanitize_vec_u8_per_target() {
assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
}
#[test]
fn sanitize_vec_t_to_array() {
let input = "Returns Vec<String>.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("String[]"), "got: {out}");
assert!(!out.contains("Vec<"), "got: {out}");
}
#[test]
fn sanitize_hashmap_per_target() {
let input = "Uses HashMap<String, u32>.";
assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
}
#[test]
fn sanitize_arc_wrapper_stripped() {
let input = "Holds Arc<Config>.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("Config"), "got: {out}");
assert!(!out.contains("Arc<"), "got: {out}");
}
#[test]
fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
let input = format!("Contains {wrapper}<Inner>.");
let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
assert!(
!out.contains(&format!("{wrapper}<")),
"wrapper {wrapper} still present, got: {out}"
);
}
}
#[test]
fn sanitize_send_sync_stripped() {
let input = "The type is Send + Sync.";
let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
assert!(!out.contains("Send"), "got: {out}");
assert!(!out.contains("Sync"), "got: {out}");
}
#[test]
fn sanitize_static_lifetime_stripped() {
let input = "Requires 'static lifetime.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(!out.contains("'static"), "got: {out}");
}
#[test]
fn sanitize_pub_fn_stripped() {
let input = "Calls pub fn convert().";
let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
assert!(!out.contains("pub fn"), "got: {out}");
assert!(out.contains("convert()"), "got: {out}");
}
#[test]
fn sanitize_crate_prefix_stripped() {
let input = "See crate::error::ConversionError.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(!out.contains("crate::"), "got: {out}");
assert!(out.contains("error.ConversionError"), "got: {out}");
}
#[test]
fn sanitize_unwrap_expect_stripped() {
let input = "Call result.unwrap() or result.expect(\"msg\").";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(!out.contains(".unwrap()"), "got: {out}");
assert!(!out.contains(".expect("), "got: {out}");
}
#[test]
fn sanitize_no_mutation_inside_backticks() {
let input = "Use `None` as the argument.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
}
#[test]
fn sanitize_rust_fence_dropped_for_tsdoc() {
let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
assert!(
!out.contains("let x = 1;"),
"rust fence content must be dropped, got: {out}"
);
assert!(!out.contains("```rust"), "got: {out}");
assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
}
#[test]
fn sanitize_rust_fence_dropped_for_java() {
let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(
!out.contains("let x = 1;"),
"fence content must be dropped for Java, got: {out}"
);
assert!(!out.contains("```"), "fence markers must be dropped, got: {out}");
assert!(out.contains("Intro."), "prose before fence kept: {out}");
assert!(out.contains("Trailer."), "prose after fence kept: {out}");
}
#[test]
fn sanitize_non_rust_fence_passed_through() {
let input = "Example:\n\n```typescript\nconst x = 1;\n```";
let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
assert!(out.contains("const x = 1;"), "got: {out}");
}
#[test]
fn sanitize_backtick_code_span_not_mutated_option() {
let input = "The type is `Option<String>`.";
let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
assert!(
out.contains("`Option<String>`"),
"code span must be preserved, got: {out}"
);
}
#[test]
fn sanitize_idempotent() {
let input = "Returns None when Vec<String> is empty.";
let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
}
/// Replacements are applied on every line of a multi-line doc, not only the
/// first: the output must contain `null` and `String | null`.
#[test]
fn sanitize_multiline_prose() {
    let doc = concat!(
        "Convert HTML to Markdown.\n\n",
        "Returns None on failure.\n",
        "Use Option<String> for the result.",
    );
    let out = sanitize_rust_idioms(doc, DocTarget::JavaDoc);

    let has_null = out.contains("null");
    assert!(has_null, "None must be replaced on line 2, got: {out}");

    let has_nullable_string = out.contains("String | null");
    assert!(
        has_nullable_string,
        "Option<String> must be replaced on line 3, got: {out}"
    );
}
/// A line holding a `#[derive(...)]` attribute is removed, while the prose
/// line after it is kept.
#[test]
fn sanitize_attribute_line_dropped() {
    const DOC: &str = "#[derive(Debug, Clone)]\nSome documentation.";
    let out = sanitize_rust_idioms(DOC, DocTarget::JavaDoc);

    let attribute_leaked = out.contains("#[derive(");
    let prose_kept = out.contains("documentation.");
    assert!(!attribute_leaked, "attribute line must be dropped, got: {out}");
    assert!(prose_kept, "prose must survive, got: {out}");
}
/// A `::`-separated path written in plain prose is rendered with `.`
/// separators for the JavaDoc target.
#[test]
fn sanitize_path_separator_in_prose() {
    let out = sanitize_rust_idioms("See std::collections::HashMap for details.", DocTarget::JavaDoc);
    let dotted = out.contains("std.collections.HashMap");
    assert!(dotted, ":: must become ., got: {out}");
}
/// `None` appearing as the prefix of a longer identifier (`NoneType`) must
/// not be rewritten.
#[test]
fn sanitize_none_not_replaced_inside_identifier() {
    let out = sanitize_rust_idioms("Unlike NoneType in Python.", DocTarget::JavaDoc);
    let identifier_intact = out.contains("NoneType");
    assert!(identifier_intact, "NoneType must not be replaced, got: {out}");
}
/// For the C# target, the `# Examples` heading and the whole `ignore` fence
/// (markers and body) are removed, while the summary line and the prose
/// paragraph are kept.
#[test]
fn sanitize_csharp_drops_rust_section_headings_and_example_body() {
    let input = "Convert error to HTTP status code\n\n\
        Maps GraphQL error types to status codes.\n\n\
        # Examples\n\n\
        ```ignore\n\
        use spikard_graphql::error::GraphQLError;\n\
        let error = GraphQLError::AuthenticationError(\"Invalid token\".to_string());\n\
        assert_eq!(error.status_code(), 401);\n\
        ```\n";
    let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
    assert!(
        out.contains("Convert error to HTTP status code"),
        "summary preserved: {out}"
    );
    assert!(out.contains("Maps GraphQL error types"), "prose preserved: {out}");
    assert!(!out.contains("# Examples"), "heading dropped: {out}");
    assert!(!out.contains("```"), "code fence dropped: {out}");
    // Every negative probe below is text that really occurs in the fence
    // body; probing for text absent from the input could never fail.
    assert!(!out.contains("spikard_graphql"), "use statement dropped: {out}");
    assert!(!out.contains("assert_eq!"), "example body dropped: {out}");
    assert!(
        !out.contains("GraphQLError::AuthenticationError"),
        "rust path dropped: {out}"
    );
}
/// An intra-doc link such as [`Self::error_code`] is reduced, for the C#
/// target, to a plain code span with `.` instead of `::` and without the
/// surrounding square brackets.
#[test]
fn sanitize_csharp_intradoc_link_with_path_separator() {
    const DOC: &str = "See [`Self::error_code`] for the variant codes.";
    let out = sanitize_rust_idioms(DOC, DocTarget::CSharpDoc);

    let normalised = out.contains("`Self.error_code`");
    assert!(normalised, "intra-doc link normalised: {out}");

    let bracket_left = out.contains("[`");
    assert!(!bracket_left, "square brackets removed: {out}");

    let separator_left = out.contains("::");
    assert!(!separator_left, ":: replaced with .: {out}");
}
/// `Result<T, E>` in prose is reduced to its success type for the C#
/// target: the wrapper and the error type must both be gone.
#[test]
fn sanitize_csharp_result_type_keeps_success_drops_error() {
    let out = sanitize_rust_idioms(
        "Returns Result<String, ConversionError> on failure.",
        DocTarget::CSharpDoc,
    );

    let success_kept = out.contains("String");
    let wrapper_left = out.contains("Result<");
    let error_left = out.contains("ConversionError");
    assert!(success_kept, "success type kept: {out}");
    assert!(!wrapper_left, "Result wrapper dropped: {out}");
    assert!(!error_left, "error type dropped: {out}");
}
/// `Option<String>` in prose is rendered as `String?` for the C# target.
#[test]
fn sanitize_csharp_option_becomes_nullable() {
    let out = sanitize_rust_idioms("Returns Option<String>.", DocTarget::CSharpDoc);

    let nullable = out.contains("String?");
    let wrapper_left = out.contains("Option<");
    assert!(nullable, "Option<T> -> T?: {out}");
    assert!(!wrapper_left, "Option dropped: {out}");
}
/// `Vec<u8>` in prose is rendered as `byte[]` for the C# target.
#[test]
fn sanitize_csharp_vec_u8_becomes_byte_array() {
    let out = sanitize_rust_idioms("Accepts Vec<u8>.", DocTarget::CSharpDoc);
    let is_byte_array = out.contains("byte[]");
    assert!(is_byte_array, "Vec<u8> -> byte[]: {out}");
}
/// `HashMap<K, V>` in prose is rendered as `Dictionary` for the C# target,
/// with the generic brackets XML-escaped so the text is valid inside an XML
/// doc comment.
#[test]
fn sanitize_csharp_hashmap_becomes_dictionary() {
    let input = "Holds HashMap<String, u32>.";
    let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
    // The expected text uses the escaped form: raw `<`/`>` would contradict
    // the "XML-escaped brackets" contract this test is named after.
    assert!(
        out.contains("Dictionary&lt;String, u32&gt;"),
        "HashMap -> Dictionary with XML-escaped brackets: {out}"
    );
    assert!(
        !out.contains('<') && !out.contains('>'),
        "no raw angle brackets remain: {out}"
    );
}
/// The bare word `None` in prose becomes `null` for the C# target.
#[test]
fn sanitize_csharp_none_to_null() {
    let out = sanitize_rust_idioms("Returns None on miss.", DocTarget::CSharpDoc);

    let has_null = out.contains("null");
    let none_left = out.contains("None");
    assert!(has_null, "None -> null: {out}");
    assert!(!none_left, "None replaced: {out}");
}
/// For the C# target the output must be safe to embed in XML: a raw `&` is
/// emitted as `&amp;` and no raw angle bracket is left behind, while the
/// inner type of `Box<...>` is still present.
#[test]
fn sanitize_csharp_escapes_raw_angle_brackets_and_amp() {
    let input = "Accepts Box<dyn Trait> and combines a & b.";
    let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
    assert!(out.contains("dyn Trait"), "Box<T> stripped: {out}");
    // Probing for a bare "&" would pass whether or not escaping happened,
    // because the escaped form itself starts with "&".
    assert!(out.contains("&amp;"), "ampersand escaped: {out}");
    assert!(
        !out.contains('<') && !out.contains('>'),
        "no raw angle brackets remain: {out}"
    );
}
/// A `rust`-tagged fence is removed in full (body and markers) for the C#
/// target; the prose before and after it is kept.
#[test]
fn sanitize_csharp_drops_rust_code_fence_entirely() {
    const DOC: &str = "Intro.\n\n```rust\nlet x: Vec<u8> = vec![];\n```\n\nTrailer.";
    let out = sanitize_rust_idioms(DOC, DocTarget::CSharpDoc);

    let body_leaked = out.contains("let x");
    assert!(!body_leaked, "code fence body dropped: {out}");

    let marker_leaked = out.contains("```");
    assert!(!marker_leaked, "fence markers dropped: {out}");

    let (intro_kept, trailer_kept) = (out.contains("Intro."), out.contains("Trailer."));
    assert!(intro_kept, "prose before fence kept: {out}");
    assert!(trailer_kept, "prose after fence kept: {out}");
}
/// The keep-sections variant of the sanitiser leaves a `# Arguments`
/// heading and the body beneath it in the output.
#[test]
fn sanitize_csharp_keep_sections_does_not_drop_headings() {
    let out = sanitize_rust_idioms_keep_sections(
        "Summary.\n\n# Arguments\n\n* `name` - the value.",
        DocTarget::CSharpDoc,
    );

    let heading_kept = out.contains("# Arguments");
    let body_kept = out.contains("name");
    assert!(heading_kept, "heading preserved: {out}");
    assert!(body_kept, "body preserved: {out}");
}
/// Feeding the C# sanitiser its own output must yield the same text as the
/// first pass.
#[test]
fn sanitize_csharp_idempotent() {
    let first_pass = sanitize_rust_idioms("Returns Option<String> or None.", DocTarget::CSharpDoc);
    let second_pass = sanitize_rust_idioms(&first_pass, DocTarget::CSharpDoc);
    assert_eq!(first_pass, second_pass, "CSharpDoc sanitisation must be idempotent");
}
/// A fence with no language tag is removed for the PhpDoc target (body and
/// markers), while the prose paragraphs ahead of it are kept.
#[test]
fn sanitize_phpdoc_drops_unmarked_rust_code_fences() {
    let doc = concat!(
        "Detect language name from a file extension.\n\n",
        "Returns `None` for unrecognized extensions.\n\n",
        "```\n",
        "use tree_sitter_language_pack::detect_language_from_extension;\n",
        "assert_eq!(detect_language_from_extension(\"py\"), Some(\"python\"));\n",
        "assert_eq!(detect_language_from_extension(\"RS\"), Some(\"rust\"));\n",
        "assert_eq!(detect_language_from_extension(\"xyz\"), None);\n",
        "```",
    );
    let out = sanitize_rust_idioms(doc, DocTarget::PhpDoc);

    let use_leaked = out.contains("use tree_sitter_language_pack");
    assert!(!use_leaked, "Rust use stmt dropped: {out}");

    let code_leaked = out.contains("assert_eq!");
    assert!(!code_leaked, "Rust code dropped: {out}");

    let marker_leaked = out.contains("```");
    assert!(!marker_leaked, "fence markers dropped: {out}");

    let summary_kept = out.contains("Detect language name");
    assert!(summary_kept, "prose before fence kept: {out}");

    let detail_kept = out.contains("unrecognized extensions");
    assert!(detail_kept, "prose kept: {out}");
}
/// A fence with no language tag is removed for the JavaDoc target (body and
/// markers), while the summary line is kept.
#[test]
fn sanitize_javadoc_drops_unmarked_rust_code_fences() {
    let doc = concat!(
        "Process a file.\n\n",
        "```\n",
        "let result = process(\"def hello(): pass\", &config).unwrap();\n",
        "```",
    );
    let out = sanitize_rust_idioms(doc, DocTarget::JavaDoc);

    let body_leaked = out.contains("unwrap");
    let marker_leaked = out.contains("```");
    let prose_kept = out.contains("Process a file");
    assert!(!body_leaked, "Rust unwrap dropped: {out}");
    assert!(!marker_leaked, "fence markers dropped: {out}");
    assert!(prose_kept, "prose kept: {out}");
}
/// A `rust`-tagged fence is removed for the PhpDoc target: neither its
/// statements, its type names nor its markers may remain; the summary stays.
#[test]
fn sanitize_phpdoc_drops_explicit_rust_fences() {
    let doc = concat!(
        "Summary.\n\n",
        "```rust\n",
        "use std::path::PathBuf;\n",
        "let p = PathBuf::from(\"/tmp\");\n",
        "```",
    );
    let out = sanitize_rust_idioms(doc, DocTarget::PhpDoc);

    let use_leaked = out.contains("use std::");
    assert!(!use_leaked, "Rust code dropped: {out}");

    let type_leaked = out.contains("PathBuf");
    assert!(!type_leaked, "Rust types dropped: {out}");

    let marker_leaked = out.contains("```");
    assert!(!marker_leaked, "fence markers dropped: {out}");

    let summary_kept = out.contains("Summary");
    assert!(summary_kept, "prose kept: {out}");
}
/// A fence tagged `no_run` is removed for the TsDoc target (body and
/// markers); the prose on both sides of it is kept.
#[test]
fn sanitize_no_run_fence_dropped_for_tsdoc() {
    const DOC: &str = "Intro.\n\n```no_run\nuse foo::bar;\nbar::init();\n```\n\nTrailer.";
    let out = sanitize_rust_idioms(DOC, DocTarget::TsDoc);

    let body_leaked = out.contains("use foo::bar");
    assert!(!body_leaked, "no_run fence body dropped: {out}");

    let marker_leaked = out.contains("```");
    assert!(!marker_leaked, "fence markers dropped: {out}");

    let (intro_kept, trailer_kept) = (out.contains("Intro."), out.contains("Trailer."));
    assert!(intro_kept, "prose before fence kept: {out}");
    assert!(trailer_kept, "prose after fence kept: {out}");
}
/// A fence tagged `ignore` is removed for the PhpDoc target (body and
/// markers); the summary is kept.
#[test]
fn sanitize_ignore_fence_dropped_for_phpdoc() {
    const DOC: &str = "Summary.\n\n```ignore\nlet x = 1;\n// this would not compile\n```";
    let out = sanitize_rust_idioms(DOC, DocTarget::PhpDoc);

    let body_leaked = out.contains("let x = 1");
    let marker_leaked = out.contains("```");
    let summary_kept = out.contains("Summary");
    assert!(!body_leaked, "ignore fence body dropped: {out}");
    assert!(!marker_leaked, "fence markers dropped: {out}");
    assert!(summary_kept, "prose kept: {out}");
}
/// A fence tagged `should_panic` is removed for the JavaDoc target (body
/// and markers); the summary is kept.
#[test]
fn sanitize_should_panic_fence_dropped_for_javadoc() {
    const DOC: &str = "Panics on null.\n\n```should_panic\nlet _ = parse(null);\n```";
    let out = sanitize_rust_idioms(DOC, DocTarget::JavaDoc);

    let body_leaked = out.contains("parse(null)");
    let marker_leaked = out.contains("```");
    let summary_kept = out.contains("Panics on null");
    assert!(!body_leaked, "should_panic fence body dropped: {out}");
    assert!(!marker_leaked, "fence markers dropped: {out}");
    assert!(summary_kept, "prose kept: {out}");
}
/// A fence tagged `compile_fail` is removed for the C# target (body and
/// markers); the summary is kept.
#[test]
fn sanitize_compile_fail_fence_dropped_for_csharp() {
    const DOC: &str = "Type safety demo.\n\n```compile_fail\nlet x: u32 = \"hello\";\n```";
    let out = sanitize_rust_idioms(DOC, DocTarget::CSharpDoc);

    let body_leaked = out.contains("let x:");
    let marker_leaked = out.contains("```");
    let summary_kept = out.contains("Type safety demo");
    assert!(!body_leaked, "compile_fail fence body dropped: {out}");
    assert!(!marker_leaked, "fence markers dropped: {out}");
    assert!(summary_kept, "prose kept: {out}");
}
/// A fence tagged `edition2021` is removed for the TsDoc target (body and
/// markers), while prose around it is kept, including a trailing sentence
/// that merely mentions an edition name outside any fence.
#[test]
fn sanitize_edition_fence_dropped_for_tsdoc() {
    let input = "Edition example.\n\n```edition2021\nuse std::fmt;\n```\n\nSee also edition2018.";
    let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
    assert!(!out.contains("use std::fmt"), "edition2021 fence body dropped: {out}");
    assert!(!out.contains("```"), "fence markers dropped: {out}");
    assert!(out.contains("Edition example"), "prose kept: {out}");
    // The trailing sentence is part of the fixture so that an edition name
    // in ordinary prose is covered; without this check it goes untested.
    assert!(
        out.contains("See also edition2018."),
        "prose mentioning an edition kept: {out}"
    );
}
/// A fence tagged `python` is left in place for the TsDoc target, including
/// its opening marker and its body.
#[test]
fn sanitize_python_fence_preserved_for_tsdoc() {
    const DOC: &str = "Example:\n\n```python\nimport foo\nfoo.bar()\n```";
    let out = sanitize_rust_idioms(DOC, DocTarget::TsDoc);

    let opener_kept = out.contains("```python");
    let body_kept = out.contains("import foo");
    assert!(opener_kept, "python fence preserved: {out}");
    assert!(body_kept, "python body preserved: {out}");
}
/// A fence tagged `javascript` is left in place for the PhpDoc target,
/// including its opening marker and its body.
#[test]
fn sanitize_javascript_fence_preserved_for_phpdoc() {
    const DOC: &str = "Usage:\n\n```javascript\nconst x = require('foo');\n```";
    let out = sanitize_rust_idioms(DOC, DocTarget::PhpDoc);

    let opener_kept = out.contains("```javascript");
    let body_kept = out.contains("require('foo')");
    assert!(opener_kept, "javascript fence preserved: {out}");
    assert!(body_kept, "javascript body preserved: {out}");
}
/// An example consisting of a `no_run` fence yields `None` when the target
/// is "typescript".
#[test]
fn example_for_target_no_run_fence_suppressed_for_typescript() {
    let fenced = concat!(
        "```no_run\n",
        "use tree_sitter_language_pack::available_languages;\n",
        "let langs = available_languages();\n",
        "```",
    );
    let rendered = example_for_target(fenced, "typescript");
    assert_eq!(
        rendered, None,
        "no_run fence must be treated as Rust and suppressed for TypeScript"
    );
}
/// An example consisting of an `ignore` fence yields `None` when the target
/// is "php".
#[test]
fn example_for_target_ignore_fence_suppressed_for_php() {
    let rendered = example_for_target("```ignore\nlet x = 1;\n```", "php");
    assert_eq!(
        rendered, None,
        "ignore fence must be treated as Rust and suppressed for PHP"
    );
}
/// An example consisting of a `compile_fail` fence yields `None` when the
/// target is "java".
#[test]
fn example_for_target_compile_fail_fence_suppressed_for_java() {
    let rendered = example_for_target("```compile_fail\nlet x: u32 = \"wrong\";\n```", "java");
    assert_eq!(
        rendered, None,
        "compile_fail fence must be treated as Rust and suppressed for Java"
    );
}
/// An example consisting of a `should_panic` fence yields `None` when the
/// target is "ruby".
#[test]
fn example_for_target_should_panic_fence_suppressed_for_ruby() {
    let rendered = example_for_target("```should_panic\nlet _ = parse(None);\n```", "ruby");
    assert_eq!(
        rendered, None,
        "should_panic fence must be treated as Rust and suppressed for Ruby"
    );
}
/// An example consisting of an `edition2021` fence yields `None` when the
/// target is "php".
#[test]
fn example_for_target_edition_fence_suppressed_for_php() {
    let rendered = example_for_target("```edition2021\nuse std::fmt;\n```", "php");
    assert_eq!(
        rendered, None,
        "edition2021 fence must be treated as Rust and suppressed for PHP"
    );
}
/// An example consisting of a `python` fence yields `Some(..)` when the
/// target is "php".
#[test]
fn example_for_target_python_fence_preserved() {
    let kept = example_for_target("```python\nimport foo\n```", "php").is_some();
    assert!(kept, "python fence must be preserved for PHP target");
}
}