use crate::core::config::Language;
/// Lower-cased heading titles whose sections `strip_rust_sections` removes from a doc string.
const RUST_ONLY_SECTIONS: &[&str] = &["example", "examples", "arguments", "fields"];
/// Checks that Markdown headings never get deeper by more than one level at a time.
///
/// Only lines that begin with one to six `#` characters and sit outside fenced
/// code blocks are treated as headings. Moving to a shallower level is always
/// allowed.
///
/// # Errors
///
/// Returns a message naming the previous level, the offending level, the size of
/// the jump and the heading text of the first violation found.
#[cfg(test)]
pub(crate) fn check_monotonic_headings(doc: &str) -> Result<(), String> {
    let mut inside_fence = false;
    let mut last_level: Option<usize> = None;
    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            inside_fence = !inside_fence;
            continue;
        }
        if inside_fence {
            continue;
        }
        // A count of zero means the line does not start with `#`; more than six
        // hashes is not a heading either.
        let level = line.bytes().take_while(|&b| b == b'#').count();
        if !(1..=6).contains(&level) {
            continue;
        }
        if let Some(prev) = last_level {
            if level > prev + 1 {
                let jump = level - prev;
                let heading_text = line.trim_start_matches('#').trim();
                return Err(format!(
                    "Heading increment violation: H{} → H{} (skip of {})\nHeading: {}",
                    prev, level, jump, heading_text
                ));
            }
        }
        last_level = Some(level);
    }
    Ok(())
}
/// Pushes every Markdown heading in `doc` down by `levels`, capping at H6.
///
/// Lines inside fenced code blocks, fence lines themselves, and lines starting
/// with more than six `#` characters are copied unchanged. Trailing whitespace of
/// the result is trimmed. When `levels` is zero or `doc` is empty the input is
/// returned as-is.
pub(crate) fn demote_headings(doc: &str, levels: usize) -> String {
    if doc.is_empty() || levels == 0 {
        return doc.to_string();
    }
    let mut inside_fence = false;
    let mut rendered = String::new();
    for line in doc.lines() {
        let is_fence = line.trim_start().starts_with("```");
        if is_fence {
            inside_fence = !inside_fence;
        }
        let hashes = line.bytes().take_while(|&b| b == b'#').count();
        if !is_fence && !inside_fence && (1..=6).contains(&hashes) {
            let depth = (hashes + levels).min(6);
            rendered.extend(std::iter::repeat('#').take(depth));
            // Everything after the original run of hashes is kept verbatim.
            rendered.push_str(&line[hashes..]);
        } else {
            rendered.push_str(line);
        }
        rendered.push('\n');
    }
    rendered.trim_end().to_string()
}
pub(crate) fn wrap_bare_urls(text: &str) -> String {
let url_re = regex::Regex::new(r"(https?://[^\s)>\]]+)").unwrap();
let mut result = String::with_capacity(text.len());
let mut last_end = 0;
for mat in url_re.find_iter(text) {
let start = mat.start();
let preceding = if start > 0 { text.as_bytes()[start - 1] } else { b' ' };
if preceding == b'(' || preceding == b'<' {
continue;
}
result.push_str(&text[last_end..start]);
result.push('<');
result.push_str(mat.as_str());
result.push('>');
last_end = mat.end();
}
result.push_str(&text[last_end..]);
result
}
/// Runs the full clean-up pipeline over a doc string for the given target language.
///
/// The passes are applied in a fixed order: `strip_rust_sections`,
/// `rust_links_to_plain`, `convert_doc_headings_to_bold`,
/// `rust_paths_to_dot_notation`, `replace_rust_terminology`,
/// `normalize_list_markers` and `ensure_blank_before_lists`. The final text is
/// trimmed on both ends. An empty input yields an empty string.
pub fn clean_doc(doc: &str, lang: Language) -> String {
    if doc.is_empty() {
        return String::new();
    }
    let without_rust_sections = strip_rust_sections(doc);
    let plain_links = rust_links_to_plain(&without_rust_sections);
    let bold_headings = convert_doc_headings_to_bold(&plain_links);
    let dotted_paths = rust_paths_to_dot_notation(&bold_headings, lang);
    let reworded = replace_rust_terminology(&dotted_paths, lang);
    let dash_lists = normalize_list_markers(&reworded);
    ensure_blank_before_lists(&dash_lists).trim().to_string()
}
/// Reports whether `line` opens a Markdown list item.
///
/// The line may be indented by at most three spaces. It must then start with a
/// bullet (`-`, `*`, `+`) or with one or more ASCII digits followed by `.` or
/// `)`, and the marker must be followed by a space or a tab.
fn is_list_item_start(line: &str) -> bool {
    let body = line.trim_start_matches(' ');
    if line.len() - body.len() > 3 {
        return false;
    }
    let followed_by_gap = |rest: &str| rest.starts_with(' ') || rest.starts_with('\t');

    // Bullet markers.
    if let Some(rest) = body.strip_prefix(['-', '*', '+']) {
        return followed_by_gap(rest);
    }

    // Ordered markers: digits, then `.` or `)`.
    let digits = body.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return false;
    }
    match body[digits..].strip_prefix(['.', ')']) {
        Some(rest) => followed_by_gap(rest),
        None => false,
    }
}
pub(crate) fn ensure_blank_before_lists(doc: &str) -> String {
let mut out = String::with_capacity(doc.len());
let mut in_code_block = false;
let mut prev_non_empty: Option<String> = None;
let mut prev_was_blank = true;
for line in doc.lines() {
if line.trim_start().starts_with("```") {
in_code_block = !in_code_block;
out.push_str(line);
out.push('\n');
prev_non_empty = Some(line.to_string());
prev_was_blank = false;
continue;
}
if in_code_block {
out.push_str(line);
out.push('\n');
continue;
}
if line.trim().is_empty() {
out.push_str(line);
out.push('\n');
prev_was_blank = true;
continue;
}
let starts_list = is_list_item_start(line);
let prev_was_list = prev_non_empty.as_deref().is_some_and(is_list_item_start);
if starts_list && !prev_was_blank && !prev_was_list {
out.push('\n');
}
out.push_str(line);
out.push('\n');
prev_non_empty = Some(line.to_string());
prev_was_blank = false;
}
out
}
/// Replaces a fixed set of headings with the output of the `bold_heading.jinja` template.
///
/// A line outside fenced code that starts with `#` and whose text is, ignoring
/// case, one of `errors`, `returns`, `panics`, `safety`, `notes` or `note` is
/// replaced by the rendered template. All other lines are copied through with a
/// trailing `\n`.
pub(crate) fn convert_doc_headings_to_bold(doc: &str) -> String {
    let mut rendered = String::new();
    let mut inside_fence = false;
    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            inside_fence = !inside_fence;
        } else if !inside_fence && line.starts_with('#') {
            let heading_text = line.trim_start_matches('#').trim();
            let is_known_section = matches!(
                heading_text.to_lowercase().as_str(),
                "errors" | "returns" | "panics" | "safety" | "notes" | "note"
            );
            if is_known_section {
                // No newline is appended here; presumably the template supplies
                // its own line ending — TODO confirm against the template.
                rendered.push_str(&crate::docs::template_env::render(
                    "bold_heading.jinja",
                    minijinja::context! { text => heading_text },
                ));
                continue;
            }
        }
        rendered.push_str(line);
        rendered.push('\n');
    }
    rendered
}
/// Rewrites `* ` bullet markers to `- ` outside fenced code blocks.
///
/// Only bullets indented by at most three spaces are rewritten; the indentation
/// is preserved. Lines made up solely of three or more `*` characters and
/// spaces/tabs (a Markdown thematic break such as `* * *`) are left untouched,
/// because rewriting the first `*` would turn the rule into a list item.
/// Trailing whitespace of the result is trimmed.
pub(crate) fn normalize_list_markers(doc: &str) -> String {
    let mut out = String::with_capacity(doc.len());
    let mut in_code_block = false;
    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
        } else if !in_code_block {
            let trimmed_left = line.trim_start_matches(' ');
            let indent = &line[..line.len() - trimmed_left.len()];
            let is_thematic_break = trimmed_left.matches('*').count() >= 3
                && trimmed_left.chars().all(|c| matches!(c, '*' | ' ' | '\t'));
            if let Some(rest) = trimmed_left.strip_prefix("* ") {
                if indent.len() <= 3 && !is_thematic_break {
                    out.push_str(indent);
                    out.push_str("- ");
                    out.push_str(rest);
                    out.push('\n');
                    continue;
                }
            }
        }
        out.push_str(line);
        out.push('\n');
    }
    out.trim_end().to_string()
}
/// Joins the whitespace-separated words of `s` with single spaces.
///
/// Leading and trailing whitespace disappears, and any run of whitespace
/// (including newlines) between words becomes one space.
pub(crate) fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Rewrites Rust-specific wording in `doc` for the target language.
///
/// * `this crate` becomes `this library` (this also covers phrases such as
///   `in this crate` and `for this crate`).
/// * Two fixed sentences about FFI panics and `OutputFormat.None` are replaced
///   with language-neutral wording.
/// * `` `None` `` becomes the target language's null literal.
/// * Boolean literals in backticks are capitalised for Python, lower-cased for
///   every other language except Rust, and left alone for Rust.
pub(crate) fn replace_rust_terminology(doc: &str, lang: Language) -> String {
    let doc = doc
        .replace("this crate", "this library")
        .replace(
            "Panic caught during conversion to prevent unwinding across FFI boundaries",
            "Internal error caught during conversion",
        )
        .replace(
            "None when `output_format` is set to `OutputFormat.None`",
            "null/nil when in extraction-only mode",
        );

    // No wildcard arm on purpose: adding a `Language` variant must force a
    // decision here.
    let none_replacement = match lang {
        Language::Go | Language::Ruby | Language::Elixir => "`nil`",
        Language::Python | Language::Rust => "`None`",
        Language::R | Language::Ffi | Language::C | Language::Jni => "`NULL`",
        Language::Java
        | Language::Node
        | Language::Wasm
        | Language::Csharp
        | Language::Php
        | Language::Kotlin
        | Language::KotlinAndroid
        | Language::Swift
        | Language::Dart
        | Language::Gleam
        | Language::Zig => "`null`",
    };
    let doc = doc.replace("`None`", none_replacement);

    if lang == Language::Python {
        doc.replace("`true`", "`True`").replace("`false`", "`False`")
    } else if lang != Language::Rust {
        doc.replace("`True`", "`true`").replace("`False`", "`false`")
    } else {
        doc
    }
}
/// Replaces Rust `::` path separators outside fenced code blocks.
///
/// `Default::default()` is first replaced by the phrase `the default
/// constructor`; every remaining `::` then becomes `.`, except for PHP where the
/// separator stays `::`. Fence lines and code block content are copied
/// unchanged. Every output line ends with `\n`.
pub(crate) fn rust_paths_to_dot_notation(doc: &str, lang: Language) -> String {
    let separator = if lang == Language::Php { "::" } else { "." };
    let mut rendered = String::new();
    let mut inside_fence = false;
    for line in doc.lines() {
        let is_fence = line.trim_start().starts_with("```");
        if is_fence {
            inside_fence = !inside_fence;
        }
        if is_fence || inside_fence {
            rendered.push_str(line);
        } else {
            let without_default = line.replace("Default::default()", "the default constructor");
            rendered.push_str(&without_default.replace("::", separator));
        }
        rendered.push('\n');
    }
    rendered
}
/// Cleans `doc` with `clean_doc` and flattens the result onto a single line.
///
/// Each line of the cleaned text is trimmed, empty lines are dropped, and the
/// remaining pieces are joined with single spaces. An empty input yields an
/// empty string.
pub(crate) fn clean_doc_inline(doc: &str, lang: Language) -> String {
    if doc.is_empty() {
        return String::new();
    }
    let cleaned = clean_doc(doc, lang);
    let mut inline = String::with_capacity(cleaned.len());
    for piece in cleaned.lines().map(str::trim).filter(|piece| !piece.is_empty()) {
        if !inline.is_empty() {
            inline.push(' ');
        }
        inline.push_str(piece);
    }
    inline
}
/// Removes Rust-only material from a doc string.
///
/// Three things are dropped:
///
/// * sections whose heading text (lower-cased) is listed in
///   `RUST_ONLY_SECTIONS`, together with their content;
/// * fenced code blocks for which `is_rust_code_block` returns `true`;
/// * individual lines for which `is_rust_specific_line` returns `true`.
///
/// Inside a skipped section, blank lines, bullet lines (`*`, `-`, `+`), indented
/// lines and code blocks count as section content. The first other line, or the
/// next heading, ends the skip. Every emitted line ends with `\n`. A code block
/// that is never closed is not emitted.
pub(crate) fn strip_rust_sections(doc: &str) -> String {
    let mut out = String::new();
    let mut skip_section = false;
    let mut in_code_block = false;
    // Holds the opening fence and body of the current code block until the
    // closing fence decides whether the block is kept.
    let mut code_block_buf = String::new();

    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            if in_code_block {
                in_code_block = false;
                if !skip_section && !is_rust_code_block(&code_block_buf) {
                    out.push_str(&code_block_buf);
                    out.push_str(line);
                    out.push('\n');
                }
                code_block_buf.clear();
            } else {
                in_code_block = true;
                if !skip_section {
                    code_block_buf.push_str(line);
                    code_block_buf.push('\n');
                }
            }
            continue;
        }

        if in_code_block {
            if !skip_section {
                code_block_buf.push_str(line);
                code_block_buf.push('\n');
            }
            continue;
        }

        if line.starts_with('#') {
            let header_text = line.trim_start_matches('#').trim().to_lowercase();
            skip_section = RUST_ONLY_SECTIONS.contains(&header_text.as_str());
            if skip_section {
                continue;
            }
        }

        if skip_section {
            let trimmed = line.trim();
            // Indentation has to be tested on the untrimmed line: `trimmed`
            // never starts with whitespace, so checking it there would let
            // indented continuation lines end the skipped section.
            let is_section_content = trimmed.is_empty()
                || trimmed.starts_with(['*', '-', '+'])
                || line.starts_with([' ', '\t']);
            if is_section_content {
                continue;
            }
            skip_section = false;
        }

        if is_rust_specific_line(line) {
            continue;
        }
        out.push_str(line);
        out.push('\n');
    }
    out
}
/// Heuristically decides whether a fenced code block contains Rust code.
///
/// `content` is the block text starting with its opening fence line. The block
/// is considered Rust when the fence tag is `rust`, `rust,no_run`, `rust,ignore`
/// or empty (case-insensitive) AND at least one later line starts with `use ` or
/// contains one of a fixed set of Rust-looking snippets (`unwrap()`, `assert!`,
/// `::new(`, ...).
///
/// Leading whitespace before the fence is ignored, so indented fences are
/// classified the same way as unindented ones.
pub(crate) fn is_rust_code_block(content: &str) -> bool {
    const RUST_MARKERS: [&str; 8] = [
        "unwrap()",
        "assert!",
        "assert_eq!",
        "Vec::new()",
        "Default::default()",
        "::new(",
        ".to_string()",
        "r#\"",
    ];

    let mut lines = content.lines();
    let fence_lang = lines
        .next()
        .unwrap_or("")
        .trim_start()
        .trim_start_matches('`')
        .trim()
        .to_lowercase();
    if !matches!(fence_lang.as_str(), "rust" | "rust,no_run" | "rust,ignore" | "") {
        return false;
    }
    // `lines` now yields only the block body (everything after the fence line).
    lines.any(|line| {
        line.starts_with("use ") || RUST_MARKERS.iter().any(|marker| line.contains(*marker))
    })
}
/// Reports whether a line is a Rust import that should be dropped from docs.
///
/// After trimming, the line matches when it starts with `# use ` (a hidden
/// doctest import), or when it starts with `use ` and ends with `;`.
pub(crate) fn is_rust_specific_line(line: &str) -> bool {
    let candidate = line.trim();
    if candidate.starts_with("# use ") {
        return true;
    }
    candidate.starts_with("use ") && candidate.ends_with(';')
}
/// Collects parameter descriptions from the arguments section of a doc string.
///
/// Lines are read from a section whose heading (lower-cased) is `arguments`,
/// `args`, `parameters` or `params`, until the next heading. Fenced code blocks
/// are ignored. Each entry has its leading `*`, `-` and spaces stripped and is
/// split into name and description at the first `" - "` or `": "` separator,
/// whichever comes first. When the entry starts with a backticked name, the
/// separator is searched for only after the closing backtick, so a name such as
/// `` `x: u32` `` is kept whole. Surrounding backticks are removed from the
/// name. Entries with an empty name or description are skipped.
///
/// Returns a map from parameter name to description; a later entry with the
/// same name overwrites an earlier one.
pub(crate) fn extract_param_docs(doc: &str) -> std::collections::HashMap<String, String> {
    let mut map = std::collections::HashMap::new();
    let mut in_args = false;
    let mut in_code_block = false;
    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
            continue;
        }
        if in_code_block {
            continue;
        }
        if line.starts_with('#') {
            let header = line.trim_start_matches('#').trim().to_lowercase();
            in_args = matches!(header.as_str(), "arguments" | "args" | "parameters" | "params");
            continue;
        }
        if !in_args {
            continue;
        }

        let entry = line.trim_start_matches(['*', '-', ' ']);
        // Skip past a leading `name` code span so separators inside it are not
        // mistaken for the name/description boundary.
        let search_from = entry
            .strip_prefix('`')
            .and_then(|rest| rest.find('`'))
            .map_or(0, |close| close + 2);
        let tail = &entry[search_from..];
        let dash = tail.find(" - ").map(|pos| (search_from + pos, 3));
        let colon = tail.find(": ").map(|pos| (search_from + pos, 2));
        // Always preferring the dash would mis-split `name: text - more text`,
        // so the separator that appears first wins.
        let separator = match (dash, colon) {
            (Some(d), Some(c)) => Some(if c.0 < d.0 { c } else { d }),
            (d, c) => d.or(c),
        };
        let Some((sep_pos, sep_len)) = separator else {
            continue;
        };

        let param_name = entry[..sep_pos].trim().trim_matches('`');
        let desc = entry[sep_pos + sep_len..].trim();
        if !param_name.is_empty() && !desc.is_empty() {
            map.insert(param_name.to_string(), desc.to_string());
        }
    }
    map
}
/// Strips rustdoc link syntax around backticked names, keeping only the code span.
///
/// Handles the three link forms that begin with ``[` ``:
///
/// * shortcut links ``[`Foo`]`` become `` `Foo` ``;
/// * inline links ``[`Foo`](target)`` become `` `Foo` ``;
/// * reference links ``[`Foo`][label]`` become `` `Foo` ``.
///
/// Anything that does not match is copied through unchanged, including an
/// inline link whose `(` is never closed. A reference label that is never
/// closed is treated as ordinary text following a shortcut link.
pub(crate) fn rust_links_to_plain(doc: &str) -> String {
    let chars: Vec<char> = doc.chars().collect();
    let mut result = String::with_capacity(doc.len());

    // Index of the first `target` at or after `from`, if any. `from` may be
    // past the end of the input.
    let find_from = |from: usize, target: char| -> Option<usize> {
        chars
            .get(from..)?
            .iter()
            .position(|&c| c == target)
            .map(|offset| from + offset)
    };

    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '[' && chars.get(i + 1) == Some(&'`') {
            let text_start = i + 1;
            if let Some(close) = find_from(text_start, ']') {
                // Where scanning resumes once the link is consumed; `None`
                // means the construct is malformed and is emitted literally.
                let resume_at = match chars.get(close + 1).copied() {
                    Some('(') => find_from(close + 2, ')').map(|end| end + 1),
                    Some('[') => Some(find_from(close + 2, ']').map_or(close + 1, |end| end + 1)),
                    _ => Some(close + 1),
                };
                if let Some(next) = resume_at {
                    result.extend(&chars[text_start..close]);
                    i = next;
                    continue;
                }
            }
        }
        result.push(chars[i]);
        i += 1;
    }
    result
}
#[cfg(test)]
mod tests;