use super::sections::is_rust_fence_tag;
/// Documentation dialect that sanitized rustdoc text is rewritten for.
///
/// The variant selects the target-specific spellings used by the prose
/// transforms in this file (byte buffers, maps, optional values and the
/// replacement for `None`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
/// PHPDoc output: `Vec<u8>` becomes `string`, `HashMap<K, V>` becomes
/// `array<K, V>`, `Option<T>` becomes `T?` and `None` becomes `null`.
PhpDoc,
/// Javadoc output: `Vec<u8>` becomes `byte[]`, `HashMap<K, V>` becomes
/// `Map<K, V>`, `Option<T>` becomes `T | null` and `None` becomes `null`.
JavaDoc,
/// TSDoc output: `Vec<u8>` becomes `Uint8Array`, `HashMap<K, V>` becomes
/// `Record<K, V>`, `Option<T>` becomes `T | undefined` and `None` becomes
/// `undefined`.
TsDoc,
/// JSDoc output; uses the same spellings as `TsDoc`.
JsDoc,
/// C# XML doc output: `Vec<u8>` becomes `byte[]`, `HashMap<K, V>` becomes
/// `Dictionary<K, V>`, `Option<T>` becomes `T?`, `Result<T, E>` collapses to
/// `T`, and the final text is passed through `xml_escape_for_csharp`.
CSharpDoc,
}
/// Sanitizes rustdoc `text` for `target` with C# section dropping enabled.
///
/// Delegates to `sanitize_rust_idioms_inner` with `drop_csharp_sections`
/// set to `true`: for `DocTarget::CSharpDoc`, output stops at the first
/// rustdoc section heading (such as `# Errors`) found outside a code fence.
/// Other targets are unaffected by the flag.
pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
sanitize_rust_idioms_inner(text, target, true)
}
/// Sanitizes rustdoc `text` for `target` while keeping rustdoc section
/// headings and everything after them.
///
/// Delegates to `sanitize_rust_idioms_inner` with `drop_csharp_sections`
/// set to `false`, so `DocTarget::CSharpDoc` input is not truncated at
/// headings such as `# Errors`.
pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
sanitize_rust_idioms_inner(text, target, false)
}
/// Line-by-line sanitizer shared by the public entry points.
///
/// Processing rules, applied per line of `text`:
/// - Fenced code blocks whose language tag satisfies `is_rust_fence_tag` are
///   removed entirely (fence lines and body) for every target.
/// - Any other fenced block is copied through verbatim, fence lines included.
/// - Outside fences, a line that is nothing but an attribute (`#[...]`) is
///   dropped; every other line goes through `apply_prose_transforms`.
/// - When `drop_csharp_sections` is set and `target` is
///   `DocTarget::CSharpDoc`, the first rustdoc section heading found outside
///   a fence discards that line and everything after it.
///
/// The result ends with a newline only if `text` did. For
/// `DocTarget::CSharpDoc` the final text is passed through
/// `xml_escape_for_csharp`.
fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
    // The scanner is in exactly one of these states at any time.
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Fence {
        Outside,
        Rust,
        Other,
    }

    // Copies a line through untouched, restoring the newline `lines()` removed.
    fn keep_verbatim(out: &mut String, line: &str) {
        out.push_str(line);
        out.push('\n');
    }

    let is_csharp = matches!(target, DocTarget::CSharpDoc);
    let mut out = String::with_capacity(text.len());
    let mut fence = Fence::Outside;

    for line in text.lines() {
        let body = line.trim_start();

        // Nothing after the first section heading is emitted, so stop here.
        if drop_csharp_sections
            && is_csharp
            && fence == Fence::Outside
            && is_rustdoc_section_heading(body)
        {
            break;
        }

        if let Some(info) = body.strip_prefix("```") {
            match fence {
                // Closing fence of a Rust block: dropped with the block.
                Fence::Rust => fence = Fence::Outside,
                Fence::Other => {
                    fence = Fence::Outside;
                    keep_verbatim(&mut out, line);
                }
                Fence::Outside => {
                    // Only the part before the first comma is the language tag.
                    let lang = info.split(',').next().unwrap_or("").trim();
                    if is_rust_fence_tag(lang) {
                        fence = Fence::Rust;
                    } else {
                        fence = Fence::Other;
                        keep_verbatim(&mut out, line);
                    }
                }
            }
            continue;
        }

        match fence {
            Fence::Rust => {}
            Fence::Other => keep_verbatim(&mut out, line),
            Fence::Outside => {
                let is_attribute_line = body.starts_with("#[") && body.ends_with(']');
                if !is_attribute_line {
                    out.push_str(&apply_prose_transforms(line, target));
                    out.push('\n');
                }
            }
        }
    }

    // Every emitted line got a '\n'; undo the last one if the input had none.
    if out.ends_with('\n') && !text.ends_with('\n') {
        out.pop();
    }

    if is_csharp {
        xml_escape_for_csharp(&out)
    } else {
        out
    }
}
/// Returns `true` when `trimmed` is a level-one Markdown heading (`# ...`)
/// whose title is one of the conventional rustdoc section names.
///
/// The title is compared ASCII case-insensitively after trimming surrounding
/// whitespace. `trimmed` is expected to have its leading indentation removed
/// already; deeper headings (`## ...`) never match.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const SECTION_NAMES: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];

    trimmed.strip_prefix("# ").map_or(false, |title| {
        let title = title.trim();
        SECTION_NAMES
            .iter()
            .any(|name| title.eq_ignore_ascii_case(name))
    })
}
/// Escapes the XML-significant characters `&`, `<` and `>` so that `s` can be
/// embedded as text inside a C# XML documentation comment.
///
/// Each special character is replaced by its predefined XML entity
/// (`&amp;`, `&lt;`, `&gt;`); every other character is copied unchanged.
/// Input that already contains entities is escaped again, as is standard for
/// text-content escaping.
fn xml_escape_for_csharp(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(ch),
        }
    }
    out
}
/// Rewrites one prose line (outside any code fence) for `target`.
///
/// Three whole-line passes run first, so they also touch inline code spans:
/// intra-doc links, `::` path separators, and `.unwrap()` / `.expect(...)`
/// removal. The line is then split on backticks; inline code spans are
/// re-emitted between backticks untouched, and the remaining prose goes
/// through `transform_prose_segment`.
fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
    let linked = replace_intradoc_links(line, target);
    let dotted = replace_path_separator(&linked);
    let prepared = strip_unwrap_expect(&dotted);

    let mut rendered = String::with_capacity(prepared.len());
    for (is_code, span) in tokenize_backtick_spans(&prepared) {
        if !is_code {
            rendered.push_str(&transform_prose_segment(span, target));
            continue;
        }
        // The tokenizer strips the delimiters, so put them back.
        rendered.push('`');
        rendered.push_str(span);
        rendered.push('`');
    }
    rendered
}
/// Splits `line` into alternating prose and inline-code spans.
///
/// Each item is `(is_code, text)`. Code spans are returned without their
/// surrounding backticks; empty prose spans are omitted, while an empty code
/// span is kept. An unmatched opening backtick turns the rest of the line,
/// backtick included, into one prose span.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut spans = Vec::new();
    let mut rest = line;

    while let Some(open) = rest.find('`') {
        if open > 0 {
            spans.push((false, &rest[..open]));
        }
        let after_open = &rest[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                spans.push((true, &after_open[..close]));
                rest = &after_open[close + 1..];
            }
            None => {
                // No closing delimiter: the remainder is plain prose.
                spans.push((false, &rest[open..]));
                return spans;
            }
        }
    }

    if !rest.is_empty() {
        spans.push((false, rest));
    }
    spans
}
/// Applies every prose-level rewrite to a span that is not inline code.
///
/// In order: inline attributes are stripped; the literal fragments
/// `pub fn `, `crate::`, `&mut self` and `&self` are deleted; `Send`/`Sync`
/// bounds and `'static` are removed; generic wrapper types are mapped for
/// `target`; `Some(x)` calls and the bare word `Some` are rewritten; and
/// finally `None` is replaced by the target's null-like keyword.
fn transform_prose_segment(text: &str, target: DocTarget) -> String {
    let without_attrs = strip_inline_attributes(text);

    // `&mut self` must go before `&self`; the order of the list is significant.
    let without_syntax = ["pub fn ", "crate::", "&mut self", "&self"]
        .iter()
        .fold(without_attrs, |acc, needle| acc.replace(*needle, ""));

    let without_bounds = strip_lifetime_and_bounds(&without_syntax);
    let mapped = replace_type_wrappers(&without_bounds, target);
    let unwrapped = replace_some_calls(&mapped);
    let bare = replace_some_keyword_in_prose(&unwrapped);
    replace_none_keyword(&bare, target)
}
/// Copies the character starting at byte offset `i` of `s` into `out` and
/// returns the byte offset just past it.
///
/// # Panics
///
/// Panics if `i` is not a character boundary of `s`, or if `i` is at or past
/// the end of `s`.
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    let tail = &s[i..];
    let ch = tail.chars().next().expect("valid UTF-8 position");
    let next = i + ch.len_utf8();
    out.push(ch);
    next
}
fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
let mut out = String::with_capacity(s.len());
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
let search_start = i + 2;
let mut found = false;
let mut j = search_start;
while j + 1 < bytes.len() {
if bytes[j] == b'`' && bytes[j + 1] == b']' {
let inner = &s[search_start..j];
let converted = inner.replace("::", ".");
out.push('`');
out.push_str(&converted);
out.push('`');
i = j + 2;
found = true;
break;
}
j += 1;
}
if !found {
i = advance_char(s, &mut out, i);
}
} else {
i = advance_char(s, &mut out, i);
}
}
out
}
fn strip_inline_attributes(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
let mut depth = 0usize;
let mut j = i + 1;
while j < bytes.len() {
if bytes[j] == b'[' {
depth += 1;
} else if bytes[j] == b']' {
depth -= 1;
if depth == 0 {
i = j + 1;
break;
}
}
j += 1;
}
if depth != 0 {
i = advance_char(s, &mut out, i);
}
} else {
i = advance_char(s, &mut out, i);
}
}
out
}
/// Removes Rust-only trait bounds and lifetimes from prose.
///
/// The combined forms `Send + Sync` / `Sync + Send` are deleted first so that
/// no stray `+` is left behind; then any remaining standalone `Send` or
/// `Sync` word is removed, and finally the `'static` lifetime.
fn strip_lifetime_and_bounds(s: &str) -> String {
    let no_send_sync = regex_replace_all(s, r"Send\s*\+\s*Sync", "");
    let no_sync_send = regex_replace_all(&no_send_sync, r"Sync\s*\+\s*Send", "");
    let no_send = regex_replace_word_boundary(&no_sync_send, "Send", "");
    let no_sync = regex_replace_word_boundary(&no_send, "Sync", "");
    regex_replace_all(&no_sync, r"'\s*static\b", "")
}
/// Regex-free stand-in for a "replace all" call.
///
/// No regex engine is involved: the three pattern strings used in this file
/// are recognised by exact text and routed to hand-written matchers. Any
/// other `pattern` is treated as a literal substring.
fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
    if pattern == r"Send\s*\+\s*Sync" {
        return replace_with_optional_spaces(s, "Send", "+", "Sync", replacement);
    }
    if pattern == r"Sync\s*\+\s*Send" {
        return replace_with_optional_spaces(s, "Sync", "+", "Send", replacement);
    }
    if pattern == r"'\s*static\b" {
        return replace_static_lifetime(s, replacement);
    }
    s.replace(pattern, replacement)
}
/// Replaces every standalone occurrence of `keyword` in `s` with
/// `replacement`.
///
/// An occurrence is standalone when the bytes immediately before and after it
/// are not ASCII alphanumerics or `_` (or it touches the start or end of the
/// string). An empty `keyword`, or one longer than `s`, returns `s` unchanged.
fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let haystack = s.as_bytes();
    let needle = keyword.as_bytes();
    if needle.is_empty() || needle.len() > haystack.len() {
        return s.to_string();
    }

    let mut out = String::with_capacity(s.len());
    let mut pos = 0;
    while pos + needle.len() <= haystack.len() {
        let end = pos + needle.len();
        let standalone = &haystack[pos..end] == needle
            && (pos == 0 || !is_word_byte(haystack[pos - 1]))
            && (end >= haystack.len() || !is_word_byte(haystack[end]));
        if standalone {
            out.push_str(replacement);
            pos = end;
        } else {
            pos = advance_char(s, &mut out, pos);
        }
    }

    // The tail is too short to hold another match; copy it as-is.
    out.push_str(&s[pos..]);
    out
}
/// Replaces every occurrence of `a`, `op`, `b` in sequence, separated by any
/// number (including zero) of ASCII spaces, with `replacement`.
///
/// For example, with `a = "Send"`, `op = "+"`, `b = "Sync"`, all of
/// `Send+Sync`, `Send + Sync` and `Send  +   Sync` are matched. Only the
/// space character is skipped between parts; tabs and other whitespace are
/// not. Matching is left-to-right and non-overlapping.
///
/// The scan works directly on string slices, so nothing is allocated per
/// character. A match that would consume no input (all three parts empty
/// with no spaces to skip) is ignored so the scan always makes progress.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    /// Byte length of the `a op b` match at the start of `text`, if any.
    fn match_len(text: &str, a: &str, op: &str, b: &str) -> Option<usize> {
        let after_a = text.strip_prefix(a)?;
        let after_op = after_a.trim_start_matches(' ').strip_prefix(op)?;
        let after_b = after_op.trim_start_matches(' ').strip_prefix(b)?;
        Some(text.len() - after_b.len())
    }

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(ch) = rest.chars().next() {
        match match_len(rest, a, op, b) {
            Some(consumed) if consumed > 0 => {
                out.push_str(replacement);
                rest = &rest[consumed..];
            }
            _ => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
/// Replaces the `'static` lifetime (an apostrophe, optional spaces, then the
/// word `static`) with `replacement`.
///
/// The word must end at a boundary: the byte after `static` may not be an
/// ASCII alphanumeric or `_`, so `'statically` is left alone.
fn replace_static_lifetime(s: &str, replacement: &str) -> String {
    const KEYWORD: &[u8] = b"static";

    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        if bytes[pos] == b'\'' {
            let spaces = bytes[pos + 1..]
                .iter()
                .take_while(|&&b| b == b' ')
                .count();
            let kw_start = pos + 1 + spaces;
            if bytes[kw_start..].starts_with(KEYWORD) {
                let kw_end = kw_start + KEYWORD.len();
                let at_word_end = bytes
                    .get(kw_end)
                    .map_or(true, |&b| !(b.is_ascii_alphanumeric() || b == b'_'));
                if at_word_end {
                    out.push_str(replacement);
                    pos = kw_end;
                    continue;
                }
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}
/// Maps Rust generic wrapper types mentioned in prose to `target` spellings.
///
/// Order matters and is preserved:
/// 1. `Vec<u8>` becomes the target's byte-buffer type.
/// 2. `HashMap<K, V>` becomes the target's map type.
/// 3. Any remaining `Vec<T>` becomes `T[]`.
/// 4. `Option<T>` becomes the target's optional spelling.
/// 5. For `DocTarget::CSharpDoc` only, `Result<T, E>` collapses to `T`.
/// 6. `Arc`, `Box`, `Mutex`, `RwLock`, `Rc`, `Cell` and `RefCell` are unwrapped
///    to their inner type.
fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
    let byte_buffer = match target {
        DocTarget::PhpDoc => "string",
        DocTarget::JavaDoc | DocTarget::CSharpDoc => "byte[]",
        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
    };
    let render_map = |key: &str, value: &str| match target {
        DocTarget::PhpDoc => format!("array<{key}, {value}>"),
        DocTarget::JavaDoc => format!("Map<{key}, {value}>"),
        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{key}, {value}>"),
        DocTarget::CSharpDoc => format!("Dictionary<{key}, {value}>"),
    };
    let render_optional = |inner: &str| match target {
        DocTarget::PhpDoc | DocTarget::CSharpDoc => format!("{inner}?"),
        DocTarget::JavaDoc => format!("{inner} | null"),
        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
    };

    // `Vec<u8>` must be handled before the generic `Vec<T>` rule below.
    let text = replace_generic1(s, "Vec", "u8", byte_buffer);
    let text = replace_generic2(&text, "HashMap", &render_map);
    let text = replace_generic1_passthrough(&text, "Vec", |inner| format!("{inner}[]"));
    let text = replace_generic1_passthrough(&text, "Option", render_optional);
    let text = if matches!(target, DocTarget::CSharpDoc) {
        replace_generic2(&text, "Result", &|ok: &str, _err: &str| ok.to_string())
    } else {
        text
    };

    ["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"]
        .iter()
        .fold(text, |acc, wrapper| {
            replace_generic1_passthrough(&acc, wrapper, |inner| inner.to_string())
        })
}
/// Replaces every literal occurrence of `name<arg>` in `s` with
/// `replacement`.
///
/// This is a plain substring replacement: no whitespace tolerance and no
/// word-boundary check.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let needle = [name, "<", arg, ">"].concat();
    s.replace(needle.as_str(), replacement)
}
/// Rewrites every `name<...>` in `s` by handing the text between the angle
/// brackets to `f` and substituting its result.
///
/// Nested angle brackets are balanced to find the closing `>`. A match is
/// ignored when `name` is the tail of a longer identifier (the preceding byte
/// is an ASCII alphanumeric or `_`) or when the brackets never close.
fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
where
    F: Fn(&str) -> String,
{
    let opener = format!("{name}<");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let after_ident =
            pos > 0 && (bytes[pos - 1].is_ascii_alphanumeric() || bytes[pos - 1] == b'_');
        if !after_ident && bytes[pos..].starts_with(opener.as_bytes()) {
            let inner_start = pos + opener.len();
            // One bracket is already open; find the '>' that brings us to zero.
            let mut open = 1usize;
            let close = bytes[inner_start..].iter().position(|&b| {
                match b {
                    b'<' => open += 1,
                    b'>' => open -= 1,
                    _ => {}
                }
                open == 0
            });
            if let Some(offset) = close {
                let inner_end = inner_start + offset;
                out.push_str(&f(&s[inner_start..inner_end]));
                pos = inner_end + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}
/// Rewrites every two-argument generic `name<A, B>` in `s` by calling `f`
/// with the trimmed arguments and substituting its result.
///
/// Nested angle brackets are balanced to find the closing `>`, and the
/// arguments are split with `split_on_comma_at_top_level`. The occurrence is
/// left untouched when `name` is the tail of a longer identifier, when the
/// brackets never close, or when no splitting comma is found.
fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
where
    F: Fn(&str, &str) -> String,
{
    let opener = format!("{name}<");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let after_ident =
            pos > 0 && (bytes[pos - 1].is_ascii_alphanumeric() || bytes[pos - 1] == b'_');
        if !after_ident && bytes[pos..].starts_with(opener.as_bytes()) {
            let args_start = pos + opener.len();
            // One bracket is already open; find the '>' that brings us to zero.
            let mut open = 1usize;
            let close = bytes[args_start..].iter().position(|&b| {
                match b {
                    b'<' => open += 1,
                    b'>' => open -= 1,
                    _ => {}
                }
                open == 0
            });
            let parsed = close.and_then(|offset| {
                let args_end = args_start + offset;
                split_on_comma_at_top_level(&s[args_start..args_end])
                    .map(|(first, second)| (first, second, args_end))
            });
            if let Some((first, second, args_end)) = parsed {
                out.push_str(&f(first.trim(), second.trim()));
                pos = args_end + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}
/// Splits a generic argument list at its first top-level comma.
///
/// Returns the text before and after that comma (untrimmed), or `None` when
/// there is no comma outside every bracket pair. Angle brackets, parentheses
/// and square brackets all count as nesting, so commas inside tuple types
/// (`(A, B)`), function-pointer parameter lists, or nested generics do not
/// split the list. The `>` of a return arrow (`->`) is not a closing bracket
/// and is ignored.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut depth = 0i32;
    let mut prev: Option<char> = None;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            // `->` is a return arrow, not the end of a generic list.
            '>' if prev == Some('-') => {}
            '>' | ')' | ']' => depth -= 1,
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
        prev = Some(ch);
    }
    None
}
/// Rewrites `Some(expr)` in prose as `the value (expr)`.
///
/// Parentheses inside the argument are balanced to find the closing `)`.
/// The occurrence is left alone when `Some` is the tail of a longer
/// identifier or when the call is never closed.
fn replace_some_calls(s: &str) -> String {
    const OPENER: &[u8] = b"Some(";

    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let after_ident =
            pos > 0 && (bytes[pos - 1].is_ascii_alphanumeric() || bytes[pos - 1] == b'_');
        if !after_ident && bytes[pos..].starts_with(OPENER) {
            let arg_start = pos + OPENER.len();
            // One parenthesis is already open; find the ')' that closes it.
            let mut open = 1usize;
            let close = bytes[arg_start..].iter().position(|&b| {
                match b {
                    b'(' => open += 1,
                    b')' => open -= 1,
                    _ => {}
                }
                open == 0
            });
            if let Some(offset) = close {
                let arg_end = arg_start + offset;
                out.push_str("the value (");
                out.push_str(&s[arg_start..arg_end]);
                out.push(')');
                pos = arg_end + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}
/// Drops the word `Some` together with the space after it when it is followed
/// by a lowercase ASCII letter (so `Some value` becomes `value`).
///
/// `Some` must start at a word boundary: the preceding byte may not be an
/// ASCII alphanumeric or `_`. Strings no longer than `"Some "` are returned
/// unchanged.
fn replace_some_keyword_in_prose(s: &str) -> String {
    const KEYWORD: &[u8] = b"Some ";

    let bytes = s.as_bytes();
    if bytes.len() <= KEYWORD.len() {
        return s.to_string();
    }

    let mut out = String::with_capacity(s.len());
    let mut pos = 0;
    // The strict `<` guarantees a byte exists right after the keyword.
    while pos + KEYWORD.len() < bytes.len() {
        let starts_word =
            pos == 0 || !(bytes[pos - 1].is_ascii_alphanumeric() || bytes[pos - 1] == b'_');
        let droppable = bytes[pos..].starts_with(KEYWORD)
            && starts_word
            && bytes[pos + KEYWORD.len()].is_ascii_lowercase();
        if droppable {
            pos += KEYWORD.len();
        } else {
            pos = advance_char(s, &mut out, pos);
        }
    }

    // The tail is too short to hold another match; copy it as-is.
    out.push_str(&s[pos..]);
    out
}
/// Replaces the standalone word `None` with the null-like keyword of
/// `target`: `undefined` for TSDoc/JSDoc, `null` for PHPDoc, Javadoc and C#.
///
/// `None` is standalone when the bytes on both sides are not ASCII
/// alphanumerics or `_` (or it touches the start or end of the string).
fn replace_none_keyword(s: &str, target: DocTarget) -> String {
    const KEYWORD: &[u8] = b"None";

    let null_word = match target {
        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
    };

    let bytes = s.as_bytes();
    if bytes.len() < KEYWORD.len() {
        return s.to_string();
    }

    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;
    while pos + KEYWORD.len() <= bytes.len() {
        let end = pos + KEYWORD.len();
        let standalone = &bytes[pos..end] == KEYWORD
            && (pos == 0 || !is_word_byte(bytes[pos - 1]))
            && (end >= bytes.len() || !is_word_byte(bytes[end]));
        if standalone {
            out.push_str(null_word);
            pos = end;
        } else {
            pos = advance_char(s, &mut out, pos);
        }
    }

    // The tail is too short to hold another match; copy it as-is.
    out.push_str(&s[pos..]);
    out
}
/// Replaces the Rust path separator `::` with `.`.
///
/// A `::` is rewritten when at least one neighbour (the byte just before or
/// the byte just after) is an ASCII alphanumeric or `_`; a `::` with no
/// identifier on either side is kept.
fn replace_path_separator(s: &str) -> String {
    let is_ident_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        if bytes[pos..].starts_with(b"::") {
            let ident_before = pos > 0 && is_ident_byte(bytes[pos - 1]);
            let ident_after = bytes.get(pos + 2).copied().map_or(false, is_ident_byte);
            if ident_before || ident_after {
                out.push('.');
                pos += 2;
                continue;
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}
/// Removes `.unwrap()` and `.expect(...)` calls from `s`.
///
/// For `.expect(`, parentheses inside the argument are balanced so the whole
/// call, message included, is removed. An `.expect(` that is never closed is
/// left in place.
fn strip_unwrap_expect(s: &str) -> String {
    const UNWRAP: &[u8] = b".unwrap()";
    const EXPECT: &[u8] = b".expect(";

    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let rest = &bytes[pos..];
        if rest.starts_with(UNWRAP) {
            pos += UNWRAP.len();
            continue;
        }
        if rest.starts_with(EXPECT) {
            // One parenthesis is already open; find the ')' that closes it.
            let mut open = 1usize;
            let close = rest[EXPECT.len()..].iter().position(|&b| {
                match b {
                    b'(' => open += 1,
                    b')' => open -= 1,
                    _ => {}
                }
                open == 0
            });
            if let Some(offset) = close {
                pos += EXPECT.len() + offset + 1;
                continue;
            }
        }
        pos = advance_char(s, &mut out, pos);
    }
    out
}