mod blocks;
mod markdown;
mod predicates;
mod reformat;
mod reparse;
mod spacing;
pub(super) use blocks::*;
pub(super) use markdown::*;
pub(super) use predicates::*;
pub(super) use reformat::*;
pub(super) use reparse::*;
pub(super) use spacing::*;
use super::{pretty_print, should_unicode_escape};
/// Normalizes the raw text of a doc comment.
///
/// * Blank input becomes a single space.
/// * Single-line input has trailing whitespace removed and a newline appended.
/// * For multi-line input that starts with exactly one newline, that newline
///   is replaced by a blank line when the following text starts with a space,
///   and by a single space otherwise.
/// * Headings (`#` through `######` followed by a space, at column 0) are
///   preceded by two blank lines unless they open the comment, and are
///   followed by one blank line.
/// * The result ends with a blank line when the body contains a blank line
///   anywhere, and with exactly one newline otherwise.
pub(super) fn normalize_doc_comment(text: &str) -> String {
    use std::borrow::Cow;

    /// True for a line that begins with one to six `#` followed by a space.
    fn is_atx_heading(line: &str) -> bool {
        let hashes = line.bytes().take_while(|&b| b == b'#').count();
        (1..=6).contains(&hashes) && line.as_bytes().get(hashes) == Some(&b' ')
    }

    if text.trim().is_empty() {
        return " ".to_string();
    }

    let mut out = String::with_capacity(text.len() + 16);

    // Single-line comments only need their tail cleaned up.
    if !text.contains('\n') {
        out.push_str(text.trim_end());
        out.push('\n');
        return out;
    }

    // Rewrite a lone leading newline; anything else is used untouched.
    let text: Cow<'_, str> = match text.strip_prefix('\n') {
        Some(rest) if !rest.is_empty() && !rest.starts_with('\n') && !rest.trim().is_empty() => {
            if rest.starts_with(" ") {
                Cow::Owned(format!("\n\n{}", rest))
            } else {
                Cow::Owned(format!(" {}", rest))
            }
        }
        _ => Cow::Borrowed(text),
    };

    let lines: Vec<&str> = text.split('\n').collect();
    for (idx, &line) in lines.iter().enumerate() {
        let heading = is_atx_heading(line);

        // Pad the output so that two blank lines precede the heading.
        if heading && !out.is_empty() && !out.ends_with("\n\n\n") {
            if out.ends_with("\n\n") {
                out.push('\n');
            } else if out.ends_with('\n') {
                out.push_str("\n\n");
            }
        }

        out.push_str(line);

        if let Some(next) = lines.get(idx + 1) {
            out.push('\n');
            // Guarantee a blank line between a heading and its body.
            if heading && !next.trim().is_empty() {
                out.push('\n');
            }
        }
    }

    let multi_paragraph = out.trim_end_matches('\n').contains("\n\n");
    if multi_paragraph {
        if !out.ends_with("\n\n") {
            let tail = if out.ends_with('\n') { "\n" } else { "\n\n" };
            out.push_str(tail);
        }
    } else {
        while out.ends_with("\n\n") {
            out.pop();
        }
        if !out.ends_with('\n') {
            out.push('\n');
        }
    }
    out
}
/// Checks whether the line beginning at `start` is a `*`-style horizontal
/// rule: only `*`, spaces and tabs up to the newline, with at least three `*`.
///
/// Returns the index of the terminating `\n` (or `bytes.len()` when the line
/// is the last one), or `None` if the line is not a rule.
fn horizontal_rule_line_end(bytes: &[u8], start: usize) -> Option<usize> {
    let mut p = start;
    let mut star_count = 0;
    while p < bytes.len() && bytes[p] != b'\n' {
        match bytes[p] {
            b'*' => star_count += 1,
            b' ' | b'\t' => {}
            _ => return None,
        }
        p += 1;
    }
    if star_count >= 3 { Some(p) } else { None }
}

/// True when `result` is empty or already ends with a blank line.
fn ends_with_blank_line(result: &str) -> bool {
    result.is_empty() || result.ends_with("\n\n")
}

/// Rewrites Markdown emphasis and related markers into a canonical form.
///
/// Working byte by byte over `text`, the function:
///
/// * turns a leading `* ` bullet into `- `, and a `* * *` rule line into
///   `---` surrounded by blank lines;
/// * copies `@docs` lines, indented code blocks (four or more spaces after a
///   blank line), fenced code blocks and closed inline code spans verbatim;
/// * escapes backticks that open a code span which is never closed within
///   the paragraph;
/// * copies `**strong**` runs verbatim up to the closing `**` or end of line;
/// * rewrites `*emphasis*` as `_emphasis_`;
/// * escapes any remaining lone `*` with a backslash unless it is already
///   escaped.
///
/// Line-level state (`prev_line_blank`, `in_indented_code_block`, ...) is
/// updated at every line start, including lines whose first visible character
/// is non-ASCII and lines reached via a newline inside an unterminated `**`
/// run.
pub(super) fn normalize_emphasis(text: &str) -> String {
    let bytes = text.as_bytes();
    let len = bytes.len();
    let mut result = String::with_capacity(len);
    let mut i = 0;
    let mut at_line_start = true;
    let mut line_indent = 0u32;
    let mut in_docs_line = false;
    let mut prev_line_blank = false;
    let mut current_line_has_content = false;
    let mut in_indented_code_block = false;

    /// Copies the character starting at byte `pos` and returns its UTF-8 length.
    #[inline]
    fn push_utf8_char(text: &str, pos: usize, result: &mut String) -> usize {
        let Some(ch) = text[pos..].chars().next() else {
            unreachable!("callers only invoke this with pos < text.len()")
        };
        result.push(ch);
        ch.len_utf8()
    }

    while i < len {
        let ch = bytes[i];

        // Non-ASCII characters are never markup; copy them through.
        if ch > 127 {
            if at_line_start {
                // A non-ASCII first character is still line content: record
                // it so the line is not mistaken for a blank one, and update
                // the indented-code state exactly as for ASCII line starts.
                at_line_start = false;
                current_line_has_content = true;
                in_indented_code_block =
                    line_indent >= 4 && (prev_line_blank || in_indented_code_block);
            }
            i += push_utf8_char(text, i, &mut result);
            continue;
        }

        if ch == b'\n' {
            result.push('\n');
            i += 1;
            prev_line_blank = !current_line_has_content;
            at_line_start = true;
            line_indent = 0;
            in_docs_line = false;
            current_line_has_content = false;
            continue;
        }

        if at_line_start {
            // Leading spaces only contribute to the indent width.
            if ch == b' ' {
                line_indent += 1;
                result.push(' ');
                i += 1;
                continue;
            }
            at_line_start = false;
            current_line_has_content = true;
            // Four or more spaces start (or continue) an indented code block
            // only after a blank line or inside an existing block.
            in_indented_code_block =
                line_indent >= 4 && (prev_line_blank || in_indented_code_block);
            if text[i..].starts_with("@docs") {
                in_docs_line = true;
            }
            if !in_indented_code_block && ch == b'*' && i + 1 < len && bytes[i + 1] == b' ' {
                if let Some(line_end) = horizontal_rule_line_end(bytes, i) {
                    // `* * *` rule: emit `---` with a blank line on each side.
                    if !ends_with_blank_line(&result) {
                        result.push('\n');
                    }
                    result.push_str("---");
                    let mut after = line_end;
                    if after < len && bytes[after] == b'\n' {
                        after += 1;
                    }
                    let next_blank = after >= len || bytes[after] == b'\n';
                    if !next_blank {
                        result.push('\n');
                    }
                    i = line_end;
                    prev_line_blank = true;
                    current_line_has_content = false;
                    continue;
                }
                // `* item` bullet becomes `- item`.
                result.push('-');
                i += 1;
                continue;
            }
        }

        if in_docs_line {
            result.push(ch as char);
            i += 1;
            continue;
        }
        if in_indented_code_block {
            result.push(ch as char);
            i += 1;
            continue;
        }

        if ch == b'`' {
            let bt_start = i;
            let mut bt_count = 0;
            while i + bt_count < len && bytes[i + bt_count] == b'`' {
                bt_count += 1;
            }

            // Fenced code block: three or more backticks at column 0.
            if bt_count >= 3 && (bt_start == 0 || bytes[bt_start - 1] == b'\n') {
                let mut pos = bt_start;
                while pos < len && bytes[pos] != b'\n' {
                    pos += 1;
                }
                result.push_str(&text[bt_start..pos]);
                i = pos;
                if i < len && bytes[i] == b'\n' {
                    result.push('\n');
                    i += 1;
                }
                // Copy lines verbatim until a closing fence of at least the
                // opening length, followed only by spaces/tabs, is found.
                while i < len {
                    let line_start = i;
                    let mut bc = 0;
                    while i + bc < len && bytes[i + bc] == b'`' {
                        bc += 1;
                    }
                    if bc >= bt_count {
                        let mut j = i + bc;
                        let mut rest_ws = true;
                        while j < len && bytes[j] != b'\n' {
                            if bytes[j] != b' ' && bytes[j] != b'\t' {
                                rest_ws = false;
                                break;
                            }
                            j += 1;
                        }
                        if rest_ws {
                            result.push_str(&text[line_start..j]);
                            i = j;
                            break;
                        }
                    }
                    while i < len {
                        if bytes[i] > 127 {
                            i += push_utf8_char(text, i, &mut result);
                        } else {
                            result.push(bytes[i] as char);
                            i += 1;
                        }
                        if i > 0 && bytes[i - 1] == b'\n' {
                            break;
                        }
                    }
                }
                continue;
            }

            // Inline code span: look for a closing run of equal length before
            // the end of the paragraph (first blank line) or end of input.
            let after_open = bt_start + bt_count;
            let mut found_close = false;
            let mut close_start = after_open;
            let mut search_end = after_open;
            while search_end < len {
                if bytes[search_end] == b'\n' {
                    let nls = search_end + 1;
                    if nls >= len {
                        search_end = len;
                        break;
                    }
                    let mut ws = nls;
                    while ws < len && bytes[ws] == b' ' {
                        ws += 1;
                    }
                    if ws >= len || bytes[ws] == b'\n' {
                        break;
                    }
                }
                search_end += 1;
            }
            while close_start < search_end {
                if bytes[close_start] == b'`' {
                    let mut cc = 0;
                    while close_start + cc < len && bytes[close_start + cc] == b'`' {
                        cc += 1;
                    }
                    if cc == bt_count {
                        found_close = true;
                        result.push_str(&text[bt_start..close_start + cc]);
                        i = close_start + cc;
                        break;
                    }
                    close_start += cc;
                } else {
                    close_start += 1;
                }
            }
            if !found_close {
                // Unclosed span: escape each opening backtick.
                for _ in 0..bt_count {
                    result.push('\\');
                    result.push('`');
                }
                i = bt_start + bt_count;
            }
            continue;
        }

        if ch == b'*' {
            // `**strong**`: copy verbatim up to the closing `**` or newline.
            if i + 1 < len && bytes[i + 1] == b'*' {
                result.push('*');
                result.push('*');
                i += 2;
                while i < len {
                    if bytes[i] == b'*' && i + 1 < len && bytes[i + 1] == b'*' {
                        result.push('*');
                        result.push('*');
                        i += 2;
                        break;
                    }
                    if bytes[i] == b'\n' {
                        result.push('\n');
                        i += 1;
                        // Same bookkeeping as the main newline branch so the
                        // next line does not see stale blank-line state.
                        prev_line_blank = !current_line_has_content;
                        at_line_start = true;
                        line_indent = 0;
                        in_docs_line = false;
                        current_line_has_content = false;
                        break;
                    }
                    if bytes[i] > 127 {
                        i += push_utf8_char(text, i, &mut result);
                    } else {
                        result.push(bytes[i] as char);
                        i += 1;
                    }
                }
                continue;
            }

            // `*emphasis*` on one line becomes `_emphasis_`.
            if i + 1 < len && bytes[i + 1] != b' ' && bytes[i + 1] != b'\n' {
                let start = i + 1;
                let mut end = start;
                let mut found = false;
                while end < len {
                    if bytes[end] == b'*' && end > start && bytes[end - 1] != b' ' {
                        found = true;
                        break;
                    }
                    if bytes[end] == b'\n' {
                        break;
                    }
                    end += 1;
                }
                if found {
                    result.push('_');
                    result.push_str(&text[start..end]);
                    result.push('_');
                    i = end + 1;
                    continue;
                }
            }

            // Lone `*`: escape it unless a backslash already precedes it.
            if i > 0 && bytes[i - 1] == b'\\' {
                result.push('*');
            } else {
                result.push('\\');
                result.push('*');
            }
            i += 1;
        } else {
            result.push(ch as char);
            i += 1;
        }
    }
    result
}
/// Drops the empty reference part of collapsed links, turning every `][]`
/// into `]` (e.g. `[name][]` becomes `[name]`).
pub(super) fn normalize_empty_link_refs(text: &str) -> String {
    let pieces: Vec<&str> = text.split("][]").collect();
    pieces.join("]")
}
/// Applies `normalize_char_literals_in_code_line` to every line of `text`
/// that starts with the code-line prefix, leaving all other lines untouched.
pub(super) fn normalize_doc_char_literals(text: &str) -> String {
    text.split('\n')
        .map(|line| {
            if line.starts_with(" ") {
                normalize_char_literals_in_code_line(line)
            } else {
                line.to_string()
            }
        })
        .collect::<Vec<String>>()
        .join("\n")
}
/// Simplifies character and string literals found in a single line of code.
///
/// * `'\"'` becomes `'"'`.
/// * `'\u{HEX}'` becomes the literal character when the code point is valid,
///   is not a control character, is not rejected by `should_unicode_escape`,
///   and is neither `'` nor `\` (writing those unescaped would produce an
///   invalid literal such as `'''` or `'\'`).
/// * Inside a terminated `"..."` string, `\u{HEX}` escapes are replaced under
///   the same rules, except that `"` and `\` are the characters kept escaped.
///   Other escape sequences are copied through unchanged.
///
/// An unterminated string is not rewritten: its opening quote is copied and
/// scanning resumes at the next character.
pub(super) fn normalize_char_literals_in_code_line(line: &str) -> String {
    // Decodes hex digits into a character that may be written literally.
    let decode_printable = |hex: &[char]| -> Option<char> {
        let hex: String = hex.iter().collect();
        u32::from_str_radix(&hex, 16)
            .ok()
            .and_then(char::from_u32)
            .filter(|ch| !ch.is_control() && !should_unicode_escape(*ch))
    };

    let chars: Vec<char> = line.chars().collect();
    let mut out = String::with_capacity(line.len());
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '\'' && i + 3 < chars.len() {
            // '\"'  ->  '"'
            if chars[i + 1] == '\\' && chars[i + 2] == '"' && chars[i + 3] == '\'' {
                out.push_str("'\"'");
                i += 4;
                continue;
            }
            // '\u{HEX}'  ->  'c'
            if chars[i + 1] == '\\' && chars[i + 2] == 'u' && chars[i + 3] == '{' {
                let mut j = i + 4;
                while j < chars.len() && chars[j] != '}' {
                    j += 1;
                }
                if j + 1 < chars.len() && chars[j + 1] == '\'' {
                    // A quote or backslash must stay escaped inside a char literal.
                    let decoded = decode_printable(&chars[i + 4..j])
                        .filter(|ch| *ch != '\'' && *ch != '\\');
                    if let Some(ch) = decoded {
                        out.push('\'');
                        out.push(ch);
                        out.push('\'');
                        i = j + 2;
                        continue;
                    }
                }
            }
        }

        if chars[i] == '"' {
            let mut j = i + 1;
            let mut buf = String::from('"');
            while j < chars.len() {
                let c = chars[j];
                if c == '\\' && j + 1 < chars.len() {
                    let nx = chars[j + 1];
                    if nx == 'u' && j + 2 < chars.len() && chars[j + 2] == '{' {
                        let mut k = j + 3;
                        while k < chars.len() && chars[k] != '}' {
                            k += 1;
                        }
                        if k < chars.len() {
                            // A quote or backslash must stay escaped inside a string.
                            let decoded = decode_printable(&chars[j + 3..k])
                                .filter(|ch| *ch != '"' && *ch != '\\');
                            if let Some(ch) = decoded {
                                buf.push(ch);
                                j = k + 1;
                                continue;
                            }
                        }
                    }
                    // Any other escape (or an undecodable \u) is copied as-is.
                    buf.push(c);
                    buf.push(nx);
                    j += 2;
                    continue;
                }
                buf.push(c);
                j += 1;
                if c == '"' {
                    break;
                }
            }
            if buf.ends_with('"') && buf.len() > 1 {
                out.push_str(&buf);
                i = j;
                continue;
            }
            // No closing quote: emit the quote alone and keep scanning.
            out.push('"');
            i += 1;
            continue;
        }

        out.push(chars[i]);
        i += 1;
    }
    out
}
/// Reduces any run of blank lines that directly precedes a link reference
/// definition (`[label]: target`) to a single blank line.
///
/// Other runs of blank lines keep their length, but every blank line is
/// emitted without its whitespace. Input that is entirely whitespace is
/// returned unchanged.
pub(super) fn collapse_blank_lines_in_doc(text: &str) -> String {
    /// True for `[label]:` followed by a space or tab, ignoring leading whitespace.
    fn is_link_reference(line: &str) -> bool {
        let body = line.trim_start();
        if !body.starts_with('[') {
            return false;
        }
        match body.split_once(']') {
            Some((_, tail)) => tail.starts_with(": ") || tail.starts_with(":\t"),
            None => false,
        }
    }

    if text.trim().is_empty() {
        return text.to_string();
    }

    let lines: Vec<&str> = text.split('\n').collect();
    let mut kept: Vec<&str> = Vec::with_capacity(lines.len());
    let mut pos = 0;
    while pos < lines.len() {
        let current = lines[pos];
        if !current.trim().is_empty() {
            kept.push(current);
            pos += 1;
            continue;
        }

        // Measure the whole run of blank lines starting here.
        let run = lines[pos..]
            .iter()
            .take_while(|&&l| l.trim().is_empty())
            .count();
        let after = pos + run;
        let before_link_ref = lines.get(after).copied().map_or(false, is_link_reference);
        let emit = if before_link_ref { 1 } else { run };
        kept.extend(std::iter::repeat("").take(emit));
        pos = after;
    }
    kept.join("\n")
}
/// Inserts a blank line before an `@docs` line that directly follows prose.
///
/// The preceding line counts as prose when it is non-blank, does not start
/// with a space, and (after leading whitespace) does not begin with `@docs`,
/// `#`, `- `, `* `, `\* ` or a code fence.
pub(super) fn ensure_blank_before_docs_after_prose(text: &str) -> String {
    // Prefixes (after leading whitespace) that mark a non-prose line.
    const NON_PROSE_PREFIXES: [&str; 6] = ["@docs", "#", "- ", "* ", "\\* ", "```"];

    let lines: Vec<&str> = text.split('\n').collect();
    let mut out: Vec<&str> = Vec::with_capacity(lines.len() + 4);
    let mut previous: Option<&str> = None;
    for &line in &lines {
        if let Some(prev) = previous {
            if line.trim_start().starts_with("@docs") {
                let prev_body = prev.trim_start();
                let follows_prose = !prev.trim().is_empty()
                    && !prev.starts_with(" ")
                    && !NON_PROSE_PREFIXES.iter().any(|p| prev_body.starts_with(p));
                if follows_prose {
                    out.push("");
                }
            }
        }
        out.push(line);
        previous = Some(line);
    }
    out.join("\n")
}
/// Canonicalizes `@docs` lines.
///
/// The comma-separated names after `@docs` are trimmed, empty entries are
/// dropped, and the rest are joined with `", "`. When an `@docs` line ends
/// with a comma, each following continuation line is rewritten as its own
/// `@docs` line with the same leading whitespace. Continuation stops at a
/// blank line, at a line starting with `@` or `#`, or after a line that does
/// not itself end with a comma. All other lines are copied unchanged.
pub(super) fn normalize_docs_lines(text: &str) -> String {
    /// Trims each comma-separated entry, drops empties, joins with `", "`.
    fn join_names(list: &str) -> String {
        list.split(',')
            .map(str::trim)
            .filter(|name| !name.is_empty())
            .collect::<Vec<&str>>()
            .join(", ")
    }

    let lines: Vec<&str> = text.split('\n').collect();
    let mut out = String::with_capacity(text.len());
    let mut cursor = 0;
    while let Some(&line) = lines.get(cursor) {
        match line.trim().strip_prefix("@docs") {
            None => out.push_str(line),
            Some(rest) => {
                let indent = &line[..line.len() - line.trim_start().len()];
                out.push_str(indent);
                out.push_str("@docs ");
                out.push_str(&join_names(rest));

                // A trailing comma means the list continues on later lines.
                let mut expect_more = rest.trim_end().ends_with(',');
                while expect_more {
                    let Some(next) = lines.get(cursor + 1).map(|l| l.trim()) else {
                        break;
                    };
                    if next.is_empty() || next.starts_with('@') || next.starts_with('#') {
                        break;
                    }
                    cursor += 1;
                    out.push('\n');
                    out.push_str(indent);
                    out.push_str("@docs ");
                    out.push_str(&join_names(next));
                    expect_more = next.ends_with(',');
                }
            }
        }
        if cursor + 1 < lines.len() {
            out.push('\n');
        }
        cursor += 1;
    }
    out
}
/// Removes a single stray leading space from prose lines.
///
/// A line is left untouched when it is the first line, is blank, belongs to
/// an indented code block (a space-prefixed run that starts the text or
/// follows a blank line), or begins, after leading whitespace, with `- `,
/// `@docs`, `#` or an ordered-list prefix. Any other line that starts with
/// exactly one space followed by a non-whitespace byte loses that space.
pub(super) fn strip_paragraph_leading_whitespace(text: &str) -> String {
    let lines: Vec<&str> = text.split('\n').collect();
    let mut out = String::with_capacity(text.len());
    let mut inside_code = false;

    for (idx, &line) in lines.iter().enumerate() {
        if idx > 0 {
            out.push('\n');
        }

        let blank = line.trim().is_empty();
        if line.starts_with(" ") {
            // An indented line opens a code block only at a paragraph start.
            if idx == 0 || lines[idx - 1].trim().is_empty() {
                inside_code = true;
            }
        } else if !blank {
            inside_code = false;
        }

        let kept = if inside_code || blank || idx == 0 {
            line
        } else {
            let body = line.trim_start();
            let structural = body.starts_with("- ")
                || body.starts_with("@docs")
                || body.starts_with('#')
                || strip_ordered_list_prefix(body).is_some();
            match line.as_bytes() {
                [b' ', second, ..] if !structural && !second.is_ascii_whitespace() => &line[1..],
                _ => line,
            }
        };
        out.push_str(kept);
    }
    out
}
/// Collapses runs of spaces inside prose lines down to a single space.
///
/// Lines are copied verbatim when they are code fences or lie inside a fenced
/// block, belong to an indented code block, are blank, start with `>` (after
/// leading whitespace), or carry an ordered-list prefix. On the remaining
/// lines the leading whitespace is preserved, and spaces between backticks
/// (inline code spans) are kept as written.
pub(super) fn collapse_prose_internal_spaces(text: &str) -> String {
    let lines: Vec<&str> = text.split('\n').collect();
    let mut out = String::with_capacity(text.len());
    let mut indented_code = false;
    let mut fenced = false;

    for (idx, &line) in lines.iter().enumerate() {
        if idx > 0 {
            out.push('\n');
        }

        // A fence line toggles the fenced state and is copied as-is.
        if line.trim().starts_with("```") {
            fenced = !fenced;
            out.push_str(line);
            continue;
        }
        if fenced {
            out.push_str(line);
            continue;
        }

        let blank = line.trim().is_empty();
        if line.starts_with(" ") {
            if idx == 0 || lines[idx - 1].trim().is_empty() {
                indented_code = true;
            }
        } else if !blank {
            indented_code = false;
        }

        let body = line.trim_start();
        let verbatim = indented_code
            || blank
            || body.starts_with('>')
            || strip_ordered_list_prefix(body).is_some();
        if verbatim {
            out.push_str(line);
            continue;
        }

        let (indent, prose) = line.split_at(line.len() - body.len());
        out.push_str(indent);

        let mut in_code_span = false;
        let mut last_was_collapsed_space = false;
        for ch in prose.chars() {
            if ch == '`' {
                in_code_span = !in_code_span;
                last_was_collapsed_space = false;
                out.push('`');
            } else if ch == ' ' && !in_code_span {
                // Emit only the first space of a run.
                if !last_was_collapsed_space {
                    out.push(' ');
                }
                last_was_collapsed_space = true;
            } else {
                last_was_collapsed_space = false;
                out.push(ch);
            }
        }
    }
    out
}
/// Adds an extra blank line before certain indented code blocks.
///
/// A code block starts at a non-blank, space-prefixed line that opens the
/// text or follows a blank line, and extends over following blank or
/// space-prefixed lines. A block qualifies when:
///
/// * its last non-blank line has exactly four bytes of leading whitespace and
///   starts with `--`;
/// * its first line does not start with `import `;
/// * every non-comment line with exactly four bytes of leading whitespace
///   satisfies `looks_like_code_block_decl`, and at least one such line exists.
///
/// For a qualifying block preceded by one blank line and, before that, a
/// prose line (non-blank and not starting with a space, tab, `#`, `- `, `* `,
/// `\* `, `@docs ` or a code fence), one more blank line is inserted unless
/// the output already ends with two blank lines.
pub(super) fn ensure_blank_before_code_block_with_trailing_comment(text: &str) -> String {
    fn is_blank(line: &str) -> bool {
        line.trim().is_empty()
    }
    /// Number of bytes of leading whitespace.
    fn indent_width(line: &str) -> usize {
        line.len() - line.trim_start().len()
    }
    // Prefixes that disqualify the line two above the block from being prose.
    const NON_PROSE_PREFIXES: [&str; 8] = [" ", "\t", "#", "- ", "* ", "\\* ", "@docs ", "```"];

    let lines: Vec<&str> = text.split('\n').collect();

    // Pass 1: flag the first line of every qualifying code block.
    let mut flagged = vec![false; lines.len()];
    let mut cursor = 0;
    while cursor < lines.len() {
        let first = lines[cursor];
        let opens_block = first.starts_with(" ")
            && !is_blank(first)
            && (cursor == 0 || is_blank(lines[cursor - 1]));
        if !opens_block {
            cursor += 1;
            continue;
        }

        // The block swallows every following blank or space-prefixed line.
        let extent = lines[cursor + 1..]
            .iter()
            .take_while(|&&l| is_blank(l) || l.starts_with(" "))
            .count();
        let block_end = cursor + extent;
        let last_non_blank = (cursor..=block_end)
            .rev()
            .find(|&k| !is_blank(lines[k]))
            .unwrap_or(cursor);

        let closing = lines[last_non_blank];
        let ends_with_comment =
            indent_width(closing) == 4 && closing.trim_start().starts_with("--");
        let starts_with_import = first.trim_start().starts_with("import ");

        if ends_with_comment && !starts_with_import {
            let mut saw_decl = false;
            let mut only_decls = true;
            for &candidate in &lines[cursor..=last_non_blank] {
                // Only top-level (four-byte indent) non-comment lines are judged.
                if is_blank(candidate) || indent_width(candidate) != 4 {
                    continue;
                }
                let body = candidate.trim_start();
                if body.starts_with("--") {
                    continue;
                }
                if looks_like_code_block_decl(body) {
                    saw_decl = true;
                } else {
                    only_decls = false;
                    break;
                }
            }
            if saw_decl && only_decls {
                flagged[cursor] = true;
            }
        }
        cursor = block_end + 1;
    }

    // Pass 2: copy the lines, inserting the extra blank where flagged.
    let mut out: Vec<&str> = Vec::with_capacity(lines.len() + 8);
    for (idx, &line) in lines.iter().enumerate() {
        if flagged[idx] && idx >= 2 {
            let gap = lines[idx - 1];
            let above = lines[idx - 2];
            let above_is_prose =
                !is_blank(above) && !NON_PROSE_PREFIXES.iter().any(|p| above.starts_with(p));
            if is_blank(gap) && above_is_prose {
                let already_double =
                    matches!(out.as_slice(), [.., a, b] if is_blank(a) && is_blank(b));
                if !already_double {
                    out.push("");
                }
            }
        }
        out.push(line);
    }
    out.join("\n")
}
/// Removes trailing whitespace from every line except the last one, which is
/// copied exactly as given.
pub(super) fn strip_trailing_whitespace_in_doc(text: &str) -> String {
    match text.rsplit_once('\n') {
        // A single line is also the last line: nothing to strip.
        None => text.to_string(),
        Some((head, last)) => {
            let mut out = String::with_capacity(text.len());
            for line in head.split('\n') {
                out.push_str(line.trim_end());
                out.push('\n');
            }
            out.push_str(last);
            out
        }
    }
}