/// Outcome of a section-stripping pass over a document.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct StripResult {
    /// Document text that remains after the pass.
    pub content: String,
    /// Header lines of the sections that were removed, in document order.
    pub stripped_headers: Vec<String>,
}
/// Removes sections whose body holds no real content.
///
/// A section starts at a line that `header_level` recognises as a header and
/// runs until the next header of the same or a shallower level (or the end of
/// the input). When `body_is_empty` accepts the lines between those two points,
/// the header and its whole body are dropped and the header line is recorded in
/// [`StripResult::stripped_headers`].
///
/// Lines that survive are copied verbatim, including their original line
/// terminator, so CRLF documents stay CRLF and trailing blank lines are kept.
/// If nothing is stripped the input is returned unchanged.
pub fn strip_empty_sections(content: &str) -> StripResult {
    // `raw_lines` keeps each line's terminator for faithful reassembly, while
    // `lines` is the terminator-free view used for classification. Both split
    // on '\n' and neither yields a trailing empty item, so index `k` refers to
    // the same line in both vectors.
    let raw_lines: Vec<&str> = content.split_inclusive('\n').collect();
    let lines: Vec<&str> = content.lines().collect();
    debug_assert_eq!(raw_lines.len(), lines.len());

    let total = lines.len();
    if total == 0 {
        return StripResult::default();
    }

    let levels: Vec<u8> = lines.iter().map(|line| header_level(line)).collect();
    let mut drop_ranges: Vec<(usize, usize)> = Vec::new();
    let mut stripped_headers: Vec<String> = Vec::new();

    let mut i = 0;
    while i < total {
        let lvl = levels[i];
        if lvl != 0 {
            let body_start = i + 1;
            // The section ends at the next header that is not nested deeper.
            let body_end = levels[body_start..]
                .iter()
                .position(|&other| other != 0 && other <= lvl)
                .map_or(total, |offset| body_start + offset);

            if body_is_empty(&lines[body_start..body_end]) {
                drop_ranges.push((i, body_end));
                stripped_headers.push(lines[i].to_string());
                // Skip the dropped body; nested headers inside it go with it.
                i = body_end;
                continue;
            }
        }
        i += 1;
    }

    if drop_ranges.is_empty() {
        return StripResult {
            content: content.to_string(),
            stripped_headers,
        };
    }

    // Ranges were pushed in ascending order and never overlap, so a single
    // forward cursor is enough to copy everything between them.
    let mut out = String::with_capacity(content.len());
    let mut cursor = 0;
    for &(start, end) in &drop_ranges {
        out.extend(raw_lines[cursor..start].iter().copied());
        cursor = end;
    }
    out.extend(raw_lines[cursor..].iter().copied());

    if content.ends_with('\n') {
        // Only reachable with an empty `out` (every line was dropped): keep the
        // input's final newline.
        if !out.ends_with('\n') {
            out.push('\n');
        }
    } else if out.ends_with('\n') {
        // The unterminated final line was dropped; remove the terminator that
        // used to separate it from the preceding line so the output does not
        // gain a line ending the input never had at its end.
        out.pop();
        if out.ends_with('\r') {
            out.pop();
        }
    }

    StripResult {
        content: out,
        stripped_headers,
    }
}
/// Returns the header depth (2 through 6) of `line`, or 0 when the line is not
/// treated as a section header.
///
/// Leading whitespace is ignored. The run of `#` characters must be two to six
/// long and be followed either by a space or by nothing at all; a single `#`
/// therefore yields 0.
fn header_level(line: &str) -> u8 {
    let candidate = line.trim_start();
    // '#' is a single byte, so the character count doubles as a byte offset.
    let depth = candidate.chars().take_while(|&c| c == '#').count();
    match depth {
        2..=6 => {
            let tail = &candidate[depth..];
            if tail.is_empty() || tail.starts_with(' ') {
                depth as u8
            } else {
                0
            }
        }
        _ => 0,
    }
}
/// Reports whether a section body carries no real content.
///
/// Blank lines are ignored. Every remaining line must be structural-only and
/// must not contain an in-flight marker; a marker makes the body count as
/// non-empty even when the line is otherwise structural.
fn body_is_empty(body: &[&str]) -> bool {
    body.iter()
        .map(|raw| raw.trim())
        .filter(|text| !text.is_empty())
        .all(|text| !is_in_flight_marker(text) && is_structural_only(text))
}
/// Reports whether `trimmed` contains a work-in-progress marker (`tbd`, `todo`,
/// `wip` or `placeholder`) as a standalone word, compared ASCII
/// case-insensitively.
///
/// A hit counts only when the bytes directly before and after it are not ASCII
/// alphanumerics or `_`. Every occurrence of each marker is examined, so a
/// marker is still found when an earlier occurrence is embedded in a longer
/// word (for example `"autodoc TODO"`).
fn is_in_flight_marker(trimmed: &str) -> bool {
    const MARKERS: [&str; 4] = ["tbd", "todo", "wip", "placeholder"];

    // ASCII lowercasing keeps byte offsets identical to the input.
    let lower = trimmed.to_ascii_lowercase();
    let bytes = lower.as_bytes();
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    MARKERS.iter().any(|&marker| {
        lower.match_indices(marker).any(|(pos, hit)| {
            let before_ok = pos == 0 || !is_word_byte(bytes[pos - 1]);
            // Running off the end of the string counts as a word boundary.
            let after_ok = bytes
                .get(pos + hit.len())
                .map_or(true, |&b| !is_word_byte(b));
            before_ok && after_ok
        })
    })
}
/// Reports whether a trimmed line is pure scaffolding rather than content: a
/// horizontal rule, a table separator row, a line made only of an HTML comment,
/// or a `>`-prefixed line shorter than 40 bytes.
fn is_structural_only(trimmed: &str) -> bool {
    // Upper bound (exclusive, in bytes) for a `>` line to count as scaffolding.
    const SHORT_QUOTE_LIMIT: usize = 40;

    let is_short_quote = || trimmed.starts_with('>') && trimmed.len() < SHORT_QUOTE_LIMIT;

    is_horizontal_rule(trimmed)
        || is_table_separator(trimmed)
        || is_html_comment_only(trimmed)
        || is_short_quote()
}
/// Reports whether a trimmed line is a horizontal rule: it starts with `-`,
/// `*` or `_`, consists solely of that character and spaces, and contains the
/// character at least three times.
fn is_horizontal_rule(trimmed: &str) -> bool {
    let marker = match trimmed.bytes().next() {
        Some(b) if matches!(b, b'-' | b'*' | b'_') => b,
        _ => return false,
    };

    let mut marks = 0usize;
    for byte in trimmed.bytes() {
        if byte == marker {
            marks += 1;
        } else if byte != b' ' {
            // Anything besides the rule character and spaces disqualifies it.
            return false;
        }
    }
    marks >= 3
}
/// Reports whether a trimmed line is a table separator row such as
/// `|---|:--:|`.
///
/// The line must start and end with distinct `|` characters, and every cell
/// between them must be non-blank and consist only of `-`, `:` and spaces.
/// A lone `"|"` is rejected rather than sliced, which would panic.
fn is_table_separator(trimmed: &str) -> bool {
    // Stripping the two pipes one after the other guarantees they are distinct
    // characters, so the single-character input "|" falls through to `None`.
    let inner = match trimmed
        .strip_prefix('|')
        .and_then(|rest| rest.strip_suffix('|'))
    {
        Some(inner) => inner,
        None => return false,
    };

    inner.split('|').all(|cell| {
        let cell = cell.trim();
        !cell.is_empty() && cell.chars().all(|ch| matches!(ch, '-' | ':' | ' '))
    })
}
/// Reports whether a trimmed line consists of nothing but one or more HTML
/// comments, optionally separated by whitespace.
///
/// Each comment is closed by the first `-->` after its `<!--`; the degenerate
/// forms `<!-->` and `<!--->` are accepted as complete comments. Any text
/// outside a comment (for example `<!-- a --> visible <!-- b -->`) makes the
/// line count as content.
fn is_html_comment_only(trimmed: &str) -> bool {
    let mut rest = trimmed;
    loop {
        let body = match rest.strip_prefix("<!--") {
            Some(body) => body,
            None => return false,
        };

        let after = if let Some(after) = body.strip_prefix('>') {
            // `<!-->`
            after
        } else if let Some(after) = body.strip_prefix("->") {
            // `<!--->`
            after
        } else {
            match body.find("-->") {
                Some(close) => &body[close + "-->".len()..],
                // Unterminated comment: not a complete comment-only line.
                None => return false,
            }
        };

        // Each pass consumes at least the opening delimiter, so this terminates.
        rest = after.trim_start();
        if rest.is_empty() {
            return true;
        }
    }
}