use std::collections::HashSet;
use std::path::Path;
/// Language family used to pick the debug-log and comment patterns for a file.
///
/// Languages that share the same patterns are grouped under one variant.
/// `PartialEq`/`Eq` are derived so variants can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StripLang {
    /// Rust sources.
    Rust,
    /// Python sources and stub files.
    Python,
    /// Go sources.
    Go,
    /// JavaScript and TypeScript sources (including JSX/TSX variants).
    JsTs,
    /// Java, Kotlin, C# and Scala sources, which share one pattern set.
    JavaKotlinCSharp,
    /// C and C++ sources and headers.
    CppC,
}
/// Maps a file path to the language family whose noise patterns apply to it.
///
/// Only the file extension is inspected. It is compared ASCII-case-insensitively
/// so that files such as `MAIN.RS` or `Foo.Java` are recognised as well.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension is not one of the supported ones.
fn detect_lang(path: &Path) -> Option<StripLang> {
    // Normalise once so the match arms only need lowercase spellings.
    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
    match ext.as_str() {
        "rs" => Some(StripLang::Rust),
        "py" | "pyi" => Some(StripLang::Python),
        "go" => Some(StripLang::Go),
        "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" | "mts" | "cts" => Some(StripLang::JsTs),
        "java" | "kt" | "kts" | "cs" | "scala" | "sc" => Some(StripLang::JavaKotlinCSharp),
        "c" | "h" | "cpp" | "hpp" | "cc" | "cxx" | "hh" | "hxx" => Some(StripLang::CppC),
        _ => None,
    }
}
/// Computes the set of 1-based line numbers inside `def_range` that are
/// considered noise and may be omitted.
///
/// A line is marked when it is:
/// - the second or later line in a run of consecutive blank lines,
/// - a debug/trace logging statement for the file's language, or
/// - a plain line comment that carries no keep-marker.
///
/// Every line is judged on its own; a logging call that spans several lines
/// only has its first line marked.
///
/// # Parameters
/// - `content`: full text of the file.
/// - `path`: used only to determine the language from the file extension.
/// - `def_range`: inclusive `(start, end)` 1-based line range to examine. A
///   start of `0` is treated as line 1.
///
/// # Returns
/// An empty set when `def_range` is `None` or the language is unsupported;
/// otherwise the line numbers to skip. Lines beyond the end of `content` are
/// ignored.
pub(crate) fn strip_noise(
    content: &str,
    path: &Path,
    def_range: Option<(u32, u32)>,
) -> HashSet<u32> {
    let mut skip = HashSet::new();
    let Some((range_start, range_end)) = def_range else {
        return skip;
    };
    let Some(lang) = detect_lang(path) else {
        return skip;
    };

    // Line numbers are 1-based. Clamping protects the `- 1` below from
    // underflowing (a panic in debug builds) when a caller passes a start of 0.
    let first_line = range_start.max(1);
    // u32 -> usize is a lossless widening on 32- and 64-bit targets.
    let lines_before = (first_line - 1) as usize;

    let mut consecutive_blanks: u32 = 0;
    // Pair each requested line number with its text without materialising the
    // whole file; the zip ends at whichever runs out first (range or file).
    let numbered_lines = (first_line..=range_end).zip(content.lines().skip(lines_before));

    for (line_num, line) in numbered_lines {
        let trimmed = line.trim();

        if trimmed.is_empty() {
            consecutive_blanks += 1;
            // Keep the first blank of a run as a separator; drop the rest.
            if consecutive_blanks >= 2 {
                skip.insert(line_num);
            }
            continue;
        }
        consecutive_blanks = 0;

        if is_debug_log(trimmed, lang) || is_strippable_comment(trimmed, lang) {
            skip.insert(line_num);
        }
    }

    skip
}
/// Reports whether `trimmed` begins with one of the debug/trace output calls
/// recognised for `lang`.
///
/// `trimmed` is expected to have no leading whitespace; the check is a plain
/// prefix comparison against a fixed per-language list.
fn is_debug_log(trimmed: &str, lang: StripLang) -> bool {
    let prefixes: &[&str] = match lang {
        StripLang::Rust => &[
            "log::debug!",
            "log::trace!",
            "tracing::debug!",
            "tracing::trace!",
            "debug!(",
            "trace!(",
            "dbg!(",
        ],
        StripLang::Python => &[
            "logger.debug(",
            "logging.debug(",
            "print(",
            "pprint(",
            "pprint.pprint(",
        ],
        StripLang::Go => &[
            "log.Printf(",
            "log.Println(",
            "log.Print(",
            "fmt.Printf(",
            "fmt.Println(",
            "fmt.Print(",
        ],
        StripLang::JsTs => &["console.log(", "console.debug(", "console.trace("],
        StripLang::JavaKotlinCSharp => &[
            "System.out.print",
            "logger.debug(",
            "log.debug(",
            "Log.d(",
            "println(",
        ],
        StripLang::CppC => &["printf(", "std::cout", "cout ", "cout<<"],
    };

    prefixes.iter().any(|&prefix| trimmed.starts_with(prefix))
}
/// Words that, when found anywhere in a comment (compared case-insensitively),
/// cause the comment to be kept.
const KEEP_MARKERS: &[&str] = &["TODO", "FIXME", "NOTE", "HACK", "SAFETY", "WARN"];

/// Reports whether `trimmed` is a plain line comment that may be dropped.
///
/// A line qualifies when it starts with the language's line-comment token, is
/// not one of the language's documentation-comment forms, and contains none of
/// the [`KEEP_MARKERS`]. `trimmed` is expected to have no leading whitespace.
fn is_strippable_comment(trimmed: &str, lang: StripLang) -> bool {
    // Prefixes identifying documentation (or doc-block continuation) lines that
    // must never be stripped for the given language.
    let doc_prefixes: &[&str] = match lang {
        StripLang::Rust => &["///", "//!", "/**", "#[doc"],
        StripLang::Python => &["\"\"\"", "'''"],
        StripLang::Go => &[],
        // A closing `*/` can never begin with `//`, so listing it as a prefix
        // yields the same answer as an exact comparison.
        StripLang::JsTs => &["/**", "* ", "*/"],
        StripLang::JavaKotlinCSharp => &["/**", "///"],
        StripLang::CppC => &["/**", "///", "//!"],
    };
    if doc_prefixes.iter().any(|&doc| trimmed.starts_with(doc)) {
        return false;
    }

    let line_comment = match lang {
        StripLang::Python => "#",
        StripLang::Rust
        | StripLang::Go
        | StripLang::JsTs
        | StripLang::JavaKotlinCSharp
        | StripLang::CppC => "//",
    };
    if !trimmed.starts_with(line_comment) {
        return false;
    }

    // Markers are matched as substrings of the upper-cased line, so e.g.
    // "warning" also counts as containing "WARN".
    let shouted = trimmed.to_ascii_uppercase();
    KEEP_MARKERS.iter().all(|&marker| !shouted.contains(marker))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a dummy file path carrying the given extension.
    fn path(ext: &str) -> PathBuf {
        PathBuf::from(format!("test.{ext}"))
    }

    /// Runs `strip_noise` over `range` and asserts that every line in
    /// `stripped` is marked and every line in `kept` is not.
    fn check(src: &str, ext: &str, range: (u32, u32), stripped: &[u32], kept: &[u32]) {
        let skip = strip_noise(src, &path(ext), Some(range));
        for line in stripped {
            assert!(skip.contains(line), "line {line} should be stripped");
        }
        for line in kept {
            assert!(!skip.contains(line), "line {line} should be kept");
        }
    }

    #[test]
    fn consecutive_blanks_collapsed() {
        let content = concat!(
            "fn foo() {\n",
            " let x = 1;\n",
            "\n",
            "\n",
            "\n",
            " let y = 2;\n",
            "}\n",
        );
        // The first blank line of the run survives; the following ones do not.
        check(content, "rs", (1, 6), &[4, 5], &[3]);
    }

    #[test]
    fn rust_debug_log_stripped() {
        let content = concat!(
            "fn foo() {\n",
            " debug!(\"hi\");\n",
            " dbg!(x);\n",
            " error!(\"bad\");\n",
            "}\n",
        );
        check(content, "rs", (1, 5), &[2, 3], &[4]);
    }

    #[test]
    fn js_console_log_stripped() {
        let content = concat!(
            "function foo() {\n",
            " console.log('hi');\n",
            " console.error('bad');\n",
            "}\n",
        );
        check(content, "ts", (1, 4), &[2], &[3]);
    }

    #[test]
    fn python_print_stripped() {
        let content = concat!("def foo():\n", " print(x)\n", " logger.error('bad')\n");
        check(content, "py", (1, 3), &[2], &[3]);
    }

    #[test]
    fn go_fmt_println_stripped() {
        let content = concat!(
            "func foo() {\n",
            "\tfmt.Println(\"debug\")\n",
            "\tlog.Fatalf(\"fatal\")\n",
            "}\n",
        );
        check(content, "go", (1, 4), &[2], &[3]);
    }

    #[test]
    fn comment_stripped_unless_marker() {
        let content = concat!(
            "fn foo() {\n",
            " // just a comment\n",
            " // TODO: fix this\n",
            " /// doc comment\n",
            "}\n",
        );
        check(content, "rs", (1, 5), &[2], &[3, 4]);
    }

    #[test]
    fn no_range_returns_empty() {
        let skip = strip_noise("fn foo() {}\n", &path("rs"), None);
        assert!(skip.is_empty());
    }

    #[test]
    fn unsupported_lang_returns_empty() {
        let skip = strip_noise("fn foo() {}\n", &path("txt"), Some((1, 1)));
        assert!(skip.is_empty());
    }

    #[test]
    fn ruby_not_supported() {
        let content = concat!("def foo\n", " puts 'hi'\n", "end\n");
        let skip = strip_noise(content, &path("rb"), Some((1, 3)));
        assert!(skip.is_empty());
    }

    #[test]
    fn jsdoc_continuation_preserved() {
        let content = concat!(
            "function f() {\n",
            " /**\n",
            " * JSDoc line\n",
            " */\n",
            " // plain comment\n",
            "}\n",
        );
        check(content, "js", (1, 6), &[5], &[2, 3, 4]);
    }
}