use std::path::Path;
use alint_core::{
Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, ScopeFilter, Violation,
};
use serde::Deserialize;
/// Options for the commented-out-code rule, deserialized from the rule spec in `build`.
///
/// Every field is optional in the configuration; a missing field takes the
/// default named in its `serde` attribute.
#[derive(Debug, Deserialize)]
struct Options {
/// Comment syntax to assume; `Language::default()` is used when omitted.
#[serde(default)]
language: Language,
/// Smallest comment block, in lines, that is evaluated (default 3).
/// `build` rejects values below 2.
#[serde(default = "default_min_lines")]
min_lines: usize,
/// Density score at or above which a block is reported (default 0.5).
/// `build` rejects values outside `0.0..=1.0`.
#[serde(default = "default_threshold")]
threshold: f64,
/// Blocks whose first line number is at or below this value are ignored
/// (default 30).
#[serde(default = "default_skip_leading_lines")]
skip_leading_lines: usize,
}
/// Serde default for `Options::min_lines`.
fn default_min_lines() -> usize {
    const MIN_LINES: usize = 3;
    MIN_LINES
}
/// Serde default for `Options::threshold`.
fn default_threshold() -> f64 {
    const THRESHOLD: f64 = 0.5;
    THRESHOLD
}
/// Serde default for `Options::skip_leading_lines`.
fn default_skip_leading_lines() -> usize {
    const SKIP_LEADING_LINES: usize = 30;
    SKIP_LEADING_LINES
}
/// Language whose comment syntax is used when scanning a file.
///
/// Deserialized from snake_case names (for example `rust` or `cpp`).
#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum Language {
/// Default value: the concrete language is picked from the file extension
/// by `Language::resolve`.
#[default]
Auto,
Rust,
Typescript,
Javascript,
Python,
Go,
/// Also selected by `resolve` for `.kt`, `.kts` and `.scala` files.
Java,
/// Selected by `resolve` for `.c` and `.h` files.
C,
Cpp,
Ruby,
/// Selected by `resolve` for `.sh`, `.bash`, `.zsh` and `.fish` files.
Shell,
}
impl Language {
fn resolve(self, path: &Path) -> Self {
if self != Self::Auto {
return self;
}
let ext = path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_ascii_lowercase();
match ext.as_str() {
"rs" => Self::Rust,
"ts" | "tsx" => Self::Typescript,
"js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
"py" => Self::Python,
"go" => Self::Go,
"java" | "kt" | "kts" | "scala" => Self::Java,
"c" | "h" => Self::C,
"cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
"rb" => Self::Ruby,
"sh" | "bash" | "zsh" | "fish" => Self::Shell,
_ => Self::Auto, }
}
fn line_markers(self) -> &'static [&'static str] {
match self {
Self::Rust
| Self::Typescript
| Self::Javascript
| Self::Go
| Self::Java
| Self::C
| Self::Cpp => &["//"],
Self::Python | Self::Shell | Self::Ruby => &["#"],
Self::Auto => &[],
}
}
fn doc_line_markers(self) -> &'static [&'static str] {
match self {
Self::Rust => &["///", "//!"],
_ => &[],
}
}
fn block_delim(self) -> Option<(&'static str, &'static str)> {
match self {
Self::Rust
| Self::Typescript
| Self::Javascript
| Self::Go
| Self::Java
| Self::C
| Self::Cpp => Some(("/*", "*/")),
_ => None,
}
}
fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
match self {
Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
Some(("/**", "*/"))
}
_ => None,
}
}
}
/// Rule that reports comment blocks whose content scores as code rather than prose.
///
/// Instances are created by `build` from a `RuleSpec`.
#[derive(Debug)]
pub struct CommentedOutCodeRule {
/// Rule identifier, copied from the spec.
id: String,
/// Level returned by `Rule::level`, copied from the spec.
level: Level,
/// Optional policy link, copied from the spec.
policy_url: Option<String>,
/// Optional message that replaces the generated violation text.
message: Option<String>,
/// Path scope built from the spec's `paths` field.
scope: Scope,
/// Optional additional filter checked after `scope` in `evaluate`.
scope_filter: Option<ScopeFilter>,
/// Configured language; `Language::Auto` is resolved per file.
language: Language,
/// Blocks with fewer lines than this are not evaluated.
min_lines: usize,
/// Density score at or above which a block is reported.
threshold: f64,
/// Blocks whose first line number is at or below this value are ignored.
skip_leading_lines: usize,
}
impl Rule for CommentedOutCodeRule {
    /// Identifier of this rule.
    fn id(&self) -> &str {
        self.id.as_str()
    }

    /// Level configured for this rule.
    fn level(&self) -> Level {
        self.level
    }

    /// Policy link configured for this rule, if any.
    fn policy_url(&self) -> Option<&str> {
        self.policy_url.as_deref()
    }

    /// Path scope of this rule.
    fn path_scope(&self) -> Option<&Scope> {
        Some(&self.scope)
    }

    /// Runs the per-file check over every indexed file that passes the scope
    /// and the optional scope filter.
    ///
    /// Files that cannot be read are skipped silently; an error from
    /// `evaluate_file` aborts the walk and is returned.
    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
        let mut found = Vec::new();
        for entry in ctx.index.files() {
            if !self.scope.matches(&entry.path) {
                continue;
            }
            let rejected_by_filter = self
                .scope_filter
                .as_ref()
                .is_some_and(|filter| !filter.matches(&entry.path, ctx.index));
            if rejected_by_filter {
                continue;
            }
            match std::fs::read(ctx.root.join(&entry.path)) {
                Ok(bytes) => found.extend(self.evaluate_file(ctx, &entry.path, &bytes)?),
                // Unreadable files are skipped on purpose rather than reported.
                Err(_) => continue,
            }
        }
        Ok(found)
    }

    /// Exposes this rule through its per-file interface.
    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
        Some(self)
    }

    /// Optional scope filter configured for this rule.
    fn scope_filter(&self) -> Option<&ScopeFilter> {
        self.scope_filter.as_ref()
    }
}
impl PerFileRule for CommentedOutCodeRule {
    /// Path scope of this rule.
    fn path_scope(&self) -> &Scope {
        &self.scope
    }

    /// Reports every comment block in one file that looks like commented-out code.
    ///
    /// Returns no violations when the language stays unresolved or `bytes` is
    /// not valid UTF-8. A block is reported when it has at least `min_lines`
    /// lines, starts after `skip_leading_lines`, is not a doc comment, and its
    /// density score reaches `threshold`. Each violation points at column 1 of
    /// the block's first line.
    fn evaluate_file(
        &self,
        _ctx: &Context<'_>,
        path: &Path,
        bytes: &[u8],
    ) -> Result<Vec<Violation>> {
        let lang = match self.language.resolve(path) {
            Language::Auto => return Ok(Vec::new()),
            resolved => resolved,
        };
        let text = match std::str::from_utf8(bytes) {
            Ok(text) => text,
            Err(_) => return Ok(Vec::new()),
        };

        let flagged: Vec<Violation> = find_comment_blocks(text, lang)
            .into_iter()
            .filter(|block| {
                block.lines.len() >= self.min_lines
                    && block.start_line > self.skip_leading_lines
                    && !block.is_doc_comment
            })
            .filter_map(|block| {
                let density = score_density(&block.content);
                (density >= self.threshold).then(|| {
                    let text = match &self.message {
                        Some(custom) => custom.clone(),
                        None => format!(
                            "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
                            block.lines.len(),
                            density,
                        ),
                    };
                    Violation::new(text)
                        .with_path(std::sync::Arc::<Path>::from(path))
                        .with_location(block.start_line, 1)
                })
            })
            .collect();
        Ok(flagged)
    }
}
pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
let Some(paths) = &spec.paths else {
return Err(Error::rule_config(
&spec.id,
"commented_out_code requires a `paths` field",
));
};
let opts: Options = spec
.deserialize_options()
.map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
if opts.min_lines < 2 {
return Err(Error::rule_config(
&spec.id,
"commented_out_code `min_lines` must be ≥ 2",
));
}
if !(0.0..=1.0).contains(&opts.threshold) {
return Err(Error::rule_config(
&spec.id,
"commented_out_code `threshold` must be between 0.0 and 1.0",
));
}
Ok(Box::new(CommentedOutCodeRule {
id: spec.id.clone(),
level: spec.level,
policy_url: spec.policy_url.clone(),
message: spec.message.clone(),
scope: Scope::from_paths_spec(paths)?,
scope_filter: spec.parse_scope_filter()?,
language: opts.language,
min_lines: opts.min_lines,
threshold: opts.threshold,
skip_leading_lines: opts.skip_leading_lines,
}))
}
/// One comment found by `find_comment_blocks`: either a block comment or a
/// run of consecutive line comments.
#[derive(Debug)]
struct CommentBlock {
/// 1-based line number of the block's first line.
start_line: usize,
/// Raw source lines of the block, comment markers included.
lines: Vec<String>,
/// Block text with comment markers stripped, one `\n`-terminated entry per line.
content: String,
/// Whether the block was recognised as a documentation comment.
is_doc_comment: bool,
}
/// Scans `text` line by line and groups its comments into [`CommentBlock`]s.
///
/// Two shapes are recognised, both only when the comment marker is the first
/// non-whitespace text on a line:
///
/// * a block comment, running from a line that starts with the language's
///   opening delimiter through the first line that contains the closing one;
/// * a run of consecutive lines that each start with a line-comment marker.
///
/// A block comment that is still open at the end of `text` is dropped.
/// Reported line numbers are 1-based. Languages without markers (for example
/// `Language::Auto`) yield no blocks.
fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
    let mut blocks = Vec::new();
    let line_markers = lang.line_markers();
    let doc_line_markers = lang.doc_line_markers();
    let block_delim = lang.block_delim();
    let doc_block_delim = lang.doc_block_delim();
    let lines: Vec<&str> = text.lines().collect();
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim_start();

        // Block comments are tried first so that `/*` is never read as a line marker.
        if let Some((open, close)) = block_delim {
            if let Some(after_open) = trimmed.strip_prefix(open) {
                let is_doc =
                    doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
                let start_line = i + 1;
                let mut block_lines = Vec::new();
                let mut block_content = String::new();
                let mut closed = false;
                let mut j = i;
                while j < lines.len() {
                    let l = lines[j];
                    block_lines.push(l.to_string());
                    let stripped = strip_block_comment_markers(l, open, close);
                    block_content.push_str(&stripped);
                    block_content.push('\n');
                    // On the opening line only the text after the opener can close
                    // the comment; otherwise `/*/` would count as closed because
                    // its `*/` overlaps the `/*`.
                    let closes_here = if j == i {
                        after_open.contains(close)
                    } else {
                        l.contains(close)
                    };
                    j += 1;
                    if closes_here {
                        closed = true;
                        break;
                    }
                }
                if closed {
                    blocks.push(CommentBlock {
                        start_line,
                        lines: block_lines,
                        content: block_content,
                        is_doc_comment: is_doc,
                    });
                }
                // Resume after the block, or at end of input if it never closed.
                i = j;
                continue;
            }
        }

        if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
            let start_line = i + 1;
            let mut block_lines = Vec::new();
            let mut block_content = String::new();
            // The run is a doc comment only if every one of its lines is a doc line.
            let mut all_doc = !doc_line_markers.is_empty();
            let mut j = i;
            while j < lines.len() {
                let l = lines[j];
                let lt = l.trim_start();
                let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
                    break;
                };
                // A doc marker directly followed by the marker's first character
                // (for example `////`) is treated as a plain comment, not a doc line.
                let is_doc_line = doc_line_markers.iter().any(|d| {
                    lt.starts_with(d)
                        && (lt.len() == d.len()
                            || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
                });
                if !is_doc_line {
                    all_doc = false;
                }
                block_lines.push(l.to_string());
                block_content.push_str(strip_line_marker(lt, m));
                block_content.push('\n');
                j += 1;
            }
            blocks.push(CommentBlock {
                start_line,
                lines: block_lines,
                content: block_content,
                is_doc_comment: all_doc,
            });
            i = j;
            continue;
        }

        i += 1;
    }
    blocks
}
/// Removes a leading `marker` from `line`, then at most one space after it.
///
/// A line that does not start with `marker` only loses a single leading space, if present.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let body = match line.strip_prefix(marker) {
        Some(rest) => rest,
        None => line,
    };
    match body.strip_prefix(' ') {
        Some(rest) => rest,
        None => body,
    }
}
/// Strips block-comment decoration from one line of a block comment.
///
/// The line is trimmed, then a leading `open` and a trailing `close` are
/// removed. If what remains is a lone `*` the result is empty; if it starts
/// with `* ` (after leading whitespace) that gutter is removed as well.
/// Otherwise the remainder is returned with its inner whitespace intact.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let body = line.trim();
    let body = body.strip_prefix(open).unwrap_or(body);
    let body = body.strip_suffix(close).unwrap_or(body);
    match body.trim_start() {
        "*" => String::new(),
        inner => inner.strip_prefix("* ").unwrap_or(body).to_owned(),
    }
}
/// Characters that `score_density` counts as evidence of code.
const STRONG_CODE_CHARS: &[char] = &[
'(', ')', '{', '}', '[', ']', ';', '=', '<', '>', '&', '|', '^',
];
/// Ratio of `STRONG_CODE_CHARS` to non-whitespace characters at which
/// `score_density` reaches its maximum of 1.0; lower ratios scale linearly.
const SATURATION_POINT: f64 = 0.20;
/// Scores how code-like `content` is, from 0.0 to 1.0.
///
/// Long runs of one repeated character are removed first. The score is the
/// share of non-whitespace characters found in `STRONG_CODE_CHARS`, divided by
/// `SATURATION_POINT` and capped at 1.0. Content with no non-whitespace
/// characters scores 0.0.
fn score_density(content: &str) -> f64 {
    let (strong, visible) = drop_long_runs(content)
        .chars()
        .filter(|ch| !ch.is_whitespace())
        .fold((0usize, 0usize), |(strong, visible), ch| {
            let hit = usize::from(STRONG_CODE_CHARS.contains(&ch));
            (strong + hit, visible + 1)
        });
    if visible == 0 {
        return 0.0;
    }
    #[allow(clippy::cast_precision_loss)]
    let ratio = strong as f64 / visible as f64;
    f64::min(ratio / SATURATION_POINT, 1.0)
}
/// Returns `s` with every run of five or more identical consecutive
/// characters removed; shorter runs are kept unchanged.
fn drop_long_runs(s: &str) -> String {
    const LONGEST_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut rest = s.chars().peekable();
    while let Some(current) = rest.next() {
        // Consume the whole run of `current` before deciding whether to keep it.
        let mut run_len = 1usize;
        while rest.next_if_eq(&current).is_some() {
            run_len += 1;
        }
        if run_len <= LONGEST_KEPT_RUN {
            for _ in 0..run_len {
                kept.push(current);
            }
        }
    }
    kept
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text saturates the score; plain prose stays below the default threshold.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");
        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A run of `//` lines becomes one block with 1-based start line and stripped markers.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection and scoring together separate prose comments from commented-out code.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// Long `=` rulers are dropped before scoring, so a banner is not mistaken for code.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the run itself is removed: the single spaces on either side of
        // it are separate one-character runs and both survive.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        // Runs of up to four characters are kept.
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        // Five identical characters is the shortest run that is dropped.
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        // No extension: the language stays unresolved.
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}