1use std::path::Path;
54
55use alint_core::{
56 Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, ScopeFilter, Violation,
57};
58use serde::Deserialize;
59
/// Rule options deserialized from the rule spec. Any field left out of the
/// configuration takes the default named in its `serde` attribute.
#[derive(Debug, Deserialize)]
struct Options {
    /// Comment syntax to apply. Defaults to `Language::Auto`, which
    /// `Language::resolve` maps to a concrete language from the file extension.
    #[serde(default)]
    language: Language,
    /// Minimum number of lines a comment block needs before it is scored;
    /// shorter blocks are skipped. Defaults to 3; `build` rejects values below 2.
    #[serde(default = "default_min_lines")]
    min_lines: usize,
    /// Density score at or above which a block is reported. Defaults to 0.5;
    /// `build` rejects values outside `0.0..=1.0`.
    #[serde(default = "default_threshold")]
    threshold: f64,
    /// Blocks whose first line (1-based) is at or below this number are
    /// skipped. Defaults to 30 — presumably to spare license/header comments
    /// at the top of a file (NOTE(review): confirm intent).
    #[serde(default = "default_skip_leading_lines")]
    skip_leading_lines: usize,
}
82
/// Serde default for `Options::min_lines`.
fn default_min_lines() -> usize {
    3
}
/// Serde default for `Options::threshold`.
fn default_threshold() -> f64 {
    0.5
}
/// Serde default for `Options::skip_leading_lines`.
fn default_skip_leading_lines() -> usize {
    30
}
92
/// Comment-syntax family used to locate comment blocks in a file.
///
/// Deserialized from snake_case names (`auto`, `rust`, `typescript`, ...).
/// `Auto` is the default and means "decide per file from its extension";
/// see `Language::resolve`.
#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum Language {
    /// Not yet resolved; also the result of resolving an unrecognised extension.
    #[default]
    Auto,
    Rust,
    Typescript,
    Javascript,
    Python,
    Go,
    /// Also selected by `resolve` for `.kt`, `.kts` and `.scala` files.
    Java,
    C,
    Cpp,
    Ruby,
    /// Selected by `resolve` for `.sh`, `.bash`, `.zsh` and `.fish` files.
    Shell,
}
109
110impl Language {
111 fn resolve(self, path: &Path) -> Self {
114 if self != Self::Auto {
115 return self;
116 }
117 let ext = path
118 .extension()
119 .and_then(|s| s.to_str())
120 .unwrap_or("")
121 .to_ascii_lowercase();
122 match ext.as_str() {
123 "rs" => Self::Rust,
124 "ts" | "tsx" => Self::Typescript,
125 "js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
126 "py" => Self::Python,
127 "go" => Self::Go,
128 "java" | "kt" | "kts" | "scala" => Self::Java,
129 "c" | "h" => Self::C,
130 "cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
131 "rb" => Self::Ruby,
132 "sh" | "bash" | "zsh" | "fish" => Self::Shell,
133 _ => Self::Auto, }
135 }
136
137 fn line_markers(self) -> &'static [&'static str] {
140 match self {
141 Self::Rust
144 | Self::Typescript
145 | Self::Javascript
146 | Self::Go
147 | Self::Java
148 | Self::C
149 | Self::Cpp => &["//"],
150 Self::Python | Self::Shell | Self::Ruby => &["#"],
151 Self::Auto => &[],
152 }
153 }
154
155 fn doc_line_markers(self) -> &'static [&'static str] {
159 match self {
162 Self::Rust => &["///", "//!"],
163 _ => &[],
164 }
165 }
166
167 fn block_delim(self) -> Option<(&'static str, &'static str)> {
169 match self {
170 Self::Rust
171 | Self::Typescript
172 | Self::Javascript
173 | Self::Go
174 | Self::Java
175 | Self::C
176 | Self::Cpp => Some(("/*", "*/")),
177 _ => None,
178 }
179 }
180
181 fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
184 match self {
185 Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
187 Some(("/**", "*/"))
188 }
189 _ => None,
190 }
191 }
192}
193
/// Rule that reports comment blocks which look like commented-out code.
///
/// Instances are created by `build` from a `RuleSpec` plus validated `Options`.
#[derive(Debug)]
pub struct CommentedOutCodeRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Severity level, copied from the spec.
    level: Level,
    /// Optional policy URL, copied from the spec.
    policy_url: Option<String>,
    /// Optional message that replaces the generated violation text.
    message: Option<String>,
    /// Path scope built from the spec's `paths` field.
    scope: Scope,
    /// Optional additional filter parsed from the spec.
    scope_filter: Option<ScopeFilter>,
    /// Configured language; `Language::Auto` is resolved per file.
    language: Language,
    /// Minimum number of lines for a block to be considered.
    min_lines: usize,
    /// Density score at or above which a block is reported.
    threshold: f64,
    /// Blocks starting at or before this 1-based line are ignored.
    skip_leading_lines: usize,
}
207
208impl Rule for CommentedOutCodeRule {
209 fn id(&self) -> &str {
210 &self.id
211 }
212 fn level(&self) -> Level {
213 self.level
214 }
215 fn policy_url(&self) -> Option<&str> {
216 self.policy_url.as_deref()
217 }
218 fn path_scope(&self) -> Option<&Scope> {
219 Some(&self.scope)
220 }
221
222 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
223 let mut violations = Vec::new();
224 for entry in ctx.index.files() {
225 if !self.scope.matches(&entry.path) {
226 continue;
227 }
228 if let Some(filter) = &self.scope_filter
229 && !filter.matches(&entry.path, ctx.index)
230 {
231 continue;
232 }
233 let full = ctx.root.join(&entry.path);
234 let Ok(bytes) = std::fs::read(&full) else {
235 continue;
236 };
237 violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
238 }
239 Ok(violations)
240 }
241
242 fn as_per_file(&self) -> Option<&dyn PerFileRule> {
243 Some(self)
244 }
245
246 fn scope_filter(&self) -> Option<&ScopeFilter> {
247 self.scope_filter.as_ref()
248 }
249}
250
251impl PerFileRule for CommentedOutCodeRule {
252 fn path_scope(&self) -> &Scope {
253 &self.scope
254 }
255
256 fn evaluate_file(
257 &self,
258 _ctx: &Context<'_>,
259 path: &Path,
260 bytes: &[u8],
261 ) -> Result<Vec<Violation>> {
262 let lang = self.language.resolve(path);
263 if lang == Language::Auto {
264 return Ok(Vec::new()); }
266 let Ok(text) = std::str::from_utf8(bytes) else {
267 return Ok(Vec::new());
268 };
269 let mut violations = Vec::new();
270 for block in find_comment_blocks(text, lang) {
271 if block.lines.len() < self.min_lines {
272 continue;
273 }
274 if block.start_line <= self.skip_leading_lines {
275 continue;
276 }
277 if block.is_doc_comment {
278 continue;
279 }
280 let density = score_density(&block.content);
281 if density >= self.threshold {
282 let msg = self.message.clone().unwrap_or_else(|| {
283 format!(
284 "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
285 block.lines.len(),
286 density,
287 )
288 });
289 violations.push(
290 Violation::new(msg)
291 .with_path(std::sync::Arc::<Path>::from(path))
292 .with_location(block.start_line, 1),
293 );
294 }
295 }
296 Ok(violations)
297 }
298}
299
300pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
301 let Some(paths) = &spec.paths else {
302 return Err(Error::rule_config(
303 &spec.id,
304 "commented_out_code requires a `paths` field",
305 ));
306 };
307 let opts: Options = spec
308 .deserialize_options()
309 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
310 if opts.min_lines < 2 {
311 return Err(Error::rule_config(
312 &spec.id,
313 "commented_out_code `min_lines` must be ≥ 2",
314 ));
315 }
316 if !(0.0..=1.0).contains(&opts.threshold) {
317 return Err(Error::rule_config(
318 &spec.id,
319 "commented_out_code `threshold` must be between 0.0 and 1.0",
320 ));
321 }
322 Ok(Box::new(CommentedOutCodeRule {
323 id: spec.id.clone(),
324 level: spec.level,
325 policy_url: spec.policy_url.clone(),
326 message: spec.message.clone(),
327 scope: Scope::from_paths_spec(paths)?,
328 scope_filter: spec.parse_scope_filter()?,
329 language: opts.language,
330 min_lines: opts.min_lines,
331 threshold: opts.threshold,
332 skip_leading_lines: opts.skip_leading_lines,
333 }))
334}
335
/// One comment block found by `find_comment_blocks`: either a single block
/// comment or a run of consecutive line comments.
#[derive(Debug)]
struct CommentBlock {
    /// 1-based line number of the block's first line.
    start_line: usize,
    /// The block's source lines exactly as they appear in the file.
    lines: Vec<String>,
    /// The block's text with comment markers stripped, one `\n`-terminated
    /// entry per source line; this is what gets density-scored.
    content: String,
    /// `true` when the block is a documentation comment.
    is_doc_comment: bool,
}
349
350fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
351 let mut blocks = Vec::new();
352 let line_markers = lang.line_markers();
353 let doc_line_markers = lang.doc_line_markers();
354 let block_delim = lang.block_delim();
355 let doc_block_delim = lang.doc_block_delim();
356
357 let lines: Vec<&str> = text.lines().collect();
358 let mut i = 0;
359 while i < lines.len() {
360 let line = lines[i];
361 let trimmed = line.trim_start();
362
363 if let Some((open, close)) = block_delim {
365 if trimmed.starts_with(open) {
366 let is_doc = doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
367 let start_line = i + 1;
368 let mut block_lines = Vec::new();
369 let mut block_content = String::new();
370 let mut closed = false;
371 let mut j = i;
372 while j < lines.len() {
373 let l = lines[j];
374 block_lines.push(l.to_string());
375 let stripped = strip_block_comment_markers(l, open, close);
376 block_content.push_str(&stripped);
377 block_content.push('\n');
378 if l.contains(close) && (j > i || trimmed.matches(close).count() > 0) {
379 closed = true;
380 j += 1;
381 break;
382 }
383 j += 1;
384 }
385 if closed {
386 blocks.push(CommentBlock {
387 start_line,
388 lines: block_lines,
389 content: block_content,
390 is_doc_comment: is_doc,
391 });
392 }
393 i = j;
394 continue;
395 }
396 }
397
398 if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
400 let start_line = i + 1;
401 let mut block_lines = Vec::new();
402 let mut block_content = String::new();
403 let mut all_doc = !doc_line_markers.is_empty();
404 let mut j = i;
405 while j < lines.len() {
406 let l = lines[j];
407 let lt = l.trim_start();
408 let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
409 break;
410 };
411 let is_doc_line = doc_line_markers.iter().any(|d| {
412 lt.starts_with(d)
413 && (lt.len() == d.len()
414 || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
415 });
416 if !is_doc_line {
417 all_doc = false;
418 }
419 block_lines.push(l.to_string());
420 block_content.push_str(strip_line_marker(lt, m));
421 block_content.push('\n');
422 j += 1;
423 }
424 blocks.push(CommentBlock {
425 start_line,
426 lines: block_lines,
427 content: block_content,
428 is_doc_comment: all_doc,
429 });
430 i = j;
431 continue;
432 }
433
434 i += 1;
435 }
436 blocks
437}
438
/// Removes a leading `marker` from `line`, then at most one following space.
///
/// When `line` does not start with `marker` it is left as is, apart from a
/// single leading space, which is still removed.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let body = match line.strip_prefix(marker) {
        Some(rest) => rest,
        None => line,
    };
    match body.strip_prefix(' ') {
        Some(rest) => rest,
        None => body,
    }
}
443
/// Strips block-comment decoration from one line of a block comment.
///
/// The line is trimmed, then a leading `open` and a trailing `close` are
/// removed if present. If what remains (ignoring leading whitespace) starts
/// with `"* "`, that continuation prefix is dropped too; a lone `*` yields an
/// empty string. Otherwise the text is returned with its inner whitespace
/// untouched.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let body = line.trim();
    let body = body.strip_prefix(open).unwrap_or(body);
    let body = body.strip_suffix(close).unwrap_or(body);

    let inner = body.trim_start();
    match inner.strip_prefix("* ") {
        Some(rest) => rest.to_owned(),
        None if inner == "*" => String::new(),
        None => body.to_owned(),
    }
}
462
/// Punctuation that `score_density` counts as evidence of code: brackets,
/// statement terminators, assignment/comparison and bitwise operators.
const STRONG_CODE_CHARS: &[char] = &[
    '(', ')', '{', '}', '[', ']', ';', '=', '<', '>', '&', '|', '^',
];

/// Ratio of strong code characters to non-whitespace characters at which
/// `score_density` reaches its maximum score of 1.0.
const SATURATION_POINT: f64 = 0.20;
483
484fn score_density(content: &str) -> f64 {
495 let collapsed = drop_long_runs(content);
496 let nonws_count = collapsed.chars().filter(|c| !c.is_whitespace()).count();
497 if nonws_count == 0 {
498 return 0.0;
499 }
500 let strong_count = collapsed
501 .chars()
502 .filter(|c| STRONG_CODE_CHARS.contains(c))
503 .count();
504 #[allow(clippy::cast_precision_loss)]
505 let raw = strong_count as f64 / nonws_count as f64;
506 (raw / SATURATION_POINT).min(1.0)
507}
508
/// Returns `s` with every run of five or more identical consecutive
/// characters removed entirely; shorter runs are copied through unchanged.
///
/// Whitespace runs are treated like any other character.
fn drop_long_runs(s: &str) -> String {
    /// Longest run that is still copied to the output.
    const MAX_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(current) = chars.next() {
        // Consume the rest of the run that starts at `current`.
        let mut run_len = 1;
        while chars.next_if_eq(&current).is_some() {
            run_len += 1;
        }
        if run_len <= MAX_KEPT_RUN {
            kept.extend(std::iter::repeat(current).take(run_len));
        }
    }
    kept
}
534
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text saturates the score; plain prose stays below threshold.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");

        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A run of `//` lines becomes one block with the markers removed.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    /// A run made only of `///` lines is flagged as documentation.
    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    /// A `/** ... */` block is flagged as documentation.
    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    /// `#` comment runs are detected for Python.
    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection plus scoring: prose stays below 0.5, code reaches it.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// `====` banner lines must not count as code punctuation.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    /// Runs of five or more identical characters are dropped; shorter runs stay.
    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the `=` run is removed; the single space on each side of it
        // survives, leaving two adjacent spaces.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    /// `Auto` resolves from the extension and stays `Auto` when there is none.
    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}