1use std::path::Path;
54
55use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
56use serde::Deserialize;
57
58#[derive(Debug, Deserialize)]
59struct Options {
60 #[serde(default)]
64 language: Language,
65 #[serde(default = "default_min_lines")]
69 min_lines: usize,
70 #[serde(default = "default_threshold")]
73 threshold: f64,
74 #[serde(default = "default_skip_leading_lines")]
78 skip_leading_lines: usize,
79}
80
/// Fallback for `Options::min_lines` when the configuration omits it.
fn default_min_lines() -> usize {
    3_usize
}
/// Fallback for `Options::threshold` when the configuration omits it.
fn default_threshold() -> f64 {
    0.5_f64
}
/// Fallback for `Options::skip_leading_lines` when the configuration omits it.
fn default_skip_leading_lines() -> usize {
    30_usize
}
90
91#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
92#[serde(rename_all = "snake_case")]
93enum Language {
94 #[default]
95 Auto,
96 Rust,
97 Typescript,
98 Javascript,
99 Python,
100 Go,
101 Java,
102 C,
103 Cpp,
104 Ruby,
105 Shell,
106}
107
108impl Language {
109 fn resolve(self, path: &Path) -> Self {
112 if self != Self::Auto {
113 return self;
114 }
115 let ext = path
116 .extension()
117 .and_then(|s| s.to_str())
118 .unwrap_or("")
119 .to_ascii_lowercase();
120 match ext.as_str() {
121 "rs" => Self::Rust,
122 "ts" | "tsx" => Self::Typescript,
123 "js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
124 "py" => Self::Python,
125 "go" => Self::Go,
126 "java" | "kt" | "kts" | "scala" => Self::Java,
127 "c" | "h" => Self::C,
128 "cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
129 "rb" => Self::Ruby,
130 "sh" | "bash" | "zsh" | "fish" => Self::Shell,
131 _ => Self::Auto, }
133 }
134
135 fn line_markers(self) -> &'static [&'static str] {
138 match self {
139 Self::Rust
142 | Self::Typescript
143 | Self::Javascript
144 | Self::Go
145 | Self::Java
146 | Self::C
147 | Self::Cpp => &["//"],
148 Self::Python | Self::Shell | Self::Ruby => &["#"],
149 Self::Auto => &[],
150 }
151 }
152
153 fn doc_line_markers(self) -> &'static [&'static str] {
157 match self {
160 Self::Rust => &["///", "//!"],
161 _ => &[],
162 }
163 }
164
165 fn block_delim(self) -> Option<(&'static str, &'static str)> {
167 match self {
168 Self::Rust
169 | Self::Typescript
170 | Self::Javascript
171 | Self::Go
172 | Self::Java
173 | Self::C
174 | Self::Cpp => Some(("/*", "*/")),
175 _ => None,
176 }
177 }
178
179 fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
182 match self {
183 Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
185 Some(("/**", "*/"))
186 }
187 _ => None,
188 }
189 }
190}
191
192#[derive(Debug)]
193pub struct CommentedOutCodeRule {
194 id: String,
195 level: Level,
196 policy_url: Option<String>,
197 message: Option<String>,
198 scope: Scope,
199 language: Language,
200 min_lines: usize,
201 threshold: f64,
202 skip_leading_lines: usize,
203}
204
205impl Rule for CommentedOutCodeRule {
206 fn id(&self) -> &str {
207 &self.id
208 }
209 fn level(&self) -> Level {
210 self.level
211 }
212 fn policy_url(&self) -> Option<&str> {
213 self.policy_url.as_deref()
214 }
215 fn path_scope(&self) -> Option<&Scope> {
216 Some(&self.scope)
217 }
218
219 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
220 let mut violations = Vec::new();
221 for entry in ctx.index.files() {
222 if !self.scope.matches(&entry.path) {
223 continue;
224 }
225 let lang = self.language.resolve(&entry.path);
226 if lang == Language::Auto {
227 continue; }
229 let full = ctx.root.join(&entry.path);
230 let Ok(bytes) = std::fs::read(&full) else {
231 continue;
232 };
233 let Ok(text) = std::str::from_utf8(&bytes) else {
234 continue;
235 };
236 for block in find_comment_blocks(text, lang) {
237 if block.lines.len() < self.min_lines {
238 continue;
239 }
240 if block.start_line <= self.skip_leading_lines {
241 continue;
242 }
243 if block.is_doc_comment {
244 continue;
245 }
246 let density = score_density(&block.content);
247 if density >= self.threshold {
248 let msg = self.message.clone().unwrap_or_else(|| {
249 format!(
250 "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
251 block.lines.len(),
252 density,
253 )
254 });
255 violations.push(
256 Violation::new(msg)
257 .with_path(entry.path.clone())
258 .with_location(block.start_line, 1),
259 );
260 }
261 }
262 }
263 Ok(violations)
264 }
265}
266
267pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
268 let Some(paths) = &spec.paths else {
269 return Err(Error::rule_config(
270 &spec.id,
271 "commented_out_code requires a `paths` field",
272 ));
273 };
274 let opts: Options = spec
275 .deserialize_options()
276 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
277 if opts.min_lines < 2 {
278 return Err(Error::rule_config(
279 &spec.id,
280 "commented_out_code `min_lines` must be ≥ 2",
281 ));
282 }
283 if !(0.0..=1.0).contains(&opts.threshold) {
284 return Err(Error::rule_config(
285 &spec.id,
286 "commented_out_code `threshold` must be between 0.0 and 1.0",
287 ));
288 }
289 Ok(Box::new(CommentedOutCodeRule {
290 id: spec.id.clone(),
291 level: spec.level,
292 policy_url: spec.policy_url.clone(),
293 message: spec.message.clone(),
294 scope: Scope::from_paths_spec(paths)?,
295 language: opts.language,
296 min_lines: opts.min_lines,
297 threshold: opts.threshold,
298 skip_leading_lines: opts.skip_leading_lines,
299 }))
300}
301
/// One contiguous comment found in a source file: either a single block
/// comment or a run of consecutive line comments.
#[derive(Debug)]
struct CommentBlock {
    /// 1-based line number of the block's first line.
    start_line: usize,
    /// The block's source lines exactly as they appear, markers included.
    lines: Vec<String>,
    /// The block's text with comment markers stripped, one `\n`-terminated
    /// entry per source line.
    content: String,
    /// Whether the block is documentation rather than an ordinary comment.
    is_doc_comment: bool,
}
315
316fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
317 let mut blocks = Vec::new();
318 let line_markers = lang.line_markers();
319 let doc_line_markers = lang.doc_line_markers();
320 let block_delim = lang.block_delim();
321 let doc_block_delim = lang.doc_block_delim();
322
323 let lines: Vec<&str> = text.lines().collect();
324 let mut i = 0;
325 while i < lines.len() {
326 let line = lines[i];
327 let trimmed = line.trim_start();
328
329 if let Some((open, close)) = block_delim {
331 if trimmed.starts_with(open) {
332 let is_doc = doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
333 let start_line = i + 1;
334 let mut block_lines = Vec::new();
335 let mut block_content = String::new();
336 let mut closed = false;
337 let mut j = i;
338 while j < lines.len() {
339 let l = lines[j];
340 block_lines.push(l.to_string());
341 let stripped = strip_block_comment_markers(l, open, close);
342 block_content.push_str(&stripped);
343 block_content.push('\n');
344 if l.contains(close) && (j > i || trimmed.matches(close).count() > 0) {
345 closed = true;
346 j += 1;
347 break;
348 }
349 j += 1;
350 }
351 if closed {
352 blocks.push(CommentBlock {
353 start_line,
354 lines: block_lines,
355 content: block_content,
356 is_doc_comment: is_doc,
357 });
358 }
359 i = j;
360 continue;
361 }
362 }
363
364 if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
366 let start_line = i + 1;
367 let mut block_lines = Vec::new();
368 let mut block_content = String::new();
369 let mut all_doc = !doc_line_markers.is_empty();
370 let mut j = i;
371 while j < lines.len() {
372 let l = lines[j];
373 let lt = l.trim_start();
374 let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
375 break;
376 };
377 let is_doc_line = doc_line_markers.iter().any(|d| {
378 lt.starts_with(d)
379 && (lt.len() == d.len()
380 || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
381 });
382 if !is_doc_line {
383 all_doc = false;
384 }
385 block_lines.push(l.to_string());
386 block_content.push_str(strip_line_marker(lt, m));
387 block_content.push('\n');
388 j += 1;
389 }
390 blocks.push(CommentBlock {
391 start_line,
392 lines: block_lines,
393 content: block_content,
394 is_doc_comment: all_doc,
395 });
396 i = j;
397 continue;
398 }
399
400 i += 1;
401 }
402 blocks
403}
404
/// Removes a leading `marker` from `line`, then at most one space after it.
///
/// When `line` does not start with `marker` the marker step is a no-op, but
/// a single leading space is still removed.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let body = match line.strip_prefix(marker) {
        Some(rest) => rest,
        None => line,
    };
    match body.strip_prefix(' ') {
        Some(rest) => rest,
        None => body,
    }
}
409
/// Strips block-comment decoration from one line of a block comment.
///
/// The line is trimmed, then a leading `open` and a trailing `close` are
/// removed when present. If what remains (ignoring leading whitespace)
/// starts with `"* "`, the text after that gutter is returned; if it is a
/// lone `*`, the result is empty. Otherwise the remainder is returned with
/// its leading whitespace kept.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let core = line.trim();
    let core = core.strip_prefix(open).unwrap_or(core);
    let core = core.strip_suffix(close).unwrap_or(core);

    let inner = core.trim_start();
    match inner.strip_prefix("* ") {
        Some(after_gutter) => after_gutter.to_owned(),
        None if inner == "*" => String::new(),
        None => core.to_owned(),
    }
}
428
/// Characters counted as code evidence when scoring comment content:
/// brackets, `;`, `=`, angle brackets, and the `&`, `|`, `^` operators.
const STRONG_CODE_CHARS: &[char] = &[
    '(', ')', '{', '}', '[', ']', ';', '=', '<', '>', '&', '|', '^',
];

/// Ratio of strong characters to non-whitespace characters at which the
/// density score saturates at 1.0.
const SATURATION_POINT: f64 = 0.20;
449
450fn score_density(content: &str) -> f64 {
461 let collapsed = drop_long_runs(content);
462 let nonws_count = collapsed.chars().filter(|c| !c.is_whitespace()).count();
463 if nonws_count == 0 {
464 return 0.0;
465 }
466 let strong_count = collapsed
467 .chars()
468 .filter(|c| STRONG_CODE_CHARS.contains(c))
469 .count();
470 #[allow(clippy::cast_precision_loss)]
471 let raw = strong_count as f64 / nonws_count as f64;
472 (raw / SATURATION_POINT).min(1.0)
473}
474
/// Returns `s` with every run of five or more identical consecutive
/// characters removed entirely; shorter runs are kept unchanged.
fn drop_long_runs(s: &str) -> String {
    // Longest run that is still copied to the output.
    const MAX_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(current) = chars.next() {
        // Consume the rest of the run that `current` starts.
        let mut run_len = 1;
        while chars.next_if_eq(&current).is_some() {
            run_len += 1;
        }
        if run_len <= MAX_KEPT_RUN {
            kept.extend(std::iter::repeat(current).take(run_len));
        }
    }
    kept
}
500
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text must score above the default threshold, prose below it.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");

        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A `//` run is found as one block with its markers stripped from the content.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    /// A run made only of `///` lines is flagged as documentation.
    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    /// A `/** ... */` block is flagged as documentation.
    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    /// `#` comments form a block for Python sources.
    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection plus scoring separates prose comments from code comments.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// Separator banners made of `=` must not be mistaken for code.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    /// Runs of five or more identical characters are dropped; shorter runs stay.
    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the `=` run is removed; the space on each side of it survives,
        // leaving two adjacent spaces.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    /// `Auto` resolves from the file extension and stays `Auto` without one.
    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}