1use std::path::Path;
54
55use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
56use serde::Deserialize;
57
58#[derive(Debug, Deserialize)]
59struct Options {
60 #[serde(default)]
64 language: Language,
65 #[serde(default = "default_min_lines")]
69 min_lines: usize,
70 #[serde(default = "default_threshold")]
73 threshold: f64,
74 #[serde(default = "default_skip_leading_lines")]
78 skip_leading_lines: usize,
79}
80
/// Serde default for `Options::min_lines`.
const fn default_min_lines() -> usize {
    3
}
/// Serde default for `Options::threshold`.
const fn default_threshold() -> f64 {
    0.5
}
/// Serde default for `Options::skip_leading_lines`.
const fn default_skip_leading_lines() -> usize {
    30
}
90
91#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
92#[serde(rename_all = "snake_case")]
93enum Language {
94 #[default]
95 Auto,
96 Rust,
97 Typescript,
98 Javascript,
99 Python,
100 Go,
101 Java,
102 C,
103 Cpp,
104 Ruby,
105 Shell,
106}
107
108impl Language {
109 fn resolve(self, path: &Path) -> Self {
112 if self != Self::Auto {
113 return self;
114 }
115 let ext = path
116 .extension()
117 .and_then(|s| s.to_str())
118 .unwrap_or("")
119 .to_ascii_lowercase();
120 match ext.as_str() {
121 "rs" => Self::Rust,
122 "ts" | "tsx" => Self::Typescript,
123 "js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
124 "py" => Self::Python,
125 "go" => Self::Go,
126 "java" | "kt" | "kts" | "scala" => Self::Java,
127 "c" | "h" => Self::C,
128 "cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
129 "rb" => Self::Ruby,
130 "sh" | "bash" | "zsh" | "fish" => Self::Shell,
131 _ => Self::Auto, }
133 }
134
135 fn line_markers(self) -> &'static [&'static str] {
138 match self {
139 Self::Rust
142 | Self::Typescript
143 | Self::Javascript
144 | Self::Go
145 | Self::Java
146 | Self::C
147 | Self::Cpp => &["//"],
148 Self::Python | Self::Shell | Self::Ruby => &["#"],
149 Self::Auto => &[],
150 }
151 }
152
153 fn doc_line_markers(self) -> &'static [&'static str] {
157 match self {
160 Self::Rust => &["///", "//!"],
161 _ => &[],
162 }
163 }
164
165 fn block_delim(self) -> Option<(&'static str, &'static str)> {
167 match self {
168 Self::Rust
169 | Self::Typescript
170 | Self::Javascript
171 | Self::Go
172 | Self::Java
173 | Self::C
174 | Self::Cpp => Some(("/*", "*/")),
175 _ => None,
176 }
177 }
178
179 fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
182 match self {
183 Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
185 Some(("/**", "*/"))
186 }
187 _ => None,
188 }
189 }
190}
191
192#[derive(Debug)]
193pub struct CommentedOutCodeRule {
194 id: String,
195 level: Level,
196 policy_url: Option<String>,
197 message: Option<String>,
198 scope: Scope,
199 language: Language,
200 min_lines: usize,
201 threshold: f64,
202 skip_leading_lines: usize,
203}
204
205impl Rule for CommentedOutCodeRule {
206 fn id(&self) -> &str {
207 &self.id
208 }
209 fn level(&self) -> Level {
210 self.level
211 }
212 fn policy_url(&self) -> Option<&str> {
213 self.policy_url.as_deref()
214 }
215 fn path_scope(&self) -> Option<&Scope> {
216 Some(&self.scope)
217 }
218
219 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
220 let mut violations = Vec::new();
221 for entry in ctx.index.files() {
222 if !self.scope.matches(&entry.path) {
223 continue;
224 }
225 let full = ctx.root.join(&entry.path);
226 let Ok(bytes) = std::fs::read(&full) else {
227 continue;
228 };
229 violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
230 }
231 Ok(violations)
232 }
233
234 fn as_per_file(&self) -> Option<&dyn PerFileRule> {
235 Some(self)
236 }
237}
238
239impl PerFileRule for CommentedOutCodeRule {
240 fn path_scope(&self) -> &Scope {
241 &self.scope
242 }
243
244 fn evaluate_file(
245 &self,
246 _ctx: &Context<'_>,
247 path: &Path,
248 bytes: &[u8],
249 ) -> Result<Vec<Violation>> {
250 let lang = self.language.resolve(path);
251 if lang == Language::Auto {
252 return Ok(Vec::new()); }
254 let Ok(text) = std::str::from_utf8(bytes) else {
255 return Ok(Vec::new());
256 };
257 let mut violations = Vec::new();
258 for block in find_comment_blocks(text, lang) {
259 if block.lines.len() < self.min_lines {
260 continue;
261 }
262 if block.start_line <= self.skip_leading_lines {
263 continue;
264 }
265 if block.is_doc_comment {
266 continue;
267 }
268 let density = score_density(&block.content);
269 if density >= self.threshold {
270 let msg = self.message.clone().unwrap_or_else(|| {
271 format!(
272 "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
273 block.lines.len(),
274 density,
275 )
276 });
277 violations.push(
278 Violation::new(msg)
279 .with_path(std::sync::Arc::<Path>::from(path))
280 .with_location(block.start_line, 1),
281 );
282 }
283 }
284 Ok(violations)
285 }
286}
287
288pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
289 let Some(paths) = &spec.paths else {
290 return Err(Error::rule_config(
291 &spec.id,
292 "commented_out_code requires a `paths` field",
293 ));
294 };
295 let opts: Options = spec
296 .deserialize_options()
297 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
298 if opts.min_lines < 2 {
299 return Err(Error::rule_config(
300 &spec.id,
301 "commented_out_code `min_lines` must be ≥ 2",
302 ));
303 }
304 if !(0.0..=1.0).contains(&opts.threshold) {
305 return Err(Error::rule_config(
306 &spec.id,
307 "commented_out_code `threshold` must be between 0.0 and 1.0",
308 ));
309 }
310 Ok(Box::new(CommentedOutCodeRule {
311 id: spec.id.clone(),
312 level: spec.level,
313 policy_url: spec.policy_url.clone(),
314 message: spec.message.clone(),
315 scope: Scope::from_paths_spec(paths)?,
316 language: opts.language,
317 min_lines: opts.min_lines,
318 threshold: opts.threshold,
319 skip_leading_lines: opts.skip_leading_lines,
320 }))
321}
322
/// One contiguous comment found in a source file.
#[derive(Debug)]
struct CommentBlock {
    /// 1-based line number of the block's first line.
    start_line: usize,
    /// The block's source lines exactly as they appear in the file.
    lines: Vec<String>,
    /// The block's text with comment markers removed, one `\n` per line.
    content: String,
    /// Whether the block was recognised as a documentation comment.
    is_doc_comment: bool,
}
336
337fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
338 let mut blocks = Vec::new();
339 let line_markers = lang.line_markers();
340 let doc_line_markers = lang.doc_line_markers();
341 let block_delim = lang.block_delim();
342 let doc_block_delim = lang.doc_block_delim();
343
344 let lines: Vec<&str> = text.lines().collect();
345 let mut i = 0;
346 while i < lines.len() {
347 let line = lines[i];
348 let trimmed = line.trim_start();
349
350 if let Some((open, close)) = block_delim {
352 if trimmed.starts_with(open) {
353 let is_doc = doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
354 let start_line = i + 1;
355 let mut block_lines = Vec::new();
356 let mut block_content = String::new();
357 let mut closed = false;
358 let mut j = i;
359 while j < lines.len() {
360 let l = lines[j];
361 block_lines.push(l.to_string());
362 let stripped = strip_block_comment_markers(l, open, close);
363 block_content.push_str(&stripped);
364 block_content.push('\n');
365 if l.contains(close) && (j > i || trimmed.matches(close).count() > 0) {
366 closed = true;
367 j += 1;
368 break;
369 }
370 j += 1;
371 }
372 if closed {
373 blocks.push(CommentBlock {
374 start_line,
375 lines: block_lines,
376 content: block_content,
377 is_doc_comment: is_doc,
378 });
379 }
380 i = j;
381 continue;
382 }
383 }
384
385 if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
387 let start_line = i + 1;
388 let mut block_lines = Vec::new();
389 let mut block_content = String::new();
390 let mut all_doc = !doc_line_markers.is_empty();
391 let mut j = i;
392 while j < lines.len() {
393 let l = lines[j];
394 let lt = l.trim_start();
395 let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
396 break;
397 };
398 let is_doc_line = doc_line_markers.iter().any(|d| {
399 lt.starts_with(d)
400 && (lt.len() == d.len()
401 || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
402 });
403 if !is_doc_line {
404 all_doc = false;
405 }
406 block_lines.push(l.to_string());
407 block_content.push_str(strip_line_marker(lt, m));
408 block_content.push('\n');
409 j += 1;
410 }
411 blocks.push(CommentBlock {
412 start_line,
413 lines: block_lines,
414 content: block_content,
415 is_doc_comment: all_doc,
416 });
417 i = j;
418 continue;
419 }
420
421 i += 1;
422 }
423 blocks
424}
425
/// Removes a leading `marker` from `line`, then at most one following space.
///
/// If `line` does not start with `marker`, only the optional leading space is
/// removed.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let unmarked = match line.strip_prefix(marker) {
        Some(rest) => rest,
        None => line,
    };
    match unmarked.strip_prefix(' ') {
        Some(rest) => rest,
        None => unmarked,
    }
}
430
/// Strips block-comment decoration from one line of a block comment.
///
/// The line is trimmed, a leading `open` and a trailing `close` are removed,
/// and a Javadoc-style continuation prefix (`* `, or a lone `*`) is dropped.
/// Lines without such a prefix keep whatever whitespace followed `open`.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let body = line.trim();
    let body = body.strip_prefix(open).unwrap_or(body);
    let body = body.strip_suffix(close).unwrap_or(body);

    match body.trim_start() {
        // A bare continuation asterisk carries no content.
        "*" => String::new(),
        inner => inner.strip_prefix("* ").unwrap_or(body).to_string(),
    }
}
449
/// Characters that `score_density` counts as evidence of code: brackets,
/// statement punctuation, and comparison / bitwise operators.
const STRONG_CODE_CHARS: &[char] = &[
    '(',
    ')',
    '{',
    '}',
    '[',
    ']',
    ';',
    '=',
    '<',
    '>',
    '&',
    '|',
    '^',
];

/// Ratio of strong characters to non-whitespace characters at which
/// `score_density` reaches its maximum score of 1.0.
const SATURATION_POINT: f64 = 0.20;
470
471fn score_density(content: &str) -> f64 {
482 let collapsed = drop_long_runs(content);
483 let nonws_count = collapsed.chars().filter(|c| !c.is_whitespace()).count();
484 if nonws_count == 0 {
485 return 0.0;
486 }
487 let strong_count = collapsed
488 .chars()
489 .filter(|c| STRONG_CODE_CHARS.contains(c))
490 .count();
491 #[allow(clippy::cast_precision_loss)]
492 let raw = strong_count as f64 / nonws_count as f64;
493 (raw / SATURATION_POINT).min(1.0)
494}
495
/// Returns `s` with every run of five or more identical characters removed.
///
/// Shorter runs are kept unchanged. Whole runs are dropped, not shortened, so
/// banner separators such as `=====` disappear completely.
fn drop_long_runs(s: &str) -> String {
    /// Longest run of one character that is still kept.
    const MAX_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        // Consume the rest of the run so its length is known before deciding.
        let mut run_len = 1;
        while chars.next_if_eq(&ch).is_some() {
            run_len += 1;
        }
        if run_len <= MAX_KEPT_RUN {
            kept.extend(std::iter::repeat(ch).take(run_len));
        }
    }
    kept
}
521
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text must score above the default threshold, prose below it.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");

        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A run of `//` lines forms one block with the markers removed.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    /// A block made only of `///` lines is flagged as documentation.
    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    /// A `/** … */` block is flagged as documentation.
    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    /// `#` comments are grouped into a block for Python.
    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection plus scoring separates prose comments from code.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// Banner lines made of `=` must not push a comment over the threshold.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    /// Runs of five or more identical characters vanish; shorter runs stay.
    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the `=` run is dropped; the single spaces on either side of it
        // are separate runs of length one, so both survive.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    /// `Auto` resolves by extension and stays `Auto` when there is none.
    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}