1use std::path::Path;
54
55use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
56use serde::Deserialize;
57
58#[derive(Debug, Deserialize)]
59#[serde(deny_unknown_fields)]
60struct Options {
61 #[serde(default)]
65 language: Language,
66 #[serde(default = "default_min_lines")]
70 min_lines: usize,
71 #[serde(default = "default_threshold")]
74 threshold: f64,
75 #[serde(default = "default_skip_leading_lines")]
79 skip_leading_lines: usize,
80}
81
/// Serde default for `Options::min_lines`.
fn default_min_lines() -> usize {
    const DEFAULT_MIN_LINES: usize = 3;
    DEFAULT_MIN_LINES
}
/// Serde default for `Options::threshold`.
fn default_threshold() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 0.5;
    DEFAULT_THRESHOLD
}
/// Serde default for `Options::skip_leading_lines`.
fn default_skip_leading_lines() -> usize {
    const DEFAULT_SKIP_LEADING_LINES: usize = 30;
    DEFAULT_SKIP_LEADING_LINES
}
91
92#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
93#[serde(rename_all = "snake_case")]
94enum Language {
95 #[default]
96 Auto,
97 Rust,
98 Typescript,
99 Javascript,
100 Python,
101 Go,
102 Java,
103 C,
104 Cpp,
105 Ruby,
106 Shell,
107}
108
109impl Language {
110 fn resolve(self, path: &Path) -> Self {
113 if self != Self::Auto {
114 return self;
115 }
116 let ext = path
117 .extension()
118 .and_then(|s| s.to_str())
119 .unwrap_or("")
120 .to_ascii_lowercase();
121 match ext.as_str() {
122 "rs" => Self::Rust,
123 "ts" | "tsx" => Self::Typescript,
124 "js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
125 "py" => Self::Python,
126 "go" => Self::Go,
127 "java" | "kt" | "kts" | "scala" => Self::Java,
128 "c" | "h" => Self::C,
129 "cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
130 "rb" => Self::Ruby,
131 "sh" | "bash" | "zsh" | "fish" => Self::Shell,
132 _ => Self::Auto, }
134 }
135
136 fn line_markers(self) -> &'static [&'static str] {
139 match self {
140 Self::Rust
143 | Self::Typescript
144 | Self::Javascript
145 | Self::Go
146 | Self::Java
147 | Self::C
148 | Self::Cpp => &["//"],
149 Self::Python | Self::Shell | Self::Ruby => &["#"],
150 Self::Auto => &[],
151 }
152 }
153
154 fn doc_line_markers(self) -> &'static [&'static str] {
158 match self {
161 Self::Rust => &["///", "//!"],
162 _ => &[],
163 }
164 }
165
166 fn block_delim(self) -> Option<(&'static str, &'static str)> {
168 match self {
169 Self::Rust
170 | Self::Typescript
171 | Self::Javascript
172 | Self::Go
173 | Self::Java
174 | Self::C
175 | Self::Cpp => Some(("/*", "*/")),
176 _ => None,
177 }
178 }
179
180 fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
183 match self {
184 Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
186 Some(("/**", "*/"))
187 }
188 _ => None,
189 }
190 }
191}
192
193#[derive(Debug)]
194pub struct CommentedOutCodeRule {
195 id: String,
196 level: Level,
197 policy_url: Option<String>,
198 message: Option<String>,
199 scope: Scope,
200 language: Language,
201 min_lines: usize,
202 threshold: f64,
203 skip_leading_lines: usize,
204}
205
206impl Rule for CommentedOutCodeRule {
207 fn id(&self) -> &str {
208 &self.id
209 }
210 fn level(&self) -> Level {
211 self.level
212 }
213 fn policy_url(&self) -> Option<&str> {
214 self.policy_url.as_deref()
215 }
216 fn path_scope(&self) -> Option<&Scope> {
217 Some(&self.scope)
218 }
219
220 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
221 let mut violations = Vec::new();
222 for entry in ctx.index.files() {
223 if !self.scope.matches(&entry.path, ctx.index) {
224 continue;
225 }
226 let full = ctx.root.join(&entry.path);
227 let Ok(bytes) = std::fs::read(&full) else {
228 continue;
229 };
230 violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
231 }
232 Ok(violations)
233 }
234
235 fn as_per_file(&self) -> Option<&dyn PerFileRule> {
236 Some(self)
237 }
238}
239
240impl PerFileRule for CommentedOutCodeRule {
241 fn path_scope(&self) -> &Scope {
242 &self.scope
243 }
244
245 fn evaluate_file(
246 &self,
247 _ctx: &Context<'_>,
248 path: &Path,
249 bytes: &[u8],
250 ) -> Result<Vec<Violation>> {
251 let lang = self.language.resolve(path);
252 if lang == Language::Auto {
253 return Ok(Vec::new()); }
255 let Ok(text) = std::str::from_utf8(bytes) else {
256 return Ok(Vec::new());
257 };
258 let mut violations = Vec::new();
259 for block in find_comment_blocks(text, lang) {
260 if block.lines.len() < self.min_lines {
261 continue;
262 }
263 if block.start_line <= self.skip_leading_lines {
264 continue;
265 }
266 if block.is_doc_comment {
267 continue;
268 }
269 let density = score_density(&block.content);
270 if density >= self.threshold {
271 let msg = self.message.clone().unwrap_or_else(|| {
272 format!(
273 "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
274 block.lines.len(),
275 density,
276 )
277 });
278 violations.push(
279 Violation::new(msg)
280 .with_path(std::sync::Arc::<Path>::from(path))
281 .with_location(block.start_line, 1),
282 );
283 }
284 }
285 Ok(violations)
286 }
287}
288
289pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
290 let Some(_paths) = &spec.paths else {
291 return Err(Error::rule_config(
292 &spec.id,
293 "commented_out_code requires a `paths` field",
294 ));
295 };
296 let opts: Options = spec
297 .deserialize_options()
298 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
299 if opts.min_lines < 2 {
300 return Err(Error::rule_config(
301 &spec.id,
302 "commented_out_code `min_lines` must be ≥ 2",
303 ));
304 }
305 if !(0.0..=1.0).contains(&opts.threshold) {
306 return Err(Error::rule_config(
307 &spec.id,
308 "commented_out_code `threshold` must be between 0.0 and 1.0",
309 ));
310 }
311 Ok(Box::new(CommentedOutCodeRule {
312 id: spec.id.clone(),
313 level: spec.level,
314 policy_url: spec.policy_url.clone(),
315 message: spec.message.clone(),
316 scope: Scope::from_spec(spec)?,
317 language: opts.language,
318 min_lines: opts.min_lines,
319 threshold: opts.threshold,
320 skip_leading_lines: opts.skip_leading_lines,
321 }))
322}
323
/// One contiguous comment (a block comment or a run of line comments)
/// found in a source file.
#[derive(Debug)]
struct CommentBlock {
    /// 1-based line number of the block's first line.
    start_line: usize,
    /// The raw source lines making up the block, markers included.
    lines: Vec<String>,
    /// The block's text with comment markers stripped, one `\n`-terminated
    /// entry per source line.
    content: String,
    /// Whether the block was recognised as a documentation comment.
    is_doc_comment: bool,
}
337
338fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
339 let mut blocks = Vec::new();
340 let line_markers = lang.line_markers();
341 let doc_line_markers = lang.doc_line_markers();
342 let block_delim = lang.block_delim();
343 let doc_block_delim = lang.doc_block_delim();
344
345 let lines: Vec<&str> = text.lines().collect();
346 let mut i = 0;
347 while i < lines.len() {
348 let line = lines[i];
349 let trimmed = line.trim_start();
350
351 if let Some((open, close)) = block_delim {
353 if trimmed.starts_with(open) {
354 let is_doc = doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
355 let start_line = i + 1;
356 let mut block_lines = Vec::new();
357 let mut block_content = String::new();
358 let mut closed = false;
359 let mut j = i;
360 while j < lines.len() {
361 let l = lines[j];
362 block_lines.push(l.to_string());
363 let stripped = strip_block_comment_markers(l, open, close);
364 block_content.push_str(&stripped);
365 block_content.push('\n');
366 if l.contains(close) && (j > i || trimmed.matches(close).count() > 0) {
367 closed = true;
368 j += 1;
369 break;
370 }
371 j += 1;
372 }
373 if closed {
374 blocks.push(CommentBlock {
375 start_line,
376 lines: block_lines,
377 content: block_content,
378 is_doc_comment: is_doc,
379 });
380 }
381 i = j;
382 continue;
383 }
384 }
385
386 if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
388 let start_line = i + 1;
389 let mut block_lines = Vec::new();
390 let mut block_content = String::new();
391 let mut all_doc = !doc_line_markers.is_empty();
392 let mut j = i;
393 while j < lines.len() {
394 let l = lines[j];
395 let lt = l.trim_start();
396 let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
397 break;
398 };
399 let is_doc_line = doc_line_markers.iter().any(|d| {
400 lt.starts_with(d)
401 && (lt.len() == d.len()
402 || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
403 });
404 if !is_doc_line {
405 all_doc = false;
406 }
407 block_lines.push(l.to_string());
408 block_content.push_str(strip_line_marker(lt, m));
409 block_content.push('\n');
410 j += 1;
411 }
412 blocks.push(CommentBlock {
413 start_line,
414 lines: block_lines,
415 content: block_content,
416 is_doc_comment: all_doc,
417 });
418 i = j;
419 continue;
420 }
421
422 i += 1;
423 }
424 blocks
425}
426
/// Removes a leading `marker` from `line`, then at most one following space.
///
/// If `line` does not start with `marker` it is left as is, apart from
/// the single-space strip, which still applies.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let body = match line.strip_prefix(marker) {
        Some(rest) => rest,
        None => line,
    };
    match body.strip_prefix(' ') {
        Some(rest) => rest,
        None => body,
    }
}
431
/// Strips block-comment decoration from a single line of a block comment.
///
/// The line is trimmed, then a leading `open` and a trailing `close` are
/// removed if present. If what remains (ignoring leading whitespace) is
/// a gutter asterisk, that is dropped too: `"* text"` yields `"text"` and
/// a bare `"*"` yields an empty string. Otherwise the delimiter-stripped
/// text is returned, including any whitespace left next to the delimiters.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let inner = line.trim();
    let inner = inner.strip_prefix(open).unwrap_or(inner);
    let inner = inner.strip_suffix(close).unwrap_or(inner);

    match inner.trim_start() {
        "*" => String::new(),
        gutter => match gutter.strip_prefix("* ") {
            Some(rest) => rest.to_owned(),
            None => inner.to_owned(),
        },
    }
}
450
/// Characters counted as evidence of code when scoring a comment block
/// (see `score_density`).
const STRONG_CODE_CHARS: &[char] = &[
    '(', ')', // parentheses
    '{', '}', // braces
    '[', ']', // brackets
    ';', '=', // statement end, assignment
    '<', '>', // comparison / generics
    '&', '|', '^', // bitwise / logical operators
];
464
/// Ratio of strong code characters to non-whitespace characters at which
/// the density score reaches its maximum of 1.0 (see `score_density`).
const SATURATION_POINT: f64 = 0.2;
471
472fn score_density(content: &str) -> f64 {
483 let collapsed = drop_long_runs(content);
484 let nonws_count = collapsed.chars().filter(|c| !c.is_whitespace()).count();
485 if nonws_count == 0 {
486 return 0.0;
487 }
488 let strong_count = collapsed
489 .chars()
490 .filter(|c| STRONG_CODE_CHARS.contains(c))
491 .count();
492 #[allow(clippy::cast_precision_loss)]
493 let raw = strong_count as f64 / nonws_count as f64;
494 (raw / SATURATION_POINT).min(1.0)
495}
496
/// Returns `s` with every run of five or more identical consecutive
/// characters removed entirely; shorter runs are kept unchanged.
fn drop_long_runs(s: &str) -> String {
    /// Longest run of one repeated character that is still kept.
    const MAX_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(current) = chars.next() {
        // Consume the rest of the run that starts at `current`.
        let mut run_len = 1usize;
        while chars.next_if_eq(&current).is_some() {
            run_len += 1;
        }
        if run_len <= MAX_KEPT_RUN {
            for _ in 0..run_len {
                kept.push(current);
            }
        }
    }
    kept
}
522
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text must score above the default threshold, prose below it.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");

        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A run of `//` lines forms one block whose content has the markers removed.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    /// A block made only of `///` lines is flagged as documentation.
    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    /// A `/** … */` block is flagged as documentation.
    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    /// `#` comments are recognised for Python.
    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection plus scoring separates prose comments from code comments.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// Banner lines made of `=` must not count as code even though `=` is a strong char.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    /// Runs of five or more identical characters are dropped; shorter runs survive.
    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the `=` run is removed. The space on each side of it is a
        // separate one-character run, so both spaces remain.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    /// `Auto` resolves by extension and stays `Auto` when there is none.
    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}