1use std::path::Path;
54
55use alint_core::{
56 Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation, eval_per_file,
57};
58use serde::Deserialize;
59
/// User-supplied options for the `commented_out_code` rule.
///
/// Unknown keys are rejected during deserialization
/// (`deny_unknown_fields`); every field is optional and falls back to the
/// default noted below.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Comment syntax to assume. Defaults to `Language::Auto`, which picks
    /// the language from each file's extension.
    #[serde(default)]
    language: Language,
    /// Minimum number of lines a comment block must span to be considered.
    /// Defaults to 3; `build` rejects values below 2.
    #[serde(default = "default_min_lines")]
    min_lines: usize,
    /// Density score at or above which a block is reported. Defaults to
    /// 0.5; `build` requires it to lie within `0.0..=1.0`.
    #[serde(default = "default_threshold")]
    threshold: f64,
    /// Blocks whose first line number (1-based) is less than or equal to
    /// this value are ignored. Defaults to 30.
    #[serde(default = "default_skip_leading_lines")]
    skip_leading_lines: usize,
}
83
/// Serde default for `Options::min_lines`.
fn default_min_lines() -> usize {
    const MIN_BLOCK_LINES: usize = 3;
    MIN_BLOCK_LINES
}

/// Serde default for `Options::threshold`.
fn default_threshold() -> f64 {
    const DENSITY_THRESHOLD: f64 = 0.5;
    DENSITY_THRESHOLD
}

/// Serde default for `Options::skip_leading_lines`.
fn default_skip_leading_lines() -> usize {
    const LEADING_LINES_SKIPPED: usize = 30;
    LEADING_LINES_SKIPPED
}
93
/// Comment-syntax family used when scanning a file.
///
/// Deserialized from its snake_case name (`auto`, `rust`, `typescript`, …).
#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum Language {
    /// Pick the language from the file extension (see `Language::resolve`).
    /// Also the result of resolution when the extension is not recognised.
    #[default]
    Auto,
    /// `.rs`
    Rust,
    /// `.ts`, `.tsx`
    Typescript,
    /// `.js`, `.jsx`, `.mjs`, `.cjs`
    Javascript,
    /// `.py`
    Python,
    /// `.go`
    Go,
    /// `.java`; auto-detection also maps `.kt`, `.kts` and `.scala` here.
    Java,
    /// `.c`, `.h`
    C,
    /// `.cc`, `.cpp`, `.cxx`, `.hpp`, `.hh`
    Cpp,
    /// `.rb`
    Ruby,
    /// `.sh`, `.bash`, `.zsh`, `.fish`
    Shell,
}
110
111impl Language {
112 fn resolve(self, path: &Path) -> Self {
115 if self != Self::Auto {
116 return self;
117 }
118 let ext = path
119 .extension()
120 .and_then(|s| s.to_str())
121 .unwrap_or("")
122 .to_ascii_lowercase();
123 match ext.as_str() {
124 "rs" => Self::Rust,
125 "ts" | "tsx" => Self::Typescript,
126 "js" | "jsx" | "mjs" | "cjs" => Self::Javascript,
127 "py" => Self::Python,
128 "go" => Self::Go,
129 "java" | "kt" | "kts" | "scala" => Self::Java,
130 "c" | "h" => Self::C,
131 "cc" | "cpp" | "cxx" | "hpp" | "hh" => Self::Cpp,
132 "rb" => Self::Ruby,
133 "sh" | "bash" | "zsh" | "fish" => Self::Shell,
134 _ => Self::Auto, }
136 }
137
138 fn line_markers(self) -> &'static [&'static str] {
141 match self {
142 Self::Rust
145 | Self::Typescript
146 | Self::Javascript
147 | Self::Go
148 | Self::Java
149 | Self::C
150 | Self::Cpp => &["//"],
151 Self::Python | Self::Shell | Self::Ruby => &["#"],
152 Self::Auto => &[],
153 }
154 }
155
156 fn doc_line_markers(self) -> &'static [&'static str] {
160 match self {
163 Self::Rust => &["///", "//!"],
164 _ => &[],
165 }
166 }
167
168 fn block_delim(self) -> Option<(&'static str, &'static str)> {
170 match self {
171 Self::Rust
172 | Self::Typescript
173 | Self::Javascript
174 | Self::Go
175 | Self::Java
176 | Self::C
177 | Self::Cpp => Some(("/*", "*/")),
178 _ => None,
179 }
180 }
181
182 fn doc_block_delim(self) -> Option<(&'static str, &'static str)> {
185 match self {
186 Self::Rust | Self::Typescript | Self::Javascript | Self::Java | Self::Cpp => {
188 Some(("/**", "*/"))
189 }
190 _ => None,
191 }
192 }
193}
194
/// Rule that reports comment blocks whose content looks like code.
///
/// Instances are created by `build` from a `RuleSpec`.
#[derive(Debug)]
pub struct CommentedOutCodeRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Level copied from the spec.
    level: Level,
    /// Optional policy URL copied from the spec.
    policy_url: Option<String>,
    /// Optional message that replaces the generated violation text.
    message: Option<String>,
    /// Path scope built from the spec.
    scope: Scope,
    /// Configured language; `Auto` is resolved per file from its extension.
    language: Language,
    /// Blocks with fewer lines than this are ignored.
    min_lines: usize,
    /// Density score at or above which a block is reported.
    threshold: f64,
    /// Blocks starting on or before this 1-based line number are ignored.
    skip_leading_lines: usize,
}
207
impl Rule for CommentedOutCodeRule {
    // NOTE(review): macro from `alint_core`; its expansion is not visible
    // here. Presumably it supplies the accessors backed by the `id`,
    // `level`, `policy_url` and `message` fields — confirm against the macro.
    alint_core::rule_common_impl!();

    /// Exposes the rule's path scope.
    fn path_scope(&self) -> Option<&Scope> {
        Some(&self.scope)
    }

    /// Evaluates the rule by delegating to `eval_per_file`, which is
    /// expected to drive `PerFileRule::evaluate_file`.
    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
        eval_per_file(self, ctx)
    }

    /// This rule is a per-file rule, so it hands itself out as one.
    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
        Some(self)
    }
}
222
223impl PerFileRule for CommentedOutCodeRule {
224 fn path_scope(&self) -> &Scope {
225 &self.scope
226 }
227
228 fn evaluate_file(
229 &self,
230 _ctx: &Context<'_>,
231 path: &Path,
232 bytes: &[u8],
233 ) -> Result<Vec<Violation>> {
234 let lang = self.language.resolve(path);
235 if lang == Language::Auto {
236 return Ok(Vec::new()); }
238 let Ok(text) = std::str::from_utf8(bytes) else {
239 return Ok(Vec::new());
240 };
241 let mut violations = Vec::new();
242 for block in find_comment_blocks(text, lang) {
243 if block.lines.len() < self.min_lines {
244 continue;
245 }
246 if block.start_line <= self.skip_leading_lines {
247 continue;
248 }
249 if block.is_doc_comment {
250 continue;
251 }
252 let density = score_density(&block.content);
253 if density >= self.threshold {
254 let msg = self.message.clone().unwrap_or_else(|| {
255 format!(
256 "block of {} commented-out lines (density {:.2}); remove or convert to runtime-checked branch",
257 block.lines.len(),
258 density,
259 )
260 });
261 violations.push(
262 Violation::new(msg)
263 .with_path(std::sync::Arc::<Path>::from(path))
264 .with_location(block.start_line, 1),
265 );
266 }
267 }
268 Ok(violations)
269 }
270}
271
272pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
273 let Some(_paths) = &spec.paths else {
274 return Err(Error::rule_config(
275 &spec.id,
276 "commented_out_code requires a `paths` field",
277 ));
278 };
279 let opts: Options = spec
280 .deserialize_options()
281 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
282 if opts.min_lines < 2 {
283 return Err(Error::rule_config(
284 &spec.id,
285 "commented_out_code `min_lines` must be ≥ 2",
286 ));
287 }
288 if !(0.0..=1.0).contains(&opts.threshold) {
289 return Err(Error::rule_config(
290 &spec.id,
291 "commented_out_code `threshold` must be between 0.0 and 1.0",
292 ));
293 }
294 Ok(Box::new(CommentedOutCodeRule {
295 id: spec.id.clone(),
296 level: spec.level,
297 policy_url: spec.policy_url.clone(),
298 message: spec.message.clone(),
299 scope: Scope::from_spec(spec)?,
300 language: opts.language,
301 min_lines: opts.min_lines,
302 threshold: opts.threshold,
303 skip_leading_lines: opts.skip_leading_lines,
304 }))
305}
306
/// One comment block found by `find_comment_blocks`.
#[derive(Debug)]
struct CommentBlock {
    /// 1-based line number of the block's first line.
    start_line: usize,
    /// The block's source lines exactly as they appear in the file.
    lines: Vec<String>,
    /// The block's text with comment markers stripped; each source line is
    /// followed by a `\n`.
    content: String,
    /// Whether the block is a documentation comment (a doc block comment,
    /// or a line-comment run made up entirely of doc lines).
    is_doc_comment: bool,
}
320
321fn find_comment_blocks(text: &str, lang: Language) -> Vec<CommentBlock> {
322 let mut blocks = Vec::new();
323 let line_markers = lang.line_markers();
324 let doc_line_markers = lang.doc_line_markers();
325 let block_delim = lang.block_delim();
326 let doc_block_delim = lang.doc_block_delim();
327
328 let lines: Vec<&str> = text.lines().collect();
329 let mut i = 0;
330 while i < lines.len() {
331 let line = lines[i];
332 let trimmed = line.trim_start();
333
334 if let Some((open, close)) = block_delim {
336 if trimmed.starts_with(open) {
337 let is_doc = doc_block_delim.is_some_and(|(d_open, _)| trimmed.starts_with(d_open));
338 let start_line = i + 1;
339 let mut block_lines = Vec::new();
340 let mut block_content = String::new();
341 let mut closed = false;
342 let mut j = i;
343 while j < lines.len() {
344 let l = lines[j];
345 block_lines.push(l.to_string());
346 let stripped = strip_block_comment_markers(l, open, close);
347 block_content.push_str(&stripped);
348 block_content.push('\n');
349 if l.contains(close) && (j > i || trimmed.matches(close).count() > 0) {
350 closed = true;
351 j += 1;
352 break;
353 }
354 j += 1;
355 }
356 if closed {
357 blocks.push(CommentBlock {
358 start_line,
359 lines: block_lines,
360 content: block_content,
361 is_doc_comment: is_doc,
362 });
363 }
364 i = j;
365 continue;
366 }
367 }
368
369 if line_markers.iter().any(|m| trimmed.starts_with(*m)) {
371 let start_line = i + 1;
372 let mut block_lines = Vec::new();
373 let mut block_content = String::new();
374 let mut all_doc = !doc_line_markers.is_empty();
375 let mut j = i;
376 while j < lines.len() {
377 let l = lines[j];
378 let lt = l.trim_start();
379 let Some(m) = line_markers.iter().find(|mk| lt.starts_with(*mk)).copied() else {
380 break;
381 };
382 let is_doc_line = doc_line_markers.iter().any(|d| {
383 lt.starts_with(d)
384 && (lt.len() == d.len()
385 || !lt[d.len()..].starts_with(m.chars().next().unwrap_or(' ')))
386 });
387 if !is_doc_line {
388 all_doc = false;
389 }
390 block_lines.push(l.to_string());
391 block_content.push_str(strip_line_marker(lt, m));
392 block_content.push('\n');
393 j += 1;
394 }
395 blocks.push(CommentBlock {
396 start_line,
397 lines: block_lines,
398 content: block_content,
399 is_doc_comment: all_doc,
400 });
401 i = j;
402 continue;
403 }
404
405 i += 1;
406 }
407 blocks
408}
409
/// Removes a leading `marker` from `line` (if present) and then at most one
/// space that follows it, returning the remaining slice.
fn strip_line_marker<'a>(line: &'a str, marker: &str) -> &'a str {
    let body = line.strip_prefix(marker).unwrap_or(line);
    // A space is one byte, so skipping it is a 0- or 1-byte offset.
    let padding = usize::from(body.starts_with(' '));
    &body[padding..]
}
414
/// Strips block-comment decoration from a single line of a block comment.
///
/// The line is trimmed, then a leading `open` and a trailing `close` are
/// removed when present. If what remains (ignoring leading whitespace) is a
/// lone `*` the result is empty; if it starts with `* ` that gutter is
/// dropped; otherwise the remainder is returned as-is.
fn strip_block_comment_markers(line: &str, open: &str, close: &str) -> String {
    let body = line.trim();
    let body = body.strip_prefix(open).unwrap_or(body);
    let body = body.strip_suffix(close).unwrap_or(body);
    match body.trim_start() {
        "*" => String::new(),
        inner => inner.strip_prefix("* ").unwrap_or(body).to_owned(),
    }
}
433
/// Characters that `score_density` counts as evidence of code.
const STRONG_CODE_CHARS: &[char] = &[
    '(', ')', '{', '}', '[', ']', ';', '=', '<', '>', '&', '|', '^',
];

/// Share of strong characters among non-whitespace characters at which
/// `score_density` saturates: a raw ratio of this value or more scores 1.0.
const SATURATION_POINT: f64 = 0.20;
454
455fn score_density(content: &str) -> f64 {
466 let collapsed = drop_long_runs(content);
467 let nonws_count = collapsed.chars().filter(|c| !c.is_whitespace()).count();
468 if nonws_count == 0 {
469 return 0.0;
470 }
471 let strong_count = collapsed
472 .chars()
473 .filter(|c| STRONG_CODE_CHARS.contains(c))
474 .count();
475 #[allow(clippy::cast_precision_loss)]
476 let raw = strong_count as f64 / nonws_count as f64;
477 (raw / SATURATION_POINT).min(1.0)
478}
479
/// Returns `s` with every run of five or more identical consecutive
/// characters removed; shorter runs are kept unchanged.
fn drop_long_runs(s: &str) -> String {
    /// Longest run of a repeated character that is still kept.
    const MAX_KEPT_RUN: usize = 4;

    let mut kept = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(current) = chars.next() {
        // Measure the run by consuming every immediate repeat of `current`.
        let mut run_len = 1usize;
        while chars.next_if_eq(&current).is_some() {
            run_len += 1;
        }
        if run_len <= MAX_KEPT_RUN {
            kept.extend(std::iter::repeat(current).take(run_len));
        }
    }
    kept
}
505
#[cfg(test)]
mod tests {
    use super::*;

    /// Code-like text saturates the score; plain prose stays low.
    #[test]
    fn density_high_for_code_low_for_prose() {
        let code = "let x = compute(y, z); if x > 0 { return x; }";
        let d_code = score_density(code);
        assert!(d_code > 0.5, "code density {d_code} should be > 0.5");

        let prose = "This module parses RFC 9535 JSONPath expressions and resolves them.";
        let d_prose = score_density(prose);
        assert!(d_prose < 0.5, "prose density {d_prose} should be < 0.5");
    }

    /// A run of `//` lines becomes one block with the markers removed.
    #[test]
    fn line_block_in_rust_detected_with_markers_stripped() {
        let src = "fn main() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log(\"unused\");\nfn other() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let b = &blocks[0];
        assert_eq!(b.lines.len(), 3);
        assert_eq!(b.start_line, 2);
        assert!(b.content.contains("let x = compute(y);"));
        assert!(!b.is_doc_comment);
    }

    /// A run made up entirely of `///` lines is flagged as documentation.
    #[test]
    fn rust_doc_line_comments_marked_as_doc() {
        let src = "/// Documents the next item.\n/// More docs.\n/// Even more.\nfn foo() {}";
        let blocks = find_comment_blocks(src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_doc_comment, "/// block must be marked as doc");
    }

    /// A `/** … */` block is flagged as documentation.
    #[test]
    fn block_comment_javadoc_marked_as_doc() {
        let src = "/**\n * Documented.\n * @param x foo\n */\nfunction bar() {}";
        let blocks = find_comment_blocks(src, Language::Typescript);
        assert!(!blocks.is_empty());
        assert!(blocks[0].is_doc_comment, "/** … */ must be marked as doc");
    }

    /// `#` comments are grouped for hash-comment languages.
    #[test]
    fn python_hash_block_detected() {
        let src = "x = 1\n# old = compute(x)\n# if old > 0:\n# print(old)\nprint(x)";
        let blocks = find_comment_blocks(src, Language::Python);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].content.contains("old = compute(x)"));
    }

    /// Block detection plus scoring separates prose from commented-out code
    /// at the default threshold.
    #[test]
    fn end_to_end_threshold_filters_prose() {
        let prose_src = "fn foo() {}\n// This is a normal explanatory comment\n// describing what foo does.\n// Multiple lines of prose.";
        let blocks = find_comment_blocks(prose_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "prose comment density {d} should be < 0.5");

        let code_src = "fn foo() {}\n// let x = compute(y);\n// if x > 0 { return x; }\n// log_metric(\"path-a\", x);";
        let blocks = find_comment_blocks(code_src, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d >= 0.5, "code comment density {d} should be >= 0.5");
    }

    /// Long `=` rules in banner comments must not count as code characters.
    #[test]
    fn banner_separators_dont_score_as_code() {
        let banner = "// ============================================\n\
                      // Section Title\n\
                      // ============================================";
        let blocks = find_comment_blocks(banner, Language::Rust);
        assert_eq!(blocks.len(), 1);
        let d = score_density(&blocks[0].content);
        assert!(d < 0.5, "banner density {d} should be < 0.5");
    }

    /// Runs of five or more identical characters vanish; shorter runs stay.
    #[test]
    fn drop_long_runs_strips_banners() {
        // Only the `=` run is dropped, so the single space on each side of
        // it survives and the result contains two spaces.
        assert_eq!(drop_long_runs("foo ============= bar"), "foo  bar");
        assert_eq!(drop_long_runs("a==b"), "a==b");
        assert_eq!(drop_long_runs("a===b"), "a===b");
        assert_eq!(drop_long_runs("a====b"), "a====b");
        assert_eq!(drop_long_runs("a=====b"), "ab");
    }

    /// `Auto` resolves from the extension and stays `Auto` when unknown.
    #[test]
    fn language_extension_resolution() {
        let path = Path::new("foo.rs");
        assert_eq!(Language::Auto.resolve(path), Language::Rust);
        let path = Path::new("foo.py");
        assert_eq!(Language::Auto.resolve(path), Language::Python);
        let path = Path::new("foo.tsx");
        assert_eq!(Language::Auto.resolve(path), Language::Typescript);
        let path = Path::new("unknown");
        assert_eq!(Language::Auto.resolve(path), Language::Auto);
    }
}