use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphQueryExt;
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::LazyLock;
use tracing::info;
/// Matches an identifier that is followed, optionally after whitespace, by `(`.
/// Capture group 1 holds the identifier. The pattern is compiled on first use.
static FUNC_REF: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(").expect("valid regex"));
/// Detector that reports blocks of comment lines which look like commented-out code.
pub struct CommentedCodeDetector {
/// Repository root given to `new`. Nothing in this file reads it back, which is
/// why the `dead_code` lint is silenced for this field.
#[allow(dead_code)] repository_path: PathBuf,
/// Finding limit that `detect` checks while collecting results.
max_findings: usize,
/// Minimum number of code-like comment lines a block needs before it is reported.
min_lines: usize,
}
impl CommentedCodeDetector {
/// Builds a detector for the given repository root.
///
/// `max_findings` is set to 50 and `min_lines` to 5.
pub fn new(repository_path: impl Into<PathBuf>) -> Self {
    let repository_path: PathBuf = repository_path.into();
    Self {
        repository_path,
        max_findings: 50,
        min_lines: 5,
    }
}
/// Returns `true` when `line` contains at least one substring that is a strong
/// hint of source code: a declaration/flow keyword followed by a space, or a
/// comparison, logical, arrow or compound-assignment operator.
fn has_strong_code_indicator(line: &str) -> bool {
    // Keywords carry a trailing space so that e.g. "returns" does not match "return ".
    const KEYWORDS: &[&str] = &[
        "def ", "fn ", "function ", "class ", "import ", "from ", "return ", "const ", "let ",
        "var ",
    ];
    const OPERATORS: &[&str] = &["==", "!=", "&&", "||", "->", "=>", "+=", "-="];

    for needle in KEYWORDS.iter().chain(OPERATORS.iter()) {
        if line.contains(*needle) {
            return true;
        }
    }
    false
}
/// Returns `true` when `line` contains a substring that merely hints at code
/// (an `=`, empty bracket pairs, a semicolon, or a control-flow word). These
/// also occur in ordinary prose, so they are weaker evidence than the strong set.
fn has_weak_code_indicator(line: &str) -> bool {
    const WEAK_TOKENS: &[&str] = &["=", "()", "{}", "[]", ";", "if ", "else", "for ", "while "];
    WEAK_TOKENS.iter().copied().any(|token| line.contains(token))
}
/// Returns `true` when `line` carries any code indicator, strong or weak.
fn looks_like_code(line: &str) -> bool {
    if Self::has_strong_code_indicator(line) {
        return true;
    }
    Self::has_weak_code_indicator(line)
}
/// Returns `true` when `line` contains a developer annotation marker
/// (TODO, FIXME, XXX, HACK, NOTE:, BUG:, DEPRECATED), compared case-insensitively.
fn is_annotation_comment(line: &str) -> bool {
    const MARKERS: &[&str] = &["TODO", "FIXME", "XXX", "HACK", "NOTE:", "BUG:", "DEPRECATED"];
    // Upper-case once so every marker can be compared against the same buffer.
    let shouted = line.to_uppercase();
    MARKERS.iter().any(|marker| shouted.contains(*marker))
}
/// Returns `true` when `line` contains wording typical of a license or
/// copyright header, compared case-insensitively.
fn is_license_comment(line: &str) -> bool {
    const LICENSE_PHRASES: &[&str] = &[
        "COPYRIGHT",
        "LICENSE",
        "PERMISSION IS HEREBY GRANTED",
        "ALL RIGHTS RESERVED",
        "SPDX-LICENSE",
        "WARRANTY",
        "REDISTRIBUTION",
    ];
    let shouted = line.to_uppercase();
    for phrase in LICENSE_PHRASES {
        if shouted.contains(*phrase) {
            return true;
        }
    }
    false
}
/// Collects the distinct identifiers that appear directly before a `(` in
/// `lines[start..end]`, leaving out a fixed set of language keywords that can
/// also precede a parenthesis.
///
/// An out-of-bounds or inverted range yields an empty set rather than a panic.
fn extract_func_refs(lines: &[&str], start: usize, end: usize) -> HashSet<String> {
    let window: &[&str] = lines.get(start..end).unwrap_or(&[]);
    window
        .iter()
        .flat_map(|text| FUNC_REF.captures_iter(text))
        .filter_map(|caps| caps.get(1))
        .map(|ident| ident.as_str())
        .filter(|ident| {
            // Keywords followed by `(` are control flow or declarations, not calls.
            !matches!(
                *ident,
                "if" | "for"
                    | "while"
                    | "function"
                    | "def"
                    | "class"
                    | "return"
                    | "import"
                    | "from"
            )
        })
        .map(str::to_string)
        .collect()
}
/// Splits `refs` by membership in `all_func_names`.
///
/// Returns `(existing, missing)`: how many referenced names are present in
/// `all_func_names` and how many are not.
fn check_func_existence(
    all_func_names: &HashSet<String>,
    refs: &HashSet<String>,
) -> (usize, usize) {
    refs.iter().fold((0, 0), |(existing, missing), name| {
        if all_func_names.contains(name) {
            (existing + 1, missing)
        } else {
            (existing, missing + 1)
        }
    })
}
}
impl Detector for CommentedCodeDetector {
fn name(&self) -> &'static str {
"commented-code"
}
fn description(&self) -> &'static str {
"Detects large blocks of commented code"
}
fn requires_graph(&self) -> bool {
true
}
fn file_extensions(&self) -> &'static [&'static str] {
&[
"py", "js", "ts", "jsx", "tsx", "rb", "java", "go", "rs", "c", "cpp", "cs",
]
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let i = graph.interner();
let mut findings = vec![];
let all_func_names: HashSet<String> = graph
.get_functions_shared()
.iter()
.map(|f| f.node_name(i).to_string())
.collect();
for path in files.files_with_extensions(&[
"py", "js", "ts", "jsx", "tsx", "java", "go", "rs", "rb", "php", "c", "cpp", "cs",
]) {
if findings.len() >= self.max_findings {
break;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if let Some(content) = files.content(path) {
let lines: Vec<&str> = content.lines().collect();
let mut i = 0;
while i < lines.len() {
let line = lines[i].trim();
if line.starts_with("//!") || line.starts_with("///") {
i += 1;
continue;
}
let is_comment = line.starts_with("//")
|| (line.starts_with("#") && ext != "rs")
|| line.starts_with("*");
if is_comment
&& (Self::is_annotation_comment(line) || Self::is_license_comment(line))
{
i += 1;
continue;
}
if is_comment && Self::looks_like_code(line) {
let start = i;
let mut code_lines = 1;
let mut j = i + 1;
let mut has_annotation = false;
let mut has_strong = Self::has_strong_code_indicator(line);
while j < lines.len() {
let next = lines[j].trim();
let next_is_comment = next.starts_with("//")
|| next.starts_with("#")
|| next.starts_with("*");
if Self::is_annotation_comment(next) {
has_annotation = true;
}
if next_is_comment && Self::looks_like_code(next) {
if Self::has_strong_code_indicator(next) {
has_strong = true;
}
code_lines += 1;
j += 1;
} else if next.is_empty()
|| (next_is_comment && !Self::looks_like_code(next))
{
j += 1;
} else {
break;
}
}
if code_lines >= self.min_lines && has_strong {
let func_refs = Self::extract_func_refs(&lines, start, j);
let (existing, missing) =
Self::check_func_existence(&all_func_names, &func_refs);
let mut notes = Vec::new();
if !func_refs.is_empty() {
if missing > 0 && existing == 0 {
notes.push(format!("⚠️ References {} functions that no longer exist - likely stale", missing));
} else if missing > existing {
notes.push(format!("📊 {} of {} referenced functions missing - probably outdated", missing, func_refs.len()));
}
}
if has_annotation {
notes.push(
"📝 Contains TODO/FIXME - may be intentionally preserved"
.to_string(),
);
}
let context_notes = if notes.is_empty() {
String::new()
} else {
format!("\n\n**Analysis:**\n{}", notes.join("\n"))
};
let severity = if (missing > 0 && existing == 0) || code_lines > 20 {
Severity::Medium } else {
Severity::Low
};
let suggestion = if missing > existing {
"This commented code references functions that no longer exist.\n\
It's likely outdated - delete it (version control has history)."
.to_string()
} else if has_annotation {
"This block contains TODO/FIXME markers. Either:\n\
1. Complete the TODO and uncomment the code\n\
2. Delete if no longer relevant"
.to_string()
} else {
"Delete commented code (version control has history).".to_string()
};
findings.push(Finding {
id: String::new(),
detector: "CommentedCodeDetector".to_string(),
severity,
title: format!("{} lines of commented code", code_lines),
description: format!(
"Large block of commented code should be removed.{}",
context_notes
),
affected_files: vec![path.to_path_buf()],
line_start: Some((start + 1) as u32),
line_end: Some(j as u32),
suggested_fix: Some(suggestion),
estimated_effort: Some("5 minutes".to_string()),
category: Some("maintainability".to_string()),
cwe_id: None,
why_it_matters: Some(
"Commented code clutters the codebase and can confuse developers. \
If the code was important, it's in version control history.".to_string()
),
..Default::default()
});
}
i = j;
} else {
i += 1;
}
}
}
}
info!(
"CommentedCodeDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for CommentedCodeDetector {
    /// Constructs the detector from `init.repo_path` and returns it as a shared
    /// `Detector` trait object.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}
// Unit tests for `CommentedCodeDetector::detect`. Each test runs the detector
// over a single in-memory Python file, using a graph built with no input.
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
// Six consecutive `#` lines holding Python-looking statements, placed between
// two live functions, must yield at least one finding.
#[test]
fn test_detects_commented_code_block() {
let store = GraphBuilder::new().freeze();
let detector = CommentedCodeDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("example.py", "def active():\n pass\n\n# if condition:\n# x = 1\n# y = x + 2\n# result = process(x, y)\n# return result\n# foo = bar()\n\ndef another():\n pass\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect commented code block. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
// A header made of plain English sentences must not be reported.
#[test]
fn test_no_finding_for_normal_comments() {
let store = GraphBuilder::new().freeze();
let detector = CommentedCodeDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("clean.py", "# This module handles user authentication.\n# It provides login and logout functionality.\n# See the docs for more information.\n# Created by the team in 2024.\n# Licensed under MIT.\n\ndef login(user, password):\n return authenticate(user, password)\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag normal comments. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
// A copyright/permission header must not be reported, even though it
// contains parentheses and other punctuation.
#[test]
fn test_no_finding_for_license_header() {
let store = GraphBuilder::new().freeze();
let detector = CommentedCodeDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("licensed.py", "# Copyright (c) 2024 Django Software Foundation\n# All rights reserved.\n# Permission is hereby granted, free of charge,\n# to any person obtaining a copy of this software\n# and associated documentation files (the \"Software\"),\n# to deal in the Software without restriction.\n\ndef main():\n pass\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag license headers as commented code. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
// Prose that mentions `=` carries only weak indicators; with no strong
// indicator anywhere in the run, nothing may be reported.
#[test]
fn test_no_finding_for_technical_comments_with_equals() {
let store = GraphBuilder::new().freeze();
let detector = CommentedCodeDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("doc.py", "# The default timeout = 30 seconds for all connections.\n# Each worker handles requests independently.\n# When count = 0, the queue is considered empty.\n# The maximum retry count = 3 before giving up.\n# Buffer size = 4096 bytes is optimal for most cases.\n# Connection pool size = 10 is the recommended default.\n\ndef process():\n pass\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag technical docs (contain '=' but aren't code). Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
// Guard against over-filtering: a commented-out function of six lines with
// `def` and `return` must still be reported.
#[test]
fn test_still_detects_real_commented_code() {
let store = GraphBuilder::new().freeze();
let detector = CommentedCodeDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("dead.py", "def active():\n pass\n\n# def old_function():\n# x = compute()\n# if x > 0:\n# return process(x)\n# else:\n# return fallback()\n\ndef another():\n pass\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should still detect real commented-out code"
);
}
}