//! Self-Admitted Technical Debt (SATD) detector.
//!
//! Scans code comments for SATD patterns (TODO, FIXME, HACK, etc.)
//! and enriches findings with graph context.
//!
//! SATD Patterns and Severity:
//! - HIGH: HACK, KLUDGE, BUG (known bugs or workarounds)
//! - MEDIUM: FIXME, XXX, REFACTOR (issues needing attention)
//! - LOW: TODO, TEMP (reminders for future work)
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use regex::Regex;
use crate::detectors::base::{Detector, DetectorConfig, DetectorResult};
use crate::graph::GraphClient;
use crate::models::{Finding, Severity};
/// Detector for Self-Admitted Technical Debt (SATD) markers.
///
/// Scans code comments for TODO, FIXME, HACK, XXX, KLUDGE, REFACTOR, TEMP,
/// and BUG patterns.
///
/// Build one with `new()` and configure it with the `with_*` builder
/// methods; scanning only happens once a repository path has been set.
pub struct SATDDetector {
/// Detector configuration; `new()` fills it with `DetectorConfig::default()`.
config: DetectorConfig,
/// Repository path to scan. While this is `None`, `detect` logs a warning
/// and returns no findings.
repository_path: Option<PathBuf>,
/// Maximum findings to report (500 by default).
max_findings: usize,
/// Patterns to exclude, matched against repository-relative paths.
/// A trailing `/` denotes a directory name, a `*` denotes a glob, and
/// anything else is treated as a plain substring.
exclude_patterns: Vec<String>,
/// File extensions to scan, written with their leading dot (e.g. `.py`).
file_extensions: Vec<String>,
}
impl SATDDetector {
/// Build a detector with the stock configuration.
///
/// No repository path is set, at most 500 findings are reported, common
/// test/vendor/VCS locations are excluded, and a fixed list of source
/// file extensions is scanned.
pub fn new() -> Self {
    // Paths that are skipped by default.
    const DEFAULT_EXCLUDES: &[&str] = &[
        "tests/",
        "test_*.py",
        "*_test.py",
        "migrations/",
        "__pycache__/",
        ".git/",
        "node_modules/",
        "venv/",
        ".venv/",
    ];
    // Extensions (with leading dot) that are scanned by default.
    const DEFAULT_EXTENSIONS: &[&str] = &[
        ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rs", ".c", ".cpp", ".h",
    ];

    let to_owned_list =
        |items: &[&str]| -> Vec<String> { items.iter().map(|item| item.to_string()).collect() };

    Self {
        config: DetectorConfig::default(),
        repository_path: None,
        max_findings: 500,
        exclude_patterns: to_owned_list(DEFAULT_EXCLUDES),
        file_extensions: to_owned_list(DEFAULT_EXTENSIONS),
    }
}
/// Set repository path
pub fn with_repository_path<P: AsRef<Path>>(mut self, path: P) -> Self {
self.repository_path = Some(path.as_ref().to_path_buf());
self
}
/// Set max findings
pub fn with_max_findings(mut self, max: usize) -> Self {
self.max_findings = max;
self
}
/// Build the marker-to-severity lookup table.
///
/// Keys are the upper-case SATD markers; every call returns a freshly
/// allocated map with the same eight entries.
fn severity_map() -> HashMap<&'static str, Severity> {
    // Known bugs or workarounds.
    const HIGH: &[&str] = &["HACK", "KLUDGE", "BUG"];
    // Issues needing attention.
    const MEDIUM: &[&str] = &["FIXME", "XXX", "REFACTOR"];
    // Reminders for future work.
    const LOW: &[&str] = &["TODO", "TEMP"];

    let high = HIGH.iter().map(|&marker| (marker, Severity::High));
    let medium = MEDIUM.iter().map(|&marker| (marker, Severity::Medium));
    let low = LOW.iter().map(|&marker| (marker, Severity::Low));

    high.chain(medium).chain(low).collect()
}
/// Look up the confidence assigned to a SATD marker.
///
/// `satd_type` is compared case-sensitively against the upper-case marker
/// names; anything not listed (including `TODO`) scores `0.80`.
fn confidence_score(satd_type: &str) -> f64 {
    const FALLBACK: f64 = 0.80;
    const SCORES: &[(&str, f64)] = &[
        ("BUG", 0.95),
        ("HACK", 0.95),
        ("KLUDGE", 0.95),
        ("FIXME", 0.90),
        ("REFACTOR", 0.90),
        ("XXX", 0.85),
        ("TEMP", 0.85),
    ];

    SCORES
        .iter()
        .find(|(marker, _)| *marker == satd_type)
        .map(|&(_, score)| score)
        .unwrap_or(FALLBACK)
}
/// Check if path should be excluded
fn should_exclude(&self, path: &str) -> bool {
for pattern in &self.exclude_patterns {
if pattern.ends_with('/') {
let dir = pattern.trim_end_matches('/');
if path.split('/').any(|p| p == dir) {
return true;
}
} else if pattern.contains('*') {
// Simple glob matching
let regex_pattern = pattern.replace("*", ".*");
if let Ok(re) = Regex::new(®ex_pattern) {
let filename = Path::new(path)
.file_name()
.and_then(|f| f.to_str())
.unwrap_or("");
if re.is_match(path) || re.is_match(filename) {
return true;
}
}
} else if path.contains(pattern) {
return true;
}
}
false
}
/// Scan the configured repository for SATD comments.
///
/// Returns an empty list when no repository path is set or the path does
/// not exist (the latter is logged as a warning). Otherwise the tree is
/// walked once per configured extension and at most `max_findings`
/// matches are collected.
///
/// A match requires a comment marker (`#`, `//`, `/*`, `*`, `"""` or
/// `'''`) followed, after optional whitespace, by one of the SATD keywords
/// (case-insensitive). Capture 1 is the keyword; capture 2 is up to 200
/// characters of trailing text.
fn scan_files(&self) -> Vec<SATDMatch> {
    let repo_path = match self.repository_path.as_deref() {
        Some(path) => path,
        None => return Vec::new(),
    };
    if !repo_path.exists() {
        tracing::warn!("Repository path does not exist: {:?}", repo_path);
        return Vec::new();
    }

    // `r#"…"#` is required because the pattern contains `"`; a plain
    // `r"…"` literal would end at the first quote. The marker group is
    // mandatory so that ordinary identifiers such as `temp` or `bug` in
    // code are not reported as technical debt.
    let pattern = Regex::new(
        r#"(?i)(?:#|//|/\*|\*|"""|''')\s*\b(TODO|FIXME|HACK|XXX|KLUDGE|REFACTOR|TEMP|BUG)\b[\s:(\[]*(.{0,200})"#,
    )
    .expect("SATD pattern is a constant, valid regex");

    let mut matches = Vec::new();
    for ext in &self.file_extensions {
        // No point walking the tree again once the cap has been reached.
        if matches.len() >= self.max_findings {
            break;
        }
        self.scan_directory(repo_path, repo_path, ext, &pattern, &mut matches);
    }
    matches
}
/// Recursively scan `dir` for files with extension `ext`.
///
/// * `root` - repository root; entry paths are made relative to it before
///   exclusion checks and reporting.
/// * `dir` - directory currently being walked.
/// * `ext` - extension to scan, with or without a leading dot.
/// * `pattern` - compiled SATD regex handed on to `scan_file`.
/// * `matches` - accumulator; the walk stops once it holds `max_findings`
///   entries.
///
/// Unreadable directories and entries are skipped silently. Symlinked
/// directories are not descended into, so a link cycle cannot cause
/// unbounded recursion; symlinked files are still scanned.
fn scan_directory(
    &self,
    root: &Path,
    dir: &Path,
    ext: &str,
    pattern: &Regex,
    matches: &mut Vec<SATDMatch>,
) {
    if matches.len() >= self.max_findings {
        return;
    }
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    let wanted_ext = ext.trim_start_matches('.');

    for entry in entries.flatten() {
        // A nested call may have filled the accumulator.
        if matches.len() >= self.max_findings {
            return;
        }

        let path = entry.path();
        let rel_path = path
            .strip_prefix(root)
            .unwrap_or(&path)
            .to_string_lossy()
            .to_string();
        if self.should_exclude(&rel_path) {
            continue;
        }

        // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`.
        let file_type = match entry.file_type() {
            Ok(file_type) => file_type,
            Err(_) => continue,
        };

        if file_type.is_dir() {
            self.scan_directory(root, &path, ext, pattern, matches);
        } else if file_type.is_symlink() && path.is_dir() {
            // Symlink to a directory: skip to avoid cycles and double scans.
            continue;
        } else if path.extension().and_then(|e| e.to_str()) == Some(wanted_ext) {
            self.scan_file(&path, &rel_path, pattern, matches);
        }
    }
}
/// Scan a single file and append every SATD match to `matches`.
///
/// * `path` - file to read.
/// * `rel_path` - path recorded on each match.
/// * `pattern` - SATD regex; capture 1 is the keyword, capture 2 the text.
/// * `matches` - accumulator; scanning stops once it holds `max_findings`
///   entries.
///
/// Files that cannot be read as UTF-8 text, files larger than 1,000,000
/// bytes and lines longer than 2000 bytes are skipped.
fn scan_file(&self, path: &Path, rel_path: &str, pattern: &Regex, matches: &mut Vec<SATDMatch>) {
    const MAX_FILE_BYTES: usize = 1_000_000;
    const MAX_LINE_BYTES: usize = 2000;

    // Reject oversized files from metadata so they are never loaded.
    let too_large = fs::metadata(path)
        .map(|meta| meta.len() > MAX_FILE_BYTES as u64)
        .unwrap_or(false);
    if too_large {
        return;
    }

    let content = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(_) => return,
    };
    // Fallback in case the size could not be determined from metadata.
    if content.len() > MAX_FILE_BYTES {
        return;
    }

    // Built once per file rather than once per match.
    let severities = Self::severity_map();

    for (line_idx, line) in content.lines().enumerate() {
        if line.len() > MAX_LINE_BYTES {
            continue;
        }
        for cap in pattern.captures_iter(line) {
            let satd_type = cap
                .get(1)
                .map(|m| m.as_str().to_uppercase())
                .unwrap_or_default();
            // Drop surrounding whitespace and a closing `*/` from the text.
            let comment_text = cap
                .get(2)
                .map(|m| m.as_str().trim().trim_end_matches("*/").trim().to_string())
                .unwrap_or_default();
            let severity = severities
                .get(satd_type.as_str())
                .cloned()
                .unwrap_or(Severity::Low);

            matches.push(SATDMatch {
                file_path: rel_path.to_string(),
                // 1-based line number; bounded by the file-size limit above.
                line_number: (line_idx + 1) as i64,
                satd_type,
                comment_text,
                severity,
            });
            if matches.len() >= self.max_findings {
                return;
            }
        }
    }
}
/// Turn a raw SATD match into a `Finding`.
///
/// Looks up graph context for the match location, then assembles the
/// title, a Markdown description (comment, location, containing entity,
/// severity rationale), a suggested fix, an effort estimate, tags and
/// metadata.
///
/// The comment is shortened to at most 80 bytes in the title; the cut is
/// moved back to a character boundary so non-ASCII comments cannot panic.
fn create_finding(&self, satd_match: &SATDMatch, graph: &GraphClient) -> Finding {
    // Get graph context
    let graph_context =
        self.get_graph_context(graph, &satd_match.file_path, satd_match.line_number);

    // Build title
    let comment = satd_match.comment_text.as_str();
    let title = if comment.is_empty() {
        format!("SATD: {}", satd_match.satd_type)
    } else if comment.len() > 80 {
        // Byte 80 may fall inside a multi-byte character; back up until it
        // does not (offset 0 is always a boundary, so this terminates).
        let mut cut = 80;
        while !comment.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("SATD: {} - {}...", satd_match.satd_type, &comment[..cut])
    } else {
        format!("SATD: {} - {}", satd_match.satd_type, comment)
    };

    // Build description
    let mut description = format!(
        "**Self-Admitted Technical Debt ({})**\n\n",
        satd_match.satd_type
    );
    if !comment.is_empty() {
        description.push_str(&format!("**Comment**: {}\n\n", comment));
    }
    description.push_str(&format!(
        "**Location**: {}:{}\n",
        satd_match.file_path, satd_match.line_number
    ));
    if let Some(entity) = &graph_context.containing_entity {
        description.push_str(&format!(
            "**Containing {}**: `{}`\n",
            graph_context.entity_type.as_deref().unwrap_or("entity"),
            entity
        ));
    }
    description.push_str("\n**Severity Rationale**:\n");
    match satd_match.satd_type.as_str() {
        "HACK" | "KLUDGE" | "BUG" => {
            description.push_str(
                "- HIGH: Indicates a known bug, workaround, or hack that needs immediate attention\n",
            );
        }
        "FIXME" | "XXX" | "REFACTOR" => {
            description.push_str("- MEDIUM: Indicates an issue that should be addressed soon\n");
        }
        _ => {
            description.push_str("- LOW: Reminder for future work\n");
        }
    }

    let suggestion = self.suggest_fix(&satd_match.satd_type, &satd_match.comment_text);
    let effort = self.estimate_effort(&satd_match.satd_type);

    Finding {
        id: format!(
            "satd_{}_{}_{}",
            satd_match.file_path.replace('/', "_"),
            satd_match.line_number,
            satd_match.satd_type
        ),
        detector: "SATDDetector".to_string(),
        severity: satd_match.severity.clone(),
        title,
        description,
        affected_nodes: graph_context.nodes,
        affected_files: vec![satd_match.file_path.clone()],
        line_start: Some(satd_match.line_number),
        line_end: Some(satd_match.line_number),
        suggested_fix: Some(suggestion),
        estimated_effort: Some(effort),
        confidence: Self::confidence_score(&satd_match.satd_type),
        tags: vec![
            "satd".to_string(),
            satd_match.satd_type.to_lowercase(),
            "technical_debt".to_string(),
        ],
        metadata: serde_json::json!({
            "satd_type": satd_match.satd_type,
            "comment_text": satd_match.comment_text,
            "containing_entity": graph_context.containing_entity,
            "entity_type": graph_context.entity_type,
        }),
    }
}
/// Fetch graph information about the code surrounding a SATD comment.
///
/// Queries for the `File` node with the given `file_path` and, optionally,
/// an entity it contains whose line range spans `line`; when several
/// qualify, the one with the greatest `lineStart` is taken. A failed query
/// or an empty result yields `GraphContext::default()`.
fn get_graph_context(&self, graph: &GraphClient, file_path: &str, line: i64) -> GraphContext {
    let query = r#"
MATCH (file:File {filePath: $file_path})
OPTIONAL MATCH (file)-[:CONTAINS]->(entity)
WHERE entity.lineStart <= $line AND entity.lineEnd >= $line
WITH file, entity
ORDER BY entity.lineStart DESC
LIMIT 1
RETURN
file.loc as file_loc,
entity.qualifiedName as containing_entity,
labels(entity)[0] as entity_type,
entity.complexity as complexity
"#;

    let outcome = graph.execute_with_params(
        query,
        vec![
            ("file_path", file_path.into()),
            ("line", line.into()),
        ],
    );

    // Anything other than a non-empty, successful result means "no context".
    let results = match outcome {
        Ok(rows) if !rows.is_empty() => rows,
        _ => return GraphContext::default(),
    };

    let row = &results[0];
    let entity_name = row.get_string("containing_entity");
    GraphContext {
        file_loc: row.get_i64("file_loc").unwrap_or(0),
        containing_entity: entity_name.clone(),
        entity_type: row.get_string("entity_type"),
        complexity: row.get_i64("complexity").unwrap_or(0),
        // The containing entity, when present, is the only affected node.
        nodes: match entity_name {
            Some(name) => vec![name],
            None => Vec::new(),
        },
    }
}
/// Produce a remediation hint for a SATD marker.
///
/// * `satd_type` - upper-case marker name; unknown markers get a generic
///   hint.
/// * `comment_text` - text captured after the marker; when non-empty, up to
///   100 bytes of it are quoted after the hint.
///
/// The quoted text is cut on a character boundary, so comments containing
/// multi-byte characters cannot cause a slicing panic.
fn suggest_fix(&self, satd_type: &str, comment_text: &str) -> String {
    let base = match satd_type {
        "TODO" => "Review and either implement the TODO or create a tracking issue",
        "FIXME" => "Investigate and fix the issue described in the comment",
        "HACK" => "Replace the hacky workaround with a proper solution",
        "XXX" => "Review and address the concern mentioned in the comment",
        "KLUDGE" => "Refactor this code to remove the kludge/workaround",
        "REFACTOR" => "Schedule time to refactor as described",
        "TEMP" => "Remove the temporary code before release",
        "BUG" => "Fix the known bug and add a regression test",
        _ => "Review and address this technical debt",
    };

    if comment_text.is_empty() {
        return base.to_string();
    }

    let short = if comment_text.len() > 100 {
        // Back up from byte 100 to the nearest character boundary (offset 0
        // is always one, so the loop terminates).
        let mut cut = 100;
        while !comment_text.is_char_boundary(cut) {
            cut -= 1;
        }
        &comment_text[..cut]
    } else {
        comment_text
    };
    format!("{}. Comment indicates: '{}'", base, short)
}
/// Give a rough effort estimate for resolving a SATD marker.
///
/// `REFACTOR` is rated large, `HACK`/`KLUDGE`/`BUG` medium, and every other
/// marker small.
fn estimate_effort(&self, satd_type: &str) -> String {
    let label = match satd_type {
        "REFACTOR" => "Large (4+ hours)",
        "HACK" | "KLUDGE" | "BUG" => "Medium (1-4 hours)",
        _ => "Small (30-60 minutes)",
    };
    label.to_string()
}
}
impl Default for SATDDetector {
fn default() -> Self {
Self::new()
}
}
/// `Detector` integration: file scanning plus per-match graph enrichment.
impl Detector for SATDDetector {
    /// Identifier of this detector.
    fn name(&self) -> &'static str {
        "SATDDetector"
    }

    /// One-line summary of what this detector looks for.
    fn description(&self) -> &'static str {
        "Detects Self-Admitted Technical Debt comments (TODO, FIXME, HACK, etc.)"
    }

    /// Scan the repository and convert up to `max_findings` matches into
    /// findings. Without a repository path a warning is logged and an empty
    /// list is returned.
    fn detect(&self, graph: &GraphClient) -> DetectorResult {
        if self.repository_path.is_none() {
            tracing::warn!("SATDDetector: repository_path not set, skipping");
            return Ok(Vec::new());
        }

        let satd_matches = self.scan_files();
        let mut findings: Vec<Finding> = Vec::new();
        for satd_match in satd_matches.iter().take(self.max_findings) {
            findings.push(self.create_finding(satd_match, graph));
        }
        Ok(findings)
    }

    /// This detector reports that it is not dependent.
    fn is_dependent(&self) -> bool {
        false
    }
}
/// A single SATD match found while scanning a file.
struct SATDMatch {
/// Path of the file, relative to the repository root when the prefix
/// could be stripped.
file_path: String,
/// 1-based line number of the matching line.
line_number: i64,
/// Upper-cased marker keyword (e.g. `TODO`, `FIXME`).
satd_type: String,
/// Text captured after the marker, trimmed and with a trailing `*/`
/// removed; may be empty.
comment_text: String,
/// Severity looked up from the marker; `Severity::Low` when the marker
/// is not in the severity map.
severity: Severity,
}
/// Graph context for a SATD finding.
///
/// The `Default` value (zeros, `None`s and an empty list) is used when the
/// graph query fails or returns no rows.
#[derive(Default)]
struct GraphContext {
/// `file.loc` of the file node, or 0 when missing.
/// NOTE(review): not read anywhere in this file — confirm it is still needed.
file_loc: i64,
/// `qualifiedName` of the entity containing the line, if one was found.
containing_entity: Option<String>,
/// First label of the containing entity, if one was found.
entity_type: Option<String>,
/// `complexity` of the containing entity, or 0 when missing.
/// NOTE(review): not read anywhere in this file — confirm it is still needed.
complexity: i64,
/// Affected node names: the containing entity when present, else empty.
nodes: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative marker per severity tier maps as expected.
    #[test]
    fn test_severity_map() {
        let severities = SATDDetector::severity_map();
        let expected = [
            ("HACK", Severity::High),
            ("FIXME", Severity::Medium),
            ("TODO", Severity::Low),
        ];
        for (marker, severity) in expected.iter() {
            assert_eq!(severities.get(marker), Some(severity));
        }
    }

    /// Default exclude patterns drop test and vendor paths but keep sources.
    #[test]
    fn test_should_exclude() {
        let detector = SATDDetector::new();
        let excluded = ["tests/test_foo.py", "node_modules/package/index.js"];
        for path in excluded.iter() {
            assert!(detector.should_exclude(path));
        }
        assert!(!detector.should_exclude("src/main.py"));
    }

    /// Highest and lowest confidence tiers return their exact scores.
    #[test]
    fn test_confidence_score() {
        let cases = [("BUG", 0.95), ("TODO", 0.80)];
        for (marker, score) in cases.iter() {
            assert_eq!(SATDDetector::confidence_score(marker), *score);
        }
    }
}