// repotoire 0.3.47

//! Self-Admitted Technical Debt (SATD) detector.
//!
//! Scans code comments for SATD patterns (TODO, FIXME, HACK, etc.)
//! and enriches findings with graph context.
//!
//! SATD Patterns and Severity:
//! - HIGH: HACK, KLUDGE, BUG (known bugs or workarounds)
//! - MEDIUM: FIXME, XXX, REFACTOR (issues needing attention)
//! - LOW: TODO, TEMP (reminders for future work)

use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};

use regex::Regex;

use crate::detectors::base::{Detector, DetectorConfig, DetectorResult};
use crate::graph::GraphClient;
use crate::models::{Finding, Severity};

/// Detector for Self-Admitted Technical Debt (SATD) markers.
///
/// Scans source files under a repository path for TODO, FIXME, HACK, XXX,
/// KLUDGE, REFACTOR, TEMP, and BUG markers and reports each hit as a finding.
pub struct SATDDetector {
    /// Detector configuration; `new` initialises it to `DetectorConfig::default()`.
    config: DetectorConfig,
    /// Root directory to scan. When `None`, detection is skipped.
    repository_path: Option<PathBuf>,
    /// Upper bound on the number of matches collected and reported.
    max_findings: usize,
    /// Path patterns to skip. Entries ending in `/` name a directory
    /// component, entries containing `*` are treated as simple globs, and
    /// anything else is matched as a substring of the path.
    exclude_patterns: Vec<String>,
    /// File extensions to scan, each written with a leading dot (e.g. `.py`).
    file_extensions: Vec<String>,
}

impl SATDDetector {
    /// Build a detector with the default settings.
    ///
    /// Defaults: no repository path, at most 500 findings, a fixed list of
    /// excluded test / VCS / cache / dependency locations, and a fixed list
    /// of source-file extensions to scan.
    pub fn new() -> Self {
        let exclude_patterns: Vec<String> = [
            "tests/",
            "test_*.py",
            "*_test.py",
            "migrations/",
            "__pycache__/",
            ".git/",
            "node_modules/",
            "venv/",
            ".venv/",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let file_extensions: Vec<String> = [
            ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rs", ".c", ".cpp", ".h",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        Self {
            config: DetectorConfig::default(),
            repository_path: None,
            max_findings: 500,
            exclude_patterns,
            file_extensions,
        }
    }

    /// Set repository path
    pub fn with_repository_path<P: AsRef<Path>>(mut self, path: P) -> Self {
        self.repository_path = Some(path.as_ref().to_path_buf());
        self
    }

    /// Set max findings
    pub fn with_max_findings(mut self, max: usize) -> Self {
        self.max_findings = max;
        self
    }

    /// Build the marker → severity lookup table.
    ///
    /// - High: `HACK`, `KLUDGE`, `BUG` (known bugs or workarounds)
    /// - Medium: `FIXME`, `XXX`, `REFACTOR` (issues needing attention)
    /// - Low: `TODO`, `TEMP` (reminders for future work)
    fn severity_map() -> HashMap<&'static str, Severity> {
        vec![
            ("HACK", Severity::High),
            ("KLUDGE", Severity::High),
            ("BUG", Severity::High),
            ("FIXME", Severity::Medium),
            ("XXX", Severity::Medium),
            ("REFACTOR", Severity::Medium),
            ("TODO", Severity::Low),
            ("TEMP", Severity::Low),
        ]
        .into_iter()
        .collect()
    }

    /// Confidence assigned to a finding of the given SATD marker type.
    ///
    /// Expects the upper-case marker name. `TODO` and any unrecognised
    /// value both yield the lowest score, `0.80`.
    fn confidence_score(satd_type: &str) -> f64 {
        if matches!(satd_type, "BUG" | "HACK" | "KLUDGE") {
            0.95
        } else if matches!(satd_type, "FIXME" | "REFACTOR") {
            0.90
        } else if matches!(satd_type, "XXX" | "TEMP") {
            0.85
        } else {
            0.80
        }
    }

    /// Check if path should be excluded
    fn should_exclude(&self, path: &str) -> bool {
        for pattern in &self.exclude_patterns {
            if pattern.ends_with('/') {
                let dir = pattern.trim_end_matches('/');
                if path.split('/').any(|p| p == dir) {
                    return true;
                }
            } else if pattern.contains('*') {
                // Simple glob matching
                let regex_pattern = pattern.replace("*", ".*");
                if let Ok(re) = Regex::new(&regex_pattern) {
                    let filename = Path::new(path)
                        .file_name()
                        .and_then(|f| f.to_str())
                        .unwrap_or("");
                    if re.is_match(path) || re.is_match(filename) {
                        return true;
                    }
                }
            } else if path.contains(pattern) {
                return true;
            }
        }
        false
    }

    /// Scan the configured repository for SATD markers.
    ///
    /// Returns an empty list when no repository path is configured or when the
    /// path does not exist (the latter is logged as a warning). Otherwise the
    /// tree is walked once per configured file extension and every marker hit
    /// is collected.
    ///
    /// The leading comment token in the pattern (`#`, `//`, `/*`, `*`, `"""`,
    /// `'''`) is optional, so a bare keyword anywhere on a line also matches.
    /// Matching is case-insensitive.
    fn scan_files(&self) -> Vec<SATDMatch> {
        let repo_path = match &self.repository_path {
            Some(p) => p,
            None => return Vec::new(),
        };

        if !repo_path.exists() {
            tracing::warn!("Repository path does not exist: {:?}", repo_path);
            return Vec::new();
        }

        // The pattern contains `"` characters, so it needs the `r#"…"#` form:
        // a plain `r"…"` raw string would end at the first quote.
        let pattern = Regex::new(
            r#"(?i)(?:#|//|/\*|\*|"""|''')?\s*\b(TODO|FIXME|HACK|XXX|KLUDGE|REFACTOR|TEMP|BUG)\b[\s:(\[]*(.{0,200})"#,
        )
        .expect("Invalid regex");

        let mut matches = Vec::new();

        for ext in &self.file_extensions {
            self.scan_directory(repo_path, repo_path, ext, &pattern, &mut matches);
        }

        matches
    }

    /// Recursively scan `dir` for files with extension `ext` and append any
    /// SATD matches to `matches`.
    ///
    /// * `root` – repository root; used to compute the relative path that is
    ///   checked against the exclude patterns and stored in each match.
    /// * `dir` – directory to read on this call.
    /// * `ext` – extension to accept, with or without a leading dot.
    /// * `pattern` – compiled SATD regex.
    /// * `matches` – output buffer, capped at `max_findings`.
    ///
    /// Unreadable directories and entries are skipped (best effort). Only real
    /// directories are descended into; symlinked directories are not followed,
    /// so a symlink loop cannot cause the same tree to be re-walked.
    fn scan_directory(
        &self,
        root: &Path,
        dir: &Path,
        ext: &str,
        pattern: &Regex,
        matches: &mut Vec<SATDMatch>,
    ) {
        let entries = match fs::read_dir(dir) {
            Ok(e) => e,
            Err(_) => return,
        };

        let wanted_ext = ext.trim_start_matches('.');

        for entry in entries.flatten() {
            // Once the cap is reached there is nothing left to collect, so
            // stop walking instead of reading further directories.
            if matches.len() >= self.max_findings {
                return;
            }

            let path = entry.path();
            let rel_path = path
                .strip_prefix(root)
                .unwrap_or(&path)
                .to_string_lossy()
                .to_string();

            if self.should_exclude(&rel_path) {
                continue;
            }

            // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`.
            let is_real_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);

            if is_real_dir {
                self.scan_directory(root, &path, ext, pattern, matches);
            } else if path.extension().and_then(|e| e.to_str()) == Some(wanted_ext) {
                self.scan_file(&path, &rel_path, pattern, matches);
            }
        }
    }

    /// Scan a single file line by line and append every SATD hit to `matches`.
    ///
    /// * `path` – file to read.
    /// * `rel_path` – path recorded in each match.
    /// * `pattern` – compiled SATD regex; capture 1 is the marker, capture 2
    ///   the trailing comment text.
    /// * `matches` – output buffer; scanning stops once it holds
    ///   `max_findings` entries.
    ///
    /// Files that cannot be read as UTF-8 text, files larger than 1 MB and
    /// lines longer than 2000 bytes are skipped. Line numbers are 1-based.
    fn scan_file(&self, path: &Path, rel_path: &str, pattern: &Regex, matches: &mut Vec<SATDMatch>) {
        const MAX_FILE_BYTES: usize = 1_000_000;
        const MAX_LINE_BYTES: usize = 2000;

        // Check the size from metadata first so oversized files are never
        // loaded into memory. If metadata is unavailable, fall through to the
        // post-read check below.
        if let Ok(meta) = fs::metadata(path) {
            if meta.len() > MAX_FILE_BYTES as u64 {
                return;
            }
        }

        let content = match fs::read_to_string(path) {
            Ok(c) => c,
            Err(_) => return,
        };

        // Skip very large files
        if content.len() > MAX_FILE_BYTES {
            return;
        }

        // Build the lookup table once per file rather than once per match.
        let severities = Self::severity_map();

        for (line_idx, line) in content.lines().enumerate() {
            // Skip very long lines
            if line.len() > MAX_LINE_BYTES {
                continue;
            }

            for cap in pattern.captures_iter(line) {
                let satd_type = cap.get(1).map(|m| m.as_str().to_uppercase()).unwrap_or_default();
                // Drop surrounding whitespace and a closing `*/` from the comment text.
                let comment_text = cap
                    .get(2)
                    .map(|m| m.as_str().trim().trim_end_matches("*/").trim().to_string())
                    .unwrap_or_default();

                let severity = severities
                    .get(satd_type.as_str())
                    .cloned()
                    .unwrap_or(Severity::Low);

                matches.push(SATDMatch {
                    file_path: rel_path.to_string(),
                    line_number: (line_idx + 1) as i64,
                    satd_type,
                    comment_text,
                    severity,
                });

                if matches.len() >= self.max_findings {
                    return;
                }
            }
        }
    }

    /// Convert a raw SATD match into a reportable `Finding`.
    ///
    /// Looks up graph context for the match location, then builds the title,
    /// a Markdown description (comment, location, containing entity, severity
    /// rationale), a suggested fix, an effort estimate, tags and metadata.
    ///
    /// The comment shown in the title is limited to 80 bytes. The cut is moved
    /// back to the nearest UTF-8 character boundary so that non-ASCII comments
    /// cannot cause a slicing panic.
    fn create_finding(&self, satd_match: &SATDMatch, graph: &GraphClient) -> Finding {
        const TITLE_COMMENT_MAX_BYTES: usize = 80;

        // Get graph context
        let graph_context = self.get_graph_context(graph, &satd_match.file_path, satd_match.line_number);

        // Build title
        let title = if satd_match.comment_text.is_empty() {
            format!("SATD: {}", satd_match.satd_type)
        } else {
            let comment = satd_match.comment_text.as_str();
            let short_comment = if comment.len() > TITLE_COMMENT_MAX_BYTES {
                // Byte index 0 is always a boundary, so this loop terminates.
                let mut end = TITLE_COMMENT_MAX_BYTES;
                while !comment.is_char_boundary(end) {
                    end -= 1;
                }
                format!("{}...", &comment[..end])
            } else {
                comment.to_string()
            };
            format!("SATD: {} - {}", satd_match.satd_type, short_comment)
        };

        // Build description
        let mut description = format!(
            "**Self-Admitted Technical Debt ({})**\n\n",
            satd_match.satd_type
        );

        if !satd_match.comment_text.is_empty() {
            description.push_str(&format!("**Comment**: {}\n\n", satd_match.comment_text));
        }

        description.push_str(&format!(
            "**Location**: {}:{}\n",
            satd_match.file_path, satd_match.line_number
        ));

        if let Some(entity) = &graph_context.containing_entity {
            description.push_str(&format!(
                "**Containing {}**: `{}`\n",
                graph_context.entity_type.as_deref().unwrap_or("entity"),
                entity
            ));
        }

        description.push_str("\n**Severity Rationale**:\n");
        match satd_match.satd_type.as_str() {
            "HACK" | "KLUDGE" | "BUG" => {
                description.push_str(
                    "- HIGH: Indicates a known bug, workaround, or hack that needs immediate attention\n",
                );
            }
            "FIXME" | "XXX" | "REFACTOR" => {
                description.push_str("- MEDIUM: Indicates an issue that should be addressed soon\n");
            }
            _ => {
                description.push_str("- LOW: Reminder for future work\n");
            }
        }

        let suggestion = self.suggest_fix(&satd_match.satd_type, &satd_match.comment_text);
        let effort = self.estimate_effort(&satd_match.satd_type);

        Finding {
            // Identifier derived from file path, line and marker type.
            id: format!(
                "satd_{}_{}_{}",
                satd_match.file_path.replace('/', "_"),
                satd_match.line_number,
                satd_match.satd_type
            ),
            detector: "SATDDetector".to_string(),
            severity: satd_match.severity.clone(),
            title,
            description,
            affected_nodes: graph_context.nodes,
            affected_files: vec![satd_match.file_path.clone()],
            line_start: Some(satd_match.line_number),
            line_end: Some(satd_match.line_number),
            suggested_fix: Some(suggestion),
            estimated_effort: Some(effort),
            confidence: Self::confidence_score(&satd_match.satd_type),
            tags: vec![
                "satd".to_string(),
                satd_match.satd_type.to_lowercase(),
                "technical_debt".to_string(),
            ],
            metadata: serde_json::json!({
                "satd_type": satd_match.satd_type,
                "comment_text": satd_match.comment_text,
                "containing_entity": graph_context.containing_entity,
                "entity_type": graph_context.entity_type,
            }),
        }
    }

    /// Look up graph context for a location inside a file.
    ///
    /// Runs a query that matches the `File` node for `file_path` and, when
    /// present, a contained entity whose line range covers `line` (rows are
    /// ordered by `lineStart` descending and the first one is used). Returns
    /// `GraphContext::default()` when the query fails or yields no rows.
    fn get_graph_context(&self, graph: &GraphClient, file_path: &str, line: i64) -> GraphContext {
        let query = r#"
            MATCH (file:File {filePath: $file_path})
            OPTIONAL MATCH (file)-[:CONTAINS]->(entity)
            WHERE entity.lineStart <= $line AND entity.lineEnd >= $line
            WITH file, entity
            ORDER BY entity.lineStart DESC
            LIMIT 1
            RETURN
                file.loc as file_loc,
                entity.qualifiedName as containing_entity,
                labels(entity)[0] as entity_type,
                entity.complexity as complexity
        "#;

        let rows = match graph.execute_with_params(
            query,
            vec![
                ("file_path", file_path.into()),
                ("line", line.into()),
            ],
        ) {
            Ok(rows) if !rows.is_empty() => rows,
            // Query error or no matching file node: nothing to enrich with.
            _ => return GraphContext::default(),
        };

        let first_row = &rows[0];
        let entity_name = first_row.get_string("containing_entity");
        let file_loc = first_row.get_i64("file_loc").unwrap_or(0);
        let entity_type = first_row.get_string("entity_type");
        let complexity = first_row.get_i64("complexity").unwrap_or(0);
        // The containing entity, when known, is the only affected node.
        let nodes: Vec<String> = entity_name.iter().cloned().collect();

        GraphContext {
            file_loc,
            containing_entity: entity_name,
            entity_type,
            complexity,
            nodes,
        }
    }

    /// Produce a human-readable remediation hint for a SATD marker.
    ///
    /// * `satd_type` – upper-case marker name; unknown values get a generic hint.
    /// * `comment_text` – text following the marker; when non-empty, up to
    ///   its first 100 bytes are quoted after the hint.
    ///
    /// The quoted excerpt is cut on a UTF-8 character boundary, so non-ASCII
    /// comments cannot cause a slicing panic.
    fn suggest_fix(&self, satd_type: &str, comment_text: &str) -> String {
        const EXCERPT_MAX_BYTES: usize = 100;

        let base = match satd_type {
            "TODO" => "Review and either implement the TODO or create a tracking issue",
            "FIXME" => "Investigate and fix the issue described in the comment",
            "HACK" => "Replace the hacky workaround with a proper solution",
            "XXX" => "Review and address the concern mentioned in the comment",
            "KLUDGE" => "Refactor this code to remove the kludge/workaround",
            "REFACTOR" => "Schedule time to refactor as described",
            "TEMP" => "Remove the temporary code before release",
            "BUG" => "Fix the known bug and add a regression test",
            _ => "Review and address this technical debt",
        };

        if comment_text.is_empty() {
            base.to_string()
        } else {
            let short = if comment_text.len() > EXCERPT_MAX_BYTES {
                // Byte index 0 is always a boundary, so this loop terminates.
                let mut end = EXCERPT_MAX_BYTES;
                while !comment_text.is_char_boundary(end) {
                    end -= 1;
                }
                &comment_text[..end]
            } else {
                comment_text
            };
            format!("{}. Comment indicates: '{}'", base, short)
        }
    }

    /// Rough effort label for resolving a marker of the given type.
    ///
    /// `REFACTOR` is rated large; `HACK`, `KLUDGE` and `BUG` medium; every
    /// other value small.
    fn estimate_effort(&self, satd_type: &str) -> String {
        let label = match satd_type {
            "REFACTOR" => "Large (4+ hours)",
            "HACK" | "KLUDGE" | "BUG" => "Medium (1-4 hours)",
            _ => "Small (30-60 minutes)",
        };
        label.to_owned()
    }
}

impl Default for SATDDetector {
    fn default() -> Self {
        Self::new()
    }
}

impl Detector for SATDDetector {
    /// Name under which this detector reports.
    fn name(&self) -> &'static str {
        "SATDDetector"
    }

    /// One-line summary of what the detector looks for.
    fn description(&self) -> &'static str {
        "Detects Self-Admitted Technical Debt comments (TODO, FIXME, HACK, etc.)"
    }

    /// Scan the repository and turn each SATD match into a finding.
    ///
    /// Returns an empty list (after logging a warning) when no repository
    /// path has been configured. At most `max_findings` findings are produced.
    fn detect(&self, graph: &GraphClient) -> DetectorResult {
        if self.repository_path.is_none() {
            tracing::warn!("SATDDetector: repository_path not set, skipping");
            return Ok(Vec::new());
        }

        let satd_matches = self.scan_files();
        let mut findings: Vec<Finding> = Vec::new();
        for satd_match in satd_matches.iter().take(self.max_findings) {
            findings.push(self.create_finding(satd_match, graph));
        }

        Ok(findings)
    }

    /// Always `false` for this detector.
    fn is_dependent(&self) -> bool {
        false
    }
}

/// A single SATD marker occurrence found while scanning a file.
struct SATDMatch {
    /// Path of the file, relative to the repository root where possible.
    file_path: String,
    /// 1-based line number of the marker.
    line_number: i64,
    /// Upper-cased marker keyword (e.g. `TODO`, `FIXME`).
    satd_type: String,
    /// Text following the marker, trimmed, with any trailing `*/` removed.
    comment_text: String,
    /// Severity looked up for `satd_type`; `Low` for unknown markers.
    severity: Severity,
}

/// Graph-derived context attached to a SATD finding.
///
/// The `Default` value (zeros, `None`, empty list) is used when the graph
/// lookup fails or returns nothing.
#[derive(Default)]
struct GraphContext {
    /// `file.loc` as returned by the graph query; 0 when absent.
    file_loc: i64,
    /// Qualified name of the entity whose line range covers the match, if any.
    containing_entity: Option<String>,
    /// First label of the containing entity, if any.
    entity_type: Option<String>,
    /// `entity.complexity` as returned by the graph query; 0 when absent.
    complexity: i64,
    /// Affected node names: the containing entity when known, otherwise empty.
    nodes: Vec<String>,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// One representative marker per severity tier maps as documented.
    #[test]
    fn test_severity_map() {
        let severities = SATDDetector::severity_map();
        let expected = [
            ("HACK", Severity::High),
            ("FIXME", Severity::Medium),
            ("TODO", Severity::Low),
        ];
        for (marker, severity) in expected.iter() {
            assert_eq!(severities.get(*marker), Some(severity));
        }
    }

    /// Default exclude patterns skip test and dependency directories but
    /// leave ordinary source files alone.
    #[test]
    fn test_should_exclude() {
        let detector = SATDDetector::new();

        for excluded in ["tests/test_foo.py", "node_modules/package/index.js"].iter() {
            assert!(detector.should_exclude(excluded));
        }
        assert!(!detector.should_exclude("src/main.py"));
    }

    /// Highest and lowest confidence tiers return their fixed scores.
    #[test]
    fn test_confidence_score() {
        let cases = [("BUG", 0.95), ("TODO", 0.80)];
        for (marker, score) in cases.iter() {
            assert_eq!(SATDDetector::confidence_score(marker), *score);
        }
    }
}