// repotoire 0.3.47

//! AI Churn Pattern Detector
//!
//! Detects code with high modification frequency shortly after creation - a pattern
//! commonly seen with AI-generated code that gets quickly revised or corrected.
//!
//! The detector uses git blame + diff to analyze function-level changes and identify:
//! - Functions created and modified within 48 hours ("fix velocity")
//! - High churn ratio (lines_modified / lines_original) in first week
//! - Rapid iterative corrections typical of AI-generated code
//!
//! Key detection signal: time_to_first_fix < 48h AND modifications >= 3 → HIGH

use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphClient;
use crate::models::{Finding, Severity};
use anyhow::Result;
use chrono::{DateTime, Duration, Utc};
use std::collections::HashMap;
use std::path::PathBuf;
use tracing::{debug, info, warn};
use uuid::Uuid;

// --- Fix-velocity thresholds (hours between creation and first modification) ---

/// A time-to-first-fix strictly below this many hours earns the largest velocity
/// weight in the churn score and, combined with at least `CRITICAL_MOD_COUNT`
/// modifications, yields `Severity::Critical`.
const CRITICAL_FIX_VELOCITY_HOURS: i64 = 24;
/// A time-to-first-fix strictly below this many hours, combined with at least
/// `HIGH_MOD_COUNT` modifications, yields `Severity::High` (the key signal).
const HIGH_FIX_VELOCITY_HOURS: i64 = 48;
/// A time-to-first-fix strictly below this many hours, combined with at least two
/// modifications, yields `Severity::Medium`.
const MEDIUM_FIX_VELOCITY_HOURS: i64 = 72;

// --- Modification-count thresholds ---

/// Minimum number of modifications for the velocity-based `Severity::Critical` rule.
const CRITICAL_MOD_COUNT: usize = 5;
/// Minimum number of modifications for the velocity-based `Severity::High` rule.
const HIGH_MOD_COUNT: usize = 3;

// --- Churn-ratio thresholds (first-week lines changed / original lines) ---

/// A churn ratio strictly above this value is `Severity::Critical` on its own.
const CRITICAL_CHURN_RATIO: f64 = 1.5;
/// A churn ratio strictly above this value is at least `Severity::High`.
const HIGH_CHURN_RATIO: f64 = 0.8;
/// A churn ratio strictly above this value is at least `Severity::Medium`.
const MEDIUM_CHURN_RATIO: f64 = 0.5;

/// Minimum AI churn score required to create a finding (filters out noise).
/// Records scoring below this value are dropped before severity is computed.
const MIN_CHURN_SCORE: f64 = 0.8;

/// Default value for the detector's `analysis_window_days` setting, in days.
const DEFAULT_ANALYSIS_WINDOW_DAYS: i64 = 90;

/// Default value for the detector's `min_function_lines` setting; functions whose
/// reported line count is below the configured minimum are skipped.
const DEFAULT_MIN_FUNCTION_LINES: usize = 5;

/// A single modification record: one commit that touched a function.
#[derive(Debug, Clone)]
pub struct Modification {
    /// When the modification happened, normalized to UTC.
    pub timestamp: DateTime<Utc>,
    /// Identifier of the commit that made the modification.
    pub commit_sha: String,
    /// Lines added by the modification.
    // NOTE(review): the detector fills this from the commit's `linesAdded`
    // property, which presumably covers the whole commit rather than only this
    // function - confirm against the graph schema.
    pub lines_added: usize,
    /// Lines deleted by the modification (same caveat as `lines_added`).
    pub lines_deleted: usize,
}

/// Churn statistics for a single function: when it was created and every
/// modification recorded for it afterwards.
#[derive(Debug, Clone)]
pub struct FunctionChurnRecord {
    /// Fully qualified name of the function.
    pub qualified_name: String,
    /// Path of the file containing the function.
    pub file_path: String,
    /// Short (unqualified) function name, used in finding titles and descriptions.
    pub function_name: String,
    /// Creation time in UTC; `None` when unknown. Without it, the first-week
    /// counts and the time-to-first-fix cannot be computed.
    pub created_at: Option<DateTime<Utc>>,
    /// Identifier of the commit that created the function.
    pub creation_commit: String,
    /// Line count used as the denominator of the churn ratio.
    // NOTE(review): the detector fills this from the graph's `loc` property, which
    // is presumably the current size rather than the size at creation - confirm.
    pub lines_original: usize,
    /// Timestamp of the earliest modification; `None` when there is none.
    pub first_modification_at: Option<DateTime<Utc>>,
    /// Identifier of the commit that made the earliest modification.
    pub first_modification_commit: String,
    /// All recorded modifications. The detector stores them sorted by ascending
    /// timestamp.
    pub modifications: Vec<Modification>,
}

impl FunctionChurnRecord {
    /// Time between creation and first modification
    pub fn time_to_first_fix(&self) -> Option<Duration> {
        match (&self.created_at, &self.first_modification_at) {
            (Some(created), Some(first_mod)) => Some(*first_mod - *created),
            _ => None,
        }
    }

    /// Time to first fix in hours
    pub fn time_to_first_fix_hours(&self) -> Option<f64> {
        self.time_to_first_fix()
            .map(|d| d.num_seconds() as f64 / 3600.0)
    }

    /// Count modifications within first week of creation
    pub fn modifications_first_week(&self) -> usize {
        let Some(created_at) = self.created_at else {
            return 0;
        };
        let week_cutoff = created_at + Duration::days(7);
        self.modifications
            .iter()
            .filter(|m| m.timestamp <= week_cutoff)
            .count()
    }

    /// Total lines changed (added + deleted) in first week
    pub fn lines_changed_first_week(&self) -> usize {
        let Some(created_at) = self.created_at else {
            return 0;
        };
        let week_cutoff = created_at + Duration::days(7);
        self.modifications
            .iter()
            .filter(|m| m.timestamp <= week_cutoff)
            .map(|m| m.lines_added + m.lines_deleted)
            .sum()
    }

    /// Ratio of lines changed to original lines in first week
    pub fn churn_ratio(&self) -> f64 {
        if self.lines_original == 0 {
            return 0.0;
        }
        self.lines_changed_first_week() as f64 / self.lines_original as f64
    }

    /// Key signal: fixed within 48h AND multiple modifications
    pub fn is_high_velocity_fix(&self) -> bool {
        let Some(ttf_hours) = self.time_to_first_fix_hours() else {
            return false;
        };
        ttf_hours < HIGH_FIX_VELOCITY_HOURS as f64 && self.modifications.len() >= 2
    }

    /// Combined score indicating AI churn pattern (0-1)
    pub fn ai_churn_score(&self) -> f64 {
        let mut score = 0.0;

        // Fast fix velocity is strong signal
        if let Some(ttf_hours) = self.time_to_first_fix_hours() {
            if ttf_hours < CRITICAL_FIX_VELOCITY_HOURS as f64 {
                score += 0.4;
            } else if ttf_hours < HIGH_FIX_VELOCITY_HOURS as f64 {
                score += 0.25;
            } else if ttf_hours < MEDIUM_FIX_VELOCITY_HOURS as f64 {
                score += 0.1;
            }
        }

        // Multiple early modifications
        let mods = self.modifications.len();
        if mods >= 4 {
            score += 0.3;
        } else if mods >= 2 {
            score += 0.2;
        } else if mods >= 1 {
            score += 0.1;
        }

        // High churn ratio
        let churn = self.churn_ratio();
        if churn > 1.0 {
            score += 0.3;
        } else if churn > 0.5 {
            score += 0.2;
        } else if churn > 0.3 {
            score += 0.1;
        }

        score.min(1.0)
    }
}

/// Detects AI-generated code patterns through fix velocity and churn analysis.
pub struct AIChurnDetector {
    /// Detector configuration, exposed through `Detector::config`.
    config: DetectorConfig,
    /// Analysis window in days.
    // NOTE(review): this value is stored by the constructors but is not read by
    // any code visible in this file - confirm whether the window should be applied
    // during detection.
    analysis_window_days: i64,
    /// Functions whose reported line count is below this value are skipped.
    min_function_lines: usize,
}

impl AIChurnDetector {
    /// Builds a detector with a fresh `DetectorConfig` and the default analysis
    /// window and minimum function size.
    pub fn new() -> Self {
        let config = DetectorConfig::new();
        Self {
            config,
            analysis_window_days: DEFAULT_ANALYSIS_WINDOW_DAYS,
            min_function_lines: DEFAULT_MIN_FUNCTION_LINES,
        }
    }

    /// Builds a detector from `config`.
    ///
    /// The `analysis_window_days` and `min_function_lines` options are looked up
    /// with `get_option_or`, passing the module defaults as the fallback values.
    pub fn with_config(config: DetectorConfig) -> Self {
        let analysis_window_days: i64 =
            config.get_option_or("analysis_window_days", DEFAULT_ANALYSIS_WINDOW_DAYS);
        let min_function_lines: usize =
            config.get_option_or("min_function_lines", DEFAULT_MIN_FUNCTION_LINES);
        Self {
            analysis_window_days,
            min_function_lines,
            config,
        }
    }

    /// Maps a record's fix velocity, modification count and churn ratio to a severity.
    ///
    /// Rules are evaluated from most to least severe and the first match wins:
    /// - Critical: churn above the critical ratio, or a first fix within the
    ///   critical window with at least `CRITICAL_MOD_COUNT` modifications.
    /// - High: a first fix within the high window with at least `HIGH_MOD_COUNT`
    ///   modifications, or churn above the high ratio.
    /// - Medium: a first fix within the medium window with at least two
    ///   modifications, or churn above the medium ratio.
    /// - Low: at least four modifications.
    /// - Info: everything else.
    fn calculate_severity(&self, record: &FunctionChurnRecord) -> Severity {
        let first_fix_hours = record.time_to_first_fix_hours();
        let mod_count = record.modifications.len();
        let churn_ratio = record.churn_ratio();

        // An unknown time-to-first-fix never satisfies a velocity rule.
        let fixed_within =
            |limit_hours: i64| first_fix_hours.map_or(false, |h| h < limit_hours as f64);

        if churn_ratio > CRITICAL_CHURN_RATIO
            || (fixed_within(CRITICAL_FIX_VELOCITY_HOURS) && mod_count >= CRITICAL_MOD_COUNT)
        {
            Severity::Critical
        } else if (fixed_within(HIGH_FIX_VELOCITY_HOURS) && mod_count >= HIGH_MOD_COUNT)
            || churn_ratio > HIGH_CHURN_RATIO
        {
            Severity::High
        } else if (fixed_within(MEDIUM_FIX_VELOCITY_HOURS) && mod_count >= 2)
            || churn_ratio > MEDIUM_CHURN_RATIO
        {
            Severity::Medium
        } else if mod_count >= 4 {
            Severity::Low
        } else {
            Severity::Info
        }
    }

    /// Turns a churn record into a `Finding`.
    ///
    /// Returns `None` when the record's AI churn score is below `MIN_CHURN_SCORE`
    /// or when its severity evaluates to `Severity::Info`.
    fn create_finding(&self, record: &FunctionChurnRecord) -> Option<Finding> {
        // Noise filter: weak signals never become findings.
        let score = record.ai_churn_score();
        if score < MIN_CHURN_SCORE {
            return None;
        }

        let severity = self.calculate_severity(record);
        if severity == Severity::Info {
            return None;
        }

        let churn_ratio = record.churn_ratio();
        let total_mods = record.modifications.len();

        let ttf_str = match record.time_to_first_fix_hours() {
            Some(hours) => format!("{:.1} hours", hours),
            None => "N/A".to_string(),
        };

        let created_str = match record.created_at {
            Some(created) => created.format("%Y-%m-%d %H:%M").to_string(),
            None => "Unknown".to_string(),
        };

        let mut description = format!(
            "Function `{}` in `{}` shows signs of rapid post-creation revision.\n\n\
             **Fix Velocity Metrics:**\n\
             - Created: {} (commit `{}`)\n\
             - Time to first fix: **{}**\n\
             - Total modifications in first week: **{}**\n\n\
             **Churn Analysis:**\n\
             - Original size: {} lines\n\
             - Lines changed in first week: {}\n\
             - Churn ratio: **{:.2}** ({:.0}% of original code)\n\
             - AI churn score: {:.2}",
            record.function_name,
            record.file_path,
            created_str,
            record.creation_commit,
            ttf_str,
            record.modifications_first_week(),
            record.lines_original,
            record.lines_changed_first_week(),
            churn_ratio,
            churn_ratio * 100.0,
            score,
        );

        if record.is_high_velocity_fix() {
            description.push_str(
                "\n\n⚠️ **High fix velocity detected**: This function was modified within 48 hours of creation \
                 with multiple follow-up changes - a pattern strongly associated with AI-generated code \
                 that required human correction.",
            );
        }

        if churn_ratio > CRITICAL_CHURN_RATIO {
            description.push_str(
                "\n\n⚠️ **Critical churn ratio**: More code was changed than originally written, \
                 indicating significant rewriting was needed.",
            );
        }

        // Timeline: list the first five modifications, then summarize the rest.
        if total_mods > 0 {
            description.push_str("\n\n**Modification Timeline:**");
            for entry in record.modifications.iter().take(5) {
                let when = entry.timestamp.format("%Y-%m-%d %H:%M").to_string();
                let line = format!(
                    "\n- {}: commit `{}` (+{} lines)",
                    when, entry.commit_sha, entry.lines_added
                );
                description.push_str(&line);
            }
            if total_mods > 5 {
                let remainder = format!("\n- ... and {} more modifications", total_mods - 5);
                description.push_str(&remainder);
            }
        }

        let suggested_fix: &str = match severity {
            Severity::Critical => {
                "This function shows strong signs of AI-generated code that required extensive correction. \
                 Consider:\n\
                 1. **Review thoroughly** for hidden bugs or incomplete logic\n\
                 2. **Add comprehensive tests** - the rapid changes suggest edge cases may be missed\n\
                 3. **Document the logic** - ensure the team understands what this code does\n\
                 4. **Consider rewriting** if the churn continues"
            }
            Severity::High => {
                "Review this function for correctness issues. Consider:\n\
                 1. Adding unit tests with edge cases\n\
                 2. Reviewing for logical errors\n\
                 3. Ensuring proper error handling"
            }
            _ => {
                "Monitor this function for continued churn. Consider adding tests \
                 to stabilize the implementation."
            }
        };

        // Low and medium findings are sized as small jobs; anything else as medium.
        let estimated_effort: &str = match severity {
            Severity::Low | Severity::Medium => "Small (2-4 hours)",
            _ => "Medium (1-2 days)",
        };

        let title = format!("AI churn pattern in `{}`", record.function_name);
        let why_it_matters =
            "Code that requires rapid fixing after creation often indicates AI-generated content \
             that wasn't fully understood or tested before commit. This pattern is associated with \
             hidden bugs, incomplete error handling, and logic that may not be fully correct."
                .to_string();

        Some(Finding {
            id: Uuid::new_v4().to_string(),
            detector: "AIChurnDetector".to_string(),
            severity,
            title,
            description,
            affected_files: vec![PathBuf::from(&record.file_path)],
            line_start: None,
            line_end: None,
            suggested_fix: Some(suggested_fix.to_string()),
            estimated_effort: Some(estimated_effort.to_string()),
            category: Some("ai_churn".to_string()),
            cwe_id: None,
            why_it_matters: Some(why_it_matters),
        })
    }
}

impl Default for AIChurnDetector {
    fn default() -> Self {
        Self::new()
    }
}

impl Detector for AIChurnDetector {
    fn name(&self) -> &'static str {
        "AIChurnDetector"
    }

    fn description(&self) -> &'static str {
        "Detects AI-generated code patterns through fix velocity and churn analysis"
    }

    fn category(&self) -> &'static str {
        "ai_generated"
    }

    fn config(&self) -> Option<&DetectorConfig> {
        Some(&self.config)
    }

    fn detect(&self, graph: &GraphClient) -> Result<Vec<Finding>> {
        debug!("Starting AI churn detection");

        // Query for functions with modification history
        // Note: This requires git history data in the graph
        let query = r#"
            MATCH (f:Function)
            WHERE f.name IS NOT NULL 
              AND f.filePath IS NOT NULL
              AND f.createdAt IS NOT NULL
            OPTIONAL MATCH (f)-[:MODIFIED_IN]->(c:Commit)
            WITH f, collect(c) AS commits
            WHERE size(commits) >= 2
            RETURN f.qualifiedName AS qualified_name,
                   f.name AS name,
                   f.filePath AS file_path,
                   f.loc AS loc,
                   f.createdAt AS created_at,
                   f.creationCommit AS creation_commit,
                   [c IN commits | {
                       sha: c.sha,
                       timestamp: c.timestamp,
                       linesAdded: c.linesAdded,
                       linesDeleted: c.linesDeleted
                   }] AS modifications
            LIMIT 500
        "#;

        let results = graph.execute(query)?;

        if results.is_empty() {
            debug!("No functions with modification history found");
            // Fall back to simpler query without git history
            return self.detect_without_git_history(graph);
        }

        let mut findings: Vec<Finding> = Vec::new();

        for row in results {
            let qualified_name = row
                .get("qualified_name")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();

            let function_name = row
                .get("name")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();

            let file_path = row
                .get("file_path")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();

            let loc = row.get("loc").and_then(|v| v.as_u64()).unwrap_or(0) as usize;

            if loc < self.min_function_lines {
                continue;
            }

            let created_at = row
                .get("created_at")
                .and_then(|v| v.as_str())
                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                .map(|dt| dt.with_timezone(&Utc));

            let creation_commit = row
                .get("creation_commit")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();

            // Parse modifications
            let modifications: Vec<Modification> = row
                .get("modifications")
                .and_then(|v| v.as_array())
                .map(|arr| {
                    arr.iter()
                        .filter_map(|m| {
                            let sha = m.get("sha")?.as_str()?.to_string();
                            let timestamp = m
                                .get("timestamp")
                                .and_then(|v| v.as_str())
                                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                                .map(|dt| dt.with_timezone(&Utc))?;
                            let lines_added =
                                m.get("linesAdded").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
                            let lines_deleted =
                                m.get("linesDeleted").and_then(|v| v.as_u64()).unwrap_or(0)
                                    as usize;
                            Some(Modification {
                                timestamp,
                                commit_sha: sha,
                                lines_added,
                                lines_deleted,
                            })
                        })
                        .collect()
                })
                .unwrap_or_default();

            // Sort modifications by timestamp
            let mut sorted_mods = modifications;
            sorted_mods.sort_by_key(|m| m.timestamp);

            let first_modification_at = sorted_mods.first().map(|m| m.timestamp);
            let first_modification_commit = sorted_mods
                .first()
                .map(|m| m.commit_sha.clone())
                .unwrap_or_default();

            let record = FunctionChurnRecord {
                qualified_name,
                file_path,
                function_name,
                created_at,
                creation_commit,
                lines_original: loc,
                first_modification_at,
                first_modification_commit,
                modifications: sorted_mods,
            };

            if let Some(finding) = self.create_finding(&record) {
                findings.push(finding);
            }
        }

        findings.sort_by(|a, b| b.severity.cmp(&a.severity));

        info!(
            "AIChurnDetector found {} high-churn patterns",
            findings.len()
        );

        Ok(findings)
    }
}

impl AIChurnDetector {
    /// Fallback used when the graph query returns no rows.
    ///
    /// Logs a warning that git history appears not to be indexed and reports no
    /// findings; the graph is not consulted.
    fn detect_without_git_history(&self, _graph: &GraphClient) -> Result<Vec<Finding>> {
        warn!(
            "AIChurnDetector: No git history data in graph. \
             For full churn detection, ensure git history is indexed."
        );
        let no_findings: Vec<Finding> = Vec::new();
        Ok(no_findings)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record for a 100-line function created seven days ago.
    ///
    /// - `ttf_hours`: hours between creation and the first modification; `None`
    ///   leaves `first_modification_at` unset.
    /// - `mods`: number of modifications, spaced one hour apart starting at the
    ///   first modification (or at "now" when `ttf_hours` is `None`).
    /// - `churn_ratio`: requested ratio of added lines to original lines. The
    ///   lines are spread over the modifications with the remainder going to the
    ///   earliest ones, so the total is exact rather than truncated.
    fn make_record(ttf_hours: Option<f64>, mods: usize, churn_ratio: f64) -> FunctionChurnRecord {
        let now = Utc::now();
        let created = now - Duration::days(7);
        let first_mod_at =
            ttf_hours.map(|h| created + Duration::seconds((h * 3600.0).round() as i64));
        let lines_original = 100;
        let lines_changed = (churn_ratio * lines_original as f64).round() as usize;
        let share = lines_changed / mods.max(1);
        let remainder = lines_changed % mods.max(1);

        let modifications: Vec<Modification> = (0..mods)
            .map(|i| Modification {
                timestamp: first_mod_at.unwrap_or(now) + Duration::hours(i as i64),
                commit_sha: format!("abc{}", i),
                lines_added: share + usize::from(i < remainder),
                lines_deleted: 0,
            })
            .collect();

        FunctionChurnRecord {
            qualified_name: "test::func".to_string(),
            file_path: "test.py".to_string(),
            function_name: "func".to_string(),
            created_at: Some(created),
            creation_commit: "initial".to_string(),
            lines_original,
            first_modification_at: first_mod_at,
            first_modification_commit: "first".to_string(),
            modifications,
        }
    }

    #[test]
    fn test_churn_score_high_velocity() {
        // 12h is strictly inside the 24h tier (0.40); 3 mods add 0.20 and a churn
        // ratio of 0.5 adds 0.10, for a total of 0.70.
        let record = make_record(Some(12.0), 3, 0.5);
        let score = record.ai_churn_score();
        assert!(
            score >= 0.7,
            "High velocity + multiple mods should score high"
        );
    }

    #[test]
    fn test_churn_score_velocity_boundary_is_exclusive() {
        // Exactly 24h is NOT under the 24h threshold, so the record falls into the
        // 48h tier: 0.25 + 0.20 + 0.10 = 0.55.
        let record = make_record(Some(24.0), 3, 0.5);
        let score = record.ai_churn_score();
        assert!(
            (score - 0.55).abs() < 1e-9,
            "24h sits in the 48h tier, got {}",
            score
        );
    }

    #[test]
    fn test_churn_score_high_churn_ratio() {
        let record = make_record(Some(100.0), 1, 1.5);
        let score = record.ai_churn_score();
        assert!(score >= 0.4, "High churn ratio should contribute to score");
    }

    #[test]
    fn test_churn_score_without_modifications_is_zero() {
        let record = make_record(None, 0, 0.0);
        assert_eq!(record.time_to_first_fix_hours(), None);
        assert_eq!(record.modifications_first_week(), 0);
        assert_eq!(record.ai_churn_score(), 0.0);
        assert!(!record.is_high_velocity_fix());
    }

    #[test]
    fn test_is_high_velocity_fix() {
        let record = make_record(Some(24.0), 3, 0.3);
        assert!(record.is_high_velocity_fix());

        let record2 = make_record(Some(72.0), 3, 0.3);
        assert!(!record2.is_high_velocity_fix());
    }

    #[test]
    fn test_severity_calculation() {
        let detector = AIChurnDetector::new();

        // Critical: high churn ratio
        let record = make_record(Some(24.0), 5, 2.0);
        assert_eq!(detector.calculate_severity(&record), Severity::Critical);

        // High: fast fix + multiple mods
        let record = make_record(Some(36.0), 3, 0.5);
        assert_eq!(detector.calculate_severity(&record), Severity::High);

        // Info: nothing recorded at all
        let record = make_record(None, 0, 0.0);
        assert_eq!(detector.calculate_severity(&record), Severity::Info);
    }
}