repotoire 0.9.0

//! Unsafe template detector for XSS and template injection vulnerabilities
//!
//! Detects dangerous template patterns that can lead to XSS:
//!
//! - Jinja2 Environment() without autoescape=True
//! - render_template_string() with variables
//! - Markup() with untrusted input
//! - React dangerouslySetInnerHTML
//! - Vue v-html directive
//! - innerHTML / outerHTML assignments
//! - document.write()
//!
//! CWE-79: Cross-site Scripting (XSS)
//! CWE-1336: Server-Side Template Injection

use crate::detectors::base::{Detector, DetectorConfig};
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tracing::{debug, info};

/// Default path patterns excluded from scanning.
///
/// Used by `UnsafeTemplateDetector::with_config` when the `exclude_patterns`
/// option is not set. The list covers test code, migrations, caches, VCS
/// metadata, vendored dependencies, virtualenvs, build output, and
/// minified/bundled JavaScript. How a pattern is matched against a path is
/// decided by `crate::detectors::base::should_exclude_path`.
const DEFAULT_EXCLUDE_PATTERNS: &[&str] = &[
    "tests/",
    "test_",
    "_test.py",
    "migrations/",
    "__pycache__/",
    ".git/",
    "node_modules/",
    "venv/",
    ".venv/",
    "dist/",
    "build/",
    ".min.js",
    ".bundle.js",
];

/// Detects XSS and template injection vulnerabilities.
///
/// Combines line-oriented regex scans of Python, JavaScript/TypeScript and Vue
/// sources with taint paths of category `Xss`.
pub struct UnsafeTemplateDetector {
    /// Detector configuration, exposed through `Detector::config`.
    config: DetectorConfig,
    /// Repository root handed to the intra-function taint fallback in `detect`.
    repository_path: PathBuf,
    /// Cap on reported findings (`max_findings` option, default 100).
    max_findings: usize,
    /// Raw exclusion patterns (`exclude_patterns` option or the built-in defaults).
    exclude_patterns: Vec<String>,
    /// Result of `compile_glob_patterns` applied to `exclude_patterns`.
    compiled_globs: Vec<Regex>,
    // Python patterns
    /// Matches a single-line `Environment(...)` call.
    jinja2_env_pattern: Regex,
    /// Matches `autoescape=True` or `autoescape=select_autoescape(` (case-insensitive).
    autoescape_true_pattern: Regex,
    /// Matches a `render_template_string(...)` call with at least one argument character.
    render_template_string_pattern: Regex,
    /// Matches a `Markup(...)` call with at least one argument character.
    markup_pattern: Regex,
    // JavaScript patterns
    /// Matches the React `dangerouslySetInnerHTML={` attribute.
    dangerous_inner_html_pattern: Regex,
    /// Matches a Vue `v-html="..."` directive with a non-empty value.
    vue_vhtml_pattern: Regex,
    /// Matches `.innerHTML = ...` assignments while rejecting `==` comparisons.
    innerhtml_assign_pattern: Regex,
    /// Matches `.outerHTML = ...` assignments while rejecting `==` comparisons.
    outerhtml_assign_pattern: Regex,
    /// Matches `document.write(` and `document.writeln(`.
    document_write_pattern: Regex,
    // Pre-computed taint results
    /// Cross-function taint paths. Not read in the visible part of this file;
    /// presumably populated via `impl_taint_precompute!` — TODO confirm.
    precomputed_cross: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
    /// Intra-function taint paths; when set, `detect` uses them instead of
    /// running the taint analysis itself.
    precomputed_intra: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
}

impl UnsafeTemplateDetector {
    /// Build a detector with the default configuration, rooted at the current
    /// directory (`"."`).
    pub fn new() -> Self {
        Self::with_repository_path(PathBuf::from("."))
    }

    /// Build a detector with the default configuration for the repository
    /// located at `repository_path`.
    pub fn with_repository_path(repository_path: PathBuf) -> Self {
        let default_config = DetectorConfig::new();
        Self::with_config(default_config, repository_path)
    }

    /// Build a detector from an explicit configuration and repository root.
    ///
    /// Two options are read from `config`:
    /// - `max_findings` (default `100`): cap on the number of reported findings.
    /// - `exclude_patterns` (default `DEFAULT_EXCLUDE_PATTERNS`): paths to skip.
    ///
    /// # Panics
    ///
    /// Panics if one of the hard-coded regular expressions fails to compile,
    /// which would indicate a programming error in this function.
    pub fn with_config(config: DetectorConfig, repository_path: PathBuf) -> Self {
        // All patterns are literals, so a compile failure is a bug, not bad input.
        let compile = |pattern: &str| Regex::new(pattern).expect("valid regex");

        let max_findings = config.get_option_or("max_findings", 100);
        let exclude_patterns: Vec<String> =
            match config.get_option::<Vec<String>>("exclude_patterns") {
                Some(configured) => configured,
                None => DEFAULT_EXCLUDE_PATTERNS
                    .iter()
                    .map(|pattern| pattern.to_string())
                    .collect(),
            };
        // Compiled before `exclude_patterns` is moved into the struct below.
        let compiled_globs = crate::detectors::base::compile_glob_patterns(&exclude_patterns);

        Self {
            config,
            repository_path,
            max_findings,
            compiled_globs,
            exclude_patterns,

            // --- Python ---
            jinja2_env_pattern: compile(r"\bEnvironment\s*\([^)]*\)"),
            autoescape_true_pattern: compile(
                r"(?i)autoescape\s*=\s*(?:True|select_autoescape\s*\()",
            ),
            // Deliberately broad: any call with arguments matches here; calls whose
            // only argument is a string literal are filtered out by the scan logic.
            render_template_string_pattern: compile(r#"\brender_template_string\s*\([^)]+\)"#),
            markup_pattern: compile(r#"\bMarkup\s*\([^)]+\)"#),

            // --- JavaScript / Vue ---
            dangerous_inner_html_pattern: compile(r"\bdangerouslySetInnerHTML\s*=\s*\{"),
            vue_vhtml_pattern: compile(r#"\bv-html\s*=\s*["'][^"']+["']"#),
            // `=[^=;]` keeps `==` / `===` comparisons from matching (#25).
            innerhtml_assign_pattern: compile(r"\.\s*innerHTML\s*=[^=;][^;]*"),
            outerhtml_assign_pattern: compile(r"\.\s*outerHTML\s*=[^=;][^;]*"),
            document_write_pattern: compile(r"\bdocument\s*\.\s*write(?:ln)?\s*\("),

            // Taint results start empty and can be set at most once.
            precomputed_cross: std::sync::OnceLock::new(),
            precomputed_intra: std::sync::OnceLock::new(),
        }
    }

    /// Check if a function call contains only a string literal (safe)
    fn is_string_literal_only(&self, call_match: &str) -> bool {
        // Pattern: function_name("string") or function_name('string')
        // If it matches this pattern, it's safe (static string)
        let safe_pattern = Regex::new(r#"^\w+\s*\(\s*["'][^"']*["']\s*\)$"#).expect("valid regex");
        safe_pattern.is_match(call_match.trim())
    }

    /// Report whether `path` matches one of this detector's exclusion patterns.
    ///
    /// Delegates to `crate::detectors::base::should_exclude_path` with both the
    /// raw patterns and their precompiled glob regexes.
    fn should_exclude(&self, path: &str) -> bool {
        let raw_patterns = &self.exclude_patterns;
        let glob_regexes = &self.compiled_globs;
        crate::detectors::base::should_exclude_path(path, raw_patterns, glob_regexes)
    }

    /// Scan every `.py` file exposed by `fp` for unsafe template usage.
    ///
    /// Each line is checked for:
    /// - `Environment(...)` whose arguments lack `autoescape=True` /
    ///   `select_autoescape(` (`jinja2_no_autoescape`),
    /// - `render_template_string(...)` that is not a lone string literal,
    /// - `Markup(...)` that is not a lone string literal.
    ///
    /// Excluded paths, files larger than 500,000 bytes, lines starting with `#`
    /// and suppressed lines are skipped. The cap is checked once per line, so the
    /// returned vector may exceed `max_findings` by the findings of the last line.
    fn scan_python_files(
        &self,
        fp: &crate::detectors::analysis_context::AnalysisContextFileProvider<'_>,
    ) -> Vec<Finding> {
        let mut results = Vec::new();

        for path in fp.files_with_extension("py") {
            let rel_path = path.to_string_lossy().to_string();
            if self.should_exclude(&rel_path) {
                continue;
            }

            let Some(source) = fp.content(path) else {
                continue;
            };
            let text = source.as_str();
            if text.len() > 500_000 {
                continue;
            }

            let lines: Vec<&str> = text.lines().collect();
            for (idx, line) in lines.iter().enumerate() {
                let stripped = line.trim();
                if stripped.starts_with('#') {
                    continue;
                }

                // A suppression comment may sit on this line or the one before it.
                let prev_line = idx.checked_sub(1).map(|prev| lines[prev]);
                if crate::detectors::is_line_suppressed(line, prev_line) {
                    continue;
                }

                let env_without_autoescape = self
                    .jinja2_env_pattern
                    .find(line)
                    .is_some_and(|m| !self.autoescape_true_pattern.is_match(m.as_str()));
                let dynamic_render = self
                    .render_template_string_pattern
                    .find(line)
                    .is_some_and(|m| !self.is_string_literal_only(m.as_str()));
                let dynamic_markup = self
                    .markup_pattern
                    .find(line)
                    .is_some_and(|m| !self.is_string_literal_only(m.as_str()));

                let line_num = (idx + 1) as u32;
                let checks = [
                    (env_without_autoescape, "jinja2_no_autoescape"),
                    (dynamic_render, "render_template_string"),
                    (dynamic_markup, "markup_unsafe"),
                ];
                for (hit, kind) in checks {
                    if hit {
                        results.push(self.create_finding(&rel_path, line_num, kind, stripped));
                    }
                }

                if results.len() >= self.max_findings {
                    return results;
                }
            }
        }

        results
    }

    /// Scan `.js`, `.jsx`, `.ts` and `.tsx` files exposed by `fp` for DOM XSS sinks.
    ///
    /// Each line is checked for `dangerouslySetInnerHTML={`, `.innerHTML = ...`,
    /// `.outerHTML = ...` and `document.write(` / `document.writeln(`.
    /// `innerHTML` / `outerHTML` assignments of a lone string literal are not
    /// reported.
    ///
    /// Excluded paths, files larger than 500,000 bytes, lines starting with `//`
    /// or `/*` and suppressed lines are skipped. The cap is checked once per
    /// line, so the returned vector may exceed `max_findings` by the findings of
    /// the last line.
    fn scan_javascript_files(
        &self,
        fp: &crate::detectors::analysis_context::AnalysisContextFileProvider<'_>,
    ) -> Vec<Finding> {
        use std::sync::LazyLock;

        // Assignments whose right-hand side is just one string literal are static
        // and therefore not reported. Compiled once for the whole process.
        static LITERAL_INNER_HTML: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new(r#"\.\s*innerHTML\s*=\s*["'][^"']*["']\s*;?\s*$"#).expect("valid regex")
        });
        static LITERAL_OUTER_HTML: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new(r#"\.\s*outerHTML\s*=\s*["'][^"']*["']\s*;?\s*$"#).expect("valid regex")
        });

        let mut results = Vec::new();

        for path in fp.files_with_extensions(&["js", "jsx", "ts", "tsx"]) {
            let rel_path = path.to_string_lossy().to_string();
            if self.should_exclude(&rel_path) {
                continue;
            }

            let Some(source) = fp.content(path) else {
                continue;
            };
            let text = source.as_str();
            if text.len() > 500_000 {
                continue;
            }

            let lines: Vec<&str> = text.lines().collect();
            for (idx, line) in lines.iter().enumerate() {
                let stripped = line.trim();
                if stripped.starts_with("//") || stripped.starts_with("/*") {
                    continue;
                }

                // A suppression comment may sit on this line or the one before it.
                let prev_line = idx.checked_sub(1).map(|prev| lines[prev]);
                if crate::detectors::is_line_suppressed(line, prev_line) {
                    continue;
                }

                let react_hit = self.dangerous_inner_html_pattern.is_match(line);
                let inner_hit = self.innerhtml_assign_pattern.is_match(line)
                    && !LITERAL_INNER_HTML.is_match(stripped);
                let outer_hit = self.outerhtml_assign_pattern.is_match(line)
                    && !LITERAL_OUTER_HTML.is_match(stripped);
                let write_hit = self.document_write_pattern.is_match(line);

                let line_num = (idx + 1) as u32;
                let checks = [
                    (react_hit, "dangerously_set_inner_html"),
                    (inner_hit, "innerhtml_assignment"),
                    (outer_hit, "outerhtml_assignment"),
                    (write_hit, "document_write"),
                ];
                for (hit, kind) in checks {
                    if hit {
                        results.push(self.create_finding(&rel_path, line_num, kind, stripped));
                    }
                }

                if results.len() >= self.max_findings {
                    return results;
                }
            }
        }

        results
    }

    /// Scan every `.vue` file exposed by `fp` for `v-html="..."` directives.
    ///
    /// Excluded paths, files larger than 500,000 bytes and suppressed lines are
    /// skipped. Unlike the Python and JavaScript scans, comment lines are not
    /// filtered. Returns as soon as `max_findings` findings have been collected.
    fn scan_vue_files(
        &self,
        fp: &crate::detectors::analysis_context::AnalysisContextFileProvider<'_>,
    ) -> Vec<Finding> {
        let mut results = Vec::new();

        for path in fp.files_with_extension("vue") {
            let rel_path = path.to_string_lossy().to_string();
            if self.should_exclude(&rel_path) {
                continue;
            }

            let Some(source) = fp.content(path) else {
                continue;
            };
            let text = source.as_str();
            if text.len() > 500_000 {
                continue;
            }

            let lines: Vec<&str> = text.lines().collect();
            for (idx, line) in lines.iter().enumerate() {
                // A suppression comment may sit on this line or the one before it.
                let prev_line = idx.checked_sub(1).map(|prev| lines[prev]);
                if crate::detectors::is_line_suppressed(line, prev_line) {
                    continue;
                }

                if self.vue_vhtml_pattern.is_match(line) {
                    let line_num = (idx + 1) as u32;
                    results.push(self.create_finding(
                        &rel_path,
                        line_num,
                        "vue_vhtml",
                        line.trim(),
                    ));
                }

                if results.len() >= self.max_findings {
                    return results;
                }
            }
        }

        results
    }

    /// Build a `Finding` for one detected unsafe template pattern.
    ///
    /// * `file_path` – path of the offending file as reported by the scanner.
    /// * `line_start` – line number of the match; also used as `line_end`.
    /// * `pattern_type` – internal pattern key (e.g. `"markup_unsafe"`); unknown
    ///   keys fall back to a generic CWE-79 message.
    /// * `snippet` – the offending source line; at most its first 100 bytes
    ///   (cut back to a character boundary) are embedded in the description.
    ///
    /// Every finding is created with `Severity::High` in the `security` category.
    fn create_finding(
        &self,
        file_path: &str,
        line_start: u32,
        pattern_type: &str,
        snippet: &str,
    ) -> Finding {
        let (title_desc, desc, cwe) = match pattern_type {
            "jinja2_no_autoescape" => (
                "Jinja2 Environment without autoescape",
                "Jinja2 Environment() created without autoescape=True, allowing XSS attacks",
                "CWE-79",
            ),
            "render_template_string" => (
                "Unsafe render_template_string",
                "render_template_string() with variable input can lead to template injection",
                "CWE-1336",
            ),
            "markup_unsafe" => (
                "Unsafe Markup usage",
                "Markup() with variable input bypasses escaping, enabling XSS",
                "CWE-79",
            ),
            "dangerously_set_inner_html" => (
                "React dangerouslySetInnerHTML",
                "dangerouslySetInnerHTML can introduce XSS vulnerabilities",
                "CWE-79",
            ),
            "vue_vhtml" => (
                "Vue v-html directive",
                "v-html directive bypasses Vue's XSS protection",
                "CWE-79",
            ),
            "innerhtml_assignment" => (
                "innerHTML assignment",
                "Direct innerHTML assignment can lead to XSS vulnerabilities",
                "CWE-79",
            ),
            "outerhtml_assignment" => (
                "outerHTML assignment",
                "Direct outerHTML assignment can lead to XSS vulnerabilities",
                "CWE-79",
            ),
            "document_write" => (
                "document.write usage",
                "document.write() can introduce XSS vulnerabilities",
                "CWE-79",
            ),
            _ => (
                "Unsafe template pattern",
                "Potentially unsafe template handling detected",
                "CWE-79",
            ),
        };

        // Pair each CWE id with its own official title so that CWE-1336 findings
        // are not mislabelled with the CWE-79 (XSS) name. The embedded line break
        // keeps the CWE-79 text identical to the previous hard-coded wording.
        let cwe_name = match cwe {
            "CWE-1336" => {
                "Improper Neutralization of\nSpecial Elements Used in a Template Engine"
            }
            _ => {
                "Improper Neutralization of\n\
                 Input During Web Page Generation ('Cross-site Scripting')"
            }
        };

        // Limit the snippet to 100 bytes, backing off to the nearest character
        // boundary: slicing a `str` in the middle of a multi-byte UTF-8 sequence
        // panics. Index 0 is always a boundary, so the loop terminates.
        let mut snippet_end = snippet.len().min(100);
        while !snippet.is_char_boundary(snippet_end) {
            snippet_end -= 1;
        }
        let shown_snippet = &snippet[..snippet_end];

        let title = format!("XSS: {}", title_desc);

        let description = format!(
            "**{}**\n\n\
             **Location**: {}:{}\n\n\
             **Code snippet**:\n```\n{}\n```\n\n\
             Cross-Site Scripting (XSS) vulnerabilities occur when untrusted data is included\n\
             in web pages without proper validation or escaping. Attackers can inject malicious\n\
             scripts that:\n\
             - Steal user session cookies\n\
             - Capture keystrokes and credentials\n\
             - Redirect users to malicious sites\n\
             - Deface the application\n\n\
             This vulnerability is classified as **{}: {}**.",
            desc, file_path, line_start, shown_snippet, cwe, cwe_name
        );

        let suggested_fix = self.get_recommendation(pattern_type);

        Finding {
            id: String::new(),
            detector: "UnsafeTemplateDetector".to_string(),
            severity: Severity::High,
            title,
            description,
            affected_files: vec![PathBuf::from(file_path)],
            line_start: Some(line_start),
            line_end: Some(line_start),
            suggested_fix: Some(suggested_fix),
            estimated_effort: Some("Medium (1-4 hours)".to_string()),
            category: Some("security".to_string()),
            cwe_id: Some(cwe.to_string()),
            why_it_matters: Some(
                "XSS vulnerabilities allow attackers to execute scripts in users' browsers, \
                 potentially stealing sensitive data or hijacking user sessions."
                    .to_string(),
            ),
            ..Default::default()
        }
    }

    /// Return the Markdown remediation text for `pattern_type`.
    ///
    /// `pattern_type` is one of the internal pattern keys passed to
    /// `create_finding` (`"jinja2_no_autoescape"`, `"render_template_string"`,
    /// `"markup_unsafe"`, `"dangerously_set_inner_html"`, `"vue_vhtml"`,
    /// `"innerhtml_assignment"`, `"outerhtml_assignment"`, `"document_write"`).
    /// `innerHTML` and `outerHTML` share one recommendation; any other key gets a
    /// generic list of mitigations.
    ///
    /// The literals use `\` line continuations, which drop the following newline
    /// and the next line's leading whitespace, so the indentation seen here is
    /// not part of the returned text.
    fn get_recommendation(&self, pattern_type: &str) -> String {
        match pattern_type {
            "jinja2_no_autoescape" => "**Recommended fixes**:\n\n\
                 1. **Enable autoescape globally** (preferred):\n\
                    ```python\n\
                    from jinja2 import Environment, select_autoescape\n\n\
                    env = Environment(\n\
                        autoescape=select_autoescape(['html', 'htm', 'xml'])\n\
                    )\n\
                    ```\n\n\
                 2. **Use Flask's default environment** (autoescape enabled by default):\n\
                    ```python\n\
                    from flask import render_template\n\
                    return render_template('template.html', data=user_data)\n\
                    ```"
            .to_string(),
            "render_template_string" => "**Recommended fixes**:\n\n\
                 1. **Use file-based templates** instead of string templates:\n\
                    ```python\n\
                    # Instead of:\n\
                    return render_template_string(user_template)\n\n\
                    # Use:\n\
                    return render_template('user_template.html', data=user_data)\n\
                    ```\n\n\
                 2. **If string templates are required**, validate and sanitize:\n\
                    ```python\n\
                    from markupsafe import escape\n\
                    safe_data = escape(user_data)\n\
                    ```"
            .to_string(),
            "markup_unsafe" => "**Recommended fixes**:\n\n\
                 1. **Avoid Markup() with untrusted input**:\n\
                    ```python\n\
                    # Instead of:\n\
                    return Markup(user_data)\n\n\
                    # Use:\n\
                    from markupsafe import escape\n\
                    return escape(user_data)\n\
                    ```\n\n\
                 2. **Only use Markup() for trusted, static content**:\n\
                    ```python\n\
                    return Markup('<strong>') + escape(user_data) + Markup('</strong>')\n\
                    ```"
            .to_string(),
            "dangerously_set_inner_html" => "**Recommended fixes**:\n\n\
                 1. **Avoid dangerouslySetInnerHTML when possible**:\n\
                    ```jsx\n\
                    // Instead of:\n\
                    <div dangerouslySetInnerHTML={{__html: userContent}} />\n\n\
                    // Use React's built-in escaping:\n\
                    <div>{userContent}</div>\n\
                    ```\n\n\
                 2. **If HTML rendering is required**, sanitize first:\n\
                    ```jsx\n\
                    import DOMPurify from 'dompurify';\n\n\
                    <div dangerouslySetInnerHTML={{__html: DOMPurify.sanitize(userContent)}} />\n\
                    ```"
            .to_string(),
            "vue_vhtml" => "**Recommended fixes**:\n\n\
                 1. **Avoid v-html with user content**:\n\
                    ```vue\n\
                    <!-- Instead of: -->\n\
                    <div v-html=\"userContent\"></div>\n\n\
                    <!-- Use text interpolation: -->\n\
                    <div>{{ userContent }}</div>\n\
                    ```\n\n\
                 2. **If HTML rendering is required**, sanitize first:\n\
                    ```vue\n\
                    import DOMPurify from 'dompurify';\n\n\
                    computed: {\n\
                      safeContent() {\n\
                        return DOMPurify.sanitize(this.userContent);\n\
                      }\n\
                    }\n\
                    <div v-html=\"safeContent\"></div>\n\
                    ```"
            .to_string(),
            "innerhtml_assignment" | "outerhtml_assignment" => "**Recommended fixes**:\n\n\
                 1. **Use textContent for text** (auto-escapes):\n\
                    ```javascript\n\
                    // Instead of:\n\
                    element.innerHTML = userInput;\n\n\
                    // Use:\n\
                    element.textContent = userInput;\n\
                    ```\n\n\
                 2. **Use DOM APIs for structure**:\n\
                    ```javascript\n\
                    const span = document.createElement('span');\n\
                    span.textContent = userInput;\n\
                    element.appendChild(span);\n\
                    ```\n\n\
                 3. **If HTML is required**, sanitize first:\n\
                    ```javascript\n\
                    import DOMPurify from 'dompurify';\n\
                    element.innerHTML = DOMPurify.sanitize(userInput);\n\
                    ```"
            .to_string(),
            "document_write" => "**Recommended fixes**:\n\n\
                 1. **Avoid document.write entirely** (deprecated):\n\
                    ```javascript\n\
                    // Instead of:\n\
                    document.write('<div>' + userInput + '</div>');\n\n\
                    // Use DOM APIs:\n\
                    const div = document.createElement('div');\n\
                    div.textContent = userInput;\n\
                    document.body.appendChild(div);\n\
                    ```\n\n\
                 2. **For dynamic script loading**, use createElement:\n\
                    ```javascript\n\
                    const script = document.createElement('script');\n\
                    script.src = trustedScriptUrl;\n\
                    document.head.appendChild(script);\n\
                    ```"
            .to_string(),
            _ => "**Recommended fixes**:\n\n\
                 1. Avoid using raw HTML/template injection patterns\n\
                 2. Use framework-provided escaping mechanisms\n\
                 3. Sanitize user input with a library like DOMPurify\n\
                 4. Apply Content Security Policy (CSP) headers"
                .to_string(),
        }
    }
}

impl Default for UnsafeTemplateDetector {
    fn default() -> Self {
        Self::new()
    }
}

/// Decide whether `sink_text` names an SSTI-class sink, i.e. one where the
/// *template body* itself is the tainted argument rather than a context variable
/// handed to a fixed template.
///
/// Recognised markers (plain substring match):
/// - `render_template_string` — Flask
/// - `from_string`            — Jinja2 `env.from_string(tainted)`
/// - `Template(`              — Jinja2/Mako `Template(tainted_src).render()`
///
/// NOTE: only `render_template_string` is currently registered as a
/// `TaintCategory::Xss` sink in the taint engine (see
/// `taint::analysis::add_xss_patterns`), so today it is the only marker that can
/// actually arrive here through an SSA `TaintPath`. The other two are kept so
/// they take effect automatically once those sinks are registered.
// TODO(taint-sinks): register `from_string` and `Template(` in `add_xss_patterns` so the
// blocking-tier SSTI coverage actually matches what this predicate accepts.
fn is_ssti_sink(sink_text: &str) -> bool {
    const SSTI_SINK_MARKERS: [&str; 3] = ["render_template_string", "from_string", "Template("];

    SSTI_SINK_MARKERS
        .iter()
        .any(|marker| sink_text.contains(marker))
}

impl Detector for UnsafeTemplateDetector {
    fn name(&self) -> &'static str {
        "UnsafeTemplateDetector"
    }

    fn description(&self) -> &'static str {
        "Detects XSS and template injection vulnerabilities (Jinja2, React, Vue, innerHTML)"
    }

    fn bypass_postprocessor(&self) -> bool {
        true
    }

    fn category(&self) -> &'static str {
        "security"
    }

    fn config(&self) -> Option<&DetectorConfig> {
        Some(&self.config)
    }

    crate::detectors::impl_taint_precompute!();

    fn taint_category(&self) -> Option<crate::detectors::taint::TaintCategory> {
        Some(crate::detectors::taint::TaintCategory::Xss)
    }

    fn file_extensions(&self) -> &'static [&'static str] {
        &["py", "js", "ts", "jsx", "tsx", "rb", "php"]
    }

    fn content_requirements(&self) -> crate::detectors::detector_context::ContentFlags {
        crate::detectors::detector_context::ContentFlags::HAS_TEMPLATE
    }

    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        let graph = ctx.graph;
        debug!("Starting unsafe template detection");

        let fp = ctx.as_file_provider();
        let mut findings = Vec::new();

        // Scan Python files
        findings.extend(self.scan_python_files(&fp));

        if findings.len() < self.max_findings {
            // Scan JavaScript/TypeScript files
            findings.extend(self.scan_javascript_files(&fp));
        }

        if findings.len() < self.max_findings {
            // Scan Vue files
            findings.extend(self.scan_vue_files(&fp));
        }

        // Truncate to max_findings
        findings.truncate(self.max_findings);

        // Supplement with intra-function taint analysis (precomputed or fallback)
        let intra_paths = if let Some(intra) = self.precomputed_intra.get() {
            intra.clone()
        } else {
            let taint_analyzer = crate::detectors::taint::TaintAnalyzer::new();
            crate::detectors::taint::run_intra_function_taint(
                &taint_analyzer,
                graph,
                crate::detectors::taint::TaintCategory::Xss,
                &self.repository_path,
            )
        };
        let mut seen: std::collections::HashSet<(String, u32)> = findings
            .iter()
            .filter_map(|f| {
                f.affected_files
                    .first()
                    .map(|p| (p.to_string_lossy().to_string(), f.line_start.unwrap_or(0)))
            })
            .collect();
        for path in intra_paths.iter().filter(|p| !p.is_sanitized) {
            let loc = (path.sink_file.clone(), path.sink_line);
            if !seen.insert(loc) {
                continue;
            }
            let mut finding = crate::detectors::taint::taint_path_to_finding(
                path,
                "UnsafeTemplateDetector",
                "Unsafe Template Injection",
            );
            // Promote to Blocking only for SSTI sinks where the template body is tainted.
            // Gate on `!path.is_sanitized` (already filtered above) rather than
            // `sanitizers_on_path.is_empty()` — the intra-function heuristic only reports
            // the bool, so `sanitizers_on_path` is always empty for these paths.
            if is_ssti_sink(&path.sink_callee_text) {
                finding.tier = crate::models::Tier::Blocking;
                finding.deterministic = true;
                finding.confidence = Some(0.95);
                finding.evidence = Some(crate::models::Evidence::TaintPath {
                    source: crate::models::SourceSpan {
                        file: std::path::PathBuf::from(&path.source_file),
                        line_start: path.source_line,
                        line_end: path.source_line,
                        snippet: None,
                    },
                    sink: crate::models::SourceSpan {
                        file: std::path::PathBuf::from(&path.sink_file),
                        line_start: path.sink_line,
                        line_end: path.sink_line,
                        snippet: None,
                    },
                    sink_kind: "html_sink".to_string(),
                    flow: vec![],
                    sanitizers_seen: path.sanitizers_on_path.clone(),
                });
            }
            findings.push(finding);
            if findings.len() >= self.max_findings {
                break;
            }
        }

        // Downgrade findings that target public API definitions
        for finding in &mut findings {
            if let (Some(file_path), Some(line)) =
                (finding.affected_files.first(), finding.line_start)
            {
                let file_str = file_path.to_string_lossy();
                if crate::detectors::api_surface::is_api_surface(graph, &file_str, line) {
                    finding.severity = Severity::Info;
                    finding.description.push_str(
                        "\n\nNote: This is a public API definition. \
                         The security risk is in caller code that passes \
                         unsanitized input, not in this definition.",
                    );
                }
            }
        }

        info!(
            "UnsafeTemplateDetector found {} potential vulnerabilities (+ taint)",
            findings.len()
        );

        Ok(findings)
    }
}

/// Registry hooks for [`UnsafeTemplateDetector`].
impl crate::detectors::RegisteredDetector for UnsafeTemplateDetector {
    /// Builds a detector rooted at `init.repo_path` and returns it behind a
    /// shared `Arc<dyn Detector>` handle.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let repo_root = init.repo_path.to_path_buf();
        let detector = Self::with_repository_path(repo_root);
        std::sync::Arc::new(detector)
    }

    /// Highest tier this detector reports: `Tier::Blocking`.
    fn max_tier() -> crate::models::Tier {
        crate::models::Tier::Blocking
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::analysis_context::AnalysisContext;
    use crate::detectors::taint::{TaintCategory, TaintPath};
    use crate::graph::builder::GraphBuilder;
    use crate::models::{Evidence, Finding, Tier};

    /// Build a `TaintPath` whose sink function and callee text are both `sink_pattern`,
    /// for injection into `precomputed_intra`. Source is line 3 and sink is line 5 of `app.py`.
    fn make_taint_path(sink_pattern: &str, is_sanitized: bool) -> TaintPath {
        let sink_name = sink_pattern.to_string();
        let file_name = "app.py".to_string();
        TaintPath {
            category: TaintCategory::Xss,
            confidence: 0.95,
            source_function: "request_handler".to_string(),
            source_file: file_name.clone(),
            source_line: 3,
            sink_function: sink_name.clone(),
            sink_file: file_name,
            sink_line: 5,
            sink_callee_text: sink_name,
            call_chain: vec![],
            sanitizers_on_path: vec![],
            sanitizer: None,
            is_sanitized,
        }
    }

    /// Detector rooted at the mock repository with `paths` pre-seeded as its
    /// intra-procedural taint results.
    fn detector_with_intra_paths(paths: Vec<TaintPath>) -> UnsafeTemplateDetector {
        let detector =
            UnsafeTemplateDetector::with_repository_path(std::path::PathBuf::from("/mock/repo"));
        detector
            .precomputed_intra
            .set(paths)
            .expect("OnceLock freshly constructed");
        detector
    }

    /// Run `detector` over a single mock file backed by an empty frozen graph.
    fn run_detector(
        detector: &UnsafeTemplateDetector,
        file_name: &'static str,
        contents: &'static str,
    ) -> Vec<Finding> {
        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![(file_name, contents)]);
        detector.detect(&ctx).expect("detection should succeed")
    }

    /// Findings from this detector that start on line 5, the sink line used by
    /// `make_taint_path`.
    fn findings_at_sink_line(findings: &[Finding]) -> Vec<&Finding> {
        let mut selected = Vec::new();
        for finding in findings {
            if finding.detector == "UnsafeTemplateDetector" && finding.line_start == Some(5) {
                selected.push(finding);
            }
        }
        selected
    }

    /// Taint reaching `render_template_string` must yield a Blocking, deterministic,
    /// high-confidence finding carrying `TaintPath` evidence with an `html_sink` kind.
    #[test]
    fn taint_to_template_render_is_blocking() {
        // Simulate a pre-computed intra path: taint → render_template_string.
        let seeded = vec![make_taint_path("render_template_string", false)];
        let detector = detector_with_intra_paths(seeded);

        let findings = run_detector(&detector, "app.py", "# placeholder\n");
        let at_sink = findings_at_sink_line(&findings);
        assert!(
            !at_sink.is_empty(),
            "Expected a taint finding for render_template_string"
        );

        let finding = at_sink[0];
        assert_eq!(
            finding.tier,
            Tier::Blocking,
            "SSTI taint path must be Blocking"
        );
        assert!(
            finding.deterministic,
            "Blocking SSTI finding must be deterministic"
        );
        let confidence = finding.confidence.unwrap_or(0.0);
        assert!(
            confidence >= 0.90,
            "Blocking SSTI finding must have confidence >= 0.90"
        );
        let has_html_sink_evidence = matches!(
            &finding.evidence,
            Some(Evidence::TaintPath { sink_kind, .. }) if sink_kind == "html_sink"
        );
        assert!(
            has_html_sink_evidence,
            "Evidence must be TaintPath with sink_kind = html_sink; got {:?}",
            finding.evidence
        );
    }

    /// Taint reaching a non-SSTI XSS sink (`innerHTML`) must stay Advisory and
    /// carry no evidence.
    #[test]
    fn context_var_into_fixed_template_is_advisory() {
        // innerHTML is an XSS sink, but it is not a template-engine render call.
        let seeded = vec![make_taint_path("innerHTML", false)];
        let detector = detector_with_intra_paths(seeded);

        let findings = run_detector(&detector, "app.py", "# placeholder\n");
        let at_sink = findings_at_sink_line(&findings);
        assert!(
            !at_sink.is_empty(),
            "Expected a taint finding for innerHTML"
        );

        let finding = at_sink[0];
        assert_eq!(
            finding.tier,
            Tier::Advisory,
            "Non-SSTI XSS sink must remain Advisory"
        );
        assert!(
            finding.evidence.is_none(),
            "Advisory finding must have no evidence"
        );
    }

    /// With no taint paths injected, findings for a file that calls
    /// `render_template_string(tpl)` must all be Advisory and carry no evidence.
    #[test]
    fn line_heuristic_match_is_advisory() {
        // Python view that forwards a request argument to `render_template_string`.
        let content = concat!(
            "from flask import render_template_string\n",
            "def view(request):\n",
            "    tpl = request.args['tpl']\n",
            "    return render_template_string(tpl)\n",
        );

        // No SSA taint paths are seeded.
        let detector = detector_with_intra_paths(Vec::new());
        let findings = run_detector(&detector, "app.py", content);

        let own_findings: Vec<&Finding> = findings
            .iter()
            .filter(|f| f.detector == "UnsafeTemplateDetector")
            .collect();
        assert!(
            !own_findings.is_empty(),
            "Expected a line-heuristic finding for render_template_string"
        );
        own_findings.iter().for_each(|f| {
            assert_eq!(
                f.tier,
                Tier::Advisory,
                "Line-heuristic findings must be Advisory (not Blocking)"
            );
            assert!(
                f.evidence.is_none(),
                "Line-heuristic findings must have no evidence"
            );
        });
    }

    /// The Jinja2 patterns match an `Environment(...)` call and both autoescape forms.
    #[test]
    fn test_jinja2_detection() {
        let detector = UnsafeTemplateDetector::new();

        // An Environment construction is matched by the env pattern.
        let env_call = "env = Environment(loader=FileSystemLoader())";
        assert!(detector.jinja2_env_pattern.is_match(env_call));

        // Both ways of enabling autoescape are matched by the autoescape pattern.
        let autoescape = &detector.autoescape_true_pattern;
        assert!(autoescape.is_match("autoescape=True"));
        assert!(autoescape.is_match("autoescape=select_autoescape()"));
    }

    /// The React pattern matches a `dangerouslySetInnerHTML` prop.
    #[test]
    fn test_react_detection() {
        let detector = UnsafeTemplateDetector::new();

        let jsx = r#"<div dangerouslySetInnerHTML={{__html: content}} />"#;
        assert!(detector.dangerous_inner_html_pattern.is_match(jsx));
    }

    /// The Vue pattern matches a `v-html` directive.
    #[test]
    fn test_vue_detection() {
        let detector = UnsafeTemplateDetector::new();

        let template = r#"<div v-html="userContent"></div>"#;
        assert!(detector.vue_vhtml_pattern.is_match(template));
    }

    /// The assignment patterns match `innerHTML =` and `outerHTML =` statements.
    #[test]
    fn test_innerhtml_detection() {
        let detector = UnsafeTemplateDetector::new();

        let inner_assignment = "element.innerHTML = userInput;";
        assert!(detector.innerhtml_assign_pattern.is_match(inner_assignment));

        let outer_assignment = "element.outerHTML = userInput;";
        assert!(detector.outerhtml_assign_pattern.is_match(outer_assignment));
    }

    /// The document-write pattern matches both `document.write` and `document.writeln`.
    #[test]
    fn test_document_write_detection() {
        let detector = UnsafeTemplateDetector::new();
        let pattern = &detector.document_write_pattern;

        assert!(pattern.is_match("document.write('<div>' + content + '</div>')"));
        assert!(pattern.is_match("document.writeln(html)"));
    }

    /// Assigning a string literal to `innerHTML` must not produce an innerHTML finding.
    #[test]
    fn test_no_finding_for_static_innerhtml() {
        let content = concat!(
            "function clearContent(el) {\n",
            "    el.innerHTML = \"\";\n",
            "}\n",
            "function setLoading(el) {\n",
            "    el.innerHTML = \"<div>Loading...</div>\";\n",
            "}\n",
        );

        // No taint results are seeded here; the detector is used as constructed.
        let detector =
            UnsafeTemplateDetector::with_repository_path(std::path::PathBuf::from("/mock/repo"));
        let findings = run_detector(&detector, "app.js", content);

        let flagged: Vec<&Finding> = findings
            .iter()
            .filter(|f| f.title.contains("innerHTML"))
            .collect();
        assert!(
            flagged.is_empty(),
            "Should not flag static string innerHTML assignments. Found: {:?}",
            flagged.iter().map(|f| &f.title).collect::<Vec<_>>()
        );
    }
}