Skip to main content

tirith_core/
script_analysis.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Serialize;
4
5/// Result of static script analysis.
6#[derive(Debug, Clone, Serialize)]
7pub struct ScriptAnalysis {
8    pub domains_referenced: Vec<String>,
9    pub paths_referenced: Vec<String>,
10    pub has_sudo: bool,
11    pub has_eval: bool,
12    pub has_base64: bool,
13    pub has_curl_wget: bool,
14    pub interpreter: String,
15}
16
17static DOMAIN_RE: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r"(?:https?://)?([a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)").unwrap()
19});
20
21static PATH_RE: Lazy<Regex> = Lazy::new(|| {
22    Regex::new(r"(?:/(?:usr|etc|var|tmp|opt|home|root|bin|sbin|lib|dev)(?:/[\w.-]+)+)").unwrap()
23});
24
25/// Perform static analysis on script content.
26pub fn analyze(content: &str, interpreter: &str) -> ScriptAnalysis {
27    let mut domains = Vec::new();
28    for cap in DOMAIN_RE.captures_iter(content) {
29        if let Some(m) = cap.get(1) {
30            let domain = m.as_str().to_string();
31            if !domains.contains(&domain) {
32                domains.push(domain);
33            }
34        }
35    }
36
37    let mut paths = Vec::new();
38    for mat in PATH_RE.find_iter(content) {
39        let path = mat.as_str().to_string();
40        if !paths.contains(&path) {
41            paths.push(path);
42        }
43    }
44
45    ScriptAnalysis {
46        domains_referenced: domains,
47        paths_referenced: paths,
48        has_sudo: content.contains("sudo "),
49        has_eval: content.contains("eval ") || content.contains("eval("),
50        has_base64: content.contains("base64"),
51        has_curl_wget: content.contains("curl ")
52            || content.contains("wget ")
53            || content.contains("http ")
54            || content.contains("https ")
55            || content.contains("xh "),
56        interpreter: interpreter.to_string(),
57    }
58}
59
/// Detect interpreter from shebang line.
///
/// Looks only at the first line of `content`. When it is a `#!` line, the
/// base name of the program is returned (`#!/bin/bash` gives `bash`). When the
/// program is `env`, the base name of the command that `env` would run is
/// returned instead (`#!/usr/bin/env -S python3 -u` gives `python3`).
///
/// Returns `"sh"` when there is no shebang line, when the shebang names no
/// program, or when an `env` shebang names no command.
pub fn detect_interpreter(content: &str) -> &str {
    shebang_interpreter(content)
        // A shebang such as `#!/bin/` has an empty base name; treat it as absent.
        .filter(|name| !name.is_empty())
        .unwrap_or("sh")
}

/// Extracts the interpreter base name from the first line, if it is a shebang.
fn shebang_interpreter(content: &str) -> Option<&str> {
    // `lines()` already drops `\n` / `\r\n`; `trim` removes stray indentation.
    let shebang = content.lines().next()?.trim().strip_prefix("#!")?;
    let mut tokens = shebang.split_whitespace();

    let program = base_name(tokens.next()?);
    if program != "env" {
        return Some(program);
    }

    // Walk past `env` options and `VAR=val` assignments to reach the command.
    while let Some(token) = tokens.next() {
        // These options take their operand as a separate word; skip it too so
        // the operand is not mistaken for the interpreter.
        if matches!(token, "-u" | "-C" | "-P" | "--unset" | "--chdir") {
            tokens.next();
            continue;
        }
        if token.starts_with('-') || token.contains('=') {
            continue;
        }
        // The command may itself be given as a path; report only its base name,
        // consistent with the non-`env` case above.
        return Some(base_name(token));
    }
    None
}

/// Returns the text after the last `/` in `path` (the whole string if none).
fn base_name(path: &str) -> &str {
    path.rsplit('/').next().unwrap_or(path)
}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// `env -S` followed by interpreter arguments resolves to the interpreter.
    #[test]
    fn test_detect_interpreter_env_s() {
        let detected = detect_interpreter("#!/usr/bin/env -S python3 -u\nprint('hello')");
        assert_eq!(detected, "python3");
    }

    /// A `VAR=val` assignment between `env -S` and the interpreter is skipped.
    #[test]
    fn test_detect_interpreter_env_s_with_var() {
        let detected = detect_interpreter("#!/usr/bin/env -S VAR=1 python3\nprint('hello')");
        assert_eq!(detected, "python3");
    }

    /// A CRLF line ending does not leak into the interpreter name.
    #[test]
    fn test_detect_interpreter_crlf() {
        let detected = detect_interpreter("#!/bin/bash\r\necho hello");
        assert_eq!(detected, "bash");
    }

    /// The common `env <interpreter>` form resolves to the interpreter.
    #[test]
    fn test_detect_interpreter_basic() {
        let detected = detect_interpreter("#!/usr/bin/env python3\nprint('hello')");
        assert_eq!(detected, "python3");
    }

    /// Without a shebang line the detector falls back to `sh`.
    #[test]
    fn test_detect_interpreter_no_shebang() {
        let detected = detect_interpreter("echo hello");
        assert_eq!(detected, "sh");
    }
}