tirith_core/
script_analysis.rs1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Serialize;
4
5#[derive(Debug, Clone, Serialize)]
7pub struct ScriptAnalysis {
8 pub domains_referenced: Vec<String>,
9 pub paths_referenced: Vec<String>,
10 pub has_sudo: bool,
11 pub has_eval: bool,
12 pub has_base64: bool,
13 pub has_curl_wget: bool,
14 pub interpreter: String,
15}
16
17static DOMAIN_RE: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r"(?:https?://)?([a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)").unwrap()
19});
20
21static PATH_RE: Lazy<Regex> = Lazy::new(|| {
22 Regex::new(r"(?:/(?:usr|etc|var|tmp|opt|home|root|bin|sbin|lib|dev)(?:/[\w.-]+)+)").unwrap()
23});
24
25pub fn analyze(content: &str, interpreter: &str) -> ScriptAnalysis {
27 let mut domains = Vec::new();
28 for cap in DOMAIN_RE.captures_iter(content) {
29 if let Some(m) = cap.get(1) {
30 let domain = m.as_str().to_string();
31 if !domains.contains(&domain) {
32 domains.push(domain);
33 }
34 }
35 }
36
37 let mut paths = Vec::new();
38 for mat in PATH_RE.find_iter(content) {
39 let path = mat.as_str().to_string();
40 if !paths.contains(&path) {
41 paths.push(path);
42 }
43 }
44
45 ScriptAnalysis {
46 domains_referenced: domains,
47 paths_referenced: paths,
48 has_sudo: content.contains("sudo "),
49 has_eval: content.contains("eval ") || content.contains("eval("),
50 has_base64: content.contains("base64"),
51 has_curl_wget: content.contains("curl ")
52 || content.contains("wget ")
53 || content.contains("http ")
54 || content.contains("https ")
55 || content.contains("xh "),
56 interpreter: interpreter.to_string(),
57 }
58}
59
/// Determines the interpreter named by the shebang on the first line of `content`.
///
/// Returns the final path component of the shebang program (`#!/bin/bash` -> `"bash"`). When
/// that program is `env`, option-like tokens (starting with `-`) and `NAME=value` assignments
/// are skipped, and the final path component of the first remaining token is returned
/// (`#!/usr/bin/env -S VAR=1 /opt/bin/python3` -> `"python3"`).
///
/// Falls back to `"sh"` when there is no shebang, when `env` is given no program, or when the
/// resolved name is empty.
///
/// Limitation: `env` options that take a separate value (e.g. `-u NAME`) are not understood;
/// the value is treated as the interpreter.
pub fn detect_interpreter(content: &str) -> &str {
    const DEFAULT_INTERPRETER: &str = "sh";

    /// Final `/`-separated component of `program`.
    fn basename(program: &str) -> &str {
        program.rsplit('/').next().unwrap_or(program)
    }

    // `lines()` already drops a trailing `\r\n`; `trim` also tolerates leading whitespace.
    // `strip_prefix` removes exactly one `#!` marker, unlike `trim_start_matches`.
    let shebang = match content
        .lines()
        .next()
        .map(str::trim)
        .and_then(|line| line.strip_prefix("#!"))
    {
        Some(rest) => rest,
        None => return DEFAULT_INTERPRETER,
    };

    let mut tokens = shebang.split_whitespace();
    let program = match tokens.next() {
        Some(token) => basename(token),
        None => return DEFAULT_INTERPRETER,
    };

    let interpreter = if program == "env" {
        // Skip flags such as `-S` and `NAME=value` assignments. The target is reduced to its
        // basename so that both branches report names in the same form.
        tokens
            .find(|token| !token.starts_with('-') && !token.contains('='))
            .map(basename)
    } else {
        Some(program)
    };

    match interpreter {
        Some(name) if !name.is_empty() => name,
        _ => DEFAULT_INTERPRETER,
    }
}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `detect_interpreter` reports `expected` for `script`.
    #[track_caller]
    fn assert_interpreter(script: &str, expected: &str) {
        assert_eq!(detect_interpreter(script), expected);
    }

    #[test]
    fn test_detect_interpreter_env_s() {
        // The leading `-S` flag is skipped; the first non-option token is the interpreter.
        assert_interpreter("#!/usr/bin/env -S python3 -u\nprint('hello')", "python3");
    }

    #[test]
    fn test_detect_interpreter_env_s_with_var() {
        // `VAR=1` is an environment assignment, not the program to run.
        assert_interpreter("#!/usr/bin/env -S VAR=1 python3\nprint('hello')", "python3");
    }

    #[test]
    fn test_detect_interpreter_crlf() {
        // A Windows line ending must not leak a trailing `\r` into the name.
        assert_interpreter("#!/bin/bash\r\necho hello", "bash");
    }

    #[test]
    fn test_detect_interpreter_basic() {
        assert_interpreter("#!/usr/bin/env python3\nprint('hello')", "python3");
    }

    #[test]
    fn test_detect_interpreter_no_shebang() {
        // Without a shebang the default interpreter is reported.
        assert_interpreter("echo hello", "sh");
    }
}