zeph_tools/shell/
deobfuscate.rs1use std::sync::LazyLock;
18
19use regex::Regex;
20use tracing;
21
22const MAX_INPUT_BYTES: usize = 8192;
26
27static RE_HEX: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r"\\x([0-9a-fA-F]{2})").expect("RE_HEX"));
29static RE_OCT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\([0-7]{1,3})").expect("RE_OCT"));
30static RE_UNI: LazyLock<Regex> =
31 LazyLock::new(|| Regex::new(r"\\u([0-9a-fA-F]{4})").expect("RE_UNI"));
32static RE_VAR_BRACE: LazyLock<Regex> =
33 LazyLock::new(|| Regex::new(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}").expect("RE_VAR_BRACE"));
34static RE_VAR_PLAIN: LazyLock<Regex> =
35 LazyLock::new(|| Regex::new(r"\$([A-Za-z_][A-Za-z0-9_]*)").expect("RE_VAR_PLAIN"));
36static RE_BACKTICK: LazyLock<Regex> =
37 LazyLock::new(|| Regex::new(r"`([^`]*)`").expect("RE_BACKTICK"));
38static RE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").expect("RE_SPACE"));
39
40#[must_use]
65pub fn deobfuscate(command: &str) -> String {
66 let _span = tracing::info_span!("tools.deobfuscate.normalize").entered();
67 let input = if command.len() > MAX_INPUT_BYTES {
69 let boundary = command.floor_char_boundary(MAX_INPUT_BYTES);
70 &command[..boundary]
71 } else {
72 command
73 };
74
75 let s = RE_HEX.replace_all(input, |caps: ®ex::Captures<'_>| {
81 u8::from_str_radix(&caps[1], 16)
82 .ok()
83 .filter(u8::is_ascii)
84 .map_or_else(
85 || caps[0].to_owned(),
86 |b| {
87 if b == b'\\' {
89 "[bs]".to_owned()
90 } else {
91 (b as char).to_string()
92 }
93 },
94 )
95 });
96
97 let s = RE_OCT.replace_all(&s, |caps: ®ex::Captures<'_>| {
101 u8::from_str_radix(&caps[1], 8)
102 .ok()
103 .filter(|&b| (0x20u8..=0x7E).contains(&b))
104 .map_or_else(|| caps[0].to_owned(), |b| (b as char).to_string())
105 });
106
107 let s = RE_UNI.replace_all(&s, |caps: ®ex::Captures<'_>| {
109 u32::from_str_radix(&caps[1], 16)
110 .ok()
111 .and_then(char::from_u32)
112 .map_or_else(|| caps[0].to_owned(), |c| c.to_string())
113 });
114
115 let s = s.replace("\\\n", "");
117
118 let s = RE_VAR_BRACE.replace_all(&s, "[var:$1]");
120 let s = RE_VAR_PLAIN.replace_all(&s, "[var:$1]");
121
122 let s = RE_BACKTICK.replace_all(&s, "[subshell: $1]");
124
125 let s = replace_dollar_subshells(&s);
127
128 let s = strip_concatenation_quotes(&s);
130
131 RE_SPACE.replace_all(&s, " ").trim().to_owned()
133}
134
/// Rewrites every `$(...)` command substitution in `s` as `[subshell: ...]`.
///
/// Parentheses are balanced by depth counting, so `$(a $(b))` becomes a single
/// marker whose body keeps the inner `$(b)` verbatim. The body is trimmed of
/// surrounding whitespace. If the input ends before the substitution is closed,
/// everything up to the end of the input is taken as the body. All other text
/// is copied through unchanged.
fn replace_dollar_subshells(s: &str) -> String {
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"$(") {
            let start = i + 2;
            let mut depth = 1usize;
            let mut j = start;
            // `(` and `)` are ASCII, so scanning bytes never lands inside a
            // multi-byte character when it stops on one of them.
            while j < bytes.len() && depth > 0 {
                match bytes[j] {
                    b'(' => depth += 1,
                    b')' => depth -= 1,
                    _ => {}
                }
                j += 1;
            }
            // Balanced: `bytes[j - 1]` is the closing `)`, which is excluded.
            // Unbalanced: the scan hit end of input (`j == s.len()`), so there
            // is no closer to drop; subtracting one here would lose a byte,
            // split a multi-byte char, or make `end < start` for a bare `$(`.
            let end = if depth == 0 { j - 1 } else { j };
            let inner = s[start..end].trim();
            out.push_str("[subshell: ");
            out.push_str(inner);
            out.push(']');
            i = j;
        } else {
            // `i` is always on a char boundary, so this yields the next char.
            let Some(ch) = s[i..].chars().next() else {
                break;
            };
            out.push(ch);
            i += ch.len_utf8();
        }
    }
    out
}
171
/// Removes shell quoting so that pieces glued together with quotes
/// (`'cu'"rl"`) read as one word (`curl`).
///
/// * Inside single quotes every character up to the closing `'` is copied
///   verbatim.
/// * Inside double quotes a backslash is dropped and the character after it is
///   copied verbatim (so `\"` yields `"`); any other character is copied until
///   the closing `"`.
/// * Outside quotes characters are copied unchanged.
///
/// The quote characters themselves are never emitted. An unterminated quote
/// simply runs to the end of the input, and a trailing backslash inside double
/// quotes is dropped.
fn strip_concatenation_quotes(s: &str) -> String {
    #[derive(Clone, Copy)]
    enum Mode {
        /// Outside any quotes.
        Bare,
        /// Between `'` and the next `'`.
        Single,
        /// Between `"` and the next unescaped `"`.
        Double,
        /// Just saw a backslash inside double quotes.
        DoubleEscape,
    }

    let mut out = String::with_capacity(s.len());
    let mut mode = Mode::Bare;
    for ch in s.chars() {
        mode = match (mode, ch) {
            (Mode::Bare, '\'') => Mode::Single,
            (Mode::Bare, '"') => Mode::Double,
            (Mode::Single, '\'') | (Mode::Double, '"') => Mode::Bare,
            (Mode::Double, '\\') => Mode::DoubleEscape,
            // The escaped character is taken literally, whatever it is.
            (Mode::DoubleEscape, _) => {
                out.push(ch);
                Mode::Double
            }
            (current, _) => {
                out.push(ch);
                current
            }
        };
    }
    out
}
208
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hex_escape_decoded() {
        assert_eq!(deobfuscate(r"\x63url"), "curl");
        assert_eq!(deobfuscate(r"\x41\x42\x43"), "ABC");
    }

    #[test]
    fn hex_backslash_is_neutralized() {
        // A decoded backslash must not be emitted raw.
        assert_eq!(deobfuscate(r"\x5c"), "[bs]");
    }

    #[test]
    fn octal_escape_decoded() {
        assert_eq!(deobfuscate(r"\143at"), "cat");
        assert_eq!(deobfuscate(r"\101"), "A");
    }

    #[test]
    fn unicode_escape_decoded() {
        // The input must actually contain a `\uHHHH` escape to exercise the pass.
        assert_eq!(deobfuscate(r"\u0063url"), "curl");
        assert_eq!(deobfuscate(r"\u0041\u0042"), "AB");
    }

    #[test]
    fn variable_expansion_brace() {
        assert_eq!(deobfuscate("${HOME}/file"), "[var:HOME]/file");
    }

    #[test]
    fn variable_expansion_plain() {
        assert_eq!(deobfuscate("echo $PATH"), "echo [var:PATH]");
    }

    #[test]
    fn backtick_subshell() {
        assert_eq!(deobfuscate("`whoami`"), "[subshell: whoami]");
    }

    #[test]
    fn dollar_subshell_simple() {
        assert_eq!(deobfuscate("$(whoami)"), "[subshell: whoami]");
    }

    #[test]
    fn quote_concatenation_collapse() {
        assert_eq!(deobfuscate("'cu'\"rl\""), "curl");
        assert_eq!(deobfuscate("'ab'\"cd\"'ef'"), "abcdef");
    }

    #[test]
    fn line_continuation() {
        assert_eq!(deobfuscate("cu\\\nrl"), "curl");
    }

    #[test]
    fn whitespace_normalized() {
        // Runs of mixed whitespace collapse to one space; the ends are trimmed.
        assert_eq!(deobfuscate("echo   hello"), "echo hello");
        assert_eq!(deobfuscate("echo \t\n hello"), "echo hello");
        assert_eq!(deobfuscate("  ls  "), "ls");
    }

    #[test]
    fn long_input_truncated() {
        let long = "a".repeat(MAX_INPUT_BYTES + 100);
        let result = deobfuscate(&long);
        assert!(result.len() <= MAX_INPUT_BYTES);
    }
}