Skip to main content

tirith_core/
engine.rs

1use std::time::Instant;
2
3use crate::extract::{self, ScanContext};
4use crate::normalize;
5use crate::policy::Policy;
6use crate::tokenize::ShellType;
7use crate::verdict::{Finding, Timings, Verdict};
8
/// Extract the raw path from a URL string before any normalization.
///
/// The path is the span that starts at the first `/` following the authority
/// (the part right after `://`) and ends before the first `?` or `#`.
///
/// Returns `None` when `raw` has no `://` separator or when the authority is
/// not followed by a path. In particular, a `/` that only appears inside the
/// query string or fragment (e.g. `https://host?next=/x`) is *not* treated as
/// the start of a path.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_scheme, after_scheme) = raw.split_once("://")?;

    // The authority is terminated by the first '/', '?' or '#'. Only a '/'
    // terminator introduces a path; '?' or '#' means the URL has none.
    let authority_end = after_scheme.find(['/', '?', '#'])?;
    let rest = &after_scheme[authority_end..];
    if !rest.starts_with('/') {
        return None;
    }

    // The path runs up to (not including) the query or fragment delimiter.
    let path_end = rest.find(['?', '#']).unwrap_or(rest.len());
    Some(rest[..path_end].to_string())
}
22
23/// Analysis context passed through the pipeline.
24pub struct AnalysisContext {
25    pub input: String,
26    pub shell: ShellType,
27    pub scan_context: ScanContext,
28    pub raw_bytes: Option<Vec<u8>>,
29    pub interactive: bool,
30    pub cwd: Option<String>,
31}
32
33/// Run the tiered analysis pipeline.
34pub fn analyze(ctx: &AnalysisContext) -> Verdict {
35    let start = Instant::now();
36
37    // Tier 0: Check bypass flag
38    let tier0_start = Instant::now();
39    let bypass_requested = std::env::var("TIRITH").ok().as_deref() == Some("0");
40    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;
41
42    // Tier 1: Fast scan (no I/O)
43    let tier1_start = Instant::now();
44
45    // Step 1 (paste only): byte-level scan for control chars
46    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
47        if let Some(ref bytes) = ctx.raw_bytes {
48            let scan = extract::scan_bytes(bytes);
49            scan.has_ansi_escapes
50                || scan.has_control_chars
51                || scan.has_bidi_controls
52                || scan.has_zero_width
53                || scan.has_invalid_utf8
54        } else {
55            false
56        }
57    } else {
58        false
59    };
60
61    // Step 2: URL-like regex scan
62    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);
63
64    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;
65
66    // If nothing triggered, fast exit
67    if !byte_scan_triggered && !regex_triggered {
68        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
69        return Verdict::allow_fast(
70            1,
71            Timings {
72                tier0_ms,
73                tier1_ms,
74                tier2_ms: None,
75                tier3_ms: None,
76                total_ms,
77            },
78        );
79    }
80
81    // Tier 2: Policy + data loading (deferred I/O)
82    let tier2_start = Instant::now();
83
84    if bypass_requested {
85        // Load partial policy to check bypass settings
86        let policy = Policy::discover_partial(ctx.cwd.as_deref());
87        let allow_bypass = if ctx.interactive {
88            policy.allow_bypass_env
89        } else {
90            policy.allow_bypass_env_noninteractive
91        };
92
93        if allow_bypass {
94            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
95            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
96            let mut verdict = Verdict::allow_fast(
97                2,
98                Timings {
99                    tier0_ms,
100                    tier1_ms,
101                    tier2_ms: Some(tier2_ms),
102                    tier3_ms: None,
103                    total_ms,
104                },
105            );
106            verdict.bypass_requested = true;
107            verdict.bypass_honored = true;
108            verdict.interactive_detected = ctx.interactive;
109            verdict.policy_path_used = policy.path.clone();
110            // Log bypass to audit
111            crate::audit::log_verdict(&verdict, &ctx.input, None, None);
112            return verdict;
113        }
114    }
115
116    let mut policy = Policy::discover(ctx.cwd.as_deref());
117    policy.load_user_lists();
118    policy.load_org_lists(ctx.cwd.as_deref());
119    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
120
121    // Tier 3: Full analysis
122    let tier3_start = Instant::now();
123    let mut findings = Vec::new();
124
125    // Run byte-level rules for paste context
126    if ctx.scan_context == ScanContext::Paste {
127        if let Some(ref bytes) = ctx.raw_bytes {
128            let byte_findings = crate::rules::terminal::check_bytes(bytes);
129            findings.extend(byte_findings);
130        }
131        // Check for hidden multiline content in pasted text
132        let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
133        findings.extend(multiline_findings);
134    }
135
136    // Bidi and zero-width checks apply to both exec and paste contexts
137    // (exec context: bidi in URLs/commands is always dangerous)
138    if ctx.scan_context == ScanContext::Exec {
139        let byte_input = ctx.input.as_bytes();
140        let scan = extract::scan_bytes(byte_input);
141        if scan.has_bidi_controls || scan.has_zero_width {
142            let byte_findings = crate::rules::terminal::check_bytes(byte_input);
143            // Only keep bidi and zero-width findings for exec context
144            findings.extend(byte_findings.into_iter().filter(|f| {
145                matches!(
146                    f.rule_id,
147                    crate::verdict::RuleId::BidiControls | crate::verdict::RuleId::ZeroWidthChars
148                )
149            }));
150        }
151    }
152
153    // Extract and analyze URLs
154    let extracted = extract::extract_urls(&ctx.input, ctx.shell);
155
156    for url_info in &extracted {
157        // Normalize path if available — use raw extracted URL's path for non-ASCII detection
158        // since url::Url percent-encodes non-ASCII during parsing
159        let raw_path = extract_raw_path_from_url(&url_info.raw);
160        let normalized_path = url_info.parsed.path().map(normalize::normalize_path);
161
162        // Run all rule categories
163        let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
164        findings.extend(hostname_findings);
165
166        let path_findings = crate::rules::path::check(
167            &url_info.parsed,
168            normalized_path.as_ref(),
169            raw_path.as_deref(),
170        );
171        findings.extend(path_findings);
172
173        let transport_findings =
174            crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
175        findings.extend(transport_findings);
176
177        let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
178        findings.extend(ecosystem_findings);
179    }
180
181    // Run command-shape rules on full input
182    let command_findings = crate::rules::command::check(&ctx.input, ctx.shell);
183    findings.extend(command_findings);
184
185    // Run environment rules
186    let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
187    findings.extend(env_findings);
188
189    // Apply policy severity overrides
190    for finding in &mut findings {
191        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
192            finding.severity = override_sev;
193        }
194    }
195
196    // Filter by allowlist/blocklist
197    // Blocklist: if any extracted URL matches blocklist, escalate to Block
198    for url_info in &extracted {
199        if policy.is_blocklisted(&url_info.raw) {
200            findings.push(Finding {
201                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
202                severity: crate::verdict::Severity::Critical,
203                title: "URL matches blocklist".to_string(),
204                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
205                evidence: vec![crate::verdict::Evidence::Url {
206                    raw: url_info.raw.clone(),
207                }],
208            });
209        }
210    }
211
212    // Allowlist: remove findings for URLs that match allowlist
213    // (blocklist takes precedence — if blocklisted, findings remain)
214    if !policy.allowlist.is_empty() {
215        let blocklisted_urls: Vec<String> = extracted
216            .iter()
217            .filter(|u| policy.is_blocklisted(&u.raw))
218            .map(|u| u.raw.clone())
219            .collect();
220
221        findings.retain(|f| {
222            // Keep all findings that aren't URL-based
223            let url_in_evidence = f.evidence.iter().find_map(|e| {
224                if let crate::verdict::Evidence::Url { raw } = e {
225                    Some(raw.clone())
226                } else {
227                    None
228                }
229            });
230            match url_in_evidence {
231                Some(ref url) => {
232                    // Keep if blocklisted, otherwise drop if allowlisted
233                    blocklisted_urls.contains(url) || !policy.is_allowlisted(url)
234                }
235                None => true, // Keep non-URL findings
236            }
237        });
238    }
239
240    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
241    let total_ms = start.elapsed().as_secs_f64() * 1000.0;
242
243    let mut verdict = Verdict::from_findings(
244        findings,
245        3,
246        Timings {
247            tier0_ms,
248            tier1_ms,
249            tier2_ms: Some(tier2_ms),
250            tier3_ms: Some(tier3_ms),
251            total_ms,
252        },
253    );
254    verdict.bypass_requested = bypass_requested;
255    verdict.interactive_detected = ctx.interactive;
256    verdict.policy_path_used = policy.path.clone();
257    verdict.urls_extracted_count = Some(extracted.len());
258
259    verdict
260}