Skip to main content

tirith_core/
engine.rs

1use std::time::Instant;
2
3use crate::extract::{self, ScanContext};
4use crate::normalize;
5use crate::policy::Policy;
6use crate::tokenize::ShellType;
7use crate::verdict::{Finding, Timings, Verdict};
8
/// Extract the raw path component of a URL string before any normalization.
///
/// The path is copied verbatim from `raw` (no decoding or percent-encoding),
/// starting at the `/` that terminates the authority and ending just before
/// the first `?` or `#`.
///
/// Returns `None` when `raw` contains no `://` separator, or when no path
/// follows the authority. A `/` that only appears inside the query or the
/// fragment (e.g. `https://host?next=/x`) is not a path and also yields `None`.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_scheme, after_scheme) = raw.split_once("://")?;

    // The authority ends at the first `/`, `?` or `#`. Only a `/` starts a
    // path; if `?` or `#` comes first, the URL has an empty path and any later
    // slash belongs to the query or fragment.
    let authority_end = after_scheme.find(['/', '?', '#'])?;
    let from_path = &after_scheme[authority_end..];
    if !from_path.starts_with('/') {
        return None;
    }

    // The path runs up to (not including) the query or fragment delimiter.
    let path_end = from_path.find(['?', '#']).unwrap_or(from_path.len());
    Some(from_path[..path_end].to_string())
}
22
23/// Analysis context passed through the pipeline.
24pub struct AnalysisContext {
25    pub input: String,
26    pub shell: ShellType,
27    pub scan_context: ScanContext,
28    pub raw_bytes: Option<Vec<u8>>,
29    pub interactive: bool,
30    pub cwd: Option<String>,
31}
32
33/// Run the tiered analysis pipeline.
34pub fn analyze(ctx: &AnalysisContext) -> Verdict {
35    let start = Instant::now();
36
37    // Tier 0: Check bypass flag
38    let tier0_start = Instant::now();
39    let bypass_requested = std::env::var("TIRITH").ok().as_deref() == Some("0");
40    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;
41
42    // Tier 1: Fast scan (no I/O)
43    let tier1_start = Instant::now();
44
45    // Step 1 (paste only): byte-level scan for control chars
46    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
47        if let Some(ref bytes) = ctx.raw_bytes {
48            let scan = extract::scan_bytes(bytes);
49            scan.has_ansi_escapes
50                || scan.has_control_chars
51                || scan.has_bidi_controls
52                || scan.has_zero_width
53                || scan.has_invalid_utf8
54        } else {
55            false
56        }
57    } else {
58        false
59    };
60
61    // Step 2: URL-like regex scan
62    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);
63
64    // Step 3 (exec only): check for bidi/zero-width chars even without URLs
65    let exec_bidi_triggered = if ctx.scan_context == ScanContext::Exec {
66        let scan = extract::scan_bytes(ctx.input.as_bytes());
67        scan.has_bidi_controls || scan.has_zero_width
68    } else {
69        false
70    };
71
72    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;
73
74    // If nothing triggered, fast exit
75    if !byte_scan_triggered && !regex_triggered && !exec_bidi_triggered {
76        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
77        return Verdict::allow_fast(
78            1,
79            Timings {
80                tier0_ms,
81                tier1_ms,
82                tier2_ms: None,
83                tier3_ms: None,
84                total_ms,
85            },
86        );
87    }
88
89    // Tier 2: Policy + data loading (deferred I/O)
90    let tier2_start = Instant::now();
91
92    if bypass_requested {
93        // Load partial policy to check bypass settings
94        let policy = Policy::discover_partial(ctx.cwd.as_deref());
95        let allow_bypass = if ctx.interactive {
96            policy.allow_bypass_env
97        } else {
98            policy.allow_bypass_env_noninteractive
99        };
100
101        if allow_bypass {
102            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
103            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
104            let mut verdict = Verdict::allow_fast(
105                2,
106                Timings {
107                    tier0_ms,
108                    tier1_ms,
109                    tier2_ms: Some(tier2_ms),
110                    tier3_ms: None,
111                    total_ms,
112                },
113            );
114            verdict.bypass_requested = true;
115            verdict.bypass_honored = true;
116            verdict.interactive_detected = ctx.interactive;
117            verdict.policy_path_used = policy.path.clone();
118            // Log bypass to audit
119            crate::audit::log_verdict(&verdict, &ctx.input, None, None);
120            return verdict;
121        }
122    }
123
124    let mut policy = Policy::discover(ctx.cwd.as_deref());
125    policy.load_user_lists();
126    policy.load_org_lists(ctx.cwd.as_deref());
127    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
128
129    // Tier 3: Full analysis
130    let tier3_start = Instant::now();
131    let mut findings = Vec::new();
132
133    // Run byte-level rules for paste context
134    if ctx.scan_context == ScanContext::Paste {
135        if let Some(ref bytes) = ctx.raw_bytes {
136            let byte_findings = crate::rules::terminal::check_bytes(bytes);
137            findings.extend(byte_findings);
138        }
139        // Check for hidden multiline content in pasted text
140        let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
141        findings.extend(multiline_findings);
142    }
143
144    // Bidi and zero-width checks apply to both exec and paste contexts
145    // (exec context: bidi in URLs/commands is always dangerous)
146    if ctx.scan_context == ScanContext::Exec {
147        let byte_input = ctx.input.as_bytes();
148        let scan = extract::scan_bytes(byte_input);
149        if scan.has_bidi_controls || scan.has_zero_width {
150            let byte_findings = crate::rules::terminal::check_bytes(byte_input);
151            // Only keep bidi and zero-width findings for exec context
152            findings.extend(byte_findings.into_iter().filter(|f| {
153                matches!(
154                    f.rule_id,
155                    crate::verdict::RuleId::BidiControls | crate::verdict::RuleId::ZeroWidthChars
156                )
157            }));
158        }
159    }
160
161    // Extract and analyze URLs
162    let extracted = extract::extract_urls(&ctx.input, ctx.shell);
163
164    for url_info in &extracted {
165        // Normalize path if available — use raw extracted URL's path for non-ASCII detection
166        // since url::Url percent-encodes non-ASCII during parsing
167        let raw_path = extract_raw_path_from_url(&url_info.raw);
168        let normalized_path = url_info.parsed.path().map(normalize::normalize_path);
169
170        // Run all rule categories
171        let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
172        findings.extend(hostname_findings);
173
174        let path_findings = crate::rules::path::check(
175            &url_info.parsed,
176            normalized_path.as_ref(),
177            raw_path.as_deref(),
178        );
179        findings.extend(path_findings);
180
181        let transport_findings =
182            crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
183        findings.extend(transport_findings);
184
185        let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
186        findings.extend(ecosystem_findings);
187    }
188
189    // Run command-shape rules on full input
190    let command_findings = crate::rules::command::check(&ctx.input, ctx.shell);
191    findings.extend(command_findings);
192
193    // Run environment rules
194    let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
195    findings.extend(env_findings);
196
197    // Apply policy severity overrides
198    for finding in &mut findings {
199        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
200            finding.severity = override_sev;
201        }
202    }
203
204    // Filter by allowlist/blocklist
205    // Blocklist: if any extracted URL matches blocklist, escalate to Block
206    for url_info in &extracted {
207        if policy.is_blocklisted(&url_info.raw) {
208            findings.push(Finding {
209                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
210                severity: crate::verdict::Severity::Critical,
211                title: "URL matches blocklist".to_string(),
212                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
213                evidence: vec![crate::verdict::Evidence::Url {
214                    raw: url_info.raw.clone(),
215                }],
216            });
217        }
218    }
219
220    // Allowlist: remove findings for URLs that match allowlist
221    // (blocklist takes precedence — if blocklisted, findings remain)
222    if !policy.allowlist.is_empty() {
223        let blocklisted_urls: Vec<String> = extracted
224            .iter()
225            .filter(|u| policy.is_blocklisted(&u.raw))
226            .map(|u| u.raw.clone())
227            .collect();
228
229        findings.retain(|f| {
230            // Keep all findings that aren't URL-based
231            let url_in_evidence = f.evidence.iter().find_map(|e| {
232                if let crate::verdict::Evidence::Url { raw } = e {
233                    Some(raw.clone())
234                } else {
235                    None
236                }
237            });
238            match url_in_evidence {
239                Some(ref url) => {
240                    // Keep if blocklisted, otherwise drop if allowlisted
241                    blocklisted_urls.contains(url) || !policy.is_allowlisted(url)
242                }
243                None => true, // Keep non-URL findings
244            }
245        });
246    }
247
248    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
249    let total_ms = start.elapsed().as_secs_f64() * 1000.0;
250
251    let mut verdict = Verdict::from_findings(
252        findings,
253        3,
254        Timings {
255            tier0_ms,
256            tier1_ms,
257            tier2_ms: Some(tier2_ms),
258            tier3_ms: Some(tier3_ms),
259            total_ms,
260        },
261    );
262    verdict.bypass_requested = bypass_requested;
263    verdict.interactive_detected = ctx.interactive;
264    verdict.policy_path_used = policy.path.clone();
265    verdict.urls_extracted_count = Some(extracted.len());
266
267    verdict
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    /// A bidi control character in exec input must prevent the tier-1 fast
    /// exit even though the command contains no URL.
    #[test]
    fn test_exec_bidi_without_url() {
        // U+202E embedded in an otherwise plain command.
        let command = String::from("echo hello\u{202E}world");

        let verdict = analyze(&AnalysisContext {
            input: command,
            shell: ShellType::Posix,
            scan_context: ScanContext::Exec,
            raw_bytes: None,
            interactive: true,
            cwd: None,
        });

        // The pipeline has to run all the way through tier 3.
        assert!(
            verdict.tier_reached >= 3,
            "bidi in exec should reach tier 3, got tier {}",
            verdict.tier_reached
        );

        // At least one finding must be the bidi-controls rule.
        let saw_bidi = verdict
            .findings
            .iter()
            .any(|f| matches!(f.rule_id, crate::verdict::RuleId::BidiControls));
        assert!(saw_bidi, "should detect bidi controls in exec context");
    }
}