zeph_tools/
verifier.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pre-execution verification for tool calls.
5//!
6//! Based on the `TrustBench` pattern (arXiv:2603.09157): intercept tool calls before
7//! execution to block or warn on destructive or injection patterns.
8//!
9//! ## Blocklist separation
10//!
11//! `DESTRUCTIVE_PATTERNS` (this module) is intentionally separate from
12//! `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`. The two lists serve different purposes:
13//!
14//! - `DEFAULT_BLOCKED_COMMANDS` — shell safety net: prevents the *shell executor* from
15//!   running network tools (`curl`, `wget`, `nc`) and a few destructive commands.
16//!   It is applied at tool-execution time by `ShellExecutor`.
17//!
18//! - `DESTRUCTIVE_PATTERNS` — pre-execution guard: targets filesystem/system destruction
19//!   commands (disk formats, wipefs, fork bombs, recursive permission changes).
20//!   It runs *before* dispatch, in the LLM-call hot path, and must not be conflated
21//!   with the shell safety net to avoid accidental allow-listing via config drift.
22//!
23//! Overlap (3 entries: `rm -rf /`, `mkfs`, `dd if=`) is intentional — belt-and-suspenders.
24
25use std::collections::HashSet;
26use std::sync::{Arc, LazyLock, RwLock};
27
28use regex::Regex;
29use serde::{Deserialize, Serialize};
30use unicode_normalization::UnicodeNormalization as _;
31
/// Serde `default` helper returning `true`.
///
/// Referenced by the `enabled` flags of the config structs in this module so
/// that a verifier is switched on when its key is absent from the config.
fn default_true() -> bool {
    true
}
35
/// Serde `default` helper: the tool names treated as shell executors when the
/// configuration does not list any (`bash`, `shell`, `terminal`).
fn default_shell_tools() -> Vec<String> {
    ["bash", "shell", "terminal"]
        .iter()
        .map(|name| name.to_string())
        .collect()
}
43
/// Outcome of running one pre-execution verifier against a single tool call.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub enum VerificationResult {
    /// Nothing objectionable was found; the tool call may proceed.
    Allow,
    /// The tool call must not run. The executor returns an error to the LLM;
    /// `reason` explains why the call was rejected.
    Block { reason: String },
    /// The tool call still proceeds, but a warning is logged and tracked in
    /// metrics. This is metrics-only: it is not visible to the LLM or the user
    /// beyond the TUI security panel.
    Warn { message: String },
}
56
57/// Pre-execution verification trait. Implementations intercept tool calls
58/// before the executor runs them. Based on `TrustBench` pattern (arXiv:2603.09157).
59///
60/// Sync by design: verifiers inspect arguments only — no I/O needed.
61/// Object-safe: uses `&self` and returns a concrete enum.
62pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
63    /// Verify whether a tool call should proceed.
64    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;
65
66    /// Human-readable name for logging and TUI display.
67    fn name(&self) -> &'static str;
68}
69
70// ---------------------------------------------------------------------------
71// Config types
72// ---------------------------------------------------------------------------
73
74/// Configuration for the destructive command verifier.
75///
76/// `allowed_paths`: when **empty** (the default), ALL destructive commands are denied.
77/// This is a conservative default: to allow e.g. `rm -rf /tmp/build` you must
78/// explicitly add `/tmp/build` to `allowed_paths`.
79///
80/// `shell_tools`: the set of tool names considered shell executors. Defaults to
81/// `["bash", "shell", "terminal"]`. Add custom names here if your setup registers
82/// shell tools under different names (e.g., via MCP or ACP integrations).
83#[derive(Debug, Clone, Deserialize, Serialize)]
84pub struct DestructiveVerifierConfig {
85    #[serde(default = "default_true")]
86    pub enabled: bool,
87    /// Explicit path prefixes under which destructive commands are permitted.
88    /// **Empty = deny-all destructive commands** (safest default).
89    #[serde(default)]
90    pub allowed_paths: Vec<String>,
91    /// Additional command patterns to treat as destructive (substring match).
92    #[serde(default)]
93    pub extra_patterns: Vec<String>,
94    /// Tool names to treat as shell executors (case-insensitive).
95    /// Default: `["bash", "shell", "terminal"]`.
96    #[serde(default = "default_shell_tools")]
97    pub shell_tools: Vec<String>,
98}
99
100impl Default for DestructiveVerifierConfig {
101    fn default() -> Self {
102        Self {
103            enabled: true,
104            allowed_paths: Vec::new(),
105            extra_patterns: Vec::new(),
106            shell_tools: default_shell_tools(),
107        }
108    }
109}
110
111/// Configuration for the injection pattern verifier.
112#[derive(Debug, Clone, Deserialize, Serialize)]
113pub struct InjectionVerifierConfig {
114    #[serde(default = "default_true")]
115    pub enabled: bool,
116    /// Additional injection patterns to block (regex strings).
117    /// Invalid regexes are logged at WARN level and skipped.
118    #[serde(default)]
119    pub extra_patterns: Vec<String>,
120    /// URLs explicitly permitted even if they match SSRF patterns.
121    #[serde(default)]
122    pub allowlisted_urls: Vec<String>,
123}
124
125impl Default for InjectionVerifierConfig {
126    fn default() -> Self {
127        Self {
128            enabled: true,
129            extra_patterns: Vec::new(),
130            allowlisted_urls: Vec::new(),
131        }
132    }
133}
134
135/// Configuration for the URL grounding verifier.
136///
137/// When enabled, `fetch` and `web_scrape` calls are blocked unless the URL
138/// appears in the set of URLs extracted from user messages (`user_provided_urls`).
139/// This prevents the LLM from hallucinating API endpoints and calling fetch with
140/// fabricated URLs that were never supplied by the user.
141#[derive(Debug, Clone, Deserialize, Serialize)]
142pub struct UrlGroundingVerifierConfig {
143    #[serde(default = "default_true")]
144    pub enabled: bool,
145    /// Tool IDs subject to URL grounding checks. Any tool whose name ends with `_fetch`
146    /// is also guarded regardless of this list.
147    #[serde(default = "default_guarded_tools")]
148    pub guarded_tools: Vec<String>,
149}
150
151fn default_guarded_tools() -> Vec<String> {
152    vec!["fetch".to_string(), "web_scrape".to_string()]
153}
154
155impl Default for UrlGroundingVerifierConfig {
156    fn default() -> Self {
157        Self {
158            enabled: true,
159            guarded_tools: default_guarded_tools(),
160        }
161    }
162}
163
164/// Top-level configuration for all pre-execution verifiers.
165#[derive(Debug, Clone, Deserialize, Serialize)]
166pub struct PreExecutionVerifierConfig {
167    #[serde(default = "default_true")]
168    pub enabled: bool,
169    #[serde(default)]
170    pub destructive_commands: DestructiveVerifierConfig,
171    #[serde(default)]
172    pub injection_patterns: InjectionVerifierConfig,
173    #[serde(default)]
174    pub url_grounding: UrlGroundingVerifierConfig,
175    #[serde(default)]
176    pub firewall: FirewallVerifierConfig,
177}
178
179impl Default for PreExecutionVerifierConfig {
180    fn default() -> Self {
181        Self {
182            enabled: true,
183            destructive_commands: DestructiveVerifierConfig::default(),
184            injection_patterns: InjectionVerifierConfig::default(),
185            url_grounding: UrlGroundingVerifierConfig::default(),
186            firewall: FirewallVerifierConfig::default(),
187        }
188    }
189}
190
191// ---------------------------------------------------------------------------
192// DestructiveCommandVerifier
193// ---------------------------------------------------------------------------
194
/// Substrings that mark a shell command as destructive for
/// `DestructiveCommandVerifier`.
///
/// Kept intentionally separate from `DEFAULT_BLOCKED_COMMANDS` in `shell.rs` —
/// the module docs describe the semantic distinction between the two lists.
///
/// Entries are written in lower case because they are matched against a
/// lower-cased command string. Order matters: the first matching entry is the
/// one reported in the block reason.
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletion rooted at `/` or the home directory.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk / filesystem manipulation.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bombs (spaced and compact spellings).
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes.
    "chmod -r 777 /",
    "chown -r",
];
213
/// Blocks destructive shell commands (e.g., `rm -rf /`, `dd`, `mkfs`) before
/// the shell tool gets to execute them.
///
/// Only tools whose name is in the configured `shell_tools` set are inspected
/// (default: `["bash", "shell", "terminal"]`). A matching command is let
/// through only when it targets a location covered by the configured
/// `allowed_paths`; with an empty `allowed_paths` (the default) **all**
/// matching destructive commands are blocked.
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    /// Lower-cased names of the tools treated as shell executors.
    shell_tools: Vec<String>,
    /// Lower-cased path prefixes under which destructive commands are allowed.
    allowed_paths: Vec<String>,
    /// Lower-cased, user-supplied substrings treated as destructive.
    extra_patterns: Vec<String>,
}
227
228impl DestructiveCommandVerifier {
229    #[must_use]
230    pub fn new(config: &DestructiveVerifierConfig) -> Self {
231        Self {
232            shell_tools: config
233                .shell_tools
234                .iter()
235                .map(|s| s.to_lowercase())
236                .collect(),
237            allowed_paths: config
238                .allowed_paths
239                .iter()
240                .map(|s| s.to_lowercase())
241                .collect(),
242            extra_patterns: config
243                .extra_patterns
244                .iter()
245                .map(|s| s.to_lowercase())
246                .collect(),
247        }
248    }
249
250    fn is_shell_tool(&self, tool_name: &str) -> bool {
251        let lower = tool_name.to_lowercase();
252        self.shell_tools.iter().any(|t| t == &lower)
253    }
254
255    /// Extract the effective command string from `args`.
256    ///
257    /// Supports:
258    /// - `{"command": "rm -rf /"}` (string)
259    /// - `{"command": ["rm", "-rf", "/"]}` (array — joined with spaces)
260    /// - `{"command": "bash -c 'rm -rf /'"}` (shell `-c` unwrapping, looped up to 8 levels)
261    /// - `env VAR=val bash -c '...'` and `exec bash -c '...'` prefix stripping
262    ///
263    /// NFKC-normalizes the result to defeat Unicode homoglyph bypasses.
264    fn extract_command(args: &serde_json::Value) -> Option<String> {
265        let raw = match args.get("command") {
266            Some(serde_json::Value::String(s)) => s.clone(),
267            Some(serde_json::Value::Array(arr)) => arr
268                .iter()
269                .filter_map(|v| v.as_str())
270                .collect::<Vec<_>>()
271                .join(" "),
272            _ => return None,
273        };
274        // NFKC-normalize + lowercase to defeat Unicode homoglyph and case bypasses.
275        let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
276        // Loop: strip shell wrapper prefixes up to 8 levels deep.
277        // Handles double-nested: `bash -c "bash -c 'rm -rf /'"`.
278        for _ in 0..8 {
279            let trimmed = current.trim().to_owned();
280            // Strip `env VAR=value ... CMD` prefix (one or more VAR=value tokens).
281            let after_env = Self::strip_env_prefix(&trimmed);
282            // Strip `exec ` prefix.
283            let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
284            // Strip interpreter wrapper: `bash -c '...'` / `sh -c '...'` / `zsh -c '...'`.
285            let mut unwrapped = false;
286            for interp in &["bash -c ", "sh -c ", "zsh -c "] {
287                if let Some(rest) = after_exec.strip_prefix(interp) {
288                    let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
289                    current.clone_from(&script.to_owned());
290                    unwrapped = true;
291                    break;
292                }
293            }
294            if !unwrapped {
295                return Some(after_exec.to_owned());
296            }
297        }
298        Some(current)
299    }
300
301    /// Strip leading `env VAR=value` tokens from a command string.
302    /// Returns the remainder after all `KEY=VALUE` pairs are consumed.
303    fn strip_env_prefix(cmd: &str) -> &str {
304        let mut rest = cmd;
305        // `env` keyword is optional; strip it if present.
306        if let Some(after_env) = rest.strip_prefix("env ") {
307            rest = after_env.trim_start();
308        }
309        // Consume `KEY=VALUE` tokens.
310        loop {
311            // A VAR=value token: identifier chars + '=' + non-space chars.
312            let mut chars = rest.chars();
313            let key_end = chars
314                .by_ref()
315                .take_while(|c| c.is_alphanumeric() || *c == '_')
316                .count();
317            if key_end == 0 {
318                break;
319            }
320            let remainder = &rest[key_end..];
321            if let Some(after_eq) = remainder.strip_prefix('=') {
322                // Consume the value (up to the first space).
323                let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
324                rest = after_eq[val_end..].trim_start();
325            } else {
326                break;
327            }
328        }
329        rest
330    }
331
332    /// Returns `true` if `command` targets a path that is covered by `allowed_paths`.
333    ///
334    /// Uses lexical normalization (resolves `..` and `.` without filesystem access)
335    /// so that `/tmp/build/../../etc` is correctly resolved to `/etc` before comparison,
336    /// defeating path traversal bypasses like `/tmp/build/../../etc/passwd`.
337    fn is_allowed_path(&self, command: &str) -> bool {
338        if self.allowed_paths.is_empty() {
339            return false;
340        }
341        let tokens: Vec<&str> = command.split_whitespace().collect();
342        for token in &tokens {
343            let t = token.trim_matches(|c| c == '\'' || c == '"');
344            if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
345                let normalized = Self::lexical_normalize(std::path::Path::new(t));
346                let n_lower = normalized.to_string_lossy().to_lowercase();
347                if self
348                    .allowed_paths
349                    .iter()
350                    .any(|p| n_lower.starts_with(p.as_str()))
351                {
352                    return true;
353                }
354            }
355        }
356        false
357    }
358
359    /// Lexically normalize a path by resolving `.` and `..` components without
360    /// hitting the filesystem. Does not require the path to exist.
361    fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
362        let mut out = std::path::PathBuf::new();
363        for component in p.components() {
364            match component {
365                std::path::Component::ParentDir => {
366                    out.pop();
367                }
368                std::path::Component::CurDir => {}
369                other => out.push(other),
370            }
371        }
372        out
373    }
374
375    fn check_patterns(command: &str) -> Option<&'static str> {
376        DESTRUCTIVE_PATTERNS
377            .iter()
378            .find(|&pat| command.contains(pat))
379            .copied()
380    }
381
382    fn check_extra_patterns(&self, command: &str) -> Option<String> {
383        self.extra_patterns
384            .iter()
385            .find(|pat| command.contains(pat.as_str()))
386            .cloned()
387    }
388}
389
390impl PreExecutionVerifier for DestructiveCommandVerifier {
391    fn name(&self) -> &'static str {
392        "DestructiveCommandVerifier"
393    }
394
395    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
396        if !self.is_shell_tool(tool_name) {
397            return VerificationResult::Allow;
398        }
399
400        let Some(command) = Self::extract_command(args) else {
401            return VerificationResult::Allow;
402        };
403
404        if let Some(pat) = Self::check_patterns(&command) {
405            if self.is_allowed_path(&command) {
406                return VerificationResult::Allow;
407            }
408            return VerificationResult::Block {
409                reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
410            };
411        }
412
413        if let Some(pat) = self.check_extra_patterns(&command) {
414            if self.is_allowed_path(&command) {
415                return VerificationResult::Allow;
416            }
417            return VerificationResult::Block {
418                reason: format!(
419                    "[{}] extra destructive pattern '{}' detected",
420                    self.name(),
421                    pat
422                ),
423            };
424        }
425
426        VerificationResult::Allow
427    }
428}
429
430// ---------------------------------------------------------------------------
431// InjectionPatternVerifier
432// ---------------------------------------------------------------------------
433
434/// High-confidence injection block patterns applied to string field values in tool args.
435///
436/// These require *structural* patterns, not just keywords — e.g., `UNION SELECT` is
437/// blocked but a plain mention of "SELECT" is not. This avoids false positives for
438/// `memory_search` queries discussing SQL or coding assistants writing SQL examples.
439static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
440    [
441        // SQL injection structural patterns
442        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
443        r"(?i)'\s*OR\s*1\s*=\s*1",
444        r"(?i);\s*DROP\s+TABLE",
445        r"(?i)UNION\s+SELECT",
446        r"(?i)'\s*;\s*SELECT",
447        // Command injection via shell metacharacters with dangerous commands
448        r";\s*rm\s+",
449        r"\|\s*rm\s+",
450        r"&&\s*rm\s+",
451        r";\s*curl\s+",
452        r"\|\s*curl\s+",
453        r"&&\s*curl\s+",
454        r";\s*wget\s+",
455        // Path traversal to sensitive system files
456        r"\.\./\.\./\.\./etc/passwd",
457        r"\.\./\.\./\.\./etc/shadow",
458        r"\.\./\.\./\.\./windows/",
459        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
460    ]
461    .iter()
462    .map(|s| Regex::new(s).expect("static pattern must compile"))
463    .collect()
464});
465
466/// SSRF host patterns — matched against the *extracted host* (not the full URL string).
467/// This prevents bypasses like `http://evil.com/?r=http://localhost` where the SSRF
468/// target appears only in a query parameter, not as the actual request host.
469/// Bare hostnames (no port/path) are included alongside `host:port` variants.
470static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
471    [
472        // localhost — with or without port
473        r"^localhost$",
474        r"^localhost:",
475        // IPv4 loopback
476        r"^127\.0\.0\.1$",
477        r"^127\.0\.0\.1:",
478        // IPv6 loopback
479        r"^\[::1\]$",
480        r"^\[::1\]:",
481        // AWS metadata service
482        r"^169\.254\.169\.254$",
483        r"^169\.254\.169\.254:",
484        // RFC-1918 private ranges
485        r"^10\.\d+\.\d+\.\d+$",
486        r"^10\.\d+\.\d+\.\d+:",
487        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
488        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
489        r"^192\.168\.\d+\.\d+$",
490        r"^192\.168\.\d+\.\d+:",
491    ]
492    .iter()
493    .map(|s| Regex::new(s).expect("static pattern must compile"))
494    .collect()
495});
496
/// Extract the host (and optional port) from a URL string.
///
/// Takes the authority — the portion between `://` and the next `/`, `?`, `#`,
/// or end of string — and drops any `userinfo@` prefix, so that
/// `http://user:pw@localhost/` yields `localhost` rather than
/// `user:pw@localhost`. Without this, credentials in the URL would hide the
/// real host from anchored host patterns.
///
/// Returns `None` if the URL has no `://` scheme separator.
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // The host follows the LAST `@` of the authority, if there is one.
    Some(match authority.rsplit_once('@') {
        Some((_, host)) => host,
        None => authority,
    })
}
507
/// Argument field names that suggest URL/endpoint content. SSRF host patterns
/// are applied to the values of these fields.
static URL_FIELD_NAMES: &[&str] = &[
    "url", "endpoint", "uri", "href", "src", "host", "base_url",
];

/// Argument field names known to carry user-provided text queries. SQL
/// injection and command injection patterns are skipped for these fields to
/// avoid false positives.
/// Examples: `memory_search(query=...)`, `web_search(query=...)`.
static SAFE_QUERY_FIELDS: &[&str] = &[
    "query", "q", "search", "text", "message", "content",
];
515
516/// Verifier that blocks tool arguments containing SQL injection, command injection,
517/// or path traversal patterns. Applies to ALL tools using field-aware matching.
518///
519/// ## Field-aware matching
520///
521/// Rather than serialising all args to a flat string (which causes false positives),
522/// this verifier iterates over each string-valued field and applies pattern categories
523/// based on field semantics:
524///
525/// - `SAFE_QUERY_FIELDS` (`query`, `q`, `search`, `text`, …): injection patterns are
526///   **skipped** — these fields contain user-provided text and generate too many false
527///   positives for SQL/command discussions in chat.
528/// - `URL_FIELD_NAMES` (`url`, `endpoint`, `uri`, …): SSRF patterns are applied.
529/// - All other string fields: injection + path traversal patterns are applied.
530///
531/// ## Warn semantics
532///
533/// `VerificationResult::Warn` is metrics-only — the tool call proceeds, a WARN log
534/// entry is emitted, and the TUI security panel counter increments. The LLM does not
535/// see the warning in its tool result.
536#[derive(Debug)]
537pub struct InjectionPatternVerifier {
538    extra_patterns: Vec<Regex>,
539    allowlisted_urls: Vec<String>,
540}
541
542impl InjectionPatternVerifier {
543    #[must_use]
544    pub fn new(config: &InjectionVerifierConfig) -> Self {
545        let extra_patterns = config
546            .extra_patterns
547            .iter()
548            .filter_map(|s| match Regex::new(s) {
549                Ok(re) => Some(re),
550                Err(e) => {
551                    tracing::warn!(
552                        pattern = %s,
553                        error = %e,
554                        "InjectionPatternVerifier: invalid extra_pattern, skipping"
555                    );
556                    None
557                }
558            })
559            .collect();
560
561        Self {
562            extra_patterns,
563            allowlisted_urls: config
564                .allowlisted_urls
565                .iter()
566                .map(|s| s.to_lowercase())
567                .collect(),
568        }
569    }
570
571    fn is_allowlisted(&self, text: &str) -> bool {
572        let lower = text.to_lowercase();
573        self.allowlisted_urls
574            .iter()
575            .any(|u| lower.contains(u.as_str()))
576    }
577
578    fn is_url_field(field: &str) -> bool {
579        let lower = field.to_lowercase();
580        URL_FIELD_NAMES.iter().any(|&f| f == lower)
581    }
582
583    fn is_safe_query_field(field: &str) -> bool {
584        let lower = field.to_lowercase();
585        SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
586    }
587
588    /// Check a single string value from a named field.
589    fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
590        let is_url = Self::is_url_field(field);
591        let is_safe_query = Self::is_safe_query_field(field);
592
593        // Injection + path traversal: skip safe query fields (user text), apply elsewhere.
594        if !is_safe_query {
595            for pat in INJECTION_BLOCK_PATTERNS.iter() {
596                if pat.is_match(value) {
597                    return VerificationResult::Block {
598                        reason: format!(
599                            "[{}] injection pattern detected in field '{}': {}",
600                            "InjectionPatternVerifier",
601                            field,
602                            pat.as_str()
603                        ),
604                    };
605                }
606            }
607            for pat in &self.extra_patterns {
608                if pat.is_match(value) {
609                    return VerificationResult::Block {
610                        reason: format!(
611                            "[{}] extra injection pattern detected in field '{}': {}",
612                            "InjectionPatternVerifier",
613                            field,
614                            pat.as_str()
615                        ),
616                    };
617                }
618            }
619        }
620
621        // SSRF: apply only to URL-like fields.
622        // Extract the host first so that SSRF targets embedded in query parameters
623        // (e.g. `http://evil.com/?r=http://localhost`) are not falsely matched.
624        if is_url && let Some(host) = extract_url_host(value) {
625            for pat in SSRF_HOST_PATTERNS.iter() {
626                if pat.is_match(host) {
627                    if self.is_allowlisted(value) {
628                        return VerificationResult::Allow;
629                    }
630                    return VerificationResult::Warn {
631                        message: format!(
632                            "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
633                            "InjectionPatternVerifier", field, host,
634                        ),
635                    };
636                }
637            }
638        }
639
640        VerificationResult::Allow
641    }
642
643    /// Walk all string leaf values in a JSON object, collecting field names for context.
644    fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
645        for (key, val) in obj {
646            let result = self.check_value(key, val);
647            if !matches!(result, VerificationResult::Allow) {
648                return result;
649            }
650        }
651        VerificationResult::Allow
652    }
653
654    fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
655        match val {
656            serde_json::Value::String(s) => self.check_field_value(field, s),
657            serde_json::Value::Array(arr) => {
658                for item in arr {
659                    let r = self.check_value(field, item);
660                    if !matches!(r, VerificationResult::Allow) {
661                        return r;
662                    }
663                }
664                VerificationResult::Allow
665            }
666            serde_json::Value::Object(obj) => self.check_object(obj),
667            // Non-string primitives (numbers, booleans, null) cannot contain injection.
668            _ => VerificationResult::Allow,
669        }
670    }
671}
672
673impl PreExecutionVerifier for InjectionPatternVerifier {
674    fn name(&self) -> &'static str {
675        "InjectionPatternVerifier"
676    }
677
678    fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
679        match args {
680            serde_json::Value::Object(obj) => self.check_object(obj),
681            // Flat string args (unusual but handle gracefully — treat as unnamed field).
682            serde_json::Value::String(s) => self.check_field_value("_args", s),
683            _ => VerificationResult::Allow,
684        }
685    }
686}
687
688// ---------------------------------------------------------------------------
689// UrlGroundingVerifier
690// ---------------------------------------------------------------------------
691
/// Blocks `fetch` and `web_scrape` calls whose URL was not explicitly provided
/// by the user in the conversation.
///
/// The agent populates `user_provided_urls` whenever a user message is
/// received, by extracting all http/https URLs from the raw input. The set
/// persists across turns within a session and is cleared on `/clear`.
///
/// ## Bypass rules
///
/// - Tools not in the `guarded_tools` list (and not ending in `_fetch`) pass through.
/// - A call whose URL matches a URL in `user_provided_urls` (exactly or by
///   prefix) is allowed.
/// - With an empty `user_provided_urls` (no URLs seen in this session at all)
///   the call is blocked — the LLM must not fetch arbitrary URLs when the user
///   never provided one.
#[derive(Clone, Debug)]
pub struct UrlGroundingVerifier {
    /// Lower-cased names of the tools subject to grounding checks.
    guarded_tools: Vec<String>,
    /// Shared set of URLs seen in user messages; clones share the same set.
    user_provided_urls: Arc<RwLock<HashSet<String>>>,
}
711
712impl UrlGroundingVerifier {
713    #[must_use]
714    pub fn new(
715        config: &UrlGroundingVerifierConfig,
716        user_provided_urls: Arc<RwLock<HashSet<String>>>,
717    ) -> Self {
718        Self {
719            guarded_tools: config
720                .guarded_tools
721                .iter()
722                .map(|s| s.to_lowercase())
723                .collect(),
724            user_provided_urls,
725        }
726    }
727
728    fn is_guarded(&self, tool_name: &str) -> bool {
729        let lower = tool_name.to_lowercase();
730        self.guarded_tools.iter().any(|t| t == &lower) || lower.ends_with("_fetch")
731    }
732
733    /// Returns true if `url` is grounded — i.e., it appears in (or is a prefix of)
734    /// a URL from `user_provided_urls`.
735    fn is_grounded(url: &str, user_provided_urls: &HashSet<String>) -> bool {
736        let lower = url.to_lowercase();
737        user_provided_urls
738            .iter()
739            .any(|u| lower.starts_with(u.as_str()) || u.starts_with(lower.as_str()))
740    }
741}
742
743impl PreExecutionVerifier for UrlGroundingVerifier {
744    fn name(&self) -> &'static str {
745        "UrlGroundingVerifier"
746    }
747
748    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
749        if !self.is_guarded(tool_name) {
750            return VerificationResult::Allow;
751        }
752
753        let Some(url) = args.get("url").and_then(|v| v.as_str()) else {
754            return VerificationResult::Allow;
755        };
756
757        let Ok(urls) = self.user_provided_urls.read() else {
758            // Poisoned lock: fail open to avoid blocking legitimate tool calls.
759            return VerificationResult::Allow;
760        };
761
762        if Self::is_grounded(url, &urls) {
763            return VerificationResult::Allow;
764        }
765
766        VerificationResult::Block {
767            reason: format!(
768                "[UrlGroundingVerifier] fetch rejected: URL '{url}' was not provided by the user",
769            ),
770        }
771    }
772}
773
774// ---------------------------------------------------------------------------
775// FirewallVerifier
776// ---------------------------------------------------------------------------
777
778/// Configuration for the firewall verifier.
779#[derive(Debug, Clone, Deserialize, Serialize)]
780pub struct FirewallVerifierConfig {
781    #[serde(default = "default_true")]
782    pub enabled: bool,
783    /// Glob patterns for additional paths to block.
784    #[serde(default)]
785    pub blocked_paths: Vec<String>,
786    /// Additional environment variable names to block from tool arguments.
787    #[serde(default)]
788    pub blocked_env_vars: Vec<String>,
789    /// Tool IDs exempt from firewall scanning.
790    #[serde(default)]
791    pub exempt_tools: Vec<String>,
792}
793
794impl Default for FirewallVerifierConfig {
795    fn default() -> Self {
796        Self {
797            enabled: true,
798            blocked_paths: Vec::new(),
799            blocked_env_vars: Vec::new(),
800            exempt_tools: Vec::new(),
801        }
802    }
803}
804
805/// Policy-enforcement verifier that inspects tool arguments for path traversal,
806/// environment-variable exfiltration, sensitive file access, and command chaining.
807///
808/// ## Scope delineation with `InjectionPatternVerifier`
809///
810/// `FirewallVerifier` enforces *configurable policy* (blocked paths, env vars, sensitive
811/// file patterns). `InjectionPatternVerifier` performs regex-based *injection pattern
812/// detection* (prompt injection, SSRF, etc.). They are complementary — belt-and-suspenders,
813/// the same intentional overlap documented at the top of this module.
814///
815/// Both verifiers may produce `Block` for the same call (e.g. command chaining detected
816/// by both). The pipeline stops at the first `Block` result.
817#[derive(Debug)]
818pub struct FirewallVerifier {
819    blocked_path_globs: Vec<glob::Pattern>,
820    blocked_env_vars: HashSet<String>,
821    exempt_tools: HashSet<String>,
822}
823
824/// Built-in path patterns that are always blocked regardless of config.
825static SENSITIVE_PATH_PATTERNS: LazyLock<Vec<glob::Pattern>> = LazyLock::new(|| {
826    let raw = [
827        "/etc/passwd",
828        "/etc/shadow",
829        "/etc/sudoers",
830        "~/.ssh/*",
831        "~/.aws/*",
832        "~/.gnupg/*",
833        "**/*.pem",
834        "**/*.key",
835        "**/id_rsa",
836        "**/id_ed25519",
837        "**/.env",
838        "**/credentials",
839    ];
840    raw.iter()
841        .filter_map(|p| {
842            glob::Pattern::new(p)
843                .map_err(|e| {
844                    tracing::error!(pattern = p, error = %e, "failed to compile built-in firewall path pattern");
845                    e
846                })
847                .ok()
848        })
849        .collect()
850});
851
/// Built-in environment-variable reference prefixes that cause a block when they occur
/// anywhere in a tool argument.
///
/// They are searched for as substrings of the upper-cased argument, so every entry
/// must itself be upper-case.
static SENSITIVE_ENV_PREFIXES: &[&str] =
    &["$AWS_", "$ZEPH_", "${AWS_", "${ZEPH_", "%AWS_", "%ZEPH_"];
855
/// Top-level argument keys whose values the firewall extracts and scans.
///
/// Keys of a JSON-object argument that are not listed here are not inspected.
static INSPECTED_FIELDS: &[&str] = &[
    "command",
    "file_path",
    "path",
    "url",
    "query",
    "uri",
    "input",
    "args",
];
867
868impl FirewallVerifier {
869    /// Build a `FirewallVerifier` from config.
870    ///
871    /// Invalid glob patterns in `blocked_paths` are logged at WARN level and skipped.
872    #[must_use]
873    pub fn new(config: &FirewallVerifierConfig) -> Self {
874        let blocked_path_globs = config
875            .blocked_paths
876            .iter()
877            .filter_map(|p| {
878                glob::Pattern::new(p)
879                    .map_err(|e| {
880                        tracing::warn!(pattern = p, error = %e, "invalid glob pattern in firewall blocked_paths, skipping");
881                        e
882                    })
883                    .ok()
884            })
885            .collect();
886
887        let blocked_env_vars = config
888            .blocked_env_vars
889            .iter()
890            .map(|s| s.to_uppercase())
891            .collect();
892
893        let exempt_tools = config
894            .exempt_tools
895            .iter()
896            .map(|s| s.to_lowercase())
897            .collect();
898
899        Self {
900            blocked_path_globs,
901            blocked_env_vars,
902            exempt_tools,
903        }
904    }
905
906    /// Extract all string argument values from a tool call's JSON args.
907    fn collect_args(args: &serde_json::Value) -> Vec<String> {
908        let mut out = Vec::new();
909        match args {
910            serde_json::Value::Object(map) => {
911                for field in INSPECTED_FIELDS {
912                    if let Some(val) = map.get(*field) {
913                        Self::collect_strings(val, &mut out);
914                    }
915                }
916            }
917            serde_json::Value::String(s) => out.push(s.clone()),
918            _ => {}
919        }
920        out
921    }
922
923    fn collect_strings(val: &serde_json::Value, out: &mut Vec<String>) {
924        match val {
925            serde_json::Value::String(s) => out.push(s.clone()),
926            serde_json::Value::Array(arr) => {
927                for item in arr {
928                    Self::collect_strings(item, out);
929                }
930            }
931            _ => {}
932        }
933    }
934
935    fn scan_arg(&self, arg: &str) -> Option<VerificationResult> {
936        // Apply NFKC normalization consistent with DestructiveCommandVerifier.
937        let normalized: String = arg.nfkc().collect();
938        let lower = normalized.to_lowercase();
939
940        // Path traversal
941        if lower.contains("../") || lower.contains("..\\") {
942            return Some(VerificationResult::Block {
943                reason: format!(
944                    "[FirewallVerifier] path traversal pattern detected in argument: {arg}"
945                ),
946            });
947        }
948
949        // Sensitive paths (built-in)
950        for pattern in SENSITIVE_PATH_PATTERNS.iter() {
951            if pattern.matches(&normalized) || pattern.matches(&lower) {
952                return Some(VerificationResult::Block {
953                    reason: format!(
954                        "[FirewallVerifier] sensitive path pattern '{pattern}' matched in argument: {arg}"
955                    ),
956                });
957            }
958        }
959
960        // User-configured blocked paths
961        for pattern in &self.blocked_path_globs {
962            if pattern.matches(&normalized) || pattern.matches(&lower) {
963                return Some(VerificationResult::Block {
964                    reason: format!(
965                        "[FirewallVerifier] blocked path pattern '{pattern}' matched in argument: {arg}"
966                    ),
967                });
968            }
969        }
970
971        // Env var exfiltration (built-in prefixes)
972        let upper = normalized.to_uppercase();
973        for prefix in SENSITIVE_ENV_PREFIXES {
974            if upper.contains(*prefix) {
975                return Some(VerificationResult::Block {
976                    reason: format!(
977                        "[FirewallVerifier] env var exfiltration pattern '{prefix}' detected in argument: {arg}"
978                    ),
979                });
980            }
981        }
982
983        // User-configured blocked env vars (match $VAR or %VAR% patterns)
984        for var in &self.blocked_env_vars {
985            let dollar_form = format!("${var}");
986            let brace_form = format!("${{{var}}}");
987            let percent_form = format!("%{var}%");
988            if upper.contains(&dollar_form)
989                || upper.contains(&brace_form)
990                || upper.contains(&percent_form)
991            {
992                return Some(VerificationResult::Block {
993                    reason: format!(
994                        "[FirewallVerifier] blocked env var '{var}' detected in argument: {arg}"
995                    ),
996                });
997            }
998        }
999
1000        None
1001    }
1002}
1003
1004impl PreExecutionVerifier for FirewallVerifier {
1005    fn name(&self) -> &'static str {
1006        "FirewallVerifier"
1007    }
1008
1009    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
1010        if self.exempt_tools.contains(&tool_name.to_lowercase()) {
1011            return VerificationResult::Allow;
1012        }
1013
1014        for arg in Self::collect_args(args) {
1015            if let Some(result) = self.scan_arg(&arg) {
1016                return result;
1017            }
1018        }
1019
1020        VerificationResult::Allow
1021    }
1022}
1023
1024// ---------------------------------------------------------------------------
1025// Tests
1026// ---------------------------------------------------------------------------
1027
1028#[cfg(test)]
1029mod tests {
1030    use serde_json::json;
1031
1032    use super::*;
1033
1034    // --- DestructiveCommandVerifier ---
1035
1036    fn dcv() -> DestructiveCommandVerifier {
1037        DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default())
1038    }
1039
1040    #[test]
1041    fn allow_normal_command() {
1042        let v = dcv();
1043        assert_eq!(
1044            v.verify("bash", &json!({"command": "ls -la /tmp"})),
1045            VerificationResult::Allow
1046        );
1047    }
1048
1049    #[test]
1050    fn block_rm_rf_root() {
1051        let v = dcv();
1052        let result = v.verify("bash", &json!({"command": "rm -rf /"}));
1053        assert!(matches!(result, VerificationResult::Block { .. }));
1054    }
1055
1056    #[test]
1057    fn block_dd_dev_zero() {
1058        let v = dcv();
1059        let result = v.verify("bash", &json!({"command": "dd if=/dev/zero of=/dev/sda"}));
1060        assert!(matches!(result, VerificationResult::Block { .. }));
1061    }
1062
1063    #[test]
1064    fn block_mkfs() {
1065        let v = dcv();
1066        let result = v.verify("bash", &json!({"command": "mkfs.ext4 /dev/sda1"}));
1067        assert!(matches!(result, VerificationResult::Block { .. }));
1068    }
1069
1070    #[test]
1071    fn allow_rm_rf_in_allowed_path() {
1072        let config = DestructiveVerifierConfig {
1073            allowed_paths: vec!["/tmp/build".to_string()],
1074            ..Default::default()
1075        };
1076        let v = DestructiveCommandVerifier::new(&config);
1077        assert_eq!(
1078            v.verify("bash", &json!({"command": "rm -rf /tmp/build/artifacts"})),
1079            VerificationResult::Allow
1080        );
1081    }
1082
1083    #[test]
1084    fn block_rm_rf_when_not_in_allowed_path() {
1085        let config = DestructiveVerifierConfig {
1086            allowed_paths: vec!["/tmp/build".to_string()],
1087            ..Default::default()
1088        };
1089        let v = DestructiveCommandVerifier::new(&config);
1090        let result = v.verify("bash", &json!({"command": "rm -rf /home/user"}));
1091        assert!(matches!(result, VerificationResult::Block { .. }));
1092    }
1093
1094    #[test]
1095    fn allow_non_shell_tool() {
1096        let v = dcv();
1097        assert_eq!(
1098            v.verify("read_file", &json!({"path": "rm -rf /"})),
1099            VerificationResult::Allow
1100        );
1101    }
1102
1103    #[test]
1104    fn block_extra_pattern() {
1105        let config = DestructiveVerifierConfig {
1106            extra_patterns: vec!["format c:".to_string()],
1107            ..Default::default()
1108        };
1109        let v = DestructiveCommandVerifier::new(&config);
1110        let result = v.verify("bash", &json!({"command": "format c:"}));
1111        assert!(matches!(result, VerificationResult::Block { .. }));
1112    }
1113
1114    #[test]
1115    fn array_args_normalization() {
1116        let v = dcv();
1117        let result = v.verify("bash", &json!({"command": ["rm", "-rf", "/"]}));
1118        assert!(matches!(result, VerificationResult::Block { .. }));
1119    }
1120
1121    #[test]
1122    fn sh_c_wrapping_normalization() {
1123        let v = dcv();
1124        let result = v.verify("bash", &json!({"command": "bash -c 'rm -rf /'"}));
1125        assert!(matches!(result, VerificationResult::Block { .. }));
1126    }
1127
1128    #[test]
1129    fn fork_bomb_blocked() {
1130        let v = dcv();
1131        let result = v.verify("bash", &json!({"command": ":(){ :|:& };:"}));
1132        assert!(matches!(result, VerificationResult::Block { .. }));
1133    }
1134
1135    #[test]
1136    fn custom_shell_tool_name_blocked() {
1137        let config = DestructiveVerifierConfig {
1138            shell_tools: vec!["execute".to_string(), "run_command".to_string()],
1139            ..Default::default()
1140        };
1141        let v = DestructiveCommandVerifier::new(&config);
1142        let result = v.verify("execute", &json!({"command": "rm -rf /"}));
1143        assert!(matches!(result, VerificationResult::Block { .. }));
1144    }
1145
1146    #[test]
1147    fn terminal_tool_name_blocked_by_default() {
1148        let v = dcv();
1149        let result = v.verify("terminal", &json!({"command": "rm -rf /"}));
1150        assert!(matches!(result, VerificationResult::Block { .. }));
1151    }
1152
1153    #[test]
1154    fn default_shell_tools_contains_bash_shell_terminal() {
1155        let config = DestructiveVerifierConfig::default();
1156        let lower: Vec<String> = config
1157            .shell_tools
1158            .iter()
1159            .map(|s| s.to_lowercase())
1160            .collect();
1161        assert!(lower.contains(&"bash".to_string()));
1162        assert!(lower.contains(&"shell".to_string()));
1163        assert!(lower.contains(&"terminal".to_string()));
1164    }
1165
1166    // --- InjectionPatternVerifier ---
1167
1168    fn ipv() -> InjectionPatternVerifier {
1169        InjectionPatternVerifier::new(&InjectionVerifierConfig::default())
1170    }
1171
1172    #[test]
1173    fn allow_clean_args() {
1174        let v = ipv();
1175        assert_eq!(
1176            v.verify("search", &json!({"query": "rust async traits"})),
1177            VerificationResult::Allow
1178        );
1179    }
1180
1181    #[test]
1182    fn allow_sql_discussion_in_query_field() {
1183        // S2: memory_search with SQL discussion must NOT be blocked.
1184        let v = ipv();
1185        assert_eq!(
1186            v.verify(
1187                "memory_search",
1188                &json!({"query": "explain SQL UNION SELECT vs JOIN"})
1189            ),
1190            VerificationResult::Allow
1191        );
1192    }
1193
1194    #[test]
1195    fn allow_sql_or_pattern_in_query_field() {
1196        // S2: safe query field must not trigger SQL injection pattern.
1197        let v = ipv();
1198        assert_eq!(
1199            v.verify("memory_search", &json!({"query": "' OR '1'='1"})),
1200            VerificationResult::Allow
1201        );
1202    }
1203
1204    #[test]
1205    fn block_sql_injection_in_non_query_field() {
1206        let v = ipv();
1207        let result = v.verify("db_query", &json!({"sql": "' OR '1'='1"}));
1208        assert!(matches!(result, VerificationResult::Block { .. }));
1209    }
1210
1211    #[test]
1212    fn block_drop_table() {
1213        let v = ipv();
1214        let result = v.verify("db_query", &json!({"input": "name'; DROP TABLE users"}));
1215        assert!(matches!(result, VerificationResult::Block { .. }));
1216    }
1217
1218    #[test]
1219    fn block_path_traversal() {
1220        let v = ipv();
1221        let result = v.verify("read_file", &json!({"path": "../../../etc/passwd"}));
1222        assert!(matches!(result, VerificationResult::Block { .. }));
1223    }
1224
1225    #[test]
1226    fn warn_on_localhost_url_field() {
1227        // S2: SSRF warn only fires on URL-like fields.
1228        let v = ipv();
1229        let result = v.verify("http_get", &json!({"url": "http://localhost:8080/api"}));
1230        assert!(matches!(result, VerificationResult::Warn { .. }));
1231    }
1232
1233    #[test]
1234    fn allow_localhost_in_non_url_field() {
1235        // S2: localhost in a "text" field (not a URL field) must not warn.
1236        let v = ipv();
1237        assert_eq!(
1238            v.verify(
1239                "memory_search",
1240                &json!({"query": "connect to http://localhost:8080"})
1241            ),
1242            VerificationResult::Allow
1243        );
1244    }
1245
1246    #[test]
1247    fn warn_on_private_ip_url_field() {
1248        let v = ipv();
1249        let result = v.verify("fetch", &json!({"url": "http://192.168.1.1/admin"}));
1250        assert!(matches!(result, VerificationResult::Warn { .. }));
1251    }
1252
1253    #[test]
1254    fn allow_localhost_when_allowlisted() {
1255        let config = InjectionVerifierConfig {
1256            allowlisted_urls: vec!["http://localhost:3000".to_string()],
1257            ..Default::default()
1258        };
1259        let v = InjectionPatternVerifier::new(&config);
1260        assert_eq!(
1261            v.verify("http_get", &json!({"url": "http://localhost:3000/api"})),
1262            VerificationResult::Allow
1263        );
1264    }
1265
1266    #[test]
1267    fn block_union_select_in_non_query_field() {
1268        let v = ipv();
1269        let result = v.verify(
1270            "db_query",
1271            &json!({"input": "id=1 UNION SELECT password FROM users"}),
1272        );
1273        assert!(matches!(result, VerificationResult::Block { .. }));
1274    }
1275
1276    #[test]
1277    fn allow_union_select_in_query_field() {
1278        // S2: "UNION SELECT" in a `query` field is a SQL discussion, not an injection.
1279        let v = ipv();
1280        assert_eq!(
1281            v.verify(
1282                "memory_search",
1283                &json!({"query": "id=1 UNION SELECT password FROM users"})
1284            ),
1285            VerificationResult::Allow
1286        );
1287    }
1288
1289    // --- FIX-1: Unicode normalization bypass ---
1290
1291    #[test]
1292    fn block_rm_rf_unicode_homoglyph() {
1293        // U+FF0F FULLWIDTH SOLIDUS looks like '/' and NFKC-normalizes to '/'.
1294        let v = dcv();
1295        // "rm -rf ／" where ／ is U+FF0F
1296        let result = v.verify("bash", &json!({"command": "rm -rf \u{FF0F}"}));
1297        assert!(matches!(result, VerificationResult::Block { .. }));
1298    }
1299
1300    // --- FIX-2: Path traversal in is_allowed_path ---
1301
1302    #[test]
1303    fn path_traversal_not_allowed_via_dotdot() {
1304        // `/tmp/build/../../etc` lexically resolves to `/etc`, NOT under `/tmp/build`.
1305        let config = DestructiveVerifierConfig {
1306            allowed_paths: vec!["/tmp/build".to_string()],
1307            ..Default::default()
1308        };
1309        let v = DestructiveCommandVerifier::new(&config);
1310        // Should be BLOCKED: resolved path is /etc, not under /tmp/build.
1311        let result = v.verify("bash", &json!({"command": "rm -rf /tmp/build/../../etc"}));
1312        assert!(matches!(result, VerificationResult::Block { .. }));
1313    }
1314
1315    #[test]
1316    fn allowed_path_with_dotdot_stays_in_allowed() {
1317        // `/tmp/build/sub/../artifacts` resolves to `/tmp/build/artifacts` — still allowed.
1318        let config = DestructiveVerifierConfig {
1319            allowed_paths: vec!["/tmp/build".to_string()],
1320            ..Default::default()
1321        };
1322        let v = DestructiveCommandVerifier::new(&config);
1323        assert_eq!(
1324            v.verify(
1325                "bash",
1326                &json!({"command": "rm -rf /tmp/build/sub/../artifacts"}),
1327            ),
1328            VerificationResult::Allow,
1329        );
1330    }
1331
1332    // --- FIX-3: Double-nested shell wrapping ---
1333
1334    #[test]
1335    fn double_nested_bash_c_blocked() {
1336        let v = dcv();
1337        let result = v.verify(
1338            "bash",
1339            &json!({"command": "bash -c \"bash -c 'rm -rf /'\""}),
1340        );
1341        assert!(matches!(result, VerificationResult::Block { .. }));
1342    }
1343
1344    #[test]
1345    fn env_prefix_stripping_blocked() {
1346        let v = dcv();
1347        let result = v.verify(
1348            "bash",
1349            &json!({"command": "env FOO=bar bash -c 'rm -rf /'"}),
1350        );
1351        assert!(matches!(result, VerificationResult::Block { .. }));
1352    }
1353
1354    #[test]
1355    fn exec_prefix_stripping_blocked() {
1356        let v = dcv();
1357        let result = v.verify("bash", &json!({"command": "exec bash -c 'rm -rf /'"}));
1358        assert!(matches!(result, VerificationResult::Block { .. }));
1359    }
1360
1361    // --- FIX-4: SSRF host extraction (not substring match) ---
1362
1363    #[test]
1364    fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
1365        // `evil.com/?r=http://localhost` — host is `evil.com`, not localhost.
1366        let v = ipv();
1367        let result = v.verify(
1368            "http_get",
1369            &json!({"url": "http://evil.com/?r=http://localhost"}),
1370        );
1371        // Should NOT warn — the actual request host is evil.com, not localhost.
1372        assert_eq!(result, VerificationResult::Allow);
1373    }
1374
1375    #[test]
1376    fn ssrf_triggered_for_bare_localhost_no_port() {
1377        // FIX-7: `http://localhost` with no trailing slash or port must warn.
1378        let v = ipv();
1379        let result = v.verify("http_get", &json!({"url": "http://localhost"}));
1380        assert!(matches!(result, VerificationResult::Warn { .. }));
1381    }
1382
1383    #[test]
1384    fn ssrf_triggered_for_localhost_with_path() {
1385        let v = ipv();
1386        let result = v.verify("http_get", &json!({"url": "http://localhost/api/v1"}));
1387        assert!(matches!(result, VerificationResult::Warn { .. }));
1388    }
1389
1390    // --- Verifier chain: first Block wins, Warn continues ---
1391
1392    #[test]
1393    fn chain_first_block_wins() {
1394        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1395        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1396        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1397
1398        let args = json!({"command": "rm -rf /"});
1399        let mut result = VerificationResult::Allow;
1400        for v in &verifiers {
1401            result = v.verify("bash", &args);
1402            if matches!(result, VerificationResult::Block { .. }) {
1403                break;
1404            }
1405        }
1406        assert!(matches!(result, VerificationResult::Block { .. }));
1407    }
1408
1409    #[test]
1410    fn chain_warn_continues() {
1411        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1412        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1413        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1414
1415        // localhost URL in `url` field: dcv allows, ipv warns, chain does NOT block.
1416        let args = json!({"url": "http://localhost:8080/api"});
1417        let mut got_warn = false;
1418        let mut got_block = false;
1419        for v in &verifiers {
1420            match v.verify("http_get", &args) {
1421                VerificationResult::Block { .. } => {
1422                    got_block = true;
1423                    break;
1424                }
1425                VerificationResult::Warn { .. } => {
1426                    got_warn = true;
1427                }
1428                VerificationResult::Allow => {}
1429            }
1430        }
1431        assert!(got_warn);
1432        assert!(!got_block);
1433    }
1434
1435    // --- UrlGroundingVerifier ---
1436
1437    fn ugv(urls: &[&str]) -> UrlGroundingVerifier {
1438        let set: HashSet<String> = urls.iter().map(|s| s.to_lowercase()).collect();
1439        UrlGroundingVerifier::new(
1440            &UrlGroundingVerifierConfig::default(),
1441            Arc::new(RwLock::new(set)),
1442        )
1443    }
1444
1445    #[test]
1446    fn url_grounding_allows_user_provided_url() {
1447        let v = ugv(&["https://docs.anthropic.com/models"]);
1448        assert_eq!(
1449            v.verify(
1450                "fetch",
1451                &json!({"url": "https://docs.anthropic.com/models"})
1452            ),
1453            VerificationResult::Allow
1454        );
1455    }
1456
1457    #[test]
1458    fn url_grounding_blocks_hallucinated_url() {
1459        let v = ugv(&["https://example.com/page"]);
1460        let result = v.verify(
1461            "fetch",
1462            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1463        );
1464        assert!(matches!(result, VerificationResult::Block { .. }));
1465    }
1466
1467    #[test]
1468    fn url_grounding_blocks_when_no_user_urls_at_all() {
1469        let v = ugv(&[]);
1470        let result = v.verify(
1471            "fetch",
1472            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1473        );
1474        assert!(matches!(result, VerificationResult::Block { .. }));
1475    }
1476
1477    #[test]
1478    fn url_grounding_allows_non_guarded_tool() {
1479        let v = ugv(&[]);
1480        assert_eq!(
1481            v.verify("read_file", &json!({"path": "/etc/hosts"})),
1482            VerificationResult::Allow
1483        );
1484    }
1485
1486    #[test]
1487    fn url_grounding_guards_fetch_suffix_tool() {
1488        let v = ugv(&[]);
1489        let result = v.verify("http_fetch", &json!({"url": "https://evil.com/"}));
1490        assert!(matches!(result, VerificationResult::Block { .. }));
1491    }
1492
1493    #[test]
1494    fn url_grounding_allows_web_scrape_with_provided_url() {
1495        let v = ugv(&["https://rust-lang.org/"]);
1496        assert_eq!(
1497            v.verify(
1498                "web_scrape",
1499                &json!({"url": "https://rust-lang.org/", "select": "h1"})
1500            ),
1501            VerificationResult::Allow
1502        );
1503    }
1504
1505    #[test]
1506    fn url_grounding_allows_prefix_match() {
1507        // User provided https://docs.rs/ — agent fetches a sub-path.
1508        let v = ugv(&["https://docs.rs/"]);
1509        assert_eq!(
1510            v.verify(
1511                "fetch",
1512                &json!({"url": "https://docs.rs/tokio/latest/tokio/"})
1513            ),
1514            VerificationResult::Allow
1515        );
1516    }
1517
1518    // --- Regression: #2191 — fetch URL hallucination ---
1519
1520    /// REG-2191-1: exact reproduction of the bug scenario.
1521    /// Agent asks "do you know Anthropic?" (no URL provided) and halluccinates
1522    /// `https://api.anthropic.ai/v1/models`. With an empty `user_provided_urls` set
1523    /// the fetch must be blocked.
1524    #[test]
1525    fn reg_2191_hallucinated_api_endpoint_blocked_with_empty_session() {
1526        // Simulate: user never sent any URL in the conversation.
1527        let v = ugv(&[]);
1528        let result = v.verify(
1529            "fetch",
1530            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1531        );
1532        assert!(
1533            matches!(result, VerificationResult::Block { .. }),
1534            "fetch must be blocked when no user URL was provided — this is the #2191 regression"
1535        );
1536    }
1537
1538    /// REG-2191-2: passthrough — user explicitly pasted the URL, fetch must proceed.
1539    #[test]
1540    fn reg_2191_user_provided_url_allows_fetch() {
1541        let v = ugv(&["https://api.anthropic.com/v1/models"]);
1542        assert_eq!(
1543            v.verify(
1544                "fetch",
1545                &json!({"url": "https://api.anthropic.com/v1/models"}),
1546            ),
1547            VerificationResult::Allow,
1548            "fetch must be allowed when the URL was explicitly provided by the user"
1549        );
1550    }
1551
1552    /// REG-2191-3: `web_scrape` variant — same rejection for `web_scrape` tool.
1553    #[test]
1554    fn reg_2191_web_scrape_hallucinated_url_blocked() {
1555        let v = ugv(&[]);
1556        let result = v.verify(
1557            "web_scrape",
1558            &json!({"url": "https://api.anthropic.ai/v1/models", "select": "body"}),
1559        );
1560        assert!(
1561            matches!(result, VerificationResult::Block { .. }),
1562            "web_scrape must be blocked for hallucinated URL with empty user_provided_urls"
1563        );
1564    }
1565
1566    /// REG-2191-4: URL present only in an imagined system/assistant message context
1567    /// is NOT in `user_provided_urls` (the agent only populates from user messages).
1568    /// The verifier itself cannot distinguish message roles — it only sees the set
1569    /// populated by the agent. This test confirms: an empty set always blocks.
1570    #[test]
1571    fn reg_2191_empty_url_set_always_blocks_fetch() {
1572        // Whether the URL came from a system/assistant message or was never seen —
1573        // if user_provided_urls is empty, fetch must be blocked.
1574        let v = ugv(&[]);
1575        let result = v.verify(
1576            "fetch",
1577            &json!({"url": "https://docs.anthropic.com/something"}),
1578        );
1579        assert!(matches!(result, VerificationResult::Block { .. }));
1580    }
1581
1582    /// REG-2191-5: URL matching is case-insensitive — user pastes mixed-case URL.
1583    #[test]
1584    fn reg_2191_case_insensitive_url_match_allows_fetch() {
1585        // user_provided_urls stores lowercase; verify that the fetched URL with
1586        // different casing still matches.
1587        let v = ugv(&["https://Docs.Anthropic.COM/models"]);
1588        assert_eq!(
1589            v.verify(
1590                "fetch",
1591                &json!({"url": "https://docs.anthropic.com/models/detail"}),
1592            ),
1593            VerificationResult::Allow,
1594            "URL matching must be case-insensitive"
1595        );
1596    }
1597
1598    /// REG-2191-6: tool name ending in `_fetch` is auto-guarded regardless of config.
1599    /// An MCP-registered `anthropic_fetch` tool must not bypass the gate.
1600    #[test]
1601    fn reg_2191_mcp_fetch_suffix_tool_blocked_with_empty_session() {
1602        let v = ugv(&[]);
1603        let result = v.verify(
1604            "anthropic_fetch",
1605            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1606        );
1607        assert!(
1608            matches!(result, VerificationResult::Block { .. }),
1609            "MCP tools ending in _fetch must be guarded even if not in guarded_tools list"
1610        );
1611    }
1612
1613    /// REG-2191-7: reverse prefix — user provided a specific URL, agent fetches
1614    /// the root. This is the "reverse prefix" case: `user_url` `starts_with` `fetch_url`.
1615    #[test]
1616    fn reg_2191_reverse_prefix_match_allows_fetch() {
1617        // User provided a deep URL; agent wants to fetch the root.
1618        // Allowed: user_url.starts_with(fetch_url).
1619        let v = ugv(&["https://docs.rs/tokio/latest/tokio/index.html"]);
1620        assert_eq!(
1621            v.verify("fetch", &json!({"url": "https://docs.rs/"})),
1622            VerificationResult::Allow,
1623            "reverse prefix: fetched URL is a prefix of user-provided URL — should be allowed"
1624        );
1625    }
1626
1627    /// REG-2191-8: completely different domain with same path prefix must be blocked.
1628    #[test]
1629    fn reg_2191_different_domain_blocked() {
1630        // User provided docs.rs, agent wants to fetch evil.com/docs.rs path — must block.
1631        let v = ugv(&["https://docs.rs/"]);
1632        let result = v.verify("fetch", &json!({"url": "https://evil.com/docs.rs/exfil"}));
1633        assert!(
1634            matches!(result, VerificationResult::Block { .. }),
1635            "different domain must not be allowed even if path looks similar"
1636        );
1637    }
1638
1639    /// REG-2191-9: args without a `url` field — verifier must not block (Allow).
1640    #[test]
1641    fn reg_2191_missing_url_field_allows_fetch() {
1642        // Some fetch-like tools may call with different arg names.
1643        // Verifier only checks the `url` field; missing field → Allow.
1644        let v = ugv(&[]);
1645        assert_eq!(
1646            v.verify(
1647                "fetch",
1648                &json!({"endpoint": "https://api.anthropic.ai/v1/models"})
1649            ),
1650            VerificationResult::Allow,
1651            "missing url field must not trigger blocking — only explicit url field is checked"
1652        );
1653    }
1654
1655    /// REG-2191-10: verifier disabled via config — all fetch calls pass through.
1656    #[test]
1657    fn reg_2191_disabled_verifier_allows_all() {
1658        let config = UrlGroundingVerifierConfig {
1659            enabled: false,
1660            guarded_tools: default_guarded_tools(),
1661        };
1662        // Note: the enabled flag is checked by the pipeline, not inside verify().
1663        // The pipeline skips disabled verifiers. This test documents that the struct
1664        // can be constructed with enabled=false (config round-trip).
1665        let set: HashSet<String> = HashSet::new();
1666        let v = UrlGroundingVerifier::new(&config, Arc::new(RwLock::new(set)));
1667        // verify() itself doesn't check enabled — the pipeline is responsible.
1668        // When called directly it will still block (the field has no effect here).
1669        // This is an API documentation test, not a behaviour test.
1670        let _ = v.verify("fetch", &json!({"url": "https://example.com/"}));
1671        // No assertion: just verifies the struct can be built with enabled=false.
1672    }
1673
1674    // --- FirewallVerifier ---
1675
1676    fn fwv() -> FirewallVerifier {
1677        FirewallVerifier::new(&FirewallVerifierConfig::default())
1678    }
1679
1680    #[test]
1681    fn firewall_allows_normal_path() {
1682        let v = fwv();
1683        assert_eq!(
1684            v.verify("shell", &json!({"command": "ls /tmp/build"})),
1685            VerificationResult::Allow
1686        );
1687    }
1688
1689    #[test]
1690    fn firewall_blocks_path_traversal() {
1691        let v = fwv();
1692        let result = v.verify("read", &json!({"file_path": "../../etc/passwd"}));
1693        assert!(
1694            matches!(result, VerificationResult::Block { .. }),
1695            "path traversal must be blocked"
1696        );
1697    }
1698
1699    #[test]
1700    fn firewall_blocks_etc_passwd() {
1701        let v = fwv();
1702        let result = v.verify("read", &json!({"file_path": "/etc/passwd"}));
1703        assert!(
1704            matches!(result, VerificationResult::Block { .. }),
1705            "/etc/passwd must be blocked"
1706        );
1707    }
1708
1709    #[test]
1710    fn firewall_blocks_ssh_key() {
1711        let v = fwv();
1712        let result = v.verify("read", &json!({"file_path": "~/.ssh/id_rsa"}));
1713        assert!(
1714            matches!(result, VerificationResult::Block { .. }),
1715            "SSH key path must be blocked"
1716        );
1717    }
1718
1719    #[test]
1720    fn firewall_blocks_aws_env_var() {
1721        let v = fwv();
1722        let result = v.verify("shell", &json!({"command": "echo $AWS_SECRET_ACCESS_KEY"}));
1723        assert!(
1724            matches!(result, VerificationResult::Block { .. }),
1725            "AWS env var exfiltration must be blocked"
1726        );
1727    }
1728
1729    #[test]
1730    fn firewall_blocks_zeph_env_var() {
1731        let v = fwv();
1732        let result = v.verify("shell", &json!({"command": "cat ${ZEPH_CLAUDE_API_KEY}"}));
1733        assert!(
1734            matches!(result, VerificationResult::Block { .. }),
1735            "ZEPH env var exfiltration must be blocked"
1736        );
1737    }
1738
1739    #[test]
1740    fn firewall_exempt_tool_bypasses_check() {
1741        let cfg = FirewallVerifierConfig {
1742            enabled: true,
1743            blocked_paths: vec![],
1744            blocked_env_vars: vec![],
1745            exempt_tools: vec!["read".to_string()],
1746        };
1747        let v = FirewallVerifier::new(&cfg);
1748        // /etc/passwd would normally be blocked but tool is exempt.
1749        assert_eq!(
1750            v.verify("read", &json!({"file_path": "/etc/passwd"})),
1751            VerificationResult::Allow
1752        );
1753    }
1754
1755    #[test]
1756    fn firewall_custom_blocked_path() {
1757        let cfg = FirewallVerifierConfig {
1758            enabled: true,
1759            blocked_paths: vec!["/data/secrets/*".to_string()],
1760            blocked_env_vars: vec![],
1761            exempt_tools: vec![],
1762        };
1763        let v = FirewallVerifier::new(&cfg);
1764        let result = v.verify("read", &json!({"file_path": "/data/secrets/master.key"}));
1765        assert!(
1766            matches!(result, VerificationResult::Block { .. }),
1767            "custom blocked path must be blocked"
1768        );
1769    }
1770
1771    #[test]
1772    fn firewall_custom_blocked_env_var() {
1773        let cfg = FirewallVerifierConfig {
1774            enabled: true,
1775            blocked_paths: vec![],
1776            blocked_env_vars: vec!["MY_SECRET".to_string()],
1777            exempt_tools: vec![],
1778        };
1779        let v = FirewallVerifier::new(&cfg);
1780        let result = v.verify("shell", &json!({"command": "echo $MY_SECRET"}));
1781        assert!(
1782            matches!(result, VerificationResult::Block { .. }),
1783            "custom blocked env var must be blocked"
1784        );
1785    }
1786
1787    #[test]
1788    fn firewall_invalid_glob_is_skipped() {
1789        // Invalid glob should not panic — logged and skipped at construction.
1790        let cfg = FirewallVerifierConfig {
1791            enabled: true,
1792            blocked_paths: vec!["[invalid-glob".to_string(), "/valid/path/*".to_string()],
1793            blocked_env_vars: vec![],
1794            exempt_tools: vec![],
1795        };
1796        let v = FirewallVerifier::new(&cfg);
1797        // Valid pattern still works
1798        let result = v.verify("read", &json!({"path": "/valid/path/file.txt"}));
1799        assert!(matches!(result, VerificationResult::Block { .. }));
1800    }
1801
1802    #[test]
1803    fn firewall_config_default_deserialization() {
1804        let cfg: FirewallVerifierConfig = toml::from_str("").unwrap();
1805        assert!(cfg.enabled);
1806        assert!(cfg.blocked_paths.is_empty());
1807        assert!(cfg.blocked_env_vars.is_empty());
1808        assert!(cfg.exempt_tools.is_empty());
1809    }
1810}
zeph_tools/verifier.rs

zeph_tools/
verifier.rs