zeph_tools/
verifier.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pre-execution verification for tool calls.
5//!
6//! Based on the `TrustBench` pattern (arXiv:2603.09157): intercept tool calls before
7//! execution to block or warn on destructive or injection patterns.
8//!
9//! ## Blocklist separation
10//!
11//! `DESTRUCTIVE_PATTERNS` (this module) is intentionally separate from
12//! `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`. The two lists serve different purposes:
13//!
14//! - `DEFAULT_BLOCKED_COMMANDS` — shell safety net: prevents the *shell executor* from
15//!   running network tools (`curl`, `wget`, `nc`) and a few destructive commands.
16//!   It is applied at tool-execution time by `ShellExecutor`.
17//!
18//! - `DESTRUCTIVE_PATTERNS` — pre-execution guard: targets filesystem/system destruction
19//!   commands (disk formats, wipefs, fork bombs, recursive permission changes).
20//!   It runs *before* dispatch, in the LLM-call hot path, and must not be conflated
21//!   with the shell safety net to avoid accidental allow-listing via config drift.
22//!
23//! Overlap (3 entries: `rm -rf /`, `mkfs`, `dd if=`) is intentional — belt-and-suspenders.
24
25use std::collections::HashSet;
26use std::sync::{Arc, LazyLock};
27
28use parking_lot::RwLock;
29
30use regex::Regex;
31use serde::{Deserialize, Serialize};
32use unicode_normalization::UnicodeNormalization as _;
33
/// Serde default helper: yields `true`, so `enabled`-style flags are on unless the
/// configuration explicitly turns them off.
fn default_true() -> bool {
    true
}
37
/// Serde default helper: the tool names treated as shell executors when the
/// configuration does not list any.
fn default_shell_tools() -> Vec<String> {
    ["bash", "shell", "terminal"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
45
/// Outcome of running a pre-execution verifier against a single tool call.
#[must_use]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum VerificationResult {
    /// Nothing objectionable was found; the tool call may run.
    Allow,
    /// The tool call must not run. The executor reports an error back to the LLM.
    Block { reason: String },
    /// The tool call still runs, but a warning is logged and counted in metrics.
    /// This is metrics-only: it is not shown to the LLM or the user outside the
    /// TUI security panel.
    Warn { message: String },
}
58
59/// Pre-execution verification trait. Implementations intercept tool calls
60/// before the executor runs them. Based on `TrustBench` pattern (arXiv:2603.09157).
61///
62/// Sync by design: verifiers inspect arguments only — no I/O needed.
63/// Object-safe: uses `&self` and returns a concrete enum.
64pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
65    /// Verify whether a tool call should proceed.
66    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;
67
68    /// Human-readable name for logging and TUI display.
69    fn name(&self) -> &'static str;
70}
71
72// ---------------------------------------------------------------------------
73// Config types
74// ---------------------------------------------------------------------------
75
76/// Configuration for the destructive command verifier.
77///
78/// `allowed_paths`: when **empty** (the default), ALL destructive commands are denied.
79/// This is a conservative default: to allow e.g. `rm -rf /tmp/build` you must
80/// explicitly add `/tmp/build` to `allowed_paths`.
81///
82/// `shell_tools`: the set of tool names considered shell executors. Defaults to
83/// `["bash", "shell", "terminal"]`. Add custom names here if your setup registers
84/// shell tools under different names (e.g., via MCP or ACP integrations).
85#[derive(Debug, Clone, Deserialize, Serialize)]
86pub struct DestructiveVerifierConfig {
87    #[serde(default = "default_true")]
88    pub enabled: bool,
89    /// Explicit path prefixes under which destructive commands are permitted.
90    /// **Empty = deny-all destructive commands** (safest default).
91    #[serde(default)]
92    pub allowed_paths: Vec<String>,
93    /// Additional command patterns to treat as destructive (substring match).
94    #[serde(default)]
95    pub extra_patterns: Vec<String>,
96    /// Tool names to treat as shell executors (case-insensitive).
97    /// Default: `["bash", "shell", "terminal"]`.
98    #[serde(default = "default_shell_tools")]
99    pub shell_tools: Vec<String>,
100}
101
102impl Default for DestructiveVerifierConfig {
103    fn default() -> Self {
104        Self {
105            enabled: true,
106            allowed_paths: Vec::new(),
107            extra_patterns: Vec::new(),
108            shell_tools: default_shell_tools(),
109        }
110    }
111}
112
113/// Configuration for the injection pattern verifier.
114#[derive(Debug, Clone, Deserialize, Serialize)]
115pub struct InjectionVerifierConfig {
116    #[serde(default = "default_true")]
117    pub enabled: bool,
118    /// Additional injection patterns to block (regex strings).
119    /// Invalid regexes are logged at WARN level and skipped.
120    #[serde(default)]
121    pub extra_patterns: Vec<String>,
122    /// URLs explicitly permitted even if they match SSRF patterns.
123    #[serde(default)]
124    pub allowlisted_urls: Vec<String>,
125}
126
127impl Default for InjectionVerifierConfig {
128    fn default() -> Self {
129        Self {
130            enabled: true,
131            extra_patterns: Vec::new(),
132            allowlisted_urls: Vec::new(),
133        }
134    }
135}
136
137/// Configuration for the URL grounding verifier.
138///
139/// When enabled, `fetch` and `web_scrape` calls are blocked unless the URL
140/// appears in the set of URLs extracted from user messages (`user_provided_urls`).
141/// This prevents the LLM from hallucinating API endpoints and calling fetch with
142/// fabricated URLs that were never supplied by the user.
143#[derive(Debug, Clone, Deserialize, Serialize)]
144pub struct UrlGroundingVerifierConfig {
145    #[serde(default = "default_true")]
146    pub enabled: bool,
147    /// Tool IDs subject to URL grounding checks. Any tool whose name ends with `_fetch`
148    /// is also guarded regardless of this list.
149    #[serde(default = "default_guarded_tools")]
150    pub guarded_tools: Vec<String>,
151}
152
/// Serde default helper: the tool IDs that are URL-grounded out of the box.
fn default_guarded_tools() -> Vec<String> {
    ["fetch", "web_scrape"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
156
157impl Default for UrlGroundingVerifierConfig {
158    fn default() -> Self {
159        Self {
160            enabled: true,
161            guarded_tools: default_guarded_tools(),
162        }
163    }
164}
165
166/// Top-level configuration for all pre-execution verifiers.
167#[derive(Debug, Clone, Deserialize, Serialize)]
168pub struct PreExecutionVerifierConfig {
169    #[serde(default = "default_true")]
170    pub enabled: bool,
171    #[serde(default)]
172    pub destructive_commands: DestructiveVerifierConfig,
173    #[serde(default)]
174    pub injection_patterns: InjectionVerifierConfig,
175    #[serde(default)]
176    pub url_grounding: UrlGroundingVerifierConfig,
177    #[serde(default)]
178    pub firewall: FirewallVerifierConfig,
179}
180
181impl Default for PreExecutionVerifierConfig {
182    fn default() -> Self {
183        Self {
184            enabled: true,
185            destructive_commands: DestructiveVerifierConfig::default(),
186            injection_patterns: InjectionVerifierConfig::default(),
187            url_grounding: UrlGroundingVerifierConfig::default(),
188            firewall: FirewallVerifierConfig::default(),
189        }
190    }
191}
192
193// ---------------------------------------------------------------------------
194// DestructiveCommandVerifier
195// ---------------------------------------------------------------------------
196
/// Built-in substrings that mark a shell command as destructive for
/// `DestructiveCommandVerifier`.
///
/// Kept deliberately apart from `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`; the module
/// docs explain the semantic difference between the two lists.
///
/// Entries are written in lowercase (note `-r` in the `chmod`/`chown` entries) because
/// the command is lowercased before it is searched.
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletion from the filesystem root or the home directory.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk writes, formatting, partitioning and wiping.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bombs, with and without inner spaces.
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes.
    "chmod -r 777 /",
    "chown -r",
];
215
/// Verifier that stops destructive shell commands (e.g. `rm -rf /`, `dd`, `mkfs`)
/// before the shell tool gets to run them.
///
/// Only tools whose name is in the configured `shell_tools` set (default
/// `["bash", "shell", "terminal"]`) are inspected. A matching command is still allowed
/// when it targets a path under one of the configured `allowed_paths`; with an empty
/// `allowed_paths` (the default) **every** matching command is blocked.
///
/// All three lists are stored lowercased by the constructor.
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    /// Lowercased names of tools treated as shell executors.
    shell_tools: Vec<String>,
    /// Lowercased path prefixes under which destructive commands are tolerated.
    allowed_paths: Vec<String>,
    /// Lowercased user-supplied substrings treated as destructive.
    extra_patterns: Vec<String>,
}
229
230impl DestructiveCommandVerifier {
231    #[must_use]
232    pub fn new(config: &DestructiveVerifierConfig) -> Self {
233        Self {
234            shell_tools: config
235                .shell_tools
236                .iter()
237                .map(|s| s.to_lowercase())
238                .collect(),
239            allowed_paths: config
240                .allowed_paths
241                .iter()
242                .map(|s| s.to_lowercase())
243                .collect(),
244            extra_patterns: config
245                .extra_patterns
246                .iter()
247                .map(|s| s.to_lowercase())
248                .collect(),
249        }
250    }
251
252    fn is_shell_tool(&self, tool_name: &str) -> bool {
253        let lower = tool_name.to_lowercase();
254        self.shell_tools.iter().any(|t| t == &lower)
255    }
256
257    /// Extract the effective command string from `args`.
258    ///
259    /// Supports:
260    /// - `{"command": "rm -rf /"}` (string)
261    /// - `{"command": ["rm", "-rf", "/"]}` (array — joined with spaces)
262    /// - `{"command": "bash -c 'rm -rf /'"}` (shell `-c` unwrapping, looped up to 8 levels)
263    /// - `env VAR=val bash -c '...'` and `exec bash -c '...'` prefix stripping
264    ///
265    /// NFKC-normalizes the result to defeat Unicode homoglyph bypasses.
266    fn extract_command(args: &serde_json::Value) -> Option<String> {
267        let raw = match args.get("command") {
268            Some(serde_json::Value::String(s)) => s.clone(),
269            Some(serde_json::Value::Array(arr)) => arr
270                .iter()
271                .filter_map(|v| v.as_str())
272                .collect::<Vec<_>>()
273                .join(" "),
274            _ => return None,
275        };
276        // NFKC-normalize + lowercase to defeat Unicode homoglyph and case bypasses.
277        let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
278        // Loop: strip shell wrapper prefixes up to 8 levels deep.
279        // Handles double-nested: `bash -c "bash -c 'rm -rf /'"`.
280        for _ in 0..8 {
281            let trimmed = current.trim().to_owned();
282            // Strip `env VAR=value ... CMD` prefix (one or more VAR=value tokens).
283            let after_env = Self::strip_env_prefix(&trimmed);
284            // Strip `exec ` prefix.
285            let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
286            // Strip interpreter wrapper: `bash -c '...'` / `sh -c '...'` / `zsh -c '...'`.
287            let mut unwrapped = false;
288            for interp in &["bash -c ", "sh -c ", "zsh -c "] {
289                if let Some(rest) = after_exec.strip_prefix(interp) {
290                    let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
291                    current.clone_from(&script.to_owned());
292                    unwrapped = true;
293                    break;
294                }
295            }
296            if !unwrapped {
297                return Some(after_exec.to_owned());
298            }
299        }
300        Some(current)
301    }
302
303    /// Strip leading `env VAR=value` tokens from a command string.
304    /// Returns the remainder after all `KEY=VALUE` pairs are consumed.
305    fn strip_env_prefix(cmd: &str) -> &str {
306        let mut rest = cmd;
307        // `env` keyword is optional; strip it if present.
308        if let Some(after_env) = rest.strip_prefix("env ") {
309            rest = after_env.trim_start();
310        }
311        // Consume `KEY=VALUE` tokens.
312        loop {
313            // A VAR=value token: identifier chars + '=' + non-space chars.
314            let mut chars = rest.chars();
315            let key_end = chars
316                .by_ref()
317                .take_while(|c| c.is_alphanumeric() || *c == '_')
318                .count();
319            if key_end == 0 {
320                break;
321            }
322            let remainder = &rest[key_end..];
323            if let Some(after_eq) = remainder.strip_prefix('=') {
324                // Consume the value (up to the first space).
325                let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
326                rest = after_eq[val_end..].trim_start();
327            } else {
328                break;
329            }
330        }
331        rest
332    }
333
334    /// Returns `true` if `command` targets a path that is covered by `allowed_paths`.
335    ///
336    /// Uses lexical normalization (resolves `..` and `.` without filesystem access)
337    /// so that `/tmp/build/../../etc` is correctly resolved to `/etc` before comparison,
338    /// defeating path traversal bypasses like `/tmp/build/../../etc/passwd`.
339    fn is_allowed_path(&self, command: &str) -> bool {
340        if self.allowed_paths.is_empty() {
341            return false;
342        }
343        let tokens: Vec<&str> = command.split_whitespace().collect();
344        for token in &tokens {
345            let t = token.trim_matches(|c| c == '\'' || c == '"');
346            if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
347                let normalized = Self::lexical_normalize(std::path::Path::new(t));
348                let n_lower = normalized.to_string_lossy().to_lowercase();
349                if self
350                    .allowed_paths
351                    .iter()
352                    .any(|p| n_lower.starts_with(p.as_str()))
353                {
354                    return true;
355                }
356            }
357        }
358        false
359    }
360
361    /// Lexically normalize a path by resolving `.` and `..` components without
362    /// hitting the filesystem. Does not require the path to exist.
363    fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
364        let mut out = std::path::PathBuf::new();
365        for component in p.components() {
366            match component {
367                std::path::Component::ParentDir => {
368                    out.pop();
369                }
370                std::path::Component::CurDir => {}
371                other => out.push(other),
372            }
373        }
374        out
375    }
376
377    fn check_patterns(command: &str) -> Option<&'static str> {
378        DESTRUCTIVE_PATTERNS
379            .iter()
380            .find(|&pat| command.contains(pat))
381            .copied()
382    }
383
384    fn check_extra_patterns(&self, command: &str) -> Option<String> {
385        self.extra_patterns
386            .iter()
387            .find(|pat| command.contains(pat.as_str()))
388            .cloned()
389    }
390}
391
392impl PreExecutionVerifier for DestructiveCommandVerifier {
393    fn name(&self) -> &'static str {
394        "DestructiveCommandVerifier"
395    }
396
397    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
398        if !self.is_shell_tool(tool_name) {
399            return VerificationResult::Allow;
400        }
401
402        let Some(command) = Self::extract_command(args) else {
403            return VerificationResult::Allow;
404        };
405
406        if let Some(pat) = Self::check_patterns(&command) {
407            if self.is_allowed_path(&command) {
408                return VerificationResult::Allow;
409            }
410            return VerificationResult::Block {
411                reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
412            };
413        }
414
415        if let Some(pat) = self.check_extra_patterns(&command) {
416            if self.is_allowed_path(&command) {
417                return VerificationResult::Allow;
418            }
419            return VerificationResult::Block {
420                reason: format!(
421                    "[{}] extra destructive pattern '{}' detected",
422                    self.name(),
423                    pat
424                ),
425            };
426        }
427
428        VerificationResult::Allow
429    }
430}
431
432// ---------------------------------------------------------------------------
433// InjectionPatternVerifier
434// ---------------------------------------------------------------------------
435
436/// High-confidence injection block patterns applied to string field values in tool args.
437///
438/// These require *structural* patterns, not just keywords — e.g., `UNION SELECT` is
439/// blocked but a plain mention of "SELECT" is not. This avoids false positives for
440/// `memory_search` queries discussing SQL or coding assistants writing SQL examples.
441static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
442    [
443        // SQL injection structural patterns
444        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
445        r"(?i)'\s*OR\s*1\s*=\s*1",
446        r"(?i);\s*DROP\s+TABLE",
447        r"(?i)UNION\s+SELECT",
448        r"(?i)'\s*;\s*SELECT",
449        // Command injection via shell metacharacters with dangerous commands
450        r";\s*rm\s+",
451        r"\|\s*rm\s+",
452        r"&&\s*rm\s+",
453        r";\s*curl\s+",
454        r"\|\s*curl\s+",
455        r"&&\s*curl\s+",
456        r";\s*wget\s+",
457        // Path traversal to sensitive system files
458        r"\.\./\.\./\.\./etc/passwd",
459        r"\.\./\.\./\.\./etc/shadow",
460        r"\.\./\.\./\.\./windows/",
461        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
462    ]
463    .iter()
464    .map(|s| Regex::new(s).expect("static pattern must compile"))
465    .collect()
466});
467
468/// SSRF host patterns — matched against the *extracted host* (not the full URL string).
469/// This prevents bypasses like `http://evil.com/?r=http://localhost` where the SSRF
470/// target appears only in a query parameter, not as the actual request host.
471/// Bare hostnames (no port/path) are included alongside `host:port` variants.
472static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
473    [
474        // localhost — with or without port
475        r"^localhost$",
476        r"^localhost:",
477        // IPv4 loopback
478        r"^127\.0\.0\.1$",
479        r"^127\.0\.0\.1:",
480        // IPv6 loopback
481        r"^\[::1\]$",
482        r"^\[::1\]:",
483        // AWS metadata service
484        r"^169\.254\.169\.254$",
485        r"^169\.254\.169\.254:",
486        // RFC-1918 private ranges
487        r"^10\.\d+\.\d+\.\d+$",
488        r"^10\.\d+\.\d+\.\d+:",
489        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
490        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
491        r"^192\.168\.\d+\.\d+$",
492        r"^192\.168\.\d+\.\d+:",
493    ]
494    .iter()
495    .map(|s| Regex::new(s).expect("static pattern must compile"))
496    .collect()
497});
498
/// Extract the host (and optional port) from a URL string.
///
/// Takes the authority — the portion between `://` and the next `/`, `?`, `#`, or the
/// end of the string — and drops any `userinfo@` prefix from it. Without that step
/// `http://user@localhost/` would yield `user@localhost` and slip past host-anchored
/// SSRF patterns even though the request goes to `localhost`.
///
/// Returns `None` if the URL has no scheme separator (`://`).
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // Userinfo ends at the last `@` inside the authority.
    let host = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    Some(host)
}
509
/// Argument field names that suggest URL/endpoint content. SSRF host checks run only
/// on fields whose lowercased name appears in this list.
static URL_FIELD_NAMES: &[&str] = &[
    "url",
    "endpoint",
    "uri",
    "href",
    "src",
    "host",
    "base_url",
];
512
/// Argument field names known to carry free-form, user-provided text. SQL injection,
/// command injection and path traversal patterns are skipped for these fields to avoid
/// false positives. Examples: `memory_search(query=...)`, `web_search(query=...)`.
static SAFE_QUERY_FIELDS: &[&str] = &[
    "query",
    "q",
    "search",
    "text",
    "message",
    "content",
];
517
518/// Verifier that blocks tool arguments containing SQL injection, command injection,
519/// or path traversal patterns. Applies to ALL tools using field-aware matching.
520///
521/// ## Field-aware matching
522///
523/// Rather than serialising all args to a flat string (which causes false positives),
524/// this verifier iterates over each string-valued field and applies pattern categories
525/// based on field semantics:
526///
527/// - `SAFE_QUERY_FIELDS` (`query`, `q`, `search`, `text`, …): injection patterns are
528///   **skipped** — these fields contain user-provided text and generate too many false
529///   positives for SQL/command discussions in chat.
530/// - `URL_FIELD_NAMES` (`url`, `endpoint`, `uri`, …): SSRF patterns are applied.
531/// - All other string fields: injection + path traversal patterns are applied.
532///
533/// ## Warn semantics
534///
535/// `VerificationResult::Warn` is metrics-only — the tool call proceeds, a WARN log
536/// entry is emitted, and the TUI security panel counter increments. The LLM does not
537/// see the warning in its tool result.
538#[derive(Debug)]
539pub struct InjectionPatternVerifier {
540    extra_patterns: Vec<Regex>,
541    allowlisted_urls: Vec<String>,
542}
543
544impl InjectionPatternVerifier {
545    #[must_use]
546    pub fn new(config: &InjectionVerifierConfig) -> Self {
547        let extra_patterns = config
548            .extra_patterns
549            .iter()
550            .filter_map(|s| match Regex::new(s) {
551                Ok(re) => Some(re),
552                Err(e) => {
553                    tracing::warn!(
554                        pattern = %s,
555                        error = %e,
556                        "InjectionPatternVerifier: invalid extra_pattern, skipping"
557                    );
558                    None
559                }
560            })
561            .collect();
562
563        Self {
564            extra_patterns,
565            allowlisted_urls: config
566                .allowlisted_urls
567                .iter()
568                .map(|s| s.to_lowercase())
569                .collect(),
570        }
571    }
572
573    fn is_allowlisted(&self, text: &str) -> bool {
574        let lower = text.to_lowercase();
575        self.allowlisted_urls
576            .iter()
577            .any(|u| lower.contains(u.as_str()))
578    }
579
580    fn is_url_field(field: &str) -> bool {
581        let lower = field.to_lowercase();
582        URL_FIELD_NAMES.iter().any(|&f| f == lower)
583    }
584
585    fn is_safe_query_field(field: &str) -> bool {
586        let lower = field.to_lowercase();
587        SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
588    }
589
590    /// Check a single string value from a named field.
591    fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
592        let is_url = Self::is_url_field(field);
593        let is_safe_query = Self::is_safe_query_field(field);
594
595        // Injection + path traversal: skip safe query fields (user text), apply elsewhere.
596        if !is_safe_query {
597            for pat in INJECTION_BLOCK_PATTERNS.iter() {
598                if pat.is_match(value) {
599                    return VerificationResult::Block {
600                        reason: format!(
601                            "[{}] injection pattern detected in field '{}': {}",
602                            "InjectionPatternVerifier",
603                            field,
604                            pat.as_str()
605                        ),
606                    };
607                }
608            }
609            for pat in &self.extra_patterns {
610                if pat.is_match(value) {
611                    return VerificationResult::Block {
612                        reason: format!(
613                            "[{}] extra injection pattern detected in field '{}': {}",
614                            "InjectionPatternVerifier",
615                            field,
616                            pat.as_str()
617                        ),
618                    };
619                }
620            }
621        }
622
623        // SSRF: apply only to URL-like fields.
624        // Extract the host first so that SSRF targets embedded in query parameters
625        // (e.g. `http://evil.com/?r=http://localhost`) are not falsely matched.
626        if is_url && let Some(host) = extract_url_host(value) {
627            for pat in SSRF_HOST_PATTERNS.iter() {
628                if pat.is_match(host) {
629                    if self.is_allowlisted(value) {
630                        return VerificationResult::Allow;
631                    }
632                    return VerificationResult::Warn {
633                        message: format!(
634                            "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
635                            "InjectionPatternVerifier", field, host,
636                        ),
637                    };
638                }
639            }
640        }
641
642        VerificationResult::Allow
643    }
644
645    /// Walk all string leaf values in a JSON object, collecting field names for context.
646    fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
647        for (key, val) in obj {
648            let result = self.check_value(key, val);
649            if !matches!(result, VerificationResult::Allow) {
650                return result;
651            }
652        }
653        VerificationResult::Allow
654    }
655
656    fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
657        match val {
658            serde_json::Value::String(s) => self.check_field_value(field, s),
659            serde_json::Value::Array(arr) => {
660                for item in arr {
661                    let r = self.check_value(field, item);
662                    if !matches!(r, VerificationResult::Allow) {
663                        return r;
664                    }
665                }
666                VerificationResult::Allow
667            }
668            serde_json::Value::Object(obj) => self.check_object(obj),
669            // Non-string primitives (numbers, booleans, null) cannot contain injection.
670            _ => VerificationResult::Allow,
671        }
672    }
673}
674
675impl PreExecutionVerifier for InjectionPatternVerifier {
676    fn name(&self) -> &'static str {
677        "InjectionPatternVerifier"
678    }
679
680    fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
681        match args {
682            serde_json::Value::Object(obj) => self.check_object(obj),
683            // Flat string args (unusual but handle gracefully — treat as unnamed field).
684            serde_json::Value::String(s) => self.check_field_value("_args", s),
685            _ => VerificationResult::Allow,
686        }
687    }
688}
689
690// ---------------------------------------------------------------------------
691// UrlGroundingVerifier
692// ---------------------------------------------------------------------------
693
694/// Verifier that blocks `fetch` and `web_scrape` calls when the requested URL
695/// was not explicitly provided by the user in the conversation.
696///
697/// The agent populates `user_provided_urls` whenever a user message is received,
698/// by extracting all http/https URLs from the raw input. This set persists across
699/// turns within a session and is cleared on `/clear`.
700///
701/// ## Bypass rules
702///
703/// - Tools not in the `guarded_tools` list (and not ending in `_fetch`) pass through.
704/// - If the URL in the tool call is a prefix-match or exact match of any URL in
705///   `user_provided_urls`, the call is allowed.
706/// - If `user_provided_urls` is empty (no URLs seen in this session at all), the call
707///   is blocked — the LLM must not fetch arbitrary URLs when the user never provided one.
708#[derive(Debug, Clone)]
709pub struct UrlGroundingVerifier {
710    guarded_tools: Vec<String>,
711    user_provided_urls: Arc<RwLock<HashSet<String>>>,
712}
713
714impl UrlGroundingVerifier {
715    #[must_use]
716    pub fn new(
717        config: &UrlGroundingVerifierConfig,
718        user_provided_urls: Arc<RwLock<HashSet<String>>>,
719    ) -> Self {
720        Self {
721            guarded_tools: config
722                .guarded_tools
723                .iter()
724                .map(|s| s.to_lowercase())
725                .collect(),
726            user_provided_urls,
727        }
728    }
729
730    fn is_guarded(&self, tool_name: &str) -> bool {
731        let lower = tool_name.to_lowercase();
732        self.guarded_tools.iter().any(|t| t == &lower) || lower.ends_with("_fetch")
733    }
734
735    /// Returns true if `url` is grounded — i.e., it appears in (or is a prefix of)
736    /// a URL from `user_provided_urls`.
737    fn is_grounded(url: &str, user_provided_urls: &HashSet<String>) -> bool {
738        let lower = url.to_lowercase();
739        user_provided_urls
740            .iter()
741            .any(|u| lower.starts_with(u.as_str()) || u.starts_with(lower.as_str()))
742    }
743}
744
745impl PreExecutionVerifier for UrlGroundingVerifier {
746    fn name(&self) -> &'static str {
747        "UrlGroundingVerifier"
748    }
749
750    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
751        if !self.is_guarded(tool_name) {
752            return VerificationResult::Allow;
753        }
754
755        let Some(url) = args.get("url").and_then(|v| v.as_str()) else {
756            return VerificationResult::Allow;
757        };
758
759        let urls = self.user_provided_urls.read();
760
761        if Self::is_grounded(url, &urls) {
762            return VerificationResult::Allow;
763        }
764
765        VerificationResult::Block {
766            reason: format!(
767                "[UrlGroundingVerifier] fetch rejected: URL '{url}' was not provided by the user",
768            ),
769        }
770    }
771}
772
773// ---------------------------------------------------------------------------
774// FirewallVerifier
775// ---------------------------------------------------------------------------
776
777/// Configuration for the firewall verifier.
778#[derive(Debug, Clone, Deserialize, Serialize)]
779pub struct FirewallVerifierConfig {
780    #[serde(default = "default_true")]
781    pub enabled: bool,
782    /// Glob patterns for additional paths to block.
783    #[serde(default)]
784    pub blocked_paths: Vec<String>,
785    /// Additional environment variable names to block from tool arguments.
786    #[serde(default)]
787    pub blocked_env_vars: Vec<String>,
788    /// Tool IDs exempt from firewall scanning.
789    #[serde(default)]
790    pub exempt_tools: Vec<String>,
791}
792
793impl Default for FirewallVerifierConfig {
794    fn default() -> Self {
795        Self {
796            enabled: true,
797            blocked_paths: Vec::new(),
798            blocked_env_vars: Vec::new(),
799            exempt_tools: Vec::new(),
800        }
801    }
802}
803
/// Policy-enforcement verifier that inspects tool arguments for path traversal,
/// sensitive file access, and environment-variable exfiltration.
///
/// ## Scope delineation with `InjectionPatternVerifier`
///
/// `FirewallVerifier` enforces *configurable policy* (blocked paths, env vars, sensitive
/// file patterns). `InjectionPatternVerifier` performs regex-based *injection pattern
/// detection* (prompt injection, SSRF, etc.). They are complementary — belt-and-suspenders,
/// the same intentional overlap documented at the top of this module.
///
/// Both verifiers may produce `Block` for the same call (e.g. path traversal detected
/// by both). The pipeline stops at the first `Block` result.
#[derive(Debug)]
pub struct FirewallVerifier {
    /// User-configured path globs, compiled from `FirewallVerifierConfig::blocked_paths`.
    blocked_path_globs: Vec<glob::Pattern>,
    /// User-configured environment variable names, stored upper-cased.
    blocked_env_vars: HashSet<String>,
    /// Tool names that skip firewall scanning, stored lower-cased.
    exempt_tools: HashSet<String>,
}
822
823/// Built-in path patterns that are always blocked regardless of config.
824static SENSITIVE_PATH_PATTERNS: LazyLock<Vec<glob::Pattern>> = LazyLock::new(|| {
825    let raw = [
826        "/etc/passwd",
827        "/etc/shadow",
828        "/etc/sudoers",
829        "~/.ssh/*",
830        "~/.aws/*",
831        "~/.gnupg/*",
832        "**/*.pem",
833        "**/*.key",
834        "**/id_rsa",
835        "**/id_ed25519",
836        "**/.env",
837        "**/credentials",
838    ];
839    raw.iter()
840        .filter_map(|p| {
841            glob::Pattern::new(p)
842                .map_err(|e| {
843                    tracing::error!(pattern = p, error = %e, "failed to compile built-in firewall path pattern");
844                    e
845                })
846                .ok()
847        })
848        .collect()
849});
850
/// Built-in env var prefixes that trigger a block when found in tool arguments.
///
/// Covers the `$NAME`, `${NAME` and `%NAME` spellings for the `AWS_` and `ZEPH_`
/// families. `scan_arg` searches the upper-cased argument for these substrings,
/// so the match is case-insensitive.
static SENSITIVE_ENV_PREFIXES: &[&str] =
    &["$AWS_", "$ZEPH_", "${AWS_", "${ZEPH_", "%AWS_", "%ZEPH_"];
854
/// Argument field names to extract and inspect.
///
/// When the tool arguments are a JSON object, only values stored under these
/// top-level keys are collected for scanning; other keys are ignored.
static INSPECTED_FIELDS: &[&str] = &[
    "command",
    "file_path",
    "path",
    "url",
    "query",
    "uri",
    "input",
    "args",
];
866
867impl FirewallVerifier {
868    /// Build a `FirewallVerifier` from config.
869    ///
870    /// Invalid glob patterns in `blocked_paths` are logged at WARN level and skipped.
871    #[must_use]
872    pub fn new(config: &FirewallVerifierConfig) -> Self {
873        let blocked_path_globs = config
874            .blocked_paths
875            .iter()
876            .filter_map(|p| {
877                glob::Pattern::new(p)
878                    .map_err(|e| {
879                        tracing::warn!(pattern = p, error = %e, "invalid glob pattern in firewall blocked_paths, skipping");
880                        e
881                    })
882                    .ok()
883            })
884            .collect();
885
886        let blocked_env_vars = config
887            .blocked_env_vars
888            .iter()
889            .map(|s| s.to_uppercase())
890            .collect();
891
892        let exempt_tools = config
893            .exempt_tools
894            .iter()
895            .map(|s| s.to_lowercase())
896            .collect();
897
898        Self {
899            blocked_path_globs,
900            blocked_env_vars,
901            exempt_tools,
902        }
903    }
904
905    /// Extract all string argument values from a tool call's JSON args.
906    fn collect_args(args: &serde_json::Value) -> Vec<String> {
907        let mut out = Vec::new();
908        match args {
909            serde_json::Value::Object(map) => {
910                for field in INSPECTED_FIELDS {
911                    if let Some(val) = map.get(*field) {
912                        Self::collect_strings(val, &mut out);
913                    }
914                }
915            }
916            serde_json::Value::String(s) => out.push(s.clone()),
917            _ => {}
918        }
919        out
920    }
921
922    fn collect_strings(val: &serde_json::Value, out: &mut Vec<String>) {
923        match val {
924            serde_json::Value::String(s) => out.push(s.clone()),
925            serde_json::Value::Array(arr) => {
926                for item in arr {
927                    Self::collect_strings(item, out);
928                }
929            }
930            _ => {}
931        }
932    }
933
934    fn scan_arg(&self, arg: &str) -> Option<VerificationResult> {
935        // Apply NFKC normalization consistent with DestructiveCommandVerifier.
936        let normalized: String = arg.nfkc().collect();
937        let lower = normalized.to_lowercase();
938
939        // Path traversal
940        if lower.contains("../") || lower.contains("..\\") {
941            return Some(VerificationResult::Block {
942                reason: format!(
943                    "[FirewallVerifier] path traversal pattern detected in argument: {arg}"
944                ),
945            });
946        }
947
948        // Sensitive paths (built-in)
949        for pattern in SENSITIVE_PATH_PATTERNS.iter() {
950            if pattern.matches(&normalized) || pattern.matches(&lower) {
951                return Some(VerificationResult::Block {
952                    reason: format!(
953                        "[FirewallVerifier] sensitive path pattern '{pattern}' matched in argument: {arg}"
954                    ),
955                });
956            }
957        }
958
959        // User-configured blocked paths
960        for pattern in &self.blocked_path_globs {
961            if pattern.matches(&normalized) || pattern.matches(&lower) {
962                return Some(VerificationResult::Block {
963                    reason: format!(
964                        "[FirewallVerifier] blocked path pattern '{pattern}' matched in argument: {arg}"
965                    ),
966                });
967            }
968        }
969
970        // Env var exfiltration (built-in prefixes)
971        let upper = normalized.to_uppercase();
972        for prefix in SENSITIVE_ENV_PREFIXES {
973            if upper.contains(*prefix) {
974                return Some(VerificationResult::Block {
975                    reason: format!(
976                        "[FirewallVerifier] env var exfiltration pattern '{prefix}' detected in argument: {arg}"
977                    ),
978                });
979            }
980        }
981
982        // User-configured blocked env vars (match $VAR or %VAR% patterns)
983        for var in &self.blocked_env_vars {
984            let dollar_form = format!("${var}");
985            let brace_form = format!("${{{var}}}");
986            let percent_form = format!("%{var}%");
987            if upper.contains(&dollar_form)
988                || upper.contains(&brace_form)
989                || upper.contains(&percent_form)
990            {
991                return Some(VerificationResult::Block {
992                    reason: format!(
993                        "[FirewallVerifier] blocked env var '{var}' detected in argument: {arg}"
994                    ),
995                });
996            }
997        }
998
999        None
1000    }
1001}
1002
1003impl PreExecutionVerifier for FirewallVerifier {
1004    fn name(&self) -> &'static str {
1005        "FirewallVerifier"
1006    }
1007
1008    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
1009        if self.exempt_tools.contains(&tool_name.to_lowercase()) {
1010            return VerificationResult::Allow;
1011        }
1012
1013        for arg in Self::collect_args(args) {
1014            if let Some(result) = self.scan_arg(&arg) {
1015                return result;
1016            }
1017        }
1018
1019        VerificationResult::Allow
1020    }
1021}
1022
1023// ---------------------------------------------------------------------------
1024// Tests
1025// ---------------------------------------------------------------------------
1026
1027#[cfg(test)]
1028mod tests {
1029    use serde_json::json;
1030
1031    use super::*;
1032
1033    // --- DestructiveCommandVerifier ---
1034
1035    fn dcv() -> DestructiveCommandVerifier {
1036        DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default())
1037    }
1038
/// A plain directory listing is allowed.
#[test]
fn allow_normal_command() {
    let verdict = dcv().verify("bash", &json!({"command": "ls -la /tmp"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1047
/// `rm -rf /` is blocked.
#[test]
fn block_rm_rf_root() {
    let args = json!({"command": "rm -rf /"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1054
/// A `dd` overwrite of a block device is blocked.
#[test]
fn block_dd_dev_zero() {
    let args = json!({"command": "dd if=/dev/zero of=/dev/sda"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1061
/// A `mkfs` invocation is blocked.
#[test]
fn block_mkfs() {
    let args = json!({"command": "mkfs.ext4 /dev/sda1"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1068
/// `rm -rf` below a configured allowed path is permitted.
#[test]
fn allow_rm_rf_in_allowed_path() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        allowed_paths: vec![String::from("/tmp/build")],
        ..Default::default()
    });
    let verdict = verifier.verify("bash", &json!({"command": "rm -rf /tmp/build/artifacts"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1081
/// `rm -rf` outside the configured allowed path is still blocked.
#[test]
fn block_rm_rf_when_not_in_allowed_path() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        allowed_paths: vec![String::from("/tmp/build")],
        ..Default::default()
    });
    let verdict = verifier.verify("bash", &json!({"command": "rm -rf /home/user"}));
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1092
/// A destructive-looking string passed to a non-shell tool is allowed.
#[test]
fn allow_non_shell_tool() {
    let verdict = dcv().verify("read_file", &json!({"path": "rm -rf /"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1101
/// A command matching a user-supplied extra pattern is blocked.
#[test]
fn block_extra_pattern() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        extra_patterns: vec![String::from("format c:")],
        ..Default::default()
    });
    let verdict = verifier.verify("bash", &json!({"command": "format c:"}));
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1112
/// A command supplied as an array of tokens is blocked like its string form.
#[test]
fn array_args_normalization() {
    let args = json!({"command": ["rm", "-rf", "/"]});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1119
/// A destructive command wrapped in `bash -c '…'` is blocked.
#[test]
fn sh_c_wrapping_normalization() {
    let args = json!({"command": "bash -c 'rm -rf /'"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1126
/// The classic shell fork bomb is blocked.
#[test]
fn fork_bomb_blocked() {
    let args = json!({"command": ":(){ :|:& };:"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1133
/// A tool name listed in a custom `shell_tools` config is treated as a shell.
#[test]
fn custom_shell_tool_name_blocked() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        shell_tools: vec![String::from("execute"), String::from("run_command")],
        ..Default::default()
    });
    let verdict = verifier.verify("execute", &json!({"command": "rm -rf /"}));
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1144
/// With the default config, the `terminal` tool is guarded.
#[test]
fn terminal_tool_name_blocked_by_default() {
    let args = json!({"command": "rm -rf /"});
    let verdict = dcv().verify("terminal", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1151
/// The default `shell_tools` list contains `bash`, `shell` and `terminal`
/// (compared after lower-casing).
#[test]
fn default_shell_tools_contains_bash_shell_terminal() {
    let names: Vec<String> = DestructiveVerifierConfig::default()
        .shell_tools
        .iter()
        .map(|name| name.to_lowercase())
        .collect();
    for expected in ["bash", "shell", "terminal"] {
        assert!(names.iter().any(|name| name == expected));
    }
}
1164
1165    // --- InjectionPatternVerifier ---
1166
1167    fn ipv() -> InjectionPatternVerifier {
1168        InjectionPatternVerifier::new(&InjectionVerifierConfig::default())
1169    }
1170
/// An ordinary search query is allowed.
#[test]
fn allow_clean_args() {
    let verdict = ipv().verify("search", &json!({"query": "rust async traits"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1179
/// S2: `memory_search` with a SQL discussion in `query` must NOT be blocked.
#[test]
fn allow_sql_discussion_in_query_field() {
    let args = json!({"query": "explain SQL UNION SELECT vs JOIN"});
    let verdict = ipv().verify("memory_search", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1192
/// S2: the safe `query` field must not trigger the SQL injection pattern.
#[test]
fn allow_sql_or_pattern_in_query_field() {
    let args = json!({"query": "' OR '1'='1"});
    let verdict = ipv().verify("memory_search", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1202
/// The same `' OR '1'='1` payload in a non-`query` field is blocked.
#[test]
fn block_sql_injection_in_non_query_field() {
    let args = json!({"sql": "' OR '1'='1"});
    let verdict = ipv().verify("db_query", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1209
/// A `DROP TABLE` payload in the `input` field is blocked.
#[test]
fn block_drop_table() {
    let args = json!({"input": "name'; DROP TABLE users"});
    let verdict = ipv().verify("db_query", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1216
/// A `../` traversal in a `path` argument is blocked.
#[test]
fn block_path_traversal() {
    let args = json!({"path": "../../../etc/passwd"});
    let verdict = ipv().verify("read_file", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1223
/// S2: the SSRF warning fires for a localhost URL in a URL-like field.
#[test]
fn warn_on_localhost_url_field() {
    let args = json!({"url": "http://localhost:8080/api"});
    let verdict = ipv().verify("http_get", &args);
    assert!(matches!(verdict, VerificationResult::Warn { .. }));
}
1231
/// S2: localhost mentioned in a text field (not a URL field) must not warn.
#[test]
fn allow_localhost_in_non_url_field() {
    let args = json!({"query": "connect to http://localhost:8080"});
    let verdict = ipv().verify("memory_search", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1244
/// A private-range IP address in the `url` field produces a warning.
#[test]
fn warn_on_private_ip_url_field() {
    let args = json!({"url": "http://192.168.1.1/admin"});
    let verdict = ipv().verify("fetch", &args);
    assert!(matches!(verdict, VerificationResult::Warn { .. }));
}
1251
/// A localhost URL covered by `allowlisted_urls` is allowed without a warning.
#[test]
fn allow_localhost_when_allowlisted() {
    let verifier = InjectionPatternVerifier::new(&InjectionVerifierConfig {
        allowlisted_urls: vec![String::from("http://localhost:3000")],
        ..Default::default()
    });
    let verdict = verifier.verify("http_get", &json!({"url": "http://localhost:3000/api"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1264
/// `UNION SELECT` in the `input` field is blocked.
#[test]
fn block_union_select_in_non_query_field() {
    let args = json!({"input": "id=1 UNION SELECT password FROM users"});
    let verdict = ipv().verify("db_query", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1274
/// S2: "UNION SELECT" in a `query` field is a SQL discussion, not an injection.
#[test]
fn allow_union_select_in_query_field() {
    let args = json!({"query": "id=1 UNION SELECT password FROM users"});
    let verdict = ipv().verify("memory_search", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1287
1288    // --- FIX-1: Unicode normalization bypass ---
1289
/// U+FF0F FULLWIDTH SOLIDUS looks like '/' and NFKC-normalizes to '/', so
/// `rm -rf` followed by that character must be blocked just like `rm -rf /`.
#[test]
fn block_rm_rf_unicode_homoglyph() {
    // The target below is the single code point U+FF0F, not an ASCII slash.
    let args = json!({"command": "rm -rf \u{FF0F}"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1298
1299    // --- FIX-2: Path traversal in is_allowed_path ---
1300
/// `/tmp/build/../../etc` lexically resolves to `/etc`, NOT under `/tmp/build`,
/// so the allowed-path exemption must not apply and the command is blocked.
#[test]
fn path_traversal_not_allowed_via_dotdot() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        allowed_paths: vec![String::from("/tmp/build")],
        ..Default::default()
    });
    let verdict = verifier.verify("bash", &json!({"command": "rm -rf /tmp/build/../../etc"}));
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1313
/// `/tmp/build/sub/../artifacts` resolves to `/tmp/build/artifacts` — still
/// inside the allowed path, so the command is allowed.
#[test]
fn allowed_path_with_dotdot_stays_in_allowed() {
    let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
        allowed_paths: vec![String::from("/tmp/build")],
        ..Default::default()
    });
    let args = json!({"command": "rm -rf /tmp/build/sub/../artifacts"});
    let verdict = verifier.verify("bash", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1330
1331    // --- FIX-3: Double-nested shell wrapping ---
1332
/// A destructive command wrapped in two levels of `bash -c` is blocked.
#[test]
fn double_nested_bash_c_blocked() {
    let args = json!({"command": "bash -c \"bash -c 'rm -rf /'\""});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1342
/// An `env VAR=value` prefix does not hide the wrapped destructive command.
#[test]
fn env_prefix_stripping_blocked() {
    let args = json!({"command": "env FOO=bar bash -c 'rm -rf /'"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1352
/// An `exec` prefix does not hide the wrapped destructive command.
#[test]
fn exec_prefix_stripping_blocked() {
    let args = json!({"command": "exec bash -c 'rm -rf /'"});
    let verdict = dcv().verify("bash", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1359
1360    // --- FIX-4: SSRF host extraction (not substring match) ---
1361
/// `evil.com/?r=http://localhost` — the request host is `evil.com`, not
/// localhost, so no SSRF warning is expected.
#[test]
fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
    let args = json!({"url": "http://evil.com/?r=http://localhost"});
    let verdict = ipv().verify("http_get", &args);
    // Should NOT warn — the actual request host is evil.com, not localhost.
    assert_eq!(verdict, VerificationResult::Allow);
}
1373
/// FIX-7: `http://localhost` with no trailing slash or port must warn.
#[test]
fn ssrf_triggered_for_bare_localhost_no_port() {
    let args = json!({"url": "http://localhost"});
    let verdict = ipv().verify("http_get", &args);
    assert!(matches!(verdict, VerificationResult::Warn { .. }));
}
1381
/// A localhost URL with a path (and no port) warns.
#[test]
fn ssrf_triggered_for_localhost_with_path() {
    let args = json!({"url": "http://localhost/api/v1"});
    let verdict = ipv().verify("http_get", &args);
    assert!(matches!(verdict, VerificationResult::Warn { .. }));
}
1388
1389    // --- Verifier chain: first Block wins, Warn continues ---
1390
/// Running verifiers in order and stopping at the first `Block` yields a
/// `Block` for `rm -rf /`.
#[test]
fn chain_first_block_wins() {
    let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![
        Box::new(DestructiveCommandVerifier::new(
            &DestructiveVerifierConfig::default(),
        )),
        Box::new(InjectionPatternVerifier::new(
            &InjectionVerifierConfig::default(),
        )),
    ];
    let args = json!({"command": "rm -rf /"});

    // The iterator is lazy, so evaluation stops at the first `Block`.
    let first_block = verifiers
        .iter()
        .map(|verifier| verifier.verify("bash", &args))
        .find(|verdict| matches!(verdict, VerificationResult::Block { .. }));
    assert!(first_block.is_some());
}
1407
/// A `Warn` from one verifier does not stop the chain and does not become a
/// `Block`.
#[test]
fn chain_warn_continues() {
    let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![
        Box::new(DestructiveCommandVerifier::new(
            &DestructiveVerifierConfig::default(),
        )),
        Box::new(InjectionPatternVerifier::new(
            &InjectionVerifierConfig::default(),
        )),
    ];

    // localhost URL in `url` field: dcv allows, ipv warns, chain does NOT block.
    let args = json!({"url": "http://localhost:8080/api"});
    let mut warned = false;
    let mut blocked = false;
    for verifier in &verifiers {
        let verdict = verifier.verify("http_get", &args);
        if matches!(verdict, VerificationResult::Block { .. }) {
            blocked = true;
            break;
        }
        warned |= matches!(verdict, VerificationResult::Warn { .. });
    }
    assert!(warned);
    assert!(!blocked);
}
1433
1434    // --- UrlGroundingVerifier ---
1435
1436    fn ugv(urls: &[&str]) -> UrlGroundingVerifier {
1437        let set: HashSet<String> = urls.iter().map(|s| s.to_lowercase()).collect();
1438        UrlGroundingVerifier::new(
1439            &UrlGroundingVerifierConfig::default(),
1440            Arc::new(RwLock::new(set)),
1441        )
1442    }
1443
/// Fetching exactly the URL the user provided is allowed.
#[test]
fn url_grounding_allows_user_provided_url() {
    let verifier = ugv(&["https://docs.anthropic.com/models"]);
    let verdict = verifier.verify("fetch", &json!({"url": "https://docs.anthropic.com/models"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1455
/// A URL unrelated to the one the user provided is blocked.
#[test]
fn url_grounding_blocks_hallucinated_url() {
    let verifier = ugv(&["https://example.com/page"]);
    let args = json!({"url": "https://api.anthropic.ai/v1/models"});
    let verdict = verifier.verify("fetch", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1465
/// With an empty user-provided set, a fetch is blocked.
#[test]
fn url_grounding_blocks_when_no_user_urls_at_all() {
    let args = json!({"url": "https://api.anthropic.ai/v1/models"});
    let verdict = ugv(&[]).verify("fetch", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1475
/// A tool that is not guarded is allowed even with an empty URL set.
#[test]
fn url_grounding_allows_non_guarded_tool() {
    let verdict = ugv(&[]).verify("read_file", &json!({"path": "/etc/hosts"}));
    assert_eq!(verdict, VerificationResult::Allow);
}
1484
/// A tool named `http_fetch` is guarded, so an ungrounded URL is blocked.
#[test]
fn url_grounding_guards_fetch_suffix_tool() {
    let args = json!({"url": "https://evil.com/"});
    let verdict = ugv(&[]).verify("http_fetch", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1491
/// `web_scrape` with a user-provided URL is allowed; extra arguments are ignored.
#[test]
fn url_grounding_allows_web_scrape_with_provided_url() {
    let verifier = ugv(&["https://rust-lang.org/"]);
    let args = json!({"url": "https://rust-lang.org/", "select": "h1"});
    let verdict = verifier.verify("web_scrape", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1503
/// User provided `https://docs.rs/` — the agent may fetch a sub-path of it.
#[test]
fn url_grounding_allows_prefix_match() {
    let verifier = ugv(&["https://docs.rs/"]);
    let args = json!({"url": "https://docs.rs/tokio/latest/tokio/"});
    let verdict = verifier.verify("fetch", &args);
    assert_eq!(verdict, VerificationResult::Allow);
}
1516
1517    // --- Regression: #2191 — fetch URL hallucination ---
1518
/// REG-2191-1: exact reproduction of the bug scenario.
/// Agent asks "do you know Anthropic?" (no URL provided) and hallucinates
/// `https://api.anthropic.ai/v1/models`. With an empty `user_provided_urls` set
/// the fetch must be blocked.
#[test]
fn reg_2191_hallucinated_api_endpoint_blocked_with_empty_session() {
    // Empty set: the user never sent any URL in the conversation.
    let verifier = ugv(&[]);
    let args = json!({"url": "https://api.anthropic.ai/v1/models"});
    let verdict = verifier.verify("fetch", &args);
    assert!(
        matches!(verdict, VerificationResult::Block { .. }),
        "fetch must be blocked when no user URL was provided — this is the #2191 regression"
    );
}
1536
/// REG-2191-2: passthrough — user explicitly pasted the URL, fetch must proceed.
#[test]
fn reg_2191_user_provided_url_allows_fetch() {
    let verifier = ugv(&["https://api.anthropic.com/v1/models"]);
    let args = json!({"url": "https://api.anthropic.com/v1/models"});
    let verdict = verifier.verify("fetch", &args);
    assert_eq!(
        verdict,
        VerificationResult::Allow,
        "fetch must be allowed when the URL was explicitly provided by the user"
    );
}
1550
/// REG-2191-3: `web_scrape` variant — same rejection for `web_scrape` tool.
#[test]
fn reg_2191_web_scrape_hallucinated_url_blocked() {
    let args = json!({"url": "https://api.anthropic.ai/v1/models", "select": "body"});
    let verdict = ugv(&[]).verify("web_scrape", &args);
    assert!(
        matches!(verdict, VerificationResult::Block { .. }),
        "web_scrape must be blocked for hallucinated URL with empty user_provided_urls"
    );
}
1564
/// REG-2191-4: URL present only in an imagined system/assistant message context
/// is NOT in `user_provided_urls` (the agent only populates from user messages).
/// The verifier itself cannot distinguish message roles — it only sees the set
/// populated by the agent. This test confirms: an empty set always blocks.
#[test]
fn reg_2191_empty_url_set_always_blocks_fetch() {
    // Whether the URL came from a system/assistant message or was never seen,
    // an empty user_provided_urls set means the fetch must be blocked.
    let args = json!({"url": "https://docs.anthropic.com/something"});
    let verdict = ugv(&[]).verify("fetch", &args);
    assert!(matches!(verdict, VerificationResult::Block { .. }));
}
1580
/// REG-2191-5: URL matching is case-insensitive — user pastes mixed-case URL.
#[test]
fn reg_2191_case_insensitive_url_match_allows_fetch() {
    // `ugv` lower-cases the stored URLs; the fetched URL below differs in
    // casing from what the user pasted and must still match.
    let verifier = ugv(&["https://Docs.Anthropic.COM/models"]);
    let args = json!({"url": "https://docs.anthropic.com/models/detail"});
    let verdict = verifier.verify("fetch", &args);
    assert_eq!(
        verdict,
        VerificationResult::Allow,
        "URL matching must be case-insensitive"
    );
}
1596
/// REG-2191-6: tool name ending in `_fetch` is auto-guarded regardless of config.
/// An MCP-registered `anthropic_fetch` tool must not bypass the gate.
#[test]
fn reg_2191_mcp_fetch_suffix_tool_blocked_with_empty_session() {
    let args = json!({"url": "https://api.anthropic.ai/v1/models"});
    let verdict = ugv(&[]).verify("anthropic_fetch", &args);
    assert!(
        matches!(verdict, VerificationResult::Block { .. }),
        "MCP tools ending in _fetch must be guarded even if not in guarded_tools list"
    );
}
1611
/// REG-2191-7: reverse prefix — user provided a specific URL, agent fetches
/// the root. This is the "reverse prefix" case: `user_url` `starts_with` `fetch_url`.
#[test]
fn reg_2191_reverse_prefix_match_allows_fetch() {
    // The user gave a deep URL; the agent asks for the site root, which is a
    // prefix of that URL and therefore allowed.
    let verifier = ugv(&["https://docs.rs/tokio/latest/tokio/index.html"]);
    let verdict = verifier.verify("fetch", &json!({"url": "https://docs.rs/"}));
    assert_eq!(
        verdict,
        VerificationResult::Allow,
        "reverse prefix: fetched URL is a prefix of user-provided URL — should be allowed"
    );
}
1625
/// REG-2191-8: completely different domain with same path prefix must be blocked.
#[test]
fn reg_2191_different_domain_blocked() {
    // The user provided docs.rs; a fetch of evil.com/docs.rs/... must not pass.
    let verifier = ugv(&["https://docs.rs/"]);
    let args = json!({"url": "https://evil.com/docs.rs/exfil"});
    let verdict = verifier.verify("fetch", &args);
    assert!(
        matches!(verdict, VerificationResult::Block { .. }),
        "different domain must not be allowed even if path looks similar"
    );
}
1637
/// REG-2191-9: args without a `url` field — verifier must not block (Allow).
#[test]
fn reg_2191_missing_url_field_allows_fetch() {
    // Fetch-like tools may use a different argument name. Only the `url`
    // field is checked, so a call without it is allowed.
    let args = json!({"endpoint": "https://api.anthropic.ai/v1/models"});
    let verdict = ugv(&[]).verify("fetch", &args);
    assert_eq!(
        verdict,
        VerificationResult::Allow,
        "missing url field must not trigger blocking — only explicit url field is checked"
    );
}
1653
/// REG-2191-10: verifier disabled via config — all fetch calls pass through.
///
/// This is an API documentation test, not a behaviour test: it only shows that
/// the verifier can be built from a config with `enabled: false`. The flag is
/// honoured by the pipeline, which skips disabled verifiers; `verify()` itself
/// does not consult it and will still block when called directly.
#[test]
fn reg_2191_disabled_verifier_allows_all() {
    let config = UrlGroundingVerifierConfig {
        enabled: false,
        guarded_tools: default_guarded_tools(),
    };
    let empty: HashSet<String> = HashSet::new();
    let verifier = UrlGroundingVerifier::new(&config, Arc::new(RwLock::new(empty)));
    // No assertion: the result is deliberately discarded.
    let _ = verifier.verify("fetch", &json!({"url": "https://example.com/"}));
}
1672
1673    // --- FirewallVerifier ---
1674
1675    fn fwv() -> FirewallVerifier {
1676        FirewallVerifier::new(&FirewallVerifierConfig::default())
1677    }
1678
1679    #[test]
1680    fn firewall_allows_normal_path() {
1681        let v = fwv();
1682        assert_eq!(
1683            v.verify("shell", &json!({"command": "ls /tmp/build"})),
1684            VerificationResult::Allow
1685        );
1686    }
1687
1688    #[test]
1689    fn firewall_blocks_path_traversal() {
1690        let v = fwv();
1691        let result = v.verify("read", &json!({"file_path": "../../etc/passwd"}));
1692        assert!(
1693            matches!(result, VerificationResult::Block { .. }),
1694            "path traversal must be blocked"
1695        );
1696    }
1697
1698    #[test]
1699    fn firewall_blocks_etc_passwd() {
1700        let v = fwv();
1701        let result = v.verify("read", &json!({"file_path": "/etc/passwd"}));
1702        assert!(
1703            matches!(result, VerificationResult::Block { .. }),
1704            "/etc/passwd must be blocked"
1705        );
1706    }
1707
1708    #[test]
1709    fn firewall_blocks_ssh_key() {
1710        let v = fwv();
1711        let result = v.verify("read", &json!({"file_path": "~/.ssh/id_rsa"}));
1712        assert!(
1713            matches!(result, VerificationResult::Block { .. }),
1714            "SSH key path must be blocked"
1715        );
1716    }
1717
1718    #[test]
1719    fn firewall_blocks_aws_env_var() {
1720        let v = fwv();
1721        let result = v.verify("shell", &json!({"command": "echo $AWS_SECRET_ACCESS_KEY"}));
1722        assert!(
1723            matches!(result, VerificationResult::Block { .. }),
1724            "AWS env var exfiltration must be blocked"
1725        );
1726    }
1727
1728    #[test]
1729    fn firewall_blocks_zeph_env_var() {
1730        let v = fwv();
1731        let result = v.verify("shell", &json!({"command": "cat ${ZEPH_CLAUDE_API_KEY}"}));
1732        assert!(
1733            matches!(result, VerificationResult::Block { .. }),
1734            "ZEPH env var exfiltration must be blocked"
1735        );
1736    }
1737
1738    #[test]
1739    fn firewall_exempt_tool_bypasses_check() {
1740        let cfg = FirewallVerifierConfig {
1741            enabled: true,
1742            blocked_paths: vec![],
1743            blocked_env_vars: vec![],
1744            exempt_tools: vec!["read".to_string()],
1745        };
1746        let v = FirewallVerifier::new(&cfg);
1747        // /etc/passwd would normally be blocked but tool is exempt.
1748        assert_eq!(
1749            v.verify("read", &json!({"file_path": "/etc/passwd"})),
1750            VerificationResult::Allow
1751        );
1752    }
1753
1754    #[test]
1755    fn firewall_custom_blocked_path() {
1756        let cfg = FirewallVerifierConfig {
1757            enabled: true,
1758            blocked_paths: vec!["/data/secrets/*".to_string()],
1759            blocked_env_vars: vec![],
1760            exempt_tools: vec![],
1761        };
1762        let v = FirewallVerifier::new(&cfg);
1763        let result = v.verify("read", &json!({"file_path": "/data/secrets/master.key"}));
1764        assert!(
1765            matches!(result, VerificationResult::Block { .. }),
1766            "custom blocked path must be blocked"
1767        );
1768    }
1769
1770    #[test]
1771    fn firewall_custom_blocked_env_var() {
1772        let cfg = FirewallVerifierConfig {
1773            enabled: true,
1774            blocked_paths: vec![],
1775            blocked_env_vars: vec!["MY_SECRET".to_string()],
1776            exempt_tools: vec![],
1777        };
1778        let v = FirewallVerifier::new(&cfg);
1779        let result = v.verify("shell", &json!({"command": "echo $MY_SECRET"}));
1780        assert!(
1781            matches!(result, VerificationResult::Block { .. }),
1782            "custom blocked env var must be blocked"
1783        );
1784    }
1785
1786    #[test]
1787    fn firewall_invalid_glob_is_skipped() {
1788        // Invalid glob should not panic — logged and skipped at construction.
1789        let cfg = FirewallVerifierConfig {
1790            enabled: true,
1791            blocked_paths: vec!["[invalid-glob".to_string(), "/valid/path/*".to_string()],
1792            blocked_env_vars: vec![],
1793            exempt_tools: vec![],
1794        };
1795        let v = FirewallVerifier::new(&cfg);
1796        // Valid pattern still works
1797        let result = v.verify("read", &json!({"path": "/valid/path/file.txt"}));
1798        assert!(matches!(result, VerificationResult::Block { .. }));
1799    }
1800
1801    #[test]
1802    fn firewall_config_default_deserialization() {
1803        let cfg: FirewallVerifierConfig = toml::from_str("").unwrap();
1804        assert!(cfg.enabled);
1805        assert!(cfg.blocked_paths.is_empty());
1806        assert!(cfg.blocked_env_vars.is_empty());
1807        assert!(cfg.exempt_tools.is_empty());
1808    }
1809}
zeph_tools/verifier.rs

zeph_tools/
verifier.rs