zeph_tools/
verifier.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pre-execution verification for tool calls.
5//!
6//! Based on the `TrustBench` pattern (arXiv:2603.09157): intercept tool calls before
7//! execution to block or warn on destructive or injection patterns.
8//!
9//! ## Blocklist separation
10//!
11//! `DESTRUCTIVE_PATTERNS` (this module) is intentionally separate from
12//! `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`. The two lists serve different purposes:
13//!
14//! - `DEFAULT_BLOCKED_COMMANDS` — shell safety net: prevents the *shell executor* from
15//!   running network tools (`curl`, `wget`, `nc`) and a few destructive commands.
16//!   It is applied at tool-execution time by `ShellExecutor`.
17//!
18//! - `DESTRUCTIVE_PATTERNS` — pre-execution guard: targets filesystem/system destruction
19//!   commands (disk formats, wipefs, fork bombs, recursive permission changes).
20//!   It runs *before* dispatch, in the LLM-call hot path, and must not be conflated
21//!   with the shell safety net to avoid accidental allow-listing via config drift.
22//!
23//! Overlap (3 entries: `rm -rf /`, `mkfs`, `dd if=`) is intentional — belt-and-suspenders.
24
25use std::collections::HashSet;
26use std::sync::{Arc, LazyLock};
27
28use parking_lot::RwLock;
29
30use regex::Regex;
31use unicode_normalization::UnicodeNormalization as _;
32
33use zeph_config::tools::{
34    DestructiveVerifierConfig, FirewallVerifierConfig, InjectionVerifierConfig,
35    UrlGroundingVerifierConfig,
36};
37
/// Outcome a [`PreExecutionVerifier`] reports for one tool call.
#[derive(Debug, Clone, PartialEq, Eq)]
#[must_use]
pub enum VerificationResult {
    /// Nothing objectionable was found; the call may run.
    Allow,
    /// The call must not run. `reason` is the explanation; the executor returns an
    /// error to the LLM.
    Block { reason: String },
    /// The call still runs, but `message` is logged and counted in metrics. This is
    /// metrics-only: it is not shown to the LLM or the user beyond the TUI security panel.
    Warn { message: String },
}
50
51/// Pre-execution verification trait. Implementations intercept tool calls
52/// before the executor runs them. Based on `TrustBench` pattern (arXiv:2603.09157).
53///
54/// Sync by design: verifiers inspect arguments only — no I/O needed.
55/// Object-safe: uses `&self` and returns a concrete enum.
56pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
57    /// Verify whether a tool call should proceed.
58    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;
59
60    /// Human-readable name for logging and TUI display.
61    fn name(&self) -> &'static str;
62}
63
64// ---------------------------------------------------------------------------
65// DestructiveCommandVerifier
66// ---------------------------------------------------------------------------
67
/// Substrings that mark a shell command as destructive for `DestructiveCommandVerifier`.
///
/// Deliberately kept apart from `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`; the module
/// docs explain how the two lists differ.
///
/// Every entry is lowercase because commands are lowercased before matching, which is
/// why the recursive flags appear as `-r` rather than `-R`. Order matters: the first
/// entry contained in the command is the one reported.
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletion from the root or the home directory.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk / filesystem manipulation.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bombs (spaced and compact spellings).
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes.
    "chmod -r 777 /",
    "chown -r",
];
86
/// Pre-execution guard that rejects destructive shell commands such as `rm -rf /`,
/// `dd` or `mkfs` before a shell tool gets to run them.
///
/// Only tools whose name appears in `shell_tools` are inspected (the config default is
/// documented as `["bash", "shell", "terminal"]`). A matching command is let through
/// when it references a path under one of `allowed_paths`; with an empty
/// `allowed_paths` (the default) **every** matching command is blocked.
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    /// Lowercased names of the tools treated as shells.
    shell_tools: Vec<String>,
    /// Lowercased path prefixes under which destructive commands are tolerated.
    allowed_paths: Vec<String>,
    /// Lowercased, user-configured substrings checked after the built-in patterns.
    extra_patterns: Vec<String>,
}
100
101impl DestructiveCommandVerifier {
102    #[must_use]
103    pub fn new(config: &DestructiveVerifierConfig) -> Self {
104        Self {
105            shell_tools: config
106                .shell_tools
107                .iter()
108                .map(|s| s.to_lowercase())
109                .collect(),
110            allowed_paths: config
111                .allowed_paths
112                .iter()
113                .map(|s| s.to_lowercase())
114                .collect(),
115            extra_patterns: config
116                .extra_patterns
117                .iter()
118                .map(|s| s.to_lowercase())
119                .collect(),
120        }
121    }
122
123    fn is_shell_tool(&self, tool_name: &str) -> bool {
124        let lower = tool_name.to_lowercase();
125        self.shell_tools.iter().any(|t| t == &lower)
126    }
127
128    /// Extract the effective command string from `args`.
129    ///
130    /// Supports:
131    /// - `{"command": "rm -rf /"}` (string)
132    /// - `{"command": ["rm", "-rf", "/"]}` (array — joined with spaces)
133    /// - `{"command": "bash -c 'rm -rf /'"}` (shell `-c` unwrapping, looped up to 8 levels)
134    /// - `env VAR=val bash -c '...'` and `exec bash -c '...'` prefix stripping
135    ///
136    /// NFKC-normalizes the result to defeat Unicode homoglyph bypasses.
137    fn extract_command(args: &serde_json::Value) -> Option<String> {
138        let raw = match args.get("command") {
139            Some(serde_json::Value::String(s)) => s.clone(),
140            Some(serde_json::Value::Array(arr)) => arr
141                .iter()
142                .filter_map(|v| v.as_str())
143                .collect::<Vec<_>>()
144                .join(" "),
145            _ => return None,
146        };
147        // NFKC-normalize + lowercase to defeat Unicode homoglyph and case bypasses.
148        let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
149        // Loop: strip shell wrapper prefixes up to 8 levels deep.
150        // Handles double-nested: `bash -c "bash -c 'rm -rf /'"`.
151        for _ in 0..8 {
152            let trimmed = current.trim().to_owned();
153            // Strip `env VAR=value ... CMD` prefix (one or more VAR=value tokens).
154            let after_env = Self::strip_env_prefix(&trimmed);
155            // Strip `exec ` prefix.
156            let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
157            // Strip interpreter wrapper: `bash -c '...'` / `sh -c '...'` / `zsh -c '...'`.
158            let mut unwrapped = false;
159            for interp in &["bash -c ", "sh -c ", "zsh -c "] {
160                if let Some(rest) = after_exec.strip_prefix(interp) {
161                    let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
162                    current.clone_from(&script.to_owned());
163                    unwrapped = true;
164                    break;
165                }
166            }
167            if !unwrapped {
168                return Some(after_exec.to_owned());
169            }
170        }
171        Some(current)
172    }
173
174    /// Strip leading `env VAR=value` tokens from a command string.
175    /// Returns the remainder after all `KEY=VALUE` pairs are consumed.
176    fn strip_env_prefix(cmd: &str) -> &str {
177        let mut rest = cmd;
178        // `env` keyword is optional; strip it if present.
179        if let Some(after_env) = rest.strip_prefix("env ") {
180            rest = after_env.trim_start();
181        }
182        // Consume `KEY=VALUE` tokens.
183        loop {
184            // A VAR=value token: identifier chars + '=' + non-space chars.
185            let mut chars = rest.chars();
186            let key_end = chars
187                .by_ref()
188                .take_while(|c| c.is_alphanumeric() || *c == '_')
189                .count();
190            if key_end == 0 {
191                break;
192            }
193            let remainder = &rest[key_end..];
194            if let Some(after_eq) = remainder.strip_prefix('=') {
195                // Consume the value (up to the first space).
196                let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
197                rest = after_eq[val_end..].trim_start();
198            } else {
199                break;
200            }
201        }
202        rest
203    }
204
205    /// Returns `true` if `command` targets a path that is covered by `allowed_paths`.
206    ///
207    /// Uses lexical normalization (resolves `..` and `.` without filesystem access)
208    /// so that `/tmp/build/../../etc` is correctly resolved to `/etc` before comparison,
209    /// defeating path traversal bypasses like `/tmp/build/../../etc/passwd`.
210    fn is_allowed_path(&self, command: &str) -> bool {
211        if self.allowed_paths.is_empty() {
212            return false;
213        }
214        let tokens: Vec<&str> = command.split_whitespace().collect();
215        for token in &tokens {
216            let t = token.trim_matches(|c| c == '\'' || c == '"');
217            if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
218                let normalized = Self::lexical_normalize(std::path::Path::new(t));
219                // Normalize separators to '/' for cross-platform comparison so that
220                // Unix-style allowed_paths (e.g. "/tmp/build") match on Windows too.
221                let n_lower = normalized
222                    .to_string_lossy()
223                    .replace('\\', "/")
224                    .to_lowercase();
225                if self
226                    .allowed_paths
227                    .iter()
228                    .any(|p| n_lower.starts_with(p.replace('\\', "/").to_lowercase().as_str()))
229                {
230                    return true;
231                }
232            }
233        }
234        false
235    }
236
237    /// Lexically normalize a path by resolving `.` and `..` components without
238    /// hitting the filesystem. Does not require the path to exist.
239    fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
240        let mut out = std::path::PathBuf::new();
241        for component in p.components() {
242            match component {
243                std::path::Component::ParentDir => {
244                    out.pop();
245                }
246                std::path::Component::CurDir => {}
247                other => out.push(other),
248            }
249        }
250        out
251    }
252
253    fn check_patterns(command: &str) -> Option<&'static str> {
254        DESTRUCTIVE_PATTERNS
255            .iter()
256            .find(|&pat| command.contains(pat))
257            .copied()
258    }
259
260    fn check_extra_patterns(&self, command: &str) -> Option<String> {
261        self.extra_patterns
262            .iter()
263            .find(|pat| command.contains(pat.as_str()))
264            .cloned()
265    }
266}
267
268impl PreExecutionVerifier for DestructiveCommandVerifier {
269    fn name(&self) -> &'static str {
270        "DestructiveCommandVerifier"
271    }
272
273    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
274        if !self.is_shell_tool(tool_name) {
275            return VerificationResult::Allow;
276        }
277
278        let Some(command) = Self::extract_command(args) else {
279            return VerificationResult::Allow;
280        };
281
282        if let Some(pat) = Self::check_patterns(&command) {
283            if self.is_allowed_path(&command) {
284                return VerificationResult::Allow;
285            }
286            return VerificationResult::Block {
287                reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
288            };
289        }
290
291        if let Some(pat) = self.check_extra_patterns(&command) {
292            if self.is_allowed_path(&command) {
293                return VerificationResult::Allow;
294            }
295            return VerificationResult::Block {
296                reason: format!(
297                    "[{}] extra destructive pattern '{}' detected",
298                    self.name(),
299                    pat
300                ),
301            };
302        }
303
304        VerificationResult::Allow
305    }
306}
307
308// ---------------------------------------------------------------------------
309// InjectionPatternVerifier
310// ---------------------------------------------------------------------------
311
312/// High-confidence injection block patterns applied to string field values in tool args.
313///
314/// These require *structural* patterns, not just keywords — e.g., `UNION SELECT` is
315/// blocked but a plain mention of "SELECT" is not. This avoids false positives for
316/// `memory_search` queries discussing SQL or coding assistants writing SQL examples.
317static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
318    [
319        // SQL injection structural patterns
320        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
321        r"(?i)'\s*OR\s*1\s*=\s*1",
322        r"(?i);\s*DROP\s+TABLE",
323        r"(?i)UNION\s+SELECT",
324        r"(?i)'\s*;\s*SELECT",
325        // Command injection via shell metacharacters with dangerous commands
326        r";\s*rm\s+",
327        r"\|\s*rm\s+",
328        r"&&\s*rm\s+",
329        r";\s*curl\s+",
330        r"\|\s*curl\s+",
331        r"&&\s*curl\s+",
332        r";\s*wget\s+",
333        // Path traversal to sensitive system files
334        r"\.\./\.\./\.\./etc/passwd",
335        r"\.\./\.\./\.\./etc/shadow",
336        r"\.\./\.\./\.\./windows/",
337        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
338    ]
339    .iter()
340    .map(|s| Regex::new(s).expect("static pattern must compile"))
341    .collect()
342});
343
344/// SSRF host patterns — matched against the *extracted host* (not the full URL string).
345/// This prevents bypasses like `http://evil.com/?r=http://localhost` where the SSRF
346/// target appears only in a query parameter, not as the actual request host.
347/// Bare hostnames (no port/path) are included alongside `host:port` variants.
348static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
349    [
350        // localhost — with or without port
351        r"^localhost$",
352        r"^localhost:",
353        // IPv4 loopback
354        r"^127\.0\.0\.1$",
355        r"^127\.0\.0\.1:",
356        // IPv6 loopback
357        r"^\[::1\]$",
358        r"^\[::1\]:",
359        // AWS metadata service
360        r"^169\.254\.169\.254$",
361        r"^169\.254\.169\.254:",
362        // RFC-1918 private ranges
363        r"^10\.\d+\.\d+\.\d+$",
364        r"^10\.\d+\.\d+\.\d+:",
365        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
366        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
367        r"^192\.168\.\d+\.\d+$",
368        r"^192\.168\.\d+\.\d+:",
369    ]
370    .iter()
371    .map(|s| Regex::new(s).expect("static pattern must compile"))
372    .collect()
373});
374
/// Extracts the host (and optional port) from a URL string.
///
/// The authority is the text between the first `://` and the next `/`, `?`, `#`, or
/// the end of the string. Any userinfo (`user:password@`) is removed, so
/// `http://user@localhost:8080/x` yields `localhost:8080`. The split is made at the
/// *last* `@` in the authority, the part a client would actually connect to.
///
/// Returns `None` when the URL has no `://` scheme separator.
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // Without this, `user@localhost` would evade every `^`-anchored host pattern.
    let host = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, host)| host);
    Some(host)
}
385
/// Argument field names treated as carrying a URL or endpoint; SSRF host patterns are
/// applied to their values. Compared against the lowercased field name.
static URL_FIELD_NAMES: &[&str] = &[
    "url",
    "endpoint",
    "uri",
    "href",
    "src",
    "host",
    "base_url",
];
388
/// Argument field names known to carry free-form user text, e.g.
/// `memory_search(query=...)` or `web_search(query=...)`.
///
/// SQL-injection and command-injection patterns are skipped for these fields to avoid
/// false positives. Compared against the lowercased field name.
static SAFE_QUERY_FIELDS: &[&str] = &[
    "query",
    "q",
    "search",
    "text",
    "message",
    "content",
];
393
394/// Verifier that blocks tool arguments containing SQL injection, command injection,
395/// or path traversal patterns. Applies to ALL tools using field-aware matching.
396///
397/// ## Field-aware matching
398///
399/// Rather than serialising all args to a flat string (which causes false positives),
400/// this verifier iterates over each string-valued field and applies pattern categories
401/// based on field semantics:
402///
403/// - `SAFE_QUERY_FIELDS` (`query`, `q`, `search`, `text`, …): injection patterns are
404///   **skipped** — these fields contain user-provided text and generate too many false
405///   positives for SQL/command discussions in chat.
406/// - `URL_FIELD_NAMES` (`url`, `endpoint`, `uri`, …): SSRF patterns are applied.
407/// - All other string fields: injection + path traversal patterns are applied.
408///
409/// ## Warn semantics
410///
411/// `VerificationResult::Warn` is metrics-only — the tool call proceeds, a WARN log
412/// entry is emitted, and the TUI security panel counter increments. The LLM does not
413/// see the warning in its tool result.
414#[derive(Debug)]
415pub struct InjectionPatternVerifier {
416    extra_patterns: Vec<Regex>,
417    allowlisted_urls: Vec<String>,
418}
419
420impl InjectionPatternVerifier {
421    #[must_use]
422    pub fn new(config: &InjectionVerifierConfig) -> Self {
423        let extra_patterns = config
424            .extra_patterns
425            .iter()
426            .filter_map(|s| match Regex::new(s) {
427                Ok(re) => Some(re),
428                Err(e) => {
429                    tracing::warn!(
430                        pattern = %s,
431                        error = %e,
432                        "InjectionPatternVerifier: invalid extra_pattern, skipping"
433                    );
434                    None
435                }
436            })
437            .collect();
438
439        Self {
440            extra_patterns,
441            allowlisted_urls: config
442                .allowlisted_urls
443                .iter()
444                .map(|s| s.to_lowercase())
445                .collect(),
446        }
447    }
448
449    fn is_allowlisted(&self, text: &str) -> bool {
450        let lower = text.to_lowercase();
451        self.allowlisted_urls
452            .iter()
453            .any(|u| lower.contains(u.as_str()))
454    }
455
456    fn is_url_field(field: &str) -> bool {
457        let lower = field.to_lowercase();
458        URL_FIELD_NAMES.iter().any(|&f| f == lower)
459    }
460
461    fn is_safe_query_field(field: &str) -> bool {
462        let lower = field.to_lowercase();
463        SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
464    }
465
466    /// Check a single string value from a named field.
467    fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
468        let is_url = Self::is_url_field(field);
469        let is_safe_query = Self::is_safe_query_field(field);
470
471        // Injection + path traversal: skip safe query fields (user text), apply elsewhere.
472        if !is_safe_query {
473            for pat in INJECTION_BLOCK_PATTERNS.iter() {
474                if pat.is_match(value) {
475                    return VerificationResult::Block {
476                        reason: format!(
477                            "[{}] injection pattern detected in field '{}': {}",
478                            "InjectionPatternVerifier",
479                            field,
480                            pat.as_str()
481                        ),
482                    };
483                }
484            }
485            for pat in &self.extra_patterns {
486                if pat.is_match(value) {
487                    return VerificationResult::Block {
488                        reason: format!(
489                            "[{}] extra injection pattern detected in field '{}': {}",
490                            "InjectionPatternVerifier",
491                            field,
492                            pat.as_str()
493                        ),
494                    };
495                }
496            }
497        }
498
499        // SSRF: apply only to URL-like fields.
500        // Extract the host first so that SSRF targets embedded in query parameters
501        // (e.g. `http://evil.com/?r=http://localhost`) are not falsely matched.
502        if is_url && let Some(host) = extract_url_host(value) {
503            for pat in SSRF_HOST_PATTERNS.iter() {
504                if pat.is_match(host) {
505                    if self.is_allowlisted(value) {
506                        return VerificationResult::Allow;
507                    }
508                    return VerificationResult::Warn {
509                        message: format!(
510                            "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
511                            "InjectionPatternVerifier", field, host,
512                        ),
513                    };
514                }
515            }
516        }
517
518        VerificationResult::Allow
519    }
520
521    /// Walk all string leaf values in a JSON object, collecting field names for context.
522    fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
523        for (key, val) in obj {
524            let result = self.check_value(key, val);
525            if !matches!(result, VerificationResult::Allow) {
526                return result;
527            }
528        }
529        VerificationResult::Allow
530    }
531
532    fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
533        match val {
534            serde_json::Value::String(s) => self.check_field_value(field, s),
535            serde_json::Value::Array(arr) => {
536                for item in arr {
537                    let r = self.check_value(field, item);
538                    if !matches!(r, VerificationResult::Allow) {
539                        return r;
540                    }
541                }
542                VerificationResult::Allow
543            }
544            serde_json::Value::Object(obj) => self.check_object(obj),
545            // Non-string primitives (numbers, booleans, null) cannot contain injection.
546            _ => VerificationResult::Allow,
547        }
548    }
549}
550
551impl PreExecutionVerifier for InjectionPatternVerifier {
552    fn name(&self) -> &'static str {
553        "InjectionPatternVerifier"
554    }
555
556    fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
557        match args {
558            serde_json::Value::Object(obj) => self.check_object(obj),
559            // Flat string args (unusual but handle gracefully — treat as unnamed field).
560            serde_json::Value::String(s) => self.check_field_value("_args", s),
561            _ => VerificationResult::Allow,
562        }
563    }
564}
565
566// ---------------------------------------------------------------------------
567// UrlGroundingVerifier
568// ---------------------------------------------------------------------------
569
570/// Verifier that blocks `fetch` and `web_scrape` calls when the requested URL
571/// was not explicitly provided by the user in the conversation.
572///
573/// The agent populates `user_provided_urls` whenever a user message is received,
574/// by extracting all http/https URLs from the raw input. This set persists across
575/// turns within a session and is cleared on `/clear`.
576///
577/// ## Bypass rules
578///
579/// - Tools not in the `guarded_tools` list (and not ending in `_fetch`) pass through.
580/// - If the URL in the tool call is a prefix-match or exact match of any URL in
581///   `user_provided_urls`, the call is allowed.
582/// - If `user_provided_urls` is empty (no URLs seen in this session at all), the call
583///   is blocked — the LLM must not fetch arbitrary URLs when the user never provided one.
584#[derive(Debug, Clone)]
585pub struct UrlGroundingVerifier {
586    guarded_tools: Vec<String>,
587    user_provided_urls: Arc<RwLock<HashSet<String>>>,
588}
589
590impl UrlGroundingVerifier {
591    #[must_use]
592    pub fn new(
593        config: &UrlGroundingVerifierConfig,
594        user_provided_urls: Arc<RwLock<HashSet<String>>>,
595    ) -> Self {
596        Self {
597            guarded_tools: config
598                .guarded_tools
599                .iter()
600                .map(|s| s.to_lowercase())
601                .collect(),
602            user_provided_urls,
603        }
604    }
605
606    fn is_guarded(&self, tool_name: &str) -> bool {
607        let lower = tool_name.to_lowercase();
608        self.guarded_tools.iter().any(|t| t == &lower) || lower.ends_with("_fetch")
609    }
610
611    /// Returns true if `url` is grounded — i.e., it appears in (or is a prefix of)
612    /// a URL from `user_provided_urls`.
613    fn is_grounded(url: &str, user_provided_urls: &HashSet<String>) -> bool {
614        let lower = url.to_lowercase();
615        user_provided_urls
616            .iter()
617            .any(|u| lower.starts_with(u.as_str()) || u.starts_with(lower.as_str()))
618    }
619}
620
621impl PreExecutionVerifier for UrlGroundingVerifier {
622    fn name(&self) -> &'static str {
623        "UrlGroundingVerifier"
624    }
625
626    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
627        if !self.is_guarded(tool_name) {
628            return VerificationResult::Allow;
629        }
630
631        let Some(url) = args.get("url").and_then(|v| v.as_str()) else {
632            return VerificationResult::Allow;
633        };
634
635        let urls = self.user_provided_urls.read();
636
637        if Self::is_grounded(url, &urls) {
638            return VerificationResult::Allow;
639        }
640
641        VerificationResult::Block {
642            reason: format!(
643                "[UrlGroundingVerifier] fetch rejected: URL '{url}' was not provided by the user",
644            ),
645        }
646    }
647}
648
649// ---------------------------------------------------------------------------
650// FirewallVerifier
651// ---------------------------------------------------------------------------
652
653/// Policy-enforcement verifier that inspects tool arguments for path traversal,
654/// environment-variable exfiltration, sensitive file access, and command chaining.
655///
656/// ## Scope delineation with `InjectionPatternVerifier`
657///
658/// `FirewallVerifier` enforces *configurable policy* (blocked paths, env vars, sensitive
659/// file patterns). `InjectionPatternVerifier` performs regex-based *injection pattern
660/// detection* (prompt injection, SSRF, etc.). They are complementary — belt-and-suspenders,
661/// the same intentional overlap documented at the top of this module.
662///
663/// Both verifiers may produce `Block` for the same call (e.g. command chaining detected
664/// by both). The pipeline stops at the first `Block` result.
665#[derive(Debug)]
666pub struct FirewallVerifier {
667    blocked_path_globs: Vec<glob::Pattern>,
668    blocked_env_vars: HashSet<String>,
669    exempt_tools: HashSet<String>,
670}
671
672/// Built-in path patterns that are always blocked regardless of config.
673static SENSITIVE_PATH_PATTERNS: LazyLock<Vec<glob::Pattern>> = LazyLock::new(|| {
674    let raw = [
675        "/etc/passwd",
676        "/etc/shadow",
677        "/etc/sudoers",
678        "~/.ssh/*",
679        "~/.aws/*",
680        "~/.gnupg/*",
681        "**/*.pem",
682        "**/*.key",
683        "**/id_rsa",
684        "**/id_ed25519",
685        "**/.env",
686        "**/credentials",
687    ];
688    raw.iter()
689        .filter_map(|p| {
690            glob::Pattern::new(p)
691                .map_err(|e| {
692                    tracing::error!(pattern = p, error = %e, "failed to compile built-in firewall path pattern");
693                    e
694                })
695                .ok()
696        })
697        .collect()
698});
699
/// Built-in environment-variable reference prefixes that trigger a block when found in
/// a tool argument.
///
/// Covers the `$VAR`, `${VAR}` and `%VAR` spellings for the `AWS_` and `ZEPH_`
/// families. Entries are uppercase because arguments are uppercased before matching.
static SENSITIVE_ENV_PREFIXES: &[&str] = &[
    "$AWS_",
    "$ZEPH_",
    "${AWS_",
    "${ZEPH_",
    "%AWS_",
    "%ZEPH_",
];
703
/// Top-level argument field names whose string values the firewall extracts and
/// inspects. Fields with other names are not looked at.
static INSPECTED_FIELDS: &[&str] = &[
    "command", "file_path", "path", "url", "query", "uri", "input", "args",
];
715
716impl FirewallVerifier {
717    /// Build a `FirewallVerifier` from config.
718    ///
719    /// Invalid glob patterns in `blocked_paths` are logged at WARN level and skipped.
720    #[must_use]
721    pub fn new(config: &FirewallVerifierConfig) -> Self {
722        let blocked_path_globs = config
723            .blocked_paths
724            .iter()
725            .filter_map(|p| {
726                glob::Pattern::new(p)
727                    .map_err(|e| {
728                        tracing::warn!(pattern = p, error = %e, "invalid glob pattern in firewall blocked_paths, skipping");
729                        e
730                    })
731                    .ok()
732            })
733            .collect();
734
735        let blocked_env_vars = config
736            .blocked_env_vars
737            .iter()
738            .map(|s| s.to_uppercase())
739            .collect();
740
741        let exempt_tools = config
742            .exempt_tools
743            .iter()
744            .map(|s| s.to_lowercase())
745            .collect();
746
747        Self {
748            blocked_path_globs,
749            blocked_env_vars,
750            exempt_tools,
751        }
752    }
753
754    /// Extract all string argument values from a tool call's JSON args.
755    fn collect_args(args: &serde_json::Value) -> Vec<String> {
756        let mut out = Vec::new();
757        match args {
758            serde_json::Value::Object(map) => {
759                for field in INSPECTED_FIELDS {
760                    if let Some(val) = map.get(*field) {
761                        Self::collect_strings(val, &mut out);
762                    }
763                }
764            }
765            serde_json::Value::String(s) => out.push(s.clone()),
766            _ => {}
767        }
768        out
769    }
770
771    fn collect_strings(val: &serde_json::Value, out: &mut Vec<String>) {
772        match val {
773            serde_json::Value::String(s) => out.push(s.clone()),
774            serde_json::Value::Array(arr) => {
775                for item in arr {
776                    Self::collect_strings(item, out);
777                }
778            }
779            _ => {}
780        }
781    }
782
783    fn scan_arg(&self, arg: &str) -> Option<VerificationResult> {
784        // Apply NFKC normalization consistent with DestructiveCommandVerifier.
785        let normalized: String = arg.nfkc().collect();
786        let lower = normalized.to_lowercase();
787
788        // Path traversal
789        if lower.contains("../") || lower.contains("..\\") {
790            return Some(VerificationResult::Block {
791                reason: format!(
792                    "[FirewallVerifier] path traversal pattern detected in argument: {arg}"
793                ),
794            });
795        }
796
797        // Sensitive paths (built-in)
798        for pattern in SENSITIVE_PATH_PATTERNS.iter() {
799            if pattern.matches(&normalized) || pattern.matches(&lower) {
800                return Some(VerificationResult::Block {
801                    reason: format!(
802                        "[FirewallVerifier] sensitive path pattern '{pattern}' matched in argument: {arg}"
803                    ),
804                });
805            }
806        }
807
808        // User-configured blocked paths
809        for pattern in &self.blocked_path_globs {
810            if pattern.matches(&normalized) || pattern.matches(&lower) {
811                return Some(VerificationResult::Block {
812                    reason: format!(
813                        "[FirewallVerifier] blocked path pattern '{pattern}' matched in argument: {arg}"
814                    ),
815                });
816            }
817        }
818
819        // Env var exfiltration (built-in prefixes)
820        let upper = normalized.to_uppercase();
821        for prefix in SENSITIVE_ENV_PREFIXES {
822            if upper.contains(*prefix) {
823                return Some(VerificationResult::Block {
824                    reason: format!(
825                        "[FirewallVerifier] env var exfiltration pattern '{prefix}' detected in argument: {arg}"
826                    ),
827                });
828            }
829        }
830
831        // User-configured blocked env vars (match $VAR or %VAR% patterns)
832        for var in &self.blocked_env_vars {
833            let dollar_form = format!("${var}");
834            let brace_form = format!("${{{var}}}");
835            let percent_form = format!("%{var}%");
836            if upper.contains(&dollar_form)
837                || upper.contains(&brace_form)
838                || upper.contains(&percent_form)
839            {
840                return Some(VerificationResult::Block {
841                    reason: format!(
842                        "[FirewallVerifier] blocked env var '{var}' detected in argument: {arg}"
843                    ),
844                });
845            }
846        }
847
848        None
849    }
850}
851
852impl PreExecutionVerifier for FirewallVerifier {
853    fn name(&self) -> &'static str {
854        "FirewallVerifier"
855    }
856
857    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
858        if self.exempt_tools.contains(&tool_name.to_lowercase()) {
859            return VerificationResult::Allow;
860        }
861
862        for arg in Self::collect_args(args) {
863            if let Some(result) = self.scan_arg(&arg) {
864                return result;
865            }
866        }
867
868        VerificationResult::Allow
869    }
870}
871
872// ---------------------------------------------------------------------------
873// Tests
874// ---------------------------------------------------------------------------
875
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    // --- Shared helpers ---

    /// Returns `true` when `verdict` is a `Block`, whatever its reason.
    fn is_block(verdict: &VerificationResult) -> bool {
        matches!(verdict, VerificationResult::Block { .. })
    }

    /// Returns `true` when `verdict` is a `Warn`.
    fn is_warn(verdict: &VerificationResult) -> bool {
        matches!(verdict, VerificationResult::Warn { .. })
    }

    /// Builds the `{"command": ...}` argument object for a shell command string.
    fn cmd(command: &str) -> serde_json::Value {
        json!({ "command": command })
    }

    /// Builds the `{"url": ...}` argument object.
    fn url_arg(url: &str) -> serde_json::Value {
        json!({ "url": url })
    }

    /// Builds the `{"file_path": ...}` argument object.
    fn file_arg(path: &str) -> serde_json::Value {
        json!({ "file_path": path })
    }

    // --- DestructiveCommandVerifier ---

    /// Verifier built from the default destructive-command configuration.
    fn dcv() -> DestructiveCommandVerifier {
        let config = DestructiveVerifierConfig::default();
        DestructiveCommandVerifier::new(&config)
    }

    /// Verifier whose only allowed path is `/tmp/build`.
    fn dcv_tmp_build() -> DestructiveCommandVerifier {
        DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
            allowed_paths: vec!["/tmp/build".to_string()],
            ..Default::default()
        })
    }

    /// A harmless listing command passes.
    #[test]
    fn allow_normal_command() {
        let verdict = dcv().verify("bash", &cmd("ls -la /tmp"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// Recursive removal of the root directory is blocked.
    #[test]
    fn block_rm_rf_root() {
        assert!(is_block(&dcv().verify("bash", &cmd("rm -rf /"))));
    }

    /// Overwriting a disk device with `dd` is blocked.
    #[test]
    fn block_dd_dev_zero() {
        let verdict = dcv().verify("bash", &cmd("dd if=/dev/zero of=/dev/sda"));
        assert!(is_block(&verdict));
    }

    /// Formatting a partition is blocked.
    #[test]
    fn block_mkfs() {
        assert!(is_block(&dcv().verify("bash", &cmd("mkfs.ext4 /dev/sda1"))));
    }

    /// `rm -rf` below a configured allowed path passes.
    #[test]
    fn allow_rm_rf_in_allowed_path() {
        let verdict = dcv_tmp_build().verify("bash", &cmd("rm -rf /tmp/build/artifacts"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// `rm -rf` outside the configured allowed path is still blocked.
    #[test]
    fn block_rm_rf_when_not_in_allowed_path() {
        let verdict = dcv_tmp_build().verify("bash", &cmd("rm -rf /home/user"));
        assert!(is_block(&verdict));
    }

    /// A destructive-looking string sent to a non-shell tool is not this verifier's concern.
    #[test]
    fn allow_non_shell_tool() {
        let verdict = dcv().verify("read_file", &json!({ "path": "rm -rf /" }));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// Patterns supplied through `extra_patterns` are enforced.
    #[test]
    fn block_extra_pattern() {
        let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
            extra_patterns: vec!["format c:".to_string()],
            ..Default::default()
        });
        assert!(is_block(&verifier.verify("bash", &cmd("format c:"))));
    }

    /// A command given as an argv-style array is checked as well.
    #[test]
    fn array_args_normalization() {
        let argv = json!({ "command": ["rm", "-rf", "/"] });
        assert!(is_block(&dcv().verify("bash", &argv)));
    }

    /// A destructive command wrapped in `bash -c '...'` is blocked.
    #[test]
    fn sh_c_wrapping_normalization() {
        assert!(is_block(&dcv().verify("bash", &cmd("bash -c 'rm -rf /'"))));
    }

    /// The classic fork bomb is blocked.
    #[test]
    fn fork_bomb_blocked() {
        assert!(is_block(&dcv().verify("bash", &cmd(":(){ :|:& };:"))));
    }

    /// Tool names listed in `shell_tools` are treated as shells.
    #[test]
    fn custom_shell_tool_name_blocked() {
        let verifier = DestructiveCommandVerifier::new(&DestructiveVerifierConfig {
            shell_tools: vec!["execute".to_string(), "run_command".to_string()],
            ..Default::default()
        });
        assert!(is_block(&verifier.verify("execute", &cmd("rm -rf /"))));
    }

    /// `terminal` is checked under the default configuration.
    #[test]
    fn terminal_tool_name_blocked_by_default() {
        assert!(is_block(&dcv().verify("terminal", &cmd("rm -rf /"))));
    }

    /// The default `shell_tools` list names `bash`, `shell` and `terminal` (case-insensitively).
    #[test]
    fn default_shell_tools_contains_bash_shell_terminal() {
        let names: HashSet<String> = DestructiveVerifierConfig::default()
            .shell_tools
            .iter()
            .map(|tool| tool.to_lowercase())
            .collect();
        for expected in ["bash", "shell", "terminal"] {
            assert!(names.contains(expected));
        }
    }

    // --- InjectionPatternVerifier ---

    /// Verifier built from the default injection-pattern configuration.
    fn ipv() -> InjectionPatternVerifier {
        let config = InjectionVerifierConfig::default();
        InjectionPatternVerifier::new(&config)
    }

    /// Ordinary search text passes.
    #[test]
    fn allow_clean_args() {
        let verdict = ipv().verify("search", &json!({ "query": "rust async traits" }));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// S2: a `memory_search` query that merely discusses SQL must NOT be blocked.
    #[test]
    fn allow_sql_discussion_in_query_field() {
        let args = json!({ "query": "explain SQL UNION SELECT vs JOIN" });
        assert_eq!(ipv().verify("memory_search", &args), VerificationResult::Allow);
    }

    /// S2: the `query` field must not trigger the SQL injection pattern.
    #[test]
    fn allow_sql_or_pattern_in_query_field() {
        let args = json!({ "query": "' OR '1'='1" });
        assert_eq!(ipv().verify("memory_search", &args), VerificationResult::Allow);
    }

    /// The same tautology in an `sql` field is blocked.
    #[test]
    fn block_sql_injection_in_non_query_field() {
        let verdict = ipv().verify("db_query", &json!({ "sql": "' OR '1'='1" }));
        assert!(is_block(&verdict));
    }

    /// A stacked `DROP TABLE` is blocked.
    #[test]
    fn block_drop_table() {
        let verdict = ipv().verify("db_query", &json!({ "input": "name'; DROP TABLE users" }));
        assert!(is_block(&verdict));
    }

    /// Relative traversal out of the working directory is blocked.
    #[test]
    fn block_path_traversal() {
        let verdict = ipv().verify("read_file", &json!({ "path": "../../../etc/passwd" }));
        assert!(is_block(&verdict));
    }

    /// S2: the SSRF warning fires on URL-like fields.
    #[test]
    fn warn_on_localhost_url_field() {
        let verdict = ipv().verify("http_get", &url_arg("http://localhost:8080/api"));
        assert!(is_warn(&verdict));
    }

    /// S2: localhost mentioned in a `query` field (not a URL field) must not warn.
    #[test]
    fn allow_localhost_in_non_url_field() {
        let args = json!({ "query": "connect to http://localhost:8080" });
        assert_eq!(ipv().verify("memory_search", &args), VerificationResult::Allow);
    }

    /// A private-range IP in a URL field warns.
    #[test]
    fn warn_on_private_ip_url_field() {
        let verdict = ipv().verify("fetch", &url_arg("http://192.168.1.1/admin"));
        assert!(is_warn(&verdict));
    }

    /// An allowlisted localhost origin passes without a warning.
    #[test]
    fn allow_localhost_when_allowlisted() {
        let verifier = InjectionPatternVerifier::new(&InjectionVerifierConfig {
            allowlisted_urls: vec!["http://localhost:3000".to_string()],
            ..Default::default()
        });
        let verdict = verifier.verify("http_get", &url_arg("http://localhost:3000/api"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// `UNION SELECT` outside the `query` field is blocked.
    #[test]
    fn block_union_select_in_non_query_field() {
        let args = json!({ "input": "id=1 UNION SELECT password FROM users" });
        assert!(is_block(&ipv().verify("db_query", &args)));
    }

    /// S2: `UNION SELECT` in a `query` field is a SQL discussion, not an injection.
    #[test]
    fn allow_union_select_in_query_field() {
        let args = json!({ "query": "id=1 UNION SELECT password FROM users" });
        assert_eq!(ipv().verify("memory_search", &args), VerificationResult::Allow);
    }

    // --- FIX-1: Unicode normalization bypass ---

    /// U+FF0F FULLWIDTH SOLIDUS looks like `/` and NFKC-normalizes to `/`,
    /// so `rm -rf ／` must be blocked like `rm -rf /`.
    #[test]
    fn block_rm_rf_unicode_homoglyph() {
        assert!(is_block(&dcv().verify("bash", &cmd("rm -rf \u{FF0F}"))));
    }

    // --- FIX-2: Path traversal in is_allowed_path ---

    /// `/tmp/build/../../etc` lexically resolves to `/etc`, which is NOT under
    /// the allowed `/tmp/build`, so the command must be blocked.
    #[test]
    fn path_traversal_not_allowed_via_dotdot() {
        let verdict = dcv_tmp_build().verify("bash", &cmd("rm -rf /tmp/build/../../etc"));
        assert!(is_block(&verdict));
    }

    /// `/tmp/build/sub/../artifacts` resolves to `/tmp/build/artifacts` and stays allowed.
    #[test]
    fn allowed_path_with_dotdot_stays_in_allowed() {
        let verdict = dcv_tmp_build().verify("bash", &cmd("rm -rf /tmp/build/sub/../artifacts"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    // --- FIX-3: Double-nested shell wrapping ---

    /// Two layers of `bash -c` do not hide the destructive command.
    #[test]
    fn double_nested_bash_c_blocked() {
        let verdict = dcv().verify("bash", &cmd("bash -c \"bash -c 'rm -rf /'\""));
        assert!(is_block(&verdict));
    }

    /// An `env VAR=value` prefix does not hide the destructive command.
    #[test]
    fn env_prefix_stripping_blocked() {
        let verdict = dcv().verify("bash", &cmd("env FOO=bar bash -c 'rm -rf /'"));
        assert!(is_block(&verdict));
    }

    /// An `exec` prefix does not hide the destructive command.
    #[test]
    fn exec_prefix_stripping_blocked() {
        let verdict = dcv().verify("bash", &cmd("exec bash -c 'rm -rf /'"));
        assert!(is_block(&verdict));
    }

    // --- FIX-4: SSRF host extraction (not substring match) ---

    /// In `evil.com/?r=http://localhost` the request host is `evil.com`, not
    /// localhost, so no SSRF warning is expected.
    #[test]
    fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
        let verdict = ipv().verify("http_get", &url_arg("http://evil.com/?r=http://localhost"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// FIX-7: `http://localhost` with no trailing slash or port must warn.
    #[test]
    fn ssrf_triggered_for_bare_localhost_no_port() {
        assert!(is_warn(&ipv().verify("http_get", &url_arg("http://localhost"))));
    }

    /// localhost followed by a path warns.
    #[test]
    fn ssrf_triggered_for_localhost_with_path() {
        let verdict = ipv().verify("http_get", &url_arg("http://localhost/api/v1"));
        assert!(is_warn(&verdict));
    }

    // --- Verifier chain: first Block wins, Warn continues ---

    /// Default destructive + injection verifiers, in pipeline order.
    fn default_chain() -> Vec<Box<dyn PreExecutionVerifier>> {
        vec![
            Box::new(DestructiveCommandVerifier::new(
                &DestructiveVerifierConfig::default(),
            )),
            Box::new(InjectionPatternVerifier::new(
                &InjectionVerifierConfig::default(),
            )),
        ]
    }

    /// Walking the chain stops at the first verifier that blocks.
    #[test]
    fn chain_first_block_wins() {
        let args = cmd("rm -rf /");
        // `any` short-circuits exactly where a pipeline would break out.
        let blocked = default_chain()
            .iter()
            .map(|verifier| verifier.verify("bash", &args))
            .any(|verdict| is_block(&verdict));
        assert!(blocked);
    }

    /// A localhost URL in the `url` field: the destructive verifier allows, the
    /// injection verifier warns, and nothing in the chain blocks.
    #[test]
    fn chain_warn_continues() {
        let args = url_arg("http://localhost:8080/api");
        let verdicts: Vec<VerificationResult> = default_chain()
            .iter()
            .map(|verifier| verifier.verify("http_get", &args))
            .collect();
        assert!(verdicts.iter().any(is_warn));
        assert!(!verdicts.iter().any(is_block));
    }

    // --- UrlGroundingVerifier ---

    /// Default-config grounding verifier seeded with `urls` (stored lower-cased).
    fn ugv(urls: &[&str]) -> UrlGroundingVerifier {
        let provided = urls
            .iter()
            .map(|url| url.to_lowercase())
            .collect::<HashSet<String>>();
        let shared = Arc::new(RwLock::new(provided));
        UrlGroundingVerifier::new(&UrlGroundingVerifierConfig::default(), shared)
    }

    /// A URL the user supplied may be fetched.
    #[test]
    fn url_grounding_allows_user_provided_url() {
        let verdict = ugv(&["https://docs.anthropic.com/models"])
            .verify("fetch", &url_arg("https://docs.anthropic.com/models"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// A URL unrelated to anything the user supplied is blocked.
    #[test]
    fn url_grounding_blocks_hallucinated_url() {
        let verdict = ugv(&["https://example.com/page"])
            .verify("fetch", &url_arg("https://api.anthropic.ai/v1/models"));
        assert!(is_block(&verdict));
    }

    /// With no user URLs recorded at all, a fetch is blocked.
    #[test]
    fn url_grounding_blocks_when_no_user_urls_at_all() {
        let verdict = ugv(&[]).verify("fetch", &url_arg("https://api.anthropic.ai/v1/models"));
        assert!(is_block(&verdict));
    }

    /// Tools outside the guarded set are not subject to grounding.
    #[test]
    fn url_grounding_allows_non_guarded_tool() {
        let verdict = ugv(&[]).verify("read_file", &json!({ "path": "/etc/hosts" }));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// A tool named with the `_fetch` suffix is guarded.
    #[test]
    fn url_grounding_guards_fetch_suffix_tool() {
        let verdict = ugv(&[]).verify("http_fetch", &url_arg("https://evil.com/"));
        assert!(is_block(&verdict));
    }

    /// `web_scrape` with a user-provided URL passes.
    #[test]
    fn url_grounding_allows_web_scrape_with_provided_url() {
        let args = json!({ "url": "https://rust-lang.org/", "select": "h1" });
        let verdict = ugv(&["https://rust-lang.org/"]).verify("web_scrape", &args);
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// The user provided `https://docs.rs/`; fetching a sub-path of it passes.
    #[test]
    fn url_grounding_allows_prefix_match() {
        let verdict = ugv(&["https://docs.rs/"])
            .verify("fetch", &url_arg("https://docs.rs/tokio/latest/tokio/"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    // --- Regression: #2191 — fetch URL hallucination ---

    /// REG-2191-1: exact reproduction of the bug scenario.
    /// The user asks "do you know Anthropic?" without providing a URL and the agent
    /// hallucinates `https://api.anthropic.ai/v1/models`. With an empty
    /// `user_provided_urls` set the fetch must be blocked.
    #[test]
    fn reg_2191_hallucinated_api_endpoint_blocked_with_empty_session() {
        let verdict = ugv(&[]).verify("fetch", &url_arg("https://api.anthropic.ai/v1/models"));
        assert!(
            is_block(&verdict),
            "fetch must be blocked when no user URL was provided — this is the #2191 regression"
        );
    }

    /// REG-2191-2: passthrough — the user explicitly pasted the URL, so the fetch proceeds.
    #[test]
    fn reg_2191_user_provided_url_allows_fetch() {
        let verdict = ugv(&["https://api.anthropic.com/v1/models"])
            .verify("fetch", &url_arg("https://api.anthropic.com/v1/models"));
        assert_eq!(
            verdict,
            VerificationResult::Allow,
            "fetch must be allowed when the URL was explicitly provided by the user"
        );
    }

    /// REG-2191-3: `web_scrape` variant — same rejection for the `web_scrape` tool.
    #[test]
    fn reg_2191_web_scrape_hallucinated_url_blocked() {
        let args = json!({ "url": "https://api.anthropic.ai/v1/models", "select": "body" });
        let verdict = ugv(&[]).verify("web_scrape", &args);
        assert!(
            is_block(&verdict),
            "web_scrape must be blocked for hallucinated URL with empty user_provided_urls"
        );
    }

    /// REG-2191-4: a URL that appeared only in a system/assistant message is not in
    /// `user_provided_urls` (the agent populates the set from user messages only).
    /// The verifier cannot distinguish message roles — it only sees the set — so
    /// this confirms that an empty set always blocks.
    #[test]
    fn reg_2191_empty_url_set_always_blocks_fetch() {
        let verdict = ugv(&[]).verify("fetch", &url_arg("https://docs.anthropic.com/something"));
        assert!(is_block(&verdict));
    }

    /// REG-2191-5: URL matching is case-insensitive — the user pasted a mixed-case
    /// URL, which is stored lower-cased and still matches the fetched URL.
    #[test]
    fn reg_2191_case_insensitive_url_match_allows_fetch() {
        let verdict = ugv(&["https://Docs.Anthropic.COM/models"])
            .verify("fetch", &url_arg("https://docs.anthropic.com/models/detail"));
        assert_eq!(
            verdict,
            VerificationResult::Allow,
            "URL matching must be case-insensitive"
        );
    }

    /// REG-2191-6: a tool name ending in `_fetch` is guarded regardless of config.
    /// An MCP-registered `anthropic_fetch` tool must not bypass the gate.
    #[test]
    fn reg_2191_mcp_fetch_suffix_tool_blocked_with_empty_session() {
        let verdict =
            ugv(&[]).verify("anthropic_fetch", &url_arg("https://api.anthropic.ai/v1/models"));
        assert!(
            is_block(&verdict),
            "MCP tools ending in _fetch must be guarded even if not in guarded_tools list"
        );
    }

    /// REG-2191-7: reverse prefix — the user provided a deep URL and the agent
    /// fetches the root, i.e. `user_url` `starts_with` `fetch_url`.
    #[test]
    fn reg_2191_reverse_prefix_match_allows_fetch() {
        let verdict = ugv(&["https://docs.rs/tokio/latest/tokio/index.html"])
            .verify("fetch", &url_arg("https://docs.rs/"));
        assert_eq!(
            verdict,
            VerificationResult::Allow,
            "reverse prefix: fetched URL is a prefix of user-provided URL — should be allowed"
        );
    }

    /// REG-2191-8: a different domain that merely embeds the provided host in its
    /// path must be blocked.
    #[test]
    fn reg_2191_different_domain_blocked() {
        let verdict =
            ugv(&["https://docs.rs/"]).verify("fetch", &url_arg("https://evil.com/docs.rs/exfil"));
        assert!(
            is_block(&verdict),
            "different domain must not be allowed even if path looks similar"
        );
    }

    /// REG-2191-9: arguments without a `url` field are allowed — only the explicit
    /// `url` field is checked, and some fetch-like tools use other argument names.
    #[test]
    fn reg_2191_missing_url_field_allows_fetch() {
        let args = json!({ "endpoint": "https://api.anthropic.ai/v1/models" });
        assert_eq!(
            ugv(&[]).verify("fetch", &args),
            VerificationResult::Allow,
            "missing url field must not trigger blocking — only explicit url field is checked"
        );
    }

    /// REG-2191-10: the verifier can be constructed with `enabled = false`.
    ///
    /// This is an API documentation test, not a behaviour test: per the original
    /// note, the `enabled` flag is honoured by the pipeline (which skips disabled
    /// verifiers), not by `verify()` itself, so the verdict of the direct call is
    /// deliberately not asserted.
    #[test]
    fn reg_2191_disabled_verifier_allows_all() {
        let config = UrlGroundingVerifierConfig {
            enabled: false,
            ..UrlGroundingVerifierConfig::default()
        };
        let no_urls = Arc::new(RwLock::new(HashSet::<String>::new()));
        let verifier = UrlGroundingVerifier::new(&config, no_urls);
        // Only checks that construction and a direct call work with enabled=false.
        let _ = verifier.verify("fetch", &url_arg("https://example.com/"));
    }

    // --- FirewallVerifier ---

    /// Verifier built from the default firewall configuration.
    fn fwv() -> FirewallVerifier {
        let config = FirewallVerifierConfig::default();
        FirewallVerifier::new(&config)
    }

    /// Enabled firewall verifier with explicit user lists for paths, env vars and
    /// exempt tools.
    fn fwv_custom(paths: &[&str], env_vars: &[&str], exempt: &[&str]) -> FirewallVerifier {
        fn owned(items: &[&str]) -> Vec<String> {
            items.iter().map(|item| (*item).to_string()).collect()
        }
        FirewallVerifier::new(&FirewallVerifierConfig {
            enabled: true,
            blocked_paths: owned(paths),
            blocked_env_vars: owned(env_vars),
            exempt_tools: owned(exempt),
        })
    }

    /// A listing of an ordinary directory passes.
    #[test]
    fn firewall_allows_normal_path() {
        let verdict = fwv().verify("shell", &cmd("ls /tmp/build"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// `../` sequences in a file path are blocked.
    #[test]
    fn firewall_blocks_path_traversal() {
        let verdict = fwv().verify("read", &file_arg("../../etc/passwd"));
        assert!(is_block(&verdict), "path traversal must be blocked");
    }

    /// `/etc/passwd` is blocked under the default configuration.
    #[test]
    fn firewall_blocks_etc_passwd() {
        let verdict = fwv().verify("read", &file_arg("/etc/passwd"));
        assert!(is_block(&verdict), "/etc/passwd must be blocked");
    }

    /// A private SSH key path is blocked under the default configuration.
    #[test]
    fn firewall_blocks_ssh_key() {
        let verdict = fwv().verify("read", &file_arg("~/.ssh/id_rsa"));
        assert!(is_block(&verdict), "SSH key path must be blocked");
    }

    /// Echoing an `AWS_`-prefixed variable is blocked.
    #[test]
    fn firewall_blocks_aws_env_var() {
        let verdict = fwv().verify("shell", &cmd("echo $AWS_SECRET_ACCESS_KEY"));
        assert!(is_block(&verdict), "AWS env var exfiltration must be blocked");
    }

    /// Reading a `ZEPH_`-prefixed variable in brace form is blocked.
    #[test]
    fn firewall_blocks_zeph_env_var() {
        let verdict = fwv().verify("shell", &cmd("cat ${ZEPH_CLAUDE_API_KEY}"));
        assert!(is_block(&verdict), "ZEPH env var exfiltration must be blocked");
    }

    /// `/etc/passwd` would normally be blocked, but an exempt tool skips the check.
    #[test]
    fn firewall_exempt_tool_bypasses_check() {
        let verdict = fwv_custom(&[], &[], &["read"]).verify("read", &file_arg("/etc/passwd"));
        assert_eq!(verdict, VerificationResult::Allow);
    }

    /// A user-configured glob blocks matching paths.
    #[test]
    fn firewall_custom_blocked_path() {
        let verdict = fwv_custom(&["/data/secrets/*"], &[], &[])
            .verify("read", &file_arg("/data/secrets/master.key"));
        assert!(is_block(&verdict), "custom blocked path must be blocked");
    }

    /// A user-configured env var name blocks `$VAR` references.
    #[test]
    fn firewall_custom_blocked_env_var() {
        let verdict = fwv_custom(&[], &["MY_SECRET"], &[]).verify("shell", &cmd("echo $MY_SECRET"));
        assert!(is_block(&verdict), "custom blocked env var must be blocked");
    }

    /// An invalid glob must not panic at construction, and the valid pattern
    /// configured next to it must still be enforced.
    #[test]
    fn firewall_invalid_glob_is_skipped() {
        let verifier = fwv_custom(&["[invalid-glob", "/valid/path/*"], &[], &[]);
        let verdict = verifier.verify("read", &json!({ "path": "/valid/path/file.txt" }));
        assert!(is_block(&verdict));
    }

    /// Deserializing an empty TOML document yields an enabled config with empty lists.
    #[test]
    fn firewall_config_default_deserialization() {
        let parsed = toml::from_str::<FirewallVerifierConfig>("").unwrap();
        assert!(parsed.enabled);
        assert!(parsed.blocked_paths.is_empty());
        assert!(parsed.blocked_env_vars.is_empty());
        assert!(parsed.exempt_tools.is_empty());
    }
}
zeph_tools/verifier.rs

zeph_tools/
verifier.rs