zeph_tools/
verifier.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pre-execution verification for tool calls.
5//!
6//! Based on the `TrustBench` pattern (arXiv:2603.09157): intercept tool calls before
7//! execution to block or warn on destructive or injection patterns.
8//!
9//! ## Blocklist separation
10//!
11//! `DESTRUCTIVE_PATTERNS` (this module) is intentionally separate from
12//! `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`. The two lists serve different purposes:
13//!
14//! - `DEFAULT_BLOCKED_COMMANDS` — shell safety net: prevents the *shell executor* from
15//!   running network tools (`curl`, `wget`, `nc`) and a few destructive commands.
16//!   It is applied at tool-execution time by `ShellExecutor`.
17//!
18//! - `DESTRUCTIVE_PATTERNS` — pre-execution guard: targets filesystem/system destruction
19//!   commands (disk formats, wipefs, fork bombs, recursive permission changes).
20//!   It runs *before* dispatch, in the LLM-call hot path, and must not be conflated
21//!   with the shell safety net to avoid accidental allow-listing via config drift.
22//!
23//! Overlap (3 entries: `rm -rf /`, `mkfs`, `dd if=`) is intentional — belt-and-suspenders.
24
25use std::collections::HashSet;
26use std::sync::{Arc, LazyLock, RwLock};
27
28use regex::Regex;
29use serde::{Deserialize, Serialize};
30use unicode_normalization::UnicodeNormalization as _;
31
/// Serde default helper for boolean switches: an omitted flag deserializes to `true`.
fn default_true() -> bool {
    true
}
35
/// Serde default for `shell_tools`: the tool names treated as shell executors
/// when the config does not list any.
fn default_shell_tools() -> Vec<String> {
    ["bash", "shell", "terminal"]
        .into_iter()
        .map(String::from)
        .collect()
}
43
/// Result of a pre-execution verification check.
///
/// Returned by [`PreExecutionVerifier::verify`]. The type is `#[must_use]`, so the
/// compiler warns when a caller drops a verdict without looking at it.
#[must_use]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerificationResult {
    /// Tool call is safe to proceed.
    Allow,
    /// Tool call must be blocked. Executor returns an error to the LLM.
    /// `reason` is the human-readable explanation; the verifiers in this module
    /// prefix it with their own name in square brackets.
    Block { reason: String },
    /// Tool call proceeds but a warning is logged and tracked in metrics (metrics-only,
    /// not visible to the LLM or user beyond the TUI security panel).
    /// `message` carries the warning text.
    Warn { message: String },
}
56
/// Pre-execution verification trait. Implementations intercept tool calls
/// before the executor runs them. Based on `TrustBench` pattern (arXiv:2603.09157).
///
/// Sync by design: verifiers inspect arguments only — no I/O needed.
/// Object-safe: uses `&self` and returns a concrete enum.
///
/// The `Send + Sync + Debug` supertraits let implementations be shared across
/// threads and included in debug output.
pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
    /// Verify whether a tool call should proceed.
    ///
    /// * `tool_name` — name of the tool about to be invoked.
    /// * `args` — the JSON arguments of the call; implementations only read them.
    ///
    /// Returns the verdict for this single call.
    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;

    /// Human-readable name for logging and TUI display.
    fn name(&self) -> &'static str;
}
69
70// ---------------------------------------------------------------------------
71// Config types
72// ---------------------------------------------------------------------------
73
/// Configuration for the destructive command verifier.
///
/// Every field carries a serde default, so an empty config section deserializes to
/// the same values as the `Default` implementation.
///
/// `allowed_paths`: when **empty** (the default), ALL destructive commands are denied.
/// This is a conservative default: to allow e.g. `rm -rf /tmp/build` you must
/// explicitly add `/tmp/build` to `allowed_paths`.
///
/// `shell_tools`: the set of tool names considered shell executors. Defaults to
/// `["bash", "shell", "terminal"]`. Add custom names here if your setup registers
/// shell tools under different names (e.g., via MCP or ACP integrations).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DestructiveVerifierConfig {
    /// On/off switch for this verifier; defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Explicit path prefixes under which destructive commands are permitted.
    /// **Empty = deny-all destructive commands** (safest default).
    #[serde(default)]
    pub allowed_paths: Vec<String>,
    /// Additional command patterns to treat as destructive (substring match).
    /// The verifier lowercases these before comparing them with the lowercased command.
    #[serde(default)]
    pub extra_patterns: Vec<String>,
    /// Tool names to treat as shell executors (case-insensitive).
    /// Default: `["bash", "shell", "terminal"]`.
    #[serde(default = "default_shell_tools")]
    pub shell_tools: Vec<String>,
}
99
100impl Default for DestructiveVerifierConfig {
101    fn default() -> Self {
102        Self {
103            enabled: true,
104            allowed_paths: Vec::new(),
105            extra_patterns: Vec::new(),
106            shell_tools: default_shell_tools(),
107        }
108    }
109}
110
/// Configuration for the injection pattern verifier.
///
/// Every field carries a serde default, so an empty config section deserializes to
/// the same values as the `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct InjectionVerifierConfig {
    /// On/off switch for this verifier; defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Additional injection patterns to block (regex strings).
    /// Invalid regexes are logged at WARN level and skipped.
    #[serde(default)]
    pub extra_patterns: Vec<String>,
    /// URLs explicitly permitted even if they match SSRF patterns.
    /// Entries are lowercased by the verifier and compared case-insensitively.
    #[serde(default)]
    pub allowlisted_urls: Vec<String>,
}
124
125impl Default for InjectionVerifierConfig {
126    fn default() -> Self {
127        Self {
128            enabled: true,
129            extra_patterns: Vec::new(),
130            allowlisted_urls: Vec::new(),
131        }
132    }
133}
134
/// Configuration for the URL grounding verifier.
///
/// When enabled, `fetch` and `web_scrape` calls are blocked unless the URL
/// appears in the set of URLs extracted from user messages (`user_provided_urls`).
/// This prevents the LLM from hallucinating API endpoints and calling fetch with
/// fabricated URLs that were never supplied by the user.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct UrlGroundingVerifierConfig {
    /// On/off switch for this verifier; defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Tool IDs subject to URL grounding checks. Any tool whose name ends with `_fetch`
    /// is also guarded regardless of this list.
    /// Defaults to `["fetch", "web_scrape"]`; names are compared case-insensitively.
    #[serde(default = "default_guarded_tools")]
    pub guarded_tools: Vec<String>,
}
150
/// Serde default for `guarded_tools`: the tool names subject to URL grounding
/// when the config does not list any.
fn default_guarded_tools() -> Vec<String> {
    ["fetch", "web_scrape"]
        .into_iter()
        .map(String::from)
        .collect()
}
154
155impl Default for UrlGroundingVerifierConfig {
156    fn default() -> Self {
157        Self {
158            enabled: true,
159            guarded_tools: default_guarded_tools(),
160        }
161    }
162}
163
/// Top-level configuration for all pre-execution verifiers.
///
/// Each nested section falls back to its own `Default` when omitted, so an empty
/// config enables every verifier with its stock settings.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct PreExecutionVerifierConfig {
    /// Master on/off switch; defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Settings for the destructive shell command verifier.
    #[serde(default)]
    pub destructive_commands: DestructiveVerifierConfig,
    /// Settings for the injection pattern verifier.
    #[serde(default)]
    pub injection_patterns: InjectionVerifierConfig,
    /// Settings for the URL grounding verifier.
    #[serde(default)]
    pub url_grounding: UrlGroundingVerifierConfig,
}
176
177impl Default for PreExecutionVerifierConfig {
178    fn default() -> Self {
179        Self {
180            enabled: true,
181            destructive_commands: DestructiveVerifierConfig::default(),
182            injection_patterns: InjectionVerifierConfig::default(),
183            url_grounding: UrlGroundingVerifierConfig::default(),
184        }
185    }
186}
187
188// ---------------------------------------------------------------------------
189// DestructiveCommandVerifier
190// ---------------------------------------------------------------------------
191
/// Destructive command patterns for `DestructiveCommandVerifier`.
///
/// Intentionally separate from `DEFAULT_BLOCKED_COMMANDS` in `shell.rs` — see module
/// docs for the semantic distinction between the two lists.
///
/// Entries are matched as plain substrings of the lowercased command, so they must
/// be written in lowercase (which is why the `-R` flags below appear as `-r`).
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletes of an absolute or home-relative target. As substrings these
    // also match `rm -rf /some/dir`; `allowed_paths` is the escape hatch for that.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk / filesystem tooling.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bomb, spaced and compact spellings.
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes (lowercased `-R`).
    "chmod -r 777 /",
    "chown -r",
];
210
/// Verifier that blocks destructive shell commands (e.g., `rm -rf /`, `dd`, `mkfs`)
/// before the shell tool executes them.
///
/// Applies to any tool whose name is in the configured `shell_tools` set (default:
/// `["bash", "shell", "terminal"]`). For commands targeting a specific path, execution
/// is allowed when the path starts with one of the configured `allowed_paths`. When
/// `allowed_paths` is empty (the default), **all** matching destructive commands are blocked.
///
/// All three lists are stored lowercased by the constructor; commands are lowercased
/// before comparison as well.
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    // Lowercased tool names treated as shell executors.
    shell_tools: Vec<String>,
    // Lowercased path prefixes under which destructive commands are tolerated.
    allowed_paths: Vec<String>,
    // Lowercased user-supplied substring patterns, checked after the built-in list.
    extra_patterns: Vec<String>,
}
224
225impl DestructiveCommandVerifier {
226    #[must_use]
227    pub fn new(config: &DestructiveVerifierConfig) -> Self {
228        Self {
229            shell_tools: config
230                .shell_tools
231                .iter()
232                .map(|s| s.to_lowercase())
233                .collect(),
234            allowed_paths: config
235                .allowed_paths
236                .iter()
237                .map(|s| s.to_lowercase())
238                .collect(),
239            extra_patterns: config
240                .extra_patterns
241                .iter()
242                .map(|s| s.to_lowercase())
243                .collect(),
244        }
245    }
246
247    fn is_shell_tool(&self, tool_name: &str) -> bool {
248        let lower = tool_name.to_lowercase();
249        self.shell_tools.iter().any(|t| t == &lower)
250    }
251
252    /// Extract the effective command string from `args`.
253    ///
254    /// Supports:
255    /// - `{"command": "rm -rf /"}` (string)
256    /// - `{"command": ["rm", "-rf", "/"]}` (array — joined with spaces)
257    /// - `{"command": "bash -c 'rm -rf /'"}` (shell `-c` unwrapping, looped up to 8 levels)
258    /// - `env VAR=val bash -c '...'` and `exec bash -c '...'` prefix stripping
259    ///
260    /// NFKC-normalizes the result to defeat Unicode homoglyph bypasses.
261    fn extract_command(args: &serde_json::Value) -> Option<String> {
262        let raw = match args.get("command") {
263            Some(serde_json::Value::String(s)) => s.clone(),
264            Some(serde_json::Value::Array(arr)) => arr
265                .iter()
266                .filter_map(|v| v.as_str())
267                .collect::<Vec<_>>()
268                .join(" "),
269            _ => return None,
270        };
271        // NFKC-normalize + lowercase to defeat Unicode homoglyph and case bypasses.
272        let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
273        // Loop: strip shell wrapper prefixes up to 8 levels deep.
274        // Handles double-nested: `bash -c "bash -c 'rm -rf /'"`.
275        for _ in 0..8 {
276            let trimmed = current.trim().to_owned();
277            // Strip `env VAR=value ... CMD` prefix (one or more VAR=value tokens).
278            let after_env = Self::strip_env_prefix(&trimmed);
279            // Strip `exec ` prefix.
280            let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
281            // Strip interpreter wrapper: `bash -c '...'` / `sh -c '...'` / `zsh -c '...'`.
282            let mut unwrapped = false;
283            for interp in &["bash -c ", "sh -c ", "zsh -c "] {
284                if let Some(rest) = after_exec.strip_prefix(interp) {
285                    let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
286                    current.clone_from(&script.to_owned());
287                    unwrapped = true;
288                    break;
289                }
290            }
291            if !unwrapped {
292                return Some(after_exec.to_owned());
293            }
294        }
295        Some(current)
296    }
297
298    /// Strip leading `env VAR=value` tokens from a command string.
299    /// Returns the remainder after all `KEY=VALUE` pairs are consumed.
300    fn strip_env_prefix(cmd: &str) -> &str {
301        let mut rest = cmd;
302        // `env` keyword is optional; strip it if present.
303        if let Some(after_env) = rest.strip_prefix("env ") {
304            rest = after_env.trim_start();
305        }
306        // Consume `KEY=VALUE` tokens.
307        loop {
308            // A VAR=value token: identifier chars + '=' + non-space chars.
309            let mut chars = rest.chars();
310            let key_end = chars
311                .by_ref()
312                .take_while(|c| c.is_alphanumeric() || *c == '_')
313                .count();
314            if key_end == 0 {
315                break;
316            }
317            let remainder = &rest[key_end..];
318            if let Some(after_eq) = remainder.strip_prefix('=') {
319                // Consume the value (up to the first space).
320                let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
321                rest = after_eq[val_end..].trim_start();
322            } else {
323                break;
324            }
325        }
326        rest
327    }
328
329    /// Returns `true` if `command` targets a path that is covered by `allowed_paths`.
330    ///
331    /// Uses lexical normalization (resolves `..` and `.` without filesystem access)
332    /// so that `/tmp/build/../../etc` is correctly resolved to `/etc` before comparison,
333    /// defeating path traversal bypasses like `/tmp/build/../../etc/passwd`.
334    fn is_allowed_path(&self, command: &str) -> bool {
335        if self.allowed_paths.is_empty() {
336            return false;
337        }
338        let tokens: Vec<&str> = command.split_whitespace().collect();
339        for token in &tokens {
340            let t = token.trim_matches(|c| c == '\'' || c == '"');
341            if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
342                let normalized = Self::lexical_normalize(std::path::Path::new(t));
343                let n_lower = normalized.to_string_lossy().to_lowercase();
344                if self
345                    .allowed_paths
346                    .iter()
347                    .any(|p| n_lower.starts_with(p.as_str()))
348                {
349                    return true;
350                }
351            }
352        }
353        false
354    }
355
356    /// Lexically normalize a path by resolving `.` and `..` components without
357    /// hitting the filesystem. Does not require the path to exist.
358    fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
359        let mut out = std::path::PathBuf::new();
360        for component in p.components() {
361            match component {
362                std::path::Component::ParentDir => {
363                    out.pop();
364                }
365                std::path::Component::CurDir => {}
366                other => out.push(other),
367            }
368        }
369        out
370    }
371
372    fn check_patterns(command: &str) -> Option<&'static str> {
373        DESTRUCTIVE_PATTERNS
374            .iter()
375            .find(|&pat| command.contains(pat))
376            .copied()
377    }
378
379    fn check_extra_patterns(&self, command: &str) -> Option<String> {
380        self.extra_patterns
381            .iter()
382            .find(|pat| command.contains(pat.as_str()))
383            .cloned()
384    }
385}
386
387impl PreExecutionVerifier for DestructiveCommandVerifier {
388    fn name(&self) -> &'static str {
389        "DestructiveCommandVerifier"
390    }
391
392    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
393        if !self.is_shell_tool(tool_name) {
394            return VerificationResult::Allow;
395        }
396
397        let Some(command) = Self::extract_command(args) else {
398            return VerificationResult::Allow;
399        };
400
401        if let Some(pat) = Self::check_patterns(&command) {
402            if self.is_allowed_path(&command) {
403                return VerificationResult::Allow;
404            }
405            return VerificationResult::Block {
406                reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
407            };
408        }
409
410        if let Some(pat) = self.check_extra_patterns(&command) {
411            if self.is_allowed_path(&command) {
412                return VerificationResult::Allow;
413            }
414            return VerificationResult::Block {
415                reason: format!(
416                    "[{}] extra destructive pattern '{}' detected",
417                    self.name(),
418                    pat
419                ),
420            };
421        }
422
423        VerificationResult::Allow
424    }
425}
426
427// ---------------------------------------------------------------------------
428// InjectionPatternVerifier
429// ---------------------------------------------------------------------------
430
/// High-confidence injection block patterns applied to string field values in tool args.
///
/// These require *structural* patterns, not just keywords — e.g., `UNION SELECT` is
/// blocked but a plain mention of "SELECT" is not. This avoids false positives for
/// `memory_search` queries discussing SQL or coding assistants writing SQL examples.
///
/// Compiled once on first use. The patterns are string literals, so a compile failure
/// is a programming error and panics via `expect`.
static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    [
        // SQL injection structural patterns (case-insensitive via `(?i)`)
        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
        r"(?i)'\s*OR\s*1\s*=\s*1",
        r"(?i);\s*DROP\s+TABLE",
        r"(?i)UNION\s+SELECT",
        r"(?i)'\s*;\s*SELECT",
        // Command injection via shell metacharacters with dangerous commands.
        // These carry no `(?i)` flag, so the command names match in lowercase only.
        r";\s*rm\s+",
        r"\|\s*rm\s+",
        r"&&\s*rm\s+",
        r";\s*curl\s+",
        r"\|\s*curl\s+",
        r"&&\s*curl\s+",
        r";\s*wget\s+",
        // Path traversal to sensitive system files
        r"\.\./\.\./\.\./etc/passwd",
        r"\.\./\.\./\.\./etc/shadow",
        r"\.\./\.\./\.\./windows/",
        // Any three-level traversal with either slash style; this also matches the
        // three specific forward-slash targets listed above.
        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
    ]
    .iter()
    .map(|s| Regex::new(s).expect("static pattern must compile"))
    .collect()
});
462
463/// SSRF host patterns — matched against the *extracted host* (not the full URL string).
464/// This prevents bypasses like `http://evil.com/?r=http://localhost` where the SSRF
465/// target appears only in a query parameter, not as the actual request host.
466/// Bare hostnames (no port/path) are included alongside `host:port` variants.
467static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
468    [
469        // localhost — with or without port
470        r"^localhost$",
471        r"^localhost:",
472        // IPv4 loopback
473        r"^127\.0\.0\.1$",
474        r"^127\.0\.0\.1:",
475        // IPv6 loopback
476        r"^\[::1\]$",
477        r"^\[::1\]:",
478        // AWS metadata service
479        r"^169\.254\.169\.254$",
480        r"^169\.254\.169\.254:",
481        // RFC-1918 private ranges
482        r"^10\.\d+\.\d+\.\d+$",
483        r"^10\.\d+\.\d+\.\d+:",
484        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
485        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
486        r"^192\.168\.\d+\.\d+$",
487        r"^192\.168\.\d+\.\d+:",
488    ]
489    .iter()
490    .map(|s| Regex::new(s).expect("static pattern must compile"))
491    .collect()
492});
493
/// Extract the host (and optional port) from a URL string.
///
/// Takes the authority — the portion between `://` and the next `/`, `?`, `#`, or end
/// of string — and drops any `userinfo@` prefix from it, so `http://user:pw@localhost/`
/// yields `localhost` rather than `user:pw@localhost`. Without that step a URL with
/// credentials in front of an internal host would not match the anchored SSRF patterns.
///
/// If the URL has no scheme, returns `None`.
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // The host starts after the last `@` in the authority.
    let host = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    Some(host)
}
504
/// Field names that suggest URL/endpoint content — SSRF patterns are applied here.
/// Compared for exact equality against the lowercased field name.
static URL_FIELD_NAMES: &[&str] = &["url", "endpoint", "uri", "href", "src", "host", "base_url"];
507
/// Field names that are known to carry user-provided text queries — SQL injection and
/// command injection patterns are skipped for these fields to avoid false positives.
/// Examples: `memory_search(query=...)`, `web_search(query=...)`.
/// Compared for exact equality against the lowercased field name.
static SAFE_QUERY_FIELDS: &[&str] = &["query", "q", "search", "text", "message", "content"];
512
/// Verifier that blocks tool arguments containing SQL injection, command injection,
/// or path traversal patterns. Applies to ALL tools using field-aware matching.
///
/// ## Field-aware matching
///
/// Rather than serialising all args to a flat string (which causes false positives),
/// this verifier iterates over each string-valued field and applies pattern categories
/// based on field semantics:
///
/// - `SAFE_QUERY_FIELDS` (`query`, `q`, `search`, `text`, …): injection patterns are
///   **skipped** — these fields contain user-provided text and generate too many false
///   positives for SQL/command discussions in chat.
/// - `URL_FIELD_NAMES` (`url`, `endpoint`, `uri`, …): SSRF patterns are applied in
///   addition to the injection patterns.
/// - All other string fields: injection + path traversal patterns are applied.
///
/// ## Warn semantics
///
/// `VerificationResult::Warn` is metrics-only — the tool call proceeds, a WARN log
/// entry is emitted, and the TUI security panel counter increments. The LLM does not
/// see the warning in its tool result.
#[derive(Debug)]
pub struct InjectionPatternVerifier {
    // User-supplied block patterns that compiled successfully.
    extra_patterns: Vec<Regex>,
    // Lowercased URL fragments that silence the SSRF warning when found in a value.
    allowlisted_urls: Vec<String>,
}
538
539impl InjectionPatternVerifier {
540    #[must_use]
541    pub fn new(config: &InjectionVerifierConfig) -> Self {
542        let extra_patterns = config
543            .extra_patterns
544            .iter()
545            .filter_map(|s| match Regex::new(s) {
546                Ok(re) => Some(re),
547                Err(e) => {
548                    tracing::warn!(
549                        pattern = %s,
550                        error = %e,
551                        "InjectionPatternVerifier: invalid extra_pattern, skipping"
552                    );
553                    None
554                }
555            })
556            .collect();
557
558        Self {
559            extra_patterns,
560            allowlisted_urls: config
561                .allowlisted_urls
562                .iter()
563                .map(|s| s.to_lowercase())
564                .collect(),
565        }
566    }
567
568    fn is_allowlisted(&self, text: &str) -> bool {
569        let lower = text.to_lowercase();
570        self.allowlisted_urls
571            .iter()
572            .any(|u| lower.contains(u.as_str()))
573    }
574
575    fn is_url_field(field: &str) -> bool {
576        let lower = field.to_lowercase();
577        URL_FIELD_NAMES.iter().any(|&f| f == lower)
578    }
579
580    fn is_safe_query_field(field: &str) -> bool {
581        let lower = field.to_lowercase();
582        SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
583    }
584
585    /// Check a single string value from a named field.
586    fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
587        let is_url = Self::is_url_field(field);
588        let is_safe_query = Self::is_safe_query_field(field);
589
590        // Injection + path traversal: skip safe query fields (user text), apply elsewhere.
591        if !is_safe_query {
592            for pat in INJECTION_BLOCK_PATTERNS.iter() {
593                if pat.is_match(value) {
594                    return VerificationResult::Block {
595                        reason: format!(
596                            "[{}] injection pattern detected in field '{}': {}",
597                            "InjectionPatternVerifier",
598                            field,
599                            pat.as_str()
600                        ),
601                    };
602                }
603            }
604            for pat in &self.extra_patterns {
605                if pat.is_match(value) {
606                    return VerificationResult::Block {
607                        reason: format!(
608                            "[{}] extra injection pattern detected in field '{}': {}",
609                            "InjectionPatternVerifier",
610                            field,
611                            pat.as_str()
612                        ),
613                    };
614                }
615            }
616        }
617
618        // SSRF: apply only to URL-like fields.
619        // Extract the host first so that SSRF targets embedded in query parameters
620        // (e.g. `http://evil.com/?r=http://localhost`) are not falsely matched.
621        if is_url && let Some(host) = extract_url_host(value) {
622            for pat in SSRF_HOST_PATTERNS.iter() {
623                if pat.is_match(host) {
624                    if self.is_allowlisted(value) {
625                        return VerificationResult::Allow;
626                    }
627                    return VerificationResult::Warn {
628                        message: format!(
629                            "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
630                            "InjectionPatternVerifier", field, host,
631                        ),
632                    };
633                }
634            }
635        }
636
637        VerificationResult::Allow
638    }
639
640    /// Walk all string leaf values in a JSON object, collecting field names for context.
641    fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
642        for (key, val) in obj {
643            let result = self.check_value(key, val);
644            if !matches!(result, VerificationResult::Allow) {
645                return result;
646            }
647        }
648        VerificationResult::Allow
649    }
650
651    fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
652        match val {
653            serde_json::Value::String(s) => self.check_field_value(field, s),
654            serde_json::Value::Array(arr) => {
655                for item in arr {
656                    let r = self.check_value(field, item);
657                    if !matches!(r, VerificationResult::Allow) {
658                        return r;
659                    }
660                }
661                VerificationResult::Allow
662            }
663            serde_json::Value::Object(obj) => self.check_object(obj),
664            // Non-string primitives (numbers, booleans, null) cannot contain injection.
665            _ => VerificationResult::Allow,
666        }
667    }
668}
669
670impl PreExecutionVerifier for InjectionPatternVerifier {
671    fn name(&self) -> &'static str {
672        "InjectionPatternVerifier"
673    }
674
675    fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
676        match args {
677            serde_json::Value::Object(obj) => self.check_object(obj),
678            // Flat string args (unusual but handle gracefully — treat as unnamed field).
679            serde_json::Value::String(s) => self.check_field_value("_args", s),
680            _ => VerificationResult::Allow,
681        }
682    }
683}
684
685// ---------------------------------------------------------------------------
686// UrlGroundingVerifier
687// ---------------------------------------------------------------------------
688
/// Verifier that blocks `fetch` and `web_scrape` calls when the requested URL
/// was not explicitly provided by the user in the conversation.
///
/// The agent populates `user_provided_urls` whenever a user message is received,
/// by extracting all http/https URLs from the raw input. This set persists across
/// turns within a session and is cleared on `/clear`.
///
/// ## Bypass rules
///
/// - Tools not in the `guarded_tools` list (and not ending in `_fetch`) pass through.
/// - Guarded calls without a string `url` argument pass through.
/// - If the URL in the tool call is a prefix-match or exact match of any URL in
///   `user_provided_urls`, the call is allowed.
/// - If `user_provided_urls` is empty (no URLs seen in this session at all), the call
///   is blocked — the LLM must not fetch arbitrary URLs when the user never provided one.
#[derive(Debug, Clone)]
pub struct UrlGroundingVerifier {
    // Lowercased tool names subject to grounding checks.
    guarded_tools: Vec<String>,
    // Shared handle to the URL set; this verifier only takes read locks on it.
    user_provided_urls: Arc<RwLock<HashSet<String>>>,
}
708
709impl UrlGroundingVerifier {
710    #[must_use]
711    pub fn new(
712        config: &UrlGroundingVerifierConfig,
713        user_provided_urls: Arc<RwLock<HashSet<String>>>,
714    ) -> Self {
715        Self {
716            guarded_tools: config
717                .guarded_tools
718                .iter()
719                .map(|s| s.to_lowercase())
720                .collect(),
721            user_provided_urls,
722        }
723    }
724
725    fn is_guarded(&self, tool_name: &str) -> bool {
726        let lower = tool_name.to_lowercase();
727        self.guarded_tools.iter().any(|t| t == &lower) || lower.ends_with("_fetch")
728    }
729
730    /// Returns true if `url` is grounded — i.e., it appears in (or is a prefix of)
731    /// a URL from `user_provided_urls`.
732    fn is_grounded(url: &str, user_provided_urls: &HashSet<String>) -> bool {
733        let lower = url.to_lowercase();
734        user_provided_urls
735            .iter()
736            .any(|u| lower.starts_with(u.as_str()) || u.starts_with(lower.as_str()))
737    }
738}
739
740impl PreExecutionVerifier for UrlGroundingVerifier {
741    fn name(&self) -> &'static str {
742        "UrlGroundingVerifier"
743    }
744
745    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
746        if !self.is_guarded(tool_name) {
747            return VerificationResult::Allow;
748        }
749
750        let Some(url) = args.get("url").and_then(|v| v.as_str()) else {
751            return VerificationResult::Allow;
752        };
753
754        let Ok(urls) = self.user_provided_urls.read() else {
755            // Poisoned lock: fail open to avoid blocking legitimate tool calls.
756            return VerificationResult::Allow;
757        };
758
759        if Self::is_grounded(url, &urls) {
760            return VerificationResult::Allow;
761        }
762
763        VerificationResult::Block {
764            reason: format!(
765                "[UrlGroundingVerifier] fetch rejected: URL '{url}' was not provided by the user",
766            ),
767        }
768    }
769}
770
771// ---------------------------------------------------------------------------
772// Tests
773// ---------------------------------------------------------------------------
774
775#[cfg(test)]
776mod tests {
777    use serde_json::json;
778
779    use super::*;
780
781    // --- DestructiveCommandVerifier ---
782
783    fn dcv() -> DestructiveCommandVerifier {
784        DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default())
785    }
786
787    #[test]
788    fn allow_normal_command() {
789        let v = dcv();
790        assert_eq!(
791            v.verify("bash", &json!({"command": "ls -la /tmp"})),
792            VerificationResult::Allow
793        );
794    }
795
796    #[test]
797    fn block_rm_rf_root() {
798        let v = dcv();
799        let result = v.verify("bash", &json!({"command": "rm -rf /"}));
800        assert!(matches!(result, VerificationResult::Block { .. }));
801    }
802
803    #[test]
804    fn block_dd_dev_zero() {
805        let v = dcv();
806        let result = v.verify("bash", &json!({"command": "dd if=/dev/zero of=/dev/sda"}));
807        assert!(matches!(result, VerificationResult::Block { .. }));
808    }
809
810    #[test]
811    fn block_mkfs() {
812        let v = dcv();
813        let result = v.verify("bash", &json!({"command": "mkfs.ext4 /dev/sda1"}));
814        assert!(matches!(result, VerificationResult::Block { .. }));
815    }
816
817    #[test]
818    fn allow_rm_rf_in_allowed_path() {
819        let config = DestructiveVerifierConfig {
820            allowed_paths: vec!["/tmp/build".to_string()],
821            ..Default::default()
822        };
823        let v = DestructiveCommandVerifier::new(&config);
824        assert_eq!(
825            v.verify("bash", &json!({"command": "rm -rf /tmp/build/artifacts"})),
826            VerificationResult::Allow
827        );
828    }
829
830    #[test]
831    fn block_rm_rf_when_not_in_allowed_path() {
832        let config = DestructiveVerifierConfig {
833            allowed_paths: vec!["/tmp/build".to_string()],
834            ..Default::default()
835        };
836        let v = DestructiveCommandVerifier::new(&config);
837        let result = v.verify("bash", &json!({"command": "rm -rf /home/user"}));
838        assert!(matches!(result, VerificationResult::Block { .. }));
839    }
840
841    #[test]
842    fn allow_non_shell_tool() {
843        let v = dcv();
844        assert_eq!(
845            v.verify("read_file", &json!({"path": "rm -rf /"})),
846            VerificationResult::Allow
847        );
848    }
849
850    #[test]
851    fn block_extra_pattern() {
852        let config = DestructiveVerifierConfig {
853            extra_patterns: vec!["format c:".to_string()],
854            ..Default::default()
855        };
856        let v = DestructiveCommandVerifier::new(&config);
857        let result = v.verify("bash", &json!({"command": "format c:"}));
858        assert!(matches!(result, VerificationResult::Block { .. }));
859    }
860
861    #[test]
862    fn array_args_normalization() {
863        let v = dcv();
864        let result = v.verify("bash", &json!({"command": ["rm", "-rf", "/"]}));
865        assert!(matches!(result, VerificationResult::Block { .. }));
866    }
867
868    #[test]
869    fn sh_c_wrapping_normalization() {
870        let v = dcv();
871        let result = v.verify("bash", &json!({"command": "bash -c 'rm -rf /'"}));
872        assert!(matches!(result, VerificationResult::Block { .. }));
873    }
874
875    #[test]
876    fn fork_bomb_blocked() {
877        let v = dcv();
878        let result = v.verify("bash", &json!({"command": ":(){ :|:& };:"}));
879        assert!(matches!(result, VerificationResult::Block { .. }));
880    }
881
882    #[test]
883    fn custom_shell_tool_name_blocked() {
884        let config = DestructiveVerifierConfig {
885            shell_tools: vec!["execute".to_string(), "run_command".to_string()],
886            ..Default::default()
887        };
888        let v = DestructiveCommandVerifier::new(&config);
889        let result = v.verify("execute", &json!({"command": "rm -rf /"}));
890        assert!(matches!(result, VerificationResult::Block { .. }));
891    }
892
893    #[test]
894    fn terminal_tool_name_blocked_by_default() {
895        let v = dcv();
896        let result = v.verify("terminal", &json!({"command": "rm -rf /"}));
897        assert!(matches!(result, VerificationResult::Block { .. }));
898    }
899
900    #[test]
901    fn default_shell_tools_contains_bash_shell_terminal() {
902        let config = DestructiveVerifierConfig::default();
903        let lower: Vec<String> = config
904            .shell_tools
905            .iter()
906            .map(|s| s.to_lowercase())
907            .collect();
908        assert!(lower.contains(&"bash".to_string()));
909        assert!(lower.contains(&"shell".to_string()));
910        assert!(lower.contains(&"terminal".to_string()));
911    }
912
913    // --- InjectionPatternVerifier ---
914
915    fn ipv() -> InjectionPatternVerifier {
916        InjectionPatternVerifier::new(&InjectionVerifierConfig::default())
917    }
918
919    #[test]
920    fn allow_clean_args() {
921        let v = ipv();
922        assert_eq!(
923            v.verify("search", &json!({"query": "rust async traits"})),
924            VerificationResult::Allow
925        );
926    }
927
928    #[test]
929    fn allow_sql_discussion_in_query_field() {
930        // S2: memory_search with SQL discussion must NOT be blocked.
931        let v = ipv();
932        assert_eq!(
933            v.verify(
934                "memory_search",
935                &json!({"query": "explain SQL UNION SELECT vs JOIN"})
936            ),
937            VerificationResult::Allow
938        );
939    }
940
941    #[test]
942    fn allow_sql_or_pattern_in_query_field() {
943        // S2: safe query field must not trigger SQL injection pattern.
944        let v = ipv();
945        assert_eq!(
946            v.verify("memory_search", &json!({"query": "' OR '1'='1"})),
947            VerificationResult::Allow
948        );
949    }
950
951    #[test]
952    fn block_sql_injection_in_non_query_field() {
953        let v = ipv();
954        let result = v.verify("db_query", &json!({"sql": "' OR '1'='1"}));
955        assert!(matches!(result, VerificationResult::Block { .. }));
956    }
957
958    #[test]
959    fn block_drop_table() {
960        let v = ipv();
961        let result = v.verify("db_query", &json!({"input": "name'; DROP TABLE users"}));
962        assert!(matches!(result, VerificationResult::Block { .. }));
963    }
964
965    #[test]
966    fn block_path_traversal() {
967        let v = ipv();
968        let result = v.verify("read_file", &json!({"path": "../../../etc/passwd"}));
969        assert!(matches!(result, VerificationResult::Block { .. }));
970    }
971
972    #[test]
973    fn warn_on_localhost_url_field() {
974        // S2: SSRF warn only fires on URL-like fields.
975        let v = ipv();
976        let result = v.verify("http_get", &json!({"url": "http://localhost:8080/api"}));
977        assert!(matches!(result, VerificationResult::Warn { .. }));
978    }
979
980    #[test]
981    fn allow_localhost_in_non_url_field() {
982        // S2: localhost in a "text" field (not a URL field) must not warn.
983        let v = ipv();
984        assert_eq!(
985            v.verify(
986                "memory_search",
987                &json!({"query": "connect to http://localhost:8080"})
988            ),
989            VerificationResult::Allow
990        );
991    }
992
993    #[test]
994    fn warn_on_private_ip_url_field() {
995        let v = ipv();
996        let result = v.verify("fetch", &json!({"url": "http://192.168.1.1/admin"}));
997        assert!(matches!(result, VerificationResult::Warn { .. }));
998    }
999
1000    #[test]
1001    fn allow_localhost_when_allowlisted() {
1002        let config = InjectionVerifierConfig {
1003            allowlisted_urls: vec!["http://localhost:3000".to_string()],
1004            ..Default::default()
1005        };
1006        let v = InjectionPatternVerifier::new(&config);
1007        assert_eq!(
1008            v.verify("http_get", &json!({"url": "http://localhost:3000/api"})),
1009            VerificationResult::Allow
1010        );
1011    }
1012
1013    #[test]
1014    fn block_union_select_in_non_query_field() {
1015        let v = ipv();
1016        let result = v.verify(
1017            "db_query",
1018            &json!({"input": "id=1 UNION SELECT password FROM users"}),
1019        );
1020        assert!(matches!(result, VerificationResult::Block { .. }));
1021    }
1022
1023    #[test]
1024    fn allow_union_select_in_query_field() {
1025        // S2: "UNION SELECT" in a `query` field is a SQL discussion, not an injection.
1026        let v = ipv();
1027        assert_eq!(
1028            v.verify(
1029                "memory_search",
1030                &json!({"query": "id=1 UNION SELECT password FROM users"})
1031            ),
1032            VerificationResult::Allow
1033        );
1034    }
1035
1036    // --- FIX-1: Unicode normalization bypass ---
1037
1038    #[test]
1039    fn block_rm_rf_unicode_homoglyph() {
1040        // U+FF0F FULLWIDTH SOLIDUS looks like '/' and NFKC-normalizes to '/'.
1041        let v = dcv();
1042        // "rm -rf ／" where ／ is U+FF0F
1043        let result = v.verify("bash", &json!({"command": "rm -rf \u{FF0F}"}));
1044        assert!(matches!(result, VerificationResult::Block { .. }));
1045    }
1046
1047    // --- FIX-2: Path traversal in is_allowed_path ---
1048
1049    #[test]
1050    fn path_traversal_not_allowed_via_dotdot() {
1051        // `/tmp/build/../../etc` lexically resolves to `/etc`, NOT under `/tmp/build`.
1052        let config = DestructiveVerifierConfig {
1053            allowed_paths: vec!["/tmp/build".to_string()],
1054            ..Default::default()
1055        };
1056        let v = DestructiveCommandVerifier::new(&config);
1057        // Should be BLOCKED: resolved path is /etc, not under /tmp/build.
1058        let result = v.verify("bash", &json!({"command": "rm -rf /tmp/build/../../etc"}));
1059        assert!(matches!(result, VerificationResult::Block { .. }));
1060    }
1061
1062    #[test]
1063    fn allowed_path_with_dotdot_stays_in_allowed() {
1064        // `/tmp/build/sub/../artifacts` resolves to `/tmp/build/artifacts` — still allowed.
1065        let config = DestructiveVerifierConfig {
1066            allowed_paths: vec!["/tmp/build".to_string()],
1067            ..Default::default()
1068        };
1069        let v = DestructiveCommandVerifier::new(&config);
1070        assert_eq!(
1071            v.verify(
1072                "bash",
1073                &json!({"command": "rm -rf /tmp/build/sub/../artifacts"}),
1074            ),
1075            VerificationResult::Allow,
1076        );
1077    }
1078
1079    // --- FIX-3: Double-nested shell wrapping ---
1080
1081    #[test]
1082    fn double_nested_bash_c_blocked() {
1083        let v = dcv();
1084        let result = v.verify(
1085            "bash",
1086            &json!({"command": "bash -c \"bash -c 'rm -rf /'\""}),
1087        );
1088        assert!(matches!(result, VerificationResult::Block { .. }));
1089    }
1090
1091    #[test]
1092    fn env_prefix_stripping_blocked() {
1093        let v = dcv();
1094        let result = v.verify(
1095            "bash",
1096            &json!({"command": "env FOO=bar bash -c 'rm -rf /'"}),
1097        );
1098        assert!(matches!(result, VerificationResult::Block { .. }));
1099    }
1100
1101    #[test]
1102    fn exec_prefix_stripping_blocked() {
1103        let v = dcv();
1104        let result = v.verify("bash", &json!({"command": "exec bash -c 'rm -rf /'"}));
1105        assert!(matches!(result, VerificationResult::Block { .. }));
1106    }
1107
1108    // --- FIX-4: SSRF host extraction (not substring match) ---
1109
1110    #[test]
1111    fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
1112        // `evil.com/?r=http://localhost` — host is `evil.com`, not localhost.
1113        let v = ipv();
1114        let result = v.verify(
1115            "http_get",
1116            &json!({"url": "http://evil.com/?r=http://localhost"}),
1117        );
1118        // Should NOT warn — the actual request host is evil.com, not localhost.
1119        assert_eq!(result, VerificationResult::Allow);
1120    }
1121
1122    #[test]
1123    fn ssrf_triggered_for_bare_localhost_no_port() {
1124        // FIX-7: `http://localhost` with no trailing slash or port must warn.
1125        let v = ipv();
1126        let result = v.verify("http_get", &json!({"url": "http://localhost"}));
1127        assert!(matches!(result, VerificationResult::Warn { .. }));
1128    }
1129
1130    #[test]
1131    fn ssrf_triggered_for_localhost_with_path() {
1132        let v = ipv();
1133        let result = v.verify("http_get", &json!({"url": "http://localhost/api/v1"}));
1134        assert!(matches!(result, VerificationResult::Warn { .. }));
1135    }
1136
1137    // --- Verifier chain: first Block wins, Warn continues ---
1138
1139    #[test]
1140    fn chain_first_block_wins() {
1141        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1142        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1143        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1144
1145        let args = json!({"command": "rm -rf /"});
1146        let mut result = VerificationResult::Allow;
1147        for v in &verifiers {
1148            result = v.verify("bash", &args);
1149            if matches!(result, VerificationResult::Block { .. }) {
1150                break;
1151            }
1152        }
1153        assert!(matches!(result, VerificationResult::Block { .. }));
1154    }
1155
1156    #[test]
1157    fn chain_warn_continues() {
1158        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1159        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1160        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1161
1162        // localhost URL in `url` field: dcv allows, ipv warns, chain does NOT block.
1163        let args = json!({"url": "http://localhost:8080/api"});
1164        let mut got_warn = false;
1165        let mut got_block = false;
1166        for v in &verifiers {
1167            match v.verify("http_get", &args) {
1168                VerificationResult::Block { .. } => {
1169                    got_block = true;
1170                    break;
1171                }
1172                VerificationResult::Warn { .. } => {
1173                    got_warn = true;
1174                }
1175                VerificationResult::Allow => {}
1176            }
1177        }
1178        assert!(got_warn);
1179        assert!(!got_block);
1180    }
1181
1182    // --- UrlGroundingVerifier ---
1183
1184    fn ugv(urls: &[&str]) -> UrlGroundingVerifier {
1185        let set: HashSet<String> = urls.iter().map(|s| s.to_lowercase()).collect();
1186        UrlGroundingVerifier::new(
1187            &UrlGroundingVerifierConfig::default(),
1188            Arc::new(RwLock::new(set)),
1189        )
1190    }
1191
1192    #[test]
1193    fn url_grounding_allows_user_provided_url() {
1194        let v = ugv(&["https://docs.anthropic.com/models"]);
1195        assert_eq!(
1196            v.verify(
1197                "fetch",
1198                &json!({"url": "https://docs.anthropic.com/models"})
1199            ),
1200            VerificationResult::Allow
1201        );
1202    }
1203
1204    #[test]
1205    fn url_grounding_blocks_hallucinated_url() {
1206        let v = ugv(&["https://example.com/page"]);
1207        let result = v.verify(
1208            "fetch",
1209            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1210        );
1211        assert!(matches!(result, VerificationResult::Block { .. }));
1212    }
1213
1214    #[test]
1215    fn url_grounding_blocks_when_no_user_urls_at_all() {
1216        let v = ugv(&[]);
1217        let result = v.verify(
1218            "fetch",
1219            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1220        );
1221        assert!(matches!(result, VerificationResult::Block { .. }));
1222    }
1223
1224    #[test]
1225    fn url_grounding_allows_non_guarded_tool() {
1226        let v = ugv(&[]);
1227        assert_eq!(
1228            v.verify("read_file", &json!({"path": "/etc/hosts"})),
1229            VerificationResult::Allow
1230        );
1231    }
1232
1233    #[test]
1234    fn url_grounding_guards_fetch_suffix_tool() {
1235        let v = ugv(&[]);
1236        let result = v.verify("http_fetch", &json!({"url": "https://evil.com/"}));
1237        assert!(matches!(result, VerificationResult::Block { .. }));
1238    }
1239
1240    #[test]
1241    fn url_grounding_allows_web_scrape_with_provided_url() {
1242        let v = ugv(&["https://rust-lang.org/"]);
1243        assert_eq!(
1244            v.verify(
1245                "web_scrape",
1246                &json!({"url": "https://rust-lang.org/", "select": "h1"})
1247            ),
1248            VerificationResult::Allow
1249        );
1250    }
1251
1252    #[test]
1253    fn url_grounding_allows_prefix_match() {
1254        // User provided https://docs.rs/ — agent fetches a sub-path.
1255        let v = ugv(&["https://docs.rs/"]);
1256        assert_eq!(
1257            v.verify(
1258                "fetch",
1259                &json!({"url": "https://docs.rs/tokio/latest/tokio/"})
1260            ),
1261            VerificationResult::Allow
1262        );
1263    }
1264
1265    // --- Regression: #2191 — fetch URL hallucination ---
1266
1267    /// REG-2191-1: exact reproduction of the bug scenario.
1268    /// Agent asks "do you know Anthropic?" (no URL provided) and halluccinates
1269    /// `https://api.anthropic.ai/v1/models`. With an empty user_provided_urls set
1270    /// the fetch must be blocked.
1271    #[test]
1272    fn reg_2191_hallucinated_api_endpoint_blocked_with_empty_session() {
1273        // Simulate: user never sent any URL in the conversation.
1274        let v = ugv(&[]);
1275        let result = v.verify(
1276            "fetch",
1277            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1278        );
1279        assert!(
1280            matches!(result, VerificationResult::Block { .. }),
1281            "fetch must be blocked when no user URL was provided — this is the #2191 regression"
1282        );
1283    }
1284
1285    /// REG-2191-2: passthrough — user explicitly pasted the URL, fetch must proceed.
1286    #[test]
1287    fn reg_2191_user_provided_url_allows_fetch() {
1288        let v = ugv(&["https://api.anthropic.com/v1/models"]);
1289        assert_eq!(
1290            v.verify(
1291                "fetch",
1292                &json!({"url": "https://api.anthropic.com/v1/models"}),
1293            ),
1294            VerificationResult::Allow,
1295            "fetch must be allowed when the URL was explicitly provided by the user"
1296        );
1297    }
1298
1299    /// REG-2191-3: web_scrape variant — same rejection for web_scrape tool.
1300    #[test]
1301    fn reg_2191_web_scrape_hallucinated_url_blocked() {
1302        let v = ugv(&[]);
1303        let result = v.verify(
1304            "web_scrape",
1305            &json!({"url": "https://api.anthropic.ai/v1/models", "select": "body"}),
1306        );
1307        assert!(
1308            matches!(result, VerificationResult::Block { .. }),
1309            "web_scrape must be blocked for hallucinated URL with empty user_provided_urls"
1310        );
1311    }
1312
1313    /// REG-2191-4: URL present only in an imagined system/assistant message context
1314    /// is NOT in user_provided_urls (the agent only populates from user messages).
1315    /// The verifier itself cannot distinguish message roles — it only sees the set
1316    /// populated by the agent. This test confirms: an empty set always blocks.
1317    #[test]
1318    fn reg_2191_empty_url_set_always_blocks_fetch() {
1319        // Whether the URL came from a system/assistant message or was never seen —
1320        // if user_provided_urls is empty, fetch must be blocked.
1321        let v = ugv(&[]);
1322        let result = v.verify(
1323            "fetch",
1324            &json!({"url": "https://docs.anthropic.com/something"}),
1325        );
1326        assert!(matches!(result, VerificationResult::Block { .. }));
1327    }
1328
1329    /// REG-2191-5: URL matching is case-insensitive — user pastes mixed-case URL.
1330    #[test]
1331    fn reg_2191_case_insensitive_url_match_allows_fetch() {
1332        // user_provided_urls stores lowercase; verify that the fetched URL with
1333        // different casing still matches.
1334        let v = ugv(&["https://Docs.Anthropic.COM/models"]);
1335        assert_eq!(
1336            v.verify(
1337                "fetch",
1338                &json!({"url": "https://docs.anthropic.com/models/detail"}),
1339            ),
1340            VerificationResult::Allow,
1341            "URL matching must be case-insensitive"
1342        );
1343    }
1344
1345    /// REG-2191-6: tool name ending in `_fetch` is auto-guarded regardless of config.
1346    /// An MCP-registered `anthropic_fetch` tool must not bypass the gate.
1347    #[test]
1348    fn reg_2191_mcp_fetch_suffix_tool_blocked_with_empty_session() {
1349        let v = ugv(&[]);
1350        let result = v.verify(
1351            "anthropic_fetch",
1352            &json!({"url": "https://api.anthropic.ai/v1/models"}),
1353        );
1354        assert!(
1355            matches!(result, VerificationResult::Block { .. }),
1356            "MCP tools ending in _fetch must be guarded even if not in guarded_tools list"
1357        );
1358    }
1359
1360    /// REG-2191-7: reverse prefix — user provided a specific URL, agent fetches
1361    /// the root. This is the "reverse prefix" case: user_url starts_with fetch_url.
1362    #[test]
1363    fn reg_2191_reverse_prefix_match_allows_fetch() {
1364        // User provided a deep URL; agent wants to fetch the root.
1365        // Allowed: user_url.starts_with(fetch_url).
1366        let v = ugv(&["https://docs.rs/tokio/latest/tokio/index.html"]);
1367        assert_eq!(
1368            v.verify("fetch", &json!({"url": "https://docs.rs/"})),
1369            VerificationResult::Allow,
1370            "reverse prefix: fetched URL is a prefix of user-provided URL — should be allowed"
1371        );
1372    }
1373
1374    /// REG-2191-8: completely different domain with same path prefix must be blocked.
1375    #[test]
1376    fn reg_2191_different_domain_blocked() {
1377        // User provided docs.rs, agent wants to fetch evil.com/docs.rs path — must block.
1378        let v = ugv(&["https://docs.rs/"]);
1379        let result = v.verify("fetch", &json!({"url": "https://evil.com/docs.rs/exfil"}));
1380        assert!(
1381            matches!(result, VerificationResult::Block { .. }),
1382            "different domain must not be allowed even if path looks similar"
1383        );
1384    }
1385
1386    /// REG-2191-9: args without a `url` field — verifier must not block (Allow).
1387    #[test]
1388    fn reg_2191_missing_url_field_allows_fetch() {
1389        // Some fetch-like tools may call with different arg names.
1390        // Verifier only checks the `url` field; missing field → Allow.
1391        let v = ugv(&[]);
1392        assert_eq!(
1393            v.verify(
1394                "fetch",
1395                &json!({"endpoint": "https://api.anthropic.ai/v1/models"})
1396            ),
1397            VerificationResult::Allow,
1398            "missing url field must not trigger blocking — only explicit url field is checked"
1399        );
1400    }
1401
1402    /// REG-2191-10: verifier disabled via config — all fetch calls pass through.
1403    #[test]
1404    fn reg_2191_disabled_verifier_allows_all() {
1405        let config = UrlGroundingVerifierConfig {
1406            enabled: false,
1407            guarded_tools: default_guarded_tools(),
1408        };
1409        // Note: the enabled flag is checked by the pipeline, not inside verify().
1410        // The pipeline skips disabled verifiers. This test documents that the struct
1411        // can be constructed with enabled=false (config round-trip).
1412        let set: HashSet<String> = HashSet::new();
1413        let v = UrlGroundingVerifier::new(&config, Arc::new(RwLock::new(set)));
1414        // verify() itself doesn't check enabled — the pipeline is responsible.
1415        // When called directly it will still block (the field has no effect here).
1416        // This is an API documentation test, not a behaviour test.
1417        let _ = v.verify("fetch", &json!({"url": "https://example.com/"}));
1418        // No assertion: just verifies the struct can be built with enabled=false.
1419    }
1420}
zeph_tools/verifier.rs

zeph_tools/
verifier.rs