zeph_tools/
verifier.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pre-execution verification for tool calls.
5//!
6//! Based on the `TrustBench` pattern (arXiv:2603.09157): intercept tool calls before
7//! execution to block or warn on destructive or injection patterns.
8//!
9//! ## Blocklist separation
10//!
11//! `DESTRUCTIVE_PATTERNS` (this module) is intentionally separate from
12//! `DEFAULT_BLOCKED_COMMANDS` in `shell.rs`. The two lists serve different purposes:
13//!
14//! - `DEFAULT_BLOCKED_COMMANDS` — shell safety net: prevents the *shell executor* from
15//!   running network tools (`curl`, `wget`, `nc`) and a few destructive commands.
16//!   It is applied at tool-execution time by `ShellExecutor`.
17//!
18//! - `DESTRUCTIVE_PATTERNS` — pre-execution guard: targets filesystem/system destruction
19//!   commands (disk formats, wipefs, fork bombs, recursive permission changes).
20//!   It runs *before* dispatch, in the LLM-call hot path, and must not be conflated
21//!   with the shell safety net to avoid accidental allow-listing via config drift.
22//!
23//! Overlap (3 entries: `rm -rf /`, `mkfs`, `dd if=`) is intentional — belt-and-suspenders.
24
25use std::sync::LazyLock;
26
27use regex::Regex;
28use serde::{Deserialize, Serialize};
29use unicode_normalization::UnicodeNormalization as _;
30
/// Serde default helper for boolean switches that are on unless the
/// configuration explicitly turns them off.
fn default_true() -> bool {
    true
}
34
/// Serde default helper: the tool names treated as shell executors when the
/// configuration does not provide its own list.
fn default_shell_tools() -> Vec<String> {
    ["bash", "shell", "terminal"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
42
/// Outcome of running a pre-execution verifier over a single tool call.
#[must_use]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum VerificationResult {
    /// Nothing suspicious was found; the tool call may proceed.
    Allow,
    /// The tool call must not run. The executor returns an error to the LLM;
    /// `reason` describes what triggered the block.
    Block { reason: String },
    /// The tool call still proceeds, but `message` is logged and tracked in metrics
    /// (metrics-only, not visible to the LLM or user beyond the TUI security panel).
    Warn { message: String },
}
55
56/// Pre-execution verification trait. Implementations intercept tool calls
57/// before the executor runs them. Based on `TrustBench` pattern (arXiv:2603.09157).
58///
59/// Sync by design: verifiers inspect arguments only — no I/O needed.
60/// Object-safe: uses `&self` and returns a concrete enum.
61pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
62    /// Verify whether a tool call should proceed.
63    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;
64
65    /// Human-readable name for logging and TUI display.
66    fn name(&self) -> &'static str;
67}
68
69// ---------------------------------------------------------------------------
70// Config types
71// ---------------------------------------------------------------------------
72
73/// Configuration for the destructive command verifier.
74///
75/// `allowed_paths`: when **empty** (the default), ALL destructive commands are denied.
76/// This is a conservative default: to allow e.g. `rm -rf /tmp/build` you must
77/// explicitly add `/tmp/build` to `allowed_paths`.
78///
79/// `shell_tools`: the set of tool names considered shell executors. Defaults to
80/// `["bash", "shell", "terminal"]`. Add custom names here if your setup registers
81/// shell tools under different names (e.g., via MCP or ACP integrations).
82#[derive(Debug, Clone, Deserialize, Serialize)]
83pub struct DestructiveVerifierConfig {
84    #[serde(default = "default_true")]
85    pub enabled: bool,
86    /// Explicit path prefixes under which destructive commands are permitted.
87    /// **Empty = deny-all destructive commands** (safest default).
88    #[serde(default)]
89    pub allowed_paths: Vec<String>,
90    /// Additional command patterns to treat as destructive (substring match).
91    #[serde(default)]
92    pub extra_patterns: Vec<String>,
93    /// Tool names to treat as shell executors (case-insensitive).
94    /// Default: `["bash", "shell", "terminal"]`.
95    #[serde(default = "default_shell_tools")]
96    pub shell_tools: Vec<String>,
97}
98
99impl Default for DestructiveVerifierConfig {
100    fn default() -> Self {
101        Self {
102            enabled: true,
103            allowed_paths: Vec::new(),
104            extra_patterns: Vec::new(),
105            shell_tools: default_shell_tools(),
106        }
107    }
108}
109
110/// Configuration for the injection pattern verifier.
111#[derive(Debug, Clone, Deserialize, Serialize)]
112pub struct InjectionVerifierConfig {
113    #[serde(default = "default_true")]
114    pub enabled: bool,
115    /// Additional injection patterns to block (regex strings).
116    /// Invalid regexes are logged at WARN level and skipped.
117    #[serde(default)]
118    pub extra_patterns: Vec<String>,
119    /// URLs explicitly permitted even if they match SSRF patterns.
120    #[serde(default)]
121    pub allowlisted_urls: Vec<String>,
122}
123
124impl Default for InjectionVerifierConfig {
125    fn default() -> Self {
126        Self {
127            enabled: true,
128            extra_patterns: Vec::new(),
129            allowlisted_urls: Vec::new(),
130        }
131    }
132}
133
134/// Top-level configuration for all pre-execution verifiers.
135#[derive(Debug, Clone, Deserialize, Serialize)]
136pub struct PreExecutionVerifierConfig {
137    #[serde(default = "default_true")]
138    pub enabled: bool,
139    #[serde(default)]
140    pub destructive_commands: DestructiveVerifierConfig,
141    #[serde(default)]
142    pub injection_patterns: InjectionVerifierConfig,
143}
144
145impl Default for PreExecutionVerifierConfig {
146    fn default() -> Self {
147        Self {
148            enabled: true,
149            destructive_commands: DestructiveVerifierConfig::default(),
150            injection_patterns: InjectionVerifierConfig::default(),
151        }
152    }
153}
154
155// ---------------------------------------------------------------------------
156// DestructiveCommandVerifier
157// ---------------------------------------------------------------------------
158
/// Built-in substrings that mark a shell command as destructive for
/// `DestructiveCommandVerifier`.
///
/// Kept apart from `DEFAULT_BLOCKED_COMMANDS` in `shell.rs` on purpose — the module
/// docs explain how the two lists differ.
///
/// Entries are written in lowercase (note `chmod -r` / `chown -r`) because the
/// command is lowercased before it is searched for these substrings.
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletion from the root or the home directory.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk / filesystem manipulation.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bomb, with and without inner spaces.
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes.
    "chmod -r 777 /",
    "chown -r",
];
177
/// Pre-execution guard that stops destructive shell commands (e.g., `rm -rf /`, `dd`,
/// `mkfs`) before a shell tool gets to run them.
///
/// Only tools whose name appears in the configured `shell_tools` set are inspected
/// (default: `["bash", "shell", "terminal"]`). A matching command is still let through
/// when the path it targets falls under one of the configured `allowed_paths`. With an
/// empty `allowed_paths` (the default), **every** matching destructive command is blocked.
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    /// Lowercased names of the tools treated as shell executors.
    shell_tools: Vec<String>,
    /// Lowercased path prefixes under which destructive commands are tolerated.
    allowed_paths: Vec<String>,
    /// Lowercased user-supplied substrings treated as destructive.
    extra_patterns: Vec<String>,
}
191
192impl DestructiveCommandVerifier {
193    #[must_use]
194    pub fn new(config: &DestructiveVerifierConfig) -> Self {
195        Self {
196            shell_tools: config
197                .shell_tools
198                .iter()
199                .map(|s| s.to_lowercase())
200                .collect(),
201            allowed_paths: config
202                .allowed_paths
203                .iter()
204                .map(|s| s.to_lowercase())
205                .collect(),
206            extra_patterns: config
207                .extra_patterns
208                .iter()
209                .map(|s| s.to_lowercase())
210                .collect(),
211        }
212    }
213
214    fn is_shell_tool(&self, tool_name: &str) -> bool {
215        let lower = tool_name.to_lowercase();
216        self.shell_tools.iter().any(|t| t == &lower)
217    }
218
219    /// Extract the effective command string from `args`.
220    ///
221    /// Supports:
222    /// - `{"command": "rm -rf /"}` (string)
223    /// - `{"command": ["rm", "-rf", "/"]}` (array — joined with spaces)
224    /// - `{"command": "bash -c 'rm -rf /'"}` (shell `-c` unwrapping, looped up to 8 levels)
225    /// - `env VAR=val bash -c '...'` and `exec bash -c '...'` prefix stripping
226    ///
227    /// NFKC-normalizes the result to defeat Unicode homoglyph bypasses.
228    fn extract_command(args: &serde_json::Value) -> Option<String> {
229        let raw = match args.get("command") {
230            Some(serde_json::Value::String(s)) => s.clone(),
231            Some(serde_json::Value::Array(arr)) => arr
232                .iter()
233                .filter_map(|v| v.as_str())
234                .collect::<Vec<_>>()
235                .join(" "),
236            _ => return None,
237        };
238        // NFKC-normalize + lowercase to defeat Unicode homoglyph and case bypasses.
239        let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
240        // Loop: strip shell wrapper prefixes up to 8 levels deep.
241        // Handles double-nested: `bash -c "bash -c 'rm -rf /'"`.
242        for _ in 0..8 {
243            let trimmed = current.trim().to_owned();
244            // Strip `env VAR=value ... CMD` prefix (one or more VAR=value tokens).
245            let after_env = Self::strip_env_prefix(&trimmed);
246            // Strip `exec ` prefix.
247            let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
248            // Strip interpreter wrapper: `bash -c '...'` / `sh -c '...'` / `zsh -c '...'`.
249            let mut unwrapped = false;
250            for interp in &["bash -c ", "sh -c ", "zsh -c "] {
251                if let Some(rest) = after_exec.strip_prefix(interp) {
252                    let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
253                    current.clone_from(&script.to_owned());
254                    unwrapped = true;
255                    break;
256                }
257            }
258            if !unwrapped {
259                return Some(after_exec.to_owned());
260            }
261        }
262        Some(current)
263    }
264
265    /// Strip leading `env VAR=value` tokens from a command string.
266    /// Returns the remainder after all `KEY=VALUE` pairs are consumed.
267    fn strip_env_prefix(cmd: &str) -> &str {
268        let mut rest = cmd;
269        // `env` keyword is optional; strip it if present.
270        if let Some(after_env) = rest.strip_prefix("env ") {
271            rest = after_env.trim_start();
272        }
273        // Consume `KEY=VALUE` tokens.
274        loop {
275            // A VAR=value token: identifier chars + '=' + non-space chars.
276            let mut chars = rest.chars();
277            let key_end = chars
278                .by_ref()
279                .take_while(|c| c.is_alphanumeric() || *c == '_')
280                .count();
281            if key_end == 0 {
282                break;
283            }
284            let remainder = &rest[key_end..];
285            if let Some(after_eq) = remainder.strip_prefix('=') {
286                // Consume the value (up to the first space).
287                let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
288                rest = after_eq[val_end..].trim_start();
289            } else {
290                break;
291            }
292        }
293        rest
294    }
295
296    /// Returns `true` if `command` targets a path that is covered by `allowed_paths`.
297    ///
298    /// Uses lexical normalization (resolves `..` and `.` without filesystem access)
299    /// so that `/tmp/build/../../etc` is correctly resolved to `/etc` before comparison,
300    /// defeating path traversal bypasses like `/tmp/build/../../etc/passwd`.
301    fn is_allowed_path(&self, command: &str) -> bool {
302        if self.allowed_paths.is_empty() {
303            return false;
304        }
305        let tokens: Vec<&str> = command.split_whitespace().collect();
306        for token in &tokens {
307            let t = token.trim_matches(|c| c == '\'' || c == '"');
308            if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
309                let normalized = Self::lexical_normalize(std::path::Path::new(t));
310                let n_lower = normalized.to_string_lossy().to_lowercase();
311                if self
312                    .allowed_paths
313                    .iter()
314                    .any(|p| n_lower.starts_with(p.as_str()))
315                {
316                    return true;
317                }
318            }
319        }
320        false
321    }
322
323    /// Lexically normalize a path by resolving `.` and `..` components without
324    /// hitting the filesystem. Does not require the path to exist.
325    fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
326        let mut out = std::path::PathBuf::new();
327        for component in p.components() {
328            match component {
329                std::path::Component::ParentDir => {
330                    out.pop();
331                }
332                std::path::Component::CurDir => {}
333                other => out.push(other),
334            }
335        }
336        out
337    }
338
339    fn check_patterns(command: &str) -> Option<&'static str> {
340        DESTRUCTIVE_PATTERNS
341            .iter()
342            .find(|&pat| command.contains(pat))
343            .copied()
344    }
345
346    fn check_extra_patterns(&self, command: &str) -> Option<String> {
347        self.extra_patterns
348            .iter()
349            .find(|pat| command.contains(pat.as_str()))
350            .cloned()
351    }
352}
353
354impl PreExecutionVerifier for DestructiveCommandVerifier {
355    fn name(&self) -> &'static str {
356        "DestructiveCommandVerifier"
357    }
358
359    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
360        if !self.is_shell_tool(tool_name) {
361            return VerificationResult::Allow;
362        }
363
364        let Some(command) = Self::extract_command(args) else {
365            return VerificationResult::Allow;
366        };
367
368        if let Some(pat) = Self::check_patterns(&command) {
369            if self.is_allowed_path(&command) {
370                return VerificationResult::Allow;
371            }
372            return VerificationResult::Block {
373                reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
374            };
375        }
376
377        if let Some(pat) = self.check_extra_patterns(&command) {
378            if self.is_allowed_path(&command) {
379                return VerificationResult::Allow;
380            }
381            return VerificationResult::Block {
382                reason: format!(
383                    "[{}] extra destructive pattern '{}' detected",
384                    self.name(),
385                    pat
386                ),
387            };
388        }
389
390        VerificationResult::Allow
391    }
392}
393
394// ---------------------------------------------------------------------------
395// InjectionPatternVerifier
396// ---------------------------------------------------------------------------
397
398/// High-confidence injection block patterns applied to string field values in tool args.
399///
400/// These require *structural* patterns, not just keywords — e.g., `UNION SELECT` is
401/// blocked but a plain mention of "SELECT" is not. This avoids false positives for
402/// `memory_search` queries discussing SQL or coding assistants writing SQL examples.
403static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
404    [
405        // SQL injection structural patterns
406        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
407        r"(?i)'\s*OR\s*1\s*=\s*1",
408        r"(?i);\s*DROP\s+TABLE",
409        r"(?i)UNION\s+SELECT",
410        r"(?i)'\s*;\s*SELECT",
411        // Command injection via shell metacharacters with dangerous commands
412        r";\s*rm\s+",
413        r"\|\s*rm\s+",
414        r"&&\s*rm\s+",
415        r";\s*curl\s+",
416        r"\|\s*curl\s+",
417        r"&&\s*curl\s+",
418        r";\s*wget\s+",
419        // Path traversal to sensitive system files
420        r"\.\./\.\./\.\./etc/passwd",
421        r"\.\./\.\./\.\./etc/shadow",
422        r"\.\./\.\./\.\./windows/",
423        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
424    ]
425    .iter()
426    .map(|s| Regex::new(s).expect("static pattern must compile"))
427    .collect()
428});
429
430/// SSRF host patterns — matched against the *extracted host* (not the full URL string).
431/// This prevents bypasses like `http://evil.com/?r=http://localhost` where the SSRF
432/// target appears only in a query parameter, not as the actual request host.
433/// Bare hostnames (no port/path) are included alongside `host:port` variants.
434static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
435    [
436        // localhost — with or without port
437        r"^localhost$",
438        r"^localhost:",
439        // IPv4 loopback
440        r"^127\.0\.0\.1$",
441        r"^127\.0\.0\.1:",
442        // IPv6 loopback
443        r"^\[::1\]$",
444        r"^\[::1\]:",
445        // AWS metadata service
446        r"^169\.254\.169\.254$",
447        r"^169\.254\.169\.254:",
448        // RFC-1918 private ranges
449        r"^10\.\d+\.\d+\.\d+$",
450        r"^10\.\d+\.\d+\.\d+:",
451        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
452        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
453        r"^192\.168\.\d+\.\d+$",
454        r"^192\.168\.\d+\.\d+:",
455    ]
456    .iter()
457    .map(|s| Regex::new(s).expect("static pattern must compile"))
458    .collect()
459});
460
/// Extract the host (and optional port) from a URL string.
///
/// Takes the authority — the portion between `://` and the next `/`, `?`, `#`, or end
/// of string — and drops any `user:password@` userinfo in front of the host, so that
/// `http://user@localhost/` yields `localhost` rather than `user@localhost`.
///
/// Returns `None` if the URL has no `://` scheme separator.
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // Userinfo ends at the last `@` inside the authority; the host follows it.
    Some(
        authority
            .rsplit_once('@')
            .map_or(authority, |(_, host)| host),
    )
}
471
/// Argument names whose values are expected to hold a URL or endpoint.
/// SSRF host patterns are applied only to fields with one of these names.
static URL_FIELD_NAMES: &[&str] = &[
    "url",
    "endpoint",
    "uri",
    "href",
    "src",
    "host",
    "base_url",
];
474
/// Argument names known to carry free-form, user-provided text. SQL injection and
/// command injection patterns are not applied to these fields, which would otherwise
/// produce false positives.
/// Examples: `memory_search(query=...)`, `web_search(query=...)`.
static SAFE_QUERY_FIELDS: &[&str] = &[
    "query",
    "q",
    "search",
    "text",
    "message",
    "content",
];
479
480/// Verifier that blocks tool arguments containing SQL injection, command injection,
481/// or path traversal patterns. Applies to ALL tools using field-aware matching.
482///
483/// ## Field-aware matching
484///
485/// Rather than serialising all args to a flat string (which causes false positives),
486/// this verifier iterates over each string-valued field and applies pattern categories
487/// based on field semantics:
488///
489/// - `SAFE_QUERY_FIELDS` (`query`, `q`, `search`, `text`, …): injection patterns are
490///   **skipped** — these fields contain user-provided text and generate too many false
491///   positives for SQL/command discussions in chat.
492/// - `URL_FIELD_NAMES` (`url`, `endpoint`, `uri`, …): SSRF patterns are applied.
493/// - All other string fields: injection + path traversal patterns are applied.
494///
495/// ## Warn semantics
496///
497/// `VerificationResult::Warn` is metrics-only — the tool call proceeds, a WARN log
498/// entry is emitted, and the TUI security panel counter increments. The LLM does not
499/// see the warning in its tool result.
500#[derive(Debug)]
501pub struct InjectionPatternVerifier {
502    extra_patterns: Vec<Regex>,
503    allowlisted_urls: Vec<String>,
504}
505
506impl InjectionPatternVerifier {
507    #[must_use]
508    pub fn new(config: &InjectionVerifierConfig) -> Self {
509        let extra_patterns = config
510            .extra_patterns
511            .iter()
512            .filter_map(|s| match Regex::new(s) {
513                Ok(re) => Some(re),
514                Err(e) => {
515                    tracing::warn!(
516                        pattern = %s,
517                        error = %e,
518                        "InjectionPatternVerifier: invalid extra_pattern, skipping"
519                    );
520                    None
521                }
522            })
523            .collect();
524
525        Self {
526            extra_patterns,
527            allowlisted_urls: config
528                .allowlisted_urls
529                .iter()
530                .map(|s| s.to_lowercase())
531                .collect(),
532        }
533    }
534
535    fn is_allowlisted(&self, text: &str) -> bool {
536        let lower = text.to_lowercase();
537        self.allowlisted_urls
538            .iter()
539            .any(|u| lower.contains(u.as_str()))
540    }
541
542    fn is_url_field(field: &str) -> bool {
543        let lower = field.to_lowercase();
544        URL_FIELD_NAMES.iter().any(|&f| f == lower)
545    }
546
547    fn is_safe_query_field(field: &str) -> bool {
548        let lower = field.to_lowercase();
549        SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
550    }
551
552    /// Check a single string value from a named field.
553    fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
554        let is_url = Self::is_url_field(field);
555        let is_safe_query = Self::is_safe_query_field(field);
556
557        // Injection + path traversal: skip safe query fields (user text), apply elsewhere.
558        if !is_safe_query {
559            for pat in INJECTION_BLOCK_PATTERNS.iter() {
560                if pat.is_match(value) {
561                    return VerificationResult::Block {
562                        reason: format!(
563                            "[{}] injection pattern detected in field '{}': {}",
564                            "InjectionPatternVerifier",
565                            field,
566                            pat.as_str()
567                        ),
568                    };
569                }
570            }
571            for pat in &self.extra_patterns {
572                if pat.is_match(value) {
573                    return VerificationResult::Block {
574                        reason: format!(
575                            "[{}] extra injection pattern detected in field '{}': {}",
576                            "InjectionPatternVerifier",
577                            field,
578                            pat.as_str()
579                        ),
580                    };
581                }
582            }
583        }
584
585        // SSRF: apply only to URL-like fields.
586        // Extract the host first so that SSRF targets embedded in query parameters
587        // (e.g. `http://evil.com/?r=http://localhost`) are not falsely matched.
588        if is_url && let Some(host) = extract_url_host(value) {
589            for pat in SSRF_HOST_PATTERNS.iter() {
590                if pat.is_match(host) {
591                    if self.is_allowlisted(value) {
592                        return VerificationResult::Allow;
593                    }
594                    return VerificationResult::Warn {
595                        message: format!(
596                            "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
597                            "InjectionPatternVerifier", field, host,
598                        ),
599                    };
600                }
601            }
602        }
603
604        VerificationResult::Allow
605    }
606
607    /// Walk all string leaf values in a JSON object, collecting field names for context.
608    fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
609        for (key, val) in obj {
610            let result = self.check_value(key, val);
611            if !matches!(result, VerificationResult::Allow) {
612                return result;
613            }
614        }
615        VerificationResult::Allow
616    }
617
618    fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
619        match val {
620            serde_json::Value::String(s) => self.check_field_value(field, s),
621            serde_json::Value::Array(arr) => {
622                for item in arr {
623                    let r = self.check_value(field, item);
624                    if !matches!(r, VerificationResult::Allow) {
625                        return r;
626                    }
627                }
628                VerificationResult::Allow
629            }
630            serde_json::Value::Object(obj) => self.check_object(obj),
631            // Non-string primitives (numbers, booleans, null) cannot contain injection.
632            _ => VerificationResult::Allow,
633        }
634    }
635}
636
637impl PreExecutionVerifier for InjectionPatternVerifier {
638    fn name(&self) -> &'static str {
639        "InjectionPatternVerifier"
640    }
641
642    fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
643        match args {
644            serde_json::Value::Object(obj) => self.check_object(obj),
645            // Flat string args (unusual but handle gracefully — treat as unnamed field).
646            serde_json::Value::String(s) => self.check_field_value("_args", s),
647            _ => VerificationResult::Allow,
648        }
649    }
650}
651
652// ---------------------------------------------------------------------------
653// Tests
654// ---------------------------------------------------------------------------
655
656#[cfg(test)]
657mod tests {
658    use serde_json::json;
659
660    use super::*;
661
662    // --- DestructiveCommandVerifier ---
663
664    fn dcv() -> DestructiveCommandVerifier {
665        DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default())
666    }
667
668    #[test]
669    fn allow_normal_command() {
670        let v = dcv();
671        assert_eq!(
672            v.verify("bash", &json!({"command": "ls -la /tmp"})),
673            VerificationResult::Allow
674        );
675    }
676
677    #[test]
678    fn block_rm_rf_root() {
679        let v = dcv();
680        let result = v.verify("bash", &json!({"command": "rm -rf /"}));
681        assert!(matches!(result, VerificationResult::Block { .. }));
682    }
683
684    #[test]
685    fn block_dd_dev_zero() {
686        let v = dcv();
687        let result = v.verify("bash", &json!({"command": "dd if=/dev/zero of=/dev/sda"}));
688        assert!(matches!(result, VerificationResult::Block { .. }));
689    }
690
691    #[test]
692    fn block_mkfs() {
693        let v = dcv();
694        let result = v.verify("bash", &json!({"command": "mkfs.ext4 /dev/sda1"}));
695        assert!(matches!(result, VerificationResult::Block { .. }));
696    }
697
698    #[test]
699    fn allow_rm_rf_in_allowed_path() {
700        let config = DestructiveVerifierConfig {
701            allowed_paths: vec!["/tmp/build".to_string()],
702            ..Default::default()
703        };
704        let v = DestructiveCommandVerifier::new(&config);
705        assert_eq!(
706            v.verify("bash", &json!({"command": "rm -rf /tmp/build/artifacts"})),
707            VerificationResult::Allow
708        );
709    }
710
711    #[test]
712    fn block_rm_rf_when_not_in_allowed_path() {
713        let config = DestructiveVerifierConfig {
714            allowed_paths: vec!["/tmp/build".to_string()],
715            ..Default::default()
716        };
717        let v = DestructiveCommandVerifier::new(&config);
718        let result = v.verify("bash", &json!({"command": "rm -rf /home/user"}));
719        assert!(matches!(result, VerificationResult::Block { .. }));
720    }
721
722    #[test]
723    fn allow_non_shell_tool() {
724        let v = dcv();
725        assert_eq!(
726            v.verify("read_file", &json!({"path": "rm -rf /"})),
727            VerificationResult::Allow
728        );
729    }
730
731    #[test]
732    fn block_extra_pattern() {
733        let config = DestructiveVerifierConfig {
734            extra_patterns: vec!["format c:".to_string()],
735            ..Default::default()
736        };
737        let v = DestructiveCommandVerifier::new(&config);
738        let result = v.verify("bash", &json!({"command": "format c:"}));
739        assert!(matches!(result, VerificationResult::Block { .. }));
740    }
741
742    #[test]
743    fn array_args_normalization() {
744        let v = dcv();
745        let result = v.verify("bash", &json!({"command": ["rm", "-rf", "/"]}));
746        assert!(matches!(result, VerificationResult::Block { .. }));
747    }
748
749    #[test]
750    fn sh_c_wrapping_normalization() {
751        let v = dcv();
752        let result = v.verify("bash", &json!({"command": "bash -c 'rm -rf /'"}));
753        assert!(matches!(result, VerificationResult::Block { .. }));
754    }
755
756    #[test]
757    fn fork_bomb_blocked() {
758        let v = dcv();
759        let result = v.verify("bash", &json!({"command": ":(){ :|:& };:"}));
760        assert!(matches!(result, VerificationResult::Block { .. }));
761    }
762
763    #[test]
764    fn custom_shell_tool_name_blocked() {
765        let config = DestructiveVerifierConfig {
766            shell_tools: vec!["execute".to_string(), "run_command".to_string()],
767            ..Default::default()
768        };
769        let v = DestructiveCommandVerifier::new(&config);
770        let result = v.verify("execute", &json!({"command": "rm -rf /"}));
771        assert!(matches!(result, VerificationResult::Block { .. }));
772    }
773
774    #[test]
775    fn terminal_tool_name_blocked_by_default() {
776        let v = dcv();
777        let result = v.verify("terminal", &json!({"command": "rm -rf /"}));
778        assert!(matches!(result, VerificationResult::Block { .. }));
779    }
780
781    #[test]
782    fn default_shell_tools_contains_bash_shell_terminal() {
783        let config = DestructiveVerifierConfig::default();
784        let lower: Vec<String> = config
785            .shell_tools
786            .iter()
787            .map(|s| s.to_lowercase())
788            .collect();
789        assert!(lower.contains(&"bash".to_string()));
790        assert!(lower.contains(&"shell".to_string()));
791        assert!(lower.contains(&"terminal".to_string()));
792    }
793
794    // --- InjectionPatternVerifier ---
795
796    fn ipv() -> InjectionPatternVerifier {
797        InjectionPatternVerifier::new(&InjectionVerifierConfig::default())
798    }
799
800    #[test]
801    fn allow_clean_args() {
802        let v = ipv();
803        assert_eq!(
804            v.verify("search", &json!({"query": "rust async traits"})),
805            VerificationResult::Allow
806        );
807    }
808
809    #[test]
810    fn allow_sql_discussion_in_query_field() {
811        // S2: memory_search with SQL discussion must NOT be blocked.
812        let v = ipv();
813        assert_eq!(
814            v.verify(
815                "memory_search",
816                &json!({"query": "explain SQL UNION SELECT vs JOIN"})
817            ),
818            VerificationResult::Allow
819        );
820    }
821
822    #[test]
823    fn allow_sql_or_pattern_in_query_field() {
824        // S2: safe query field must not trigger SQL injection pattern.
825        let v = ipv();
826        assert_eq!(
827            v.verify("memory_search", &json!({"query": "' OR '1'='1"})),
828            VerificationResult::Allow
829        );
830    }
831
832    #[test]
833    fn block_sql_injection_in_non_query_field() {
834        let v = ipv();
835        let result = v.verify("db_query", &json!({"sql": "' OR '1'='1"}));
836        assert!(matches!(result, VerificationResult::Block { .. }));
837    }
838
839    #[test]
840    fn block_drop_table() {
841        let v = ipv();
842        let result = v.verify("db_query", &json!({"input": "name'; DROP TABLE users"}));
843        assert!(matches!(result, VerificationResult::Block { .. }));
844    }
845
846    #[test]
847    fn block_path_traversal() {
848        let v = ipv();
849        let result = v.verify("read_file", &json!({"path": "../../../etc/passwd"}));
850        assert!(matches!(result, VerificationResult::Block { .. }));
851    }
852
853    #[test]
854    fn warn_on_localhost_url_field() {
855        // S2: SSRF warn only fires on URL-like fields.
856        let v = ipv();
857        let result = v.verify("http_get", &json!({"url": "http://localhost:8080/api"}));
858        assert!(matches!(result, VerificationResult::Warn { .. }));
859    }
860
861    #[test]
862    fn allow_localhost_in_non_url_field() {
863        // S2: localhost in a "text" field (not a URL field) must not warn.
864        let v = ipv();
865        assert_eq!(
866            v.verify(
867                "memory_search",
868                &json!({"query": "connect to http://localhost:8080"})
869            ),
870            VerificationResult::Allow
871        );
872    }
873
874    #[test]
875    fn warn_on_private_ip_url_field() {
876        let v = ipv();
877        let result = v.verify("fetch", &json!({"url": "http://192.168.1.1/admin"}));
878        assert!(matches!(result, VerificationResult::Warn { .. }));
879    }
880
881    #[test]
882    fn allow_localhost_when_allowlisted() {
883        let config = InjectionVerifierConfig {
884            allowlisted_urls: vec!["http://localhost:3000".to_string()],
885            ..Default::default()
886        };
887        let v = InjectionPatternVerifier::new(&config);
888        assert_eq!(
889            v.verify("http_get", &json!({"url": "http://localhost:3000/api"})),
890            VerificationResult::Allow
891        );
892    }
893
894    #[test]
895    fn block_union_select_in_non_query_field() {
896        let v = ipv();
897        let result = v.verify(
898            "db_query",
899            &json!({"input": "id=1 UNION SELECT password FROM users"}),
900        );
901        assert!(matches!(result, VerificationResult::Block { .. }));
902    }
903
904    #[test]
905    fn allow_union_select_in_query_field() {
906        // S2: "UNION SELECT" in a `query` field is a SQL discussion, not an injection.
907        let v = ipv();
908        assert_eq!(
909            v.verify(
910                "memory_search",
911                &json!({"query": "id=1 UNION SELECT password FROM users"})
912            ),
913            VerificationResult::Allow
914        );
915    }
916
917    // --- FIX-1: Unicode normalization bypass ---
918
919    #[test]
920    fn block_rm_rf_unicode_homoglyph() {
921        // U+FF0F FULLWIDTH SOLIDUS looks like '/' and NFKC-normalizes to '/'.
922        let v = dcv();
923        // "rm -rf ／" where ／ is U+FF0F
924        let result = v.verify("bash", &json!({"command": "rm -rf \u{FF0F}"}));
925        assert!(matches!(result, VerificationResult::Block { .. }));
926    }
927
928    // --- FIX-2: Path traversal in is_allowed_path ---
929
930    #[test]
931    fn path_traversal_not_allowed_via_dotdot() {
932        // `/tmp/build/../../etc` lexically resolves to `/etc`, NOT under `/tmp/build`.
933        let config = DestructiveVerifierConfig {
934            allowed_paths: vec!["/tmp/build".to_string()],
935            ..Default::default()
936        };
937        let v = DestructiveCommandVerifier::new(&config);
938        // Should be BLOCKED: resolved path is /etc, not under /tmp/build.
939        let result = v.verify("bash", &json!({"command": "rm -rf /tmp/build/../../etc"}));
940        assert!(matches!(result, VerificationResult::Block { .. }));
941    }
942
943    #[test]
944    fn allowed_path_with_dotdot_stays_in_allowed() {
945        // `/tmp/build/sub/../artifacts` resolves to `/tmp/build/artifacts` — still allowed.
946        let config = DestructiveVerifierConfig {
947            allowed_paths: vec!["/tmp/build".to_string()],
948            ..Default::default()
949        };
950        let v = DestructiveCommandVerifier::new(&config);
951        assert_eq!(
952            v.verify(
953                "bash",
954                &json!({"command": "rm -rf /tmp/build/sub/../artifacts"}),
955            ),
956            VerificationResult::Allow,
957        );
958    }
959
960    // --- FIX-3: Double-nested shell wrapping ---
961
962    #[test]
963    fn double_nested_bash_c_blocked() {
964        let v = dcv();
965        let result = v.verify(
966            "bash",
967            &json!({"command": "bash -c \"bash -c 'rm -rf /'\""}),
968        );
969        assert!(matches!(result, VerificationResult::Block { .. }));
970    }
971
972    #[test]
973    fn env_prefix_stripping_blocked() {
974        let v = dcv();
975        let result = v.verify(
976            "bash",
977            &json!({"command": "env FOO=bar bash -c 'rm -rf /'"}),
978        );
979        assert!(matches!(result, VerificationResult::Block { .. }));
980    }
981
982    #[test]
983    fn exec_prefix_stripping_blocked() {
984        let v = dcv();
985        let result = v.verify("bash", &json!({"command": "exec bash -c 'rm -rf /'"}));
986        assert!(matches!(result, VerificationResult::Block { .. }));
987    }
988
989    // --- FIX-4: SSRF host extraction (not substring match) ---
990
991    #[test]
992    fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
993        // `evil.com/?r=http://localhost` — host is `evil.com`, not localhost.
994        let v = ipv();
995        let result = v.verify(
996            "http_get",
997            &json!({"url": "http://evil.com/?r=http://localhost"}),
998        );
999        // Should NOT warn — the actual request host is evil.com, not localhost.
1000        assert_eq!(result, VerificationResult::Allow);
1001    }
1002
1003    #[test]
1004    fn ssrf_triggered_for_bare_localhost_no_port() {
1005        // FIX-7: `http://localhost` with no trailing slash or port must warn.
1006        let v = ipv();
1007        let result = v.verify("http_get", &json!({"url": "http://localhost"}));
1008        assert!(matches!(result, VerificationResult::Warn { .. }));
1009    }
1010
1011    #[test]
1012    fn ssrf_triggered_for_localhost_with_path() {
1013        let v = ipv();
1014        let result = v.verify("http_get", &json!({"url": "http://localhost/api/v1"}));
1015        assert!(matches!(result, VerificationResult::Warn { .. }));
1016    }
1017
1018    // --- Verifier chain: first Block wins, Warn continues ---
1019
1020    #[test]
1021    fn chain_first_block_wins() {
1022        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1023        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1024        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1025
1026        let args = json!({"command": "rm -rf /"});
1027        let mut result = VerificationResult::Allow;
1028        for v in &verifiers {
1029            result = v.verify("bash", &args);
1030            if matches!(result, VerificationResult::Block { .. }) {
1031                break;
1032            }
1033        }
1034        assert!(matches!(result, VerificationResult::Block { .. }));
1035    }
1036
1037    #[test]
1038    fn chain_warn_continues() {
1039        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
1040        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
1041        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];
1042
1043        // localhost URL in `url` field: dcv allows, ipv warns, chain does NOT block.
1044        let args = json!({"url": "http://localhost:8080/api"});
1045        let mut got_warn = false;
1046        let mut got_block = false;
1047        for v in &verifiers {
1048            match v.verify("http_get", &args) {
1049                VerificationResult::Block { .. } => {
1050                    got_block = true;
1051                    break;
1052                }
1053                VerificationResult::Warn { .. } => {
1054                    got_warn = true;
1055                }
1056                VerificationResult::Allow => {}
1057            }
1058        }
1059        assert!(got_warn);
1060        assert!(!got_block);
1061    }
1062}
zeph_tools/verifier.rs

zeph_tools/
verifier.rs