1use std::collections::HashSet;
26use std::sync::{Arc, LazyLock, RwLock};
27
28use regex::Regex;
29use serde::{Deserialize, Serialize};
30use unicode_normalization::UnicodeNormalization as _;
31
/// Serde default helper for boolean fields that are switched on unless the
/// configuration says otherwise.
fn default_true() -> bool {
    // Referenced by name from `#[serde(default = "default_true")]` attributes.
    true
}
35
/// Serde default helper: the tool names treated as shell executors when the
/// configuration does not list any.
fn default_shell_tools() -> Vec<String> {
    const NAMES: [&str; 3] = ["bash", "shell", "terminal"];
    NAMES.iter().map(|name| (*name).to_owned()).collect()
}
43
/// Verdict returned by a [`PreExecutionVerifier`] for a single tool call.
///
/// The type is `#[must_use]`, so dropping a verdict without looking at it
/// triggers a compiler warning.
#[must_use]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerificationResult {
    /// The verifier found nothing objectionable in the call.
    Allow,
    /// The verifier rejected the call; `reason` is a human-readable explanation.
    Block { reason: String },
    /// The verifier flagged the call without rejecting it; `message` describes
    /// the concern.
    Warn { message: String },
}
56
/// A check that inspects a tool call before the tool is executed.
///
/// Implementors must be `Send + Sync` and implement `Debug`.
pub trait PreExecutionVerifier: Send + Sync + std::fmt::Debug {
    /// Inspects the tool named `tool_name` together with its JSON `args` and
    /// returns a verdict for the call.
    fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult;

    /// Static, human-readable identifier of the verifier.
    fn name(&self) -> &'static str;
}
69
/// Configuration for [`DestructiveCommandVerifier`].
///
/// Every field has a serde default, so an empty table deserializes to the same
/// values as [`Default::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DestructiveVerifierConfig {
    /// Whether the verifier is switched on (default: `true`).
    // NOTE(review): `DestructiveCommandVerifier::new` does not read this flag;
    // presumably the code that assembles the verifier chain honours it — confirm.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Path prefixes under which an otherwise destructive command is allowed.
    /// Compared case-insensitively (entries are lowercased on construction).
    #[serde(default)]
    pub allowed_paths: Vec<String>,
    /// Additional literal substrings to treat as destructive, on top of the
    /// built-in list. Lowercased on construction.
    #[serde(default)]
    pub extra_patterns: Vec<String>,
    /// Names of the tools whose `command` argument is inspected
    /// (default: `bash`, `shell`, `terminal`). Matched case-insensitively.
    #[serde(default = "default_shell_tools")]
    pub shell_tools: Vec<String>,
}
99
100impl Default for DestructiveVerifierConfig {
101 fn default() -> Self {
102 Self {
103 enabled: true,
104 allowed_paths: Vec::new(),
105 extra_patterns: Vec::new(),
106 shell_tools: default_shell_tools(),
107 }
108 }
109}
110
/// Configuration for [`InjectionPatternVerifier`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct InjectionVerifierConfig {
    /// Whether the verifier is switched on (default: `true`).
    // NOTE(review): `InjectionPatternVerifier::new` does not read this flag;
    // presumably the caller honours it — confirm.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Additional regular expressions that block a call when they match an
    /// argument value. Entries that fail to compile are skipped with a warning.
    #[serde(default)]
    pub extra_patterns: Vec<String>,
    /// Strings that, when contained (case-insensitively) in a URL-field value,
    /// suppress the SSRF warning for that value.
    #[serde(default)]
    pub allowlisted_urls: Vec<String>,
}
124
125impl Default for InjectionVerifierConfig {
126 fn default() -> Self {
127 Self {
128 enabled: true,
129 extra_patterns: Vec::new(),
130 allowlisted_urls: Vec::new(),
131 }
132 }
133}
134
/// Configuration for [`UrlGroundingVerifier`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct UrlGroundingVerifierConfig {
    /// Whether the verifier is switched on (default: `true`).
    // NOTE(review): `UrlGroundingVerifier` does not read this flag itself;
    // presumably the caller honours it — confirm.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Names of the tools whose `url` argument must be grounded
    /// (default: `fetch`, `web_scrape`). Matched case-insensitively; tools whose
    /// name ends in `_fetch` are guarded regardless of this list.
    #[serde(default = "default_guarded_tools")]
    pub guarded_tools: Vec<String>,
}
150
/// Serde default helper: the tool names subject to URL grounding when the
/// configuration does not list any.
fn default_guarded_tools() -> Vec<String> {
    let mut tools = Vec::with_capacity(2);
    tools.push(String::from("fetch"));
    tools.push(String::from("web_scrape"));
    tools
}
154
155impl Default for UrlGroundingVerifierConfig {
156 fn default() -> Self {
157 Self {
158 enabled: true,
159 guarded_tools: default_guarded_tools(),
160 }
161 }
162}
163
/// Top-level configuration grouping the settings of every pre-execution
/// verifier defined in this file.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct PreExecutionVerifierConfig {
    /// Master switch (default: `true`).
    // NOTE(review): nothing in this file reads this flag; presumably the code
    // that builds the verifier chain does — confirm.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Settings for [`DestructiveCommandVerifier`].
    #[serde(default)]
    pub destructive_commands: DestructiveVerifierConfig,
    /// Settings for [`InjectionPatternVerifier`].
    #[serde(default)]
    pub injection_patterns: InjectionVerifierConfig,
    /// Settings for [`UrlGroundingVerifier`].
    #[serde(default)]
    pub url_grounding: UrlGroundingVerifierConfig,
}
176
177impl Default for PreExecutionVerifierConfig {
178 fn default() -> Self {
179 Self {
180 enabled: true,
181 destructive_commands: DestructiveVerifierConfig::default(),
182 injection_patterns: InjectionVerifierConfig::default(),
183 url_grounding: UrlGroundingVerifierConfig::default(),
184 }
185 }
186}
187
/// Built-in literal substrings that mark a shell command as destructive.
///
/// Entries are lowercase because `DestructiveCommandVerifier::extract_command`
/// lowercases the command before `check_patterns` does its substring search
/// (hence `chmod -r` / `chown -r` rather than `-R`).
static DESTRUCTIVE_PATTERNS: &[&str] = &[
    // Recursive deletion rooted at `/` or the home directory.
    "rm -rf /",
    "rm -rf ~",
    "rm -r /",
    // Raw disk / filesystem manipulation.
    "dd if=",
    "mkfs",
    "fdisk",
    "shred",
    "wipefs",
    // Fork bomb, with and without inner spaces.
    ":(){ :|:& };:",
    ":(){:|:&};:",
    // Recursive permission / ownership changes.
    "chmod -r 777 /",
    "chown -r",
];
210
/// Verifier that blocks shell tool calls whose command contains a known
/// destructive pattern, unless the command targets an allowed path.
///
/// All three lists are stored lowercased (see `new`).
#[derive(Debug)]
pub struct DestructiveCommandVerifier {
    /// Tool names whose `command` argument is inspected.
    shell_tools: Vec<String>,
    /// Path prefixes that exempt a matching command from being blocked.
    allowed_paths: Vec<String>,
    /// User-supplied literal patterns checked after the built-in ones.
    extra_patterns: Vec<String>,
}
224
225impl DestructiveCommandVerifier {
226 #[must_use]
227 pub fn new(config: &DestructiveVerifierConfig) -> Self {
228 Self {
229 shell_tools: config
230 .shell_tools
231 .iter()
232 .map(|s| s.to_lowercase())
233 .collect(),
234 allowed_paths: config
235 .allowed_paths
236 .iter()
237 .map(|s| s.to_lowercase())
238 .collect(),
239 extra_patterns: config
240 .extra_patterns
241 .iter()
242 .map(|s| s.to_lowercase())
243 .collect(),
244 }
245 }
246
247 fn is_shell_tool(&self, tool_name: &str) -> bool {
248 let lower = tool_name.to_lowercase();
249 self.shell_tools.iter().any(|t| t == &lower)
250 }
251
252 fn extract_command(args: &serde_json::Value) -> Option<String> {
262 let raw = match args.get("command") {
263 Some(serde_json::Value::String(s)) => s.clone(),
264 Some(serde_json::Value::Array(arr)) => arr
265 .iter()
266 .filter_map(|v| v.as_str())
267 .collect::<Vec<_>>()
268 .join(" "),
269 _ => return None,
270 };
271 let mut current: String = raw.nfkc().collect::<String>().to_lowercase();
273 for _ in 0..8 {
276 let trimmed = current.trim().to_owned();
277 let after_env = Self::strip_env_prefix(&trimmed);
279 let after_exec = after_env.strip_prefix("exec ").map_or(after_env, str::trim);
281 let mut unwrapped = false;
283 for interp in &["bash -c ", "sh -c ", "zsh -c "] {
284 if let Some(rest) = after_exec.strip_prefix(interp) {
285 let script = rest.trim().trim_matches(|c: char| c == '\'' || c == '"');
286 current.clone_from(&script.to_owned());
287 unwrapped = true;
288 break;
289 }
290 }
291 if !unwrapped {
292 return Some(after_exec.to_owned());
293 }
294 }
295 Some(current)
296 }
297
/// Strips a leading `env ` word and any number of leading `KEY=value`
/// assignments from `cmd`, returning the remainder as a sub-slice.
///
/// A key is a non-empty run of alphanumeric characters or `_`; a value runs up
/// to the next space (quoted values containing spaces are not understood).
/// If no prefix is present, `cmd` is returned unchanged.
fn strip_env_prefix(cmd: &str) -> &str {
    let mut rest = cmd;
    if let Some(after_env) = rest.strip_prefix("env ") {
        rest = after_env.trim_start();
    }
    loop {
        // `find` yields a BYTE offset, which is what slicing needs. Counting
        // chars instead would slice inside a multi-byte character (and panic)
        // whenever the key contains non-ASCII alphanumerics.
        let key_end = rest
            .find(|c: char| !(c.is_alphanumeric() || c == '_'))
            .unwrap_or(rest.len());
        if key_end == 0 {
            break;
        }
        let Some(after_eq) = rest[key_end..].strip_prefix('=') else {
            // A bare word without `=` is the command itself, not an assignment.
            break;
        };
        let val_end = after_eq.find(' ').unwrap_or(after_eq.len());
        rest = after_eq[val_end..].trim_start();
    }
    rest
}
328
329 fn is_allowed_path(&self, command: &str) -> bool {
335 if self.allowed_paths.is_empty() {
336 return false;
337 }
338 let tokens: Vec<&str> = command.split_whitespace().collect();
339 for token in &tokens {
340 let t = token.trim_matches(|c| c == '\'' || c == '"');
341 if t.starts_with('/') || t.starts_with('~') || t.starts_with('.') {
342 let normalized = Self::lexical_normalize(std::path::Path::new(t));
343 let n_lower = normalized.to_string_lossy().to_lowercase();
344 if self
345 .allowed_paths
346 .iter()
347 .any(|p| n_lower.starts_with(p.as_str()))
348 {
349 return true;
350 }
351 }
352 }
353 false
354 }
355
/// Resolves `.` and `..` components of `p` purely lexically, without
/// consulting the filesystem.
///
/// `.` is dropped, `..` removes the previously accumulated component (and is
/// a no-op when there is nothing left to remove); every other component is
/// appended as-is.
fn lexical_normalize(p: &std::path::Path) -> std::path::PathBuf {
    use std::path::Component;

    p.components()
        .fold(std::path::PathBuf::new(), |mut acc, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    acc.pop();
                }
                keep => acc.push(keep),
            }
            acc
        })
}
371
372 fn check_patterns(command: &str) -> Option<&'static str> {
373 DESTRUCTIVE_PATTERNS
374 .iter()
375 .find(|&pat| command.contains(pat))
376 .copied()
377 }
378
379 fn check_extra_patterns(&self, command: &str) -> Option<String> {
380 self.extra_patterns
381 .iter()
382 .find(|pat| command.contains(pat.as_str()))
383 .cloned()
384 }
385}
386
387impl PreExecutionVerifier for DestructiveCommandVerifier {
388 fn name(&self) -> &'static str {
389 "DestructiveCommandVerifier"
390 }
391
392 fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
393 if !self.is_shell_tool(tool_name) {
394 return VerificationResult::Allow;
395 }
396
397 let Some(command) = Self::extract_command(args) else {
398 return VerificationResult::Allow;
399 };
400
401 if let Some(pat) = Self::check_patterns(&command) {
402 if self.is_allowed_path(&command) {
403 return VerificationResult::Allow;
404 }
405 return VerificationResult::Block {
406 reason: format!("[{}] destructive pattern '{}' detected", self.name(), pat),
407 };
408 }
409
410 if let Some(pat) = self.check_extra_patterns(&command) {
411 if self.is_allowed_path(&command) {
412 return VerificationResult::Allow;
413 }
414 return VerificationResult::Block {
415 reason: format!(
416 "[{}] extra destructive pattern '{}' detected",
417 self.name(),
418 pat
419 ),
420 };
421 }
422
423 VerificationResult::Allow
424 }
425}
426
/// Built-in regular expressions that cause an argument value to be blocked.
///
/// Compiled once on first use. The `expect` can only fire if one of the
/// literals below is not a valid regex, i.e. on a programming error.
static INJECTION_BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    [
        // SQL injection fragments (case-insensitive).
        r"(?i)'\s*OR\s*'1'\s*=\s*'1",
        r"(?i)'\s*OR\s*1\s*=\s*1",
        r"(?i);\s*DROP\s+TABLE",
        r"(?i)UNION\s+SELECT",
        r"(?i)'\s*;\s*SELECT",
        // Shell command chaining into `rm`, `curl` or `wget` (case-sensitive).
        // NOTE(review): `wget` is only covered after `;`, whereas `rm` and
        // `curl` are also covered after `|` and `&&` — confirm whether the
        // asymmetry is intentional.
        r";\s*rm\s+",
        r"\|\s*rm\s+",
        r"&&\s*rm\s+",
        r";\s*curl\s+",
        r"\|\s*curl\s+",
        r"&&\s*curl\s+",
        r";\s*wget\s+",
        // Path traversal. The last pattern (three consecutive `../` or `..\`
        // segments) also covers the three more specific ones above it.
        r"\.\./\.\./\.\./etc/passwd",
        r"\.\./\.\./\.\./etc/shadow",
        r"\.\./\.\./\.\./windows/",
        r"\.\.[/\\]\.\.[/\\]\.\.[/\\]",
    ]
    .iter()
    .map(|s| Regex::new(s).expect("static pattern must compile"))
    .collect()
});
462
/// Regular expressions matched against the host part of a URL (as returned by
/// `extract_url_host`, i.e. possibly followed by `:port`) to flag possible SSRF
/// targets.
///
/// Every host comes as a pair: an exact match (`$`) and a match followed by a
/// port separator (`:`). All patterns are anchored at the start, so a host
/// that merely contains one of these strings later on does not match.
// NOTE(review): the patterns are case-sensitive and cover only the literal
// loopback address 127.0.0.1, not the rest of 127.0.0.0/8 — confirm intent.
static SSRF_HOST_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    [
        // Loopback by name.
        r"^localhost$",
        r"^localhost:",
        // IPv4 loopback.
        r"^127\.0\.0\.1$",
        r"^127\.0\.0\.1:",
        // IPv6 loopback in bracket notation.
        r"^\[::1\]$",
        r"^\[::1\]:",
        // Link-local address 169.254.169.254.
        r"^169\.254\.169\.254$",
        r"^169\.254\.169\.254:",
        // Private range 10.0.0.0/8.
        r"^10\.\d+\.\d+\.\d+$",
        r"^10\.\d+\.\d+\.\d+:",
        // Private range 172.16.0.0/12 (second octet 16-31).
        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$",
        r"^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+:",
        // Private range 192.168.0.0/16.
        r"^192\.168\.\d+\.\d+$",
        r"^192\.168\.\d+\.\d+:",
    ]
    .iter()
    .map(|s| Regex::new(s).expect("static pattern must compile"))
    .collect()
});
493
/// Returns the host part of `url`, including a `:port` suffix when present.
///
/// The host is what lies between `://` and the first `/`, `?` or `#`, with any
/// `userinfo@` prefix removed. Stripping the userinfo matters because the SSRF
/// patterns are anchored at the start of the host: without it,
/// `http://user@localhost/` would yield `user@localhost` and go unnoticed.
///
/// Returns `None` when `url` contains no `://`.
fn extract_url_host(url: &str) -> Option<&str> {
    let (_, after_scheme) = url.split_once("://")?;
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];
    // Everything up to the LAST `@` of the authority is userinfo.
    let host = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    Some(host)
}
504
/// Argument names (lowercase) whose string values are treated as URLs and
/// therefore receive the SSRF host check in addition to the injection checks.
static URL_FIELD_NAMES: &[&str] = &["url", "endpoint", "uri", "href", "src", "host", "base_url"];
507
/// Argument names (lowercase) whose values are exempt from the injection
/// pattern checks.
// NOTE(review): presumably these carry free-form user text in which SQL or
// shell fragments are legitimate subject matter — confirm against the tools.
static SAFE_QUERY_FIELDS: &[&str] = &["query", "q", "search", "text", "message", "content"];
512
/// Verifier that scans tool-call arguments for injection patterns (blocking)
/// and URL fields for internal hosts (warning only).
#[derive(Debug)]
pub struct InjectionPatternVerifier {
    /// Successfully compiled user-supplied block patterns.
    extra_patterns: Vec<Regex>,
    /// Lowercased strings that suppress the SSRF warning when contained in a
    /// URL-field value.
    allowlisted_urls: Vec<String>,
}
538
539impl InjectionPatternVerifier {
540 #[must_use]
541 pub fn new(config: &InjectionVerifierConfig) -> Self {
542 let extra_patterns = config
543 .extra_patterns
544 .iter()
545 .filter_map(|s| match Regex::new(s) {
546 Ok(re) => Some(re),
547 Err(e) => {
548 tracing::warn!(
549 pattern = %s,
550 error = %e,
551 "InjectionPatternVerifier: invalid extra_pattern, skipping"
552 );
553 None
554 }
555 })
556 .collect();
557
558 Self {
559 extra_patterns,
560 allowlisted_urls: config
561 .allowlisted_urls
562 .iter()
563 .map(|s| s.to_lowercase())
564 .collect(),
565 }
566 }
567
568 fn is_allowlisted(&self, text: &str) -> bool {
569 let lower = text.to_lowercase();
570 self.allowlisted_urls
571 .iter()
572 .any(|u| lower.contains(u.as_str()))
573 }
574
575 fn is_url_field(field: &str) -> bool {
576 let lower = field.to_lowercase();
577 URL_FIELD_NAMES.iter().any(|&f| f == lower)
578 }
579
580 fn is_safe_query_field(field: &str) -> bool {
581 let lower = field.to_lowercase();
582 SAFE_QUERY_FIELDS.iter().any(|&f| f == lower)
583 }
584
585 fn check_field_value(&self, field: &str, value: &str) -> VerificationResult {
587 let is_url = Self::is_url_field(field);
588 let is_safe_query = Self::is_safe_query_field(field);
589
590 if !is_safe_query {
592 for pat in INJECTION_BLOCK_PATTERNS.iter() {
593 if pat.is_match(value) {
594 return VerificationResult::Block {
595 reason: format!(
596 "[{}] injection pattern detected in field '{}': {}",
597 "InjectionPatternVerifier",
598 field,
599 pat.as_str()
600 ),
601 };
602 }
603 }
604 for pat in &self.extra_patterns {
605 if pat.is_match(value) {
606 return VerificationResult::Block {
607 reason: format!(
608 "[{}] extra injection pattern detected in field '{}': {}",
609 "InjectionPatternVerifier",
610 field,
611 pat.as_str()
612 ),
613 };
614 }
615 }
616 }
617
618 if is_url && let Some(host) = extract_url_host(value) {
622 for pat in SSRF_HOST_PATTERNS.iter() {
623 if pat.is_match(host) {
624 if self.is_allowlisted(value) {
625 return VerificationResult::Allow;
626 }
627 return VerificationResult::Warn {
628 message: format!(
629 "[{}] possible SSRF in field '{}': host '{}' matches pattern (not blocked)",
630 "InjectionPatternVerifier", field, host,
631 ),
632 };
633 }
634 }
635 }
636
637 VerificationResult::Allow
638 }
639
640 fn check_object(&self, obj: &serde_json::Map<String, serde_json::Value>) -> VerificationResult {
642 for (key, val) in obj {
643 let result = self.check_value(key, val);
644 if !matches!(result, VerificationResult::Allow) {
645 return result;
646 }
647 }
648 VerificationResult::Allow
649 }
650
651 fn check_value(&self, field: &str, val: &serde_json::Value) -> VerificationResult {
652 match val {
653 serde_json::Value::String(s) => self.check_field_value(field, s),
654 serde_json::Value::Array(arr) => {
655 for item in arr {
656 let r = self.check_value(field, item);
657 if !matches!(r, VerificationResult::Allow) {
658 return r;
659 }
660 }
661 VerificationResult::Allow
662 }
663 serde_json::Value::Object(obj) => self.check_object(obj),
664 _ => VerificationResult::Allow,
666 }
667 }
668}
669
670impl PreExecutionVerifier for InjectionPatternVerifier {
671 fn name(&self) -> &'static str {
672 "InjectionPatternVerifier"
673 }
674
675 fn verify(&self, _tool_name: &str, args: &serde_json::Value) -> VerificationResult {
676 match args {
677 serde_json::Value::Object(obj) => self.check_object(obj),
678 serde_json::Value::String(s) => self.check_field_value("_args", s),
680 _ => VerificationResult::Allow,
681 }
682 }
683}
684
/// Verifier that only lets guarded tools fetch URLs related to ones present in
/// a shared set of user-provided URLs.
#[derive(Debug, Clone)]
pub struct UrlGroundingVerifier {
    /// Lowercased names of the tools subject to grounding.
    guarded_tools: Vec<String>,
    /// Shared set of URLs considered user-provided. It is only read here.
    // NOTE(review): `is_grounded` compares against the entries verbatim, so
    // they are assumed to be stored lowercased by whoever fills the set.
    user_provided_urls: Arc<RwLock<HashSet<String>>>,
}
708
709impl UrlGroundingVerifier {
710 #[must_use]
711 pub fn new(
712 config: &UrlGroundingVerifierConfig,
713 user_provided_urls: Arc<RwLock<HashSet<String>>>,
714 ) -> Self {
715 Self {
716 guarded_tools: config
717 .guarded_tools
718 .iter()
719 .map(|s| s.to_lowercase())
720 .collect(),
721 user_provided_urls,
722 }
723 }
724
725 fn is_guarded(&self, tool_name: &str) -> bool {
726 let lower = tool_name.to_lowercase();
727 self.guarded_tools.iter().any(|t| t == &lower) || lower.ends_with("_fetch")
728 }
729
/// Returns `true` when `url` is grounded in one of `user_provided_urls`.
///
/// A URL is grounded by an entry when both of the following hold:
/// * they have the same authority (the part between `://` and the first `/`,
///   `?` or `#`, i.e. userinfo, host and port), and
/// * one is a string prefix of the other, in either direction.
///
/// The authority check is what stops look-alike hosts: a bare prefix test
/// would accept `https://docs.rs.evil.com/` and `https://docs.rs@evil.com/`
/// for the entry `https://docs.rs`, and would accept an empty or truncated
/// URL through the reverse-prefix rule.
///
/// `url` is lowercased before comparison; the entries are compared as stored
/// and are expected to be lowercase already.
fn is_grounded(url: &str, user_provided_urls: &HashSet<String>) -> bool {
    /// Authority of `u`; for input without `://` the text up to the first
    /// `/`, `?` or `#` is used.
    fn authority(u: &str) -> &str {
        let rest = u.split_once("://").map_or(u, |(_, rest)| rest);
        let end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
        &rest[..end]
    }

    let lower = url.to_lowercase();
    let wanted = authority(&lower);
    user_provided_urls.iter().any(|u| {
        authority(u) == wanted
            && (lower.starts_with(u.as_str()) || u.starts_with(lower.as_str()))
    })
}
738}
739
740impl PreExecutionVerifier for UrlGroundingVerifier {
741 fn name(&self) -> &'static str {
742 "UrlGroundingVerifier"
743 }
744
745 fn verify(&self, tool_name: &str, args: &serde_json::Value) -> VerificationResult {
746 if !self.is_guarded(tool_name) {
747 return VerificationResult::Allow;
748 }
749
750 let Some(url) = args.get("url").and_then(|v| v.as_str()) else {
751 return VerificationResult::Allow;
752 };
753
754 let Ok(urls) = self.user_provided_urls.read() else {
755 return VerificationResult::Allow;
757 };
758
759 if Self::is_grounded(url, &urls) {
760 return VerificationResult::Allow;
761 }
762
763 VerificationResult::Block {
764 reason: format!(
765 "[UrlGroundingVerifier] fetch rejected: URL '{url}' was not provided by the user",
766 ),
767 }
768 }
769}
770
/// Unit tests for the three verifiers defined in this file.
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    // ---------------------------------------------------------------------
    // DestructiveCommandVerifier
    // ---------------------------------------------------------------------

    /// Verifier with the default configuration.
    fn dcv() -> DestructiveCommandVerifier {
        DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default())
    }

    /// A harmless command passes.
    #[test]
    fn allow_normal_command() {
        let v = dcv();
        assert_eq!(
            v.verify("bash", &json!({"command": "ls -la /tmp"})),
            VerificationResult::Allow
        );
    }

    /// Recursive deletion of the root directory is blocked.
    #[test]
    fn block_rm_rf_root() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "rm -rf /"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `dd if=` is blocked.
    #[test]
    fn block_dd_dev_zero() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "dd if=/dev/zero of=/dev/sda"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `mkfs` is matched as a substring, so `mkfs.ext4` is blocked too.
    #[test]
    fn block_mkfs() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "mkfs.ext4 /dev/sda1"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// A destructive command confined to an allowed path passes.
    #[test]
    fn allow_rm_rf_in_allowed_path() {
        let config = DestructiveVerifierConfig {
            allowed_paths: vec!["/tmp/build".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        assert_eq!(
            v.verify("bash", &json!({"command": "rm -rf /tmp/build/artifacts"})),
            VerificationResult::Allow
        );
    }

    /// The allowed-path exemption does not extend to other paths.
    #[test]
    fn block_rm_rf_when_not_in_allowed_path() {
        let config = DestructiveVerifierConfig {
            allowed_paths: vec!["/tmp/build".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        let result = v.verify("bash", &json!({"command": "rm -rf /home/user"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Tools that are not shell tools are not inspected at all.
    #[test]
    fn allow_non_shell_tool() {
        let v = dcv();
        assert_eq!(
            v.verify("read_file", &json!({"path": "rm -rf /"})),
            VerificationResult::Allow
        );
    }

    /// User-configured extra patterns block as well.
    #[test]
    fn block_extra_pattern() {
        let config = DestructiveVerifierConfig {
            extra_patterns: vec!["format c:".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        let result = v.verify("bash", &json!({"command": "format c:"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// An argv-style array is joined with spaces before matching.
    #[test]
    fn array_args_normalization() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": ["rm", "-rf", "/"]}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// A `bash -c '…'` wrapper does not hide the inner command.
    #[test]
    fn sh_c_wrapping_normalization() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "bash -c 'rm -rf /'"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// The classic fork bomb is blocked.
    #[test]
    fn fork_bomb_blocked() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": ":(){ :|:& };:"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Custom shell tool names from the configuration are honoured.
    #[test]
    fn custom_shell_tool_name_blocked() {
        let config = DestructiveVerifierConfig {
            shell_tools: vec!["execute".to_string(), "run_command".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        let result = v.verify("execute", &json!({"command": "rm -rf /"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `terminal` is one of the default shell tool names.
    #[test]
    fn terminal_tool_name_blocked_by_default() {
        let v = dcv();
        let result = v.verify("terminal", &json!({"command": "rm -rf /"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Pins the default shell tool list.
    #[test]
    fn default_shell_tools_contains_bash_shell_terminal() {
        let config = DestructiveVerifierConfig::default();
        let lower: Vec<String> = config
            .shell_tools
            .iter()
            .map(|s| s.to_lowercase())
            .collect();
        assert!(lower.contains(&"bash".to_string()));
        assert!(lower.contains(&"shell".to_string()));
        assert!(lower.contains(&"terminal".to_string()));
    }

    // ---------------------------------------------------------------------
    // InjectionPatternVerifier
    // ---------------------------------------------------------------------

    /// Verifier with the default configuration.
    fn ipv() -> InjectionPatternVerifier {
        InjectionPatternVerifier::new(&InjectionVerifierConfig::default())
    }

    /// Ordinary arguments pass.
    #[test]
    fn allow_clean_args() {
        let v = ipv();
        assert_eq!(
            v.verify("search", &json!({"query": "rust async traits"})),
            VerificationResult::Allow
        );
    }

    /// `query` is a safe query field, so SQL keywords in it are not scanned.
    #[test]
    fn allow_sql_discussion_in_query_field() {
        let v = ipv();
        assert_eq!(
            v.verify(
                "memory_search",
                &json!({"query": "explain SQL UNION SELECT vs JOIN"})
            ),
            VerificationResult::Allow
        );
    }

    /// Even a textbook injection string is allowed in a safe query field.
    #[test]
    fn allow_sql_or_pattern_in_query_field() {
        let v = ipv();
        assert_eq!(
            v.verify("memory_search", &json!({"query": "' OR '1'='1"})),
            VerificationResult::Allow
        );
    }

    /// The same string in a field that is not exempt is blocked.
    #[test]
    fn block_sql_injection_in_non_query_field() {
        let v = ipv();
        let result = v.verify("db_query", &json!({"sql": "' OR '1'='1"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `; DROP TABLE` is blocked.
    #[test]
    fn block_drop_table() {
        let v = ipv();
        let result = v.verify("db_query", &json!({"input": "name'; DROP TABLE users"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Three-level path traversal is blocked.
    #[test]
    fn block_path_traversal() {
        let v = ipv();
        let result = v.verify("read_file", &json!({"path": "../../../etc/passwd"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// A localhost URL in a URL field produces a warning, not a block.
    #[test]
    fn warn_on_localhost_url_field() {
        let v = ipv();
        let result = v.verify("http_get", &json!({"url": "http://localhost:8080/api"}));
        assert!(matches!(result, VerificationResult::Warn { .. }));
    }

    /// The SSRF check only applies to URL fields.
    #[test]
    fn allow_localhost_in_non_url_field() {
        let v = ipv();
        assert_eq!(
            v.verify(
                "memory_search",
                &json!({"query": "connect to http://localhost:8080"})
            ),
            VerificationResult::Allow
        );
    }

    /// Private-range IPv4 hosts produce a warning.
    #[test]
    fn warn_on_private_ip_url_field() {
        let v = ipv();
        let result = v.verify("fetch", &json!({"url": "http://192.168.1.1/admin"}));
        assert!(matches!(result, VerificationResult::Warn { .. }));
    }

    /// An allowlisted URL suppresses the SSRF warning.
    #[test]
    fn allow_localhost_when_allowlisted() {
        let config = InjectionVerifierConfig {
            allowlisted_urls: vec!["http://localhost:3000".to_string()],
            ..Default::default()
        };
        let v = InjectionPatternVerifier::new(&config);
        assert_eq!(
            v.verify("http_get", &json!({"url": "http://localhost:3000/api"})),
            VerificationResult::Allow
        );
    }

    /// `UNION SELECT` in a non-exempt field is blocked.
    #[test]
    fn block_union_select_in_non_query_field() {
        let v = ipv();
        let result = v.verify(
            "db_query",
            &json!({"input": "id=1 UNION SELECT password FROM users"}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// The same payload in a safe query field is allowed.
    #[test]
    fn allow_union_select_in_query_field() {
        let v = ipv();
        assert_eq!(
            v.verify(
                "memory_search",
                &json!({"query": "id=1 UNION SELECT password FROM users"})
            ),
            VerificationResult::Allow
        );
    }

    // ---------------------------------------------------------------------
    // DestructiveCommandVerifier: normalization and bypass attempts
    // ---------------------------------------------------------------------

    /// U+FF0F (full-width solidus) folds to `/` under NFKC and is blocked.
    #[test]
    fn block_rm_rf_unicode_homoglyph() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "rm -rf \u{FF0F}"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `..` segments cannot be used to escape an allowed path.
    #[test]
    fn path_traversal_not_allowed_via_dotdot() {
        let config = DestructiveVerifierConfig {
            allowed_paths: vec!["/tmp/build".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        let result = v.verify("bash", &json!({"command": "rm -rf /tmp/build/../../etc"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `..` segments that stay inside the allowed path remain allowed.
    #[test]
    fn allowed_path_with_dotdot_stays_in_allowed() {
        let config = DestructiveVerifierConfig {
            allowed_paths: vec!["/tmp/build".to_string()],
            ..Default::default()
        };
        let v = DestructiveCommandVerifier::new(&config);
        assert_eq!(
            v.verify(
                "bash",
                &json!({"command": "rm -rf /tmp/build/sub/../artifacts"}),
            ),
            VerificationResult::Allow,
        );
    }

    /// Two nested `bash -c` wrappers are both peeled.
    #[test]
    fn double_nested_bash_c_blocked() {
        let v = dcv();
        let result = v.verify(
            "bash",
            &json!({"command": "bash -c \"bash -c 'rm -rf /'\""}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// An `env VAR=value` prefix does not hide the command.
    #[test]
    fn env_prefix_stripping_blocked() {
        let v = dcv();
        let result = v.verify(
            "bash",
            &json!({"command": "env FOO=bar bash -c 'rm -rf /'"}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// An `exec` prefix does not hide the command.
    #[test]
    fn exec_prefix_stripping_blocked() {
        let v = dcv();
        let result = v.verify("bash", &json!({"command": "exec bash -c 'rm -rf /'"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    // ---------------------------------------------------------------------
    // InjectionPatternVerifier: SSRF host extraction
    // ---------------------------------------------------------------------

    /// Only the real host is examined; `localhost` inside the query string of
    /// another host does not trigger the warning.
    #[test]
    fn ssrf_not_triggered_for_embedded_localhost_in_query_param() {
        let v = ipv();
        let result = v.verify(
            "http_get",
            &json!({"url": "http://evil.com/?r=http://localhost"}),
        );
        assert_eq!(result, VerificationResult::Allow);
    }

    /// `localhost` without port or path is recognised.
    #[test]
    fn ssrf_triggered_for_bare_localhost_no_port() {
        let v = ipv();
        let result = v.verify("http_get", &json!({"url": "http://localhost"}));
        assert!(matches!(result, VerificationResult::Warn { .. }));
    }

    /// `localhost` followed by a path is recognised.
    #[test]
    fn ssrf_triggered_for_localhost_with_path() {
        let v = ipv();
        let result = v.verify("http_get", &json!({"url": "http://localhost/api/v1"}));
        assert!(matches!(result, VerificationResult::Warn { .. }));
    }

    // ---------------------------------------------------------------------
    // Chaining several verifiers
    // ---------------------------------------------------------------------

    /// In a chain driven as below, the first `Block` ends the evaluation.
    #[test]
    fn chain_first_block_wins() {
        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];

        let args = json!({"command": "rm -rf /"});
        let mut result = VerificationResult::Allow;
        for v in &verifiers {
            result = v.verify("bash", &args);
            if matches!(result, VerificationResult::Block { .. }) {
                break;
            }
        }
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// A `Warn` does not stop the chain and is not a `Block`.
    #[test]
    fn chain_warn_continues() {
        let dcv = DestructiveCommandVerifier::new(&DestructiveVerifierConfig::default());
        let ipv = InjectionPatternVerifier::new(&InjectionVerifierConfig::default());
        let verifiers: Vec<Box<dyn PreExecutionVerifier>> = vec![Box::new(dcv), Box::new(ipv)];

        let args = json!({"url": "http://localhost:8080/api"});
        let mut got_warn = false;
        let mut got_block = false;
        for v in &verifiers {
            match v.verify("http_get", &args) {
                VerificationResult::Block { .. } => {
                    got_block = true;
                    break;
                }
                VerificationResult::Warn { .. } => {
                    got_warn = true;
                }
                VerificationResult::Allow => {}
            }
        }
        assert!(got_warn);
        assert!(!got_block);
    }

    // ---------------------------------------------------------------------
    // UrlGroundingVerifier
    // ---------------------------------------------------------------------

    /// Verifier with the default configuration whose user-provided set holds
    /// the lowercased `urls`.
    fn ugv(urls: &[&str]) -> UrlGroundingVerifier {
        let set: HashSet<String> = urls.iter().map(|s| s.to_lowercase()).collect();
        UrlGroundingVerifier::new(
            &UrlGroundingVerifierConfig::default(),
            Arc::new(RwLock::new(set)),
        )
    }

    /// An exact match with a user-provided URL is allowed.
    #[test]
    fn url_grounding_allows_user_provided_url() {
        let v = ugv(&["https://docs.anthropic.com/models"]);
        assert_eq!(
            v.verify(
                "fetch",
                &json!({"url": "https://docs.anthropic.com/models"})
            ),
            VerificationResult::Allow
        );
    }

    /// A URL unrelated to anything the user provided is blocked.
    #[test]
    fn url_grounding_blocks_hallucinated_url() {
        let v = ugv(&["https://example.com/page"]);
        let result = v.verify(
            "fetch",
            &json!({"url": "https://api.anthropic.ai/v1/models"}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// With an empty set every fetch is blocked.
    #[test]
    fn url_grounding_blocks_when_no_user_urls_at_all() {
        let v = ugv(&[]);
        let result = v.verify(
            "fetch",
            &json!({"url": "https://api.anthropic.ai/v1/models"}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Tools that are not guarded are not inspected.
    #[test]
    fn url_grounding_allows_non_guarded_tool() {
        let v = ugv(&[]);
        assert_eq!(
            v.verify("read_file", &json!({"path": "/etc/hosts"})),
            VerificationResult::Allow
        );
    }

    /// Any tool whose name ends in `_fetch` is guarded.
    #[test]
    fn url_grounding_guards_fetch_suffix_tool() {
        let v = ugv(&[]);
        let result = v.verify("http_fetch", &json!({"url": "https://evil.com/"}));
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// `web_scrape` is guarded by default and passes with a provided URL.
    #[test]
    fn url_grounding_allows_web_scrape_with_provided_url() {
        let v = ugv(&["https://rust-lang.org/"]);
        assert_eq!(
            v.verify(
                "web_scrape",
                &json!({"url": "https://rust-lang.org/", "select": "h1"})
            ),
            VerificationResult::Allow
        );
    }

    /// A fetched URL that extends a user-provided URL is allowed.
    #[test]
    fn url_grounding_allows_prefix_match() {
        let v = ugv(&["https://docs.rs/"]);
        assert_eq!(
            v.verify(
                "fetch",
                &json!({"url": "https://docs.rs/tokio/latest/tokio/"})
            ),
            VerificationResult::Allow
        );
    }

    // ---------------------------------------------------------------------
    // Regression tests referencing issue #2191
    // ---------------------------------------------------------------------

    /// Core regression: a fetch with no user URL in the session is blocked.
    #[test]
    fn reg_2191_hallucinated_api_endpoint_blocked_with_empty_session() {
        let v = ugv(&[]);
        let result = v.verify(
            "fetch",
            &json!({"url": "https://api.anthropic.ai/v1/models"}),
        );
        assert!(
            matches!(result, VerificationResult::Block { .. }),
            "fetch must be blocked when no user URL was provided — this is the #2191 regression"
        );
    }

    /// The fix must not block URLs the user did provide.
    #[test]
    fn reg_2191_user_provided_url_allows_fetch() {
        let v = ugv(&["https://api.anthropic.com/v1/models"]);
        assert_eq!(
            v.verify(
                "fetch",
                &json!({"url": "https://api.anthropic.com/v1/models"}),
            ),
            VerificationResult::Allow,
            "fetch must be allowed when the URL was explicitly provided by the user"
        );
    }

    /// Same regression through `web_scrape`.
    #[test]
    fn reg_2191_web_scrape_hallucinated_url_blocked() {
        let v = ugv(&[]);
        let result = v.verify(
            "web_scrape",
            &json!({"url": "https://api.anthropic.ai/v1/models", "select": "body"}),
        );
        assert!(
            matches!(result, VerificationResult::Block { .. }),
            "web_scrape must be blocked for hallucinated URL with empty user_provided_urls"
        );
    }

    /// An empty set blocks even plausible-looking URLs.
    #[test]
    fn reg_2191_empty_url_set_always_blocks_fetch() {
        let v = ugv(&[]);
        let result = v.verify(
            "fetch",
            &json!({"url": "https://docs.anthropic.com/something"}),
        );
        assert!(matches!(result, VerificationResult::Block { .. }));
    }

    /// Matching ignores case (the helper lowercases the stored URL; the
    /// verifier lowercases the fetched one).
    #[test]
    fn reg_2191_case_insensitive_url_match_allows_fetch() {
        let v = ugv(&["https://Docs.Anthropic.COM/models"]);
        assert_eq!(
            v.verify(
                "fetch",
                &json!({"url": "https://docs.anthropic.com/models/detail"}),
            ),
            VerificationResult::Allow,
            "URL matching must be case-insensitive"
        );
    }

    /// Tools ending in `_fetch` are guarded without being listed.
    #[test]
    fn reg_2191_mcp_fetch_suffix_tool_blocked_with_empty_session() {
        let v = ugv(&[]);
        let result = v.verify(
            "anthropic_fetch",
            &json!({"url": "https://api.anthropic.ai/v1/models"}),
        );
        assert!(
            matches!(result, VerificationResult::Block { .. }),
            "MCP tools ending in _fetch must be guarded even if not in guarded_tools list"
        );
    }

    /// A fetched URL that is a prefix of a user-provided URL is allowed.
    #[test]
    fn reg_2191_reverse_prefix_match_allows_fetch() {
        let v = ugv(&["https://docs.rs/tokio/latest/tokio/index.html"]);
        assert_eq!(
            v.verify("fetch", &json!({"url": "https://docs.rs/"})),
            VerificationResult::Allow,
            "reverse prefix: fetched URL is a prefix of user-provided URL — should be allowed"
        );
    }

    /// A user-provided host appearing in another host's path grounds nothing.
    #[test]
    fn reg_2191_different_domain_blocked() {
        let v = ugv(&["https://docs.rs/"]);
        let result = v.verify("fetch", &json!({"url": "https://evil.com/docs.rs/exfil"}));
        assert!(
            matches!(result, VerificationResult::Block { .. }),
            "different domain must not be allowed even if path looks similar"
        );
    }

    /// Only the `url` argument is examined; other URL-like fields are ignored.
    #[test]
    fn reg_2191_missing_url_field_allows_fetch() {
        let v = ugv(&[]);
        assert_eq!(
            v.verify(
                "fetch",
                &json!({"endpoint": "https://api.anthropic.ai/v1/models"})
            ),
            VerificationResult::Allow,
            "missing url field must not trigger blocking — only explicit url field is checked"
        );
    }

    /// Constructs a verifier from a config with `enabled: false` and calls it.
    // NOTE(review): this test asserts nothing despite its name.
    // `UrlGroundingVerifier` never reads `enabled`, so presumably the flag is
    // honoured by the caller — confirm, and consider asserting the verdict.
    #[test]
    fn reg_2191_disabled_verifier_allows_all() {
        let config = UrlGroundingVerifierConfig {
            enabled: false,
            guarded_tools: default_guarded_tools(),
        };
        let set: HashSet<String> = HashSet::new();
        let v = UrlGroundingVerifier::new(&config, Arc::new(RwLock::new(set)));
        let _ = v.verify("fetch", &json!({"url": "https://example.com/"}));
    }
}