Skip to main content

wallfacer_core/run/
destructive.rs

//! Destructive-tool detection.
//!
//! Phase C5 layers three signals, in order:
//!
//! 1. **MCP annotations.** If the server declares
//!    `tool.annotations.destructive_hint == Some(true)` and does not also
//!    set `read_only_hint == Some(true)`, the tool is destructive.
//! 2. **Configurable patterns.** Operators can declare additional
//!    destructive name patterns in `wallfacer.toml`:
//!
//!    ```toml
//!    [destructive]
//!    patterns = ["^remove_.*$", "^drop_.*$"]
//!    ```
//!
//!    These patterns are additive: the default keyword list (`delete`,
//!    `drop`, `destroy`, `truncate`, `kill`, `wipe`, `purge`, `reset`)
//!    stays active unless `[destructive] replace_defaults = true` is set.
//! 3. **Allowlist (regex).** `[allow_destructive] tools` is now a list of
//!    regular expressions. A matching tool is permitted to be fuzzed even
//!    when classified destructive.
//!
//! ```toml
//! [allow_destructive]
//! tools = ["^logs_.*$"]   # keep "logs_delete_old" in the fuzz set
//! ```
26
27use regex::Regex;
28use rmcp::model::Tool;
29use thiserror::Error;
30
31use crate::target::{AllowDestructiveConfig, DestructiveConfig};
32
/// Outcome of running the destructive-tool detector over one tool.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ToolClassification {
    /// Nothing flagged the tool; it can be fuzzed.
    Allowed,
    /// The tool was flagged as destructive and no allowlist entry covers it.
    Destructive {
        /// Human-readable description of the signal that flagged the tool.
        reason: String,
    },
    /// The tool was flagged as destructive, but an allowlist entry matches
    /// it, so fuzzing is still permitted.
    DestructiveButAllowed {
        /// Human-readable description of the signal that flagged the tool.
        reason: String,
    },
}
49
50impl ToolClassification {
51    /// Returns `true` if the classification means the tool is currently
52    /// safe to invoke.
53    pub fn is_runnable(&self) -> bool {
54        matches!(
55            self,
56            ToolClassification::Allowed | ToolClassification::DestructiveButAllowed { .. }
57        )
58    }
59
60    /// Returns the destructive reason, if any.
61    pub fn reason(&self) -> Option<&str> {
62        match self {
63            ToolClassification::Destructive { reason }
64            | ToolClassification::DestructiveButAllowed { reason } => Some(reason),
65            ToolClassification::Allowed => None,
66        }
67    }
68}
69
70/// Errors raised when configuration patterns fail to compile.
71#[derive(Debug, Error)]
72pub enum DestructiveError {
73    /// A regex in `[destructive] patterns` or `[allow_destructive] tools`
74    /// did not compile.
75    #[error("invalid regex `{pattern}`: {source}")]
76    InvalidRegex {
77        pattern: String,
78        #[source]
79        source: regex::Error,
80    },
81}
82
83/// Compiled detector reused across tool classifications.
84#[derive(Debug)]
85pub struct DestructiveDetector {
86    destructive_patterns: Vec<Regex>,
87    allow_patterns: Vec<Regex>,
88    use_default_keywords: bool,
89}
90
/// Built-in destructive keywords, listed in the priority order in which a
/// match is reported. Each one is compared as a word prefix against the
/// lowercased tool name and description.
const DEFAULT_KEYWORDS: &[&str] = &[
    "delete", "drop", "destroy", "truncate", "kill", "wipe", "purge", "reset",
];
94
95impl DestructiveDetector {
96    /// Compiles a detector from configuration. Returns an error if any
97    /// pattern fails to parse.
98    ///
99    /// Default keywords (`delete`, `drop`, ...) stay active unless the
100    /// operator opts out via `[destructive] replace_defaults = true`.
101    /// Older configs that simply added `patterns` used to silently lose
102    /// the built-in protection — that footgun is gone.
103    pub fn from_config(
104        destructive: &DestructiveConfig,
105        allow: &AllowDestructiveConfig,
106    ) -> Result<Self, DestructiveError> {
107        let destructive_patterns = compile_all(&destructive.patterns)?;
108        let allow_patterns = compile_all(&allow.tools)?;
109        Ok(Self {
110            destructive_patterns,
111            allow_patterns,
112            use_default_keywords: !destructive.replace_defaults,
113        })
114    }
115
116    /// Classifies a single tool.
117    pub fn classify(&self, tool: &Tool) -> ToolClassification {
118        let name = tool.name.as_ref();
119        let description = tool.description.as_deref();
120        let annotations_says_destructive = tool
121            .annotations
122            .as_ref()
123            .is_some_and(|a| a.destructive_hint == Some(true) && a.read_only_hint != Some(true));
124        let annotations_says_read_only = tool
125            .annotations
126            .as_ref()
127            .is_some_and(|a| a.read_only_hint == Some(true));
128
129        if annotations_says_read_only {
130            return ToolClassification::Allowed;
131        }
132
133        let reason = if annotations_says_destructive {
134            Some("annotations.destructive_hint == true".to_string())
135        } else if let Some(pattern) = self.match_destructive_pattern(name) {
136            Some(format!("name matches destructive pattern `{pattern}`"))
137        } else {
138            self.match_default_keyword(name, description)
139                .map(|keyword| format!("name/description contains keyword `{keyword}`"))
140        };
141
142        match reason {
143            None => ToolClassification::Allowed,
144            Some(reason) => {
145                if self.allow_patterns.iter().any(|r| r.is_match(name)) {
146                    ToolClassification::DestructiveButAllowed { reason }
147                } else {
148                    ToolClassification::Destructive { reason }
149                }
150            }
151        }
152    }
153
154    fn match_destructive_pattern(&self, name: &str) -> Option<String> {
155        self.destructive_patterns
156            .iter()
157            .find(|r| r.is_match(name))
158            .map(|r| r.as_str().to_string())
159    }
160
161    fn match_default_keyword(&self, name: &str, description: Option<&str>) -> Option<String> {
162        if !self.use_default_keywords {
163            return None;
164        }
165        let mut text = name.to_lowercase();
166        if let Some(description) = description {
167            text.push(' ');
168            text.push_str(&description.to_lowercase());
169        }
170        // Split on every non-alphanumeric (including underscores) so we
171        // catch keywords inside snake_case names like `logs_delete_old`.
172        for keyword in DEFAULT_KEYWORDS {
173            for word in text.split(|ch: char| !ch.is_ascii_alphanumeric()) {
174                if word.starts_with(keyword) {
175                    return Some((*keyword).to_string());
176                }
177            }
178        }
179        None
180    }
181}
182
183fn compile_all(patterns: &[String]) -> Result<Vec<Regex>, DestructiveError> {
184    patterns
185        .iter()
186        .map(|pattern| {
187            Regex::new(pattern).map_err(|source| DestructiveError::InvalidRegex {
188                pattern: pattern.clone(),
189                source,
190            })
191        })
192        .collect()
193}
194
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use rmcp::model::{Tool, ToolAnnotations};
    use std::sync::Arc;

    /// Builds a tool with an empty input schema. A missing description
    /// falls back to `"test tool"`.
    fn tool(name: &str, description: Option<&str>, annotations: Option<ToolAnnotations>) -> Tool {
        let base = Tool::new(
            name.to_string(),
            description.unwrap_or("test tool").to_string(),
            Arc::new(serde_json::Map::new()),
        );
        match annotations {
            Some(annotations) => base.annotate(annotations),
            None => base,
        }
    }

    /// Detector built from the default configuration of both sections.
    fn empty_detector() -> DestructiveDetector {
        let destructive = DestructiveConfig::default();
        let allow = AllowDestructiveConfig::default();
        DestructiveDetector::from_config(&destructive, &allow).expect("default config compiles")
    }

    #[test]
    fn read_only_annotation_overrides_keywords() {
        let mut hints = ToolAnnotations::default();
        hints.read_only_hint = Some(true);
        let subject = tool("delete_user", None, Some(hints));

        let verdict = empty_detector().classify(&subject);

        assert_eq!(verdict, ToolClassification::Allowed);
    }

    #[test]
    fn destructive_annotation_marks_tool_destructive() {
        let mut hints = ToolAnnotations::default();
        hints.destructive_hint = Some(true);
        let subject = tool("benign_name", None, Some(hints));

        let classification = empty_detector().classify(&subject);

        assert!(
            matches!(classification, ToolClassification::Destructive { .. }),
            "got {classification:?}"
        );
    }

    #[test]
    fn default_keywords_match_in_name() {
        let subject = tool("delete_user", None, None);

        let verdict = empty_detector().classify(&subject);

        assert!(matches!(verdict, ToolClassification::Destructive { .. }));
    }

    #[test]
    fn allowlist_regex_unblocks_destructive_tool() {
        let allow = AllowDestructiveConfig {
            tools: vec!["^logs_.*$".to_string()],
        };
        let detector =
            DestructiveDetector::from_config(&DestructiveConfig::default(), &allow).unwrap();
        let subject = tool("logs_delete_old", None, None);

        let classification = detector.classify(&subject);

        assert!(
            matches!(
                classification,
                ToolClassification::DestructiveButAllowed { .. }
            ),
            "got {classification:?}"
        );
        assert!(classification.is_runnable());
    }

    #[test]
    fn custom_patterns_layer_on_top_of_default_keywords() {
        let destructive = DestructiveConfig {
            patterns: vec!["^remove_.*$".to_string()],
            replace_defaults: false,
        };
        let detector =
            DestructiveDetector::from_config(&destructive, &AllowDestructiveConfig::default())
                .unwrap();

        // Additive behaviour: the built-in keyword still flags
        // `delete_user`, and the custom pattern flags `remove_record`.
        let flagged_by_keyword = detector.classify(&tool("delete_user", None, None));
        assert!(matches!(
            flagged_by_keyword,
            ToolClassification::Destructive { .. }
        ));

        let flagged_by_pattern = detector.classify(&tool("remove_record", None, None));
        assert!(matches!(
            flagged_by_pattern,
            ToolClassification::Destructive { .. }
        ));
    }

    #[test]
    fn replace_defaults_disables_built_in_keywords() {
        let destructive = DestructiveConfig {
            patterns: vec!["^remove_.*$".to_string()],
            replace_defaults: true,
        };
        let detector =
            DestructiveDetector::from_config(&destructive, &AllowDestructiveConfig::default())
                .unwrap();

        // With the defaults replaced, the keyword alone no longer flags.
        let keyword_only = detector.classify(&tool("delete_user", None, None));
        assert_eq!(keyword_only, ToolClassification::Allowed);

        // The configured pattern keeps working.
        let pattern_hit = detector.classify(&tool("remove_record", None, None));
        assert!(matches!(
            pattern_hit,
            ToolClassification::Destructive { .. }
        ));
    }

    #[test]
    fn invalid_regex_surfaces_error() {
        let allow = AllowDestructiveConfig {
            tools: vec!["[unterminated".to_string()],
        };

        let outcome = DestructiveDetector::from_config(&DestructiveConfig::default(), &allow);

        assert!(outcome.is_err());
    }
}
319        };
320        let result = DestructiveDetector::from_config(&DestructiveConfig::default(), &allow);
321        assert!(result.is_err());
322    }
323}