Skip to main content

wallfacer_core/run/
fuzz.rs

1//! Fuzz plan: generates payloads for each tool and reports the resulting
2//! findings.
3
4use std::time::Duration;
5
6use anyhow::{Context, Result};
7use rand::SeedableRng;
8use rand_chacha::ChaCha20Rng;
9use serde::Serialize;
10use serde_json::Value;
11
12use crate::{
13    client::CallOutcome,
14    corpus::Corpus,
15    finding::{Finding, FindingKind, ReproInfo},
16    mutate::{try_generate_payload, GenMode},
17    seed::{derive_seed, derive_seed_canonical},
18    target::SeverityConfig,
19};
20
21use super::{
22    destructive::DestructiveDetector,
23    exec::McpExec,
24    glob,
25    reporter::{Reporter, RunInfo},
26};
27
28/// Tools whose schema could not be exercised. Reasons are surfaced from
29/// [`crate::mutate::SkipReason`] formatted as a string.
30#[derive(Debug, Clone, Serialize)]
31pub struct SkippedTool {
32    /// Tool name.
33    pub tool: String,
34    /// Why we gave up (e.g. unresolved `$ref`).
35    pub reason: String,
36}
37
38/// Outcome of a fuzz run.
39///
40/// Phase E4: findings are streamed to the corpus and to the reporter
41/// during the run; this report carries only counts and the diagnostic
42/// lists (skipped, blocked) needed for exit-code logic and post-run
43/// summaries. Front-ends that need the findings themselves accumulate
44/// them via [`Reporter::on_finding`].
45#[derive(Debug, Default, Serialize)]
46pub struct FuzzReport {
47    /// Number of findings produced during the run.
48    pub findings_count: usize,
49    /// Tools we could not generate inputs for.
50    #[serde(skip_serializing_if = "Vec::is_empty")]
51    pub skipped: Vec<SkippedTool>,
52    /// Tools that were filtered out as destructive without an allowlist
53    /// match. Surfaced for visibility, not as findings.
54    #[serde(skip_serializing_if = "Vec::is_empty")]
55    pub blocked: Vec<String>,
56}
57
58/// Returned when a plan runs without errors. Distinct from `FuzzReport`
59/// only because dry-run mode does not produce a report.
60#[derive(Debug)]
61pub enum FuzzOutcome {
62    /// Tools that would be fuzzed; produced by [`FuzzPlan::dry_run`].
63    DryRun(Vec<String>),
64    /// Real fuzz results; produced by [`FuzzPlan::execute`].
65    Completed(FuzzReport),
66}
67
68/// A reproducible fuzz plan.
69#[derive(Debug)]
70pub struct FuzzPlan {
71    /// Number of payloads generated per tool.
72    pub iterations: u64,
73    /// Generation mode (Conform / Adversarial / Mixed).
74    pub mode: GenMode,
75    /// Master seed used to derive per-iteration seeds. The same seed
76    /// reproduces the same sequence of payloads.
77    pub master_seed: u64,
78    /// Glob patterns: empty = match every tool name.
79    pub include: Vec<String>,
80    /// Glob patterns excluded from the fuzz set. Always honored.
81    pub exclude: Vec<String>,
82    /// Cap on the number of tools after filtering. `None` = unlimited.
83    pub max_tools: Option<usize>,
84    /// Timeout applied to each `call_tool`.
85    pub timeout: Duration,
86    /// Transport label persisted in the [`ReproInfo`]. Plans don't open
87    /// the transport themselves, so we receive a stable name (`stdio` /
88    /// `http`) from the caller.
89    pub transport_name: String,
90    /// Compiled destructive-tool detector built from
91    /// `[destructive]` + `[allow_destructive]` config.
92    pub detector: DestructiveDetector,
93    /// `[severity]` overrides from `wallfacer.toml`. Applied to every
94    /// produced finding before it lands on disk.
95    pub severity: SeverityConfig,
96}
97
98impl FuzzPlan {
99    /// Returns the tool names that would be fuzzed, for `--dry-run`.
100    pub async fn dry_run<C: McpExec + ?Sized>(&self, client: &C) -> Result<Vec<String>> {
101        let (tools, _blocked) = self.select_tools(client).await?;
102        Ok(tools
103            .into_iter()
104            .map(|tool| tool.name.to_string())
105            .collect())
106    }
107
108    /// Drives the full fuzz loop, persisting findings to `corpus` and
109    /// notifying `reporter` along the way.
110    pub async fn execute<C: McpExec + ?Sized>(
111        self,
112        client: &mut C,
113        corpus: &Corpus,
114        reporter: &mut dyn Reporter,
115    ) -> Result<FuzzReport> {
116        let (tools, blocked) = self.select_tools(client).await?;
117        let total = tools.len() as u64 * self.iterations;
118        reporter.on_run_start(&RunInfo {
119            kind: "fuzz",
120            total_iterations: total,
121            tools: tools.iter().map(|tool| tool.name.to_string()).collect(),
122            blocked: blocked.clone(),
123            master_seed: Some(self.master_seed),
124        });
125
126        let mut report = FuzzReport {
127            findings_count: 0,
128            skipped: Vec::new(),
129            blocked,
130        };
131
132        for tool in tools {
133            let tool_name = tool.name.to_string();
134            let input_schema = Value::Object((*tool.input_schema).clone());
135            for iteration in 0..self.iterations {
136                reporter.on_iteration_start(&tool_name, iteration);
137
138                let seed = derive_seed(self.master_seed, &tool_name, iteration);
139                let canonical = derive_seed_canonical(self.master_seed, &tool_name, iteration);
140                let mut rng = ChaCha20Rng::from_seed(canonical);
141                let payload = match try_generate_payload(&input_schema, &mut rng, self.mode) {
142                    Ok(payload) => payload,
143                    Err(reason) => {
144                        let skip = SkippedTool {
145                            tool: tool_name.clone(),
146                            reason: reason.to_string(),
147                        };
148                        reporter.on_skipped(&skip.tool, &skip.reason);
149                        report.skipped.push(skip);
150                        // Bump remaining iterations on the reporter so the
151                        // progress bar accounts for the skipped tail.
152                        for i in (iteration + 1)..self.iterations {
153                            reporter.on_iteration_end(&tool_name, i);
154                        }
155                        break;
156                    }
157                };
158
159                let outcome = client
160                    .call_tool(&tool_name, payload.value.clone(), self.timeout)
161                    .await;
162                let kind_message_details: Option<(FindingKind, &str, String)> = match outcome {
163                    CallOutcome::Ok(_) => None,
164                    CallOutcome::Hang(duration) => Some((
165                        FindingKind::Hang {
166                            ms: duration.as_millis() as u64,
167                        },
168                        "tool call timed out",
169                        format!("timeout exceeded after {duration:?}"),
170                    )),
171                    CallOutcome::Crash(reason) => Some((
172                        FindingKind::Crash,
173                        "server crashed during tool call",
174                        reason,
175                    )),
176                    CallOutcome::ProtocolError(message) => Some((
177                        FindingKind::ProtocolError,
178                        "protocol error during tool call",
179                        message,
180                    )),
181                };
182
183                if let Some((kind, message, details)) = kind_message_details {
184                    let mut finding = Finding::new(
185                        kind,
186                        &tool_name,
187                        message,
188                        details,
189                        ReproInfo {
190                            seed,
191                            tool_call: payload.value,
192                            transport: self.transport_name.clone(),
193                            composition_trail: payload.trail,
194                        },
195                    );
196                    if let Some(override_sev) = self.severity.resolve(finding.kind.keyword()) {
197                        finding = finding.with_severity(override_sev);
198                    }
199                    corpus
200                        .write_finding(&finding)
201                        .with_context(|| format!("failed to persist finding for `{tool_name}`"))?;
202                    reporter.on_finding(&finding);
203                    report.findings_count += 1;
204                    client.reconnect().await.with_context(|| {
205                        format!("failed to reconnect after fault on `{tool_name}`")
206                    })?;
207                    reporter.on_iteration_end(&tool_name, iteration);
208                    break;
209                }
210
211                reporter.on_iteration_end(&tool_name, iteration);
212            }
213        }
214
215        reporter.on_run_end();
216        Ok(report)
217    }
218
219    async fn select_tools<C: McpExec + ?Sized>(
220        &self,
221        client: &C,
222    ) -> Result<(Vec<rmcp::model::Tool>, Vec<String>)> {
223        let all_tools = client
224            .list_tools()
225            .await
226            .context("failed to list tools from MCP server")?;
227        let mut blocked = Vec::new();
228        let mut tools: Vec<rmcp::model::Tool> = all_tools
229            .into_iter()
230            .filter(|tool| glob::matches_filters(tool.name.as_ref(), &self.include, &self.exclude))
231            .filter(|tool| {
232                let classification = self.detector.classify(tool);
233                if classification.is_runnable() {
234                    true
235                } else {
236                    blocked.push(tool.name.to_string());
237                    false
238                }
239            })
240            .collect();
241        if let Some(max_tools) = self.max_tools {
242            tools.truncate(max_tools);
243        }
244        Ok((tools, blocked))
245    }
246}
247
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::run::exec::MockClient;
    use crate::run::reporter::NoopReporter;
    use crate::target::{AllowDestructiveConfig, DestructiveConfig};
    use rmcp::model::{CallToolResult, Tool};
    use serde_json::json;
    use std::sync::Arc;

    /// Builds a tool called `name` whose input schema is `schema`; a
    /// non-object schema becomes an empty object.
    fn make_tool(name: &str, schema: Value) -> Tool {
        let schema_object = match schema {
            Value::Object(map) => map,
            _ => serde_json::Map::new(),
        };
        Tool::new(
            name.to_string(),
            "test tool".to_string(),
            Arc::new(schema_object),
        )
    }

    /// Detector compiled from the default destructive / allowlist config.
    fn detector() -> DestructiveDetector {
        let destructive = DestructiveConfig::default();
        let allow = AllowDestructiveConfig::default();
        DestructiveDetector::from_config(&destructive, &allow).unwrap()
    }

    /// Small conforming plan (4 iterations, seed 42) shared by the tests.
    fn plan(detector: DestructiveDetector) -> FuzzPlan {
        FuzzPlan {
            iterations: 4,
            mode: GenMode::Conform,
            master_seed: 42,
            include: Vec::new(),
            exclude: Vec::new(),
            max_tools: None,
            timeout: Duration::from_secs(1),
            transport_name: "mock".to_string(),
            detector,
            severity: SeverityConfig::default(),
        }
    }

    /// Outcome returned by mock tools that always succeed with no content.
    fn empty_success() -> CallOutcome {
        CallOutcome::Ok(CallToolResult::success(vec![]))
    }

    /// Simple `{ <field>: string }` object schema.
    fn string_field_schema(field: &str) -> Value {
        json!({"type": "object", "properties": {field: {"type": "string"}}})
    }

    /// Runs the shared plan against `client` with a throwaway corpus
    /// directory and a no-op reporter, returning the resulting report.
    async fn run_default_plan<C: McpExec + ?Sized>(client: &mut C) -> FuzzReport {
        let scratch = tempfile::tempdir().unwrap();
        let corpus = Corpus::new(scratch.path().join("corpus"));
        let mut reporter = NoopReporter;
        let report = plan(detector())
            .execute(client, &corpus, &mut reporter)
            .await
            .unwrap();
        report
    }

    #[tokio::test]
    async fn fuzz_records_protocol_error_finding_and_reconnects() {
        let echo = make_tool("echo", string_field_schema("msg"));
        let mut client = MockClient::new().register(echo, |_args| {
            CallOutcome::ProtocolError("synthetic failure".to_string())
        });

        let report = run_default_plan(&mut client).await;

        // The first fault ends the tool's loop: one finding, one reconnect.
        assert_eq!(report.findings_count, 1);
        assert_eq!(client.reconnect_count(), 1);
        assert!(report.skipped.is_empty());
    }

    #[tokio::test]
    async fn fuzz_skips_tools_with_unresolvable_refs() {
        let broken = make_tool(
            "broken",
            json!({"$ref": "https://external.example/schema.json"}),
        );
        let mut client = MockClient::new().register(broken, |_args| empty_success());

        let report = run_default_plan(&mut client).await;

        assert_eq!(report.findings_count, 0);
        assert_eq!(report.skipped.len(), 1);
        assert!(report.skipped[0].reason.contains("external"));
    }

    #[tokio::test]
    async fn fuzz_blocks_destructive_tools_unless_allowlisted() {
        let destructive_tool = make_tool("delete_user", string_field_schema("id"));
        let safe_tool = make_tool("read_user", string_field_schema("id"));
        let mut client = MockClient::new()
            .register(destructive_tool, |_| empty_success())
            .register(safe_tool, |_| empty_success());

        let report = run_default_plan(&mut client).await;

        assert_eq!(report.blocked, vec!["delete_user".to_string()]);
    }
}
356            .await
357            .unwrap();
358        assert_eq!(report.blocked, vec!["delete_user".to_string()]);
359    }
360}