Skip to main content

wallfacer_core/run/
fuzz.rs

1//! Fuzz plan: generates payloads for each tool and reports the resulting
2//! findings.
3
4use std::time::Duration;
5
6use anyhow::{Context, Result};
7use rand::SeedableRng;
8use rand_chacha::ChaCha20Rng;
9use serde::Serialize;
10use serde_json::Value;
11
12use crate::{
13    client::CallOutcome,
14    corpus::Corpus,
15    finding::{Finding, FindingKind, ReproInfo},
16    fuzz_corpus::{response_fingerprint, CorpusTrigger, FuzzCorpus, FuzzCorpusEntry},
17    mutate::{corpus_mutator, try_generate_payload, GenMode},
18    seed::{derive_seed, derive_seed_canonical},
19    target::SeverityConfig,
20};
21
22use super::{
23    destructive::DestructiveDetector,
24    exec::McpExec,
25    glob,
26    reporter::{Reporter, RunInfo},
27};
28
29/// Tools whose schema could not be exercised. Reasons are surfaced from
30/// [`crate::mutate::SkipReason`] formatted as a string.
31#[derive(Debug, Clone, Serialize)]
32pub struct SkippedTool {
33    /// Tool name.
34    pub tool: String,
35    /// Why we gave up (e.g. unresolved `$ref`).
36    pub reason: String,
37}
38
39/// Outcome of a fuzz run.
40///
41/// Phase E4: findings are streamed to the corpus and to the reporter
42/// during the run; this report carries only counts and the diagnostic
43/// lists (skipped, blocked) needed for exit-code logic and post-run
44/// summaries. Front-ends that need the findings themselves accumulate
45/// them via [`Reporter::on_finding`].
46#[derive(Debug, Default, Serialize)]
47pub struct FuzzReport {
48    /// Number of findings produced during the run.
49    pub findings_count: usize,
50    /// Tools we could not generate inputs for.
51    #[serde(skip_serializing_if = "Vec::is_empty")]
52    pub skipped: Vec<SkippedTool>,
53    /// Tools that were filtered out as destructive without an allowlist
54    /// match. Surfaced for visibility, not as findings.
55    #[serde(skip_serializing_if = "Vec::is_empty")]
56    pub blocked: Vec<String>,
57}
58
59/// Returned when a plan runs without errors. Distinct from `FuzzReport`
60/// only because dry-run mode does not produce a report.
61#[derive(Debug)]
62pub enum FuzzOutcome {
63    /// Tools that would be fuzzed; produced by [`FuzzPlan::dry_run`].
64    DryRun(Vec<String>),
65    /// Real fuzz results; produced by [`FuzzPlan::execute`].
66    Completed(FuzzReport),
67}
68
69/// A reproducible fuzz plan.
70#[derive(Debug)]
71pub struct FuzzPlan {
72    /// Number of payloads generated per tool.
73    pub iterations: u64,
74    /// Generation mode (Conform / Adversarial / Mixed).
75    pub mode: GenMode,
76    /// Master seed used to derive per-iteration seeds. The same seed
77    /// reproduces the same sequence of payloads.
78    pub master_seed: u64,
79    /// Glob patterns: empty = match every tool name.
80    pub include: Vec<String>,
81    /// Glob patterns excluded from the fuzz set. Always honored.
82    pub exclude: Vec<String>,
83    /// Cap on the number of tools after filtering. `None` = unlimited.
84    pub max_tools: Option<usize>,
85    /// Timeout applied to each `call_tool`.
86    pub timeout: Duration,
87    /// Transport label persisted in the [`ReproInfo`]. Plans don't open
88    /// the transport themselves, so we receive a stable name (`stdio` /
89    /// `http`) from the caller.
90    pub transport_name: String,
91    /// Compiled destructive-tool detector built from
92    /// `[destructive]` + `[allow_destructive]` config.
93    pub detector: DestructiveDetector,
94    /// `[severity]` overrides from `wallfacer.toml`. Applied to every
95    /// produced finding before it lands on disk.
96    pub severity: SeverityConfig,
97    /// Phase R — optional persistent fuzz corpus. When set, the
98    /// loop pulls inputs that triggered findings or new response
99    /// fingerprints from prior runs and mutates from them
100    /// `mutate_ratio` fraction of the time. Pure schema-driven
101    /// generation handles the remainder so the fuzzer keeps
102    /// exploring beyond the corpus's basin.
103    pub fuzz_corpus: Option<FuzzCorpus>,
104    /// Phase R — fraction of iterations that mutate from the
105    /// corpus instead of generating fresh schema-driven payloads.
106    /// Default `0.9` matches AFL/libFuzzer convention. Ignored
107    /// when [`Self::fuzz_corpus`] is `None` or the corpus is
108    /// empty.
109    pub mutate_ratio: f64,
110}
111
112impl FuzzPlan {
113    /// Returns the tool names that would be fuzzed, for `--dry-run`.
114    pub async fn dry_run<C: McpExec + ?Sized>(&self, client: &C) -> Result<Vec<String>> {
115        let (tools, _blocked) = self.select_tools(client).await?;
116        Ok(tools
117            .into_iter()
118            .map(|tool| tool.name.to_string())
119            .collect())
120    }
121
122    /// Drives the full fuzz loop, persisting findings to `corpus` and
123    /// notifying `reporter` along the way.
124    pub async fn execute<C: McpExec + ?Sized>(
125        self,
126        client: &mut C,
127        corpus: &Corpus,
128        reporter: &mut dyn Reporter,
129    ) -> Result<FuzzReport> {
130        let (tools, blocked) = self.select_tools(client).await?;
131        let total = tools.len() as u64 * self.iterations;
132        reporter.on_run_start(&RunInfo {
133            kind: "fuzz",
134            total_iterations: total,
135            tools: tools.iter().map(|tool| tool.name.to_string()).collect(),
136            blocked: blocked.clone(),
137            master_seed: Some(self.master_seed),
138        });
139
140        let mut report = FuzzReport {
141            findings_count: 0,
142            skipped: Vec::new(),
143            blocked,
144        };
145
146        // Phase R — preload the corpus (if enabled) and the
147        // fingerprint set so we can dedup novel responses against
148        // prior runs.
149        let mut seen_fingerprints: std::collections::BTreeSet<String> =
150            std::collections::BTreeSet::new();
151        if let Some(corpus_ref) = self.fuzz_corpus.as_ref() {
152            for tool in &tools {
153                let tool_name = tool.name.to_string();
154                if let Ok(entries) = corpus_ref.list(&tool_name) {
155                    for e in entries {
156                        seen_fingerprints.insert(e.fingerprint);
157                    }
158                }
159            }
160        }
161
162        for tool in tools {
163            let tool_name = tool.name.to_string();
164            let input_schema = Value::Object((*tool.input_schema).clone());
165            // Cache prior corpus entries for THIS tool so the
166            // 90/10 split doesn't re-list every iteration.
167            let prior_corpus: Vec<FuzzCorpusEntry> = self
168                .fuzz_corpus
169                .as_ref()
170                .map(|c| c.list(&tool_name).unwrap_or_default())
171                .unwrap_or_default();
172
173            for iteration in 0..self.iterations {
174                reporter.on_iteration_start(&tool_name, iteration);
175
176                let seed = derive_seed(self.master_seed, &tool_name, iteration);
177                let canonical = derive_seed_canonical(self.master_seed, &tool_name, iteration);
178                let mut rng = ChaCha20Rng::from_seed(canonical);
179
180                // 90/10 mutate-vs-random when the corpus has at
181                // least one prior entry for this tool. Without a
182                // corpus or with an empty per-tool sub-corpus we
183                // fall back to pure schema-driven generation.
184                use rand::Rng;
185                let use_mutation = !prior_corpus.is_empty()
186                    && self.fuzz_corpus.is_some()
187                    && rng.gen_bool(self.mutate_ratio.clamp(0.0, 1.0));
188                let (payload_value, payload_trail): (Value, Vec<String>) = if use_mutation {
189                    let pick = &prior_corpus[rng.gen_range(0..prior_corpus.len())];
190                    let mutated = corpus_mutator::mutate(&pick.input, &mut rng);
191                    (mutated, vec![format!("mutated from corpus seed")])
192                } else {
193                    match try_generate_payload(&input_schema, &mut rng, self.mode) {
194                        Ok(payload) => (payload.value, payload.trail),
195                        Err(reason) => {
196                            let skip = SkippedTool {
197                                tool: tool_name.clone(),
198                                reason: reason.to_string(),
199                            };
200                            reporter.on_skipped(&skip.tool, &skip.reason);
201                            report.skipped.push(skip);
202                            // Bump remaining iterations on the reporter so the
203                            // progress bar accounts for the skipped tail.
204                            for i in (iteration + 1)..self.iterations {
205                                reporter.on_iteration_end(&tool_name, i);
206                            }
207                            break;
208                        }
209                    }
210                };
211
212                let outcome = client
213                    .call_tool(&tool_name, payload_value.clone(), self.timeout)
214                    .await;
215
216                // Phase R — capture the response fingerprint
217                // *before* we destructure outcome (the match below
218                // moves Hang/Crash/ProtocolError out of the enum).
219                let response_value: Value = match &outcome {
220                    CallOutcome::Ok(result) => serde_json::to_value(result).unwrap_or(Value::Null),
221                    _ => Value::Null,
222                };
223                let fingerprint = response_fingerprint(&response_value);
224
225                let kind_message_details: Option<(FindingKind, &str, String)> = match outcome {
226                    CallOutcome::Ok(_) => None,
227                    CallOutcome::Hang(duration) => Some((
228                        FindingKind::Hang {
229                            ms: duration.as_millis() as u64,
230                        },
231                        "tool call timed out",
232                        format!("timeout exceeded after {duration:?}"),
233                    )),
234                    CallOutcome::Crash(reason) => Some((
235                        FindingKind::Crash,
236                        "server crashed during tool call",
237                        reason,
238                    )),
239                    CallOutcome::ProtocolError(message) => Some((
240                        FindingKind::ProtocolError,
241                        "protocol error during tool call",
242                        message,
243                    )),
244                };
245
246                if let Some((kind, message, details)) = kind_message_details {
247                    let mut finding = Finding::new(
248                        kind,
249                        &tool_name,
250                        message,
251                        details,
252                        ReproInfo {
253                            seed,
254                            tool_call: payload_value.clone(),
255                            transport: self.transport_name.clone(),
256                            composition_trail: payload_trail,
257                        },
258                    );
259                    if let Some(override_sev) = self.severity.resolve(finding.kind.keyword()) {
260                        finding = finding.with_severity(override_sev);
261                    }
262                    corpus
263                        .write_finding(&finding)
264                        .with_context(|| format!("failed to persist finding for `{tool_name}`"))?;
265                    reporter.on_finding(&finding);
266                    report.findings_count += 1;
267                    // Phase R — input that triggered a finding is
268                    // the highest-value corpus entry. Save it
269                    // before the reconnect (the reconnect is best-
270                    // effort).
271                    if let Some(corpus_ref) = self.fuzz_corpus.as_ref() {
272                        let _ = corpus_ref.save(&FuzzCorpusEntry {
273                            tool: tool_name.clone(),
274                            input: payload_value.clone(),
275                            trigger: CorpusTrigger::Finding {
276                                kind: finding.kind.keyword().to_string(),
277                            },
278                            fingerprint: fingerprint.clone(),
279                            timestamp: chrono::Utc::now(),
280                        });
281                    }
282                    client.reconnect().await.with_context(|| {
283                        format!("failed to reconnect after fault on `{tool_name}`")
284                    })?;
285                    reporter.on_iteration_end(&tool_name, iteration);
286                    break;
287                }
288
289                // Phase R — non-finding outcome. Save the input
290                // when the response fingerprint is novel (helps
291                // the next run explore further from this point).
292                if let Some(corpus_ref) = self.fuzz_corpus.as_ref() {
293                    if seen_fingerprints.insert(fingerprint.clone()) {
294                        let _ = corpus_ref.save(&FuzzCorpusEntry {
295                            tool: tool_name.clone(),
296                            input: payload_value,
297                            trigger: CorpusTrigger::NewFingerprint,
298                            fingerprint,
299                            timestamp: chrono::Utc::now(),
300                        });
301                    }
302                }
303
304                reporter.on_iteration_end(&tool_name, iteration);
305            }
306        }
307
308        reporter.on_run_end();
309        Ok(report)
310    }
311
312    async fn select_tools<C: McpExec + ?Sized>(
313        &self,
314        client: &C,
315    ) -> Result<(Vec<rmcp::model::Tool>, Vec<String>)> {
316        let all_tools = client
317            .list_tools()
318            .await
319            .context("failed to list tools from MCP server")?;
320        let mut blocked = Vec::new();
321        let mut tools: Vec<rmcp::model::Tool> = all_tools
322            .into_iter()
323            .filter(|tool| glob::matches_filters(tool.name.as_ref(), &self.include, &self.exclude))
324            .filter(|tool| {
325                let classification = self.detector.classify(tool);
326                if classification.is_runnable() {
327                    true
328                } else {
329                    blocked.push(tool.name.to_string());
330                    false
331                }
332            })
333            .collect();
334        if let Some(max_tools) = self.max_tools {
335            tools.truncate(max_tools);
336        }
337        Ok((tools, blocked))
338    }
339}
340
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::run::exec::MockClient;
    use crate::run::reporter::NoopReporter;
    use crate::target::{AllowDestructiveConfig, DestructiveConfig};
    use rmcp::model::Tool;
    use serde_json::json;
    use std::sync::Arc;

    /// Builds a tool named `name` whose input schema is `schema`. A
    /// non-object schema becomes an empty schema map.
    fn make_tool(name: &str, schema: Value) -> Tool {
        let schema_map = match schema {
            Value::Object(map) => map,
            _ => serde_json::Map::new(),
        };
        Tool::new(
            name.to_string(),
            "test tool".to_string(),
            Arc::new(schema_map),
        )
    }

    /// Detector built from the default destructive / allowlist configs.
    fn detector() -> DestructiveDetector {
        let destructive = DestructiveConfig::default();
        let allowlist = AllowDestructiveConfig::default();
        DestructiveDetector::from_config(&destructive, &allowlist).unwrap()
    }

    /// Small conforming plan with no fuzz corpus and mutation disabled.
    fn plan(detector: DestructiveDetector) -> FuzzPlan {
        FuzzPlan {
            iterations: 4,
            mode: GenMode::Conform,
            master_seed: 42,
            include: Vec::new(),
            exclude: Vec::new(),
            max_tools: None,
            timeout: Duration::from_secs(1),
            transport_name: "mock".to_string(),
            detector,
            severity: SeverityConfig::default(),
            fuzz_corpus: None,
            mutate_ratio: 0.0,
        }
    }

    /// Creates a findings corpus under a fresh temp directory. The
    /// returned guard must stay alive for as long as the corpus is used.
    fn scratch_corpus() -> (tempfile::TempDir, Corpus) {
        let dir = tempfile::tempdir().unwrap();
        let corpus = Corpus::new(dir.path().join("corpus"));
        (dir, corpus)
    }

    /// A successful tool call with an empty content list.
    fn empty_success() -> CallOutcome {
        CallOutcome::Ok(rmcp::model::CallToolResult::success(vec![]))
    }

    #[tokio::test]
    async fn fuzz_records_protocol_error_finding_and_reconnects() {
        let echo = make_tool(
            "echo",
            json!({"type": "object", "properties": {"msg": {"type": "string"}}}),
        );
        let mut client = MockClient::new().register(echo, |_args| {
            CallOutcome::ProtocolError("synthetic failure".to_string())
        });

        let (_guard, corpus) = scratch_corpus();
        let mut reporter = NoopReporter;

        let report = plan(detector())
            .execute(&mut client, &corpus, &mut reporter)
            .await
            .unwrap();

        // The first fault ends fuzzing of the tool: one finding and one
        // reconnect despite four configured iterations.
        assert_eq!(report.findings_count, 1);
        assert_eq!(client.reconnect_count(), 1);
        assert!(report.skipped.is_empty());
    }

    #[tokio::test]
    async fn fuzz_skips_tools_with_unresolvable_refs() {
        let broken = make_tool(
            "broken",
            json!({"$ref": "https://external.example/schema.json"}),
        );
        let mut client = MockClient::new().register(broken, |_args| empty_success());

        let (_guard, corpus) = scratch_corpus();
        let mut reporter = NoopReporter;

        let report = plan(detector())
            .execute(&mut client, &corpus, &mut reporter)
            .await
            .unwrap();

        assert_eq!(report.findings_count, 0);
        assert_eq!(report.skipped.len(), 1);
        assert!(report.skipped[0].reason.contains("external"));
    }

    #[tokio::test]
    async fn fuzz_blocks_destructive_tools_unless_allowlisted() {
        let id_schema = json!({"type": "object", "properties": {"id": {"type": "string"}}});
        let destructive_tool = make_tool("delete_user", id_schema.clone());
        let safe_tool = make_tool("read_user", id_schema);

        let mut client = MockClient::new()
            .register(destructive_tool, |_| empty_success())
            .register(safe_tool, |_| empty_success());

        let (_guard, corpus) = scratch_corpus();
        let mut reporter = NoopReporter;

        let report = plan(detector())
            .execute(&mut client, &corpus, &mut reporter)
            .await
            .unwrap();

        assert_eq!(report.blocked, vec!["delete_user".to_string()]);
    }
}