wasm_shrink/
lib.rs

1//! Shrink a Wasm file while maintaining a property of interest (such as
2//! triggering a compiler bug).
3//!
4//! See the [`WasmShrink`] type for details.
5
6use anyhow::{Context, Result};
7use rand::{Rng, SeedableRng, rngs::SmallRng};
8use std::collections::HashSet;
9use wasm_mutate::WasmMutate;
10use wasmparser::WasmFeatures;
11
12#[rustfmt::skip]
13static EMPTY_WASM: &'static [u8] = &[
14    // Magic.
15    0x00, b'a', b's', b'm',
16    // Version.
17    0x01, 0x00, 0x00, 0x00,
18];
19
20#[cfg_attr(
21    not(feature = "clap"),
22    doc = r###"
23Shrink a Wasm file while maintaining a property of interest (such as
24triggering a compiler bug).
25
26# Example
27
28```
29use wasm_shrink::WasmShrink;
30
31# fn foo() -> anyhow::Result<()> {
32// Get the Wasm you want to shrink from somewhere.
33let my_input_wasm: Vec<u8> = todo!();
34
35// Configure the shrinking task.
36let shrink = WasmShrink::default()
37    // Give up on shrinking after 999 failed attempts to shrink a given
38    // Wasm test case any further.
39    .attempts(999)
40    // Optionally do something with each new smallest Wasm as it is discovered.
41    .on_new_smallest(Some(Box::new(|new_smallest| {
42        // Do stuff...
43        Ok(())
44    })));
45
46// Run the configured shrinking task.
47let info = shrink.run(
48    my_input_wasm,
49    // Predicate.
50    &mut |wasm| {
51        let is_interesting: bool = todo!(
52            "check for whether the given Wasm is interesting"
53        );
54        Ok(is_interesting)
55    }
56)?;
57
58// Get the shrunken Wasm and other information about the completed shrink
59// task from the returned `ShrinkInfo`.
60let shrunken_wasm = info.output;
61# Ok(()) }
62```
63"###
64)]
65#[cfg_attr(feature = "clap", derive(clap::Parser))]
66pub struct WasmShrink {
67    /// The number of shrink attempts to try before considering a Wasm module as
68    /// small as it will ever get.
69    #[cfg_attr(feature = "clap", clap(short, long, default_value = "1000"))]
70    attempts: u32,
71
72    /// Allow shrinking the input down to an empty Wasm module.
73    ///
74    /// This is usually not desired and typically reflects a bug in the
75    /// predicate script.
76    #[cfg_attr(feature = "clap", clap(long))]
77    allow_empty: bool,
78
79    /// The RNG seed for choosing which size-reducing mutation to attempt next.
80    #[cfg_attr(feature = "clap", clap(short, long, default_value = "42"))]
81    seed: u64,
82
83    #[cfg_attr(feature = "clap", clap(skip))]
84    on_new_smallest: Option<Box<dyn FnMut(&[u8]) -> Result<()>>>,
85}
86
87impl Default for WasmShrink {
88    fn default() -> Self {
89        WasmShrink {
90            attempts: 1000,
91            allow_empty: false,
92            seed: 42,
93            on_new_smallest: None,
94        }
95    }
96}
97
98impl WasmShrink {
99    /// Set the number of shrink attempts to try before considering a Wasm
100    /// module as small as it will ever get.
101    pub fn attempts(mut self, attempts: u32) -> WasmShrink {
102        self.attempts = attempts;
103        self
104    }
105
106    /// Is it okay to shrink the input down to an empty Wasm module?
107    ///
108    /// This is usually not desired and typically reflects a bug in the
109    /// predicate.
110    pub fn allow_empty(mut self, allow_empty: bool) -> WasmShrink {
111        self.allow_empty = allow_empty;
112        self
113    }
114
115    /// Set the RNG seed for choosing which size-reducing mutation to attempt
116    /// next.
117    pub fn seed(mut self, seed: u64) -> WasmShrink {
118        self.seed = seed;
119        self
120    }
121
122    /// Set the callback that is called each time we discover a new smallest
123    /// test case that is interesting.
124    pub fn on_new_smallest(
125        mut self,
126        on_new_smallest: Option<Box<dyn FnMut(&[u8]) -> Result<()>>>,
127    ) -> WasmShrink {
128        self.on_new_smallest = on_new_smallest;
129        self
130    }
131
132    /// Run this configured Wasm shrinking task.
133    ///
134    /// The `predicate` function is called on each candidate Wasm to determine
135    /// whether the Wasm is interesting or not. Returning `true` means that it
136    /// is interesting, `false` means that it is not.
137    ///
138    /// Returns the shrunken Wasm and information and metrics about the shrink
139    /// task, such as the size of the input Wasm, the size of the output Wasm,
140    /// etc.
141    pub fn run<P, I>(self, input: Vec<u8>, predicate: P) -> Result<ShrinkInfo>
142    where
143        P: FnMut(&[u8]) -> Result<I>,
144        I: IsInteresting,
145    {
146        ShrinkRun::new(self, input).run(predicate)
147    }
148}
149
150struct ShrinkRun {
151    shrink: WasmShrink,
152    rng: SmallRng,
153
154    // The size of the original input Wasm.
155    input_size: u64,
156
157    // The smallest Wasm that passes the predicate.
158    best: Vec<u8>,
159
160    // Mutated Wasm test cases that we've already tested and either found to be
161    // uninteresting or ultimately not lead to test cases smaller than our
162    // current best.
163    already_tested: HashSet<blake3::Hash>,
164
165    // The count of how many times we've attempted to shrink our current test
166    // case smaller than `best`.
167    attempt: u32,
168}
169
170impl ShrinkRun {
171    pub fn new(shrink: WasmShrink, input: Vec<u8>) -> ShrinkRun {
172        let rng = SmallRng::seed_from_u64(shrink.seed);
173        let input_size = input.len() as u64;
174        let best = input;
175        ShrinkRun {
176            shrink,
177            rng,
178            input_size,
179            best,
180            already_tested: HashSet::new(),
181            attempt: 0,
182        }
183    }
184
185    fn on_new_best(&mut self, new_best: Vec<u8>) -> Result<()> {
186        debug_assert!(
187            new_best.len() < self.best.len() || (new_best == EMPTY_WASM && self.best == EMPTY_WASM)
188        );
189        log::info!("New smallest Wasm found: {} bytes", new_best.len());
190        if let Some(f) = self.shrink.on_new_smallest.as_mut() {
191            f(&new_best)?;
192        }
193        self.best = new_best;
194        self.attempt = 0;
195        Ok(())
196    }
197
198    fn should_accept(&mut self, current: &[u8], new_interesting: &[u8]) -> bool {
199        new_interesting.len() < current.len() ||
200            // With low probability, accept larger interesting Wasm test cases
201            // to avoid getting stuck in local minima.
202            self.rng.random_ratio(1, 100)
203    }
204
205    fn on_new_interesting(
206        &mut self,
207        current: &mut Vec<u8>,
208        new_interesting: Vec<u8>,
209    ) -> Result<()> {
210        debug_assert!(self.best.len() <= current.len());
211        *current = new_interesting;
212        if current.len() < self.best.len() {
213            self.on_new_best(current.clone())?;
214        }
215        Ok(())
216    }
217
218    fn validate_wasm(&self, wasm: &[u8]) -> Result<()> {
219        let mut validator = wasmparser::Validator::new_with_features(WasmFeatures::all());
220        validator.validate_all(wasm)?;
221        Ok(())
222    }
223
224    fn finish(self) -> ShrinkInfo {
225        ShrinkInfo {
226            input_size: self.input_size,
227            output_size: self.best.len() as u64,
228            output: self.best,
229        }
230    }
231
232    pub fn run<P, I>(mut self, mut predicate: P) -> Result<ShrinkInfo>
233    where
234        P: FnMut(&[u8]) -> Result<I>,
235        I: IsInteresting,
236    {
237        // The Wasm that we are currently mutating.
238        //
239        // This can differ from `best` in that, with a very small probability,
240        // we will sometimes accept mutations that don't shrink Wasm size. This
241        // behavior is borrowed from MCMC[0] and helps us avoid getting stuck in
242        // local minima. For example, we might replace a `ref.func $f` with a
243        // `ref.null`, which doesn't actually shrink code size itself, but which
244        // might make `$f` dead code such that we can remove `$f` altogether in
245        // a follow up mutation.
246        //
247        // [0]: https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo
248        let mut current = self.best.clone();
249
250        // Check prerequisites for the input Wasm.
251        self.validate_wasm(&current)
252            .context("The input is not valid Wasm.")?;
253
254        // First double check that the input Wasm passes the predicate.
255        //
256        // It is fairly common to accidentally write a predicate script that
257        // doesn't consider the input Wasm interesting. Better to surface this
258        // user error as quick as possible than to make them wait until we've
259        // exhausted all the ways we could shrink it further.
260        let result = predicate(&current)?;
261        anyhow::ensure!(
262            result.is_interesting(),
263            "The predicate does not consider the input Wasm interesting: {}",
264            result
265        );
266
267        // Next try running the predicate on an empty Wasm module.
268        //
269        // It is fairly common to accidentally write a predicate script that
270        // considers the empty module interesting, and we might as well check
271        // for it eagerly, rather than make the user wait forever until we
272        // finally to reduce the whole Wasm module to nothing.
273        let result = predicate(EMPTY_WASM)?;
274        if result.is_interesting() {
275            if self.shrink.allow_empty {
276                self.on_new_best(EMPTY_WASM.to_vec())?;
277                return Ok(self.finish());
278            } else {
279                anyhow::bail!(
280                    "The predicate considers the empty Wasm module \
281                     interesting, which is usually not desired and \
282                     is a symptom of a bug in the predicate:\n\
283                     \n\
284                     {}",
285                    result
286                );
287            }
288        }
289
290        // Now we perform the main search. Keep trying to find smaller and
291        // interesting variants of the current smallest interesting Wasm file
292        // until we run out of attempts and get stuck.
293        while self.attempt < self.shrink.attempts {
294            self.attempt += 1;
295
296            let mut mutate = WasmMutate::default();
297            let seed = self.rng.random();
298            mutate.reduce(true).seed(seed);
299            log::trace!("Attempt #{}: seed: {}", self.attempt, seed);
300
301            let mutations = match mutate.run(&current) {
302                Ok(m) => m,
303                Err(e) => {
304                    // This mutation failed, but another randomly chosen
305                    // mutation might succeed, so keep attempting.
306                    log::trace!("Attempt #{}: mutation failed ({:?})", self.attempt, e);
307                    continue;
308                }
309            };
310
311            let mut mutations = mutations
312                // NB: The only mutator that takes advantage of returning an
313                // iterator with more than one item is the peephole mutator, which
314                // doesn't help shrinking too much. Therefore, we only take at most
315                // 10 elements.
316                .take(std::cmp::min(self.shrink.attempts - self.attempt, 10) as usize)
317                .peekable();
318
319            if mutations.peek().is_none() {
320                log::trace!(
321                    "Attempt #{}: `wasm-mutate` failed to generate any mutations",
322                    self.attempt
323                );
324                continue;
325            }
326
327            let mut new_current_wasm = None;
328            for (i, mutated_wasm) in mutations.enumerate() {
329                if i > 0 {
330                    self.attempt += 1;
331                }
332
333                let mutated_wasm = match mutated_wasm {
334                    Ok(w) => w,
335                    Err(e) => {
336                        // This mutation failed, but another randomly chosen
337                        // mutation might succeed, so keep attempting.
338                        log::trace!("Attempt #{}: mutation failed ({:?})", self.attempt, e);
339                        continue;
340                    }
341                };
342
343                let hash = blake3::hash(&mutated_wasm);
344                if !self.already_tested.insert(hash) {
345                    log::trace!("Attempt #{}: already tested this candidate", self.attempt,);
346                    continue;
347                }
348
349                log::trace!(
350                    "Attempt #{}: testing candidate ({} bytes)",
351                    self.attempt,
352                    mutated_wasm.len()
353                );
354
355                let result = predicate(&mutated_wasm)?;
356                if result.is_interesting() {
357                    log::trace!("Attempt #{}: candidate is interesting", self.attempt);
358                    if self.should_accept(&current, &mutated_wasm) {
359                        log::trace!("Attempt #{}: accepting candidate", self.attempt);
360                        new_current_wasm = Some(mutated_wasm);
361                        break;
362                    }
363                } else {
364                    log::trace!("Attempt #{}: candidate is not interesting", self.attempt);
365                }
366            }
367
368            if let Some(new_current_wasm) = new_current_wasm {
369                self.on_new_interesting(&mut current, new_current_wasm)?;
370            }
371        }
372
373        Ok(self.finish())
374    }
375}
376
377/// A type that describes whether a Wasm is interesting or not.
378pub trait IsInteresting: std::fmt::Display {
379    /// Was the Wasm interesting?
380    fn is_interesting(&self) -> bool;
381}
382
383impl IsInteresting for bool {
384    fn is_interesting(&self) -> bool {
385        *self
386    }
387}
388
389/// Information about a completed shrinking run.
390pub struct ShrinkInfo {
391    /// The original size of the input Wasm.
392    pub input_size: u64,
393
394    /// The final size of the output, shrunken Wasm.
395    pub output_size: u64,
396
397    /// The final, shrunken Wasm.
398    pub output: Vec<u8>,
399}