1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
//!
//! Fuzzcheck is a coverage-guided, evolutionary fuzzing engine for Rust
//! functions.
//!

#![feature(drain_filter)]
#![feature(never_type)]
#![feature(thread_spawn_unchecked)]
#![feature(ptr_offset_from)]
#![feature(vec_remove_item)]
#![feature(is_sorted)]

mod code_coverage_sensor;
mod data_structures;

mod fuzzer;
mod world;

mod pool;
mod signals_handler;

use fuzzcheck_arg_parser::{
    options_parser, CommandLineArguments, COMMAND_FUZZ, COMMAND_MINIFY_CORPUS, COMMAND_MINIFY_INPUT, CORPUS_SIZE_FLAG,
    DEFAULT_ARGUMENTS, INPUT_FILE_FLAG, IN_CORPUS_FLAG,
};

use std::borrow::Borrow;

/// Fuzz-test the given test function.
///
/// The command-line arguments of the current process (everything after the
/// program name) are parsed with the parser returned by `options_parser()`,
/// passing `DEFAULT_ARGUMENTS` along. The parsed arguments are then handed,
/// together with `test`, `mutator` and `serializer`, to `fuzzer::launch`.
///
/// If the arguments cannot be parsed, the parse error followed by the help
/// text is printed to standard output and the process exits with status `1`.
///
/// # Errors
/// Returns the `std::io::Error` produced by `fuzzer::launch`, if any.
pub fn launch<T, F, M, S>(test: F, mutator: M, serializer: S) -> Result<(), std::io::Error>
where
    T: ?Sized,
    M::Value: Borrow<T>,
    F: Fn(&T) -> bool,
    M: Mutator,
    S: Serializer<Value = M::Value>,
    fuzzer::Fuzzer<T, F, M, S>: 'static,
{
    // Skip the program name up front instead of slicing `[1..]`, which would
    // panic if the platform hands us an empty argument list.
    let cli_args: Vec<String> = std::env::args().skip(1).collect();
    let parser = options_parser();

    // The raw strings start right after `r#"`: an extra `"` there would be
    // part of the text and show up as a stray quote in the help output.
    let mut help = format!(
        r#"
fuzzcheck <SUBCOMMAND> [OPTIONS]

SUBCOMMANDS:
    {fuzz}    Run the fuzz test
    {tmin}    Minify a crashing test input, requires --{input_file}
    {cmin}    Minify a corpus of test inputs, requires --{in_corpus}
"#,
        fuzz = COMMAND_FUZZ,
        tmin = COMMAND_MINIFY_INPUT,
        input_file = INPUT_FILE_FLAG,
        cmin = COMMAND_MINIFY_CORPUS,
        in_corpus = IN_CORPUS_FLAG,
    );
    help.push_str(parser.usage("").as_str());
    help.push_str(&format!(
        r#"
## Examples:

fuzzcheck {fuzz}
    Launch the fuzzer with default options.

fuzzcheck {tmin} --{input_file} "artifacts/crash.json"

    Minify the test input defined in the file "artifacts/crash.json".
    It will put minified inputs in the folder artifacts/crash.minified/
    and name them {{complexity}}-{{hash}}.json. 
    For example, artifacts/crash.minified/4213--8cd7777109b57b8c.json
    is a minified input of complexity 42.13.

fuzzcheck {cmin} --{in_corpus} "fuzz-corpus" --{corpus_size} 25

    Minify the corpus defined by the folder "fuzz-corpus", which should
    contain JSON-encoded test inputs.
    It will remove files from that folder until only the 25 most important
    test inputs remain.
"#,
        fuzz = COMMAND_FUZZ,
        tmin = COMMAND_MINIFY_INPUT,
        input_file = INPUT_FILE_FLAG,
        cmin = COMMAND_MINIFY_CORPUS,
        in_corpus = IN_CORPUS_FLAG,
        corpus_size = CORPUS_SIZE_FLAG
    ));

    let args = match CommandLineArguments::from_parser(&parser, &cli_args[..], DEFAULT_ARGUMENTS) {
        Ok(r) => r,
        Err(e) => {
            println!("{}\n\n{}", e, help);
            std::process::exit(1);
        }
    };

    fuzzer::launch(test, mutator, serializer, args)
}

/**
 * A [Mutator] is an object capable of mutating a value for the purpose of
 * fuzz-testing.
 *
 * For example, a mutator could change the value
 * `v1 = [1, 4, 2, 1]` to `v1' = [1, 5, 2, 1]`.
 * The idea is that if v1 is an “interesting” value to test, then v1' also
 * has a high chance of being “interesting” to test.
 *
 * ## Complexity
 *
 * A mutator is also responsible for keeping track of the
 * [complexity](crate::Mutator::complexity) of a value. The complexity is,
 * roughly speaking, how large the value is.
 *
 * For example, the complexity of a vector is the complexity of its length,
 * plus the sum of the complexities of its elements. So `vec![]` would have a
 * complexity of `0.0` and `vec![76]` would have a complexity of `9.0`: `1.0`
 * for its short length and `8.0` for the 8-bit integer “76”. But there is no
 * fixed rule for how to compute the complexity of a value, and it is up to you
 * to judge how “large” something is.
 *
 * ## Cache
 *
 * In order to mutate values efficiently, the mutator is able to make use of a
 * per-value *cache*. The Cache contains information associated with the value
 * that will make it faster to compute its complexity or apply a mutation to
 * it. For a vector, its cache is its total complexity, along with a vector of
 * the caches of each of its elements.
 *
 * ## MutationStep
 *
 * The same value will be passed to the mutator many times, so that it is
 * mutated in many different ways. There are different strategies to choose
 * what mutation to apply to a value. The first one is to create a list of
 * mutation operations, and choose one to apply randomly from this list.
 *
 * However, one may want to have better control over which mutation operation
 * is used. For example, if the value to be mutated is of type `Option<T>`,
 * then you may want to first mutate it to `None`, and then always mutate it
 * to another `Some(t)`. This is where `MutationStep` comes in. The mutation
 * step is a type you define to allow you to keep track of which mutation
 * operations have already been tried. This allows you to deterministically
 * apply mutations to a value such that better mutations are tried first, and
 * duplicate mutations are avoided.
 *
 * ## Unmutate
 *
 * Finally, it is important to note that values and caches are mutated
 * *in-place*. The fuzzer does not clone them before handing them to the
 * mutator. Therefore, the mutator also needs to know how to reverse each
 * mutation it performed. To do so, each mutation needs to return a token
 * describing how to reverse it. The [unmutate](crate::Mutator::unmutate)
 * method will later be called with that token to get the original value
 * and cache back.
 *
 * For example, if the value is `[[1, 3], [5], [9, 8]]`, the mutator may
 * mutate it to `[[1, 3], [5], [9, 1, 8]]` and return the token:
 * `Element(2, Remove(1))`, which means that in order to reverse the
 * mutation, the element at index 2 has to be unmutated by removing
 * its element at index 1. In pseudocode:
 *
 * ```ignore
 * value = [[1, 3], [5], [9, 8]];
 * cache: c1 (omitted from example)
 * step: s1 (omitted from example)
 *
 * let unmutate_token = self.mutate(&mut value, &mut cache, &mut step, max_cplx);
 *
 * // value = [[1, 3], [5], [9, 1, 8]]
 * // token = Element(2, Remove(1))
 * // cache = c2
 * // step = s2
 *
 * test(&value);
 *
 * self.unmutate(&mut value, &mut cache, unmutate_token);
 *
 * // value = [[1, 3], [5], [9, 8]]
 * // cache = c1 (back to original cache)
 * // step = s2 (step has not been reversed)
 * ```
 *
**/
pub trait Mutator: Sized {
    /// The type of the values produced and mutated by this mutator.
    type Value: Clone;
    /// The per-value cache described in the “Cache” section of the trait
    /// documentation.
    type Cache: Clone;
    /// The state tracking which mutations have already been tried on a value;
    /// see the “MutationStep” section of the trait documentation.
    type MutationStep;
    /// The token returned by [mutate](crate::Mutator::mutate) that describes
    /// how to reverse that mutation; see the “Unmutate” section of the trait
    /// documentation.
    type UnmutateToken;

    /// Compute the cache for the given value
    fn cache_from_value(&self, value: &Self::Value) -> Self::Cache;
    /// Compute the initial mutation step for the given value
    fn mutation_step_from_value(&self, value: &Self::Value) -> Self::MutationStep;

    /// The maximum complexity of an input of this type
    fn max_complexity(&self) -> f64;
    /// The minimum complexity of an input of this type
    fn min_complexity(&self) -> f64;
    /// The complexity of the current input
    fn complexity(&self, value: &Self::Value, cache: &Self::Cache) -> f64;

    /// Create an arbitrary value, returned together with a cache.
    ///
    /// NOTE(review): `seed` presumably selects which value is generated and
    /// `max_cplx` presumably bounds the complexity of the result — confirm
    /// against the implementations.
    fn arbitrary(&self, seed: usize, max_cplx: f64) -> (Self::Value, Self::Cache);

    /// Mutate `value` and `cache` in place, updating `step`, and return the
    /// token that [unmutate](crate::Mutator::unmutate) needs in order to
    /// reverse the mutation.
    ///
    /// NOTE(review): `max_cplx` is presumably the maximum complexity the
    /// mutated value is allowed to have — confirm against the implementations.
    fn mutate(
        &self,
        value: &mut Self::Value,
        cache: &mut Self::Cache,
        step: &mut Self::MutationStep,
        max_cplx: f64,
    ) -> Self::UnmutateToken;

    /// Reverse a mutation previously performed by
    /// [mutate](crate::Mutator::mutate), restoring `value` and `cache` in
    /// place from the token `t`. The mutation step is not part of this call
    /// and is therefore not reversed.
    fn unmutate(&self, value: &mut Self::Value, cache: &mut Self::Cache, t: Self::UnmutateToken);
}

/**
 * A Serializer is used to encode and decode values into bytes.
 *
 * One possible implementation would be to use `serde` to implement
 * both required functions. But we also want to be able to fuzz-test
 * types that are not serializable with `serde`, which is why this
 * Serializer trait exists.
*/
pub trait Serializer {
    /// The type of the values that are encoded and decoded.
    type Value;
    /// A string extension associated with the encoded data.
    ///
    /// NOTE(review): presumably the file extension used when test inputs are
    /// written to disk (e.g. `json`) — confirm against the callers.
    fn extension(&self) -> &str;
    /// Decode a value from the given bytes.
    ///
    /// NOTE(review): `None` presumably means that `data` could not be
    /// decoded — confirm against the implementations.
    fn from_data(&self, data: &[u8]) -> Option<Self::Value>;
    /// Encode the given value into bytes.
    fn to_data(&self, value: &Self::Value) -> Vec<u8>;
}

/**
 * A unit of code coverage.
 *
 * A `Feature` captures one characteristic of the program’s code coverage,
 * such as “this control flow edge was reached” or “this instruction was
 * called with these operands”.
 *
 * For performance reasons it is a thin wrapper around a `u64`, laid out as
 * follows:
 *
 * - The two most significant bits (bits 62 and 63) are the `tag`, i.e. the
 * kind of the `Feature`: `edge` (`0b00`), `indirect` (`0b01`), or
 * `instruction` (`0b10`).
 * - The following 54 bits (bits 8 to 61) hold the `id` of the feature, which
 * is supposed to uniquely identify a point in the source code.
 * - The 8 least significant bits hold the `payload`: the information
 * associated with the feature, such as the number of times the control flow
 * edge was reached or a hash of the operands to the instruction.
 * - For `indirect` features, `id` and `payload` are merged into a single
 * 62-bit field.
 *
 * Each feature has a certain [score](Feature::score) that is currently only
 * determined by its `tag`.
 */
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Feature(u64);

impl Feature {
    /// Position of the lowest bit of the `id` (i.e. the width of the payload).
    fn id_offset() -> u64 {
        8
    }
    /// Position of the lowest bit of the `tag`.
    fn tag_offset() -> u64 {
        62
    }

    // Edge features use the tag `0b00`, so they need no tag helper: leaving
    // the two tag bits cleared is enough.

    /// Tag identifying an “indirect call” feature.
    fn indir_tag() -> u64 {
        0b01
    }
    /// Tag identifying an “instruction” feature.
    fn instr_tag() -> u64 {
        0b10
    }

    /// Build the “control flow edge” feature for the given `pc_guard`. Its
    /// payload is the score of `counter` (see `score_from_counter`).
    fn edge(pc_guard: usize, counter: u16) -> Feature {
        // The tag is 0b00, so nothing has to be written into the tag bits.
        // Only the low 32 bits of the guard are kept: programs with more than
        // 4 billion instrumented edges are not a concern.
        let id_bits = ((pc_guard & 0xFFFF_FFFF) as u64) << Self::id_offset();
        // The score always fits in the 8 payload bits.
        let payload_bits = u64::from(Self::score_from_counter(counter));

        Feature(id_bits | payload_bits)
    }

    /// Build the “indirect call” feature for the given `caller_xor_callee`.
    fn indir(caller_xor_callee: usize) -> Feature {
        let tag_bits = Self::indir_tag() << Self::tag_offset();
        // The modulo keeps the low 62 bits, leaving room for the tag.
        let merged_bits = (caller_xor_callee as u64) % 0x4000_0000_0000_0000_u64;

        Feature(tag_bits | merged_bits)
    }

    /// Build the “instruction” feature for the given `pc`. Its payload is the
    /// score of the number of bits that differ between `arg1` and `arg2`.
    fn instruction(pc: usize, arg1: u64, arg2: u64) -> Feature {
        let tag_bits = Self::instr_tag() << Self::tag_offset();
        // The modulo keeps the low 54 bits of the program counter.
        let id_bits = ((pc as u64) % 0x40_0000_0000_0000) << Self::id_offset();
        let differing_bits = (arg1 ^ arg2).count_ones() as u16;
        let payload_bits = u64::from(Self::score_from_counter(differing_bits));

        Feature(tag_bits | id_bits | payload_bits)
    }

    /// Return the same feature with its payload bits cleared. Indirect
    /// features are returned unchanged because they carry no separate payload.
    fn erasing_payload(self) -> Self {
        let tag = self.0 >> Self::tag_offset();
        if tag != Self::indir_tag() {
            // clear the 8 payload bits, keep everything else
            Feature(self.0 & !0xFF_u64)
        } else {
            self
        }
    }

    /// “Hash” a u16 into a small score, so that similar counters share a
    /// score and very different counters get different ones.
    ///
    /// - counters up to `3` map to themselves,
    /// - `u16::MAX` maps to `16`,
    /// - any other counter maps to its bit length plus one, which ranges
    ///   from `4` (for `4`) to `17` (for `0x8000..=0xFFFE`).
    fn score_from_counter(counter: u16) -> u8 {
        match counter {
            core::u16::MAX => 16,
            0..=3 => counter as u8,
            _ => {
                let bit_length = 16 - counter.leading_zeros();
                (bit_length + 1) as u8
            }
        }
    }
}

/**
 * A convenience bundle of everything the fuzzer tracks for one input:
 * its value, its cache, and its mutation step.
 */
#[derive(Clone)]
struct FuzzedInput<Mut: Mutator> {
    pub value: Mut::Value,
    pub cache: Mut::Cache,
    pub mutation_step: Mut::MutationStep,
}

impl<Mut: Mutator> FuzzedInput<Mut> {
    /// Bundle the given value, cache and mutation step into a `FuzzedInput`.
    pub fn new(value: Mut::Value, cache: Mut::Cache, mutation_step: Mut::MutationStep) -> Self {
        FuzzedInput {
            value,
            cache,
            mutation_step,
        }
    }

    /// Build the default input: the value and cache returned by
    /// `m.arbitrary(0, 1.0)`, paired with the initial mutation step that `m`
    /// computes for that value.
    pub fn default(m: &Mut) -> Self {
        let (value, cache) = m.arbitrary(0, 1.0);
        let initial_step = m.mutation_step_from_value(&value);
        FuzzedInput {
            value,
            cache,
            mutation_step: initial_step,
        }
    }

    /// Create a copy of this input whose value and cache are cloned, but
    /// whose mutation step is recomputed from the value by `m`.
    pub fn new_source(&self, m: &Mut) -> Self {
        let value = self.value.clone();
        let cache = self.cache.clone();
        let mutation_step = m.mutation_step_from_value(&self.value);
        FuzzedInput {
            value,
            cache,
            mutation_step,
        }
    }

    /// The complexity of this input, as computed by `m` from its value and
    /// cache.
    pub fn complexity(&self, m: &Mut) -> f64 {
        let FuzzedInput { value, cache, .. } = self;
        m.complexity(value, cache)
    }

    /// Let `m` mutate this input in place and return the token needed to
    /// undo the mutation with `unmutate`.
    pub fn mutate(&mut self, m: &Mut, max_cplx: f64) -> Mut::UnmutateToken {
        let FuzzedInput {
            value,
            cache,
            mutation_step,
        } = self;
        m.mutate(value, cache, mutation_step, max_cplx)
    }

    /// Let `m` undo, in place, the mutation described by the token `t`.
    pub fn unmutate(&mut self, m: &Mut, t: Mut::UnmutateToken) {
        let FuzzedInput { value, cache, .. } = self;
        m.unmutate(value, cache, t);
    }
}