1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
//! //! Fuzzcheck is coverage-guided, evolutionary fuzzing engines for Rust //! functions. //! #![feature(drain_filter)] #![feature(never_type)] #![feature(thread_spawn_unchecked)] #![feature(ptr_offset_from)] #![feature(vec_remove_item)] #![feature(is_sorted)] mod code_coverage_sensor; mod data_structures; mod fuzzer; mod world; mod pool; mod signals_handler; use fuzzcheck_arg_parser::{ options_parser, CommandLineArguments, COMMAND_FUZZ, COMMAND_MINIFY_CORPUS, COMMAND_MINIFY_INPUT, CORPUS_SIZE_FLAG, DEFAULT_ARGUMENTS, INPUT_FILE_FLAG, IN_CORPUS_FLAG, }; use std::borrow::Borrow; /// Fuzz-test the given test function. /// # Errors /// TODO: doc pub fn launch<T, F, M, S>(test: F, mutator: M, serializer: S) -> Result<(), std::io::Error> where T: ?Sized, M::Value: Borrow<T>, F: Fn(&T) -> bool, M: Mutator, S: Serializer<Value = M::Value>, fuzzer::Fuzzer<T, F, M, S>: 'static, { let env_args: Vec<_> = std::env::args().collect(); let parser = options_parser(); let mut help = format!( r#"" fuzzcheck <SUBCOMMAND> [OPTIONS] SUBCOMMANDS: {fuzz} Run the fuzz test {tmin} Minify a crashing test input, requires --{input_file} {cmin} Minify a corpus of test inputs, requires --{in_corpus} "#, fuzz = COMMAND_FUZZ, tmin = COMMAND_MINIFY_INPUT, input_file = INPUT_FILE_FLAG, cmin = COMMAND_MINIFY_CORPUS, in_corpus = IN_CORPUS_FLAG, ); help += parser.usage("").as_str(); help += format!( r#"" ## Examples: fuzzcheck {fuzz} Launch the fuzzer with default options. fuzzcheck {tmin} --{input_file} "artifacts/crash.json" Minify the test input defined in the file "artifacts/crash.json". It will put minified inputs in the folder artifacts/crash.minified/ and name them {{complexity}}-{{hash}}.json. For example, artifacts/crash.minified/4213--8cd7777109b57b8c.json is a minified input of complexity 42.13. fuzzcheck {cmin} --{in_corpus} "fuzz-corpus" --{corpus_size} 25 Minify the corpus defined by the folder "fuzz-corpus", which should contain JSON-encoded test inputs. It will remove files from that folder until only the 25 most important test inputs remain. "#, fuzz = COMMAND_FUZZ, tmin = COMMAND_MINIFY_INPUT, input_file = INPUT_FILE_FLAG, cmin = COMMAND_MINIFY_CORPUS, in_corpus = IN_CORPUS_FLAG, corpus_size = CORPUS_SIZE_FLAG ) .as_str(); let args = match CommandLineArguments::from_parser(&parser, &env_args[1..], DEFAULT_ARGUMENTS) { Ok(r) => r, Err(e) => { println!("{}\n\n{}", e, help); std::process::exit(1); } }; fuzzer::launch(test, mutator, serializer, args) } /** * A [Mutator] is an object capable of mutating a value for the purpose of * fuzz-testing. * * For example, a mutator could change the value * `v1 = [1, 4, 2, 1]` to `v1' = [1, 5, 2, 1]`. * The idea is that if v1 is an “interesting” value to test, then v1' also * has a high chance of being “interesting” to test. * * ## Complexity * * A mutator is also responsible for keeping track of the * [complexity](crate::Mutator::complexity) of a value. The complexity is, * roughly speaking, how large the value is. * * For example, the complexity of a vector is the complexity of its length, * plus the sum of the complexities of its elements. So `vec![]` would have a * complexity of `0.0` and `vec![76]` would have a complexity of `9.0`: `1.0` * for its short length and `8.0` for the 8-bit integer “76”. But there is no * fixed rule for how to compute the complexity of a value, and it is up to you * to judge how “large” something is. * * ## Cache * * In order to mutate values efficiently, the mutator is able to make use of a * per-value *cache*. The Cache contains information associated with the value * that will make it faster to compute its complexity or apply a mutation to * it. For a vector, its cache is its total complexity, along with a vector of * the cache of each of its element. * * ## MutationStep * * The same values will be passed to the mutator many times, so that it is * mutated in many different ways. There are different strategies to choose * what mutation to apply to a value. The first one is to create a list of * mutation operations, and choose one to apply randomly from this list. * * However, one may want to have better control over which mutation operation * is used. For example, if the value to be mutated is of type `Option<T>`, * then you may want to first mutate it to `None`, and then always mutate it * to another `Some(t)`. This is where `MutationStep` comes in. The mutation * step is a type you define to allow you to keep track of which mutation * operation has already been tried. This allows you to deterministically * apply mutations to a value such that better mutations are tried first, and * duplicate mutations are avoided. * * ## Unmutate * * Finally, it is important to note that values and caches are mutated * *in-place*. The fuzzer does not clone them before handing them to the * mutator. Therefore, the mutator also needs to know how to reverse each * mutation it performed. To do so, each mutation needs to return a token * describing how to reverse it. The [unmutate](crate::Mutator::unmutate) * method will later be called with that token to get the original value * and cache back. * * For example, if the value is `[[1, 3], [5], [9, 8]]`, the mutator may * mutate it to `[[1, 3], [5], [9, 1, 8]]` and return the token: * `Element(2, Remove(1))`, which means that in order to reverse the * mutation, the element at index 2 has to be unmutated by removing * its element at index 1. In pseudocode: * * ```ignore * value = [[1, 3], [5], [9, 8]]; * cache: c1 (ommitted from example) * step: s1 (ommitted from example) * * let unmutate_token = self.mutate(&mut value, &mut cache, &mut step, max_cplx); * * // value = [[1, 3], [5], [9, 1, 8]] * // token = Element(2, Remove(1)) * // cache = c2 * // step = s2 * * test(&value); * * self.unmutate(&mut value, &mut cache, unmutate_token); * * // value = [[1, 3], [5], [9, 8]] * // cache = c1 (back to original cache) * // step = s2 (step has not been reversed) * ``` * **/ pub trait Mutator: Sized { type Value: Clone; type Cache: Clone; type MutationStep; type UnmutateToken; /// Compute the cache for the given value fn cache_from_value(&self, value: &Self::Value) -> Self::Cache; /// Compute the initial mutation step for the given value fn mutation_step_from_value(&self, value: &Self::Value) -> Self::MutationStep; /// The maximum complexity of an input of this type fn max_complexity(&self) -> f64; /// The minimum complexity of an input of this type fn min_complexity(&self) -> f64; /// The complexity of the current input fn complexity(&self, value: &Self::Value, cache: &Self::Cache) -> f64; /// Create an arbitrary value fn arbitrary(&self, seed: usize, max_cplx: f64) -> (Self::Value, Self::Cache); fn mutate( &self, value: &mut Self::Value, cache: &mut Self::Cache, step: &mut Self::MutationStep, max_cplx: f64, ) -> Self::UnmutateToken; fn unmutate(&self, value: &mut Self::Value, cache: &mut Self::Cache, t: Self::UnmutateToken); } /** * A Serializer is used to encode and decode values into bytes. * * One possible implementation would be to use `serde` to implement * both required functions. But we also want to be able to fuzz-test * types that are not serializable with `serde`, which is why this * Serializer trait exists. */ pub trait Serializer { type Value; fn extension(&self) -> &str; fn from_data(&self, data: &[u8]) -> Option<Self::Value>; fn to_data(&self, value: &Self::Value) -> Vec<u8>; } /** * A unit of code coverage. * * A `Feature` describes a certain characteristic of the program’s code * coverage. For example, it can mean “this control flow edge was reached” or * “this instruction was called with these operands”. * * It is implemented as a wrapper of a `u64` for performance reason. But it * actually contains a lot of information. * * - The first two bits designate the kind of the `Feature`, which can be either * `edge`, `indirect`, or `instruction`. * - Then, the next 54 bits are the `id` of the feature. They are supposed to * uniquely identify a point in the source code. * - Finally, the last 8 bits are for the `payload` of the feature. They are * the information associated with the feature, such as the number of times * the control flow edge was reached or a hash of the operands to the * instruction. * - Note that for `indirect` features, `id` and `payload` are merged. * * Each feature has a certain [score](Feature::score) that is currently only * determined by its `tag`. */ #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] struct Feature(u64); impl Feature { /// The bit offset for the id of the feature fn id_offset() -> u64 { 8 } /// The bit offset for the tag of the feature fn tag_offset() -> u64 { 62 } // fn edge_tag() -> u64 { // 0b00 // } fn indir_tag() -> u64 { 0b01 } fn instr_tag() -> u64 { 0b10 } /// Create a “control flow edge” feature identified by the given `pc_guard` /// whose payload is the intensity of the given `counter`. fn edge(pc_guard: usize, counter: u16) -> Feature { let mut feature: u64 = 0; // feature |= 0b00 << Feature::tag_offset(); // take 32 last bits, I don't want to worry about programs with more than 4 billion instrumented edges anyway feature |= ((pc_guard & 0xFFFF_FFFF) as u64) << Feature::id_offset(); feature |= u64::from(Feature::score_from_counter(counter)); // will only ever be 8 bits long Feature(feature) } /// Create an “indirect call” feature identified by the given `caller_xor_callee` fn indir(caller_xor_callee: usize) -> Feature { let mut feature: u64 = 0; // keep 62 bits with modulo feature |= Feature::indir_tag() << Feature::tag_offset(); feature |= (caller_xor_callee as u64) % (0x4000_0000_0000_0000 as u64); Feature(feature) } /// Create an “instructon” feature identified by the given `pc` whose payload /// is a ~hash of the two arguments. fn instruction(pc: usize, arg1: u64, arg2: u64) -> Feature { let mut feature: u64 = 0; feature |= Feature::instr_tag() << Feature::tag_offset(); // won't do anything // keep 54 bits with modulo feature |= ((pc as u64) % 0x40_0000_0000_0000) << Feature::id_offset(); // id feature |= u64::from(Feature::score_from_counter((arg1 ^ arg2).count_ones() as u16)); Feature(feature) } fn erasing_payload(self) -> Self { if (self.0 >> Self::tag_offset()) == Self::indir_tag() { // if it is indirect, there is no payload to erase self } else { // else, zero out the payload bits Feature(self.0 & 0xFFFF_FFFF_FFFF_FF00) } } /// “Hash” a u16 into a number between 0 and 16. /// /// So that similar numbers have the same hash, and very different /// numbers have a greater hash. fn score_from_counter(counter: u16) -> u8 { if counter == core::u16::MAX { 16 } else if counter <= 3 { counter as u8 } else { (16 - counter.leading_zeros() + 1) as u8 } } } /** * A struct that stores the value, cache, and mutation step of an input. * It is used for convenience. */ #[derive(Clone)] struct FuzzedInput<Mut: Mutator> { pub value: Mut::Value, pub cache: Mut::Cache, pub mutation_step: Mut::MutationStep, } impl<Mut: Mutator> FuzzedInput<Mut> { pub fn new(value: Mut::Value, cache: Mut::Cache, mutation_step: Mut::MutationStep) -> Self { Self { value, cache, mutation_step, } } pub fn default(m: &Mut) -> Self { let (value, cache) = m.arbitrary(0, 1.0); let mutation_step = m.mutation_step_from_value(&value); Self::new(value, cache, mutation_step) } pub fn new_source(&self, m: &Mut) -> Self { Self::new( self.value.clone(), self.cache.clone(), m.mutation_step_from_value(&self.value), ) } pub fn complexity(&self, m: &Mut) -> f64 { m.complexity(&self.value, &self.cache) } pub fn mutate(&mut self, m: &Mut, max_cplx: f64) -> Mut::UnmutateToken { m.mutate(&mut self.value, &mut self.cache, &mut self.mutation_step, max_cplx) } pub fn unmutate(&mut self, m: &Mut, t: Mut::UnmutateToken) { m.unmutate(&mut self.value, &mut self.cache, t); } }