hyperpom 0.1.2

AArch64 fuzzing library based on the Apple Silicon hypervisor
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
//! Handles everything related to coverage gathering at runtime.

use std::collections::{BTreeSet, HashSet};
use std::sync::{Arc, RwLock};

use applevisor as av;

use crate::core::*;
use crate::crash::*;
use crate::error::*;
use crate::hooks::*;
use crate::memory::*;

/// Span of virtual addresses on which coverage is applied, built from a start address and an
/// end address.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct CoverageRange(pub(crate) std::ops::Range<u64>);

impl CoverageRange {
    /// Builds a coverage range going from `start` to `end`.
    ///
    /// This structure is instantiated by the [`Loader`](crate::loader::Loader) in
    /// [`Loader::coverage_ranges`](crate::loader::Loader::coverage_ranges) to specify which
    /// virtual address ranges should be covered. Instrumenting everything is not an option,
    /// because data sections found in code ranges could be interpreted as instructions. The onus
    /// is on the user to identify which ranges are actual code ranges.
    pub fn new(start: u64, end: u64) -> Self {
        let span = std::ops::Range { start, end };
        Self(span)
    }
}

/// Coverage information gathered for one testcase executed by one worker.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Coverage {
    /// Ordered set holding the entries of the local worker coverage associated to the current
    /// testcase.
    pub set: BTreeSet<u128>,
}

impl Coverage {
    /// Instantiates an empty coverage structure.
    pub fn new() -> Self {
        Self::default()
    }

    /// Discards all the coverage information recorded so far.
    pub fn clear(&mut self) {
        let Self { set } = self;
        set.clear();
    }

    /// Returns the number of entries currently stored in the coverage set.
    pub fn count(&self) -> usize {
        let Self { set } = self;
        set.len()
    }
}

/// Non-thread safe state wrapped by [`GlobalCoverage`].
#[derive(Clone, Debug)]
pub struct GlobalCoverageInner {
    /// Coverage information aggregated from all fuzzing workers.
    pub(crate) coverage: Coverage,
    /// Virtual address ranges where coverage hooks can be placed without risking any data
    /// corruption that could lead to unwanted crashes or undefined behaviours.
    ranges: Vec<CoverageRange>,
    /// Set of hashed backtraces used to deduplicate crashes.
    known_crashes: HashSet<u64>,
}

impl GlobalCoverageInner {
    /// Creates a global coverage structure for the given ranges, with no coverage recorded and
    /// no known crash.
    fn new(ranges: Vec<CoverageRange>) -> Self {
        let coverage = Coverage::default();
        let known_crashes = HashSet::default();
        Self {
            coverage,
            ranges,
            known_crashes,
        }
    }
}

/// Global coverage structure shared between threads and updated after each testcase run.
///
/// # Role of Coverage in the Fuzzer
///
/// Coverage-guided fuzzing is used in a lot of modern fuzzers nowadays (e.g. LibFuzzer,
/// HonggFuzz, etc.). The idea is to gather information during the execution of a program to
/// identify which parts have actually been executed. The information gathered can be of different
/// types (addresses, stack frames generated, etc.) and so can the gathering methods (hardware
/// mechanisms, hooks added at compile-time / runtime, etc.).
///
/// While using coverage is not necessary and can hinder performance, it's a decent strategy for
/// a generic fuzzer unaware of the input formats expected by the program, since the generated
/// data can be used to identify interesting testcases (e.g. if they cover new paths).
///
/// # Coverage Implementation
///
/// ## Coverage
///
/// Coverage is implemented in this fuzzer by hooking instructions that perform arbitrary or
/// conditional branching, namely:
///
///  * all the `b.cond` instructions;
///  * `cbz` and `cbnz`;
///  * `tbz` and `tbnz`;
///  * `blr` and `br`.
///
/// The hooking function used for coverage is [`GlobalCoverage::hook_coverage`] and is placed while
/// initializing the fuzzed program's virtual space using [`GlobalCoverage::add_hooks`].
///
/// **Note:** for more information on hooking, you can refer to [`Hooks`](crate::hooks::Hooks).
///
/// Placing these hooks is a little bit complicated on ARM systems, though: it's possible to have
/// data sections, such as literal pools, woven into code sections. If we were to indiscriminately
/// place hooks on any byte sequence that can be disassembled into one of the instructions listed
/// above, we could corrupt one of these data sections, which might result in unwanted crashes or
/// undefined behaviours. To prevent this from happening, the user is responsible for defining the
/// ranges where coverage should be applied by implementing
/// [`Loader::coverage_ranges`](crate::loader::Loader::coverage_ranges) from the
/// [`Loader`](crate::loader::Loader) trait. This function returns a list of [`CoverageRange`]s
/// defining the virtual address ranges that are safe to hook.
///
/// Once a program is running on a worker, coverage hooks that are hit take the current address
/// and store it into a [`Coverage`] object. Each worker owns an instance of this structure.
/// To make fuzzing more efficient, coverage information is shared between all workers using
/// [`GlobalCoverage`]. At the end of each iteration, a worker takes the coverage information that
/// was generated from the current testcase and compares it to the global one. If new paths have
/// been found, the testcase is added to the corpus so it can be reused and mutated to reach
/// increasingly deeper execution paths.
///
/// Additionally, since the only information we're interested in here is whether or not new paths
/// have been hit, keeping hooks for paths that have already been covered is redundant. We can then
/// get better performance by removing the coverage hooks of paths once they have been covered,
/// using [`Hooks::revert_coverage_hooks`](crate::hooks::Hooks::revert_coverage_hooks).
///
/// ## Comparison Unrolling
///
/// One of the main roadblocks to overcome while fuzzing is handling comparisons with constant
/// magic values. For example, imagine that we have a program that checks that our input starts
/// with `0xdeadbeef` before processing it.
///
/// ```text
/// if u32::from_le_bytes(input[0..4]) == 0xdeadbeef {
///     process();
/// } else {
///     exit();
/// }
/// ```
///
/// The fuzzer would have to guess 32 bits in a single iteration, which is very unlikely and would
/// stall the fuzzer until it guesses them correctly.
///
/// There are different possible approaches to solve this problem, like hooking comparisons to
/// always pass them or informing the mutator of the comparison value to generate a custom testcase
/// with it. This fuzzer implements comparison unrolling. The idea is to take a comparison on a
/// multi-byte value and split it into multiple single-byte comparisons.
///
/// ```text
/// if input[3] == 0xde {
///     if input[2] == 0xad {
///         if input[1] == 0xbe {
///             // [...]
///         }
///     }
/// }
/// ```
///
/// When the fuzzer is initialized, it looks for every comparison instruction it can find and
/// then places a [`GlobalCoverage::hook_cmp`] hook on them. This function takes the hooked
/// comparison instruction, disassembles it to retrieve the values being compared and adds a path
/// for every byte it manages to guess correctly.
///
/// One of the issues with this implementation is that it adds new testcases for input values that
/// partially match. Once we have a testcase that passes the comparison, the other intermediate
/// testcases are less likely to produce interesting results and are mostly clogging up the input
/// queue at this point. A possible way to improve the fuzzer in the future would be to add an
/// additional queue for these intermediate testcases that would be flushed when the comparison
/// is no longer an issue.
#[derive(Clone, Debug)]
pub struct GlobalCoverage {
    /// Shared state ([`GlobalCoverageInner`]) behind a reference-counted read-write lock, so that
    /// clones of this structure all point to the same data.
    pub inner: Arc<RwLock<GlobalCoverageInner>>,
}

impl GlobalCoverage {
    /// Mnemonics of the instructions updating coverage: an instruction whose disassembled
    /// mnemonic is in this list receives a [`GlobalCoverage::hook_coverage`] hook in
    /// [`GlobalCoverage::add_hooks`].
    const B_INSNS: &'static [&'static str] = &[
        "b.eq", "b.ne", "b.cs", "b.cc", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge",
        "b.lt", "b.gt", "b.le", "b.al", "cbnz", "cbz", "tbnz", "tbz", "blr", "br",
    ];
    /// Mnemonics of the instructions that influence conditional behavior: an instruction whose
    /// disassembled mnemonic is in this list receives a [`GlobalCoverage::hook_cmp`] hook in
    /// [`GlobalCoverage::add_hooks`] when comparison unrolling is enabled.
    const CMP_INSNS: &'static [&'static str] = &["cmp", "subs"];

    /// Instantiates a new shared global coverage structure from the user-provided virtual memory
    /// ranges where coverage should be applied.
    pub fn new(ranges: Vec<CoverageRange>) -> Self {
        let state = GlobalCoverageInner::new(ranges);
        let inner = Arc::new(RwLock::new(state));
        Self { inner }
    }

    /// Takes the read lock and returns a copy of the aggregated coverage.
    pub fn cloned(&self) -> Coverage {
        self.inner.read().unwrap().coverage.clone()
    }

    /// Takes the read lock and returns the number of entries in the aggregated coverage.
    pub fn count(&self) -> usize {
        self.inner.read().unwrap().coverage.count()
    }

    /// Walks the registered coverage ranges and places hooks on the fuzzed program.
    ///
    /// Every 4-byte word of every range is read from `vma` and disassembled. Depending on the
    /// resulting mnemonic, one of two hook types is installed:
    ///
    ///  * a *coverage* hook ([`GlobalCoverage::hook_coverage`]) on branch instructions;
    ///  * a *comparison* hook ([`GlobalCoverage::hook_cmp`]) on comparison instructions, which
    ///    performs comparison unrolling. These are only placed when `comparison_unrolling` is
    ///    `true`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the memory read if a range cannot be read.
    ///
    /// # Panics
    ///
    /// Panics if the disassembler reports an error or yields an instruction without a mnemonic.
    pub fn add_hooks<LD: Clone, GD: Clone>(
        &self,
        vma: &VirtMemAllocator,
        hooks: &mut Hooks<LD, GD>,
        comparison_unrolling: bool,
    ) -> Result<()> {
        let state = self.inner.read().unwrap();
        for CoverageRange(span) in &state.ranges {
            // A64 instructions are 4 bytes long, hence the stride.
            for pc in span.clone().step_by(4) {
                let mut word = [0; 4];
                vma.read(pc, &mut word)?;
                // Classifies the word as (branch instruction, comparison instruction). Words that
                // do not disassemble to anything are neither.
                let (is_branch, is_cmp) = CSE.with(|engine| {
                    let decoded = engine
                        .disasm_count(&word, pc, 1)
                        .expect("could not disassemble while adding coverage hooks");
                    match decoded.as_ref().first() {
                        Some(insn) => {
                            let mnemonic = insn.mnemonic().unwrap();
                            (
                                Self::B_INSNS.contains(&mnemonic),
                                Self::CMP_INSNS.contains(&mnemonic),
                            )
                        }
                        None => (false, false),
                    }
                });
                // Branch hooks take precedence over comparison hooks.
                if is_branch {
                    hooks.add_coverage_hook(pc, GlobalCoverage::hook_coverage);
                } else if is_cmp && comparison_unrolling {
                    hooks.add_coverage_hook(pc, GlobalCoverage::hook_cmp);
                }
            }
        }
        Ok(())
    }

    /// Records a crash hash in the set of known crashes.
    ///
    /// Returns `true` if the crash is new (the hash was not in the set and has just been
    /// inserted), `false` if it was already known.
    ///
    /// The lookup is first done under the read lock so that already-known crashes do not
    /// contend on the write lock. Since another thread can insert the same hash between the
    /// release of the read lock and the acquisition of the write lock, the final answer is the
    /// one given by the insertion itself: only one caller gets `true` for a given hash.
    ///
    /// # Panics
    ///
    /// Panics if the lock is poisoned.
    pub fn update_new_crashes(&mut self, hash: u64) -> bool {
        // The read guard is a temporary of this statement and is released before the write lock
        // is requested below.
        let already_known = self.inner.read().unwrap().known_crashes.contains(&hash);
        if already_known {
            return false;
        }
        // `HashSet::insert` returns `false` when the value was already present, which covers the
        // case where another thread inserted the hash in the meantime.
        self.inner.write().unwrap().known_crashes.insert(hash)
    }

    /// Merges the coverage computed by the current worker into the global coverage.
    ///
    /// Returns `Some(n)`, where `n` is the number of entries of `other` that were not yet part of
    /// the global coverage and have just been added to it, to signal that the current testcase
    /// yielded interesting results and should be added to the corpus. Returns `None` when `other`
    /// brings nothing new, in which case the global coverage is left untouched.
    ///
    /// # Panics
    ///
    /// Panics if the lock is poisoned.
    pub fn update_new_coverage(&self, other: &Coverage) -> Option<u64> {
        // Fast path: most testcases do not find anything new, so check under the read lock
        // first to avoid serializing the workers on the write lock.
        let nothing_new = {
            let inner = self.inner.read().unwrap();
            other.set.is_subset(&inner.coverage.set)
        };
        if nothing_new {
            return None;
        }
        let mut inner = self.inner.write().unwrap();
        let global = &mut inner.coverage.set;
        let old_size = global.len();
        // Inserts in place instead of rebuilding the whole global set.
        global.extend(other.set.iter().copied());
        // Another thread might have added the same entries right after the read lock was
        // dropped, in which case nothing was actually inserted.
        match global.len() - old_size {
            0 => None,
            added => Some(added as u64),
        }
    }

    /// Coverage hook handler: records the address of the hooked instruction in the local worker
    /// coverage and lets the execution continue.
    pub fn hook_coverage<LD, GD>(args: &mut HookArgs<LD, GD>) -> Result<ExitKind> {
        let pc = args.addr as u128;
        args.cdata.set.insert(pc);
        Ok(ExitKind::Continue)
    }

    /// Comparison hook handler: decodes the hooked instruction, retrieves the two compared
    /// values and adds one coverage entry per matching byte.
    ///
    /// Bytes are compared from the most significant one downwards and the walk stops at the
    /// first mismatch. Each matching byte at index `i` adds the entry `((i + 1) << 0x30) | addr`
    /// to the local worker coverage. Instructions that cannot be decoded add nothing.
    pub fn hook_cmp<LD, GD>(args: &mut HookArgs<LD, GD>) -> Result<ExitKind> {
        let addr = args.addr as u128;
        // Normalizes both operand widths to 64-bit values plus a byte count, so that a single
        // walk handles the two cases.
        let (lhs, rhs, width) = match Comparisons::get_value(args.insn_int, args.vcpu)? {
            ComparisonResult::U64(reg_value, cmp_value) => (reg_value, cmp_value, 8u128),
            ComparisonResult::U32(reg_value, cmp_value) => {
                (u64::from(reg_value), u64::from(cmp_value), 4u128)
            }
            ComparisonResult::Other => return Ok(ExitKind::Continue),
        };
        // NOTE(review): the byte tag starts at bit 48 and therefore overlaps the address bits
        // when `addr >= 1 << 48` -- confirm that hooked addresses always stay below that.
        let entries = (0..width)
            .rev()
            .take_while(|&byte| (lhs >> (byte * 8)) & 0xff == (rhs >> (byte * 8)) & 0xff)
            .map(|byte| ((byte + 1) << 0x30) | addr);
        args.cdata.set.extend(entries);
        Ok(ExitKind::Continue)
    }
}

// -----------------------------------------------------------------------------------------------
// Coverage - Comparisons
// -----------------------------------------------------------------------------------------------

#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
/// Operands of a comparison instruction, as retrieved by `Comparisons::get_value`.
///
/// In the two-operand variants, the first field is the value of the first source register and
/// the second field is the value it is compared against (an immediate or a shifted register).
enum ComparisonResult {
    /// 32-bit operands.
    U32(u32, u32),
    /// 64-bit operands.
    U64(u64, u64),
    /// Other instruction, for which no operands could be retrieved.
    Other,
}

/// Empty structure that represents the comparison emulator of the fuzzer.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
struct Comparisons;

impl Comparisons {
    /// Register number that does not designate a general-purpose register: depending on the
    /// instruction and the operand, it encodes either the zero register or the stack pointer.
    const REG_ZR_OR_SP: u32 = 31;

    /// Retrieves the operands of a comparison instruction.
    ///
    /// `insn` is the raw A64 instruction word. Only SUBS (immediate) and SUBS (shifted register)
    /// -- and therefore their CMP aliases -- are decoded; any other instruction yields
    /// [`ComparisonResult::Other`].
    ///
    /// # Errors
    ///
    /// Propagates the error returned when a register cannot be read from `vcpu`.
    fn get_value(insn: u32, vcpu: &av::Vcpu) -> Result<ComparisonResult> {
        match insn {
            // -----------------------------------------------------------------------------------
            // Add/subtract (immediate)
            // -----------------------------------------------------------------------------------
            // SUBS (immediate)
            i if (i >> 23) & 0xff == 0b11100010 => Self::subs_immediate(i, vcpu),
            // -----------------------------------------------------------------------------------
            // Add/subtract (shifted registers)
            // -----------------------------------------------------------------------------------
            // SUBS (shifted registers). Bit 21 must be clear: when it is set, the instruction is
            // a SUBS (extended register), whose bits [15:10] hold an extend option and not a
            // shift amount.
            i if (i >> 24) & 0x7f == 0b1101011 && (i >> 21) & 1 == 0 => {
                Self::subs_shifted_reg(i, vcpu)
            }
            _ => Ok(ComparisonResult::Other),
        }
    }

    /// Returns the value of a register based on an instruction operand value.
    ///
    /// Operand values 0 to 30 map to X0-X30. Operand value 31 is treated as the zero register
    /// and reads as 0; callers decoding an operand where 31 encodes the stack pointer must
    /// handle that case before calling this function.
    ///
    /// # Panics
    ///
    /// Panics if `rd` is greater than 31, which cannot happen for a 5-bit operand field.
    fn get_operand(vcpu: &av::Vcpu, rd: u32) -> Result<u64> {
        match rd {
            0 => Ok(vcpu.get_reg(av::Reg::X0)?),
            1 => Ok(vcpu.get_reg(av::Reg::X1)?),
            2 => Ok(vcpu.get_reg(av::Reg::X2)?),
            3 => Ok(vcpu.get_reg(av::Reg::X3)?),
            4 => Ok(vcpu.get_reg(av::Reg::X4)?),
            5 => Ok(vcpu.get_reg(av::Reg::X5)?),
            6 => Ok(vcpu.get_reg(av::Reg::X6)?),
            7 => Ok(vcpu.get_reg(av::Reg::X7)?),
            8 => Ok(vcpu.get_reg(av::Reg::X8)?),
            9 => Ok(vcpu.get_reg(av::Reg::X9)?),
            10 => Ok(vcpu.get_reg(av::Reg::X10)?),
            11 => Ok(vcpu.get_reg(av::Reg::X11)?),
            12 => Ok(vcpu.get_reg(av::Reg::X12)?),
            13 => Ok(vcpu.get_reg(av::Reg::X13)?),
            14 => Ok(vcpu.get_reg(av::Reg::X14)?),
            15 => Ok(vcpu.get_reg(av::Reg::X15)?),
            16 => Ok(vcpu.get_reg(av::Reg::X16)?),
            17 => Ok(vcpu.get_reg(av::Reg::X17)?),
            18 => Ok(vcpu.get_reg(av::Reg::X18)?),
            19 => Ok(vcpu.get_reg(av::Reg::X19)?),
            20 => Ok(vcpu.get_reg(av::Reg::X20)?),
            21 => Ok(vcpu.get_reg(av::Reg::X21)?),
            22 => Ok(vcpu.get_reg(av::Reg::X22)?),
            23 => Ok(vcpu.get_reg(av::Reg::X23)?),
            24 => Ok(vcpu.get_reg(av::Reg::X24)?),
            25 => Ok(vcpu.get_reg(av::Reg::X25)?),
            26 => Ok(vcpu.get_reg(av::Reg::X26)?),
            27 => Ok(vcpu.get_reg(av::Reg::X27)?),
            28 => Ok(vcpu.get_reg(av::Reg::X28)?),
            29 => Ok(vcpu.get_reg(av::Reg::X29)?),
            30 => Ok(vcpu.get_reg(av::Reg::LR)?),
            // Register number 31 never designates the program counter in A64 encodings; as a
            // data-processing source operand it is the zero register.
            Self::REG_ZR_OR_SP => Ok(0),
            _ => unreachable!("invalid operand"),
        }
    }

    // -------------------------------------------------------------------------------------------
    // Add/subtract (immediate)
    // -------------------------------------------------------------------------------------------

    /// Retrieves the operands from a SUBS (immediate).
    ///
    /// Returns the value of the first source register and the immediate, optionally shifted
    /// left by 12 bits, as 64-bit operands when the `sf` bit is set and as 32-bit operands
    /// otherwise.
    ///
    /// Returns [`ComparisonResult::Other`] when the source register number is 31: it encodes
    /// the stack pointer for this instruction, which is not read here.
    #[inline]
    fn subs_immediate(insn: u32, vcpu: &av::Vcpu) -> Result<ComparisonResult> {
        let imm = (insn >> 10) & 0xfff;
        let sh = (insn >> 22) & 1;
        let value = imm << (12 * sh);
        let sf = ((insn >> 31) & 1) == 1;
        let rn = (insn >> 5) & 0x1f;
        if rn == Self::REG_ZR_OR_SP {
            return Ok(ComparisonResult::Other);
        }
        let rn_val = Self::get_operand(vcpu, rn)?;
        if sf {
            Ok(ComparisonResult::U64(rn_val, u64::from(value)))
        } else {
            // Only the lower half of the register takes part in a 32-bit comparison.
            Ok(ComparisonResult::U32(rn_val as u32, value))
        }
    }

    // -------------------------------------------------------------------------------------------
    // Add/subtract (shifted registers)
    // -------------------------------------------------------------------------------------------

    /// Retrieves the operands from a SUBS (shifted registers).
    ///
    /// Returns the value of the first source register and the value of the second source
    /// register once shifted (LSL, LSR or ASR), as 64-bit operands when the `sf` bit is set and
    /// as 32-bit operands otherwise. For both registers, number 31 is the zero register.
    ///
    /// Returns [`ComparisonResult::Other`] for reserved encodings: shift type `0b11`, or a
    /// shift amount of 32 or more on the 32-bit variant.
    #[inline]
    fn subs_shifted_reg(insn: u32, vcpu: &av::Vcpu) -> Result<ComparisonResult> {
        let shift_amount = (insn >> 10) & 0x3f;
        let shift_type = (insn >> 22) & 3;
        let sf = ((insn >> 31) & 1) == 1;
        // Shift amounts up to 63 are valid on the 64-bit variant; on the 32-bit variant, the
        // top bit of the shift amount must be clear.
        if !sf && shift_amount >= 32 {
            return Ok(ComparisonResult::Other);
        }
        let rn = (insn >> 5) & 0x1f;
        let rn_val = Self::get_operand(vcpu, rn)?;
        let rm = (insn >> 16) & 0x1f;
        let rm_val = Self::get_operand(vcpu, rm)?;
        if sf {
            Ok(match shift_type {
                0b00 => ComparisonResult::U64(rn_val, rm_val << shift_amount),
                0b01 => ComparisonResult::U64(rn_val, rm_val >> shift_amount),
                // Arithmetic shift: go through the signed type to replicate the sign bit.
                0b10 => ComparisonResult::U64(rn_val, ((rm_val as i64) >> shift_amount) as u64),
                _ => return Ok(ComparisonResult::Other),
            })
        } else {
            Ok(match shift_type {
                0b00 => ComparisonResult::U32(rn_val as u32, (rm_val as u32) << shift_amount),
                0b01 => ComparisonResult::U32(rn_val as u32, (rm_val as u32) >> shift_amount),
                // Arithmetic shift: go through the signed type to replicate the sign bit.
                0b10 => {
                    ComparisonResult::U32(rn_val as u32, ((rm_val as i32) >> shift_amount) as u32)
                }
                _ => return Ok(ComparisonResult::Other),
            })
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that the operands of SUBS (immediate) instructions are retrieved correctly, for
    /// the 64-bit and 32-bit variants, with and without the 12-bit shift of the immediate.
    #[test]
    fn cov_cmp_subs_immediate() {
        // Bound to `_vm` so that the VM instance stays alive until the end of the test
        // (presumably required for the vCPU to exist -- TODO confirm).
        let _vm = av::VirtualMachine::new().unwrap();
        // Creates a new Vcpu
        let vcpu = av::Vcpu::new().unwrap();
        vcpu.set_reg(av::Reg::X0, 0xdeadbeefdeadbeef).unwrap();
        // cmp x0, #0x123 - subs xzr, x0, #0x123
        let insn = 0xf1048c1fu32;
        let value = Comparisons::get_value(insn, &vcpu);
        assert_eq!(value, Ok(ComparisonResult::U64(0xdeadbeefdeadbeef, 0x123)));
        // cmp w0, #0x345 - subs wzr, w0, #0x345
        let insn = 0x710d141fu32;
        let value = Comparisons::get_value(insn, &vcpu);
        assert_eq!(value, Ok(ComparisonResult::U32(0xdeadbeef, 0x345)));
        // cmp x0, #0x678000 - subs xzr, x0, #0x678000
        let insn = 0xf159e01fu32;
        let value = Comparisons::get_value(insn, &vcpu);
        assert_eq!(
            value,
            Ok(ComparisonResult::U64(0xdeadbeefdeadbeef, 0x678000))
        );
        // cmp w0, #0x912000 - subs wzr, w0, #0x912000
        let insn = 0x7164481fu32;
        let value = Comparisons::get_value(insn, &vcpu);
        assert_eq!(value, Ok(ComparisonResult::U32(0xdeadbeef, 0x912000)));
    }
}