monster/engine/
rarity_simulation.rs

1//! Rarity Simulation
2//!
3//! This module contains an implementation of rarity simulation, as descibed in the paper ["Using Speculation for Sequential Equivalence Checking"
4//! ](https://people.eecs.berkeley.edu/~alanmi/publications/2012/iwls12_sec.pdf) by
5//! Brayton et. al.
6//!
7//!
8//! Rarity simulation is a "guided random simulation" using heuristics to determine interesting
9//! states and which aims to reduce the time required to find bugs in a target application.
10//! Instead of symbolically pursuing all control branches and recording constraints, it concretely
11//! executes a fixed number of states. On each iteration, rarity simulation advances each state by
12//! a fixed amount of instruction cycles and records statistics which values were encountered over
13//! the entire execution state. Furthermore, at the end of the iteration, it uses the recorded
14//! statistics to determine the _rarity_ of each state and only pursues a subset of rarest states,
15//! all other states are discarded and new states are copied from the remaining states or created
16//! to reach the number of states.
17//! Rarity simulation interations are repeated until a bug in any state is encountered or a
18//! specific amount of iterations have been completed.
19//!
20//!
21//! To archive divergent states, the target application shall issue word-granularity `read()`
22//! system calls to receive random input values. Rarity simulation stores a list of subsequent
23//! inputs that were provided to the application. In case a state caused a bug, it is able to
24//! provide a list of inputs which provoke the bug.
25//!
26//! The amount of iterations/cycles and the amount of states allows for a fine-grained control for
27//! finding bugs in depth or in breadth, respectively.
28
29use crate::engine::UninitializedMemoryAccessReason;
30
31use super::{
32    system::{instruction_to_str, SyscallId},
33    Bug as GenericBug, BugFinder, BugInfo, VirtualMemory,
34};
35use byteorder::{ByteOrder, LittleEndian};
36use bytesize::ByteSize;
37use itertools::Itertools;
38use log::{debug, info, trace, warn};
39use riscu::{
40    decode, types::*, DecodingError, Instruction, Program, ProgramSegment, Register,
41    INSTRUCTION_SIZE as INSTR_SIZE,
42};
43use std::{cmp::min, collections::HashMap, fmt, iter::IntoIterator, mem::size_of, sync::Arc};
44use strum::{EnumString, EnumVariantNames, IntoStaticStr};
45use thiserror::Error;
46
47pub type RaritySimulationBug = GenericBug<RarityBugInfo>;
48type Bug = RaritySimulationBug;
49
50type ExecutorResult = Result<Option<RaritySimulationBug>, RaritySimulationError>;
51
52const INSTRUCTION_SIZE: u64 = INSTR_SIZE as u64;
53
54const BYTES_PER_WORD: u64 = size_of::<u64>() as u64;
55const NUMBER_OF_BYTE_VALUES: u64 = 256;
56
57pub mod defaults {
58    use super::*;
59
60    pub const MEMORY_SIZE: ByteSize = ByteSize(bytesize::MIB);
61    pub const AMOUNT_OF_STATES: usize = 3000;
62    pub const STEP_SIZE: u64 = 1000;
63    pub const SELECTION: usize = 50;
64    pub const ITERATIONS: u64 = 20;
65    pub const COPY_INIT_RATIO: f64 = 0.6;
66    pub const MEAN_TYPE: MeanType = MeanType::Harmonic;
67}
68
69#[derive(Debug, Clone)]
70pub struct RaritySimulationOptions {
71    /// The size of the machine's memory
72    pub memory_size: ByteSize,
73    /// The number of states to pursue
74    pub amount_of_states: usize,
75    /// The amount of instructions to execute for each state on each iteration
76    pub step_size: u64,
77    /// Amount of (rarest) states that shall be further considered at the end of each iteration.
78    pub selection: usize,
79    /// The amount of rarity simulation iterations to perform
80    pub iterations: u64,
81    /// After discarding least rare and exited states, determines how much new states shall
82    /// be copied from the remaining (rare) states and, in inverse, how much shall be newly
83    /// created relative to the amount of missing states to archive `number_of_states`.
84    /// Must be between 0 and 1.
85    pub copy_init_ratio: f64,
86    /// The mean to use for determining state rarity
87    pub mean: MeanType,
88}
89
90impl Default for RaritySimulationOptions {
91    fn default() -> RaritySimulationOptions {
92        RaritySimulationOptions {
93            memory_size: defaults::MEMORY_SIZE,
94            amount_of_states: defaults::AMOUNT_OF_STATES,
95            step_size: defaults::STEP_SIZE,
96            selection: defaults::SELECTION,
97            iterations: defaults::ITERATIONS,
98            copy_init_ratio: defaults::COPY_INIT_RATIO,
99            mean: defaults::MEAN_TYPE,
100        }
101    }
102}
103
104#[derive(Debug, Clone, Error)]
105pub enum RaritySimulationError {
106    #[error("failed to write State to file")]
107    IoError(Arc<std::io::Error>),
108
109    #[error("engine does not support {0}")]
110    NotSupported(String),
111
112    #[error("failed to decode instruction at PC: {0:#010x}")]
113    InvalidInstructionEncoding(u64, DecodingError),
114
115    #[error("has reached the maximum execution depth of {0}")]
116    ExecutionDepthReached(u64),
117}
118
119/// Strategy for mean calculation
120///
121/// Based on the value counters, the rarity simulator calculates a score that is used to determine
122/// a state's rarity. This score is essential for the decision which states shall be further
123/// pursued and which shall be discarded.
124#[derive(Clone, Copy, Debug, EnumString, EnumVariantNames, IntoStaticStr)]
125#[strum(serialize_all = "kebab_case")]
126pub enum MeanType {
127    /// Mean is calculated using the [arithmetic
128    /// mean](https://en.wikipedia.org/wiki/Arithmetic_mean), i.e. the sum of all statistic
129    /// counters divided by the amount of states.
130    /// Lower scores are more rare
131    Arithmetic,
132    /// Mean is calculated using the [harmonic mean](https://en.wikipedia.org/wiki/Harmonic_mean),
133    /// i.e. the amount of states divided by the sum of residues of statistic counters.
134    /// Higher scores are more rare.
135    Harmonic,
136}
137
138/// The state information of a running target
139///
140/// The [`State`] struct contains all necessary state information of a running target application,
141/// similar to an operating system's context. Values are stored using the [`Value`] type. Only if a
142/// location has been written to, it hosts a concrete value ([`Value::Concrete`]), otherwise it is
143/// [`Value::Uninitialized`].
144#[derive(Debug, Clone)]
145pub struct State {
146    /// Program counter
147    pc: u64,
148    /// Processor integer registers x0..x31
149    regs: [Value; 32],
150    /// List of touched and untouched memory words
151    memory: VirtualMemory<Value>,
152}
153
154type Address = u64;
155type Counter = u64;
156
157#[derive(Debug, Clone)]
158pub struct RaritySimulation {
159    options: RaritySimulationOptions,
160}
161
162impl RaritySimulation {
163    pub fn new(options: &RaritySimulationOptions) -> Self {
164        Self {
165            options: options.clone(),
166        }
167    }
168}
169
170impl BugFinder<RarityBugInfo, RaritySimulationError> for RaritySimulation {
171    /// Performs rarity simulation on a given program
172    ///
173    /// If one state encountered a bug, execution is terminated and its description is returned. If no
174    /// bugs have been encountered after the configured limit has been met, [`None`] is returned.
175    ///
176    /// Please see the [module-level documentation](self) for a detailed description of rarity simulation.
177    #[allow(clippy::vec_box)]
178    fn search_for_bugs(&self, program: &Program) -> Result<Option<Bug>, RaritySimulationError> {
179        let mut executors: Vec<Box<Executor>> = Vec::new();
180
181        for iteration in 0..self.options.iterations {
182            info!("Running rarity simulation round {}...", iteration + 1);
183
184            time_info!("Creating states", {
185                create_missing_executors(
186                    &mut executors,
187                    self.options.amount_of_states,
188                    self.options.copy_init_ratio,
189                    self.options.memory_size,
190                    program,
191                )
192            });
193
194            let results = time_info!("Running engines", {
195                match run_all(&mut executors, self.options.step_size) {
196                    Ok((Some(bug), _)) => return Ok(Some(bug)),
197                    Ok((None, results)) => results,
198                    Err(e) => return Err(e),
199                }
200            });
201
202            let running = filter_successfully_exited(executors, results);
203
204            info!(
205                "Remove {} successfully exited states from selection",
206                self.options.amount_of_states - running.len()
207            );
208
209            let scores = time_info!("Scoring states", {
210                let states: Vec<_> = running.iter().map(|e| e.state()).collect();
211
212                score_with_mean(&states[..], self.options.mean)
213            });
214
215            let selection = min(self.options.selection, running.len());
216
217            executors = select_rarest(running, selection, scores);
218
219            info!("selecting rarest {} states", selection);
220        }
221
222        Ok(None)
223    }
224}
225
226#[allow(clippy::vec_box)]
227fn create_missing_executors(
228    executors: &mut Vec<Box<Executor>>,
229    amount: usize,
230    copy_init_ratio: f64,
231    memory_size: ByteSize,
232    program: &Program,
233) {
234    let missing = amount - executors.len();
235
236    let to_copy = if executors.is_empty() {
237        0
238    } else {
239        f64::round(missing as f64 * copy_init_ratio) as usize
240    };
241    let to_create = missing - to_copy;
242
243    info!(
244        "Creating {} new states ({} copied, {} new)",
245        missing, to_copy, to_create
246    );
247
248    let copyable_engines = executors.len();
249
250    let copied = (0..to_copy)
251        .map(|_| Box::new((*executors[random_index(copyable_engines)]).clone()))
252        .collect_vec();
253
254    let created = (0..to_create).map(|_| Box::new(Executor::new(program, memory_size)));
255
256    executors.extend(copied.into_iter().chain(created));
257}
258
259fn run_all(
260    executors: &mut [Box<Executor>],
261    step_size: u64,
262) -> Result<(Option<RaritySimulationBug>, Vec<ExecutorResult>), RaritySimulationError> {
263    let results: Vec<_> = executors
264        .iter_mut()
265        .map(|engine| engine.run(step_size))
266        .collect();
267
268    if let Some(Ok(Some(bug))) = results.iter().find(|r| matches!(r, Ok(Some(_)))) {
269        Ok((Some(bug.clone()), results))
270    } else if let Some(Err(e)) = results.iter().find(|r| match **r {
271        Err(RaritySimulationError::ExecutionDepthReached(_)) => false,
272        Err(_) => true,
273        _ => false,
274    }) {
275        Err(e.clone())
276    } else {
277        Ok((None, results))
278    }
279}
280
281#[allow(clippy::vec_box)]
282fn filter_successfully_exited(
283    executors: impl IntoIterator<Item = Box<Executor>>,
284    results: impl IntoIterator<Item = ExecutorResult>,
285) -> Vec<Box<Executor>> {
286    executors
287        .into_iter()
288        .zip(results)
289        .filter(|(_, r)| matches!(r, Err(RaritySimulationError::ExecutionDepthReached(_))))
290        .map(|(e, _)| e)
291        .collect()
292}
293
294#[allow(clippy::vec_box)]
295fn select_rarest(
296    executors: impl IntoIterator<Item = Box<Executor>>,
297    selection: usize,
298    scores: Vec<f64>,
299) -> Vec<Box<Executor>> {
300    executors
301        .into_iter()
302        .zip(scores)
303        .sorted_unstable_by(|l, r| l.1.partial_cmp(&r.1).expect("no NaN in scores"))
304        .map(|x| x.0)
305        .take(selection)
306        .collect()
307}
308
309fn score_with_mean(states: &[&State], mean: MeanType) -> Vec<f64> {
310    match mean {
311        MeanType::Harmonic => compute_scores(states, |cs| {
312            (cs.len() as f64) / cs.iter().map(|c| 1_f64 / (*c as f64)).sum::<f64>()
313        }),
314        MeanType::Arithmetic => compute_scores(states, |cs| {
315            cs.iter().sum::<u64>() as f64 / (cs.len() as f64)
316        }),
317    }
318}
319
320#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
321pub enum Value {
322    Concrete(u64),
323    Uninitialized,
324}
325
326impl Default for Value {
327    fn default() -> Self {
328        Value::Uninitialized
329    }
330}
331
332impl fmt::Display for Value {
333    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
334        match self {
335            Value::Concrete(c) => write!(f, "{:#x}", c),
336            Value::Uninitialized => write!(f, "uninit"),
337        }
338    }
339}
340
341#[derive(Debug, Clone)]
342pub struct Executor {
343    program_break: u64,
344    state: State,
345    execution_depth: u64,
346    max_exection_depth: u64,
347    concrete_inputs: Vec<u64>,
348    is_running: bool,
349}
350
351impl Executor {
352    // creates a machine state with a specific memory size
353    pub fn new(program: &Program, memory_size: ByteSize) -> Self {
354        assert!(
355            memory_size.as_u64() % 8 == 0,
356            "memory size has to be a multiple of 8"
357        );
358
359        let mut regs = [Value::Uninitialized; 32];
360        let mut memory = VirtualMemory::new(
361            memory_size.as_u64() as usize / 8,
362            bytesize::KIB as usize / 8,
363        );
364
365        let sp = memory_size.as_u64() - 8;
366        regs[Register::Sp as usize] = Value::Concrete(sp);
367        regs[Register::Zero as usize] = Value::Concrete(0);
368
369        // TODO: Init main function arguments
370        let argc = 0;
371        memory[sp as usize / size_of::<u64>()] = Value::Concrete(argc);
372
373        load_segment(&mut memory, &program.code);
374        load_segment(&mut memory, &program.data);
375
376        let pc = program.code.address;
377
378        let program_break = program.data.address + program.data.content.len() as u64;
379
380        debug!(
381            "initializing new execution context with {} of main memory",
382            memory_size
383        );
384        debug!(
385            "code segment: start={:#x} length={}",
386            program.code.address,
387            program.code.content.len(),
388        );
389        debug!(
390            "data segment: start={:#x} length={}",
391            program.data.address,
392            program.data.content.len(),
393        );
394        debug!(
395            "init state: pc={:#x} brk={:#x}, argc={}",
396            pc, program_break, argc
397        );
398
399        Self {
400            program_break,
401            state: State { pc, regs, memory },
402            execution_depth: 0,
403            max_exection_depth: 0,
404            concrete_inputs: Vec::new(),
405            is_running: false,
406        }
407    }
408
409    pub fn state(&self) -> &State {
410        &self.state
411    }
412
413    pub fn run(
414        &mut self,
415        number_of_instructions: u64,
416    ) -> Result<Option<Bug>, RaritySimulationError> {
417        self.is_running = true;
418        self.max_exection_depth += number_of_instructions;
419
420        loop {
421            if self.execution_depth >= self.max_exection_depth {
422                trace!("maximum execution depth reached => exiting this context");
423
424                self.is_running = false;
425
426                return Err(RaritySimulationError::ExecutionDepthReached(
427                    self.execution_depth,
428                ));
429            }
430
431            self.execution_depth += 1;
432
433            let bug = self
434                .fetch()
435                .and_then(|raw| self.decode(raw))
436                .and_then(|instr| self.execute(instr))?;
437
438            if bug.is_some() || !self.is_running {
439                return Ok(bug);
440            }
441        }
442    }
443
444    fn fetch(&self) -> Result<u32, RaritySimulationError> {
445        if let Value::Concrete(dword) =
446            self.state.memory[(self.state.pc as usize / size_of::<u64>()) as usize]
447        {
448            if self.state.pc % size_of::<u64>() as u64 == 0 {
449                Ok(dword as u32)
450            } else {
451                Ok((dword >> 32) as u32)
452            }
453        } else {
454            Err(RaritySimulationError::NotSupported(String::from(
455                "tried to fetch none concrete instruction",
456            )))
457        }
458    }
459
460    fn decode(&self, raw: u32) -> Result<Instruction, RaritySimulationError> {
461        decode(raw).map_err(|e| RaritySimulationError::InvalidInstructionEncoding(self.state.pc, e))
462    }
463
464    fn execute(&mut self, instruction: Instruction) -> Result<Option<Bug>, RaritySimulationError> {
465        match instruction {
466            Instruction::Ecall(_) => self.execute_ecall(),
467            Instruction::Lui(utype) => self.execute_lui(utype),
468            Instruction::Addi(itype) => self.execute_itype(instruction, itype, u64::wrapping_add),
469            Instruction::Add(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_add),
470            Instruction::Sub(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_sub),
471            Instruction::Mul(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_mul),
472            Instruction::Divu(rtype) => {
473                self.execute_divu_remu(instruction, rtype, u64::wrapping_div)
474            }
475            Instruction::Remu(rtype) => {
476                self.execute_divu_remu(instruction, rtype, u64::wrapping_rem)
477            }
478            Instruction::Sltu(rtype) => {
479                self.execute_rtype(instruction, rtype, |l, r| if l < r { 1 } else { 0 })
480            }
481            Instruction::Ld(itype) => self.execute_ld(instruction, itype),
482            Instruction::Sd(stype) => self.execute_sd(instruction, stype),
483            Instruction::Jal(jtype) => self.execute_jal(jtype),
484            Instruction::Jalr(itype) => self.execute_jalr(itype),
485            Instruction::Beq(btype) => self.execute_beq(btype),
486        }
487    }
488
489    fn access_to_uninitialized_memory(
490        &mut self,
491        instruction: Instruction,
492        v1: Value,
493        v2: Value,
494    ) -> Bug {
495        trace!(
496            "{}: {}, {} => computing reachability",
497            instruction_to_str(instruction),
498            v1,
499            v2
500        );
501
502        Bug::AccessToUnitializedMemory {
503            info: RarityBugInfo {
504                inputs: self.concrete_inputs.clone(),
505                pc: self.state.pc,
506            },
507            reason: UninitializedMemoryAccessReason::InstructionWithUninitializedOperand {
508                instruction,
509                // TODO: fix operands
510                operands: vec![],
511            },
512        }
513    }
514
515    fn is_in_vaddr_range(&self, vaddr: u64) -> bool {
516        vaddr as usize / size_of::<u64>() < self.state.memory.size()
517    }
518
519    fn check_for_valid_memory_address(&mut self, instruction: &str, address: u64) -> Option<Bug> {
520        let is_alignment_ok = address % size_of::<u64>() as u64 == 0;
521
522        if !is_alignment_ok {
523            trace!(
524                "{}: address {:#x} is not double word aligned => computing reachability",
525                instruction,
526                address
527            );
528
529            self.is_running = false;
530
531            Some(Bug::AccessToUnalignedAddress {
532                info: RarityBugInfo {
533                    inputs: self.concrete_inputs.clone(),
534                    pc: self.state.pc,
535                },
536                address,
537            })
538        } else if !self.is_in_vaddr_range(address) {
539            trace!(
540                "{}: address {:#x} out of virtual address range (0x0 - {:#x}) => computing reachability",
541                instruction,
542                address,
543                self.state.memory.size() * 8,
544            );
545
546            self.is_running = false;
547
548            Some(Bug::AccessToOutOfRangeAddress {
549                info: RarityBugInfo {
550                    inputs: self.concrete_inputs.clone(),
551                    pc: self.state.pc,
552                },
553            })
554        } else {
555            None
556        }
557    }
558
559    fn check_for_valid_memory_range(
560        &mut self,
561        instruction: &str,
562        address: u64,
563        size: u64,
564    ) -> Option<Bug> {
565        if !self.is_in_vaddr_range(address) || !self.is_in_vaddr_range(address + size) {
566            trace!(
567                "{}: buffer {:#x} - {:#x} out of virtual address range (0x0 - {:#x}) => computing reachability",
568                instruction,
569                address,
570                address + size,
571                self.state.memory.size() * 8,
572            );
573
574            self.is_running = false;
575
576            Some(Bug::AccessToOutOfRangeAddress {
577                info: RarityBugInfo {
578                    inputs: self.concrete_inputs.clone(),
579                    pc: self.state.pc,
580                },
581            })
582        } else {
583            None
584        }
585    }
586
587    #[allow(clippy::unnecessary_wraps)]
588    fn execute_lui(&mut self, utype: UType) -> Result<Option<Bug>, RaritySimulationError> {
589        let immediate = u64::from(utype.imm()) << 12;
590
591        let result = Value::Concrete(immediate);
592
593        trace!(
594            "[{:#010x}] {}: {:?} <- {}",
595            self.state.pc,
596            instruction_to_str(Instruction::Lui(utype)),
597            utype.rd(),
598            result,
599        );
600
601        self.assign_rd(utype.rd(), result);
602
603        self.state.pc += INSTRUCTION_SIZE;
604
605        Ok(None)
606    }
607
608    fn execute_divu_remu<Op>(
609        &mut self,
610        instruction: Instruction,
611        rtype: RType,
612        op: Op,
613    ) -> Result<Option<Bug>, RaritySimulationError>
614    where
615        Op: FnOnce(u64, u64) -> u64,
616    {
617        match self.state.regs[rtype.rs2() as usize] {
618            Value::Concrete(divisor) if divisor == 0 => {
619                trace!(
620                    "{}: divisor == 0 -> compute reachability",
621                    instruction_to_str(instruction)
622                );
623
624                Ok(Some(Bug::DivisionByZero {
625                    info: RarityBugInfo {
626                        inputs: self.concrete_inputs.clone(),
627                        pc: self.state.pc,
628                    },
629                }))
630            }
631            _ => self.execute_rtype(instruction, rtype, op),
632        }
633    }
634
635    fn execute_itype<Op>(
636        &mut self,
637        instruction: Instruction,
638        itype: IType,
639        op: Op,
640    ) -> Result<Option<Bug>, RaritySimulationError>
641    where
642        Op: FnOnce(u64, u64) -> u64,
643    {
644        let rs1_value = self.state.regs[itype.rs1() as usize];
645        let imm_value = Value::Concrete(itype.imm() as u64);
646
647        self.execute_binary_op(instruction, rs1_value, imm_value, itype.rd(), op)
648    }
649
650    #[allow(clippy::unnecessary_wraps)]
651    fn execute_rtype<Op>(
652        &mut self,
653        instruction: Instruction,
654        rtype: RType,
655        op: Op,
656    ) -> Result<Option<Bug>, RaritySimulationError>
657    where
658        Op: FnOnce(u64, u64) -> u64,
659    {
660        let rs1_value = self.state.regs[rtype.rs1() as usize];
661        let rs2_value = self.state.regs[rtype.rs2() as usize];
662
663        self.execute_binary_op(instruction, rs1_value, rs2_value, rtype.rd(), op)
664    }
665
666    #[allow(clippy::unnecessary_wraps)]
667    fn execute_binary_op<Op>(
668        &mut self,
669        instruction: Instruction,
670        lhs: Value,
671        rhs: Value,
672        rd: Register,
673        op: Op,
674    ) -> Result<Option<Bug>, RaritySimulationError>
675    where
676        Op: FnOnce(u64, u64) -> u64,
677    {
678        let result = match (lhs, rhs) {
679            (Value::Concrete(v1), Value::Concrete(v2)) => Value::Concrete(op(v1, v2)),
680            _ => {
681                let bug = self.access_to_uninitialized_memory(instruction, lhs, rhs);
682
683                trace!("could not find input assignment => exiting this context");
684
685                self.is_running = false;
686
687                return Ok(Some(bug));
688            }
689        };
690
691        trace!(
692            "[{:#010x}] {}:  {}, {} |- {:?} <- {}",
693            self.state.pc,
694            instruction_to_str(instruction),
695            lhs,
696            rhs,
697            rd,
698            result,
699        );
700
701        self.assign_rd(rd, result);
702
703        self.state.pc += INSTRUCTION_SIZE;
704
705        Ok(None)
706    }
707
708    fn execute_brk(&mut self) -> Result<Option<Bug>, RaritySimulationError> {
709        if let Value::Concrete(new_program_break) = self.state.regs[Register::A0 as usize] {
710            let old_program_break = self.program_break;
711
712            if new_program_break < self.program_break || !self.is_in_vaddr_range(new_program_break)
713            {
714                self.state.regs[Register::A0 as usize] = Value::Concrete(self.program_break);
715            } else {
716                self.program_break = new_program_break;
717            }
718
719            trace!(
720                "brk: old={:#x} new={:#x}",
721                old_program_break,
722                new_program_break
723            );
724
725            Ok(None)
726        } else {
727            not_supported("can not handle symbolic or uninitialized program break")
728        }
729    }
730
731    fn execute_read(&mut self) -> Result<Option<Bug>, RaritySimulationError> {
732        if !matches!(self.state.regs[Register::A0 as usize], Value::Concrete(0)) {
733            return not_supported("can not handle other fd than stdin in read syscall");
734        }
735
736        let buffer = if let Value::Concrete(b) = self.state.regs[Register::A1 as usize] {
737            b
738        } else {
739            return not_supported(
740                "can not handle symbolic or uninitialized buffer address in read syscall",
741            );
742        };
743
744        let size = if let Value::Concrete(s) = self.state.regs[Register::A2 as usize] {
745            s
746        } else {
747            return not_supported("can not handle symbolic or uinitialized size in read syscall");
748        };
749
750        trace!("read: fd={} buffer={:#x} size={}", 0, buffer, size,);
751
752        let bug = self.check_for_valid_memory_range("read", buffer, size);
753        if bug.is_some() {
754            return Ok(bug);
755        }
756
757        let size_of_u64 = size_of::<u64>() as u64;
758
759        let round_up = if size % size_of_u64 == 0 {
760            0
761        } else {
762            size_of_u64 - size % size_of_u64
763        };
764
765        let mut bytes_to_read = size;
766        let words_to_read = (bytes_to_read + round_up) / size_of_u64;
767
768        let start = buffer / size_of_u64;
769
770        for word_count in 0..words_to_read {
771            let result = if bytes_to_read >= size_of_u64 {
772                bytes_to_read -= size_of_u64;
773
774                let new = rand::random();
775
776                self.concrete_inputs.push(new);
777
778                new
779            } else if let Value::Concrete(c) = self.state.memory[(start + word_count) as usize] {
780                let bits_in_a_byte = 8;
781
782                let low_shift_factor = 2_u64.pow(bytes_to_read as u32 * bits_in_a_byte);
783
784                let high_shift_factor =
785                    2_u64.pow((size_of::<u64>() as u32 - bytes_to_read as u32) * bits_in_a_byte);
786
787                let prev = c / low_shift_factor * low_shift_factor;
788                let new = rand::random::<u64>().wrapping_mul(high_shift_factor) / high_shift_factor;
789
790                self.concrete_inputs.push(new);
791
792                prev + new
793            } else {
794                // we do not partially overwrite words with concrete values
795                // if at least one byte in a word is uninitialized, the whole word is uninitialized
796
797                warn!(
798                    "read: Ignoring partial read on uninitialized word at {:#016x}",
799                    start + word_count
800                );
801
802                break;
803            };
804
805            self.state.memory[(start + word_count) as usize] = Value::Concrete(result);
806        }
807
808        self.state.regs[Register::A0 as usize] = Value::Concrete(size);
809
810        Ok(None)
811    }
812
813    fn execute_write(&mut self) -> Result<Option<Bug>, RaritySimulationError> {
814        if !matches!(self.state.regs[Register::A0 as usize], Value::Concrete(1)) {
815            return not_supported("can not handle other fd than stdout in write syscall");
816        }
817
818        let buffer = if let Value::Concrete(b) = self.state.regs[Register::A1 as usize] {
819            b
820        } else {
821            return not_supported(
822                "can not handle symbolic or uninitialized buffer address in write syscall",
823            );
824        };
825
826        let size = if let Value::Concrete(s) = self.state.regs[Register::A2 as usize] {
827            s
828        } else {
829            return not_supported("can not handle symbolic or uinitialized size in write syscall");
830        };
831
832        trace!("write: fd={} buffer={:#x} size={}", 1, buffer, size,);
833
834        let bug = self.check_for_valid_memory_range("write", buffer, size);
835        if bug.is_some() {
836            return Ok(bug);
837        }
838
839        let size_of_u64 = size_of::<u64>() as u64;
840        let start = buffer / size_of_u64;
841        let bytes_to_read = size + buffer % size_of_u64;
842        let words_to_read = (bytes_to_read + size_of_u64 - 1) / size_of_u64;
843
844        for word_count in 0..words_to_read {
845            if self.state.memory[(start + word_count) as usize] == Value::Uninitialized {
846                trace!(
847                    "write: access to uninitialized memory at {:#x} => computing reachability",
848                    (start + word_count) * size_of_u64,
849                );
850
851                return Ok(Some(Bug::AccessToUnitializedMemory {
852                    info: RarityBugInfo {
853                        inputs: self.concrete_inputs.clone(),
854                        pc: self.state.pc,
855                    },
856                    reason: UninitializedMemoryAccessReason::ReadUnintializedMemoryAddress {
857                        description: format!(
858                            "access to uninitialized memory in write syscall ({:#x})",
859                            self.state.pc
860                        ),
861                        address: self.state.pc,
862                    },
863                }));
864            }
865        }
866
867        self.state.regs[Register::A0 as usize] = Value::Concrete(size);
868
869        Ok(None)
870    }
871
872    #[allow(clippy::unnecessary_wraps)]
873    fn execute_beq(&mut self, btype: BType) -> Result<Option<Bug>, RaritySimulationError> {
874        let lhs = self.state.regs[btype.rs1() as usize];
875        let rhs = self.state.regs[btype.rs2() as usize];
876
877        let true_branch = self.state.pc.wrapping_add(btype.imm() as u64);
878        let false_branch = self.state.pc.wrapping_add(INSTRUCTION_SIZE as u64);
879
880        match (lhs, rhs) {
881            (Value::Concrete(v1), Value::Concrete(v2)) => {
882                let old_pc = self.state.pc;
883
884                self.state.pc = if v1 == v2 { true_branch } else { false_branch };
885
886                trace!(
887                    "[{:#010x}] beq: {}, {} |- pc <- {:#x}",
888                    old_pc,
889                    lhs,
890                    rhs,
891                    self.state.pc
892                );
893
894                Ok(None)
895            }
896            (v1, v2) => {
897                self.is_running = false;
898
899                let result = self.access_to_uninitialized_memory(Instruction::Beq(btype), v1, v2);
900
901                trace!("access to uninitialized memory => exiting this context");
902
903                Ok(Some(result))
904            }
905        }
906    }
907
908    fn execute_exit(&mut self) -> Result<Option<Bug>, RaritySimulationError> {
909        self.is_running = false;
910
911        match self.state.regs[Register::A0 as usize] {
912            Value::Concrete(exit_code) => {
913                if exit_code > 0 {
914                    trace!(
915                        "exit: with code {} -> find input to satisfy path condition",
916                        exit_code
917                    );
918
919                    Ok(Some(Bug::ExitCodeGreaterZero {
920                        info: RarityBugInfo {
921                            inputs: self.concrete_inputs.clone(),
922                            pc: self.state.pc,
923                        },
924                        exit_code,
925                        address: self.state.pc,
926                    }))
927                } else {
928                    trace!("exiting context with exit_code 0");
929
930                    Ok(None)
931                }
932            }
933            _ => not_supported("exit only implemented for symbolic exit codes"),
934        }
935    }
936
937    fn execute_ecall(&mut self) -> Result<Option<Bug>, RaritySimulationError> {
938        trace!("[{:#010x}] ecall", self.state.pc);
939
940        let result = match self.state.regs[Register::A7 as usize] {
941            Value::Concrete(syscall_id) if syscall_id == (SyscallId::Brk as u64) => {
942                self.execute_brk()
943            }
944            Value::Concrete(syscall_id) if syscall_id == (SyscallId::Read as u64) => {
945                self.execute_read()
946            }
947            Value::Concrete(syscall_id) if syscall_id == (SyscallId::Write as u64) => {
948                self.execute_write()
949            }
950            Value::Concrete(syscall_id) if syscall_id == (SyscallId::Exit as u64) => {
951                self.execute_exit()
952            }
953            id => Err(RaritySimulationError::NotSupported(format!(
954                "syscall with id ({}) is not supported",
955                id
956            ))),
957        };
958
959        self.state.pc += INSTRUCTION_SIZE;
960
961        result
962    }
963
964    fn execute_ld(
965        &mut self,
966        instruction: Instruction,
967        itype: IType,
968    ) -> Result<Option<Bug>, RaritySimulationError> {
969        if let Value::Concrete(base_address) = self.state.regs[itype.rs1() as usize] {
970            let immediate = itype.imm() as u64;
971
972            let address = base_address.wrapping_add(immediate);
973
974            let bug = self.check_for_valid_memory_address(instruction_to_str(instruction), address);
975
976            if bug.is_none() {
977                let value = self.state.memory[(address / 8) as usize];
978
979                trace!(
980                    "[{:#010x}] {}: {:#x}, {} |- {:?} <- mem[{:#x}]={}",
981                    self.state.pc,
982                    instruction_to_str(instruction),
983                    base_address,
984                    immediate,
985                    itype.rd(),
986                    address,
987                    value,
988                );
989
990                self.assign_rd(itype.rd(), value);
991
992                self.state.pc += INSTRUCTION_SIZE;
993            }
994
995            Ok(bug)
996        } else {
997            not_supported("can not handle symbolic addresses in LD")
998        }
999    }
1000
1001    fn execute_sd(
1002        &mut self,
1003        instruction: Instruction,
1004        stype: SType,
1005    ) -> Result<Option<Bug>, RaritySimulationError> {
1006        if let Value::Concrete(base_address) = self.state.regs[stype.rs1() as usize] {
1007            let immediate = stype.imm();
1008
1009            let address = base_address.wrapping_add(immediate as u64);
1010
1011            let bug = self.check_for_valid_memory_address(instruction_to_str(instruction), address);
1012
1013            if bug.is_none() {
1014                let value = self.state.regs[stype.rs2() as usize];
1015
1016                trace!(
1017                    "[{:#010x}] {}: {:#x}, {}, {} |- mem[{:#x}] <- {}",
1018                    self.state.pc,
1019                    instruction_to_str(instruction),
1020                    base_address,
1021                    immediate,
1022                    self.state.regs[stype.rs2() as usize],
1023                    address,
1024                    value,
1025                );
1026
1027                self.state.memory[(address / 8) as usize] = value;
1028
1029                self.state.pc += INSTRUCTION_SIZE;
1030            }
1031
1032            Ok(bug)
1033        } else {
1034            not_supported("can not handle uninitialized addresses in SD")
1035        }
1036    }
1037
1038    #[allow(clippy::unnecessary_wraps)]
1039    fn execute_jal(&mut self, jtype: JType) -> Result<Option<Bug>, RaritySimulationError> {
1040        let link = self.state.pc + INSTRUCTION_SIZE;
1041
1042        let new_pc = self.state.pc.wrapping_add(jtype.imm() as u64);
1043
1044        trace!(
1045            "[{:#010x}] jal: pc <- {:#x}, {:?} <- {:#x}",
1046            self.state.pc,
1047            new_pc,
1048            jtype.rd(),
1049            link,
1050        );
1051
1052        self.state.pc = new_pc;
1053
1054        self.assign_rd(jtype.rd(), Value::Concrete(link));
1055
1056        Ok(None)
1057    }
1058
1059    fn assign_rd(&mut self, rd: Register, v: Value) {
1060        if rd != Register::Zero {
1061            self.state.regs[rd as usize] = v;
1062        }
1063    }
1064
1065    fn execute_jalr(&mut self, itype: IType) -> Result<Option<Bug>, RaritySimulationError> {
1066        if let Value::Concrete(dest) = self.state.regs[itype.rs1() as usize] {
1067            let link = self.state.pc + INSTRUCTION_SIZE;
1068
1069            let new_pc = dest.wrapping_add(itype.imm() as u64);
1070
1071            trace!(
1072                "[{:#010x}] jalr: {:#x}, {} |- pc <- {:#x}, {:?} <- {:#x}",
1073                self.state.pc,
1074                dest,
1075                itype.imm(),
1076                new_pc,
1077                itype.rd(),
1078                link
1079            );
1080
1081            self.assign_rd(itype.rd(), Value::Concrete(link));
1082
1083            self.state.pc = new_pc;
1084
1085            Ok(None)
1086        } else {
1087            not_supported("can only handle concrete addresses in JALR")
1088        }
1089    }
1090}
1091
1092fn load_segment(memory: &mut VirtualMemory<Value>, segment: &ProgramSegment<u8>) {
1093    let start = segment.address as usize / size_of::<u64>();
1094    let end = start + segment.content.len() / size_of::<u64>();
1095
1096    segment
1097        .content
1098        .chunks(size_of::<u64>())
1099        .map(LittleEndian::read_u64)
1100        .zip(start..end)
1101        .for_each(|(x, i)| memory[i] = Value::Concrete(x));
1102}
1103
1104fn not_supported(s: &str) -> Result<Option<Bug>, RaritySimulationError> {
1105    Err(RaritySimulationError::NotSupported(s.to_owned()))
1106}
1107
1108#[derive(Default, Debug, Clone)]
1109pub struct RarityBugInfo {
1110    inputs: Vec<u64>,
1111    pc: u64,
1112}
1113
1114impl BugInfo for RarityBugInfo {
1115    type Value = u64;
1116}
1117
1118impl fmt::Display for RarityBugInfo {
1119    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1120        writeln!(f, "concrete inputs read: {:?}", self.inputs)?;
1121        writeln!(f, "pc: {:#x}", self.pc)
1122    }
1123}
1124
1125/// Calculates all state scores with a given scoring predicate
1126///
1127/// Using all states executed by the rarity simulation execution, this function constructs the
1128/// statistical counters and, based upon these, calculates the rarity score of each state.
1129///
1130/// The counters work on byte-granularity basis. For each byte contained by a state, 256 counters
1131/// are created, one for each possible state a byte can be in. All bytes of all states are iterated
1132/// and a corresponding counter is incremented depending on the byte's value.
1133/// This is done to count the occurrences of distinct values for each byte. The smaller a counter
1134/// value is, the rarer the value of this specific counter is for a specific byte.
1135///
1136/// The function then determines the counter values that are relevant for rarity calculation, for
1137/// each state, that is, for each byte it appends the value of the counter relevant to the byte and
1138/// the byte's value.
1139///
1140/// The list of relevant counter values is passed to the scoring function in order to determine the
1141/// rarity score of each state.
1142///
1143/// # Arguments
1144/// * states: A list of states.
1145/// * score: A function taking the amount of states and relevant statistical counter values and returning a score
1146fn compute_scores<F>(states: &[&State], score: F) -> Vec<f64>
1147where
1148    F: Fn(&[Counter]) -> f64,
1149{
1150    let counter_addresses: Vec<Vec<Address>> = states
1151        .iter()
1152        .map(|s| compute_counter_addresses(&s))
1153        .collect();
1154
1155    // create global counters for all states
1156    let mut overall_counts = HashMap::<Address, Counter>::new();
1157
1158    counter_addresses
1159        .iter()
1160        .flatten()
1161        .for_each(|address| count_address(&mut overall_counts, *address));
1162
1163    // create counters per state based on overall counts
1164    counter_addresses
1165        .iter()
1166        .map(|addresses| {
1167            addresses
1168                .iter()
1169                .map(|address| {
1170                    *overall_counts
1171                        .get(address)
1172                        .expect("count should be available")
1173                })
1174                .collect_vec()
1175        })
1176        .map(|addresses| score(&addresses[..]))
1177        .collect()
1178}
1179
1180fn count_address(scores: &mut HashMap<Address, Counter>, addr: Address) {
1181    if let Some(entry) = scores.get_mut(&addr) {
1182        *entry += 1;
1183    } else {
1184        scores.insert(addr, 1);
1185    }
1186}
1187
1188/// Based on a state, generates an iterator that contains
1189/// matching counter `addresses` for each byte.
1190///
1191/// The counter address is a combination of the byte's address and the value of that
1192/// address in the state.
1193///
1194/// Each byte assumes one of 256 (2^8) values.
1195/// Thus, each distinct byte i of the state has has 256 different addresses: (i*256)..((i*256) + 255)
1196/// That is, each byte is `expanded` to 256 addresses.
1197///
1198/// The first 8 bytes are represented by the program counter, in the CPU's native byte ordering
1199/// Next, 32 64-bit registers are represented.
1200/// Then, all touched memory regions follow.
1201///
1202/// For each byte i, only one of its 256 different addresses may occur (because a byte can only
1203/// assume one state at a time)
1204fn compute_counter_addresses(state: &State) -> Vec<Address> {
1205    fn offset_for_word(idx: u64) -> u64 {
1206        idx * BYTES_PER_WORD * NUMBER_OF_BYTE_VALUES
1207    }
1208
1209    let mut addresses = Vec::new();
1210
1211    compute_counter_addresses_for_word(0, state.pc, &mut addresses);
1212
1213    compute_counter_addresses_for_iter(offset_for_word(1), state.regs.iter(), &mut addresses);
1214
1215    compute_counter_addresses_for_iter(offset_for_word(33), state.memory.iter(), &mut addresses);
1216
1217    addresses
1218}
1219
1220/// Appends relevant statistic counter addresses from an iterator
1221///
1222/// Iterates over a collection of [`Value`]s and appends them to the relevant address list if they
1223/// contain a concrete value.
1224///
1225/// # Arguments
1226/// * offset: The statistic counter address offset
1227/// * iter: The iterator
1228/// * addresses: The list where relevant addresses shall be appended to
1229///
1230/// # See
1231/// * [`compute_counter_addresses_for_word`]
1232fn compute_counter_addresses_for_iter<'a, Iter>(
1233    offset: u64,
1234    iter: Iter,
1235    addresses: &mut Vec<Address>,
1236) where
1237    Iter: Iterator<Item = &'a Value>,
1238{
1239    iter.enumerate()
1240        .filter_map(|(idx, v)| match v {
1241            Value::Concrete(w) => Some((idx, w)),
1242            _ => None,
1243        })
1244        .for_each(|(idx, word)| {
1245            compute_counter_addresses_for_word(
1246                offset + idx as u64 * NUMBER_OF_BYTE_VALUES,
1247                *word,
1248                addresses,
1249            );
1250        });
1251}
1252
1253/// Appends to a counter address list
1254///
1255/// Splits a 64-bit word into bytes (using the host machine's endianess) and appennds the relevant
1256/// counter addresses, depending on their respective values.
1257///
1258/// # Arguments
1259/// * offset:
1260/// * word: The word that s
1261/// * addresses: The list where relevant addresses shall be appended to
1262fn compute_counter_addresses_for_word(offset: u64, word: u64, addresses: &mut Vec<u64>) {
1263    u64::to_ne_bytes(word)
1264        .iter()
1265        .cloned()
1266        .enumerate()
1267        .for_each(|(byte_idx, byte_value)| {
1268            let byte_address = BYTES_PER_WORD * byte_idx as u64;
1269            let address = offset + byte_address * NUMBER_OF_BYTE_VALUES + byte_value as u64;
1270            addresses.push(address);
1271        });
1272}
1273
1274/// Generates a random value limited by an upper bound
1275///
1276/// Returns a random value between 0 inclusive and `len` exclusively by using the modulo operator
1277fn random_index(len: usize) -> usize {
1278    rand::random::<usize>() % len
1279}