1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
//! The `Config` structure for configuring `haybale`, and other associated definitions

use crate::backend::Backend;
use crate::callbacks::Callbacks;
pub use crate::demangling::Demangling;
use crate::function_hooks::FunctionHooks;
use crate::watchpoints::Watchpoint;
use std::collections::HashMap;
use std::time::Duration;

/// Various settings which affect how the symbolic execution is performed.
///
/// `Config` uses the (new-to-Rust-1.40) `#[non_exhaustive]` attribute to
/// indicate that fields may be added even in a point release (that is, without
/// incrementing the major or minor version). See
/// [here](https://blog.rust-lang.org/2019/12/19/Rust-1.40.0.html#[non_exhaustive]-structs,-enums,-and-variants)
/// for more details.
///
/// In general, you'll want to start with `Config::default()` and then change the
/// settings you want to change; `#[non_exhaustive]` will prevent users from
/// constructing a `Config` directly.
#[non_exhaustive]
#[derive(Clone)]
pub struct Config<'p, B: Backend> {
    /// Maximum number of times to execute any given line of LLVM IR.
    /// This bounds both the number of iterations of loops, and also the depth of recursion.
    /// For inner loops, this bounds the number of total iterations across all invocations of the loop.
    ///
    /// Default is `10`.
    pub loop_bound: usize,

    /// Maximum callstack depth to allow when symbolically executing.
    /// If symbolic execution encounters a call which would result in a
    /// stack depth exceeding this number, and the call is not hooked (see
    /// [`function_hooks`](struct.Config.html#structfield.function_hooks)), then
    /// the call will simply be ignored - as if
    /// [`generic_stub_hook`](../function_hooks/fn.generic_stub_hook.html) were
    /// applied to that call.
    ///
    /// For example, if this setting is set to `Some(1)`, and we're executing a
    /// function `foo()` which calls `bar()` which calls `baz()`, then the call
    /// to `bar()` will be fully analyzed, but fully calling `baz()` would result
    /// in a stack depth of `2`, so instead, `bar()`'s call to `baz()` will be
    /// ignored.
    ///
    /// As another example, the setting `Some(0)` means that all calls will be
    /// ignored, unless they are hooked in `function_hooks`.
    ///
    /// Note that this considers the LLVM callstack depth.
    /// If calls have been inlined in the LLVM bitcode, `haybale` sees this as
    /// a single function, and "entering" an inlined function doesn't affect
    /// the callstack depth.
    ///
    /// A value of `None` for this setting indicates no limit to the callstack depth;
    /// all calls will be fully analyzed, to the extent possible and unless
    /// overridden by `function_hooks`.
    ///
    /// Default is `None`.
    pub max_callstack_depth: Option<usize>,

    /// Maximum amount of time to allow for any single solver query.
    ///
    /// If `Some`, any solver query lasting longer than the given limit will
    /// be killed.  This will result in an `Error::SolverError` for that path.
    ///
    /// If `None`, there will be no time limit for solver queries.
    ///
    /// Default is 300 seconds (5 minutes).
    pub solver_query_timeout: Option<Duration>,

    /// Should we check each memory access for possible `NULL` dereference,
    /// and if so, how should we report any errors?
    ///
    /// Default is `NullPointerChecking::Simple`.
    pub null_pointer_checking: NullPointerChecking,

    /// When encountering a `memcpy`, `memset`, or `memmove` with multiple
    /// possible lengths, how (if at all) should we concretize the length?
    ///
    /// Default is `Concretize::Symbolic` - that is, no concretization.
    pub concretize_memcpy_lengths: Concretize,

    /// Maximum supported length of a `memcpy`, `memset`, or `memmove` operation.
    ///
    /// Setting this to `Some(x)` means that if we encounter a `memcpy`,
    /// `memset`, or `memmove` with length which may be greater than `x` bytes,
    /// we will constrain the length to be at most `x` bytes. (`haybale` will
    /// also emit a warning when doing this.) If the only possible values for the
    /// length are greater than `x` bytes, we will raise an error.
    ///
    /// Setting this to `None` means that there is no limit to the size of these
    /// operations.
    ///
    /// Default is `None` - that is, no limit.
    pub max_memcpy_length: Option<u64>,

    /// `Error::Unsat` is an error type which is used internally, but may not be
    /// useful for `ExecutionManager.next()` to return to consumers. In most
    /// cases, consumers probably don't care about paths which were partially
    /// explored and resulted in an unsat error; they are probably interested in
    /// only those paths that are actually feasible, or ended in one of the other
    /// error types.
    ///
    /// If this setting is `false`, the `ExecutionManager` will return an
    /// `Error::Unsat` to the consumer whenever one is encountered, just as it
    /// does for other error types.
    /// If this setting is `true`, paths ending in `Error::Unsat` will be
    /// silently ignored by the `ExecutionManager`, and it will move on to the
    /// next path, as if a filter were applied to the iterator.
    ///
    /// Note that many unsat paths are never even started processing, so they
    /// never actually result in an unsat error. In fact, many executions may
    /// never encounter an unsat error, despite having unsat paths. Furthermore,
    /// `haybale`'s behavior regarding which unsat paths actually result in an
    /// unsat error is not guaranteed to be stable, and may change even in point
    /// releases (that is, without incrementing the major or minor version).
    ///
    /// Default is `true`.
    pub squash_unsats: bool,

    /// When encountering the `llvm.assume()` intrinsic, should we only consider
    /// paths where the assumption holds (`true`), or should we also consider
    /// paths where the assumption does not hold, if that is possible (`false`)?
    ///
    /// Note that you may also provide a custom hook for `llvm.assume()` in
    /// [`function_hooks`](struct.Config.html#structfield.function_hooks).
    /// If you do, that overrides this setting.
    ///
    /// Default is `true`.
    pub trust_llvm_assumes: bool,

    /// The set of currently active function hooks; see
    /// [`FunctionHooks`](../function_hooks/struct.FunctionHooks.html) for more details.
    ///
    /// Default is
    /// [`FunctionHooks::default()`](../function_hooks/struct.FunctionHooks.html#method.default);
    /// see docs there for more details.
    pub function_hooks: FunctionHooks<'p, B>,

    /// The set of currently active callbacks; see
    /// [`Callbacks`](../callbacks/struct.Callbacks.html) for more details.
    ///
    /// Default is no callbacks.
    pub callbacks: Callbacks<'p, B>,

    /// The initial memory watchpoints when a `State` is created (mapping from
    /// watchpoint name to the actual watchpoint).
    ///
    /// More watchpoints may be added or removed at any time with
    /// `state.add_mem_watchpoint()` and `state.rm_mem_watchpoint()`.
    ///
    /// Default is no watchpoints.
    pub initial_mem_watchpoints: HashMap<String, Watchpoint>,

    /// Controls the (attempted) demangling of function names in error messages
    /// and backtraces.
    ///
    /// If `None`, `haybale` will attempt to autodetect which mangling is
    /// appropriate, based on the LLVM metadata.
    ///
    /// `Some` can be used to force `haybale` to attempt to demangle with a
    /// particular demangler.
    ///
    /// Any symbol that isn't valid for the chosen demangler will simply be left
    /// unchanged, regardless of this setting.
    ///
    /// Default is `None`.
    pub demangling: Option<Demangling>,

    /// If `true`, then `haybale` will attempt to print source location info
    /// (e.g., filename, line number, column number) along with the LLVM location
    /// info in error messages and backtraces. (This applies to error messages
    /// returned by
    /// [`State.full_error_message_with_context()`](../struct.State.html#method.full_error_message_with_context).)
    ///
    /// For this to work, the LLVM bitcode must contain debuginfo. For example,
    /// C/C++ or Rust sources must be compiled with the `-g` flag to `clang`,
    /// `clang++`, or `rustc`.
    ///
    /// In addition, some LLVM instructions simply don't correspond to a
    /// particular source location; e.g., they may be just setting up the stack
    /// frame for a function.
    ///
    /// For path dumps in the case of error, the value of the environment
    /// variable `HAYBALE_DUMP_PATH` has precedence over this setting.
    /// `HAYBALE_DUMP_PATH` may be set to:
    ///     `LLVM` for a list of the LLVM basic blocks in the path;
    ///     `SRC` for a list of the source-language locations in the path;
    ///     `BOTH` for both of the above.
    ///
    /// Default is `true`.
    pub print_source_info: bool,

    /// If `true`, then `haybale` will include the module name along with the
    /// LLVM location info in error messages, backtraces, log messages, and
    /// when dumping paths. If `false`, the module name will be omitted.
    /// You may want to use `false` for `Project`s with only a single bitcode
    /// file, or if the LLVM module is clear from the function name.
    ///
    /// Default is `true`.
    pub print_module_name: bool,
}

/// Enum used for the `null_pointer_checking` option in `Config`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum NullPointerChecking {
    /// All memory accesses will be checked to ensure their addresses cannot be
    /// `NULL`. If `NULL` is a possible solution for the address of a memory
    /// access, we will return `Error::NullPointerDereference` and not continue
    /// along the path.
    Simple,

    /// All memory accesses will be checked to ensure their addresses cannot be
    /// `NULL`. If `NULL` is a possible solution for the address of a memory
    /// access, but not the only possible solution, we will split into two paths:
    /// one in which the address is constrained to be `NULL`, and which returns
    /// `Error::NullPointerDereference`; and another in which the address is
    /// constrained to be non-`NULL`, and which will continue execution.
    SplitPath,

    /// Memory accesses will not be checked for `NULL` addresses. This may result
    /// in fewer solver queries and thus improved performance for some workloads.
    None,
}

/// Enum used for the `concretize_memcpy_lengths` option in `Config`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum Concretize {
    /// Handle everything fully symbolically - that is, have the solver fully
    /// consider all possible values. This may lead to poor solver performance
    /// for some workloads.
    Symbolic,

    /// Pick one possible value arbitrarily. Often this may choose `0` if `0` is
    /// a possible solution, but this behavior is not guaranteed. (To guarantee
    /// this behavior, use `Prefer(0)`.)
    ///
    /// The value will be permanently constrained to be the chosen value (on this
    /// path), and other possibilities will not be considered.
    Arbitrary,

    /// Prefer the given `u64` value if it is a possible value. Otherwise, fall
    /// back on the given `Concretize` strategy.
    ///
    /// If the given `u64` value is a possible value, then the value will be
    /// permanently constrained to be that value (on this path), and other
    /// possibilities will not be considered.
    Prefer(u64, Box<Concretize>),

    /// Choose the maximum possible value. `Maximum` will be interpreted in an
    /// unsigned fashion.
    ///
    /// The value will be permanently constrained to be this value (on this
    /// path), and other possibilities will not be considered.
    Maximum,

    /// Choose the minimum possible value. `Minimum` will be interpreted in an
    /// unsigned fashion.
    ///
    /// The value will be permanently constrained to be this value (on this
    /// path), and other possibilities will not be considered.
    Minimum,
}

impl<'p, B: Backend> Config<'p, B> {
    /// Creates a new `Config` with defaults for all the options, except with
    /// no function hooks.
    ///
    /// You may want to consider
    /// [`Config::default()`](struct.Config.html#method.default), which comes
    /// with predefined hooks for common functions.
    pub fn new() -> Self {
        let mut config = Self::default();
        config.function_hooks = FunctionHooks::new();
        config
    }
}

impl<'p, B: Backend> Default for Config<'p, B> {
    /// Default values for all configuration parameters.
    ///
    /// In particular, this uses
    /// [`FunctionHooks::default()`](../function_hooks/struct.FunctionHooks.html#method.default),
    /// and therefore comes with a set of predefined hooks for common functions.
    ///
    /// For more information, see
    /// [`FunctionHooks::default()`](../function_hooks/struct.FunctionHooks.html#method.default).
    fn default() -> Self {
        Self {
            loop_bound: 10,
            max_callstack_depth: None,
            solver_query_timeout: Some(Duration::from_secs(300)),
            null_pointer_checking: NullPointerChecking::Simple,
            concretize_memcpy_lengths: Concretize::Symbolic,
            max_memcpy_length: None,
            squash_unsats: true,
            trust_llvm_assumes: true,
            function_hooks: FunctionHooks::default(),
            callbacks: Callbacks::default(),
            initial_mem_watchpoints: HashMap::new(),
            demangling: None,
            print_source_info: true,
            print_module_name: true,
        }
    }
}