proptest-stateful 0.1.3

Library for building stateful property tests using the proptest crate
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
//! This framework builds on the [`mod@proptest`] framework to make it easier to build stateful
//! property-based tests. By "stateful", we mean that the intent is to be able to test stateful
//! systems, which we do by modeling the state of the system under test and using that model to
//! assist in generating test cases and defining the expected results.
//!
//! The code in this framework generates test cases as sequences of operations, and shrinks test
//! cases by removing operations from the sequence. Individual operation themselves are not
//! currently shrunk. Adding the ability to shrink individual operations within a sequence is
//! non-trivial, since it tends to break preconditions in a way that is difficult to compensate
//! for. There may be ways around this, but in my personal experience, shrinking via just removing
//! operations has been sufficient to yield simple and useful minimal failing cases, so shrinking
//! individual ops is not a priority right now.
//!
//! There is currently no way to symbolically model the outputs of operations when generating test
//! cases, so the inputs to each operation must be able to have concrete values generated at test
//! case generation time. At some point, we may try to add the feature support necessary to remove
//! this limitation, but for now the current framework code has been sufficient for our needs.
use std::cell::Cell;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::rc::Rc;
use std::time::Duration;

use async_trait::async_trait;
use proptest::prelude::*;
use proptest::strategy::{NewTree, ValueTree};
use proptest::test_runner::TestRunner;
use rand::distributions::{Distribution, Uniform};

/// Used by the caller of [`test()`] for providing config options.
pub struct ProptestStatefulConfig {
    /// Minimum number of operations to generate for any given test case.
    pub min_ops: usize,
    /// Maximum number of operations to generate for any given test case.
    pub max_ops: usize,
    /// Amount of time to wait before timing out and failing a test case.
    pub test_case_timeout: Duration,
    /// Can be used to override default proptest configuration parameters.
    ///
    /// Note, however, that we ignore the provided value of [`ProptestConfig::max_shrink_iters`],
    /// and always set it to `max_ops * 2`, because the default value is not high enough to make
    /// sure we won't fail out of shrinking before we've had a chance to try removing and adding
    /// back in each individual op.
    pub proptest_config: ProptestConfig,
}

/// A trait for defining stateful property tests.
///
/// The [`ModelState`] trait includes callbacks that enable generating, shrinking, and running test
/// cases. Note that the same value is intended to be used both for planning test cases and
/// executing them; the [`ModelState::next_state`] callback is used during all three of test case
/// generation, execution, and shrinking, since all three of those processes require modeling the
/// state of the system under test in order to determine whether a given sequence of operations is
/// valid (in generation and shrinking) and to check postconditions (during test execution).
///
/// Note that the trait as it is currently implemented uses async callbacks. We may want to make a
/// non-async version of this at some point, but all the tests we've written so far are async, so
/// I'm leaving this as a TODO item.
#[async_trait(?Send)]
pub trait ModelState: Clone + Debug + Default {
    /// A type used to specify a single operation in the sequence of operations that constitute a
    /// test case.
    type Operation: Clone + Debug;
    /// A type used to hold on to any state that is only needed at runtime for executing cases (as
    /// opposed to the state held directly in an implementor of [`ModelState`], which is used for
    /// generation and shrinking).
    type RunContext;
    /// The type for proptest strategies used to generate [`Self::Operation`] values.
    ///
    /// Note that this type should no longer need to be explicitly named in this way once
    /// return_position_impl_trait_in_trait is stable.
    type OperationStrategy: Strategy<Value = Self::Operation>;

    // TODO once return_position_impl_trait_in_trait is stabilized, uncomment this and get rid of
    // the OperationStrategy type:
    // fn op_generators(&self) -> Vec<impl Strategy<Value = Self::Operation>>;
    /// Returns a [`Vec`] of [`Strategy`]s for generating [`Self::Operation`] values.
    ///
    /// The proptest-stateful code will pseudo-randomly pick a strategy based off of the random
    /// seed from the proptest [`TestRunner`], and generate a single [`Self::Operation`] value from
    /// the selected [`Strategy`].
    fn op_generators(&self) -> Vec<Self::OperationStrategy>;
    /// Determines whether preconditions have been met for an operation, based on the current model
    /// state.
    fn preconditions_met(&self, op: &Self::Operation) -> bool;

    /// Updates the internal model state based on the modeled result of executing of `op`.
    fn next_state(&mut self, op: &Self::Operation);

    /// Called once at the beginning of each test case's execution.
    ///
    /// After performing any necessary initialization work, the implementation returns a value of
    /// type [`Self::RunContext`] which is subsequently passed back into calls to
    /// [`run_op`](Self::run_op), [`check_postconditions`](Self::check_postconditions), and
    /// [`clean_up_test_run`](Self::clean_up_test_run).
    async fn init_test_run(&self) -> Self::RunContext;
    /// Called to execute `op` during a test run.
    async fn run_op(&self, op: &Self::Operation, ctxt: &mut Self::RunContext);
    /// Called after each execution of [`run_op`](Self::run_op) to check any postconditions that
    /// are expected to be upheld during the test run.
    ///
    /// Since the return type is `()`, postcondition failures must trigger a panic.
    async fn check_postconditions(&self, ctxt: &mut Self::RunContext);
    /// Called at the end of each test run to allow any necessary cleanup work to be done.
    async fn clean_up_test_run(&self, ctxt: &mut Self::RunContext);
}

/// This is used to store and shrink a list of operation values by implementing the [`ValueTree`]
/// trait.
///
/// Note that shrinking is entirely based around removing elements from the sequence of operations;
/// the individual operations themselves are not shrunk, as doing so would likely cause
/// preconditions to be violated later in the sequence of operations (unless we re-generated the
/// rest of the sequence, but this would likely result in a test case that no longer fails).
///
/// In practice, this is not a terrible limitation, as having a minimal sequence of operations
/// tends to make it relatively easy to understand the cause of a failing case, even if the inputs
/// to those operations are not necessarily minimal themselves.
struct TestTree<T, O>
where
    O: Clone + Debug,
    T: ModelState<Operation = O>,
{
    /// List of all operations initially included in this test case
    ops: Vec<O>,
    /// List of which operations are still included vs. which have been shrunk out
    currently_included: Vec<bool>,
    /// The last operation we removed via shrinking (which may or may not end up needing to be
    /// added back in, depending on whether the test continues to fail with that operation removed)
    last_shrank_idx: usize,
    /// The index of the next step we're planning to run. If the test fails, the last successful
    /// step will thus be `next_step_idx - 1`.
    next_step_idx: Rc<Cell<usize>>,
    /// Whether we've already used next_step_idx to trim out steps after the failure.
    have_trimmed_after_failure: bool,
    /// Used to suppress "unused type parameter" warnings.
    ///
    /// Even though we don't actually have any fields in this struct that implement [`ModelState`],
    /// we need the type parameter so that we can create the correct type of temporary state value
    /// when we perform shrinking in the [`ValueTree`] impl.
    state_type: PhantomData<T>,
}

impl<T, O> ValueTree for TestTree<T, O>
where
    O: Clone + Debug,
    T: ModelState<Operation = O>,
{
    type Value = Vec<O>;

    /// Returns a [`Vec`] of all values for this test case that have not been shrunk out.
    fn current(&self) -> Self::Value {
        self.ops
            .iter()
            .zip(self.currently_included.iter())
            .filter_map(|(v, include)| include.then_some(v))
            .cloned()
            .collect()
    }

    /// Attempts to remove one operation for shrinking purposes, starting at the end of the list
    /// and working backward for each invocation of this method. Operations are only removed if it
    /// is possible to do so without violating a precondition.
    ///
    /// There are some limitations in the current approach that I'd like to address at some point:
    ///  * It is inefficient to only remove one element at a time.
    ///    * It is likely that we can randomly try removing multiple elements at once, since most
    ///      minimal failing cases are much smaller than the original failing cases that produced
    ///      them. The math around this is a bit tricky though, as the ideal number of elements to
    ///      try removing at any given time depends on how big we expect a typical minimal failing
    ///      test case to be.
    ///  * Removing only one element at a time may prevent us from fully shrinking down to the
    ///    minimal failing case
    ///    * For example, if we create a table A, then drop A, then create A again with different
    ///      columns, then the third step may be the only one necessary to reproduce the failure,
    ///      but we will not be able to remove the first two steps. Removing the first step alone
    ///      would violate a precondition for the second step (since the second stop is to drop A),
    ///      and removing the second step alone would violate a precondition for the third step
    ///      (since we cannot create A again unless we've dropped the previous table named A). In
    ///      order to shrink this down completely, we must remove both of the first two steps in one
    ///      step, which the current shrinking algorithm cannot do.
    ///
    /// However, removing only one element at a time is much simpler from an implementation
    /// standpoint, so that is all that I've implemented for now. Designing and implementing a
    /// better shrinking algorithm could definitely be useful in other tests in the future, though.
    fn simplify(&mut self) -> bool {
        if !self.have_trimmed_after_failure {
            self.have_trimmed_after_failure = true; // Only run this block once per test case

            let next_step_idx = self.next_step_idx.get();

            // next_step_idx is the failing step, and we want to exclude steps *beyond* the failing
            // step, hence adding 1 to the start of the range:
            let steps_after_failure = next_step_idx + 1..self.ops.len();
            for i in steps_after_failure {
                self.currently_included[i] = false;
            }
            // Setting last_shrank_idx to next_step_idx causes us to skip trying to shrink the step
            // that failed, which should be good since the step that triggers the failure is
            // presumably needed to reproduce the failure:
            self.last_shrank_idx = next_step_idx;
        }

        self.try_removing_op()
    }

    /// Undoes the last call to [`simplify`], and attempts to remove another element instead.
    fn complicate(&mut self) -> bool {
        self.currently_included[self.last_shrank_idx] = true;
        self.try_removing_op()
    }
}

impl<T, O> TestTree<T, O>
where
    O: Clone + Debug,
    T: ModelState<Operation = O>,
{
    /// Used by both [`simplify`] and [`complicate`] to attempt the removal of another operation
    /// during shrinking. If we are able to find another operation we can remove without violating
    /// any preconditions, this returns `true`; otherwise, this function returns `false`.
    fn try_removing_op(&mut self) -> bool {
        // More efficient to iterate backward, since we only need to go over the list once
        // (preconditions are based only on the state changes caused by previous ops, so if we're
        // moving backward through the list then the only time we'll be unable to remove an op
        // based on a precondition is if we've previously had to add back in a later op that
        // depends on it due to failing to remove the later op via shrinking; in that case we'll
        // never be able to remove the current op either, so there's no need to check it more than
        // once).
        while self.last_shrank_idx > 0 {
            self.last_shrank_idx -= 1;

            // try removing next idx to try, and see if preconditions still hold
            self.currently_included[self.last_shrank_idx] = false;
            let candidate_ops = self.current();
            // if they still hold, leave it as removed, and we're done
            if preconditions_hold::<T, O>(&candidate_ops) {
                return true;
            }
            // if they don't still hold, add it back and try another run through the loop
            self.currently_included[self.last_shrank_idx] = true;
        }
        // We got all the way through and didn't remove anything
        false
    }
}

/// Tests whether a given sequence of operation elements maintains all of the required preconditions
/// as each step is executed according to [`TestModel`].
fn preconditions_hold<T, O>(ops: &[O]) -> bool
where
    T: ModelState<Operation = O>,
{
    let mut candidate_state = T::default();
    for op in ops {
        if !candidate_state.preconditions_met(op) {
            return false;
        } else {
            candidate_state.next_state(op);
        }
    }
    true
}

/// Holds the internal state of a stateful property test run, including both the supplied
/// [`ModelState`] value, and other fields that are used internally to the proptest-stateful
/// framework.
#[derive(Clone, Debug)]
struct TestState<T, O>
where
    T: ModelState<Operation = O>,
{
    /// Supplied by the caller
    model_state: T,
    /// Used during shrinking to optimize removal of steps after the failing step
    next_step_idx: Rc<Cell<usize>>,
    min_ops: usize,
    max_ops: usize,
}

impl<T, O> Strategy for TestState<T, O>
where
    T: ModelState<Operation = O>,
    O: Clone + Debug,
{
    type Value = Vec<O>;
    type Tree = TestTree<T, O>;

    /// Generates a new test case, consisting of a sequence of Operation values that conform to
    /// the preconditions defined for a instance of [`ModelState`].
    fn new_tree(&self, runner: &mut TestRunner) -> NewTree<Self> {
        let mut symbolic_state = self.model_state.clone();

        let size = Uniform::new_inclusive(self.min_ops, self.max_ops).sample(runner.rng());
        let ops = (0..size)
            .map(|_| {
                let mut possible_ops = symbolic_state.op_generators();
                // Once generated, we do not shrink individual Operations; all shrinking is done via
                // removing individual operations from the test sequence. Thus, rather than
                // returning a Strategy to generate any of the possible ops from
                // `possible_ops`, it's simpler to just randomly pick one and return
                // it directly. It's important to use the RNG from `runner` though
                // to ensure test runs are deterministic based on the seed from proptest.
                let op_gen =
                    possible_ops.swap_remove(runner.rng().gen_range(0..possible_ops.len()));
                let next_op = op_gen.new_tree(runner).unwrap().current();
                symbolic_state.next_state(&next_op);
                next_op
            })
            .collect();

        // initially we include everything, then exclude different steps during shrinking:
        let currently_included = vec![true; size];
        Ok(TestTree {
            ops,
            currently_included,
            last_shrank_idx: size, // 1 past the end of the ops vec
            next_step_idx: self.next_step_idx.clone(),
            have_trimmed_after_failure: false,
            state_type: PhantomData,
        })
    }
}

/// Runs a single test case, as specified by `steps`.
async fn run<T, O>(steps: Vec<O>, next_step_idx: Rc<Cell<usize>>)
where
    T: ModelState<Operation = O>,
    O: Debug,
{
    let mut runtime_state = T::default();
    let mut ctxt = runtime_state.init_test_run().await;

    for (idx, op) in steps.iter().enumerate() {
        println!("Running op {idx}: {op:?}");
        runtime_state.run_op(op, &mut ctxt).await;

        // Note that if we were verifying results of any operations directly, it would be better to
        // wait until checking postconditions before we compute the next state; this would
        // complicate the postcondition code a bit, but would allow us to check that the op result
        // makes sense given the state *prior* to the operation being run. However, right now we
        // just check that the database contents match, so it's simpler to update the expected
        // state right away and then check the table contents based on that.
        runtime_state.next_state(op);

        runtime_state.check_postconditions(&mut ctxt).await;

        next_step_idx.set(next_step_idx.get() + 1);
    }

    runtime_state.clean_up_test_run(&mut ctxt).await;
}

/// Execute a property test for the specified test model, using the config values passed in.
pub fn test<T>(mut stateful_config: ProptestStatefulConfig)
where
    T: ModelState,
{
    // Always make sure we have max_shrink_iters high enough to fully shrink a max size test case:
    stateful_config.proptest_config.max_shrink_iters = stateful_config.max_ops as u32 * 2;

    // Used for optimizing shrinking by letting us immediately discard any steps after the failing
    // step. To enable this, we need to be able to mutate the value from the test itself, and then
    // access it from within the [`TestModel`], which is not easily doable within the constraints
    // of the proptest API, since the test passed to [`TestRunner`] is [`Fn`] (not [`FnMut`]).
    // Hence the need for interior mutability.
    let next_step_idx = Rc::new(Cell::new(0));

    let test_state = TestState {
        model_state: T::default(),
        next_step_idx: next_step_idx.clone(),
        min_ops: stateful_config.min_ops,
        max_ops: stateful_config.max_ops,
    };

    // We have to manually construct a [`TestRunner`] with the given config because using the
    // closure-style version of the [`proptest!`] macro causes the `source_file` value to be
    // unconditionally replaced with the result of `file!()`; this prevents other tests from
    // configuring the failure persistence features to output regressions into the same crate
    // directory structure as the actual test code.
    let mut runner = TestRunner::new(stateful_config.proptest_config);
    let result = runner.run(&test_state, |steps| {
        prop_assume!(preconditions_hold::<T, _>(&steps));
        next_step_idx.set(0);
        let rt = tokio::runtime::Builder::new_multi_thread()
            .enable_all()
            .build()
            .unwrap();
        rt.block_on(async {
            tokio::time::timeout(
                stateful_config.test_case_timeout,
                run::<T, _>(steps, next_step_idx.clone()),
            )
            .await
        })
        .unwrap();
        Ok(())
    });

    match result {
        Ok(_) => (),
        Err(e) => panic!("{}\n{}", e, runner),
    }
}