1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
//! This module contains the underlying data generation and shrinking
//! mechanism. The main type is the `InfoPool`, which represents a pool of
//! random bytes that can be observed via the `InfoTap` object (obtained via
//! `InfoPool::tap`).
//!
//! Also manages the shrinking process (see [`minimize`](fn.minimize.html)).

use std::fmt;
use hex_slice::AsHex;
use rand::{random, Rng, XorShiftRng};
use std::cmp::min;

/// A pool of data that we can draw upon to generate other types of data.
#[derive(Clone, PartialEq)]
pub struct InfoPool {
    // Raw byte buffer. Taps append freshly generated bytes here; replays
    // read them back from the start.
    data: Vec<u8>,
}

/// A handle to an info Pool that we can draw newly generated bytes from.
pub struct InfoTap<'a> {
    // Borrowed pool buffer; every byte drawn is appended here so the same
    // run can later be replayed.
    data: &'a mut Vec<u8>,
    // Source of fresh random bytes.
    rng: XorShiftRng,
}

/// A handle to an info Pool that we can draw replayed bytes from, and zero after.
#[derive(Clone)]
pub struct InfoReplay<'a> {
    // Recorded bytes being replayed.
    data: &'a [u8],
    // Read cursor; once it passes `data.len()`, reads yield zero.
    off: usize,
}


impl fmt::Debug for InfoPool {
    // Renders the pool's bytes as hex (via `hex_slice`), which is far more
    // readable for byte buffers than the default `Vec<u8>` debug output.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("InfoPool")
            .field("data", &format_args!("{:x}", self.data.as_hex()))
            .finish()
    }
}

/// The reasons why drawing data from a pool can fail.
///
/// NOTE(review): nothing in this module currently returns these variants
/// (`next_byte` is infallible) — presumably they are constructed by the
/// generator combinators elsewhere in the crate; confirm before removing.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DataError {
    /// Not enough data to generate a value
    PoolExhausted,
    /// One of our combinators said that we should not test this value.
    SkipItem,
}


impl InfoPool {
    /// Create an `InfoPool` with a given vector of bytes. (Mostly used for
    /// testing).
    pub fn of_vec(data: Vec<u8>) -> Self {
        InfoPool { data: data }
    }

    /// Create an empty `InfoPool`. Bytes are generated lazily, as they are
    /// drawn from a tap (see [`tap`](#method.tap)); the pool records them
    /// so the run can be replayed.
    pub fn new() -> Self {
        Self { data: Vec::new() }
    }


    /// Allows access to the underlying buffer.
    pub fn buffer(&self) -> &[u8] {
        &*self.data
    }

    // Builds a tap over this pool's buffer using a caller-supplied RNG;
    // this lets tests inject a deterministic generator.
    fn tap_with(&mut self, rng: XorShiftRng) -> InfoTap {
        InfoTap {
            data: &mut self.data,
            rng: rng,
        }
    }
    /// Creates a tap that allows drawing new information from this pool.
    /// Every byte drawn is appended to the pool's buffer.
    pub fn tap(&mut self) -> InfoTap {
        self.tap_with(random::<XorShiftRng>())
    }

    /// Creates a replay handle that re-reads the recorded bytes from the
    /// start, yielding zero for every read past the end of the buffer.
    pub fn replay(&self) -> InfoReplay {
        InfoReplay {
            data: &*self.data,
            off: 0,
        }
    }
}

impl<'a> InfoTap<'a> {
    /// Generates the next byte from the tap's RNG, appends it to the
    /// underlying pool (so the run can be replayed later), and returns it.
    /// Unlike `InfoReplay::next_byte`, a tap can never be exhausted and
    /// this never fails.
    pub fn next_byte(&mut self) -> u8 {
        let next = self.rng.gen::<u8>();
        self.data.push(next);
        next
    }
}


impl<'a> Iterator for InfoTap<'a> {
    type Item = u8;

    /// An infinite stream of freshly generated bytes; never yields `None`.
    fn next(&mut self) -> Option<u8> {
        let byte = self.next_byte();
        Some(byte)
    }
}

impl<'a> InfoReplay<'a> {
    /// Consumes the next byte from this replay and advances the cursor.
    /// Returns the recorded byte, or `0` once the recorded data has been
    /// exhausted (the cursor keeps advancing regardless).
    pub fn next_byte(&mut self) -> u8 {
        let res = self.data.get(self.off).cloned();
        self.off += 1;
        res.unwrap_or(0)
    }
}


impl<'a> Iterator for InfoReplay<'a> {
    type Item = u8;

    /// Infinite iterator: yields the recorded bytes in order, then an
    /// endless run of zeroes; never returns `None`.
    fn next(&mut self) -> Option<u8> {
        let byte = self.next_byte();
        Some(byte)
    }
}

/// First shrink tactic: try deleting contiguous chunks of the pool while
/// the predicate still holds — coarsest first (halves, then quarters,
/// eighths, ...). `candidate` is scratch space reused across attempts.
/// Returns the smallest passing pool found, or `None` if no removal passes.
fn minimize_via_removal<F: Fn(InfoReplay) -> bool>(
    p: &InfoPool,
    candidate: &mut InfoPool,
    pred: &F,
) -> Option<InfoPool> {
    trace!("minimizing by removal: {:?}", p);
    if p.data.is_empty() {
        return None;
    }

    // `pow` is the number of significant bits in `len`; chunk widths run
    // through successive powers of two from roughly `len` down to 1.
    let max_pow = 0usize.count_zeros();
    let pow = max_pow - p.data.len().leading_zeros();
    for granularity in 0..pow {
        let width = 1 << (pow - granularity);
        for chunk in 0..(1 << granularity) {
            // Clamp to the buffer so a final partial chunk is handled.
            let start = min(chunk * width, p.data.len());
            let end = min(start + width, p.data.len());
            if start == end {
                break;
            }

            // Rebuild the candidate as `p` with bytes [start, end) removed,
            // reusing the candidate's existing allocation.
            candidate.data.clear();
            candidate.data.extend(&p.data[0..start]);
            candidate.data.extend(&p.data[end..]);

            let test = pred(candidate.replay());
            trace!(
                "removed {},{}: {:?}; test result {}",
                start,
                end,
                candidate,
                test
            );
            if test {
                // The smaller pool still provokes the failure; recurse to
                // shrink further, falling back to the candidate verbatim.
                if let Some(res) = minimize(candidate, pred) {
                    trace!("Returning shrunk: {:?}", res);
                    return Some(res);
                } else {
                    trace!("Returning original: {:?}", candidate);
                    return Some(candidate.clone());
                }
            }
        }
    }
    None
}

/// Second shrink tactic: keep the pool the same length, but try to make
/// individual byte values smaller. For each position we try
/// `p[i] - (p[i] >> bitoff)` for increasing `bitoff`, i.e. zero, then half,
/// then three quarters, seven eighths, ... of the original value.
/// `candidate` is scratch space reused across attempts.
fn minimize_via_scalar_shrink<F: Fn(InfoReplay) -> bool>(
    p: &InfoPool,
    candidate: &mut InfoPool,
    pred: &F,
) -> Option<InfoPool> {
    trace!("minimizing by scalar shrink: {:?}", p);
    for i in 0..p.data.len() {
        // Reset the candidate to a copy of `p`, reusing its allocation.
        // (Was `clone_from(&p)`, which passed `&&InfoPool` and relied on
        // deref coercion.)
        candidate.clone_from(p);

        // Only iterate over the significant bits of the byte being shrunk.
        let max_pow = 0u8.count_zeros();
        let pow = max_pow - p.data[i].leading_zeros();
        for bitoff in 0..pow {
            candidate.data[i] = p.data[i] - (p.data[i] >> bitoff);
            trace!(
                "shrunk item -(bitoff:{}) {} {}->{}: {:?}",
                bitoff,
                i,
                p.data[i],
                candidate.data[i],
                candidate
            );

            // Skip values that didn't actually change anything; re-testing
            // an identical pool would recurse without making progress.
            if candidate.buffer() == p.buffer() {
                trace!("No change");
                continue;
            }

            let test = pred(candidate.replay());
            trace!("test result {}", test);
            if test {
                // Still failing with a smaller value; recurse to shrink
                // further, falling back to the candidate verbatim.
                if let Some(res) = minimize(candidate, pred) {
                    trace!("Returning shrunk: {:?}", res);
                    return Some(res);
                } else {
                    trace!("Returning original: {:?}", candidate);
                    return Some(candidate.clone());
                }
            }
        }
    }

    None
}

/// Try to find the smallest pool `p` such that the predicate `pred` returns
/// true. Given that our [generators](../generators/index.html) tend to
/// generate smaller outputs from smaller inputs, by minimizing the source
/// pool we can find the smallest value that provokes a failure.
///
/// If we do not find any smaller pool that satisfies `pred`; we return
/// `None`.
///
/// Currently, we have two heuristics for shrinking: removing slices, and
/// reducing individual values.
///
/// Removing slices tries to remove as much of the pool as it can whilst still
/// having the predicate hold. At present, we do this by removing each half
/// and testing, then each quarter, eighths, and so on. Eventually, we plan to
/// track the regions of data that each generator draws from, and use that to
/// optimise the shrinking process.
///
/// Reducing individual values basically goes through each position in the
/// pool, and then tries reducing it to zero, then half, then three quarters,
/// seven eighths, and so on.
pub fn minimize<F: Fn(InfoReplay) -> bool>(p: &InfoPool, pred: &F) -> Option<InfoPool> {
    // Tactics run in order; the first to find any smaller passing pool wins
    // (each tactic recurses into `minimize` internally, so the result it
    // returns is already fully shrunk).
    let mut candidate = p.clone();
    if let Some(res) = minimize_via_removal(p, &mut candidate, pred) {
        return Some(res);
    }

    if let Some(res) = minimize_via_scalar_shrink(p, &mut candidate, pred) {
        return Some(res);
    }

    trace!("Nothing smaller found than {:?}", p);
    None
}

#[cfg(test)]
mod tests {
    extern crate env_logger;
    use super::*;

    #[test]
    fn should_take_each_item_in_pool() {
        let p = InfoPool::of_vec(vec![0, 1, 2, 3]);
        let mut t = p.replay();
        assert_eq!(t.next_byte(), 0);
        assert_eq!(t.next_byte(), 1);
        assert_eq!(t.next_byte(), 2);
        assert_eq!(t.next_byte(), 3);
        // Replays yield zero once the recorded data is exhausted.
        assert_eq!(t.next_byte(), 0);
    }

    #[test]
    fn should_generate_random_data() {
        let trials = 1024usize;
        let mut vals = 0;
        let mut p = InfoPool::new();
        let mut t = p.tap_with(::rand::XorShiftRng::new_unseeded());
        let error = 4;
        for _ in 0..trials {
            vals += t.next_byte() as usize;
        }
        let mean = vals / trials;
        let expected = 128;
        assert!(
            (expected - error) < mean && (expected + error) > mean,
            "Expected {} trials to be ({}+/-{}); got {}",
            trials,
            expected,
            error,
            mean
        )
    }

    #[test]
    fn should_allow_restarting_read() {
        let mut p = InfoPool::new();
        let mut v0 = Vec::new();
        {
            let mut t = p.tap();
            for _ in 0..4 {
                v0.push(t.next_byte())
            }
        }

        let mut t = p.replay();
        let mut v1 = Vec::new();
        for _ in 0..4 {
            v1.push(t.next_byte())
        }

        assert_eq!(v0, v1)
    }

    #[test]
    fn should_allow_borrowing_buffer() {
        let p = InfoPool::of_vec(vec![1]);
        assert_eq!(p.buffer(), &[1]);
    }

    #[test]
    fn tap_can_act_as_iterator() {
        // Bug fix: this test previously duplicated
        // `replay_can_act_as_iterator` verbatim and never touched a tap.
        // A tap generates fresh bytes and records them in the pool, so the
        // bytes drawn through the iterator must match the pool's buffer.
        let mut p = InfoPool::new();
        let drawn: Vec<u8> = p.tap().take(4).collect();
        assert_eq!(drawn.len(), 4);
        assert_eq!(p.buffer(), &*drawn);
    }

    #[test]
    fn replay_can_act_as_iterator() {
        let buf = vec![4, 3, 2, 1];
        let p = InfoPool::of_vec(buf.clone());
        let _: &Iterator<Item = u8> = &p.replay();

        assert_eq!(p.replay().take(4).collect::<Vec<_>>(), buf)
    }

    #[test]
    fn minimiser_should_minimise_to_empty() {
        let p = InfoPool::of_vec(vec![1]);
        let min = minimize(&p, &|_| true);

        assert_eq!(min.as_ref().map(|p| p.buffer()), Some([].as_ref()))
    }

    #[test]
    fn minimiser_should_minimise_to_minimum_given_size() {
        env_logger::init().unwrap_or(());
        let p = InfoPool::of_vec(vec![1; 4]);
        let min = minimize(&p, &|t| t.take(16).filter(|&v| v > 0).count() > 1)
            .expect("some smaller pool");

        assert_eq!(min.buffer(), &[1, 1])
    }

    #[test]
    fn minimiser_should_minimise_scalar_values() {
        let p = InfoPool::of_vec(vec![255; 3]);
        let min = minimize(&p, &|t| t.take(16).any(|v| v >= 3)).expect("some smaller pool");

        assert_eq!(min.buffer(), &[3])
    }
    #[test]
    fn minimiser_should_minimise_scalar_values_to_empty() {
        let p = InfoPool::of_vec(vec![255; 3]);
        let min = minimize(&p, &|t| t.take(16).any(|_| true)).expect("some smaller pool");

        assert_eq!(min.buffer(), &[] as &[u8])
    }

    #[test]
    fn minimiser_should_minimise_scalar_values_by_search() {
        let p = InfoPool::of_vec(vec![255; 3]);
        let min = minimize(&p, &|t| t.take(16).any(|v| v >= 13)).expect("some smaller pool");

        assert_eq!(min.buffer(), &[13])
    }
    #[test]
    fn minimiser_should_minimise_scalar_values_accounting_for_overflow() {
        let p = InfoPool::of_vec(vec![255; 3]);
        let min = minimize(&p, &|t| t.take(16).any(|v| v >= 251)).expect("some smaller pool");

        assert_eq!(min.buffer(), &[251])
    }
}