proptest_arbitrary/_std/
string.rs

1//! Arbitrary implementations for `std::string`.
2
3use super::*;
4use std::string::{String, FromUtf8Error, FromUtf16Error};
5use std::iter;
6use std::slice;
7use std::rc::Rc;
8use std::sync::Arc;
9
/// Parameter type used by the `Arbitrary` implementation for `String`.
///
/// It is a newtype around the regex that forms the `Strategy` for `String`,
/// introduced so that a sensible `Default` can be given. The default regex is
/// `\PC*`, i.e. a string of non-control characters.
#[derive(Copy, Clone, From, Into, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StringParam<'a>(&'a str);
14
15impl<'a> Default for StringParam<'a> {
16    fn default() -> Self {
17        StringParam("\\PC*")
18    }
19}
20
21impl<'a> Arbitrary<'a> for String {
22    valuetree!();
23    type Parameters = StringParam<'a>;
24    type Strategy = &'a str;
25
26    /// ## Safety
27    ///
28    /// This implementation panics if the input is not a valid regex proptest
29    /// can handle.
30    fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
31        args.into()
32    }
33}
34
35macro_rules! dst_wrapped {
36    ($($w: ident),*) => {
37        $(arbitrary!($w<str>, FMapped<'a, String, Self>, StringParam<'a>;
38            a => any_with_sinto::<String, _>(a)
39        );)*
40    };
41}
42
43dst_wrapped!(Box, Rc, Arc);
44
// `0xD800` is a lone (unpaired) surrogate code unit, so decoding it as
// UTF-16 fails; the closure returns the resulting error.
generator!(FromUtf16Error, || String::from_utf16(&[0xD800]).unwrap_err());
46
// `ParseError` is a void-like type: it has no values, so none can be
// generated. Users of the type must handle it by never constructing the
// enum variant that contains it or, for structs, by inductively not
// generating the struct. The same applies to `!` and `Infallible`.
51// generator!(ParseError, || panic!());
52
53arbitrary!(FromUtf8Error, SFnPtrMap<BoxedStrategy<Vec<u8>>, Self>;
54    static_map(not_utf8_bytes(true), |bs| String::from_utf8(bs).unwrap_err())
55);
56
57// This could be faster.. The main cause seems to be generation of
58// Vec<char>. any::<u8>() instead of any::<u16>() speeds it up considerably.
59pub(crate) fn not_utf8_bytes(allow_null: bool) -> BoxedStrategy<Vec<u8>> {
60    (any::<u8>(), gen_el_bytes(allow_null))
61        .prop_flat_map(move |(valid_up_to, el_bytes)| {
62            let bounds: SizeBounds = (valid_up_to as usize).into();
63            any_with_map(product_pack![bounds, default()], move |p: Vec<char>| {
64                let iter = p.iter();
65                let string = if allow_null {
66                    iter.collect::<String>()
67                } else {
68                    iter.filter(|&&x| x != '\u{0}').collect::<String>()
69                };
70                let mut bytes = string.into_bytes();
71                bytes.extend(el_bytes.into_iter());
72                bytes
73            })
74        }).boxed()
75}
76
/// A short byte sequence of one to four bytes, built by `b1`..`b4` and
/// produced by `gen_el_bytes` as the invalid tail appended to otherwise valid
/// UTF-8. (The name presumably abbreviates "error length bytes".)
#[derive(Debug)]
enum ELBytes {
    /// A single byte.
    B1([u8; 1]),
    /// Two bytes.
    B2([u8; 2]),
    /// Three bytes.
    B3([u8; 3]),
    /// Four bytes.
    B4([u8; 4])
}
84
85impl<'a> IntoIterator for &'a ELBytes {
86    type Item = u8;
87    type IntoIter = iter::Map<slice::Iter<'a, u8>, fn(&u8) -> u8>;
88    fn into_iter(self) -> Self::IntoIter {
89        use self::ELBytes::*;
90        (match *self {
91            B1(ref a) => a.iter(),
92            B2(ref a) => a.iter(),
93            B3(ref a) => a.iter(),
94            B4(ref a) => a.iter(),
95        }).map(|x| *x)
96    }
97}
98
99fn b1(a: u8) -> ELBytes { ELBytes::B1([a]) }
100fn b2(a: (u8, u8)) -> ELBytes { ELBytes::B2([a.0, a.1]) }
101fn b3(a: ((u8, u8), u8)) -> ELBytes { ELBytes::B3([(a.0).0, (a.0).1, a.1]) }
102fn b4(a: ((u8, u8), u8, u8)) -> ELBytes {
103    ELBytes::B4([(a.0).0, (a.0).1, a.1, a.2])
104}
105
106// By analysis of run_utf8_validation defined at:
107// https://doc.rust-lang.org/nightly/src/core/str/mod.rs.html#1429
108// we know that .error_len() \in {None, Some(1), Some(2), Some(3)}.
109// We represent this with the range [0..4) and generate a valid
110// sequence from that.
111fn gen_el_bytes(allow_null: bool) -> BoxedStrategy<ELBytes> {
112    /*
113    // https://tools.ietf.org/html/rfc3629
114    static UTF8_CHAR_WIDTH: [u8; 256] = [
115    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
116    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
117    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
118    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
119    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
120    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
121    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
122    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
123    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
124    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
125    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
126    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
127    0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
128    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
129    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
130    4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
131    ];
132
133    /// Mask of the value bits of a continuation byte.
134    const CONT_MASK: u8 = 0b0011_1111;
135    /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
136    const TAG_CONT_U8: u8 = 0b1000_0000;
137    */
138
139    let succ_byte  = 0x80u8..0xC0u8;
140    let start_byte = if allow_null { 0x00u8 } else { 0x01u8 };
141    let fail_byte  = prop_oneof![start_byte..0x7Fu8, 0xC1u8..];
142    let byte0_w0   = prop_oneof![0x80u8..0xC0u8, 0xF5u8..];
143    let byte0_w2   = 0xC2u8..0xE0u8;
144    let byte0_w3   = 0xE0u8..0xF0u8;
145    let byte0_w4   = 0xF0u8..0xF5u8;
146    let byte01_w3  = byte0_w3.clone().prop_flat_map(|x| (Just(x), match x {
147        0xE0u8          => 0xA0u8..0xC0u8,
148        0xE1u8...0xECu8 => 0x80u8..0xC0u8,
149        0xEDu8          => 0x80u8..0xA0u8,
150        0xEEu8...0xEFu8 => 0x80u8..0xA0u8,
151        _               => panic!(),
152    }));
153    let byte01_w3_e1 = byte0_w3.clone().prop_flat_map(move |x| (Just(x), match x {
154        0xE0u8          => prop_oneof![start_byte..0xA0u8, 0xC0u8..],
155        0xE1u8...0xECu8 => prop_oneof![start_byte..0x80u8, 0xC0u8..],
156        0xEDu8          => prop_oneof![start_byte..0x80u8, 0xA0u8..],
157        0xEEu8...0xEFu8 => prop_oneof![start_byte..0x80u8, 0xA0u8..],
158        _               => panic!(),
159    }));
160    let byte01_w4_e1 = byte0_w4.clone().prop_flat_map(move |x| (Just(x), match x {
161        0xF0u8          => prop_oneof![start_byte..0x90u8, 0xA0u8..],
162        0xF1u8...0xF3u8 => prop_oneof![start_byte..0x80u8, 0xA0u8..],
163        0xF4u8          => prop_oneof![start_byte..0x80u8, 0x90u8..],
164        _               => panic!()
165    }));
166    let byte01_w4 = byte0_w4.clone().prop_flat_map(|x| (Just(x), match x {
167        0xF0u8          => 0x90u8..0xA0u8,
168        0xF1u8...0xF3u8 => 0x80u8..0xA0u8,
169        0xF4u8          => 0x80u8..0x90u8,
170        _               => panic!()
171    }));
172    prop_oneof![
173        // error_len = None
174        prop_oneof![
175            // w = 2
176            // lacking 1 bytes:
177            static_map(byte0_w2.clone(), b1),
178            // w = 3
179            // lacking 2 bytes:
180            static_map(byte0_w3, b1),
181            // lacking 1 bytes:
182            static_map(byte01_w3.clone(), b2),
183            // w = 4
184            // lacking 3 bytes:
185            static_map(byte0_w4, b1),
186            // lacking 2 bytes:
187            static_map(byte01_w4.clone(), b2),
188            // lacking 1 byte:
189            static_map((byte01_w4.clone(), succ_byte.clone()), b3),
190        ],
191        // error_len = Some(1)
192        prop_oneof![
193            // w = 1 is not represented.
194            // w = 0
195            static_map(byte0_w0, b1),
196            // w = 2
197            static_map((byte0_w2, fail_byte.clone()), b2),
198            // w = 3
199            static_map(byte01_w3_e1, b2),
200            // w = 4
201            static_map(byte01_w4_e1, b2),
202        ],
203        // error_len = Some(2)
204        static_map(prop_oneof![
205            // w = 3
206            (byte01_w3, fail_byte.clone()),
207            // w = 4
208            (byte01_w4.clone(), fail_byte.clone())
209        ], b3),
210        // error_len = Some(3), w = 4
211        static_map((byte01_w4, succ_byte, fail_byte), b4),
212    ].boxed()
213}
214
#[cfg(test)]
mod test {
    // One `name => Type` entry per type given an `Arbitrary` implementation
    // in this module. `no_panic_test!` is defined elsewhere; presumably it
    // expands each entry into a test named `name` that checks that
    // generating values of `Type` does not panic -- confirm at its
    // definition.
    no_panic_test!(
        string  => String,
        str_box => Box<str>,
        str_rc  => Rc<str>,
        str_arc => Arc<str>,
        from_utf16_error => FromUtf16Error,
        from_utf8_error => FromUtf8Error
    );
}