spindle_lib/
visitor.rs

1use itoa::Buffer as itoaBuffer;
2use ryu::Buffer as ryuBuffer;
3use std::str;
4
/// State that is built up during [`crate::Grammar::expression`].
///
/// Every `visit_*` hook has an empty default body, so an implementation only
/// needs to override the events it is interested in.
///
/// This module implements the trait for
/// - `String`, to produce string expressions
/// - `Vec<u8>`, to produce byte sequences
/// - `u64`, to produce equivalence class IDs of the traversal path; see
///   [`crate::Grammar::how_many`] for more info
/// - `()` and tuples of up to twelve visitors
///
/// You can implement this yourself, for example if you want equivalence classes that
/// - ignore order
/// - ignore certain paths or transitions
/// - are more accurate
/// - care about characteristics of the arbitrary data, such as whether a string is ASCII or not.
///
/// You can also implement a visitor that builds some structured state, such as a tree or a
/// collection of the generated data.
pub trait Visitor {
    /// Instantiates the visitor before traversal.
    fn new() -> Self;

    /// Called for an `X | Y` branch; `index` is the position of the path that was taken.
    fn visit_or(&mut self, _index: usize) {}

    /// Called for the `X Y` concatenation operation.
    fn visit_concat(&mut self) {}

    /// Called for `X?`; `was_chosen` tells whether `X` will be evaluated.
    fn visit_optional(&mut self, _was_chosen: bool) {}

    /// Called for `X*`, `X+`, `X{k}`, or `X{min,max}` with the number of repetitions that were
    /// arbitrarily selected.
    fn visit_repetition(&mut self, _reps: usize) {}

    /// Called for a use/reference of a defined rule, with the rule name and its index/id.
    fn visit_reference(&mut self, _name: &str, _index: usize) {}

    /// Called for a literal `str`.
    fn visit_literal(&mut self, _s: &str) {}

    /// Called for a literal `&[u8]`.
    fn visit_bytes(&mut self, _val: &[u8]) {}

    /// Called for a regex, with the arbitrary bytes that were generated for it.
    fn visit_regex(&mut self, _generated: &[u8]) {}

    /// Called for the `(X)` group.
    fn visit_group(&mut self) {}

    /// Called for the pre-defined `String` rule, with the generated arbitrary `str`.
    fn visit_str(&mut self, _s: &str) {}

    /// Called for the pre-defined `char` rule, with the generated arbitrary `char`.
    fn visit_char(&mut self, _c: char) {}

    /// Called for the pre-defined `f32` rule, with the generated arbitrary `f32`.
    fn visit_f32(&mut self, _f: f32) {}

    /// Called for the pre-defined `f64` rule, with the generated arbitrary `f64`.
    fn visit_f64(&mut self, _f: f64) {}

    /// Called for the pre-defined `u8` rule, with the generated arbitrary `u8`.
    fn visit_u8(&mut self, _num: u8) {}

    /// Called for the pre-defined `u16` rule, with the generated arbitrary `u16`.
    fn visit_u16(&mut self, _num: u16) {}

    /// Called for the pre-defined `u32` rule, with the generated arbitrary `u32`.
    fn visit_u32(&mut self, _num: u32) {}

    /// Called for the pre-defined `u64` rule, with the generated arbitrary `u64`.
    fn visit_u64(&mut self, _num: u64) {}

    /// Called for the pre-defined `u128` rule, with the generated arbitrary `u128`.
    fn visit_u128(&mut self, _num: u128) {}

    /// Called for the pre-defined `usize` rule, with the generated arbitrary `usize`.
    fn visit_usize(&mut self, _num: usize) {}

    /// Called for the pre-defined `i8` rule, with the generated arbitrary `i8`.
    fn visit_i8(&mut self, _num: i8) {}

    /// Called for the pre-defined `i16` rule, with the generated arbitrary `i16`.
    fn visit_i16(&mut self, _num: i16) {}

    /// Called for the pre-defined `i32` rule, with the generated arbitrary `i32`.
    fn visit_i32(&mut self, _num: i32) {}

    /// Called for the pre-defined `i64` rule, with the generated arbitrary `i64`.
    fn visit_i64(&mut self, _num: i64) {}

    /// Called for the pre-defined `i128` rule, with the generated arbitrary `i128`.
    fn visit_i128(&mut self, _num: i128) {}

    /// Called for the pre-defined `isize` rule, with the generated arbitrary `isize`.
    fn visit_isize(&mut self, _num: isize) {}
}
72
// Generates numeric `visit_*` methods for a byte-vector visitor.
//
// Each `method = number type = buffer type` triple expands to a method that
// formats the number with a fresh buffer of the given type and appends the
// resulting text's bytes to `self`.
macro_rules! impl_visit_num_for_vec {
    ($($method:ident = $num_ty:ident = $buf:ident),* $(,)*) => (
        $(
            fn $method(&mut self, num: $num_ty) {
                let mut scratch = $buf::new();
                self.extend_from_slice(scratch.format(num).as_bytes());
            }
        )*
    )
}
82
83/// Returns an arbitrary byte sequence matching the grammar.
84impl Visitor for Vec<u8> {
85    fn new() -> Self {
86        Default::default()
87    }
88    fn visit_literal(&mut self, val: &str) {
89        self.extend(val.as_bytes());
90    }
91    fn visit_bytes(&mut self, val: &[u8]) {
92        self.extend(val);
93    }
94    fn visit_regex(&mut self, regex_result: &[u8]) {
95        self.extend(regex_result);
96    }
97    impl_visit_num_for_vec!(
98        visit_u8 = u8 = itoaBuffer,
99        visit_u16 = u16 = itoaBuffer,
100        visit_u32 = u32 = itoaBuffer,
101        visit_u64 = u64 = itoaBuffer,
102        visit_u128 = u128 = itoaBuffer,
103        visit_usize = usize = itoaBuffer,
104        visit_i8 = i8 = itoaBuffer,
105        visit_i16 = i16 = itoaBuffer,
106        visit_i32 = i32 = itoaBuffer,
107        visit_i64 = i64 = itoaBuffer,
108        visit_i128 = i128 = itoaBuffer,
109        visit_isize = isize = itoaBuffer,
110        visit_f32 = f32 = ryuBuffer,
111        visit_f64 = f64 = ryuBuffer,
112    );
113    fn visit_str(&mut self, s: &str) {
114        self.extend(s.as_bytes())
115    }
116    fn visit_char(&mut self, c: char) {
117        let mut b = [0; 4];
118        let result = c.encode_utf8(&mut b);
119        self.extend(result.as_bytes())
120    }
121}
122
// Generates numeric `visit_*` methods for a `String` visitor.
//
// Each `method = number type = buffer type` triple expands to a method that
// formats the number with a fresh buffer of the given type and appends the
// resulting text to `self`.
macro_rules! impl_visit_num_for_string {
    ($($method:ident = $num_ty:ident = $buf:ident),* $(,)*) => (
        $(
            fn $method(&mut self, num: $num_ty) {
                let mut scratch = $buf::new();
                let digits = scratch.format(num);
                self.push_str(digits);
            }
        )*
    )
}
132
133/// Returns an arbitrary expression `String` matching the grammar.
134///
135/// # Panics
136/// Panics if the regex or byte sequence evaluates to non-utf8. This
137/// can be avoided by avoiding such regexes or non-utf8 bytes in the grammar.
138impl Visitor for String {
139    fn new() -> Self {
140        Default::default()
141    }
142    fn visit_literal(&mut self, val: &str) {
143        self.push_str(val);
144    }
145    fn visit_bytes(&mut self, val: &[u8]) {
146        self.push_str(str::from_utf8(val).expect("utf8 bytes"));
147    }
148    fn visit_regex(&mut self, regex_result: &[u8]) {
149        self.push_str(str::from_utf8(regex_result).expect("utf8 bytes"));
150    }
151    impl_visit_num_for_string!(
152        visit_u8 = u8 = itoaBuffer,
153        visit_u16 = u16 = itoaBuffer,
154        visit_u32 = u32 = itoaBuffer,
155        visit_u64 = u64 = itoaBuffer,
156        visit_u128 = u128 = itoaBuffer,
157        visit_usize = usize = itoaBuffer,
158        visit_i8 = i8 = itoaBuffer,
159        visit_i16 = i16 = itoaBuffer,
160        visit_i32 = i32 = itoaBuffer,
161        visit_i64 = i64 = itoaBuffer,
162        visit_i128 = i128 = itoaBuffer,
163        visit_isize = isize = itoaBuffer,
164        visit_f32 = f32 = ryuBuffer,
165        visit_f64 = f64 = ryuBuffer,
166    );
167    fn visit_str(&mut self, s: &str) {
168        self.push_str(s)
169    }
170    fn visit_char(&mut self, c: char) {
171        self.push(c)
172    }
173}
174
175fn id_hash(val: &mut u64, rule_id: u64) {
176    *val = fxhash::hash64(&(rule_id, *val));
177}
178
179/// Returns an identifier of the path taken during the traversal.
180impl Visitor for u64 {
181    // TODO: maybe a struct(s) that capture different traversal patterns?
182    // ```ignore
183    // OrderedClass(u64);
184    // Unordered(u64);
185    // IncludeLiterals(u64);
186    // ```
187
188    fn new() -> Self {
189        u64::MAX
190    }
191    fn visit_or(&mut self, index: usize) {
192        id_hash(self, fxhash::hash64(&(0, index as u64)))
193    }
194    fn visit_concat(&mut self) {
195        id_hash(self, 1)
196    }
197    fn visit_optional(&mut self, was_chosen: bool) {
198        id_hash(self, fxhash::hash64(&(2, was_chosen as u64)))
199    }
200    fn visit_reference(&mut self, _: &str, index: usize) {
201        id_hash(self, fxhash::hash64(&(3, index as u64)))
202    }
203    fn visit_repetition(&mut self, reps: usize) {
204        id_hash(self, fxhash::hash64(&(4, reps as u64)))
205    }
206    fn visit_literal(&mut self, _: &str) {
207        id_hash(self, 5)
208    }
209    fn visit_bytes(&mut self, _: &[u8]) {
210        id_hash(self, 6)
211    }
212    fn visit_regex(&mut self, _: &[u8]) {
213        id_hash(self, 7)
214    }
215    fn visit_group(&mut self) {
216        id_hash(self, 8)
217    }
218}
219
// Implements `Visitor` for `()` and for tuples whose elements are all
// visitors, so that several visitors can observe the same traversal.
//
// - `impl_visitor_tuple! {}` implements the trait for `()`; every hook keeps
//   its default empty body.
// - `impl_visitor_tuple! { T B ... }` implements the trait for the tuple
//   `(T, B, ...)`. `new` builds each element with its own `Visitor::new`, and
//   every `visit_*` hook is forwarded to each element in left-to-right order.
//
// Every hook of the trait must be forwarded here: a hook that is left out
// falls back to the trait's empty default, and the tuple would then silently
// drop that event for all of its elements.
//
// Code is adapted from:
// <https://doc.rust-lang.org/src/core/tuple.rs.html#10>
// <https://doc.rust-lang.org/src/core/hash/mod.rs.html#879>
macro_rules! impl_visitor_tuple {
    () => (
        impl Visitor for () {
            #[inline]
            fn new() {}
        }
    );

    ( $($name:ident)+) => (
        // The type parameter names double as binding names when the tuple is
        // destructured, hence the upper-case locals.
        #[allow(non_snake_case)]
        impl<$($name: Visitor),+> Visitor for ($($name,)+) {
            fn new() -> ($($name,)+) {
                ($(<$name as Visitor>::new(),)+)
            }

            fn visit_or(&mut self, index: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_or(index);)+
            }
            fn visit_concat(&mut self) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_concat();)+
            }
            fn visit_optional(&mut self, b: bool) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_optional(b);)+
            }
            fn visit_repetition(&mut self, reps: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_repetition(reps);)+
            }
            fn visit_reference(&mut self, name: &str, index: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_reference(name, index);)+
            }
            fn visit_literal(&mut self, val: &str) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_literal(val);)+
            }
            fn visit_bytes(&mut self, val: &[u8]) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_bytes(val);)+
            }
            fn visit_regex(&mut self, val: &[u8]) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_regex(val);)+
            }
            fn visit_group(&mut self) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_group();)+
            }
            fn visit_str(&mut self, s: &str) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_str(s);)+
            }
            fn visit_char(&mut self, c: char) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_char(c);)+
            }
            fn visit_f32(&mut self, f: f32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_f32(f);)+
            }
            fn visit_f64(&mut self, f: f64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_f64(f);)+
            }
            fn visit_u8(&mut self, num: u8) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u8(num);)+
            }
            fn visit_u16(&mut self, num: u16) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u16(num);)+
            }
            fn visit_u32(&mut self, num: u32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u32(num);)+
            }
            fn visit_u64(&mut self, num: u64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u64(num);)+
            }
            fn visit_u128(&mut self, num: u128) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u128(num);)+
            }
            fn visit_usize(&mut self, num: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_usize(num);)+
            }
            fn visit_i8(&mut self, num: i8) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i8(num);)+
            }
            fn visit_i16(&mut self, num: i16) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i16(num);)+
            }
            fn visit_i32(&mut self, num: i32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i32(num);)+
            }
            fn visit_i64(&mut self, num: i64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i64(num);)+
            }
            fn visit_i128(&mut self, num: i128) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i128(num);)+
            }
            fn visit_isize(&mut self, num: isize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_isize(num);)+
            }
        }
    );
}
285
286impl_visitor_tuple! {}
287impl_visitor_tuple! { T }
288impl_visitor_tuple! { T B }
289impl_visitor_tuple! { T B C }
290impl_visitor_tuple! { T B C D }
291impl_visitor_tuple! { T B C D E }
292impl_visitor_tuple! { T B C D E F }
293impl_visitor_tuple! { T B C D E F G }
294impl_visitor_tuple! { T B C D E F G H }
295impl_visitor_tuple! { T B C D E F G H I }
296impl_visitor_tuple! { T B C D E F G H I J }
297impl_visitor_tuple! { T B C D E F G H I J K }
298impl_visitor_tuple! { T B C D E F G H I J K L }