spindle_lib/
visitor.rs

1use itoa::Buffer as itoaBuffer;
2use ryu::Buffer as ryuBuffer;
3use std::str;
4
/// Defines state that is built during `Grammar::expression`.
///
/// This is implemented for
/// - `String` to produce string expressions
/// - `Vec<u8>` to produce byte sequences
/// - `u64` to produce equivalence class IDs of the traversal path. See [`crate::Grammar::how_many`] for more info.
///
/// You can implement this yourself, for example if you want to implement equivalence classes that
/// - ignore order
/// - ignore certain rules
/// - are more accurate
/// - care about characteristics of the arbitrary data, such as if a string is ascii or not.
///
/// Every `visit_*` hook has an empty default body, so an implementor only
/// needs to override the events it cares about; `new` is the single required
/// method.
pub trait Visitor {
    /// Creates the visitor's starting state.
    fn new() -> Self;
    /// Hook for an `or` event. `_index` presumably identifies the chosen
    /// alternative -- confirm against the code that drives the traversal.
    fn visit_or(&mut self, _index: usize) {}
    /// Hook for a concatenation event.
    fn visit_concat(&mut self) {}
    /// Hook for an optional event; `_was_chosen` reports whether it was chosen.
    fn visit_optional(&mut self, _was_chosen: bool) {}
    /// Hook for a repetition event; `_reps` is the repetition count.
    fn visit_repetition(&mut self, _reps: usize) {}
    /// Hook for a reference event. `_index` presumably identifies the
    /// referenced rule -- confirm against the code that drives the traversal.
    fn visit_reference(&mut self, _index: usize) {}
    /// Hook receiving a literal as text.
    fn visit_literal(&mut self, _s: &str) {}
    /// Hook receiving a raw byte sequence.
    fn visit_bytes(&mut self, _val: &[u8]) {}
    /// Hook receiving bytes associated with a regex (presumably the text
    /// generated for it -- confirm against the caller).
    fn visit_regex(&mut self, _val: &[u8]) {}
    /// Hook for a group event.
    fn visit_group(&mut self) {}
    /// Hook receiving a `&str` value.
    fn visit_str(&mut self, _s: &str) {}
    /// Hook receiving a `char` value.
    fn visit_char(&mut self, _c: char) {}
    /// Hook receiving an `f32` value.
    fn visit_f32(&mut self, _f: f32) {}
    /// Hook receiving an `f64` value.
    fn visit_f64(&mut self, _f: f64) {}
    /// Hook receiving a `u8` value.
    fn visit_u8(&mut self, _num: u8) {}
    /// Hook receiving a `u16` value.
    fn visit_u16(&mut self, _num: u16) {}
    /// Hook receiving a `u32` value.
    fn visit_u32(&mut self, _num: u32) {}
    /// Hook receiving a `u64` value.
    fn visit_u64(&mut self, _num: u64) {}
    /// Hook receiving a `u128` value.
    fn visit_u128(&mut self, _num: u128) {}
    /// Hook receiving a `usize` value.
    fn visit_usize(&mut self, _num: usize) {}
    /// Hook receiving an `i8` value.
    fn visit_i8(&mut self, _num: i8) {}
    /// Hook receiving an `i16` value.
    fn visit_i16(&mut self, _num: i16) {}
    /// Hook receiving an `i32` value.
    fn visit_i32(&mut self, _num: i32) {}
    /// Hook receiving an `i64` value.
    fn visit_i64(&mut self, _num: i64) {}
    /// Hook receiving an `i128` value.
    fn visit_i128(&mut self, _num: i128) {}
    /// Hook receiving an `isize` value.
    fn visit_isize(&mut self, _num: isize) {}
}
45
// Expands every `method = number type = buffer type` triple into a method
// that formats `num` with a freshly created buffer and appends the resulting
// text, as bytes, to `self`.
macro_rules! impl_visit_num_for_vec {
    ($($method:ident = $num_ty:ident = $formatter:ident),* $(,)*) => {
        $(
            fn $method(&mut self, num: $num_ty) {
                let mut scratch = $formatter::new();
                let text = scratch.format(num);
                self.extend_from_slice(text.as_bytes());
            }
        )*
    };
}
55
56/// Returns an arbitrary byte sequence matching the grammar.
57impl Visitor for Vec<u8> {
58    fn new() -> Self {
59        Default::default()
60    }
61    fn visit_literal(&mut self, val: &str) {
62        self.extend(val.as_bytes());
63    }
64    fn visit_bytes(&mut self, val: &[u8]) {
65        self.extend(val);
66    }
67    fn visit_regex(&mut self, regex_result: &[u8]) {
68        self.extend(regex_result);
69    }
70    impl_visit_num_for_vec!(
71        visit_u8 = u8 = itoaBuffer,
72        visit_u16 = u16 = itoaBuffer,
73        visit_u32 = u32 = itoaBuffer,
74        visit_u64 = u64 = itoaBuffer,
75        visit_u128 = u128 = itoaBuffer,
76        visit_usize = usize = itoaBuffer,
77        visit_i8 = i8 = itoaBuffer,
78        visit_i16 = i16 = itoaBuffer,
79        visit_i32 = i32 = itoaBuffer,
80        visit_i64 = i64 = itoaBuffer,
81        visit_i128 = i128 = itoaBuffer,
82        visit_isize = isize = itoaBuffer,
83        visit_f32 = f32 = ryuBuffer,
84        visit_f64 = f64 = ryuBuffer,
85    );
86    fn visit_str(&mut self, s: &str) {
87        self.extend(s.as_bytes())
88    }
89    fn visit_char(&mut self, c: char) {
90        let mut b = [0; 4];
91        let result = c.encode_utf8(&mut b);
92        self.extend(result.as_bytes())
93    }
94}
95
// Expands every `method = number type = buffer type` triple into a method
// that formats `num` with a freshly created buffer and appends the resulting
// text to `self`.
macro_rules! impl_visit_num_for_string {
    ($($method:ident = $num_ty:ident = $formatter:ident),* $(,)*) => {
        $(
            fn $method(&mut self, num: $num_ty) {
                let mut scratch = $formatter::new();
                let text = scratch.format(num);
                self.push_str(text);
            }
        )*
    };
}
105
106/// Returns an arbitrary expression `String` matching the grammar.
107///
108/// # Panics
109/// Panics if the regex or byte sequence evaluates to non-utf8. This
110/// can be avoided by avoiding such regexes or non-utf8 bytes in the grammar.
111impl Visitor for String {
112    fn new() -> Self {
113        Default::default()
114    }
115    fn visit_literal(&mut self, val: &str) {
116        self.push_str(val);
117    }
118    fn visit_bytes(&mut self, val: &[u8]) {
119        self.push_str(str::from_utf8(val).expect("utf8 bytes"));
120    }
121    fn visit_regex(&mut self, regex_result: &[u8]) {
122        self.push_str(str::from_utf8(regex_result).expect("utf8 bytes"));
123    }
124    impl_visit_num_for_string!(
125        visit_u8 = u8 = itoaBuffer,
126        visit_u16 = u16 = itoaBuffer,
127        visit_u32 = u32 = itoaBuffer,
128        visit_u64 = u64 = itoaBuffer,
129        visit_u128 = u128 = itoaBuffer,
130        visit_usize = usize = itoaBuffer,
131        visit_i8 = i8 = itoaBuffer,
132        visit_i16 = i16 = itoaBuffer,
133        visit_i32 = i32 = itoaBuffer,
134        visit_i64 = i64 = itoaBuffer,
135        visit_i128 = i128 = itoaBuffer,
136        visit_isize = isize = itoaBuffer,
137        visit_f32 = f32 = ryuBuffer,
138        visit_f64 = f64 = ryuBuffer,
139    );
140    fn visit_str(&mut self, s: &str) {
141        self.push_str(s)
142    }
143    fn visit_char(&mut self, c: char) {
144        self.push(c)
145    }
146}
147
148fn id_hash(val: &mut u64, rule_id: u64) {
149    *val = fxhash::hash64(&(rule_id, *val));
150}
151
152/// Returns an identifier of the path taken during the traversal.
153impl Visitor for u64 {
154    // TODO: maybe a struct(s) that capture different traversal patterns?
155    // ```ignore
156    // OrderedClass(u64);
157    // Unordered(u64);
158    // IncludeLiterals(u64);
159    // ```
160
161    fn new() -> Self {
162        u64::MAX
163    }
164    fn visit_or(&mut self, index: usize) {
165        id_hash(self, fxhash::hash64(&(0, index as u64)))
166    }
167    fn visit_concat(&mut self) {
168        id_hash(self, 1)
169    }
170    fn visit_optional(&mut self, was_chosen: bool) {
171        id_hash(self, fxhash::hash64(&(2, was_chosen as u64)))
172    }
173    fn visit_reference(&mut self, index: usize) {
174        id_hash(self, fxhash::hash64(&(3, index as u64)))
175    }
176    fn visit_repetition(&mut self, reps: usize) {
177        id_hash(self, fxhash::hash64(&(4, reps as u64)))
178    }
179    fn visit_literal(&mut self, _: &str) {
180        id_hash(self, 5)
181    }
182    fn visit_bytes(&mut self, _: &[u8]) {
183        id_hash(self, 6)
184    }
185    fn visit_regex(&mut self, _: &[u8]) {
186        id_hash(self, 7)
187    }
188    fn visit_group(&mut self) {
189        id_hash(self, 8)
190    }
191}
192
// Code is adapted from:
// <https://doc.rust-lang.org/src/core/tuple.rs.html#10>
// <https://doc.rust-lang.org/src/core/hash/mod.rs.html#879>
//
// Implements `Visitor` for the unit type (no arguments) or for a tuple whose
// element types are the given identifiers. A tuple visitor is built from the
// `new()` of every element and forwards EVERY hook to every element, in
// order. Each hook must be forwarded explicitly: any hook left out would fall
// back to the trait's empty default and the elements would never see it.
macro_rules! impl_visitor_tuple {
    () => (
        impl Visitor for () {
            #[inline]
            fn new() {}
        }
    );

    ( $($name:ident)+) => (
        // The type identifiers double as binding names when destructuring `self`.
        #[allow(non_snake_case)]
        impl<$($name: Visitor),+> Visitor for ($($name,)+) {
            fn new() -> ($($name,)+) {
                ($({ let x: $name = Visitor::new(); x},)+)
            }

            fn visit_or(&mut self, index: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_or(index);)+
            }
            fn visit_concat(&mut self) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_concat();)+
            }
            fn visit_optional(&mut self, b: bool) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_optional(b);)+
            }
            fn visit_repetition(&mut self, reps: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_repetition(reps);)+
            }
            fn visit_reference(&mut self, index: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_reference(index);)+
            }
            fn visit_literal(&mut self, val: &str) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_literal(val);)+
            }
            fn visit_bytes(&mut self, val: &[u8]) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_bytes(val);)+
            }
            fn visit_regex(&mut self, val: &[u8]) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_regex(val);)+
            }
            fn visit_group(&mut self) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_group();)+
            }
            fn visit_str(&mut self, s: &str) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_str(s);)+
            }
            fn visit_char(&mut self, c: char) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_char(c);)+
            }
            fn visit_f32(&mut self, f: f32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_f32(f);)+
            }
            fn visit_f64(&mut self, f: f64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_f64(f);)+
            }
            fn visit_u8(&mut self, num: u8) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u8(num);)+
            }
            fn visit_u16(&mut self, num: u16) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u16(num);)+
            }
            fn visit_u32(&mut self, num: u32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u32(num);)+
            }
            fn visit_u64(&mut self, num: u64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u64(num);)+
            }
            fn visit_u128(&mut self, num: u128) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_u128(num);)+
            }
            fn visit_usize(&mut self, num: usize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_usize(num);)+
            }
            fn visit_i8(&mut self, num: i8) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i8(num);)+
            }
            fn visit_i16(&mut self, num: i16) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i16(num);)+
            }
            fn visit_i32(&mut self, num: i32) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i32(num);)+
            }
            fn visit_i64(&mut self, num: i64) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i64(num);)+
            }
            fn visit_i128(&mut self, num: i128) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_i128(num);)+
            }
            fn visit_isize(&mut self, num: isize) {
                let ($(ref mut $name,)+) = *self;
                $($name.visit_isize(num);)+
            }
        }
    );
}
258
259impl_visitor_tuple! {}
260impl_visitor_tuple! { T }
261impl_visitor_tuple! { T B }
262impl_visitor_tuple! { T B C }
263impl_visitor_tuple! { T B C D }
264impl_visitor_tuple! { T B C D E }
265impl_visitor_tuple! { T B C D E F }
266impl_visitor_tuple! { T B C D E F G }
267impl_visitor_tuple! { T B C D E F G H }
268impl_visitor_tuple! { T B C D E F G H I }
269impl_visitor_tuple! { T B C D E F G H I J }
270impl_visitor_tuple! { T B C D E F G H I J K }
271impl_visitor_tuple! { T B C D E F G H I J K L }