spindle_lib/
visitor.rs

1use std::str;
2
/// State accumulated while `Grammar::expression` runs.
///
/// Implementations are provided for
/// - `String`, which produces string expressions
/// - `Vec<u8>`, which produces byte sequences
/// - `u64`, which produces an equivalence class ID of the traversal path.
///
/// A custom implementation is useful when, for example, you want equivalence classes that
/// - ignore order
/// - ignore certain rules
/// - are more accurate
/// - care about characteristics of the arbitrary data, such as whether a string is ASCII.
///
/// Every `visit_*` hook has an empty default body, so an implementation only
/// overrides the events it is interested in.
pub trait Visitor {
    /// Creates the visitor's starting state.
    fn new() -> Self;

    // Hooks describing the shape of the traversal.

    /// Hook for an `or` node; `_index` is presumably the position of the
    /// selected alternative. Does nothing by default.
    fn visit_or(&mut self, _index: usize) {}
    /// Hook for a concatenation node. Does nothing by default.
    fn visit_concat(&mut self) {}
    /// Hook for an optional node; `_was_chosen` presumably reports whether the
    /// optional part was taken. Does nothing by default.
    fn visit_optional(&mut self, _was_chosen: bool) {}
    /// Hook for a repetition node; `_reps` is presumably the number of
    /// repetitions. Does nothing by default.
    fn visit_repetition(&mut self, _reps: usize) {}
    /// Hook for a rule reference; `_index` presumably identifies the referenced
    /// rule. Does nothing by default.
    fn visit_reference(&mut self, _index: usize) {}
    /// Hook for a group node. Does nothing by default.
    fn visit_group(&mut self) {}

    // Hooks carrying produced data.

    /// Hook receiving the text of a literal. Does nothing by default.
    fn visit_literal(&mut self, _s: &str) {}
    /// Hook receiving a raw byte sequence. Does nothing by default.
    fn visit_bytes(&mut self, _val: &[u8]) {}
    /// Hook receiving the bytes produced for a regex. Does nothing by default.
    fn visit_regex(&mut self, _val: &[u8]) {}
    /// Hook receiving a `u16` value. Does nothing by default.
    fn visit_u16(&mut self, _num: u16) {}
    /// Hook receiving a string slice. Does nothing by default.
    fn visit_str(&mut self, _s: &str) {}
}
28
29/// Returns an arbitrary byte sequence matching the grammar.
30impl Visitor for Vec<u8> {
31    fn new() -> Self {
32        Default::default()
33    }
34    fn visit_literal(&mut self, val: &str) {
35        self.extend(val.as_bytes());
36    }
37    fn visit_bytes(&mut self, val: &[u8]) {
38        self.extend(val);
39    }
40    fn visit_regex(&mut self, regex_result: &[u8]) {
41        self.extend(regex_result);
42    }
43    fn visit_u16(&mut self, num: u16) {
44        let mut num_buf = itoa::Buffer::new();
45        self.extend(num_buf.format(num).as_bytes());
46    }
47    fn visit_str(&mut self, s: &str) {
48        self.extend(s.as_bytes())
49    }
50}
51
52/// Returns an arbitrary expression `String` matching the grammar.
53///
54/// # Panics
55/// Panics if the regex or byte sequence evaluates to non-utf8. This
56/// can be avoided by avoiding such regexes or non-utf8 bytes in the grammar.
57impl Visitor for String {
58    fn new() -> Self {
59        Default::default()
60    }
61    fn visit_literal(&mut self, val: &str) {
62        self.push_str(val);
63    }
64    fn visit_bytes(&mut self, val: &[u8]) {
65        self.push_str(str::from_utf8(val).expect("utf8 bytes"));
66    }
67    fn visit_regex(&mut self, regex_result: &[u8]) {
68        self.push_str(str::from_utf8(regex_result).expect("utf8 bytes"));
69    }
70    fn visit_u16(&mut self, num: u16) {
71        let mut num_buf = itoa::Buffer::new();
72        self.push_str(num_buf.format(num));
73    }
74    fn visit_str(&mut self, s: &str) {
75        self.push_str(s)
76    }
77}
78
79fn id_hash(val: &mut u64, rule_id: u64) {
80    *val = fxhash::hash64(&(rule_id, *val));
81}
82
83/// Returns an identifier of the path taken during the traversal.
84impl Visitor for u64 {
85    // TODO: maybe a struct(s) that capture different traversal patterns?
86    // ```ignore
87    // OrderedClass(u64);
88    // Unordered(u64);
89    // IncludeLiterals(u64);
90    // ```
91
92    fn new() -> Self {
93        u64::MAX
94    }
95    fn visit_or(&mut self, index: usize) {
96        id_hash(self, fxhash::hash64(&(0, index as u64)))
97    }
98    fn visit_concat(&mut self) {
99        id_hash(self, 1)
100    }
101    fn visit_optional(&mut self, was_chosen: bool) {
102        id_hash(self, fxhash::hash64(&(2, was_chosen as u64)))
103    }
104    fn visit_reference(&mut self, index: usize) {
105        id_hash(self, fxhash::hash64(&(3, index as u64)))
106    }
107    fn visit_repetition(&mut self, reps: usize) {
108        id_hash(self, fxhash::hash64(&(4, reps as u64)))
109    }
110    fn visit_literal(&mut self, _: &str) {
111        id_hash(self, 5)
112    }
113    fn visit_bytes(&mut self, _: &[u8]) {
114        id_hash(self, 6)
115    }
116    fn visit_regex(&mut self, _: &[u8]) {
117        id_hash(self, 7)
118    }
119    fn visit_group(&mut self) {
120        id_hash(self, 8)
121    }
122}
123
124// Code is adapted from:
125// <https://doc.rust-lang.org/src/core/tuple.rs.html#10>
126// <https://doc.rust-lang.org/src/core/hash/mod.rs.html#879>
127macro_rules! impl_visitor_tuple {
128    () => (
129        impl Visitor for () {
130            #[inline]
131            fn new() {}
132        }
133    );
134
135    ( $($name:ident)+) => (
136        #[allow(non_snake_case)]
137        impl<$($name: Visitor),+> Visitor for ($($name,)+) {
138            fn new() -> ($($name,)+) {
139                ($({ let x: $name = Visitor::new(); x},)+)
140            }
141
142            fn visit_or(&mut self, index: usize) {
143                let ($(ref mut $name,)+) = *self;
144                $($name.visit_or(index);)+
145            }
146            fn visit_concat(&mut self) {
147                let ($(ref mut $name,)+) = *self;
148                $($name.visit_concat();)+
149            }
150            fn visit_optional(&mut self, b: bool) {
151                let ($(ref mut $name,)+) = *self;
152                $($name.visit_optional(b);)+
153            }
154            fn visit_repetition(&mut self, reps: usize) {
155                let ($(ref mut $name,)+) = *self;
156                $($name.visit_repetition(reps);)+
157            }
158            fn visit_reference(&mut self, index: usize) {
159                let ($(ref mut $name,)+) = *self;
160                $($name.visit_reference(index);)+
161            }
162            fn visit_literal(&mut self, val: &str) {
163                let ($(ref mut $name,)+) = *self;
164                $($name.visit_literal(val);)+
165            }
166            fn visit_bytes(&mut self, val: &[u8]) {
167                let ($(ref mut $name,)+) = *self;
168                $($name.visit_bytes(val);)+
169            }
170            fn visit_regex(&mut self, val: &[u8]) {
171                let ($(ref mut $name,)+) = *self;
172                $($name.visit_regex(val);)+
173            }
174            fn visit_group(&mut self) {
175                let ($(ref mut $name,)+) = *self;
176                $($name.visit_group();)+
177            }
178            fn visit_u16(&mut self, num: u16) {
179                let ($(ref mut $name,)+) = *self;
180                $($name.visit_u16(num);)+
181            }
182            fn visit_str(&mut self, s: &str) {
183                let ($(ref mut $name,)+) = *self;
184                $($name.visit_str(s);)+
185            }
186        }
187    );
188}
189
190impl_visitor_tuple! {}
191impl_visitor_tuple! { T }
192impl_visitor_tuple! { T B }
193impl_visitor_tuple! { T B C }
194impl_visitor_tuple! { T B C D }
195impl_visitor_tuple! { T B C D E }
196impl_visitor_tuple! { T B C D E F }
197impl_visitor_tuple! { T B C D E F G }
198impl_visitor_tuple! { T B C D E F G H }
199impl_visitor_tuple! { T B C D E F G H I }
200impl_visitor_tuple! { T B C D E F G H I J }
201impl_visitor_tuple! { T B C D E F G H I J K }
202impl_visitor_tuple! { T B C D E F G H I J K L }