1use bit_set::BitSet;
2
3use rand::{rng, seq::IteratorRandom};
4
5use crate::{
6 automata::{TransitionType, NFA},
7 re::{deriv::DerivativeBuilder, ReBuilder, Regex},
8 SmtString,
9};
10
11pub fn sample_regex(
16 regex: &Regex,
17 builder: &mut ReBuilder,
18 max_depth: usize,
19 comp: bool,
20) -> Option<SmtString> {
21 let mut w = SmtString::empty();
22 let mut deriver = DerivativeBuilder::default();
23
24 let mut i = 0;
25 let mut re = regex.clone();
26
27 let done = |re: &Regex| {
28 if comp {
29 !re.nullable()
30 } else {
31 re.nullable()
32 }
33 };
34
35 if done(&re) {
36 return Some(w);
37 }
38
39 while !done(&re) && i < max_depth {
40 let next = re
41 .first()
42 .iter()
43 .choose(&mut rng())
44 .and_then(|c| c.choose())?;
45 w.push(next);
46 re = deriver.deriv(&re, next, builder);
47 i += 1;
48 }
49
50 if done(&re) {
51 Some(w)
52 } else {
53 None
54 }
55}
56
57pub fn sample_nfa(nfa: &NFA, max: usize, comp: bool) -> Option<SmtString> {
64 let mut w = SmtString::empty();
65
66 let mut states = BitSet::new();
67 if let Some(q0) = nfa.initial() {
68 states = BitSet::from_iter(nfa.epsilon_closure(q0).unwrap());
69 }
70 let mut i = 0;
71
72 let done = |s: &BitSet| {
73 if comp {
74 !s.iter().any(|q| nfa.is_final(q))
75 } else {
76 s.iter().any(|q| nfa.is_final(q))
77 }
78 };
79
80 while i <= max {
81 i += 1;
82 if done(&states) {
84 return Some(w);
85 }
86
87 let mut transitions = Vec::new();
89 for q in states.iter() {
90 transitions.extend(nfa.transitions_from(q).unwrap());
91 }
92 let transition = transitions.iter().choose(&mut rng())?;
94 let c = match transition.get_type() {
96 TransitionType::Range(r) => r.choose(),
97 TransitionType::NotRange(nr) => {
98 let r = nr.complement();
99 r.into_iter()
100 .filter(|r| !r.is_empty())
101 .choose(&mut rng())
102 .and_then(|r| r.choose())
103 }
104 TransitionType::Epsilon => None,
105 };
106 match c {
107 Some(c) => {
108 w.push(c);
109 states = BitSet::from_iter(nfa.epsilon_closure(transition.get_dest()).unwrap());
111 }
112 None => continue,
113 }
114 }
115
116 None
117}
118
119#[cfg(test)]
120mod tests {
121
122 use quickcheck_macros::quickcheck;
123 use smallvec::smallvec;
124
125 use crate::alphabet::CharRange;
126
127 use super::*;
128
129 #[test]
130 fn sample_const() {
131 let mut builder = ReBuilder::default();
132 let regex = builder.to_re("foo".into());
133
134 assert_eq!(
135 sample_regex(®ex, &mut builder, 3, false),
136 Some("foo".into())
137 );
138 assert_eq!(
139 sample_regex(®ex, &mut builder, 10, false),
140 Some("foo".into())
141 );
142 assert_eq!(sample_regex(®ex, &mut builder, 2, false), None);
143 }
144
145 #[test]
146 fn sample_with_optional_characters() {
147 let mut builder = ReBuilder::default();
148
149 let o = builder.to_re("o".into());
151 let fo = builder.to_re("fo".into());
152 let bar = builder.to_re("bar".into());
153 let o_or_bar = builder.union(smallvec![o, bar]);
154 let regex = builder.concat(smallvec![fo, o_or_bar]);
155
156 assert!(sample_regex(®ex, &mut builder, 5, false).is_some());
158 }
159
160 #[quickcheck]
161 fn sample_with_character_range(range: CharRange) {
162 let mut builder = ReBuilder::default();
163 let regex = builder.range(range);
164
165 assert!(sample_regex(®ex, &mut builder, 1, false).is_some());
166 assert!(sample_regex(®ex, &mut builder, 3, false).is_some());
168 }
169
170 #[quickcheck]
171 fn sample_character_range(range: CharRange) {
172 let mut builder = ReBuilder::default();
173 let regex = builder.range(range);
174
175 assert!(sample_regex(®ex, &mut builder, 1, false).is_some());
176 assert!(sample_regex(®ex, &mut builder, 3, false).is_some());
178 }
179
180 #[quickcheck]
181 fn sample_character_range_pow(range: CharRange, n: u32) {
182 let n = n % 100;
183 let mut builder = ReBuilder::default();
184 let regex = builder.range(range);
185 let regex = builder.pow(regex, n);
186
187 for i in 0..n {
188 assert!(sample_regex(®ex, &mut builder, i as usize, false).is_none());
189 }
190 assert!(sample_regex(®ex, &mut builder, n as usize, false).is_some());
191 }
192
193 #[quickcheck]
194 fn sample_alternatives(rs: Vec<CharRange>) {
195 let n = rs.len();
196 let mut builder = ReBuilder::default();
197 let rs = rs.into_iter().map(|r| builder.range(r)).collect();
198 let regex = builder.union(rs);
199
200 if n > 0 {
201 assert!(sample_regex(®ex, &mut builder, 1, false).is_some());
202 } else {
203 assert!(sample_regex(®ex, &mut builder, 10, false).is_none());
204 }
205 }
206
207 #[test]
208 fn sampling_alternatives_bug() {
209 let rs = vec![
210 CharRange::new(2u32, 5u32),
214 CharRange::new(3u32, 6u32),
215 CharRange::new(1u32, 4u32),
216 ];
217
218 let n = rs.len();
220 let mut builder = ReBuilder::default();
221 let rs = rs.into_iter().map(|r| builder.range(r)).collect();
222 let regex = builder.union(rs);
223
224 if n > 0 {
225 assert!(sample_regex(®ex, &mut builder, 1, false).is_some());
226 } else {
227 assert!(sample_regex(®ex, &mut builder, 10, false).is_none());
228 }
229 }
230
231 #[quickcheck]
232 fn sample_opt(r: CharRange) {
233 let mut builder = ReBuilder::default();
234 let r = builder.range(r);
235 let regex = builder.opt(r);
236
237 assert!(sample_regex(®ex, &mut builder, 0, false).is_some());
238 assert!(sample_regex(®ex, &mut builder, 1, false).is_some());
239 }
240
241 #[test]
242 fn sample_empty_string() {
243 let mut builder = ReBuilder::default();
244 let regex = builder.epsilon();
245
246 assert!(sample_regex(®ex, &mut builder, 0, false).is_some());
247 }
248
249 #[test]
250 fn sample_empty_regex() {
251 let mut builder = ReBuilder::default();
252 let regex = builder.none();
253
254 assert!(sample_regex(®ex, &mut builder, 0, false).is_none());
255 assert!(sample_regex(®ex, &mut builder, 20, false).is_none());
256 }
257
258 #[test]
259 fn sample_all() {
260 let mut builder = ReBuilder::default();
261 let regex = builder.all();
262
263 assert!(sample_regex(®ex, &mut builder, 0, false).is_some());
264 assert!(sample_regex(®ex, &mut builder, 20, false).is_some());
265 }
266
267 #[test]
268 fn sample_any() {
269 let mut builder = ReBuilder::default();
270 let regex = builder.allchar();
271
272 assert!(sample_regex(®ex, &mut builder, 0, false).is_none());
273 assert!(sample_regex(®ex, &mut builder, 20, false).is_some());
274 }
275
276 #[test]
277 fn test_sample_nfa_accepts_word() {
278 let mut nfa = NFA::new();
279 let q0 = nfa.new_state();
280 let q1 = nfa.new_state();
281
282 nfa.set_initial(q0).unwrap();
283 nfa.add_final(q1).unwrap();
284
285 nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
286 .unwrap();
287
288 let sample = sample_nfa(&nfa, 10, false);
289 assert_eq!(sample, Some(SmtString::from("a")));
290 }
291
292 #[test]
293 fn test_sample_nfa_rejects_unreachable_final_state() {
294 let mut nfa = NFA::new();
295 let q0 = nfa.new_state();
296 let q1 = nfa.new_state(); nfa.set_initial(q0).unwrap();
299 nfa.add_final(q1).unwrap();
300
301 let sample = sample_nfa(&nfa, 10, false);
302 assert_eq!(sample, None);
303 }
304
305 #[test]
306 fn test_sample_nfa_handles_epsilon_transitions() {
307 let mut nfa = NFA::new();
308 let q0 = nfa.new_state();
309 let q1 = nfa.new_state();
310 let q2 = nfa.new_state();
311
312 nfa.set_initial(q0).unwrap();
313 nfa.add_final(q2).unwrap();
314
315 nfa.add_transition(q0, q1, TransitionType::Epsilon).unwrap();
316 nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
317 .unwrap();
318
319 let sample = sample_nfa(&nfa, 10, false);
320 assert_eq!(sample, Some(SmtString::from("b")));
321 }
322
323 #[test]
324 fn test_sample_nfa_stops_at_max_depth() {
325 let mut nfa = NFA::new();
326 let q0 = nfa.new_state();
327 let q1 = nfa.new_state();
328 let q2 = nfa.new_state();
329
330 nfa.set_initial(q0).unwrap();
331 nfa.add_final(q2).unwrap();
332
333 nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'z')))
335 .unwrap();
336 nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('a', 'z')))
337 .unwrap();
338
339 let sample = sample_nfa(&nfa, 1, false); assert_eq!(sample, None); }
342
343 #[test]
344 fn test_sample_nfa_handles_not_range_transitions() {
345 let mut nfa = NFA::new();
346 let q0 = nfa.new_state();
347 let q1 = nfa.new_state();
348
349 nfa.set_initial(q0).unwrap();
350 nfa.add_final(q1).unwrap();
351
352 nfa.add_transition(q0, q1, TransitionType::NotRange(CharRange::new('x', 'z')))
353 .unwrap();
354
355 let sample = sample_nfa(&nfa, 10, false);
356 assert!(sample.is_some()); if let Some(word) = sample {
358 assert!(
359 !word.contains_char('x') && !word.contains_char('y') && !word.contains_char('z')
360 );
361 }
362 }
363
364 #[test]
365 fn test_sample_nfa_multiple_paths() {
366 let mut nfa = NFA::new();
367 let q0 = nfa.new_state();
368 let q1 = nfa.new_state();
369 let q2 = nfa.new_state();
370 let q3 = nfa.new_state();
371
372 nfa.set_initial(q0).unwrap();
373 nfa.add_final(q3).unwrap();
374
375 nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
376 .unwrap();
377 nfa.add_transition(q1, q3, TransitionType::Range(CharRange::new('b', 'b')))
378 .unwrap();
379 nfa.add_transition(q0, q2, TransitionType::Range(CharRange::new('x', 'x')))
380 .unwrap();
381 nfa.add_transition(q2, q3, TransitionType::Range(CharRange::new('y', 'y')))
382 .unwrap();
383
384 let sample = sample_nfa(&nfa, 10, false);
385 assert!(sample == Some(SmtString::from("ab")) || sample == Some(SmtString::from("xy")));
386 }
387
388 #[test]
389 fn test_sample_nfa_leaves_loops() {
390 let mut nfa = NFA::new();
391 let q0 = nfa.new_state();
392 let q1 = nfa.new_state();
393
394 nfa.set_initial(q0).unwrap();
395 nfa.add_final(q1).unwrap();
396
397 nfa.add_transition(q0, q0, TransitionType::Range(CharRange::singleton('a')))
398 .unwrap();
399 nfa.add_transition(q0, q1, TransitionType::Range(CharRange::singleton('b')))
400 .unwrap();
401
402 match sample_nfa(&nfa, 100, false) {
403 Some(w) => {
404 let l = w.len();
405 let mut expected = SmtString::from("a").repeat(l - 1);
406 expected.push('b');
407 assert_eq!(w, expected);
408 }
409 None => unreachable!("Sample should not return None"),
410 }
411 }
412}