smt_str/
sampling.rs

1use bit_set::BitSet;
2
3use rand::{rng, seq::IteratorRandom};
4
5use crate::{
6    automata::{TransitionType, NFA},
7    re::{deriv::DerivativeBuilder, ReBuilder, Regex},
8    SmtString,
9};
10
11/// Tries to sample a word that is accepted by the regex.
12/// The function aborts if no word is found after `max_depth` steps.
13/// If `comp` is set to `true`, the function will return a word that is not accepted by the regex.
14/// In other words, the function will sample a word from the complement of the regex's language.
15pub fn sample_regex(
16    regex: &Regex,
17    builder: &mut ReBuilder,
18    max_depth: usize,
19    comp: bool,
20) -> Option<SmtString> {
21    let mut w = SmtString::empty();
22    let mut deriver = DerivativeBuilder::default();
23
24    let mut i = 0;
25    let mut re = regex.clone();
26
27    let done = |re: &Regex| {
28        if comp {
29            !re.nullable()
30        } else {
31            re.nullable()
32        }
33    };
34
35    if done(&re) {
36        return Some(w);
37    }
38
39    while !done(&re) && i < max_depth {
40        let next = re
41            .first()
42            .iter()
43            .choose(&mut rng())
44            .and_then(|c| c.choose())?;
45        w.push(next);
46        re = deriver.deriv(&re, next, builder);
47        i += 1;
48    }
49
50    if done(&re) {
51        Some(w)
52    } else {
53        None
54    }
55}
56
57/// Tries to sample a word that is accepted or not accepted by the NFA.
58/// Randomly picks transitions to follow until a final state is reached.
59/// Once a final state is reached, the function returns the word that was sampled.
60/// The function aborts and returns `None` if no word is found after `max_depth` transitions.
61/// If `comp` is set to `true`, the function will return a word that is not accepted by the NFA.
62/// In other words, the function will sample a word from the complement of the NFA's language.
63pub fn sample_nfa(nfa: &NFA, max: usize, comp: bool) -> Option<SmtString> {
64    let mut w = SmtString::empty();
65
66    let mut states = BitSet::new();
67    if let Some(q0) = nfa.initial() {
68        states = BitSet::from_iter(nfa.epsilon_closure(q0).unwrap());
69    }
70    let mut i = 0;
71
72    let done = |s: &BitSet| {
73        if comp {
74            !s.iter().any(|q| nfa.is_final(q))
75        } else {
76            s.iter().any(|q| nfa.is_final(q))
77        }
78    };
79
80    while i <= max {
81        i += 1;
82        // Check if the current state set contains a final state
83        if done(&states) {
84            return Some(w);
85        }
86
87        // Collect all transitions from the current state set
88        let mut transitions = Vec::new();
89        for q in states.iter() {
90            transitions.extend(nfa.transitions_from(q).unwrap());
91        }
92        // Pick a random transition
93        let transition = transitions.iter().choose(&mut rng())?;
94        // Pick a random character from the transition
95        let c = match transition.get_type() {
96            TransitionType::Range(r) => r.choose(),
97            TransitionType::NotRange(nr) => {
98                let r = nr.complement();
99                r.into_iter()
100                    .filter(|r| !r.is_empty())
101                    .choose(&mut rng())
102                    .and_then(|r| r.choose())
103            }
104            TransitionType::Epsilon => None,
105        };
106        match c {
107            Some(c) => {
108                w.push(c);
109                // set the next state set to the epsilon closure of the destination state
110                states = BitSet::from_iter(nfa.epsilon_closure(transition.get_dest()).unwrap());
111            }
112            None => continue,
113        }
114    }
115
116    None
117}
118
119#[cfg(test)]
120mod tests {
121
122    use quickcheck_macros::quickcheck;
123    use smallvec::smallvec;
124
125    use crate::alphabet::CharRange;
126
127    use super::*;
128
129    #[test]
130    fn sample_const() {
131        let mut builder = ReBuilder::default();
132        let regex = builder.to_re("foo".into());
133
134        assert_eq!(
135            sample_regex(&regex, &mut builder, 3, false),
136            Some("foo".into())
137        );
138        assert_eq!(
139            sample_regex(&regex, &mut builder, 10, false),
140            Some("foo".into())
141        );
142        assert_eq!(sample_regex(&regex, &mut builder, 2, false), None);
143    }
144
145    #[test]
146    fn sample_with_optional_characters() {
147        let mut builder = ReBuilder::default();
148
149        // fo(o|bar)
150        let o = builder.to_re("o".into());
151        let fo = builder.to_re("fo".into());
152        let bar = builder.to_re("bar".into());
153        let o_or_bar = builder.union(smallvec![o, bar]);
154        let regex = builder.concat(smallvec![fo, o_or_bar]);
155
156        // Test matching "foo"
157        assert!(sample_regex(&regex, &mut builder, 5, false).is_some());
158    }
159
160    #[quickcheck]
161    fn sample_with_character_range(range: CharRange) {
162        let mut builder = ReBuilder::default();
163        let regex = builder.range(range);
164
165        assert!(sample_regex(&regex, &mut builder, 1, false).is_some());
166        // Test matching word within the class
167        assert!(sample_regex(&regex, &mut builder, 3, false).is_some());
168    }
169
170    #[quickcheck]
171    fn sample_character_range(range: CharRange) {
172        let mut builder = ReBuilder::default();
173        let regex = builder.range(range);
174
175        assert!(sample_regex(&regex, &mut builder, 1, false).is_some());
176        // Test matching word within the class
177        assert!(sample_regex(&regex, &mut builder, 3, false).is_some());
178    }
179
180    #[quickcheck]
181    fn sample_character_range_pow(range: CharRange, n: u32) {
182        let n = n % 100;
183        let mut builder = ReBuilder::default();
184        let regex = builder.range(range);
185        let regex = builder.pow(regex, n);
186
187        for i in 0..n {
188            assert!(sample_regex(&regex, &mut builder, i as usize, false).is_none());
189        }
190        assert!(sample_regex(&regex, &mut builder, n as usize, false).is_some());
191    }
192
193    #[quickcheck]
194    fn sample_alternatives(rs: Vec<CharRange>) {
195        let n = rs.len();
196        let mut builder = ReBuilder::default();
197        let rs = rs.into_iter().map(|r| builder.range(r)).collect();
198        let regex = builder.union(rs);
199
200        if n > 0 {
201            assert!(sample_regex(&regex, &mut builder, 1, false).is_some());
202        } else {
203            assert!(sample_regex(&regex, &mut builder, 10, false).is_none());
204        }
205    }
206
207    #[test]
208    fn sampling_alternatives_bug() {
209        let rs = vec![
210            //CharRange::new(76887, 179877),
211            //CharRange::new(142686, 186533),
212            //CharRange::new(51684, 146039),
213            CharRange::new(2u32, 5u32),
214            CharRange::new(3u32, 6u32),
215            CharRange::new(1u32, 4u32),
216        ];
217
218        //  CharRange  CharRange { start: SmtChar(51684), end: SmtChar(146039) }])]
219        let n = rs.len();
220        let mut builder = ReBuilder::default();
221        let rs = rs.into_iter().map(|r| builder.range(r)).collect();
222        let regex = builder.union(rs);
223
224        if n > 0 {
225            assert!(sample_regex(&regex, &mut builder, 1, false).is_some());
226        } else {
227            assert!(sample_regex(&regex, &mut builder, 10, false).is_none());
228        }
229    }
230
231    #[quickcheck]
232    fn sample_opt(r: CharRange) {
233        let mut builder = ReBuilder::default();
234        let r = builder.range(r);
235        let regex = builder.opt(r);
236
237        assert!(sample_regex(&regex, &mut builder, 0, false).is_some());
238        assert!(sample_regex(&regex, &mut builder, 1, false).is_some());
239    }
240
241    #[test]
242    fn sample_empty_string() {
243        let mut builder = ReBuilder::default();
244        let regex = builder.epsilon();
245
246        assert!(sample_regex(&regex, &mut builder, 0, false).is_some());
247    }
248
249    #[test]
250    fn sample_empty_regex() {
251        let mut builder = ReBuilder::default();
252        let regex = builder.none();
253
254        assert!(sample_regex(&regex, &mut builder, 0, false).is_none());
255        assert!(sample_regex(&regex, &mut builder, 20, false).is_none());
256    }
257
258    #[test]
259    fn sample_all() {
260        let mut builder = ReBuilder::default();
261        let regex = builder.all();
262
263        assert!(sample_regex(&regex, &mut builder, 0, false).is_some());
264        assert!(sample_regex(&regex, &mut builder, 20, false).is_some());
265    }
266
267    #[test]
268    fn sample_any() {
269        let mut builder = ReBuilder::default();
270        let regex = builder.allchar();
271
272        assert!(sample_regex(&regex, &mut builder, 0, false).is_none());
273        assert!(sample_regex(&regex, &mut builder, 20, false).is_some());
274    }
275
276    #[test]
277    fn test_sample_nfa_accepts_word() {
278        let mut nfa = NFA::new();
279        let q0 = nfa.new_state();
280        let q1 = nfa.new_state();
281
282        nfa.set_initial(q0).unwrap();
283        nfa.add_final(q1).unwrap();
284
285        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
286            .unwrap();
287
288        let sample = sample_nfa(&nfa, 10, false);
289        assert_eq!(sample, Some(SmtString::from("a")));
290    }
291
292    #[test]
293    fn test_sample_nfa_rejects_unreachable_final_state() {
294        let mut nfa = NFA::new();
295        let q0 = nfa.new_state();
296        let q1 = nfa.new_state(); // Final state, but not reachable
297
298        nfa.set_initial(q0).unwrap();
299        nfa.add_final(q1).unwrap();
300
301        let sample = sample_nfa(&nfa, 10, false);
302        assert_eq!(sample, None);
303    }
304
305    #[test]
306    fn test_sample_nfa_handles_epsilon_transitions() {
307        let mut nfa = NFA::new();
308        let q0 = nfa.new_state();
309        let q1 = nfa.new_state();
310        let q2 = nfa.new_state();
311
312        nfa.set_initial(q0).unwrap();
313        nfa.add_final(q2).unwrap();
314
315        nfa.add_transition(q0, q1, TransitionType::Epsilon).unwrap();
316        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
317            .unwrap();
318
319        let sample = sample_nfa(&nfa, 10, false);
320        assert_eq!(sample, Some(SmtString::from("b")));
321    }
322
323    #[test]
324    fn test_sample_nfa_stops_at_max_depth() {
325        let mut nfa = NFA::new();
326        let q0 = nfa.new_state();
327        let q1 = nfa.new_state();
328        let q2 = nfa.new_state();
329
330        nfa.set_initial(q0).unwrap();
331        nfa.add_final(q2).unwrap();
332
333        // Large range transition that makes random sampling harder
334        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'z')))
335            .unwrap();
336        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('a', 'z')))
337            .unwrap();
338
339        let sample = sample_nfa(&nfa, 1, false); // Very low max depth
340        assert_eq!(sample, None); // Should not reach q2 in one step
341    }
342
343    #[test]
344    fn test_sample_nfa_handles_not_range_transitions() {
345        let mut nfa = NFA::new();
346        let q0 = nfa.new_state();
347        let q1 = nfa.new_state();
348
349        nfa.set_initial(q0).unwrap();
350        nfa.add_final(q1).unwrap();
351
352        nfa.add_transition(q0, q1, TransitionType::NotRange(CharRange::new('x', 'z')))
353            .unwrap();
354
355        let sample = sample_nfa(&nfa, 10, false);
356        assert!(sample.is_some()); // Should produce a valid word
357        if let Some(word) = sample {
358            assert!(
359                !word.contains_char('x') && !word.contains_char('y') && !word.contains_char('z')
360            );
361        }
362    }
363
364    #[test]
365    fn test_sample_nfa_multiple_paths() {
366        let mut nfa = NFA::new();
367        let q0 = nfa.new_state();
368        let q1 = nfa.new_state();
369        let q2 = nfa.new_state();
370        let q3 = nfa.new_state();
371
372        nfa.set_initial(q0).unwrap();
373        nfa.add_final(q3).unwrap();
374
375        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
376            .unwrap();
377        nfa.add_transition(q1, q3, TransitionType::Range(CharRange::new('b', 'b')))
378            .unwrap();
379        nfa.add_transition(q0, q2, TransitionType::Range(CharRange::new('x', 'x')))
380            .unwrap();
381        nfa.add_transition(q2, q3, TransitionType::Range(CharRange::new('y', 'y')))
382            .unwrap();
383
384        let sample = sample_nfa(&nfa, 10, false);
385        assert!(sample == Some(SmtString::from("ab")) || sample == Some(SmtString::from("xy")));
386    }
387
388    #[test]
389    fn test_sample_nfa_leaves_loops() {
390        let mut nfa = NFA::new();
391        let q0 = nfa.new_state();
392        let q1 = nfa.new_state();
393
394        nfa.set_initial(q0).unwrap();
395        nfa.add_final(q1).unwrap();
396
397        nfa.add_transition(q0, q0, TransitionType::Range(CharRange::singleton('a')))
398            .unwrap();
399        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::singleton('b')))
400            .unwrap();
401
402        match sample_nfa(&nfa, 100, false) {
403            Some(w) => {
404                let l = w.len();
405                let mut expected = SmtString::from("a").repeat(l - 1);
406                expected.push('b');
407                assert_eq!(w, expected);
408            }
409            None => unreachable!("Sample should not return None"),
410        }
411    }
412}