regex_anre/
process.rs

1// Copyright (c) 2024 Hemashushu <hippospark@gmail.com>, All rights reserved.
2//
3// This Source Code Form is subject to the terms of
4// the Mozilla Public License version 2.0 and additional exceptions,
5// more details in file LICENSE, LICENSE.additional and CONTRIBUTING.
6
7use std::ops::{Index, Range};
8
9use crate::{
10    compiler::{compile_from_anre, compile_from_regex},
11    AnreError,
12    instance::{Instance, Thread},
13    route::{Route, MAIN_LINE_INDEX},
14    transition::CheckResult,
15    utf8reader::read_char,
16};
17
18pub struct Regex {
19    pub route: Route,
20}
21
22impl Regex {
23    pub fn new(pattern: &str) -> Result<Self, AnreError> {
24        let route = compile_from_regex(pattern)?;
25        Ok(Regex { route })
26    }
27
28    pub fn from_anre(expression: &str) -> Result<Self, AnreError> {
29        let route = compile_from_anre(expression)?;
30        Ok(Regex { route })
31    }
32
33    pub fn find<'a, 'b>(&'a self, text: &'b str) -> Option<Match<'a, 'b>> {
34        let bytes = text.as_bytes();
35        let number_of_capture_groups = self.route.capture_groups.len();
36        let mut instance = Instance::from_bytes(bytes, number_of_capture_groups);
37
38        if !instance.exec(&self.route, 0) {
39            return None;
40        }
41
42        let match_range = &instance.match_ranges[0];
43        let match_ = Match::new(
44            match_range.start,
45            match_range.end,
46            self.route.get_capture_group_name_by_index(0),
47            sub_string(bytes, match_range.start, match_range.end),
48        );
49
50        Some(match_)
51    }
52
53    pub fn find_iter<'a, 'b>(&'a self, text: &'b str) -> Matches<'a, 'b> {
54        let bytes = text.as_bytes();
55        let number_of_capture_groups = self.route.capture_groups.len();
56        let instance = Instance::from_bytes(bytes, number_of_capture_groups);
57
58        Matches::new(&self.route, instance)
59    }
60
61    pub fn captures<'a, 'b>(&'a self, text: &'b str) -> Option<Captures<'a, 'b>> {
62        let bytes = text.as_bytes();
63        let number_of_capture_groups = self.route.capture_groups.len();
64        let mut instance = Instance::from_bytes(bytes, number_of_capture_groups);
65
66        if !instance.exec(&self.route, 0) {
67            return None;
68        }
69
70        let matches: Vec<Match> = instance
71            .match_ranges
72            .iter()
73            .enumerate()
74            .map(|(idx, match_range)| {
75                Match::new(
76                    match_range.start,
77                    match_range.end,
78                    self.route.get_capture_group_name_by_index(idx),
79                    sub_string(bytes, match_range.start, match_range.end),
80                )
81            })
82            .collect();
83
84        Some(Captures { matches })
85    }
86
87    pub fn captures_iter<'a, 'b>(&'a self, text: &'b str) -> CaptureMatches<'a, 'b> {
88        let bytes = text.as_bytes();
89        let number_of_capture_groups = self.route.capture_groups.len();
90        let instance = Instance::from_bytes(bytes, number_of_capture_groups);
91
92        CaptureMatches::new(&self.route, instance)
93    }
94
95    pub fn is_match(&self, text: &str) -> bool {
96        let bytes = text.as_bytes();
97        let number_of_capture_groups = self.route.capture_groups.len();
98        let mut instance = Instance::from_bytes(bytes, number_of_capture_groups);
99
100        instance.exec(&self.route, 0)
101    }
102}
103
104pub struct CaptureMatches<'a, 'b> {
105    route: &'a Route,
106    instance: Instance<'b>,
107    last_position: usize,
108}
109
110impl<'a, 'b> CaptureMatches<'a, 'b> {
111    fn new(route: &'a Route, instance: Instance<'b>) -> Self {
112        CaptureMatches {
113            route,
114            instance,
115            last_position: 0,
116        }
117    }
118}
119
120impl<'a, 'b> Iterator for CaptureMatches<'a, 'b> {
121    type Item = Captures<'a, 'b>;
122
123    fn next(&mut self) -> Option<Self::Item> {
124        if !self.instance.exec(self.route, self.last_position) {
125            return None;
126        }
127
128        let matches: Vec<Match> = self
129            .instance
130            .match_ranges
131            .iter()
132            .enumerate()
133            .map(|(idx, match_range)| {
134                Match::new(
135                    match_range.start,
136                    match_range.end,
137                    self.route.get_capture_group_name_by_index(idx),
138                    sub_string(self.instance.bytes, match_range.start, match_range.end),
139                )
140            })
141            .collect();
142
143        self.last_position = matches[0].end;
144
145        Some(Captures { matches })
146    }
147}
148
149pub struct Matches<'a, 'b> {
150    route: &'a Route,
151    instance: Instance<'b>,
152    last_position: usize,
153}
154
155impl<'a, 'b> Matches<'a, 'b> {
156    fn new(route: &'a Route, instance: Instance<'b>) -> Self {
157        Matches {
158            route,
159            instance,
160            last_position: 0,
161        }
162    }
163}
164
165impl<'a, 'b> Iterator for Matches<'a, 'b> {
166    type Item = Match<'a, 'b>;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        if !self.instance.exec(self.route, self.last_position) {
170            return None;
171        }
172
173        let match_range = &self.instance.match_ranges[0];
174        let match_ = Match::new(
175            match_range.start,
176            match_range.end,
177            self.route.get_capture_group_name_by_index(0),
178            sub_string(self.instance.bytes, match_range.start, match_range.end),
179        );
180
181        self.last_position = match_.end;
182
183        Some(match_)
184    }
185}
186
187impl Instance<'_> {
188    pub fn exec(&mut self, route: &Route, start: usize) -> bool {
189        let end = self.bytes.len();
190        new_thread(self, route, MAIN_LINE_INDEX, start, end)
191    }
192}
193
194#[derive(Debug, PartialEq, Clone)]
195pub struct Captures<'a, 'b> {
196    pub matches: Vec<Match<'a, 'b>>,
197}
198
199impl Captures<'_, '_> {
200    // the following methods are intended to
201    // be compatible with the 'Captures' API of crate 'regex':
202    // https://docs.rs/regex/latest/regex/struct.Captures.html
203
204    pub fn get(&self, index: usize) -> Option<&Match> {
205        self.matches.get(index)
206    }
207
208    pub fn name(&self, name: &str) -> Option<&Match> {
209        // Option<Match> {
210        self.matches.iter().find(|item| match item.name {
211            Some(s) => s == name,
212            None => false,
213        })
214    }
215
216    // e.g.
217    //
218    // ```
219    //   let c = re.find("...").next().unwrap();
220    //   let (whole, [one, two, three]) = c.extract();
221    // ```
222    pub fn extract<const N: usize>(&self) -> (&str, [&str; N]) {
223        let mut items: [&str; N] = [""; N];
224        for (idx, item) in items.iter_mut().enumerate() {
225            *item = self.matches[idx + 1].value;
226        }
227        (self.matches[0].value, items)
228    }
229
230    pub fn len(&self) -> usize {
231        self.matches.len()
232    }
233
234    pub fn is_empty(&self) -> bool {
235        self.len() == 0
236    }
237}
238
239impl Index<usize> for Captures<'_, '_> {
240    type Output = str;
241
242    fn index(&self, index: usize) -> &Self::Output {
243        self.get(index)
244            .unwrap_or_else(|| panic!(
245                "Index {} is out of range of the capture group and the length of capture groups is {}.",
246                index, self.len()))
247            .as_str()
248    }
249}
250
251impl Index<&str> for Captures<'_, '_> {
252    type Output = str;
253
254    fn index(&self, name: &str) -> &Self::Output {
255        self.name(name)
256            .unwrap_or_else(|| panic!("Cannot find the capture group named \"{}\".", name))
257            .as_str()
258    }
259}
260
/// One matched span of the searched text: either the whole match or a
/// single capture group.
#[derive(Debug, PartialEq, Clone)]
pub struct Match<'a, 'b> {
    /// Byte offset in the UTF-8 stream where the span begins (inclusive).
    pub start: usize,
    /// Byte offset in the UTF-8 stream where the span stops (exclusive).
    pub end: usize,
    /// Name of the capture group, when it has one.
    pub name: Option<&'a str>,
    /// The text associated with this span.
    pub value: &'b str,
}

impl<'a, 'b> Match<'a, 'b> {
    /// Builds a match from its byte range, optional group name and text.
    pub fn new(start: usize, end: usize, name: Option<&'a str>, value: &'b str) -> Self {
        Self {
            start,
            end,
            name,
            value,
        }
    }

    // The methods below are meant to be compatible with the `Match`
    // API of the `regex` crate:
    // https://docs.rs/regex/latest/regex/struct.Match.html

    /// Inclusive start byte offset.
    pub fn start(&self) -> usize {
        self.start
    }

    /// Exclusive end byte offset.
    pub fn end(&self) -> usize {
        self.end
    }

    /// Whether the span covers zero bytes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Length of the span in bytes.
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// The span as a half-open byte range `start..end`.
    pub fn range(&self) -> Range<usize> {
        self.start..self.end
    }

    /// The text associated with this span.
    pub fn as_str(&self) -> &'b str {
        self.value
    }
}
310
/// Returns the text stored in `bytes[start..end_excluded]`.
///
/// # Panics
///
/// Panics when the range is out of bounds or the selected bytes are not
/// valid UTF-8.
fn sub_string(bytes: &[u8], start: usize, end_excluded: usize) -> &str {
    std::str::from_utf8(&bytes[start..end_excluded]).unwrap()
}
321
322pub fn new_thread(
323    instance: &mut Instance,
324    route: &Route,
325    line_index: usize,
326    mut start: usize,
327    end: usize,
328) -> bool {
329    let thread = Thread::new(start, end, line_index);
330    instance.threads.push(thread);
331
332    let mut result = false;
333    while start < end {
334        if run_thread(instance, route, start) {
335            result = true;
336            break;
337        }
338
339        if route.lines[line_index].fixed_start_position {
340            break;
341        }
342
343        // move forward one character and try again
344        let (_, byte_length) = read_char(instance.bytes, start);
345        start += byte_length;
346    }
347
348    instance.threads.pop();
349    result
350}
351
352fn run_thread(instance: &mut Instance, route: &Route, position: usize) -> bool {
353    let (line_index, entry_node_index, exit_node_index) = {
354        let thread = instance.get_current_thread_ref();
355        let line_index = thread.line_index;
356        let line = &route.lines[line_index];
357        (line_index, line.start_node_index, line.end_node_index)
358    };
359
360    // add transitions of the entry node
361    instance.append_transition_stack_frames_by_node(route, entry_node_index, position, 0);
362
363    let mut result = false;
364
365    while let Some(frame) = instance.get_current_thread_ref_mut().transition_stack.pop() {
366        // get the transition
367        let line = &route.lines[line_index];
368        let node = &line.nodes[frame.current_node_index];
369        let transition_item = &node.transition_items[frame.transition_index];
370
371        let position = frame.position;
372        let last_repetition_count = frame.repetition_count;
373        let transition = &transition_item.transition;
374        let target_node_index = transition_item.target_node_index;
375
376        let check_result = transition.check(instance, route, position, last_repetition_count);
377        match check_result {
378            CheckResult::Success(move_forward, current_repetition_count) => {
379                if target_node_index == exit_node_index {
380                    result = true;
381                    break;
382                }
383
384                instance.append_transition_stack_frames_by_node(
385                    route,
386                    target_node_index,
387                    position + move_forward,
388                    current_repetition_count,
389                );
390            }
391            CheckResult::Failure => {
392                // check next transition
393            }
394        }
395    }
396
397    result
398}
399
400#[cfg(test)]
401mod tests {
402    use super::{Captures, Match, Regex};
403    use pretty_assertions::assert_eq;
404
405    fn new_match(start: usize, end: usize, value: &str) -> Match {
406        Match::new(start, end, None, value)
407    }
408
409    fn new_captures<'a, 'b>(
410        mes: &'a [(
411            /*start:*/ usize,
412            /*end:*/ usize,
413            /*name:*/ Option<&'a str>,
414            /*value:*/ &'b str,
415        )],
416    ) -> Captures<'a, 'b> {
417        let matches: Vec<Match> = mes
418            .iter()
419            .map(|item| Match::new(item.0, item.1, item.2, item.3))
420            .collect();
421
422        Captures { matches }
423    }
424
425    fn generate_res(anre: &str, regex: &str) -> [Regex; 2] {
426        [Regex::from_anre(anre).unwrap(), Regex::new(regex).unwrap()]
427    }
428
429    #[test]
430    fn test_process_char() {
431        // exists in the middle and at the end of the text
432        for re in generate_res(
433            "'a'", // anre
434            "a",   // regex
435        ) {
436            let mut matches = re.find_iter("babbaa");
437
438            assert_eq!(matches.next(), Some(new_match(1, 2, "a")));
439            assert_eq!(matches.next(), Some(new_match(4, 5, "a")));
440            assert_eq!(matches.next(), Some(new_match(5, 6, "a")));
441            assert_eq!(matches.next(), None);
442        }
443
444        // exists in the middle and at the beginning of the text
445        for re in generate_res(
446            "'a'", // anre
447            "a",   // regex
448        ) {
449            let mut matches = re.find_iter("abaabb");
450
451            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
452            assert_eq!(matches.next(), Some(new_match(2, 3, "a")));
453            assert_eq!(matches.next(), Some(new_match(3, 4, "a")));
454            assert_eq!(matches.next(), None);
455        }
456
457        // non-existent
458        for re in generate_res(
459            "'a'", // anre
460            "a",   // regex
461        ) {
462            let mut matches = re.find_iter("xyz");
463
464            assert_eq!(matches.next(), None);
465        }
466    }
467
468    #[test]
469    fn test_process_char_with_utf8() {
470        // existent
471        for re in generate_res(
472            "'文'", // anre
473            "文",   // regex
474        ) {
475            let mut matches = re.find_iter("abc中文字符文字🌏人文");
476
477            assert_eq!(matches.next(), Some(new_match(6, 9, "文")));
478            assert_eq!(matches.next(), Some(new_match(15, 18, "文")));
479            assert_eq!(matches.next(), Some(new_match(28, 31, "文")));
480            assert_eq!(matches.next(), None);
481        }
482
483        // non-existent
484        for re in generate_res(
485            "'文'", // anre
486            "文",   // regex
487        ) {
488            let mut matches = re.find_iter("abc正则表达式🌏改");
489
490            assert_eq!(matches.next(), None);
491        }
492    }
493
494    #[test]
495    fn test_process_string() {
496        // existent
497        for re in generate_res(
498            r#""abc""#, // anre
499            r#"abc"#,   // regex
500        ) {
501            let text = "ababcbcabc";
502            let mut matches = re.find_iter(text);
503
504            assert_eq!(matches.next(), Some(new_match(2, 5, "abc")));
505            assert_eq!(matches.next(), Some(new_match(7, 10, "abc")));
506            assert_eq!(matches.next(), None);
507        }
508
509        // non-existent
510        for re in generate_res(
511            r#""abc""#, // anre
512            r#"abc"#,   // regex
513        ) {
514            let text = "uvwxyz";
515            let mut matches = re.find_iter(text);
516
517            assert_eq!(matches.next(), None);
518        }
519    }
520
521    #[test]
522    fn test_process_string_with_utf8() {
523        for re in generate_res(
524            r#""文字""#, // anre
525            r#"文字"#,   // regex
526        ) {
527            let text = "abc文字文本象形文字🎁表情文字";
528            let mut matches = re.find_iter(text);
529
530            assert_eq!(matches.next(), Some(new_match(3, 9, "文字")));
531            assert_eq!(matches.next(), Some(new_match(21, 27, "文字")));
532            assert_eq!(matches.next(), Some(new_match(37, 43, "文字")));
533            assert_eq!(matches.next(), None);
534        }
535    }
536
537    #[test]
538    fn test_process_preset_charset() {
539        for re in generate_res(
540            r#"char_word"#, // anre
541            r#"\w"#,        // regex
542        ) {
543            let text = "a*1**_ **";
544            //               "^ ^  ^   "
545            let mut matches = re.find_iter(text);
546            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
547            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
548            assert_eq!(matches.next(), Some(new_match(5, 6, "_")));
549            assert_eq!(matches.next(), None);
550        }
551
552        for re in generate_res(
553            r#"char_not_word"#, // anre
554            r#"\W"#,            // regex
555        ) {
556            let text = "!a@12 bc_";
557            //               "v v  v   "
558            let mut matches = re.find_iter(text);
559            assert_eq!(matches.next(), Some(new_match(0, 1, "!")));
560            assert_eq!(matches.next(), Some(new_match(2, 3, "@")));
561            assert_eq!(matches.next(), Some(new_match(5, 6, " ")));
562            assert_eq!(matches.next(), None);
563        }
564
565        for re in generate_res(
566            r#"char_digit"#, // anre
567            r#"\d"#,         // regex
568        ) {
569            let text = "1a2b_3de*";
570            //               "^ ^  ^   "
571            let mut matches = re.find_iter(text);
572            assert_eq!(matches.next(), Some(new_match(0, 1, "1")));
573            assert_eq!(matches.next(), Some(new_match(2, 3, "2")));
574            assert_eq!(matches.next(), Some(new_match(5, 6, "3")));
575            assert_eq!(matches.next(), None);
576        }
577
578        for re in generate_res(
579            r#"char_not_digit"#, // anre
580            r#"\D"#,             // regex
581        ) {
582            let text = "a1_23 456";
583            //               "v v  v   "
584            let mut matches = re.find_iter(text);
585            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
586            assert_eq!(matches.next(), Some(new_match(2, 3, "_")));
587            assert_eq!(matches.next(), Some(new_match(5, 6, " ")));
588            assert_eq!(matches.next(), None);
589        }
590
591        for re in generate_res(
592            r#"char_space"#, // anre
593            r#"\s"#,         // regex
594        ) {
595            let text = " 1\tab\n_*!";
596            //               "^ ^-  ^-   "
597            //                012 345 678
598            let mut matches = re.find_iter(text);
599            assert_eq!(matches.next(), Some(new_match(0, 1, " ")));
600            assert_eq!(matches.next(), Some(new_match(2, 3, "\t")));
601            assert_eq!(matches.next(), Some(new_match(5, 6, "\n")));
602            assert_eq!(matches.next(), None);
603        }
604
605        for re in generate_res(
606            r#"char_not_space"#, // anre
607            r#"\S"#,             // regex
608        ) {
609            let text = "a\t1\r\n*   ";
610            //               "v  v    v   "
611            //                01 23 4 5678
612            let mut matches = re.find_iter(text);
613            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
614            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
615            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
616            assert_eq!(matches.next(), None);
617        }
618    }
619
620    #[test]
621    fn test_process_charset() {
622        // chars
623        for re in generate_res(
624            r#"['a','b','c']"#, // anre
625            r#"[abc]"#,         // regex
626        ) {
627            let text = "adbefcghi";
628            //               "^ ^  ^   "
629            let mut matches = re.find_iter(text);
630            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
631            assert_eq!(matches.next(), Some(new_match(2, 3, "b")));
632            assert_eq!(matches.next(), Some(new_match(5, 6, "c")));
633            assert_eq!(matches.next(), None);
634        }
635
636        // negative
637        for re in generate_res(
638            r#"!['a','b','c']"#, // anre
639            r#"[^abc]"#,         // regex
640        ) {
641            let text = "xa1bb*ccc";
642            //               "v v  v   "
643            let mut matches = re.find_iter(text);
644            assert_eq!(matches.next(), Some(new_match(0, 1, "x")));
645            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
646            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
647            assert_eq!(matches.next(), None);
648        }
649
650        // range
651        for re in generate_res(
652            r#"['a'..'c']"#, // anre
653            r#"[a-c]"#,      // regex
654        ) {
655            let text = "adbefcghi";
656            //               "^ ^  ^   "
657            let mut matches = re.find_iter(text);
658            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
659            assert_eq!(matches.next(), Some(new_match(2, 3, "b")));
660            assert_eq!(matches.next(), Some(new_match(5, 6, "c")));
661            assert_eq!(matches.next(), None);
662        }
663
664        // negative
665        for re in generate_res(
666            r#"!['a'..'c']"#, // anre
667            r#"[^a-c]"#,      // regex
668        ) {
669            let text = "xa1bb*ccc";
670            //               "v v  v   "
671            let mut matches = re.find_iter(text);
672            assert_eq!(matches.next(), Some(new_match(0, 1, "x")));
673            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
674            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
675            assert_eq!(matches.next(), None);
676        }
677
678        // ranges
679        for re in generate_res(
680            r#"['a'..'f', '0'..'9']"#, // anre
681            r#"[a-f0-9]"#,             // regex
682        ) {
683            let text = "am1npfq*_";
684            //               "^ ^  ^   "
685            let mut matches = re.find_iter(text);
686            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
687            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
688            assert_eq!(matches.next(), Some(new_match(5, 6, "f")));
689            assert_eq!(matches.next(), None);
690        }
691
692        // negative
693        for re in generate_res(
694            r#"!['a'..'f', '0'..'9']"#, // anre
695            r#"[^a-f0-9]"#,             // regex
696        ) {
697            let text = "man12*def";
698            //               "v v  v   "
699            let mut matches = re.find_iter(text);
700            assert_eq!(matches.next(), Some(new_match(0, 1, "m")));
701            assert_eq!(matches.next(), Some(new_match(2, 3, "n")));
702            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
703            assert_eq!(matches.next(), None);
704        }
705
706        // combine range with preset
707        for re in generate_res(
708            r#"['a'..'f', char_digit]"#, // anre
709            r#"[a-f\d]"#,                // regex
710        ) {
711            let text = "am1npfq*_";
712            //               "^ ^  ^   "
713            let mut matches = re.find_iter(text);
714            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
715            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
716            assert_eq!(matches.next(), Some(new_match(5, 6, "f")));
717            assert_eq!(matches.next(), None);
718        }
719
720        // negative
721        for re in generate_res(
722            r#"!['a'..'f', char_digit]"#, // anre
723            r#"[^a-f\d]"#,                // regex
724        ) {
725            let text = "man12*def";
726            //               "v v  v   "
727            let mut matches = re.find_iter(text);
728            assert_eq!(matches.next(), Some(new_match(0, 1, "m")));
729            assert_eq!(matches.next(), Some(new_match(2, 3, "n")));
730            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
731            assert_eq!(matches.next(), None);
732        }
733
734        // nested
735        {
736            let re = Regex::from_anre("[['a','b','c','d'..'f'], ['0'..'8'], '9']").unwrap();
737            let text = "am1npfq*_";
738            //               "^ ^  ^   "
739            let mut matches = re.find_iter(text);
740            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
741            assert_eq!(matches.next(), Some(new_match(2, 3, "1")));
742            assert_eq!(matches.next(), Some(new_match(5, 6, "f")));
743            assert_eq!(matches.next(), None);
744        }
745
746        // negative
747        {
748            let re = Regex::from_anre("![['a','b','c','d'..'f'], ['0'..'8'], '9']").unwrap();
749            let text = "man12*def";
750            //               "v v  v   "
751            let mut matches = re.find_iter(text);
752            assert_eq!(matches.next(), Some(new_match(0, 1, "m")));
753            assert_eq!(matches.next(), Some(new_match(2, 3, "n")));
754            assert_eq!(matches.next(), Some(new_match(5, 6, "*")));
755            assert_eq!(matches.next(), None);
756        }
757    }
758
759    #[test]
760    fn test_process_charset_with_utf8() {
761        for re in generate_res(
762            r#"['文','字','🍅']"#, // anre
763            r#"[文字🍅]"#,         // regex
764        ) {
765            let text = "abc正文写字🍉宋体字体🍅测试🍋";
766            let mut matches = re.find_iter(text);
767
768            assert_eq!(matches.next(), Some(new_match(6, 9, "文")));
769            assert_eq!(matches.next(), Some(new_match(12, 15, "字")));
770            assert_eq!(matches.next(), Some(new_match(25, 28, "字")));
771            assert_eq!(matches.next(), Some(new_match(31, 35, "🍅")));
772            assert_eq!(matches.next(), None);
773        }
774
775        // negative
776        for re in generate_res(
777            r#"!['文','字','🍅']"#, // anre
778            r#"[^文字🍅]"#,         // regex
779        ) {
780            let text = "哦字文🍅文噢字🍅文文字字喔";
781            let mut matches = re.find_iter(text);
782
783            assert_eq!(matches.next(), Some(new_match(0, 3, "哦")));
784            assert_eq!(matches.next(), Some(new_match(16, 19, "噢")));
785            assert_eq!(matches.next(), Some(new_match(38, 41, "喔")));
786            assert_eq!(matches.next(), None);
787        }
788    }
789
790    #[test]
791    fn test_process_special_char() {
792        for re in generate_res(
793            r#"char_any"#, // anre
794            r#"."#,        // regex
795        ) {
796            let text = "\na\r\n1 \n";
797            //               "  ^    ^^  "
798            let mut matches = re.find_iter(text);
799
800            assert_eq!(matches.next(), Some(new_match(1, 2, "a")));
801            assert_eq!(matches.next(), Some(new_match(4, 5, "1")));
802            assert_eq!(matches.next(), Some(new_match(5, 6, " ")));
803            assert_eq!(matches.next(), None);
804        }
805    }
806
807    #[test]
808    fn test_process_group() {
809        // anre group = a sequence of patterns
810        for re in generate_res(
811            r#"'a', 'b', 'c'"#, // anre
812            r#"abc"#,           // regex
813        ) {
814            let text = "ababcbcabc";
815            let mut matches = re.find_iter(text);
816
817            assert_eq!(matches.next(), Some(new_match(2, 5, "abc")));
818            assert_eq!(matches.next(), Some(new_match(7, 10, "abc")));
819            assert_eq!(matches.next(), None);
820        }
821
822        for re in generate_res(
823            r#"'%', char_digit"#, // anre
824            r#"%\d"#,             // regex
825        ) {
826            let text = "0123%567%9";
827            let mut matches = re.find_iter(text);
828
829            assert_eq!(matches.next(), Some(new_match(4, 6, "%5")));
830            assert_eq!(matches.next(), Some(new_match(8, 10, "%9")));
831            assert_eq!(matches.next(), None);
832        }
833
834        for re in generate_res(
835            r#"['+','-'], ('%', char_digit)"#, // anre
836            r#"[+-](%\d)"#,                    // regex
837        ) {
838            let text = "%12+%56-%9";
839            let mut matches = re.find_iter(text);
840
841            assert_eq!(matches.next(), Some(new_match(3, 6, "+%5")));
842            assert_eq!(matches.next(), Some(new_match(7, 10, "-%9")));
843            assert_eq!(matches.next(), None);
844        }
845    }
846
847    #[test]
848    fn test_process_logic_or() {
849        // two operands
850        for re in generate_res(
851            r#"'a' || 'b'"#, // anre
852            r#"a|b"#,        // regex
853        ) {
854            let text = "012a45b7a9";
855            let mut matches = re.find_iter(text);
856
857            assert_eq!(matches.next(), Some(new_match(3, 4, "a")));
858            assert_eq!(matches.next(), Some(new_match(6, 7, "b")));
859            assert_eq!(matches.next(), Some(new_match(8, 9, "a")));
860            assert_eq!(matches.next(), None);
861        }
862
863        // three operands
864        for re in generate_res(
865            r#""abc" || "mn" || "xyz""#, // anre
866            r#"abc|mn|xyz"#,             // regex
867        ) {
868            let text = "aabcmmnnxyzz";
869            let mut matches = re.find_iter(text);
870
871            assert_eq!(matches.next(), Some(new_match(1, 4, "abc")));
872            assert_eq!(matches.next(), Some(new_match(5, 7, "mn")));
873            assert_eq!(matches.next(), Some(new_match(8, 11, "xyz")));
874            assert_eq!(matches.next(), None);
875        }
876    }
877
878    #[test]
879    fn test_process_start_and_end_assertion() {
880        for re in generate_res(
881            r#"start, 'a'"#, // anre
882            r#"^a"#,         // regex
883        ) {
884            let text = "ab";
885            let mut matches = re.find_iter(text);
886
887            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
888            assert_eq!(matches.next(), None);
889        }
890
891        for re in generate_res(
892            r#"'a', end"#, // anre
893            r#"a$"#,       // regex
894        ) {
895            let text = "ab";
896            let mut matches = re.find_iter(text);
897
898            assert_eq!(matches.next(), None);
899        }
900
901        for re in generate_res(
902            r#"start, 'a'"#, // anre
903            r#"^a"#,         // regex
904        ) {
905            let text = "ba";
906            let mut matches = re.find_iter(text);
907
908            assert_eq!(matches.next(), None);
909        }
910
911        for re in generate_res(
912            r#"'a', end"#, // anre
913            r#"a$"#,       // regex
914        ) {
915            let text = "ba";
916            let mut matches = re.find_iter(text);
917
918            assert_eq!(matches.next(), Some(new_match(1, 2, "a")));
919            assert_eq!(matches.next(), None);
920        }
921
922        // both 'start' and 'end'
923        for re in generate_res(
924            r#"start, 'a', end"#, // anre
925            r#"^a$"#,             // regex
926        ) {
927            let text = "a";
928            let mut matches = re.find_iter(text);
929
930            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
931            assert_eq!(matches.next(), None);
932        }
933
934        // both 'start' and 'end' - failed 1
935        for re in generate_res(
936            r#"start, 'a', end"#, // anre
937            r#"^a$"#,             // regex
938        ) {
939            let text = "ab";
940            let mut matches = re.find_iter(text);
941
942            assert_eq!(matches.next(), None);
943        }
944
945        // both 'start' and 'end' - failed 2
946        for re in generate_res(
947            r#"start, 'a', end"#, // anre
948            r#"^a$"#,             // regex
949        ) {
950            let text = "ba";
951            let mut matches = re.find_iter(text);
952
953            assert_eq!(matches.next(), None);
954        }
955    }
956
957    #[test]
958    fn test_process_boundary_assertion() {
959        // matching 'boundary + char'
960        for re in generate_res(
961            r#"is_bound, 'a'"#, // anre
962            r#"\ba"#,           // regex
963        ) {
964            let text = "ab";
965            let mut matches = re.find_iter(text);
966
967            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
968            assert_eq!(matches.next(), None);
969        }
970
971        for re in generate_res(
972            r#"is_bound, 'a'"#, // anre
973            r#"\ba"#,           // regex
974        ) {
975            let text = "a";
976            let mut matches = re.find_iter(text);
977
978            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
979            assert_eq!(matches.next(), None);
980        }
981
982        for re in generate_res(
983            r#"is_bound, 'a'"#, // anre
984            r#"\ba"#,           // regex
985        ) {
986            let text = " a";
987            let mut matches = re.find_iter(text);
988
989            assert_eq!(matches.next(), Some(new_match(1, 2, "a")));
990            assert_eq!(matches.next(), None);
991        }
992
993        for re in generate_res(
994            r#"is_bound, 'a'"#, // anre
995            r#"\ba"#,           // regex
996        ) {
997            let text = "ba";
998            let mut matches = re.find_iter(text);
999
1000            assert_eq!(matches.next(), None);
1001        }
1002
1003        // matching 'char + boundary'
1004        for re in generate_res(
1005            r#"'a', is_bound"#, // anre
1006            r#"a\b"#,           // regex
1007        ) {
1008            let text = "ba";
1009            let mut matches = re.find_iter(text);
1010
1011            assert_eq!(matches.next(), Some(new_match(1, 2, "a")));
1012            assert_eq!(matches.next(), None);
1013        }
1014
1015        for re in generate_res(
1016            r#"'a', is_bound"#, // anre
1017            r#"a\b"#,           // regex
1018        ) {
1019            let text = "a";
1020            let mut matches = re.find_iter(text);
1021
1022            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
1023            assert_eq!(matches.next(), None);
1024        }
1025
1026        for re in generate_res(
1027            r#"'a', is_bound"#, // anre
1028            r#"a\b"#,           // regex
1029        ) {
1030            let text = "a ";
1031            let mut matches = re.find_iter(text);
1032
1033            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
1034            assert_eq!(matches.next(), None);
1035        }
1036
1037        for re in generate_res(
1038            r#"'a', is_bound"#, // anre
1039            r#"a\b"#,           // regex
1040        ) {
1041            let text = "ab";
1042            let mut matches = re.find_iter(text);
1043
1044            assert_eq!(matches.next(), None);
1045        }
1046    }
1047
1048    #[test]
1049    fn test_process_optional() {
1050        // char optional
1051        for re in generate_res(
1052            r#"'a', 'b'?, 'c'"#, // anre
1053            r#"ab?c"#,           // regex
1054        ) {
1055            // let re = Regex::from_anre("'a', 'b'?, 'c'").unwrap();
1056            let text = "ababccbacabc";
1057            //               "  ^^^  ^^vvv"
1058            let mut matches = re.find_iter(text);
1059
1060            assert_eq!(matches.next(), Some(new_match(2, 5, "abc")));
1061            assert_eq!(matches.next(), Some(new_match(7, 9, "ac")));
1062            assert_eq!(matches.next(), Some(new_match(9, 12, "abc")));
1063            assert_eq!(matches.next(), None);
1064        }
1065
1066        // char optional - greedy
1067        for re in generate_res(
1068            r#"'a', 'b', 'c'?"#, // anre
1069            r#"abc?"#,           // regex
1070        ) {
1071            // let re = Regex::from_anre("'a', 'b', 'c'?").unwrap();
1072            let text = "abcabx";
1073            //               "^^^vv"
1074            let mut matches = re.find_iter(text);
1075
1076            assert_eq!(matches.next(), Some(new_match(0, 3, "abc")));
1077            assert_eq!(matches.next(), Some(new_match(3, 5, "ab")));
1078            assert_eq!(matches.next(), None);
1079        }
1080
1081        // char optional - lazy
1082        for re in generate_res(
1083            r#"'a', 'b', 'c'??"#, // anre
1084            r#"abc??"#,           // regex
1085        ) {
1086            // let re = Regex::from_anre("'a', 'b', 'c'??").unwrap();
1087            let text = "abcabx";
1088            //               "^^ ^^ "
1089            let mut matches = re.find_iter(text);
1090
1091            assert_eq!(matches.next(), Some(new_match(0, 2, "ab")));
1092            assert_eq!(matches.next(), Some(new_match(3, 5, "ab")));
1093            assert_eq!(matches.next(), None);
1094        }
1095
1096        // group optional
1097        for re in generate_res(
1098            r#"'a', ('b','c')?, 'd'"#, // anre
1099            r#"a(bc)?d"#,              // regex
1100        ) {
1101            // let re = Regex::from_anre("'a', ('b','c')?, 'd'").unwrap();
1102            let text = "abcabdacdabcdabacad";
1103            //               "         ^^^^    ^^"
1104            let mut matches = re.find_iter(text);
1105
1106            assert_eq!(matches.next(), Some(new_match(9, 13, "abcd")));
1107            assert_eq!(matches.next(), Some(new_match(17, 19, "ad")));
1108            assert_eq!(matches.next(), None);
1109        }
1110    }
1111
1112    #[test]
1113    fn test_process_repetition_specified() {
1114        // char repetition
1115        for re in generate_res(
1116            r#"'a'{3}"#, // anre
1117            r#"a{3}"#,   // regex
1118        ) {
1119            // let re = Regex::from_anre("'a'{3}").unwrap();
1120            let text = "abaabbaaabbbaaaaa";
1121            //               "      ^^^   ^^^  "
1122            let mut matches = re.find_iter(text);
1123
1124            assert_eq!(matches.next(), Some(new_match(6, 9, "aaa")));
1125            assert_eq!(matches.next(), Some(new_match(12, 15, "aaa")));
1126            assert_eq!(matches.next(), None);
1127        }
1128
1129        // charset repetition
1130        for re in generate_res(
1131            r#"char_digit{3}"#, // anre
1132            r#"\d{3}"#,         // regex
1133        ) {
1134            // let re = Regex::from_anre("char_digit{3}").unwrap();
1135            let text = "a1ab12abc123abcd1234";
1136            //               "         ^^^    ^^^ "
1137            let mut matches = re.find_iter(text);
1138
1139            assert_eq!(matches.next(), Some(new_match(9, 12, "123")));
1140            assert_eq!(matches.next(), Some(new_match(16, 19, "123")));
1141            assert_eq!(matches.next(), None);
1142        }
1143
1144        // group repetition
1145        for re in generate_res(
1146            r#"('a','b'){3}"#, // anre
1147            r#"(ab){3}"#,      // regex
1148        ) {
1149            // let re = Regex::from_anre("('a','b'){3}").unwrap();
1150            let text = "abbaababbaababababab";
1151            //               "          ^^^^^^    "
1152            let mut matches = re.find_iter(text);
1153
1154            assert_eq!(matches.next(), Some(new_match(10, 16, "ababab")));
1155            assert_eq!(matches.next(), None);
1156        }
1157
1158        // repetition + other pattern
1159        for re in generate_res(
1160            r#"'a'{2}, char_digit"#, // anre
1161            r#"a{2}\d"#,             // regex
1162        ) {
1163            // let re = Regex::from_anre("'a'{2}, char_digit").unwrap();
1164            let text = "abaabbaa1bb1aa123bb123a11b11";
1165            //               "      ^^^   ^^^             "
1166            let mut matches = re.find_iter(text);
1167
1168            assert_eq!(matches.next(), Some(new_match(6, 9, "aa1")));
1169            assert_eq!(matches.next(), Some(new_match(12, 15, "aa1")));
1170            assert_eq!(matches.next(), None);
1171        }
1172    }
1173
1174    #[test]
1175    fn test_process_repetition_range() {
1176        // char repetition
1177        for re in generate_res(
1178            r#"'a'{1,3}"#, // anre
1179            r#"a{1,3}"#,   // regex
1180        ) {
1181            // let re = Regex::from_anre("'a'{1,3}").unwrap();
1182            let text = "abaabbaaabbbaaaabbbb";
1183            //               "^ ^^  ^^^   ^^^v    "
1184            let mut matches = re.find_iter(text);
1185
1186            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
1187            assert_eq!(matches.next(), Some(new_match(2, 4, "aa")));
1188            assert_eq!(matches.next(), Some(new_match(6, 9, "aaa")));
1189            assert_eq!(matches.next(), Some(new_match(12, 15, "aaa")));
1190            assert_eq!(matches.next(), Some(new_match(15, 16, "a")));
1191            assert_eq!(matches.next(), None);
1192        }
1193
1194        // char repetition lazy
1195        for re in generate_res(
1196            r#"'a'{1,3}?"#, // anre
1197            r#"a{1,3}?"#,   // regex
1198        ) {
1199            // let re = Regex::from_anre("'a'{1,3}?").unwrap();
1200            let text = "abaabbaaabbbaaaabbbb";
1201            //               "^ ^v  ^v^   ^v^v    "
1202            let mut matches = re.find_iter(text);
1203
1204            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
1205            assert_eq!(matches.next(), Some(new_match(2, 3, "a")));
1206            assert_eq!(matches.next(), Some(new_match(3, 4, "a")));
1207            assert_eq!(matches.next(), Some(new_match(6, 7, "a")));
1208            assert_eq!(matches.next(), Some(new_match(7, 8, "a")));
1209            // omit the follow up
1210        }
1211
1212        // char repetition - to MAX
1213        for re in generate_res(
1214            r#"'a'{2,}"#, // anre
1215            r#"a{2,}"#,   // regex
1216        ) {
1217            // let re = Regex::from_anre("'a'{2,}").unwrap();
1218            let text = "abaabbaaabbbaaaabbbb";
1219            //               "  ^^  ^^^   ^^^^    "
1220            let mut matches = re.find_iter(text);
1221
1222            assert_eq!(matches.next(), Some(new_match(2, 4, "aa")));
1223            assert_eq!(matches.next(), Some(new_match(6, 9, "aaa")));
1224            assert_eq!(matches.next(), Some(new_match(12, 16, "aaaa")));
1225            assert_eq!(matches.next(), None);
1226        }
1227
1228        // char repetition - to MAX - lazy
1229        for re in generate_res(
1230            r#"'a'{2,}?"#, // anre
1231            r#"a{2,}?"#,   // regex
1232        ) {
1233            // let re = Regex::from_anre("'a'{2,}?").unwrap();
1234            let text = "abaabbaaabbbaaaabbbb";
1235            //               "  ^^  ^^    ^^vv    "
1236            let mut matches = re.find_iter(text);
1237
1238            assert_eq!(matches.next(), Some(new_match(2, 4, "aa")));
1239            assert_eq!(matches.next(), Some(new_match(6, 8, "aa")));
1240            assert_eq!(matches.next(), Some(new_match(12, 14, "aa")));
1241            assert_eq!(matches.next(), Some(new_match(14, 16, "aa")));
1242            assert_eq!(matches.next(), None);
1243        }
1244    }
1245
1246    #[test]
1247    fn test_process_optional_and_repetition_range() {
1248        // implicit
1249        for re in generate_res(
1250            r#"'a', 'b'{0,3}, 'c'"#, // anre
1251            r#"ab{0,3}c"#,           // regex
1252        ) {
1253            // let re = Regex::from_anre("'a', 'b'{0,3}, 'c'").unwrap();
1254            let text = "acaabcaabbcaabbbcaabbbbc";
1255            //               "^^ ^^^ ^^^^ ^^^^^       "
1256            let mut matches = re.find_iter(text);
1257
1258            assert_eq!(matches.next(), Some(new_match(0, 2, "ac")));
1259            assert_eq!(matches.next(), Some(new_match(3, 6, "abc")));
1260            assert_eq!(matches.next(), Some(new_match(7, 11, "abbc")));
1261            assert_eq!(matches.next(), Some(new_match(12, 17, "abbbc")));
1262            assert_eq!(matches.next(), None);
1263        }
1264
1265        // explicit
1266        for re in generate_res(
1267            r#"'a', ('b'{2,3})?, 'c'"#, // anre
1268            r#"a(b{2,3})?c"#,           // regex
1269        ) {
1270            // let re = Regex::from_anre("'a', ('b'{2,3})?, 'c'").unwrap();
1271            let text = "acaabcaabbcaabbbcaabbbbc";
1272            //               "^^     ^^^^ ^^^^^       "
1273            let mut matches = re.find_iter(text);
1274
1275            assert_eq!(matches.next(), Some(new_match(0, 2, "ac")));
1276            assert_eq!(matches.next(), Some(new_match(7, 11, "abbc")));
1277            assert_eq!(matches.next(), Some(new_match(12, 17, "abbbc")));
1278            assert_eq!(matches.next(), None);
1279        }
1280
1281        // repetition specified
1282        for re in generate_res(
1283            r#"'a', ('b'{2})?, 'c'"#, // anre
1284            r#"a(b{2})?c"#,           // regex
1285        ) {
1286            // let re = Regex::from_anre("'a', ('b'{2})?, 'c'").unwrap();
1287            let text = "acaabcaabbcaabbbcaabbbbc";
1288            //               "^^     ^^^^             "
1289            let mut matches = re.find_iter(text);
1290
1291            assert_eq!(matches.next(), Some(new_match(0, 2, "ac")));
1292            assert_eq!(matches.next(), Some(new_match(7, 11, "abbc")));
1293            assert_eq!(matches.next(), None);
1294        }
1295    }
1296
1297    #[test]
1298    fn test_process_repetition_char_any() {
1299        // repetition specified
1300        for re in generate_res(
1301            r#"char_any{3}"#, // anre
1302            r#".{3}"#,        // regex
1303        ) {
1304            // let re = Regex::from_anre("char_any{3}").unwrap();
1305            let text = "abcdefgh";
1306            //               "^^^vvv  "
1307            let mut matches = re.find_iter(text);
1308
1309            assert_eq!(matches.next(), Some(new_match(0, 3, "abc")));
1310            assert_eq!(matches.next(), Some(new_match(3, 6, "def")));
1311            assert_eq!(matches.next(), None);
1312        }
1313
1314        // repetition range - to MAX
1315        for re in generate_res(
1316            r#"char_any+"#, // anre
1317            r#".+"#,        // regex
1318        ) {
1319            // let re = Regex::from_anre("char_any+").unwrap();
1320            let text = "abcdefg";
1321            let mut matches = re.find_iter(text);
1322
1323            assert_eq!(matches.next(), Some(new_match(0, 7, "abcdefg")));
1324            assert_eq!(matches.next(), None);
1325        }
1326    }
1327
1328    #[test]
1329    fn test_process_repetition_backtracking() {
1330        // backtracking
1331        for re in generate_res(
1332            r#"start, 'a', char_any+, 'c'"#, // anre
1333            r#"^a.+c"#,                      // regex
1334        ) {
1335            // let re = Regex::from_anre("start, 'a', char_any+, 'c'").unwrap();
1336            let text = "abbcmn";
1337            //               "^^^^  "
1338            let mut matches = re.find_iter(text);
1339
1340            assert_eq!(matches.next(), Some(new_match(0, 4, "abbc")));
1341        }
1342
1343        // backtracking - failed
1344        // because there is no char between 'a' and 'c'
1345        for re in generate_res(
1346            r#"start, 'a', char_any+, 'c'"#, // anre
1347            r#"^a.+c"#,                      // regex
1348        ) {
1349            // let re = Regex::from_anre("start, 'a', char_any+, 'c'").unwrap();
1350            let text = "acmn";
1351            let mut matches = re.find_iter(text);
1352            assert_eq!(matches.next(), None);
1353        }
1354
1355        // backtracking - failed
1356        // because there is not enough char between 'a' and 'c'
1357        for re in generate_res(
1358            r#"start, 'a', char_any{3,}, 'c'"#, // anre
1359            r#"^a.{3,}c"#,                      // regex
1360        ) {
1361            // let re = Regex::from_anre("start, 'a', char_any{3,}, 'c'").unwrap();
1362            let text = "abbcmn";
1363            let mut matches = re.find_iter(text);
1364            assert_eq!(matches.next(), None);
1365        }
1366
1367        // lazy repetition - no backtracking
1368        for re in generate_res(
1369            r#"'a', char_any+?, 'c'"#, // anre
1370            r#"a.+?c"#,                // regex
1371        ) {
1372            // let re = Regex::from_anre("'a', char_any+?, 'c'").unwrap();
1373            let text = "abbcmn";
1374            //               "^^^^  "
1375            let mut matches = re.find_iter(text);
1376
1377            assert_eq!(matches.next(), Some(new_match(0, 4, "abbc")));
1378        }
1379
1380        // nested backtracking
1381        for re in generate_res(
1382            r#"start, 'a', char_any{2,}, 'c', char_any{2,}, 'e'"#, // anre
1383            r#"^a.{2,}c.{2,}e"#,                                   // regex
1384        ) {
1385            // let re = Regex::from_anre("start, 'a', char_any{2,}, 'c', char_any{2,}, 'e'").unwrap();
1386            let text = "a88c88ewwefg";
1387            let mut matches = re.find_iter(text);
1388            assert_eq!(matches.next(), Some(new_match(0, 10, "a88c88ewwe")));
1389            assert_eq!(matches.next(), None);
1390        }
1391    }
1392
1393    #[test]
1394    fn test_process_capture() {
1395        // index
1396        for re in generate_res(
1397            r#"("0x" || "0o" || "0b").index(), (char_digit+).index()"#, // anre
1398            r#"(0x|0o|0b)(\d+)"#,                                       // regex
1399        ) {
1400            // let re = Regex::from_anre(r#"("0x" || "0o" || "0b").index(), (char_digit+).index()"#)
1401            //     .unwrap();
1402            let text = "abc0x23def0o456xyz";
1403
1404            let mut matches = re.captures_iter(text);
1405
1406            assert_eq!(
1407                matches.next(),
1408                Some(new_captures(&[
1409                    (3, 7, None, "0x23"),
1410                    (3, 5, None, "0x"),
1411                    (5, 7, None, "23")
1412                ]))
1413            );
1414
1415            assert_eq!(
1416                matches.next(),
1417                Some(new_captures(&[
1418                    (10, 15, None, "0o456"),
1419                    (10, 12, None, "0o"),
1420                    (12, 15, None, "456")
1421                ]))
1422            );
1423        }
1424
1425        // named
1426        for re in generate_res(
1427            r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#, // anre
1428            r#"(?<prefix>0x|0o|0b)(?<number>\d+)"#,                               // regex
1429        ) {
1430            // let re = Regex::from_anre(
1431            //     r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#,
1432            // )
1433            // .unwrap();
1434            let text = "abc0x23def0o456xyz";
1435
1436            let mut matches = re.captures_iter(text);
1437
1438            assert_eq!(
1439                matches.next(),
1440                Some(new_captures(&[
1441                    (3, 7, None, "0x23"),
1442                    (3, 5, Some("prefix"), "0x"),
1443                    (5, 7, Some("number"), "23")
1444                ]))
1445            );
1446
1447            assert_eq!(
1448                matches.next(),
1449                Some(new_captures(&[
1450                    (10, 15, None, "0o456"),
1451                    (10, 12, Some("prefix"), "0o"),
1452                    (12, 15, Some("number"), "456")
1453                ]))
1454            );
1455        }
1456
1457        // named - by Regex::captures_iter(...)
1458        for re in generate_res(
1459            r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#, // anre
1460            r#"(?<prefix>0x|0o|0b)(?<number>\d+)"#,                               // regex
1461        ) {
1462            // let re = Regex::from_anre(
1463            //     r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#,
1464            // )
1465            // .unwrap();
1466            let text = "abc0x23def0o456xyz";
1467
1468            let mut matches = re.captures_iter(text);
1469            let one = matches.next().unwrap();
1470
1471            assert_eq!(one.len(), 3);
1472
1473            // test 'Captures::get'
1474            assert_eq!(one.get(0).unwrap().as_str(), "0x23");
1475            assert_eq!(one.get(1).unwrap().as_str(), "0x");
1476            assert_eq!(one.get(2).unwrap().as_str(), "23");
1477
1478            // test Captures number index trait
1479            assert_eq!(&one[0], "0x23");
1480            assert_eq!(&one[1], "0x");
1481            assert_eq!(&one[2], "23");
1482
1483            // test 'Captures::name'
1484            assert_eq!(one.name("prefix").unwrap().as_str(), "0x");
1485            assert_eq!(one.name("number").unwrap().as_str(), "23");
1486
1487            // test Captures str index trait
1488            assert_eq!(&one["prefix"], "0x");
1489            assert_eq!(&one["number"], "23");
1490
1491            // test 'Captures::extract()'
1492            assert_eq!(("0x23", ["0x", "23"]), one.extract());
1493        }
1494
1495        // named - by Regex::find_iter(...)
1496        for re in generate_res(
1497            r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#, // anre
1498            r#"(?<prefix>0x|0o|0b)(?<number>\d+)"#,                               // regex
1499        ) {
1500            // let re = Regex::from_anre(
1501            //     r#"("0x" || "0o" || "0b").name(prefix), (char_digit+).name(number)"#,
1502            // )
1503            // .unwrap();
1504            let text = "abc0x23def0o456xyz";
1505
1506            let mut matches = re.find_iter(text);
1507            let one = matches.next().unwrap();
1508            let two = matches.next().unwrap();
1509
1510            assert_eq!(one.as_str(), "0x23");
1511            assert_eq!(one.range(), 3..7);
1512
1513            assert_eq!(two.as_str(), "0o456");
1514            assert_eq!(two.range(), 10..15);
1515        }
1516    }
1517
1518    #[test]
1519    fn test_process_backreference() {
1520        for re in generate_res(
1521            r#"
1522            ('<', (char_word+).name(tag_name), '>'),
1523            char_any+,
1524            ("</", tag_name, '>')
1525            "#, // anre
1526            r#"<(?<tag_name>\w+)>.+</\k<tag_name>>"#, // regex
1527        ) {
1528            // let re = Regex::from_anre(
1529            //     r#"
1530            // ('<', (char_word+).name(tag_name), '>'),
1531            // char_any+,
1532            // ("</", tag_name, '>')
1533            // "#,
1534            // )
1535            // .unwrap();
1536            let text = "zero<div>one<div>two</div>three</div>four";
1537            let mut matches = re.captures_iter(text);
1538
1539            assert_eq!(
1540                matches.next(),
1541                Some(new_captures(&[
1542                    (4, 37, None, "<div>one<div>two</div>three</div>"),
1543                    (5, 8, Some("tag_name"), "div")
1544                ]))
1545            );
1546        }
1547
1548        // backreference + lazy
1549        for re in generate_res(
1550            r#"
1551            ('<', (char_word+).name(tag_name), '>'),
1552            char_any+?,
1553            ("</", tag_name, '>')
1554            "#, // anre
1555            r#"<(?<tag_name>\w+)>.+?</\k<tag_name>>"#, // regex
1556        ) {
1557            // let re = Regex::from_anre(
1558            //     r#"
1559            // ('<', (char_word+).name(tag), '>'),
1560            // char_any+?,
1561            // ("</", tag, '>')
1562            // "#,
1563            // )
1564            // .unwrap();
1565            let text = "zero<div>one<div>two</div>three</div>four";
1566            let mut matches = re.captures_iter(text);
1567
1568            assert_eq!(
1569                matches.next(),
1570                Some(new_captures(&[
1571                    (4, 26, None, "<div>one<div>two</div>"),
1572                    (5, 8, Some("tag_name"), "div")
1573                ]))
1574            );
1575        }
1576    }
1577
1578    #[test]
1579    fn test_process_lookbehind() {
1580        for re in generate_res(
1581            r#"char_digit.is_after(['a'..'f'])"#, // anre
1582            r#"(?<=[a-f])\d"#,                    // regex
1583        ) {
1584            // let re = Regex::from_anre("char_digit.is_after(['a'..'f'])").unwrap();
1585            let text = "a1 22 f9 cc z3 b2";
1586            let mut matches = re.find_iter(text);
1587
1588            assert_eq!(matches.next(), Some(new_match(1, 2, "1")));
1589            assert_eq!(matches.next(), Some(new_match(7, 8, "9")));
1590            assert_eq!(matches.next(), Some(new_match(16, 17, "2")));
1591            assert_eq!(matches.next(), None);
1592        }
1593
1594        for re in generate_res(
1595            r#"
1596            [char_digit, 'a'..'f']
1597                .repeat(2)
1598                .is_after("0x")
1599            "#, // anre
1600            r#"(?<=0x)[\da-f]{2}"#, // regex
1601        ) {
1602            // let re = Regex::from_anre(
1603            //     r#"
1604            // [char_digit, 'a'..'f']
1605            //     .repeat(2)
1606            //     .is_after("0x")
1607            // "#,
1608            // )
1609            // .unwrap();
1610            let text = "13 0x17 0o19 0x23 29";
1611            let mut matches = re.find_iter(text);
1612
1613            assert_eq!(matches.next(), Some(new_match(5, 7, "17")));
1614            assert_eq!(matches.next(), Some(new_match(15, 17, "23")));
1615            assert_eq!(matches.next(), None);
1616        }
1617
1618        // negative
1619        for re in generate_res(
1620            r#"
1621            [char_digit, 'a'..'f']
1622                .repeat(2)
1623                .is_not_after("0x")
1624            "#, // anre
1625            r#"(?<!0x)[\da-f]{2}"#, // regex
1626        ) {
1627            // let re = Regex::from_anre(
1628            //     r#"
1629            // [char_digit, 'a'..'f']
1630            //     .repeat(2)
1631            //     .is_not_after("0x")
1632            // "#,
1633            // )
1634            // .unwrap();
1635            let text = "13 0x17 0o19 0x23 29";
1636            let mut matches = re.find_iter(text);
1637
1638            assert_eq!(matches.next(), Some(new_match(0, 2, "13")));
1639            assert_eq!(matches.next(), Some(new_match(10, 12, "19")));
1640            assert_eq!(matches.next(), Some(new_match(18, 20, "29")));
1641            assert_eq!(matches.next(), None);
1642        }
1643
1644        // `('a','c'.is_after('b'))` always fails because it is
1645        // NOT possible to be both 'a' and 'b' before 'c'.
1646        // in the same way,
1647        // `('c'.is_before('a'), 'b')` always fails because it is
1648        // impossible to be both 'a' and 'b' after 'c'.
1649        for re in generate_res(
1650            r#"
1651            'a','c'.is_after('b')
1652            "#, // anre
1653            r#"a(?<=b)c"#, // regex
1654        ) {
1655            // let re = Regex::from_anre("'a','c'.is_after('b')").unwrap();
1656            let text = "ac bc abc bac";
1657            let mut matches = re.find_iter(text);
1658            assert_eq!(matches.next(), None);
1659        }
1660    }
1661
1662    #[test]
1663    fn test_process_lookahead() {
1664        for re in generate_res(
1665            r#"is_bound, ['a'..'f'].is_before(char_digit)"#, // anre
1666            r#"\b[a-f](?=\d)"#,                              // regex
1667        ) {
1668            // let re = Regex::from_anre("is_bound, ['a'..'f'].is_before(char_digit)").unwrap();
1669            let text = "a1 22 f9 cc z3 b2";
1670            let mut matches = re.find_iter(text);
1671
1672            assert_eq!(matches.next(), Some(new_match(0, 1, "a")));
1673            assert_eq!(matches.next(), Some(new_match(6, 7, "f")));
1674            assert_eq!(matches.next(), Some(new_match(15, 16, "b")));
1675            assert_eq!(matches.next(), None);
1676        }
1677
1678        for re in generate_res(
1679            r#"
1680            is_bound
1681                ['a'..'z']
1682                    .at_least(2)
1683                    .is_before("ing" || "ed")
1684            "#, // anre
1685            r#"\b[a-z]{2,}(?=ing|ed)"#, // regex
1686        ) {
1687            // let re = Regex::from_anre(
1688            //     r#"
1689            //     is_bound
1690            //     ['a'..'z']
1691            //         .at_least(2)
1692            //         .is_before("ing" || "ed")
1693            //     "#,
1694            // )
1695            // .unwrap();
1696            let text = "jump jumping aaaabbbbing push pushed fork";
1697            let mut matches = re.find_iter(text);
1698
1699            assert_eq!(matches.next(), Some(new_match(5, 9, "jump")));
1700            assert_eq!(matches.next(), Some(new_match(13, 21, "aaaabbbb")));
1701            assert_eq!(matches.next(), Some(new_match(30, 34, "push")));
1702            assert_eq!(matches.next(), None);
1703        }
1704
1705        // negative
1706        for re in generate_res(
1707            r#"
1708                is_bound
1709                ['a'..'z']
1710                    .repeat(4)
1711                    .is_not_before("ing" || "ed")
1712            "#, // anre
1713            r#"\b[a-z]{4}(?!ing|ed)"#, // regex
1714        ) {
1715            // let re = Regex::from_anre(
1716            //     r#"
1717            //     is_bound
1718            //     ['a'..'z']
1719            //         .repeat(4)
1720            //         .is_not_before("ing" || "ed")
1721            //     "#,
1722            // )
1723            // .unwrap();
1724            let text = "jump jumping aaaabbbbing push pushed fork";
1725            let mut matches = re.find_iter(text);
1726
1727            assert_eq!(matches.next(), Some(new_match(0, 4, "jump")));
1728            assert_eq!(matches.next(), Some(new_match(13, 17, "aaaa")));
1729            assert_eq!(matches.next(), Some(new_match(25, 29, "push")));
1730            assert_eq!(matches.next(), Some(new_match(37, 41, "fork")));
1731            assert_eq!(matches.next(), None);
1732        }
1733
1734        // `('a','c'.is_after('b'))` always fails because it is
1735        // NOT possible to be both 'a' and 'b' before 'c'.
1736        // in the same way,
1737        // `('c'.is_before('a'), 'b')` always fails because it is
1738        // impossible to be both 'a' and 'b' after 'c'.
1739        for re in generate_res(
1740            r#"
1741            'c'.is_before('a'), 'b'
1742            "#, // anre
1743            r#"c(?=a)b"#, // regex
1744        ) {
1745            // let re = Regex::from_anre("'c'.is_before('a'), 'b'").unwrap();
1746            let text = "ca cb cab cba";
1747            let mut matches = re.find_iter(text);
1748            assert_eq!(matches.next(), None);
1749        }
1750    }
1751}