ib_matcher/syntax/regex/hir/
literal.rs

1use regex_syntax::hir::Hir;
2
3pub use regex_syntax::hir::literal::*;
4
5pub fn extract_first_byte(hirs: &[Hir]) -> Option<u8> {
6    let mut extractor = Extractor::new();
7    extractor
8        .kind(ExtractKind::Prefix)
9        .limit_class(1)
10        .limit_repeat(1)
11        .limit_literal_len(1)
12        .limit_total(2);
13
14    let mut prefixes = Seq::empty();
15    for hir in hirs {
16        prefixes.union(&mut extractor.extract(hir));
17    }
18    #[cfg(test)]
19    println!(
20        "prefixes (len={:?}, exact={:?}) extracted: {:?}",
21        prefixes.len(),
22        prefixes.is_exact(),
23        prefixes
24    );
25
26    prefixes
27        .literals()
28        .filter(|l| {
29            // 0: empty hirs, >1: many hirs
30            l.len() == 1
31        })
32        .and_then(|l| {
33            let l = unsafe { l.get_unchecked(0) };
34            // May be ""
35            debug_assert!(l.as_bytes().len() <= 1);
36            l.as_bytes().first().copied()
37        })
38}
39
#[cfg(test)]
mod tests {
    use regex_syntax::{hir::Look, parse};

    use super::*;

    /// Builds the concatenation `Look::StartCRLF`, literal `text`, `Look::EndCRLF`.
    fn crlf_wrapped(text: &str) -> Hir {
        Hir::concat(vec![
            Hir::look(Look::StartCRLF),
            Hir::literal(text.as_bytes()),
            Hir::look(Look::EndCRLF),
        ])
    }

    #[test]
    fn extract_first_byte_test() {
        // No patterns at all.
        assert_eq!(extract_first_byte(&[]), None);

        // A single parsed pattern per call.
        let parsed_cases: [(&str, Option<u8>); 7] = [
            ("", None),
            ("a", Some(b'a')),
            ("a|ab", Some(b'a')),
            ("a|ab|abc|aki|azki|ahegao", Some(b'a')),
            ("a{3}|ab|abc", Some(b'a')),
            ("a|b", None),
            ("(a|(ab))", Some(b'a')),
        ];
        for &(pattern, expected) in parsed_cases.iter() {
            let hir = parse(pattern).unwrap();
            assert_eq!(
                extract_first_byte(&[hir]),
                expected,
                "pattern: {:?}",
                pattern
            );
        }

        // A literal surrounded by look-around assertions still yields its first byte.
        assert_eq!(extract_first_byte(&[crlf_wrapped("foo")]), Some(b'f'));

        // Several patterns whose first bytes differ.
        assert_eq!(
            extract_first_byte(&[crlf_wrapped("foo"), crlf_wrapped("bar")]),
            None
        );
        assert_eq!(
            extract_first_byte(&[
                crlf_wrapped("foo"),
                crlf_wrapped("bar"),
                crlf_wrapped("far"),
            ]),
            None
        );

        // Several patterns that all share the same first byte.
        assert_eq!(
            extract_first_byte(&[
                crlf_wrapped("foo"),
                crlf_wrapped("far"),
                crlf_wrapped("far"),
            ]),
            Some(b'f')
        );
    }
}