1use std::{iter::Peekable, str::Chars};
7
8mod ast;
9mod compile;
10mod matches;
11mod vm;
12
13pub use matches::{Match, MatchIter};
14pub use vm::Regex;
15
16#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum Error {
19 EmptyParens,
21 EmptyRegex,
23 InvalidClass,
25 InvalidEscape(char),
27 InvalidRepetition,
29 ReTooLong,
31 TooManyParens,
33 UnbalancedAlt,
35 UnbalancedParens,
37 UnclosedGroupName(String),
39 UnknownGroupQualifier(char),
41}
42
43const fn char_ix(ch: char) -> usize {
45 ((ch as u16) & 0xFF) as usize
46}
47
48const fn init_escapes() -> [Option<char>; 256] {
49 macro_rules! escape {
50 ($escapes:expr, $($ch:expr),+) => {
51 $($escapes[char_ix($ch)] = Some($ch);)+
52 };
53 ($escapes:expr, $($ch:expr => $esc:expr),+) => {
54 $($escapes[char_ix($ch)] = Some($esc);)+
55 };
56 }
57
58 let mut escapes = [None; 256];
59 escape!(escapes, '*', '+', '?', '.', '@', '(', ')', '[', ']', '{', '}', '|');
60 escape!(escapes, '\\', '\'', '"', '^', '$', '-');
61 escape!(escapes, 'b', 'B', 'd', 'D', 'w', 'W', 's', 'S');
62 escape!(escapes, 'n'=>'\n', 'r'=>'\r', 't'=>'\t');
63
64 escapes
65}
66
67const ESCAPES: [Option<char>; 256] = init_escapes();
69
70#[derive(Debug, Clone, PartialEq, Eq)]
71struct CharClass {
72 negated: bool,
73 chars: Vec<char>,
74 ranges: Vec<(char, char)>,
75}
76
77impl CharClass {
78 fn try_parse(it: &mut Peekable<Chars<'_>>) -> Result<Self, Error> {
79 let mut next = || next_char(it)?.ok_or(Error::InvalidClass);
80 let (mut ch, _) = next()?;
81
82 let negated = ch == '^';
83 if negated {
84 (ch, _) = next()?
85 };
86 let mut chars = vec![ch];
87 let mut ranges = vec![];
88
89 loop {
90 let (ch, escaped) = next()?;
91 match ch {
92 ']' if !escaped => break,
93
94 '-' if !escaped => {
95 let start = chars.pop().ok_or(Error::InvalidClass)?;
96 let (end, _) = next()?;
97 ranges.push((start, end));
98 }
99
100 ch => chars.push(ch),
101 }
102 }
103
104 Ok(Self {
105 negated,
106 chars,
107 ranges,
108 })
109 }
110
111 #[inline]
113 fn matches(&self, ch: char) -> bool {
114 if self.negated && ch == '\n' {
115 return false;
116 }
117
118 let res = self.chars.contains(&ch)
119 || self
120 .ranges
121 .iter()
122 .any(|&(start, end)| ch >= start && ch <= end);
123
124 if self.negated {
125 !res
126 } else {
127 res
128 }
129 }
130}
131
132fn next_char(it: &mut Peekable<Chars<'_>>) -> Result<Option<(char, bool)>, Error> {
133 match it.next() {
134 Some('\\') => (),
135 Some(ch) => return Ok(Some((ch, false))),
136 None => return Ok(None),
137 }
138
139 let ch = match it.next() {
140 Some(ch) => ch,
141 None => return Err(Error::InvalidEscape('\0')),
142 };
143
144 match ESCAPES[char_ix(ch)] {
145 Some(ch) => Ok(Some((ch, true))),
146 None => Err(Error::InvalidEscape(ch)),
147 }
148}
149
150#[cfg(test)]
151mod tests {
152 use super::*;
153 use simple_test_case::test_case;
154
155 #[test_case("_", &['_'], &[]; "single underscore")]
156 #[test_case("abc_", &['a', 'b', 'c', '_'], &[]; "chars")]
157 #[test_case("a-z", &[], &[('a', 'z')]; "single range")]
158 #[test_case("a-zA-Z", &[], &[('a', 'z'), ('A', 'Z')]; "multi range")]
159 #[test_case("a-z_./]", &['_', '.', '/'], &[('a', 'z')]; "compound")]
160 #[test_case("a-zA-Z_\\-.]", &['_', '-', '.'], &[('a', 'z'), ('A', 'Z')]; "compound escaped dash")]
161 #[test]
162 fn parsing_classes_works(raw: &str, chars: &[char], ranges: &[(char, char)]) {
163 for (s, negated) in [(format!("{raw}]"), false), (format!("^{raw}]"), true)] {
166 let cls = CharClass::try_parse(&mut s.chars().peekable()).unwrap();
167 let expected = CharClass {
168 negated,
169 chars: chars.to_vec(),
170 ranges: ranges.to_vec(),
171 };
172
173 assert_eq!(cls, expected, "negated={negated}");
174 }
175 }
176}