1use iregex::automata::RangeSet;
8use replace_with::replace_with_or_abort;
9use std::ops::Deref;
10
11mod parsing;
12pub use parsing::*;
13
14mod display;
15pub use display::*;
16
17mod build;
18
19#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
21pub struct Ast {
22 pub start_anchor: bool,
23 pub end_anchor: bool,
24 pub disjunction: Disjunction,
25}
26
27impl Ast {
28 pub fn empty() -> Self {
29 Self {
30 start_anchor: false,
31 end_anchor: false,
32 disjunction: Disjunction::new(),
33 }
34 }
35
36 pub fn is_empty(&self) -> bool {
37 self.disjunction.is_empty()
38 }
39
40 }
75
76#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
78pub struct Disjunction(Vec<Sequence>);
79
80impl Disjunction {
81 pub fn new() -> Self {
82 Self::default()
83 }
84}
85
86impl Deref for Disjunction {
87 type Target = [Sequence];
88
89 fn deref(&self) -> &Self::Target {
90 self.0.as_slice()
91 }
92}
93
94impl<'a> IntoIterator for &'a Disjunction {
95 type IntoIter = std::slice::Iter<'a, Sequence>;
96 type Item = &'a Sequence;
97
98 fn into_iter(self) -> Self::IntoIter {
99 self.0.iter()
100 }
101}
102
103impl IntoIterator for Disjunction {
104 type IntoIter = std::vec::IntoIter<Sequence>;
105 type Item = Sequence;
106
107 fn into_iter(self) -> Self::IntoIter {
108 self.0.into_iter()
109 }
110}
111
112impl From<Sequence> for Disjunction {
113 fn from(value: Sequence) -> Self {
114 Self(vec![value])
115 }
116}
117
118#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
120pub struct Sequence(Vec<Atom>);
121
122impl Sequence {
123 pub fn new() -> Self {
124 Self::default()
125 }
126
127 pub fn push(&mut self, atom: Atom) {
128 self.0.push(atom)
129 }
130
131 pub fn into_disjunction(self) -> Disjunction {
132 self.into()
133 }
134}
135
136impl Deref for Sequence {
137 type Target = [Atom];
138
139 fn deref(&self) -> &Self::Target {
140 self.0.as_slice()
141 }
142}
143
144impl FromIterator<Atom> for Sequence {
145 fn from_iter<T: IntoIterator<Item = Atom>>(iter: T) -> Self {
146 Self(Vec::from_iter(iter))
147 }
148}
149
150impl<'a> IntoIterator for &'a Sequence {
151 type IntoIter = std::slice::Iter<'a, Atom>;
152 type Item = &'a Atom;
153
154 fn into_iter(self) -> Self::IntoIter {
155 self.0.iter()
156 }
157}
158
159impl IntoIterator for Sequence {
160 type IntoIter = std::vec::IntoIter<Atom>;
161 type Item = Atom;
162
163 fn into_iter(self) -> Self::IntoIter {
164 self.0.into_iter()
165 }
166}
167
168#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
169pub enum Atom {
170 Any,
174
175 Char(char),
177
178 Set(Charset),
182
183 Repeat(Box<Self>, Repeat),
185
186 Group(Disjunction),
188}
189
190impl Atom {
191 pub fn repeat(&mut self, r: Repeat) {
192 replace_with_or_abort(self, |this| Self::Repeat(Box::new(this), r))
193 }
194}
195
196#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
197pub struct Charset {
198 negative: bool,
199 classes: Classes,
200 set: RangeSet<char>,
201}
202
203impl From<RangeSet<char>> for Charset {
204 fn from(value: RangeSet<char>) -> Self {
205 Self {
206 negative: false,
207 classes: Classes::none(),
208 set: value,
209 }
210 }
211}
212
213macro_rules! classes {
214 ($($id:ident: $name:literal ($flag:ident: $flag_value:literal)),*) => {
215 $(const $flag: u16 = $flag_value;)*
216
217 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
218 pub enum Class {
219 $($id),*
220 }
221
222 impl Class {
223 pub fn from_name(name: &str) -> Option<Self> {
224 match name {
225 $($name => Some(Self::$id),)*
226 _ => None
227 }
228 }
229
230 pub fn name(&self) -> &'static str {
231 match self {
232 $(Self::$id => $name),*
233 }
234 }
235
236 fn flag(&self) -> u16 {
237 match self {
238 $(Self::$id => $flag),*
239 }
240 }
241 }
242
243 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
244 pub struct Classes(u16);
245
246 impl Classes {
247 pub fn none() -> Self {
248 Self(0)
249 }
250
251 pub fn all() -> Self {
252 Self($($flag)|*)
253 }
254
255 pub fn contains(&self, c: Class) -> bool {
256 self.0 & c.flag() != 0
257 }
258
259 pub fn insert(&mut self, c: Class) {
260 self.0 |= c.flag()
261 }
262
263 pub fn iter(&self) -> ClassesIter {
264 ClassesIter(self.0)
265 }
266 }
267
268 pub struct ClassesIter(u16);
269
270 impl Iterator for ClassesIter {
271 type Item = Class;
272
273 fn next(&mut self) -> Option<Class> {
274 $(
275 if self.0 & $flag != 0 {
276 self.0 &= !$flag;
277 return Some(Class::$id)
278 }
279 )*
280
281 None
282 }
283 }
284 };
285}
286
287impl IntoIterator for Classes {
288 type IntoIter = ClassesIter;
289 type Item = Class;
290
291 fn into_iter(self) -> Self::IntoIter {
292 self.iter()
293 }
294}
295
296impl<'a> IntoIterator for &'a Classes {
297 type IntoIter = ClassesIter;
298 type Item = Class;
299
300 fn into_iter(self) -> Self::IntoIter {
301 self.iter()
302 }
303}
304
305classes! {
306 Upper: "upper" (CLASS_UPPER: 0b0000000000001),
307 Lower: "lower" (CLASS_LOWER: 0b0000000000010),
308 Alpha: "alpha" (CLASS_ALPHA: 0b0000000000100),
309 Alnum: "alnum" (CLASS_ALNUM: 0b0000000001000),
310 Digit: "digit" (CLASS_DIGIT: 0b0000000010000),
311 Xdigit: "xdigit" (CLASS_XDIGIT: 0b0000000100000),
312 Punct: "punct" (CLASS_PUNCT: 0b0000001000000),
313 Blank: "blank" (CLASS_BLANK: 0b0000100000000),
314 Space: "space" (CLASS_SPACE: 0b0001000000000),
315 Cntrl: "cntrl" (CLASS_CNTRL: 0b0010000000000),
316 Graph: "graph" (CLASS_GRAPH: 0b0100000000000),
317 Print: "print" (CLASS_PRINT: 0b1000000000000)
318}
319
320#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
321pub struct Repeat {
322 pub min: u32,
323 pub max: Option<u32>,
324}
325
326#[cfg(test)]
327mod tests {
328 use iregex::automata::nfa::U32StateBuilder;
329
330 use crate::Ast;
331
332 #[test]
333 fn test1() {
334 let ast = Ast::parse("^#([^\n#][^\n]*)?$".chars()).unwrap();
335 let exp = ast.build();
336 let aut = exp.compile(U32StateBuilder::new()).unwrap();
337
338 assert!(aut.matches_str("#").next().is_some());
339 assert!(aut.matches_str("#a").next().is_some());
340 assert!(aut.matches_str("##").next().is_none());
341
342 }
361}