1use std::fmt::{Display, Write};
2
3use proc_macro2::TokenStream;
4use quote::{quote, ToTokens};
5
6pub struct ERE(pub(crate) Vec<EREBranch>);
9impl ERE {
10 fn take<'a>(rest: &'a str) -> Option<(&'a str, ERE)> {
11 let mut branches = Vec::new();
12 let (mut rest, branch) = EREBranch::take(rest)?;
13 branches.push(branch);
14 while let Some(new_rest) = rest.strip_prefix('|') {
15 rest = new_rest;
16 let Some((new_rest, branch)) = EREBranch::take(rest) else {
17 break;
18 };
19 rest = new_rest;
20 branches.push(branch);
21 }
22 return Some((rest, ERE(branches)));
23 }
24
25 pub fn parse_str(input: &str) -> Option<Self> {
26 let Some(("", ere)) = ERE::take(&input) else {
27 return None;
28 };
29 return Some(ere);
30 }
31}
32impl syn::parse::Parse for ERE {
33 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
34 let literal: syn::LitStr = input.parse()?;
35 let string = literal.value();
36 return ERE::parse_str(&string).ok_or_else(|| {
37 syn::Error::new(
38 literal.span(),
39 "Failed to parse POSIX Extended Regex Expression",
40 )
41 });
42 }
43}
44impl Display for ERE {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 let mut it = self.0.iter();
47 let Some(first) = it.next() else {
48 return Ok(());
49 };
50 write!(f, "{first}")?;
51 for part in it {
52 write!(f, "|{part}")?;
53 }
54 return Ok(());
55 }
56}
57
58pub(crate) struct EREBranch(pub(crate) Vec<EREPart>);
59impl EREBranch {
60 fn take<'a>(rest: &'a str) -> Option<(&'a str, EREBranch)> {
61 let mut parts = Vec::new();
62 let (mut rest, part) = EREPart::take(rest)?;
63 parts.push(part);
64 while let Some((new_rest, part)) = EREPart::take(rest) {
65 rest = new_rest;
66 parts.push(part);
67 }
68 return Some((rest, EREBranch(parts)));
69 }
70}
71impl Display for EREBranch {
72 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73 for part in &self.0 {
74 write!(f, "{part}")?;
75 }
76 return Ok(());
77 }
78}
79
80pub(crate) enum EREPart {
81 Single(EREExpression),
82 Quantified(EREExpression, Quantifier),
83 Start,
84 End,
85}
86impl EREPart {
87 fn take<'a>(rest: &'a str) -> Option<(&'a str, EREPart)> {
88 if let Some(rest) = rest.strip_prefix('^') {
89 return Some((rest, EREPart::Start));
90 } else if let Some(rest) = rest.strip_prefix('$') {
91 return Some((rest, EREPart::End));
92 }
93
94 let (rest, expr) = if let Some(rest) = rest.strip_prefix('(') {
95 let (rest, ere) = ERE::take(rest)?;
96 let rest = rest.strip_prefix(')')?;
97 (rest, EREExpression::Subexpression(ere))
98 } else {
99 let (rest, atom) = Atom::take(rest)?;
100 (rest, EREExpression::Atom(atom))
101 };
102
103 let part = match Quantifier::take(rest) {
104 Some((rest, quantifier)) => (rest, EREPart::Quantified(expr, quantifier)),
105 None => (rest, EREPart::Single(expr)),
106 };
107 return Some(part);
108 }
109}
110impl Display for EREPart {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 return match self {
113 EREPart::Single(expr) => write!(f, "{expr}"),
114 EREPart::Quantified(expr, quantifier) => write!(f, "{expr}{quantifier}"),
115 EREPart::Start => f.write_char('^'),
116 EREPart::End => f.write_char('$'),
117 };
118 }
119}
120
121pub(crate) enum EREExpression {
122 Atom(Atom),
123 Subexpression(ERE),
124}
125impl Display for EREExpression {
126 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127 return match self {
128 EREExpression::Atom(atom) => write!(f, "{atom}"),
129 EREExpression::Subexpression(ere) => write!(f, "({ere})"),
130 };
131 }
132}
133
134#[derive(Debug, PartialEq, Eq, Clone, Copy)]
135pub(crate) enum Quantifier {
136 Star,
137 Plus,
138 QuestionMark,
139 Multiple(u32),
141 Range(u32, Option<u32>),
143}
144
145impl Quantifier {
146 #[inline]
148 const fn min(&self) -> u32 {
149 return match self {
150 Quantifier::Star => 0,
151 Quantifier::Plus => 1,
152 Quantifier::QuestionMark => 0,
153 Quantifier::Multiple(n) => *n,
154 Quantifier::Range(n, _) => *n,
155 };
156 }
157 #[inline]
159 const fn max(&self) -> Option<u32> {
160 return match self {
161 Quantifier::Star => None,
162 Quantifier::Plus => None,
163 Quantifier::QuestionMark => Some(1),
164 Quantifier::Multiple(n) => Some(*n),
165 Quantifier::Range(_, m) => *m,
166 };
167 }
168
169 #[inline]
170 fn take<'a>(rest: &'a str) -> Option<(&'a str, Quantifier)> {
171 let mut it = rest.chars();
172 match it.next() {
173 Some('*') => return Some((it.as_str(), Quantifier::Star)),
174 Some('+') => return Some((it.as_str(), Quantifier::Plus)),
175 Some('?') => return Some((it.as_str(), Quantifier::QuestionMark)),
176 Some('{') => {
177 let (inside, rest) = it.as_str().split_once('}')?;
178 match inside.split_once(',') {
179 None => return Some((rest, Quantifier::Multiple(inside.parse().ok()?))),
180 Some((min, "")) => {
181 return Some((rest, Quantifier::Range(min.parse().ok()?, None)))
182 }
183 Some((min, max)) => {
184 return Some((
185 rest,
186 Quantifier::Range(min.parse().ok()?, Some(max.parse().ok()?)),
187 ))
188 }
189 }
190 }
191 _ => return None,
192 }
193 }
194}
195impl Display for Quantifier {
196 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197 return match self {
198 Quantifier::Star => f.write_char('*'),
199 Quantifier::Plus => f.write_char('+'),
200 Quantifier::QuestionMark => f.write_char('?'),
201 Quantifier::Multiple(n) => write!(f, "{{{n}}}"),
202 Quantifier::Range(n, None) => write!(f, "{{{n},}}"),
203 Quantifier::Range(n, Some(m)) => write!(f, "{{{n},{m}}}"),
204 };
205 }
206}
207
208#[derive(Debug, PartialEq, Eq, Clone, Copy)]
209pub enum CharClass {
210 Dot,
211}
212impl CharClass {
213 pub const fn check(&self, c: char) -> bool {
214 return match self {
215 CharClass::Dot => c != '\0',
216 };
217 }
218}
219impl Display for CharClass {
220 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
221 return match self {
222 CharClass::Dot => f.write_char('.'),
223 };
224 }
225}
226impl ToTokens for CharClass {
227 fn to_tokens(&self, tokens: &mut TokenStream) {
228 match self {
229 CharClass::Dot => tokens.extend(quote! {::ere_core::parse_tree::CharClass::Dot}),
230 }
231 }
232}
233
234#[derive(Debug, PartialEq, Eq, Clone)]
235pub enum Atom {
236 NormalChar(char),
238 CharClass(CharClass), MatchingList(Vec<BracketExpressionTerm>),
241 NonmatchingList(Vec<BracketExpressionTerm>),
243}
244impl From<char> for Atom {
245 fn from(value: char) -> Self {
246 return Atom::NormalChar(value);
247 }
248}
249impl From<CharClass> for Atom {
250 fn from(value: CharClass) -> Self {
251 return Atom::CharClass(value);
252 }
253}
254
255impl Atom {
256 fn take<'a>(rest: &'a str) -> Option<(&'a str, Atom)> {
257 let mut it = rest.chars();
258 match it.next() {
259 Some('\\') => match it.next() {
260 Some(c) if is_escapable_character(c) => {
261 return Some((it.as_str(), Atom::NormalChar(c)))
262 }
263 _ => return None,
264 },
265 Some('.') => return Some((it.as_str(), CharClass::Dot.into())),
266 Some('[') => {
267 let rest = it.as_str();
268 let (rest, end_index, none_of) = if rest.starts_with(']') {
269 (rest, rest.match_indices(']').nth(1)?.0, false)
270 } else if let Some(rest) = rest.strip_prefix('^') {
271 if rest.starts_with(']') {
272 (rest, rest.match_indices(']').nth(1)?.0, true)
273 } else {
274 (rest, rest.find(']')?, true)
275 }
276 } else {
277 (rest, rest.find(']')?, false)
278 };
279
280 let inside = &rest[..end_index];
281 let rest = &rest[end_index + 1..];
282 let mut it = inside.chars();
283 let mut items = Vec::new();
284
285 while let Some(first) = it.next() {
287 match it.clone().next() {
288 Some('-') => {
289 it.next();
290 if let Some(second) = it.next() {
291 items.push(BracketExpressionTerm::Range(first, second));
292 } else {
293 items.push(BracketExpressionTerm::Single(first));
294 items.push(BracketExpressionTerm::Single('-'));
295 }
296 }
297 _ => {
298 items.push(BracketExpressionTerm::Single(first));
299 }
300 }
301 }
302
303 if none_of {
304 return Some((rest, Atom::NonmatchingList(items)));
305 } else {
306 return Some((rest, Atom::MatchingList(items)));
307 }
308 }
309 Some(c) if !is_special_character(c) => return Some((it.as_str(), Atom::NormalChar(c))),
310 None | Some(_) => return None,
311 }
312 }
313 pub fn check(&self, c: char) -> bool {
314 return match self {
315 Atom::NormalChar(a) => *a == c,
316 Atom::CharClass(char_class) => char_class.check(c),
317 Atom::MatchingList(vec) => vec.into_iter().any(|b| b.check(c)),
318 Atom::NonmatchingList(vec) => !vec.into_iter().any(|b| b.check(c)),
319 };
320 }
321}
322impl Display for Atom {
323 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
324 return match self {
325 Atom::NormalChar(c) if is_escapable_character(*c) => write!(f, "\\{c}"),
326 Atom::NormalChar(c) => f.write_char(*c),
327 Atom::CharClass(c) => c.fmt(f),
328 Atom::MatchingList(vec) => {
329 f.write_char('[')?;
330 for term in vec {
331 write!(f, "{term}")?;
332 }
333 f.write_char(']')
334 }
335 Atom::NonmatchingList(vec) => {
336 f.write_str("[^")?;
337 for term in vec {
338 write!(f, "{term}")?;
339 }
340 f.write_char(']')
341 }
342 };
343 }
344}
345#[derive(Debug, PartialEq, Eq, Clone, Copy)]
346pub enum BracketExpressionTerm {
347 Single(char),
348 Range(char, char),
349 CharClass(CharClass),
350}
351impl BracketExpressionTerm {
352 pub const fn check(&self, c: char) -> bool {
353 return match self {
354 BracketExpressionTerm::Single(a) => *a == c,
355 BracketExpressionTerm::Range(a, b) => *a <= c && c <= *b,
356 BracketExpressionTerm::CharClass(class) => class.check(c),
357 };
358 }
359}
360impl Display for BracketExpressionTerm {
361 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
362 return match self {
363 BracketExpressionTerm::Single(c) => f.write_char(*c),
364 BracketExpressionTerm::Range(first, second) => write!(f, "{first}-{second}"),
365 BracketExpressionTerm::CharClass(class) => class.fmt(f),
366 };
367 }
368}
369impl From<char> for BracketExpressionTerm {
370 fn from(value: char) -> Self {
371 return BracketExpressionTerm::Single(value);
372 }
373}
374impl From<CharClass> for BracketExpressionTerm {
375 fn from(value: CharClass) -> Self {
376 return BracketExpressionTerm::CharClass(value);
377 }
378}
379impl ToTokens for BracketExpressionTerm {
380 fn to_tokens(&self, tokens: &mut TokenStream) {
381 tokens.extend(match self {
382 BracketExpressionTerm::Single(c) => quote! {
383 ::ere_core::parse_tree::BracketExpressionTerm::Single(#c)
384 },
385 BracketExpressionTerm::Range(a, z) => quote! {
386 ::ere_core::parse_tree::BracketExpressionTerm::Range(#a, #z)
387 },
388 BracketExpressionTerm::CharClass(char_class) => quote! {
389 ::ere_core::parse_tree::BracketExpressionTerm::CharClass(#char_class)
390 },
391 });
392 }
393}
394
395#[inline]
397const fn is_special_character(c: char) -> bool {
398 return c == '^'
399 || c == '.'
400 || c == '['
401 || c == '$'
402 || c == '('
403 || c == ')'
404 || c == '|'
405 || c == '*'
406 || c == '+'
407 || c == '?'
408 || c == '{'
409 || c == '\\';
410}
411
412#[inline]
414const fn is_escapable_character(c: char) -> bool {
415 return is_special_character(c) || c == ']' || c == '}';
416}
417
418#[cfg(test)]
419mod tests {
420 use super::*;
421
422 #[test]
423 fn reconstruction() {
424 fn test_reconstruction(text: &str) {
425 let (rest, ere) = ERE::take(text).unwrap();
426 assert!(rest.is_empty(), "{text} did not get used (left {rest})");
427 let reconstructed = ere.to_string();
428 assert_eq!(text, &reconstructed);
429 }
430 test_reconstruction("asdf");
431 test_reconstruction("as+df123$");
432 test_reconstruction("asd?f*123");
433 test_reconstruction("^asdf123.*");
434
435 test_reconstruction("a[|]");
436 test_reconstruction("[^]a-z1-4A-X-]asdf");
437 test_reconstruction("cd[X-]er");
438
439 test_reconstruction("a|b");
440 test_reconstruction("a|b|c");
441 test_reconstruction("a(a(b|c)|d)|(g|f)b");
442 test_reconstruction("(a|b)|(c|d){3}");
443
444 test_reconstruction("a[y-z]{1,3}");
445 test_reconstruction("a{3,}");
446 test_reconstruction("a(efg){1,}");
447 }
448
449 #[test]
450 fn parse_quantifiers() {
451 assert_eq!(Quantifier::take(""), None);
452 assert_eq!(Quantifier::take("+asdf"), Some(("asdf", Quantifier::Plus)));
453 assert_eq!(Quantifier::take("*"), Some(("", Quantifier::Star)));
454 assert_eq!(Quantifier::take("e?"), None);
455 assert_eq!(Quantifier::take("{"), None);
456 assert_eq!(Quantifier::take("{}"), None);
457 assert_eq!(Quantifier::take("{1}"), Some(("", Quantifier::Multiple(1))));
458 assert_eq!(
459 Quantifier::take("{9}ee"),
460 Some(("ee", Quantifier::Multiple(9)))
461 );
462 assert_eq!(
463 Quantifier::take("{10,}ee"),
464 Some(("ee", Quantifier::Range(10, None)))
465 );
466 assert_eq!(
467 Quantifier::take("{0,11}ef"),
468 Some(("ef", Quantifier::Range(0, Some(11))))
469 );
470 assert_eq!(Quantifier::take("{0,e11}ef"), None);
471 assert_eq!(Quantifier::take("{0;11}ef"), None);
472 }
473
474 #[test]
475 fn parse_atom_simple() {
476 assert_eq!(Atom::take("a"), Some(("", Atom::NormalChar('a'))));
477 assert_eq!(Atom::take(r"abcd"), Some(("bcd", Atom::NormalChar('a'))));
478 assert_eq!(Atom::take(r"\\"), Some(("", Atom::NormalChar('\\'))));
479 assert_eq!(
480 Atom::take(r"\[asdf\]"),
481 Some((r"asdf\]", Atom::NormalChar('[')))
482 );
483 assert_eq!(Atom::take(r"\."), Some(("", Atom::NormalChar('.'))));
484 assert_eq!(Atom::take(r" "), Some(("", Atom::NormalChar(' '))));
485 assert_eq!(Atom::take(r"\"), None);
486
487 assert_eq!(Atom::take("."), Some(("", Atom::CharClass(CharClass::Dot))));
488 assert_eq!(
489 Atom::take(".."),
490 Some((".", Atom::CharClass(CharClass::Dot)))
491 );
492 }
493
494 #[test]
495 fn parse_atom_brackets() {
496 assert_eq!(
497 Atom::take("[ab]"),
498 Some((
499 "",
500 Atom::MatchingList(vec![
501 BracketExpressionTerm::Single('a'),
502 BracketExpressionTerm::Single('b'),
503 ])
504 ))
505 );
506 assert_eq!(
507 Atom::take("[]ab]"),
508 Some((
509 "",
510 Atom::MatchingList(vec![
511 BracketExpressionTerm::Single(']'),
512 BracketExpressionTerm::Single('a'),
513 BracketExpressionTerm::Single('b'),
514 ])
515 ))
516 );
517 assert_eq!(
518 Atom::take("[]ab-]"),
519 Some((
520 "",
521 Atom::MatchingList(vec![
522 BracketExpressionTerm::Single(']'),
523 BracketExpressionTerm::Single('a'),
524 BracketExpressionTerm::Single('b'),
525 BracketExpressionTerm::Single('-'),
526 ])
527 ))
528 );
529
530 assert_eq!(
531 Atom::take("[]a-y]"),
532 Some((
533 "",
534 Atom::MatchingList(vec![
535 BracketExpressionTerm::Single(']'),
536 BracketExpressionTerm::Range('a', 'y'),
537 ])
538 ))
539 );
540 assert_eq!(
541 Atom::take("[]+--]"),
542 Some((
543 "",
544 Atom::MatchingList(vec![
545 BracketExpressionTerm::Single(']'),
546 BracketExpressionTerm::Range('+', '-'),
547 ])
548 ))
549 );
550
551 assert_eq!(
552 Atom::take("[^]a-y]"),
553 Some((
554 "",
555 Atom::NonmatchingList(vec![
556 BracketExpressionTerm::Single(']'),
557 BracketExpressionTerm::Range('a', 'y'),
558 ])
559 ))
560 );
561 assert_eq!(
562 Atom::take("[^]+--]"),
563 Some((
564 "",
565 Atom::NonmatchingList(vec![
566 BracketExpressionTerm::Single(']'),
567 BracketExpressionTerm::Range('+', '-'),
568 ])
569 ))
570 );
571 }
572}