1use std::{
5 fmt::{Display, Write},
6 ops::RangeInclusive,
7};
8
9use proc_macro2::TokenStream;
10use quote::{quote, ToTokens};
11
12macro_rules! match_prefix {
27 ($text:ident, { }) => (::core::option::Option::None);
28 ($text:ident, {
29 $x:literal => $y:expr,
30 $($xs:literal => $ys:expr,)*
31 }) => {
32 if let ::core::option::Option::Some(rest) = str::strip_prefix($text, $x) {
33 ::core::option::Option::Some((rest, $y))
34 } $(else if let ::core::option::Option::Some(rest) = str::strip_prefix($text, $xs) {
35 ::core::option::Option::Some((rest, $ys))
36 })* else {
37 ::core::option::Option::None
38 }
39 };
40}
41
42pub struct ERE(pub(crate) Vec<EREBranch>);
45impl ERE {
46 fn take<'a>(rest: &'a str) -> Option<(&'a str, ERE)> {
47 let mut branches = Vec::new();
48 let (mut rest, branch) = EREBranch::take(rest)?;
49 branches.push(branch);
50 while let Some(new_rest) = rest.strip_prefix('|') {
51 rest = new_rest;
52 let Some((new_rest, branch)) = EREBranch::take(rest) else {
53 break;
54 };
55 rest = new_rest;
56 branches.push(branch);
57 }
58 return Some((rest, ERE(branches)));
59 }
60
61 pub fn parse_str(input: &str) -> Option<Self> {
62 let Some(("", ere)) = ERE::take(&input) else {
63 return None;
64 };
65 return Some(ere);
66 }
67}
68impl syn::parse::Parse for ERE {
69 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
70 let literal: syn::LitStr = input.parse()?;
71 let string = literal.value();
72 return ERE::parse_str(&string).ok_or_else(|| {
73 syn::Error::new(
74 literal.span(),
75 "Failed to parse POSIX Extended Regex Expression",
76 )
77 });
78 }
79}
80impl Display for ERE {
81 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82 let mut it = self.0.iter();
83 let Some(first) = it.next() else {
84 return Ok(());
85 };
86 write!(f, "{first}")?;
87 for part in it {
88 write!(f, "|{part}")?;
89 }
90 return Ok(());
91 }
92}
93
94pub(crate) struct EREBranch(pub(crate) Vec<EREPart>);
95impl EREBranch {
96 fn take<'a>(rest: &'a str) -> Option<(&'a str, EREBranch)> {
97 let mut parts = Vec::new();
98 let (mut rest, part) = EREPart::take(rest)?;
99 parts.push(part);
100 while let Some((new_rest, part)) = EREPart::take(rest) {
101 rest = new_rest;
102 parts.push(part);
103 }
104 return Some((rest, EREBranch(parts)));
105 }
106}
107impl Display for EREBranch {
108 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109 for part in &self.0 {
110 write!(f, "{part}")?;
111 }
112 return Ok(());
113 }
114}
115
116pub(crate) enum EREPart {
117 Single(EREExpression),
118 Quantified(EREExpression, Quantifier),
119 Start,
120 End,
121}
122impl EREPart {
123 fn take<'a>(rest: &'a str) -> Option<(&'a str, EREPart)> {
124 if let Some(rest) = rest.strip_prefix('^') {
125 return Some((rest, EREPart::Start));
126 } else if let Some(rest) = rest.strip_prefix('$') {
127 return Some((rest, EREPart::End));
128 }
129
130 let (rest, expr) = if let Some(rest) = rest.strip_prefix('(') {
131 let (rest, ere) = ERE::take(rest)?;
132 let rest = rest.strip_prefix(')')?;
133 (rest, EREExpression::Subexpression(ere))
134 } else {
135 let (rest, atom) = Atom::take(rest)?;
136 (rest, EREExpression::Atom(atom))
137 };
138
139 let part = match Quantifier::take(rest) {
140 Some((rest, quantifier)) => (rest, EREPart::Quantified(expr, quantifier)),
141 None => (rest, EREPart::Single(expr)),
142 };
143 return Some(part);
144 }
145}
146impl Display for EREPart {
147 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148 return match self {
149 EREPart::Single(expr) => write!(f, "{expr}"),
150 EREPart::Quantified(expr, quantifier) => write!(f, "{expr}{quantifier}"),
151 EREPart::Start => f.write_char('^'),
152 EREPart::End => f.write_char('$'),
153 };
154 }
155}
156
157pub(crate) enum EREExpression {
158 Atom(Atom),
159 Subexpression(ERE),
160}
161impl Display for EREExpression {
162 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
163 return match self {
164 EREExpression::Atom(atom) => write!(f, "{atom}"),
165 EREExpression::Subexpression(ere) => write!(f, "({ere})"),
166 };
167 }
168}
169
170#[derive(Debug, PartialEq, Eq, Clone, Copy)]
171pub(crate) enum QuantifierType {
172 Star,
173 Plus,
174 QuestionMark,
175 Multiple(u32),
177 Range(u32, Option<u32>),
179}
180
181impl QuantifierType {
182 #[inline]
184 const fn min(&self) -> u32 {
185 return match self {
186 QuantifierType::Star => 0,
187 QuantifierType::Plus => 1,
188 QuantifierType::QuestionMark => 0,
189 QuantifierType::Multiple(n) => *n,
190 QuantifierType::Range(n, _) => *n,
191 };
192 }
193 #[inline]
195 const fn max(&self) -> Option<u32> {
196 return match self {
197 QuantifierType::Star => None,
198 QuantifierType::Plus => None,
199 QuantifierType::QuestionMark => Some(1),
200 QuantifierType::Multiple(n) => Some(*n),
201 QuantifierType::Range(_, m) => *m,
202 };
203 }
204}
205impl Display for QuantifierType {
206 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207 return match self {
208 QuantifierType::Star => f.write_char('*'),
209 QuantifierType::Plus => f.write_char('+'),
210 QuantifierType::QuestionMark => f.write_char('?'),
211 QuantifierType::Multiple(n) => write!(f, "{{{n}}}"),
212 QuantifierType::Range(n, None) => write!(f, "{{{n},}}"),
213 QuantifierType::Range(n, Some(m)) => write!(f, "{{{n},{m}}}"),
214 };
215 }
216}
217
218#[derive(Debug, PartialEq, Eq, Clone, Copy)]
219pub(crate) struct Quantifier {
220 pub quantifier: QuantifierType,
221 pub alt: bool,
225}
226impl Quantifier {
227 #[inline]
228 fn take<'a>(rest: &'a str) -> Option<(&'a str, Quantifier)> {
229 let mut it = rest.chars();
230 let (rest, quantifier) = match it.next() {
231 Some('*') => (it.as_str(), QuantifierType::Star),
232 Some('+') => (it.as_str(), QuantifierType::Plus),
233 Some('?') => (it.as_str(), QuantifierType::QuestionMark),
234 Some('{') => {
235 let (inside, rest) = it.as_str().split_once('}')?;
236 match inside.split_once(',') {
237 None => (rest, QuantifierType::Multiple(inside.parse().ok()?)),
238 Some((min, "")) => (rest, QuantifierType::Range(min.parse().ok()?, None)),
239 Some((min, max)) => (
240 rest,
241 QuantifierType::Range(min.parse().ok()?, Some(max.parse().ok()?)),
242 ),
243 }
244 }
245 _ => return None,
246 };
247 if let Some(rest) = rest.strip_prefix('?') {
248 return Some((
249 rest,
250 Quantifier {
251 quantifier,
252 alt: true,
253 },
254 ));
255 } else {
256 return Some((
257 rest,
258 Quantifier {
259 quantifier,
260 alt: false,
261 },
262 ));
263 }
264 }
265}
266impl Display for Quantifier {
267 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
268 self.quantifier.fmt(f)?;
269 if self.alt {
270 return f.write_char('?');
271 } else {
272 return Ok(());
273 }
274 }
275}
276impl From<QuantifierType> for Quantifier {
277 fn from(quantifier: QuantifierType) -> Self {
278 return Quantifier {
279 quantifier,
280 alt: false,
281 };
282 }
283}
284
285#[derive(Debug, PartialEq, Eq, Clone, Copy)]
286pub enum CharClass {
287 Dot,
289}
290impl CharClass {
291 pub const fn check(&self, c: char) -> bool {
292 return match self {
293 CharClass::Dot => c != '\0',
294 };
295 }
296}
297impl Display for CharClass {
298 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
299 return match self {
300 CharClass::Dot => f.write_char('.'),
301 };
302 }
303}
304impl ToTokens for CharClass {
305 fn to_tokens(&self, tokens: &mut TokenStream) {
306 match self {
307 CharClass::Dot => tokens.extend(quote! {::ere_core::parse_tree::CharClass::Dot}),
308 }
309 }
310}
311
312#[derive(Debug, Clone)]
328pub enum Atom {
329 NormalChar(char),
331 CharClass(CharClass),
332 MatchingList(Vec<BracketExpressionTerm>),
334 NonmatchingList(Vec<BracketExpressionTerm>),
336}
337impl From<char> for Atom {
338 fn from(value: char) -> Self {
339 return Atom::NormalChar(value);
340 }
341}
342impl From<CharClass> for Atom {
343 fn from(value: CharClass) -> Self {
344 return Atom::CharClass(value);
345 }
346}
347impl std::str::FromStr for Atom {
348 type Err = ();
349
350 fn from_str(s: &str) -> Result<Self, Self::Err> {
351 let Some(("", atom)) = Atom::take(s) else {
352 return Err(());
353 };
354 return Ok(atom);
355 }
356}
357
358impl Atom {
359 fn take<'a>(rest: &'a str) -> Option<(&'a str, Atom)> {
360 let mut it = rest.chars();
361 match it.next() {
362 Some('\\') => match it.next() {
363 Some(c) if is_escapable_character(c) => {
364 return Some((it.as_str(), Atom::NormalChar(c)))
365 }
366 _ => return None,
367 },
368 Some('.') => return Some((it.as_str(), CharClass::Dot.into())),
369 Some('[') => {
370 let mut rest = it.as_str();
371 let mut items = Vec::new();
372 let none_of = if let Some(new_rest) = rest.strip_prefix('^') {
373 rest = new_rest;
374 true
375 } else {
376 false
377 };
378 if let Some(new_rest) = rest.strip_prefix(']') {
379 rest = new_rest;
380 items.push(BracketExpressionTerm::Single(']'));
381 }
382 loop {
383 if let Some(new_rest) = rest.strip_prefix(']') {
384 rest = new_rest;
386 break;
387 } else if let Some((new_rest, class)) = BracketCharClass::take(rest) {
388 rest = new_rest;
390 items.push(BracketExpressionTerm::CharClass(class));
391 } else {
392 let mut it = rest.chars();
394 let first = it.next()?;
395 rest = it.as_str();
396 if let '-' = it.next()? {
397 let second = it.next()?;
398 rest = it.as_str();
399 if second == ']' {
400 items.push(BracketExpressionTerm::Single(first));
402 items.push(BracketExpressionTerm::Single('-'));
403 break;
404 } else {
405 items.push(BracketExpressionTerm::Range(first, second));
407 }
408 } else {
409 items.push(BracketExpressionTerm::Single(first));
410 }
411 }
412 }
413
414 if none_of {
415 return Some((rest, Atom::NonmatchingList(items)));
416 } else {
417 return Some((rest, Atom::MatchingList(items)));
418 }
419 }
420 Some(c) if !is_special_character(c) => return Some((it.as_str(), Atom::NormalChar(c))),
421 None | Some(_) => return None,
422 }
423 }
424 pub fn check(&self, c: char) -> bool {
425 return match self {
426 Atom::NormalChar(a) => *a == c,
427 Atom::CharClass(char_class) => char_class.check(c),
428 Atom::MatchingList(vec) => vec.into_iter().any(|b| b.check(c)),
429 Atom::NonmatchingList(vec) => !vec.into_iter().any(|b| b.check(c)),
430 };
431 }
432 pub fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
443 fn combine_ranges_chars(
445 mut ranges: Vec<RangeInclusive<char>>,
446 ) -> Vec<RangeInclusive<char>> {
447 ranges.sort_by_key(|range| *range.start());
448 let Some((first_range, terms)) = ranges.split_first_chunk::<1>() else {
449 return Vec::new();
450 };
451 let mut reduced_terms = Vec::new();
452
453 let mut current_start = *first_range[0].start();
454 let mut current_end = *first_range[0].end();
455 for term in terms {
456 if term.is_empty() {
457 continue;
458 } else if *term.start() <= current_end || (current_end..=*term.start()).count() == 2
459 {
460 current_end = std::cmp::max(current_end, *term.end());
462 } else {
463 reduced_terms.push(current_start.clone()..=current_end.clone());
464 current_start = *term.start();
465 current_end = *term.end();
466 }
467 }
468 reduced_terms.push(current_start.clone()..=current_end.clone());
469 return reduced_terms;
470 }
471
472 return match self {
473 Atom::NormalChar(c) => vec![*c..=*c],
474 Atom::CharClass(CharClass::Dot) => vec!['\x01'..=char::MAX],
475 Atom::MatchingList(_terms) => {
476 let mut terms = Vec::new();
477 for term in _terms {
478 match term {
479 crate::parse_tree::BracketExpressionTerm::Single(c) => {
480 terms.push(*c..=*c);
481 }
482 crate::parse_tree::BracketExpressionTerm::Range(a, b) => {
483 terms.push(*a..=*b);
484 }
485 crate::parse_tree::BracketExpressionTerm::CharClass(class) => {
486 terms.extend_from_slice(class.to_ranges());
487 }
488 }
489 }
490 combine_ranges_chars(terms)
491 }
492 Atom::NonmatchingList(terms) => {
493 let mut ranges = Vec::new();
494 ranges.push('\0'..=char::MAX);
495 for term in terms.iter().flat_map(|term| term.to_ranges()) {
496 ranges = ranges
497 .iter()
498 .flat_map(|range| {
499 if term.end() < range.start() || range.end() < term.start() {
500 return vec![range.clone()]; }
502 let mut out = Vec::new();
503 if range.start() < term.start() {
504 let mut new_range = *range.start()..=*term.start();
505 new_range.next_back();
506 out.push(new_range); }
508 if term.end() < range.end() {
509 let mut new_range = *term.end()..=*range.end();
510 new_range.next();
511 out.push(new_range); }
513 return out;
514 })
515 .collect();
516 }
517
518 ranges.sort_by_key(|range: &RangeInclusive<char>| *range.start());
519 ranges
520 }
521 };
522 }
523}
524impl Display for Atom {
525 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
526 return match self {
527 Atom::NormalChar(c) if is_escapable_character(*c) => write!(f, "\\{c}"),
528 Atom::NormalChar(c) => f.write_char(*c),
529 Atom::CharClass(c) => c.fmt(f),
530 Atom::MatchingList(vec) => {
531 f.write_char('[')?;
532 for term in vec {
533 write!(f, "{term}")?;
534 }
535 f.write_char(']')
536 }
537 Atom::NonmatchingList(vec) => {
538 f.write_str("[^")?;
539 for term in vec {
540 write!(f, "{term}")?;
541 }
542 f.write_char(']')
543 }
544 };
545 }
546}
547impl PartialEq for Atom {
548 fn eq(&self, other: &Self) -> bool {
549 return self.to_ranges() == other.to_ranges();
550 }
551}
552impl Eq for Atom {}
553
554#[derive(Debug, PartialEq, Eq, Clone, Copy)]
556pub enum BracketCharClass {
557 Alphanumeric,
559 Control,
561 Lower,
563 Space,
565 Alphabet,
567 Digit,
569 Print,
571 Upper,
573 Blank,
575 Graphic,
577 Punctuation,
579 HexDigit,
581}
582impl BracketCharClass {
583 pub const fn to_ranges(&self) -> &'static [RangeInclusive<char>] {
584 return match self {
585 BracketCharClass::Alphanumeric => &['0'..='9', 'A'..='Z', 'a'..='z'],
586 BracketCharClass::Control => &['\0'..='\x1f', '\x7f'..='\x7f'],
587 BracketCharClass::Lower => &['a'..='z'],
588 BracketCharClass::Space => &['\t'..='\t', '\x0a'..='\x0d', ' '..=' '],
589 BracketCharClass::Alphabet => &['A'..='Z', 'a'..='z'],
590 BracketCharClass::Digit => &['0'..='9'],
591 BracketCharClass::Print => &['\x20'..='\x7E'],
592 BracketCharClass::Upper => &['A'..='Z'],
593 BracketCharClass::Blank => &['\t'..='\t', ' '..=' '],
594 BracketCharClass::Graphic => &['\x21'..='\x7e'],
595 BracketCharClass::Punctuation => &[
596 '\x21'..='\x2f',
597 '\x3a'..='\x40',
598 '\x5b'..='\x60',
599 '\x7b'..='\x7e',
600 ],
601 BracketCharClass::HexDigit => &['0'..='9', 'A'..='F', 'a'..='f'],
602 };
603 }
604 pub const fn check_ascii(&self, c: char) -> bool {
606 return match self {
607 BracketCharClass::Alphanumeric => c.is_ascii_alphanumeric(),
608 BracketCharClass::Control => c.is_ascii_control(),
609 BracketCharClass::Lower => c.is_ascii_lowercase(),
610 BracketCharClass::Space => c.is_ascii_whitespace() || c == '\x0b', BracketCharClass::Alphabet => c.is_ascii_alphabetic(),
612 BracketCharClass::Digit => c.is_ascii_digit(),
613 BracketCharClass::Print => matches!(c, '\x20'..='\x7E'),
614 BracketCharClass::Upper => c.is_ascii_uppercase(),
615 BracketCharClass::Blank => c == ' ' || c == '\t',
616 BracketCharClass::Graphic => c.is_ascii_graphic(),
617 BracketCharClass::Punctuation => c.is_ascii_punctuation(),
618 BracketCharClass::HexDigit => c.is_ascii_hexdigit(),
619 };
620 }
621 fn take<'a>(rest: &'a str) -> Option<(&'a str, BracketCharClass)> {
622 let rest = rest.strip_prefix("[:")?;
623 return match_prefix!(rest, {
624 "alnum:]" => BracketCharClass::Alphanumeric,
625 "cntrl:]" => BracketCharClass::Control,
626 "lower:]" => BracketCharClass::Lower,
627 "alpha:]" => BracketCharClass::Alphabet,
628 "digit:]" => BracketCharClass::Digit,
629 "print:]" => BracketCharClass::Print,
630 "upper:]" => BracketCharClass::Upper,
631 "blank:]" => BracketCharClass::Blank,
632 "graph:]" => BracketCharClass::Graphic,
633 "punct:]" => BracketCharClass::Punctuation,
634 "xdigit:]" => BracketCharClass::HexDigit,
635 });
636 }
637}
638impl Display for BracketCharClass {
639 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
640 return match self {
641 BracketCharClass::Alphanumeric => f.write_str("[:alnum:]"),
642 BracketCharClass::Control => f.write_str("[:cntrl:]"),
643 BracketCharClass::Lower => f.write_str("[:lower:]"),
644 BracketCharClass::Space => f.write_str("[:space:]"),
645 BracketCharClass::Alphabet => f.write_str("[:alpha:]"),
646 BracketCharClass::Digit => f.write_str("[:digit:]"),
647 BracketCharClass::Print => f.write_str("[:print:]"),
648 BracketCharClass::Upper => f.write_str("[:upper:]"),
649 BracketCharClass::Blank => f.write_str("[:blank:]"),
650 BracketCharClass::Graphic => f.write_str("[:graph:]"),
651 BracketCharClass::Punctuation => f.write_str("[:punct:]"),
652 BracketCharClass::HexDigit => f.write_str("[:xdigit:]"),
653 };
654 }
655}
656impl ToTokens for BracketCharClass {
657 fn to_tokens(&self, tokens: &mut TokenStream) {
658 match self {
659 BracketCharClass::Alphanumeric => {
660 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Alphanumeric})
661 }
662 BracketCharClass::Control => {
663 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Control})
664 }
665 BracketCharClass::Lower => {
666 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Lower})
667 }
668 BracketCharClass::Space => {
669 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Space})
670 }
671 BracketCharClass::Alphabet => {
672 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Alphabet})
673 }
674 BracketCharClass::Digit => {
675 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Digit})
676 }
677 BracketCharClass::Print => {
678 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Print})
679 }
680 BracketCharClass::Upper => {
681 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Upper})
682 }
683 BracketCharClass::Blank => {
684 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Blank})
685 }
686 BracketCharClass::Graphic => {
687 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Graph})
688 }
689 BracketCharClass::Punctuation => {
690 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::Punctuation})
691 }
692 BracketCharClass::HexDigit => {
693 tokens.extend(quote! {::ere_core::parse_tree::BracketCharClass::XDigit})
694 }
695 }
696 }
697}
698
699#[derive(Debug, PartialEq, Eq, Clone, Copy)]
700pub enum BracketExpressionTerm {
701 Single(char),
702 Range(char, char),
703 CharClass(BracketCharClass),
704}
705impl BracketExpressionTerm {
706 pub const fn check(&self, c: char) -> bool {
707 return match self {
708 BracketExpressionTerm::Single(a) => *a == c,
709 BracketExpressionTerm::Range(a, b) => *a <= c && c <= *b,
710 BracketExpressionTerm::CharClass(class) => class.check_ascii(c),
711 };
712 }
713 pub(crate) fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
714 return match self {
715 BracketExpressionTerm::Single(c) => vec![*c..=*c],
716 BracketExpressionTerm::Range(a, b) => vec![*a..=*b],
717 BracketExpressionTerm::CharClass(bracket_char_class) => {
718 bracket_char_class.to_ranges().to_vec()
719 }
720 };
721 }
722}
723impl Display for BracketExpressionTerm {
724 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
725 return match self {
726 BracketExpressionTerm::Single(c) => f.write_char(*c),
727 BracketExpressionTerm::Range(first, second) => write!(f, "{first}-{second}"),
728 BracketExpressionTerm::CharClass(class) => class.fmt(f),
729 };
730 }
731}
732impl From<char> for BracketExpressionTerm {
733 fn from(value: char) -> Self {
734 return BracketExpressionTerm::Single(value);
735 }
736}
737impl From<BracketCharClass> for BracketExpressionTerm {
738 fn from(value: BracketCharClass) -> Self {
739 return BracketExpressionTerm::CharClass(value);
740 }
741}
742impl ToTokens for BracketExpressionTerm {
743 fn to_tokens(&self, tokens: &mut TokenStream) {
744 tokens.extend(match self {
745 BracketExpressionTerm::Single(c) => quote! {
746 ::ere_core::parse_tree::BracketExpressionTerm::Single(#c)
747 },
748 BracketExpressionTerm::Range(a, z) => quote! {
749 ::ere_core::parse_tree::BracketExpressionTerm::Range(#a, #z)
750 },
751 BracketExpressionTerm::CharClass(char_class) => quote! {
752 ::ere_core::parse_tree::BracketExpressionTerm::CharClass(#char_class)
753 },
754 });
755 }
756}
757
758#[inline]
760const fn is_special_character(c: char) -> bool {
761 return c == '^'
762 || c == '.'
763 || c == '['
764 || c == '$'
765 || c == '('
766 || c == ')'
767 || c == '|'
768 || c == '*'
769 || c == '+'
770 || c == '?'
771 || c == '{'
772 || c == '\\';
773}
774
775#[inline]
777pub(crate) const fn is_escapable_character(c: char) -> bool {
778 return is_special_character(c) || c == ']' || c == '}';
779}
780
781#[cfg(test)]
782mod tests {
783 use super::*;
784
785 #[test]
786 fn reconstruction() {
787 fn test_reconstruction(text: &str) {
788 let (rest, ere) = ERE::take(text).unwrap();
789 assert!(rest.is_empty(), "{text} did not get used (left {rest})");
790 let reconstructed = ere.to_string();
791 assert_eq!(text, &reconstructed);
792 }
793 test_reconstruction("asdf");
794 test_reconstruction("as+df123$");
795 test_reconstruction("asd?f*123");
796 test_reconstruction("^asdf123.*");
797
798 test_reconstruction("a[|]");
799 test_reconstruction("[^]a-z1-4A-X-]asdf");
800 test_reconstruction("cd[X-]er");
801
802 test_reconstruction("my word is [[:alnum:]_]");
803 test_reconstruction("my word is [[:lower:][:digit:]_]");
804
805 test_reconstruction("a|b");
806 test_reconstruction("a|b|c");
807 test_reconstruction("a(a(b|c)|d)|(g|f)b");
808 test_reconstruction("(a|b)|(c|d){3}");
809
810 test_reconstruction("a[y-z]{1,3}");
811 test_reconstruction("a{3,}");
812 test_reconstruction("a(efg){1,}");
813 }
814
815 #[test]
816 fn parse_quantifiers() {
817 assert_eq!(Quantifier::take(""), None);
818 assert_eq!(
819 Quantifier::take("+asdf"),
820 Some(("asdf", QuantifierType::Plus.into()))
821 );
822 assert_eq!(
823 Quantifier::take("*"),
824 Some(("", QuantifierType::Star.into()))
825 );
826 assert_eq!(Quantifier::take("e?"), None);
827 assert_eq!(Quantifier::take("{"), None);
828 assert_eq!(Quantifier::take("{}"), None);
829 assert_eq!(
830 Quantifier::take("{1}"),
831 Some(("", QuantifierType::Multiple(1).into()))
832 );
833 assert_eq!(
834 Quantifier::take("{9}ee"),
835 Some(("ee", QuantifierType::Multiple(9).into()))
836 );
837 assert_eq!(
838 Quantifier::take("{10,}ee"),
839 Some(("ee", QuantifierType::Range(10, None).into()))
840 );
841 assert_eq!(
842 Quantifier::take("{0,11}ef"),
843 Some(("ef", QuantifierType::Range(0, Some(11)).into()))
844 );
845 assert_eq!(Quantifier::take("{0,e11}ef"), None);
846 assert_eq!(Quantifier::take("{0;11}ef"), None);
847 }
848
849 #[test]
850 fn parse_atom_simple() {
851 assert_eq!(Atom::take("a"), Some(("", Atom::NormalChar('a'))));
852 assert_eq!(Atom::take(r"abcd"), Some(("bcd", Atom::NormalChar('a'))));
853 assert_eq!(Atom::take(r"\\"), Some(("", Atom::NormalChar('\\'))));
854 assert_eq!(
855 Atom::take(r"\[asdf\]"),
856 Some((r"asdf\]", Atom::NormalChar('[')))
857 );
858 assert_eq!(Atom::take(r"\."), Some(("", Atom::NormalChar('.'))));
859 assert_eq!(Atom::take(r" "), Some(("", Atom::NormalChar(' '))));
860 assert_eq!(Atom::take(r"\"), None);
861
862 assert_eq!(Atom::take("."), Some(("", Atom::CharClass(CharClass::Dot))));
863 assert_eq!(
864 Atom::take(".."),
865 Some((".", Atom::CharClass(CharClass::Dot)))
866 );
867 }
868
869 #[test]
870 fn parse_atom_brackets() {
871 assert_eq!(
872 Atom::take("[ab]"),
873 Some((
874 "",
875 Atom::MatchingList(vec![
876 BracketExpressionTerm::Single('a'),
877 BracketExpressionTerm::Single('b'),
878 ])
879 ))
880 );
881 assert_eq!(
882 Atom::take("[]ab]"),
883 Some((
884 "",
885 Atom::MatchingList(vec![
886 BracketExpressionTerm::Single(']'),
887 BracketExpressionTerm::Single('a'),
888 BracketExpressionTerm::Single('b'),
889 ])
890 ))
891 );
892 assert_eq!(
893 Atom::take("[]ab-]"),
894 Some((
895 "",
896 Atom::MatchingList(vec![
897 BracketExpressionTerm::Single(']'),
898 BracketExpressionTerm::Single('a'),
899 BracketExpressionTerm::Single('b'),
900 BracketExpressionTerm::Single('-'),
901 ])
902 ))
903 );
904
905 assert_eq!(
906 Atom::take("[]a-y]"),
907 Some((
908 "",
909 Atom::MatchingList(vec![
910 BracketExpressionTerm::Single(']'),
911 BracketExpressionTerm::Range('a', 'y'),
912 ])
913 ))
914 );
915 assert_eq!(
916 Atom::take("[]+--]"),
917 Some((
918 "",
919 Atom::MatchingList(vec![
920 BracketExpressionTerm::Single(']'),
921 BracketExpressionTerm::Range('+', '-'),
922 ])
923 ))
924 );
925
926 assert_eq!(
927 Atom::take("[^]a-y]"),
928 Some((
929 "",
930 Atom::NonmatchingList(vec![
931 BracketExpressionTerm::Single(']'),
932 BracketExpressionTerm::Range('a', 'y'),
933 ])
934 ))
935 );
936 assert_eq!(
937 Atom::take("[^]+--]"),
938 Some((
939 "",
940 Atom::NonmatchingList(vec![
941 BracketExpressionTerm::Single(']'),
942 BracketExpressionTerm::Range('+', '-'),
943 ])
944 ))
945 );
946 }
947
948 #[test]
949 fn atom_to_ranges_normal_char() {
950 let (_, atom) = Atom::take("a").unwrap();
951 assert_eq!(atom.to_ranges(), vec!['a'..='a']);
952 }
953
954 #[test]
955 fn atom_to_ranges_matching_list() {
956 let (_, atom) = Atom::take("[a-z]").unwrap();
957 assert_eq!(atom.to_ranges(), vec!['a'..='z']);
958
959 let (_, atom) = Atom::take("[0-45-9]").unwrap();
960 assert_eq!(atom.to_ranges(), vec!['0'..='9']);
961
962 let (_, atom) = Atom::take("[5-90-4]").unwrap();
963 assert_eq!(atom.to_ranges(), vec!['0'..='9']);
964
965 let (_, atom) = Atom::take("[0-46-89]").unwrap();
966 assert_eq!(atom.to_ranges(), vec!['0'..='4', '6'..='9']);
967
968 let (_, atom) = Atom::take("[96-80-4]").unwrap();
969 assert_eq!(atom.to_ranges(), vec!['0'..='4', '6'..='9']);
970 }
971
972 #[test]
973 fn atom_to_ranges_nonmatching_list() {
974 let (_, atom) = Atom::take("[^b-y]").unwrap();
975 assert_eq!(atom.to_ranges(), vec!['\0'..='a', 'z'..=char::MAX]);
976
977 let (_, atom) = Atom::take("[^1-45-8]").unwrap();
978 assert_eq!(atom.to_ranges(), vec!['\0'..='0', '9'..=char::MAX]);
979
980 let (_, atom) = Atom::take("[^5-81-4]").unwrap();
981 assert_eq!(atom.to_ranges(), vec!['\0'..='0', '9'..=char::MAX]);
982
983 let (_, atom) = Atom::take("[^1-486-7]").unwrap();
984 assert_eq!(
985 atom.to_ranges(),
986 vec!['\0'..='0', '5'..='5', '9'..=char::MAX]
987 );
988
989 let (_, atom) = Atom::take("[^86-71-4]").unwrap();
990 assert_eq!(
991 atom.to_ranges(),
992 vec!['\0'..='0', '5'..='5', '9'..=char::MAX]
993 );
994 }
995
996 #[test]
997 fn atom_eq() {
998 assert_eq!(
999 Atom::take("[a-z]").unwrap().1,
1000 Atom::take("[a-kl-z]").unwrap().1
1001 );
1002
1003 assert_eq!(
1004 Atom::take("[a-z]").unwrap().1,
1005 Atom::take("[l-za-k]").unwrap().1
1006 );
1007
1008 assert_eq!(Atom::take("a").unwrap().1, Atom::take("[a]").unwrap().1);
1009
1010 assert_eq!(
1011 Atom::take("[1-8]").unwrap().1,
1012 Atom::take("[^\0-09-\u{10ffff}]").unwrap().1
1013 );
1014
1015 assert_eq!(
1016 Atom::take("[[:upper:]]").unwrap().1,
1017 Atom::take("[A-Z]").unwrap().1
1018 );
1019
1020 assert_ne!(
1021 Atom::take("[a-z]").unwrap().1,
1022 Atom::take("[A-Z]").unwrap().1
1023 )
1024 }
1025}