checkito/
regex.rs

1#![cfg(feature = "regex")]
2
3use crate::{
4    REPEATS, all,
5    any::{self, Any},
6    collect::{self},
7    generate::{Generate, State},
8    prelude::collect,
9    primitive::char,
10    shrink::Shrink,
11};
12use core::{fmt, ops::RangeInclusive};
13use regex_syntax::{
14    Parser,
15    hir::{Capture, Class, ClassBytesRange, ClassUnicodeRange, Hir, HirKind, Repetition},
16};
17
18#[derive(Debug, Clone)]
19pub enum Regex {
20    Empty,
21    Text(String),
22    Range(RangeInclusive<char>),
23    Collect(collect::Collect<Box<Regex>, RangeInclusive<usize>, String>),
24    Any(any::Any<Box<[Regex]>>),
25    All(Box<[Regex]>),
26}
27
28#[derive(Debug, Clone)]
29pub enum Shrinker {
30    Empty,
31    Text(String),
32    Range(char::Shrinker),
33    All(all::Shrinker<Box<[Shrinker]>>),
34    Collect(collect::Shrinker<Shrinker, String>),
35}
36
37#[derive(Clone)]
38pub struct Error(Box<regex_syntax::Error>);
39
40impl fmt::Debug for Error {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        f.debug_tuple("Error").field(&self.0).finish()
43    }
44}
45
46impl Regex {
47    pub(crate) fn new(pattern: &str, repeats: Option<u32>) -> Result<Self, Error> {
48        let hir = Parser::new().parse(pattern)?;
49        Ok(Regex::from_hir(hir, repeats.unwrap_or(REPEATS)))
50    }
51}
52
53impl From<regex_syntax::Error> for Error {
54    fn from(value: regex_syntax::Error) -> Self {
55        Error(Box::new(value))
56    }
57}
58
59impl From<&ClassUnicodeRange> for Regex {
60    fn from(value: &ClassUnicodeRange) -> Self {
61        Regex::Range(value.start()..=value.end())
62    }
63}
64
65impl From<&ClassBytesRange> for Regex {
66    fn from(value: &ClassBytesRange) -> Self {
67        Regex::Range(value.start() as char..=value.end() as char)
68    }
69}
70
71impl Regex {
72    const fn is_empty(&self) -> bool {
73        matches!(self, Self::Empty)
74    }
75
76    fn from_iter(
77        trees: impl IntoIterator<Item = Regex>,
78        merge: impl FnOnce(Box<[Regex]>) -> Regex,
79    ) -> Regex {
80        let mut buffer = Vec::new();
81        let mut last = None;
82        for tree in trees {
83            if !tree.is_empty() {
84                buffer.extend(last.replace(tree));
85            }
86        }
87        match last {
88            Some(tree) if buffer.is_empty() => tree,
89            Some(tree) => {
90                buffer.push(tree);
91                merge(buffer.into_boxed_slice())
92            }
93            None => Self::Empty,
94        }
95    }
96
97    fn from_hir(hir: Hir, repeats: u32) -> Self {
98        match hir.into_kind() {
99            HirKind::Empty | HirKind::Look(_) => Self::Empty,
100            HirKind::Literal(literal) => {
101                String::from_utf8(literal.0.to_vec()).map_or(Self::Empty, Self::Text)
102            }
103            HirKind::Capture(Capture { sub, .. }) => Self::from_hir(*sub, repeats),
104            HirKind::Repetition(Repetition { min, max, sub, .. }) => {
105                let tree = Self::from_hir(*sub, repeats / 2);
106                if tree.is_empty() {
107                    return Self::Empty;
108                }
109                let low = min;
110                let high = max.unwrap_or(repeats.max(low));
111                if low == 1 && high == 1 {
112                    return tree;
113                }
114                Self::Collect(collect(
115                    Box::new(tree),
116                    low as usize..=high as usize,
117                    Some(low as _),
118                ))
119            }
120            HirKind::Class(Class::Unicode(class)) => {
121                Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
122                    Self::Any(Any(trees))
123                })
124            }
125            HirKind::Class(Class::Bytes(class)) => {
126                Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
127                    Self::Any(Any(trees))
128                })
129            }
130            HirKind::Concat(hirs) => Self::from_iter(
131                hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
132                Self::All,
133            ),
134            HirKind::Alternation(hirs) => Self::from_iter(
135                hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
136                |trees| Self::Any(Any(trees)),
137            ),
138        }
139    }
140}
141
142impl Generate for Regex {
143    type Item = String;
144    type Shrink = Shrinker;
145
146    fn generate(&self, state: &mut State) -> Self::Shrink {
147        match self {
148            Regex::Empty => Shrinker::Empty,
149            Regex::Text(text) => Shrinker::Text(text.clone()),
150            Regex::Range(range) => Shrinker::Range(range.generate(state)),
151            Regex::Collect(collect) => Shrinker::Collect(collect.generate(state)),
152            Regex::Any(any) => any.generate(state).0.unwrap_or(Shrinker::Empty),
153            Regex::All(all) => Shrinker::All(all.generate(state)),
154        }
155    }
156
157    fn constant(&self) -> bool {
158        match self {
159            Regex::Empty | Regex::Text(_) => true,
160            Regex::Range(range) => range.constant(),
161            Regex::Collect(collect) => collect.constant(),
162            Regex::Any(any) => any.constant(),
163            Regex::All(all) => all.constant(),
164        }
165    }
166}
167
168impl Shrink for Shrinker {
169    type Item = String;
170
171    fn item(&self) -> Self::Item {
172        fn descend(shrinker: &Shrinker, buffer: &mut String) {
173            match shrinker {
174                Shrinker::Empty => {}
175                Shrinker::Text(text) => buffer.push_str(text),
176                Shrinker::Range(shrinker) => buffer.push(shrinker.item()),
177                Shrinker::All(shrinker) => {
178                    for shrinker in shrinker.shrinkers.iter() {
179                        descend(shrinker, buffer);
180                    }
181                }
182                Shrinker::Collect(shrinker) => {
183                    for shrinker in shrinker.shrinkers.iter() {
184                        descend(shrinker, buffer);
185                    }
186                }
187            }
188        }
189
190        let mut buffer = String::new();
191        descend(self, &mut buffer);
192        buffer
193    }
194
195    fn shrink(&mut self) -> Option<Self> {
196        match self {
197            Self::Empty | Self::Text(_) => None,
198            Self::Range(shrinker) => Some(Self::Range(shrinker.shrink()?)),
199            Self::All(shrinker) => Some(Self::All(shrinker.shrink()?)),
200            Self::Collect(shrinker) => Some(Self::Collect(shrinker.shrink()?)),
201        }
202    }
203}