Skip to main content

checkito/
regex.rs

1use crate::{
2    REPEATS, all,
3    any::Any,
4    collect::{self},
5    generate::Generate,
6    prelude::collect,
7    primitive::{Range, char},
8    shrink::Shrink,
9    state::State,
10};
11use core::fmt;
12use regex_syntax::{
13    Parser,
14    hir::{Capture, Class, ClassBytesRange, ClassUnicodeRange, Hir, HirKind, Repetition},
15};
16
17#[derive(Debug, Clone)]
18pub enum Regex {
19    Empty,
20    Text(String),
21    Range(Range<char>),
22    Collect(collect::Collect<Box<Regex>, Range<usize>, String>),
23    Any(Any<Box<[Regex]>>),
24    All(Box<[Regex]>),
25}
26
27#[derive(Debug, Clone)]
28pub enum Shrinker {
29    Empty,
30    Text(String),
31    Range(char::Shrinker),
32    All(all::Shrinker<Box<[Shrinker]>>),
33    Collect(collect::Shrinker<Shrinker, String>),
34}
35
36#[derive(Clone)]
37pub struct Error(Box<regex_syntax::Error>);
38
39impl fmt::Debug for Error {
40    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41        f.debug_tuple("Error").field(&self.0).finish()
42    }
43}
44
45impl Regex {
46    pub(crate) fn new(pattern: &str, repeats: Option<u32>) -> Result<Self, Error> {
47        let hir = Parser::new().parse(pattern)?;
48        Ok(Regex::from_hir(hir, repeats.unwrap_or(REPEATS)))
49    }
50}
51
52impl From<regex_syntax::Error> for Error {
53    fn from(value: regex_syntax::Error) -> Self {
54        Error(Box::new(value))
55    }
56}
57
58impl From<&ClassUnicodeRange> for Regex {
59    fn from(value: &ClassUnicodeRange) -> Self {
60        Regex::Range(Range(value.start(), value.end()))
61    }
62}
63
64impl From<&ClassBytesRange> for Regex {
65    fn from(value: &ClassBytesRange) -> Self {
66        let start = char::from_u32(value.start() as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
67        let end = char::from_u32(value.end() as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
68        Regex::Range(Range(start, end))
69    }
70}
71
72impl Regex {
73    const fn is_empty(&self) -> bool {
74        matches!(self, Self::Empty)
75    }
76
77    fn from_iter(
78        trees: impl IntoIterator<Item = Regex>,
79        merge: impl FnOnce(Box<[Regex]>) -> Regex,
80    ) -> Regex {
81        let mut buffer = Vec::new();
82        let mut last = None;
83        for tree in trees {
84            if !tree.is_empty() {
85                buffer.extend(last.replace(tree));
86            }
87        }
88        match last {
89            Some(tree) if buffer.is_empty() => tree,
90            Some(tree) => {
91                buffer.push(tree);
92                merge(buffer.into_boxed_slice())
93            }
94            None => Self::Empty,
95        }
96    }
97
98    fn from_hir(hir: Hir, repeats: u32) -> Self {
99        match hir.into_kind() {
100            HirKind::Empty | HirKind::Look(_) => Self::Empty,
101            HirKind::Literal(literal) => {
102                String::from_utf8(literal.0.to_vec()).map_or(Self::Empty, Self::Text)
103            }
104            HirKind::Capture(Capture { sub, .. }) => Self::from_hir(*sub, repeats),
105            HirKind::Repetition(Repetition { min, max, sub, .. }) => {
106                let tree = Self::from_hir(*sub, repeats / 2);
107                if tree.is_empty() {
108                    return Self::Empty;
109                }
110                let low = min;
111                let high = max.unwrap_or(repeats.max(low));
112                if low == 1 && high == 1 {
113                    return tree;
114                }
115                Self::Collect(collect(
116                    Box::new(tree),
117                    (low as usize..=high as usize).into(),
118                ))
119            }
120            HirKind::Class(Class::Unicode(class)) => {
121                Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
122                    Self::Any(Any(trees))
123                })
124            }
125            HirKind::Class(Class::Bytes(class)) => {
126                Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
127                    Self::Any(Any(trees))
128                })
129            }
130            HirKind::Concat(hirs) => Self::from_iter(
131                hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
132                Self::All,
133            ),
134            HirKind::Alternation(hirs) => Self::from_iter(
135                hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
136                |trees| Self::Any(Any(trees)),
137            ),
138        }
139    }
140}
141
142impl Generate for Regex {
143    type Item = String;
144    type Shrink = Shrinker;
145
146    const CARDINALITY: Option<u128> = None;
147
148    fn generate(&self, state: &mut State) -> Self::Shrink {
149        match self {
150            Regex::Empty => Shrinker::Empty,
151            Regex::Text(text) => Shrinker::Text(text.clone()),
152            Regex::Range(range) => Shrinker::Range(range.generate(state)),
153            Regex::Collect(collect) => Shrinker::Collect(collect.generate(state)),
154            Regex::Any(any) => any.generate(state).0.unwrap_or(Shrinker::Empty),
155            Regex::All(all) => Shrinker::All(all.generate(state)),
156        }
157    }
158
159    fn cardinality(&self) -> Option<u128> {
160        match self {
161            Regex::Empty | Regex::Text(_) => Some(1),
162            Regex::Range(range) => range.cardinality(),
163            Regex::Collect(collect) => collect.cardinality(),
164            Regex::Any(any) => any.cardinality(),
165            Regex::All(all) => all.cardinality(),
166        }
167    }
168}
169
170impl Shrink for Shrinker {
171    type Item = String;
172
173    fn item(&self) -> Self::Item {
174        fn descend(shrinker: &Shrinker, buffer: &mut String) {
175            match shrinker {
176                Shrinker::Empty => {}
177                Shrinker::Text(text) => buffer.push_str(text),
178                Shrinker::Range(shrinker) => buffer.push(shrinker.item()),
179                Shrinker::All(shrinker) => {
180                    for shrinker in shrinker.shrinkers.iter() {
181                        descend(shrinker, buffer);
182                    }
183                }
184                Shrinker::Collect(shrinker) => {
185                    for shrinker in shrinker.shrinkers.iter() {
186                        descend(shrinker, buffer);
187                    }
188                }
189            }
190        }
191
192        let mut buffer = String::new();
193        descend(self, &mut buffer);
194        buffer
195    }
196
197    fn shrink(&mut self) -> Option<Self> {
198        match self {
199            Self::Empty | Self::Text(_) => None,
200            Self::Range(shrinker) => Some(Self::Range(shrinker.shrink()?)),
201            Self::All(shrinker) => Some(Self::All(shrinker.shrink()?)),
202            Self::Collect(shrinker) => Some(Self::Collect(shrinker.shrink()?)),
203        }
204    }
205}