1use crate::{
2 REPEATS, all,
3 any::Any,
4 collect::{self},
5 generate::Generate,
6 prelude::collect,
7 primitive::{Range, char},
8 shrink::Shrink,
9 state::State,
10};
11use core::fmt;
12use regex_syntax::{
13 Parser,
14 hir::{Capture, Class, ClassBytesRange, ClassUnicodeRange, Hir, HirKind, Repetition},
15};
16
17#[derive(Debug, Clone)]
18pub enum Regex {
19 Empty,
20 Text(String),
21 Range(Range<char>),
22 Collect(collect::Collect<Box<Regex>, Range<usize>, String>),
23 Any(Any<Box<[Regex]>>),
24 All(Box<[Regex]>),
25}
26
27#[derive(Debug, Clone)]
28pub enum Shrinker {
29 Empty,
30 Text(String),
31 Range(char::Shrinker),
32 All(all::Shrinker<Box<[Shrinker]>>),
33 Collect(collect::Shrinker<Shrinker, String>),
34}
35
36#[derive(Clone)]
37pub struct Error(Box<regex_syntax::Error>);
38
39impl fmt::Debug for Error {
40 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41 f.debug_tuple("Error").field(&self.0).finish()
42 }
43}
44
45impl Regex {
46 pub(crate) fn new(pattern: &str, repeats: Option<u32>) -> Result<Self, Error> {
47 let hir = Parser::new().parse(pattern)?;
48 Ok(Regex::from_hir(hir, repeats.unwrap_or(REPEATS)))
49 }
50}
51
52impl From<regex_syntax::Error> for Error {
53 fn from(value: regex_syntax::Error) -> Self {
54 Error(Box::new(value))
55 }
56}
57
58impl From<&ClassUnicodeRange> for Regex {
59 fn from(value: &ClassUnicodeRange) -> Self {
60 Regex::Range(Range(value.start(), value.end()))
61 }
62}
63
64impl From<&ClassBytesRange> for Regex {
65 fn from(value: &ClassBytesRange) -> Self {
66 let start = char::from_u32(value.start() as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
67 let end = char::from_u32(value.end() as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
68 Regex::Range(Range(start, end))
69 }
70}
71
72impl Regex {
73 const fn is_empty(&self) -> bool {
74 matches!(self, Self::Empty)
75 }
76
77 fn from_iter(
78 trees: impl IntoIterator<Item = Regex>,
79 merge: impl FnOnce(Box<[Regex]>) -> Regex,
80 ) -> Regex {
81 let mut buffer = Vec::new();
82 let mut last = None;
83 for tree in trees {
84 if !tree.is_empty() {
85 buffer.extend(last.replace(tree));
86 }
87 }
88 match last {
89 Some(tree) if buffer.is_empty() => tree,
90 Some(tree) => {
91 buffer.push(tree);
92 merge(buffer.into_boxed_slice())
93 }
94 None => Self::Empty,
95 }
96 }
97
98 fn from_hir(hir: Hir, repeats: u32) -> Self {
99 match hir.into_kind() {
100 HirKind::Empty | HirKind::Look(_) => Self::Empty,
101 HirKind::Literal(literal) => {
102 String::from_utf8(literal.0.to_vec()).map_or(Self::Empty, Self::Text)
103 }
104 HirKind::Capture(Capture { sub, .. }) => Self::from_hir(*sub, repeats),
105 HirKind::Repetition(Repetition { min, max, sub, .. }) => {
106 let tree = Self::from_hir(*sub, repeats / 2);
107 if tree.is_empty() {
108 return Self::Empty;
109 }
110 let low = min;
111 let high = max.unwrap_or(repeats.max(low));
112 if low == 1 && high == 1 {
113 return tree;
114 }
115 Self::Collect(collect(
116 Box::new(tree),
117 (low as usize..=high as usize).into(),
118 ))
119 }
120 HirKind::Class(Class::Unicode(class)) => {
121 Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
122 Self::Any(Any(trees))
123 })
124 }
125 HirKind::Class(Class::Bytes(class)) => {
126 Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
127 Self::Any(Any(trees))
128 })
129 }
130 HirKind::Concat(hirs) => Self::from_iter(
131 hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
132 Self::All,
133 ),
134 HirKind::Alternation(hirs) => Self::from_iter(
135 hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
136 |trees| Self::Any(Any(trees)),
137 ),
138 }
139 }
140}
141
142impl Generate for Regex {
143 type Item = String;
144 type Shrink = Shrinker;
145
146 const CARDINALITY: Option<u128> = None;
147
148 fn generate(&self, state: &mut State) -> Self::Shrink {
149 match self {
150 Regex::Empty => Shrinker::Empty,
151 Regex::Text(text) => Shrinker::Text(text.clone()),
152 Regex::Range(range) => Shrinker::Range(range.generate(state)),
153 Regex::Collect(collect) => Shrinker::Collect(collect.generate(state)),
154 Regex::Any(any) => any.generate(state).0.unwrap_or(Shrinker::Empty),
155 Regex::All(all) => Shrinker::All(all.generate(state)),
156 }
157 }
158
159 fn cardinality(&self) -> Option<u128> {
160 match self {
161 Regex::Empty | Regex::Text(_) => Some(1),
162 Regex::Range(range) => range.cardinality(),
163 Regex::Collect(collect) => collect.cardinality(),
164 Regex::Any(any) => any.cardinality(),
165 Regex::All(all) => all.cardinality(),
166 }
167 }
168}
169
170impl Shrink for Shrinker {
171 type Item = String;
172
173 fn item(&self) -> Self::Item {
174 fn descend(shrinker: &Shrinker, buffer: &mut String) {
175 match shrinker {
176 Shrinker::Empty => {}
177 Shrinker::Text(text) => buffer.push_str(text),
178 Shrinker::Range(shrinker) => buffer.push(shrinker.item()),
179 Shrinker::All(shrinker) => {
180 for shrinker in shrinker.shrinkers.iter() {
181 descend(shrinker, buffer);
182 }
183 }
184 Shrinker::Collect(shrinker) => {
185 for shrinker in shrinker.shrinkers.iter() {
186 descend(shrinker, buffer);
187 }
188 }
189 }
190 }
191
192 let mut buffer = String::new();
193 descend(self, &mut buffer);
194 buffer
195 }
196
197 fn shrink(&mut self) -> Option<Self> {
198 match self {
199 Self::Empty | Self::Text(_) => None,
200 Self::Range(shrinker) => Some(Self::Range(shrinker.shrink()?)),
201 Self::All(shrinker) => Some(Self::All(shrinker.shrink()?)),
202 Self::Collect(shrinker) => Some(Self::Collect(shrinker.shrink()?)),
203 }
204 }
205}