1#![cfg(feature = "regex")]
2
3use crate::{
4 REPEATS, all,
5 any::{self, Any},
6 collect::{self},
7 generate::{Generate, State},
8 prelude::collect,
9 primitive::char,
10 shrink::Shrink,
11};
12use core::{fmt, ops::RangeInclusive};
13use regex_syntax::{
14 Parser,
15 hir::{Capture, Class, ClassBytesRange, ClassUnicodeRange, Hir, HirKind, Repetition},
16};
17
18#[derive(Debug, Clone)]
19pub enum Regex {
20 Empty,
21 Text(String),
22 Range(RangeInclusive<char>),
23 Collect(collect::Collect<Box<Regex>, RangeInclusive<usize>, String>),
24 Any(any::Any<Box<[Regex]>>),
25 All(Box<[Regex]>),
26}
27
28#[derive(Debug, Clone)]
29pub enum Shrinker {
30 Empty,
31 Text(String),
32 Range(char::Shrinker),
33 All(all::Shrinker<Box<[Shrinker]>>),
34 Collect(collect::Shrinker<Shrinker, String>),
35}
36
/// Error reported when a pattern cannot be turned into a [`Regex`].
///
/// Wraps the underlying `regex_syntax::Error` in a `Box`, so the wrapper
/// itself is only one pointer wide.
#[derive(Clone)]
pub struct Error(Box<regex_syntax::Error>);
39
40impl fmt::Debug for Error {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 f.debug_tuple("Error").field(&self.0).finish()
43 }
44}
45
46impl Regex {
47 pub(crate) fn new(pattern: &str, repeats: Option<u32>) -> Result<Self, Error> {
48 let hir = Parser::new().parse(pattern)?;
49 Ok(Regex::from_hir(hir, repeats.unwrap_or(REPEATS)))
50 }
51}
52
53impl From<regex_syntax::Error> for Error {
54 fn from(value: regex_syntax::Error) -> Self {
55 Error(Box::new(value))
56 }
57}
58
59impl From<&ClassUnicodeRange> for Regex {
60 fn from(value: &ClassUnicodeRange) -> Self {
61 Regex::Range(value.start()..=value.end())
62 }
63}
64
65impl From<&ClassBytesRange> for Regex {
66 fn from(value: &ClassBytesRange) -> Self {
67 Regex::Range(value.start() as char..=value.end() as char)
68 }
69}
70
71impl Regex {
72 const fn is_empty(&self) -> bool {
73 matches!(self, Self::Empty)
74 }
75
76 fn from_iter(
77 trees: impl IntoIterator<Item = Regex>,
78 merge: impl FnOnce(Box<[Regex]>) -> Regex,
79 ) -> Regex {
80 let mut buffer = Vec::new();
81 let mut last = None;
82 for tree in trees {
83 if !tree.is_empty() {
84 buffer.extend(last.replace(tree));
85 }
86 }
87 match last {
88 Some(tree) if buffer.is_empty() => tree,
89 Some(tree) => {
90 buffer.push(tree);
91 merge(buffer.into_boxed_slice())
92 }
93 None => Self::Empty,
94 }
95 }
96
97 fn from_hir(hir: Hir, repeats: u32) -> Self {
98 match hir.into_kind() {
99 HirKind::Empty | HirKind::Look(_) => Self::Empty,
100 HirKind::Literal(literal) => {
101 String::from_utf8(literal.0.to_vec()).map_or(Self::Empty, Self::Text)
102 }
103 HirKind::Capture(Capture { sub, .. }) => Self::from_hir(*sub, repeats),
104 HirKind::Repetition(Repetition { min, max, sub, .. }) => {
105 let tree = Self::from_hir(*sub, repeats / 2);
106 if tree.is_empty() {
107 return Self::Empty;
108 }
109 let low = min;
110 let high = max.unwrap_or(repeats.max(low));
111 if low == 1 && high == 1 {
112 return tree;
113 }
114 Self::Collect(collect(
115 Box::new(tree),
116 low as usize..=high as usize,
117 Some(low as _),
118 ))
119 }
120 HirKind::Class(Class::Unicode(class)) => {
121 Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
122 Self::Any(Any(trees))
123 })
124 }
125 HirKind::Class(Class::Bytes(class)) => {
126 Self::from_iter(class.ranges().iter().map(Self::from), |trees| {
127 Self::Any(Any(trees))
128 })
129 }
130 HirKind::Concat(hirs) => Self::from_iter(
131 hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
132 Self::All,
133 ),
134 HirKind::Alternation(hirs) => Self::from_iter(
135 hirs.into_iter().map(|hir| Self::from_hir(hir, repeats)),
136 |trees| Self::Any(Any(trees)),
137 ),
138 }
139 }
140}
141
142impl Generate for Regex {
143 type Item = String;
144 type Shrink = Shrinker;
145
146 fn generate(&self, state: &mut State) -> Self::Shrink {
147 match self {
148 Regex::Empty => Shrinker::Empty,
149 Regex::Text(text) => Shrinker::Text(text.clone()),
150 Regex::Range(range) => Shrinker::Range(range.generate(state)),
151 Regex::Collect(collect) => Shrinker::Collect(collect.generate(state)),
152 Regex::Any(any) => any.generate(state).0.unwrap_or(Shrinker::Empty),
153 Regex::All(all) => Shrinker::All(all.generate(state)),
154 }
155 }
156
157 fn constant(&self) -> bool {
158 match self {
159 Regex::Empty | Regex::Text(_) => true,
160 Regex::Range(range) => range.constant(),
161 Regex::Collect(collect) => collect.constant(),
162 Regex::Any(any) => any.constant(),
163 Regex::All(all) => all.constant(),
164 }
165 }
166}
167
168impl Shrink for Shrinker {
169 type Item = String;
170
171 fn item(&self) -> Self::Item {
172 fn descend(shrinker: &Shrinker, buffer: &mut String) {
173 match shrinker {
174 Shrinker::Empty => {}
175 Shrinker::Text(text) => buffer.push_str(text),
176 Shrinker::Range(shrinker) => buffer.push(shrinker.item()),
177 Shrinker::All(shrinker) => {
178 for shrinker in shrinker.shrinkers.iter() {
179 descend(shrinker, buffer);
180 }
181 }
182 Shrinker::Collect(shrinker) => {
183 for shrinker in shrinker.shrinkers.iter() {
184 descend(shrinker, buffer);
185 }
186 }
187 }
188 }
189
190 let mut buffer = String::new();
191 descend(self, &mut buffer);
192 buffer
193 }
194
195 fn shrink(&mut self) -> Option<Self> {
196 match self {
197 Self::Empty | Self::Text(_) => None,
198 Self::Range(shrinker) => Some(Self::Range(shrinker.shrink()?)),
199 Self::All(shrinker) => Some(Self::All(shrinker.shrink()?)),
200 Self::Collect(shrinker) => Some(Self::Collect(shrinker.shrink()?)),
201 }
202 }
203}