1#![deny(missing_docs)]
5
6pub(crate) mod accel;
7pub(crate) mod engine;
8pub(crate) mod simd;
9
10#[doc(hidden)]
11pub use engine::calc_potential_start;
12#[doc(hidden)]
13pub use engine::calc_prefix_sets;
14#[doc(hidden)]
15pub use resharp_algebra::solver::TSetId;
16
17pub use resharp_algebra::nulls::Nullability;
18pub use resharp_algebra::NodeId;
19pub use resharp_algebra::RegexBuilder;
20
21use std::sync::Mutex;
22
23#[derive(Debug)]
25#[non_exhaustive]
26pub enum Error {
27 Parse(resharp_parser::ResharpError),
29 Algebra(resharp_algebra::AlgebraError),
31 CapacityExceeded,
33}
34
35impl std::fmt::Display for Error {
36 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37 match self {
38 Error::Parse(e) => write!(f, "parse error: {}", e),
39 Error::Algebra(e) => write!(f, "{}", e),
40 Error::CapacityExceeded => write!(f, "DFA state capacity exceeded"),
41 }
42 }
43}
44
45impl std::error::Error for Error {
46 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
47 match self {
48 Error::Parse(e) => Some(e),
49 Error::Algebra(e) => Some(e),
50 Error::CapacityExceeded => None,
51 }
52 }
53}
54
55impl From<resharp_parser::ResharpError> for Error {
56 fn from(e: resharp_parser::ResharpError) -> Self {
57 Error::Parse(e)
58 }
59}
60
61impl From<resharp_algebra::AlgebraError> for Error {
62 fn from(e: resharp_algebra::AlgebraError) -> Self {
63 Error::Algebra(e)
64 }
65}
66
67pub struct EngineOptions {
69 pub dfa_threshold: usize,
71 pub max_dfa_capacity: usize,
73 pub lookahead_context_max: u32,
75}
76
77impl Default for EngineOptions {
78 fn default() -> Self {
79 Self {
80 dfa_threshold: 0,
81 max_dfa_capacity: u16::MAX as usize,
82 lookahead_context_max: 800,
83 }
84 }
85}
86
87#[derive(Debug, Clone, Copy, PartialEq, Eq)]
89#[repr(C)]
90pub struct Match {
91 pub start: usize,
93 pub end: usize,
95}
96
97struct RegexInner {
98 b: RegexBuilder,
99 fwd: engine::LazyDFA,
100 rev: engine::LazyDFA,
101 nulls_buf: Vec<usize>,
102}
103
104pub struct Regex {
108 inner: Mutex<RegexInner>,
109 fwd_prefix: Option<accel::FwdPrefixSearch>,
110 fixed_length: Option<u32>,
111 #[allow(dead_code)]
112 max_length: Option<u32>,
113 empty_nullable: bool,
114}
115
116impl Regex {
117 pub fn new(pattern: &str) -> Result<Regex, Error> {
119 Self::with_options(pattern, EngineOptions::default())
120 }
121
122 pub fn with_options(pattern: &str, opts: EngineOptions) -> Result<Regex, Error> {
124 let mut b = RegexBuilder::new();
125 b.lookahead_context_max = opts.lookahead_context_max;
126 let node = resharp_parser::parse_ast(&mut b, pattern)?;
127 Self::from_node(b, node, opts)
128 }
129
130 pub fn from_node(
132 mut b: RegexBuilder,
133 node: NodeId,
134 opts: EngineOptions,
135 ) -> Result<Regex, Error> {
136 let empty_nullable = b
137 .nullability_emptystring(node)
138 .has(Nullability::EMPTYSTRING);
139
140 let fwd_start = b.strip_lb(node)?;
141 let rev_start = b.reverse(node)?;
142 let ts_rev_start = b.mk_concat(NodeId::TS, rev_start);
143
144 let fixed_length = b.get_fixed_length(node);
145 let (min_len, max_len) = b.get_min_max_length(node);
146 let max_length = if max_len != u32::MAX {
147 Some(max_len)
148 } else {
149 None
150 };
151 let can_match_fwd = !b.is_infinite(node) && !b.contains_look(node);
152
153 let max_cap = opts.max_dfa_capacity.min(u16::MAX as usize);
154 let mut fwd = engine::LazyDFA::new(&mut b, fwd_start, max_cap)?;
155 let mut rev = engine::LazyDFA::new(&mut b, ts_rev_start, max_cap)?;
156
157 if opts.dfa_threshold > 0 {
158 fwd.precompile(&mut b, opts.dfa_threshold);
159 rev.precompile(&mut b, opts.dfa_threshold);
160 }
161
162 let fwd_prefix = if min_len > 0 && can_match_fwd {
163 engine::build_fwd_prefix(&mut b, node)?
164 } else {
165 None
166 };
167
168 rev.compute_skip(&mut b, rev_start)?;
169
170 Ok(Regex {
171 inner: Mutex::new(RegexInner {
172 b,
173 fwd,
174 rev,
175 nulls_buf: Vec::new(),
176 }),
177 fwd_prefix,
178 fixed_length,
179 max_length,
180 empty_nullable,
181 })
182 }
183
184 pub fn dfa_stats(&self) -> (usize, usize) {
186 let inner = self.inner.lock().unwrap();
187 (inner.fwd.state_nodes.len(), inner.rev.state_nodes.len())
188 }
189
190 pub fn find_all(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
192 if input.is_empty() {
193 return if self.empty_nullable {
194 Ok(vec![Match { start: 0, end: 0 }])
195 } else {
196 Ok(vec![])
197 };
198 }
199 if self.fwd_prefix.is_some() {
200 return self.find_all_fwd_prefix(input);
201 }
202 self.find_all_dfa(input)
203 }
204
205 pub fn effects_debug(&self) -> String {
207 let inner = self.inner.lock().unwrap();
208 let rev = &inner.rev;
209 let mut out = String::new();
210 for (i, &eid) in rev.effects_id.iter().enumerate() {
211 if eid != 0 {
212 let nulls: Vec<String> = rev.effects[eid as usize]
213 .iter()
214 .map(|n| format!("(mask={},rel={})", n.mask.0, n.rel))
215 .collect();
216 out += &format!(" state[{}] eid={} nulls=[{}]\n", i, eid, nulls.join(", "));
217 }
218 }
219 out
220 }
221
222 pub fn collect_rev_nulls_debug(&self, input: &[u8]) -> Vec<usize> {
224 let inner = &mut *self.inner.lock().unwrap();
225 inner.nulls_buf.clear();
226 inner
227 .rev
228 .collect_rev(&mut inner.b, input.len() - 1, input, &mut inner.nulls_buf)
229 .unwrap();
230 inner.nulls_buf.clone()
231 }
232
233 fn find_all_dfa(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
234 let inner = &mut *self.inner.lock().unwrap();
235
236 inner.nulls_buf.clear();
237
238 inner
239 .rev
240 .collect_rev(&mut inner.b, input.len() - 1, input, &mut inner.nulls_buf)?;
241
242 let mut matches = Vec::new();
243 if let Some(fl) = self.fixed_length {
244 let fl = fl as usize;
245 let mut last_end = 0;
246 for &start in inner.nulls_buf.iter().rev() {
247 if start >= last_end && start + fl <= input.len() {
248 matches.push(Match {
249 start,
250 end: start + fl,
251 });
252 last_end = start + fl;
253 }
254 }
255 } else {
256 inner
257 .fwd
258 .scan_fwd_all(&mut inner.b, &inner.nulls_buf, input, &mut matches)?;
259 }
260
261 if inner.rev.effects_id[inner.rev.initial as usize] != 0 {
262 matches.push(Match {
263 start: input.len(),
264 end: input.len(),
265 });
266 }
267
268 Ok(matches)
269 }
270
271 fn find_all_fwd_prefix(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
272 let fwd_prefix = self.fwd_prefix.as_ref().unwrap();
273 let mut matches = Vec::new();
274 let mut search_start = 0;
275
276 if self.fixed_length.is_some() && fwd_prefix.find_all_literal(input, &mut matches) {
277 } else if let Some(fl) = self.fixed_length {
279 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
280 let end = candidate + fl as usize;
281 if end <= input.len() {
282 matches.push(Match {
283 start: candidate,
284 end,
285 });
286 search_start = end;
287 } else {
288 break;
289 }
290 }
291 } else {
292 let inner = &mut *self.inner.lock().unwrap();
293 let prefix_len = fwd_prefix.len();
294 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
295 let state = inner
296 .fwd
297 .walk_input(&mut inner.b, candidate, prefix_len, input)?;
298 if state != 0 {
299 let max_end = inner.fwd.scan_fwd_from(
300 &mut inner.b,
301 state,
302 candidate + prefix_len,
303 input,
304 )?;
305 if max_end != engine::NO_MATCH && max_end > candidate {
306 matches.push(Match {
307 start: candidate,
308 end: max_end,
309 });
310 search_start = max_end;
311 continue;
312 }
313 }
314 search_start = candidate + 1;
315 }
316 }
317
318 Ok(matches)
319 }
320
321 pub fn find_anchored(&self, input: &[u8]) -> Result<Option<Match>, Error> {
323 if input.is_empty() {
324 return if self.empty_nullable {
325 Ok(Some(Match { start: 0, end: 0 }))
326 } else {
327 Ok(None)
328 };
329 }
330 let inner = &mut *self.inner.lock().unwrap();
331 let max_end = inner.fwd.scan_fwd(&mut inner.b, 0, input)?;
332 if max_end != engine::NO_MATCH {
333 Ok(Some(Match {
334 start: 0,
335 end: max_end,
336 }))
337 } else {
338 Ok(None)
339 }
340 }
341
342 pub fn is_match(&self, input: &[u8]) -> Result<bool, Error> {
344 if input.is_empty() {
345 return Ok(self.empty_nullable);
346 }
347 let inner = &mut *self.inner.lock().unwrap();
348 if inner.rev.effects_id[inner.rev.initial as usize] != 0 {
349 return Ok(true);
350 }
351 inner
352 .rev
353 .any_nullable_rev(&mut inner.b, input.len() - 1, input)
354 }
355}