1#![warn(missing_docs)]
11#![warn(rust_2018_idioms)]
12
13pub mod dfa;
14pub mod engine;
15pub mod error;
16pub mod hir;
17pub mod literal;
18pub mod nfa;
19pub mod parser;
20pub mod vm;
21
22#[cfg(feature = "jit")]
23pub mod jit;
24
25#[cfg(feature = "simd")]
26pub mod simd;
27
28pub use error::{Error, Result};
29
30use engine::CompiledRegex;
31use std::collections::HashMap;
32use std::sync::Arc;
33
34#[derive(Debug, Clone, Default)]
36pub struct RegexBuilder {
37 pattern: String,
38 jit: bool,
40 optimize_prefixes: bool,
43}
44
45impl RegexBuilder {
46 pub fn new(pattern: &str) -> Self {
48 Self {
49 pattern: pattern.to_string(),
50 jit: false,
51 optimize_prefixes: false,
52 }
53 }
54
55 pub fn jit(mut self, enabled: bool) -> Self {
76 self.jit = enabled;
77 self
78 }
79
80 pub fn optimize_prefixes(mut self, enabled: bool) -> Self {
101 self.optimize_prefixes = enabled;
102 self
103 }
104
105 pub fn build(self) -> Result<Regex> {
107 let ast = parser::parse(&self.pattern)?;
108 let mut hir_result = hir::translate(&ast)?;
109
110 if self.optimize_prefixes {
112 hir_result = hir::optimize_prefixes(hir_result);
113 }
114
115 let named_groups = Arc::new(hir_result.props.named_groups.clone());
116
117 let inner = if self.jit {
118 engine::compile_with_jit(&hir_result)?
119 } else {
120 engine::compile_from_hir(&hir_result)?
122 };
123
124 Ok(Regex {
125 inner,
126 pattern: self.pattern,
127 named_groups,
128 })
129 }
130}
131
132#[derive(Debug)]
134pub struct Regex {
135 inner: CompiledRegex,
136 pattern: String,
137 named_groups: Arc<HashMap<String, u32>>,
139}
140
141impl Regex {
142 pub fn new(pattern: &str) -> Result<Regex> {
147 let ast = parser::parse(pattern)?;
148 let hir = hir::translate(&ast)?;
149 let named_groups = Arc::new(hir.props.named_groups.clone());
150 let inner = engine::compile_from_hir(&hir)?;
152
153 Ok(Regex {
154 inner,
155 pattern: pattern.to_string(),
156 named_groups,
157 })
158 }
159
160 pub fn capture_names(&self) -> impl Iterator<Item = &str> {
162 self.named_groups.keys().map(|s| s.as_str())
163 }
164
165 pub fn as_str(&self) -> &str {
167 &self.pattern
168 }
169
170 pub fn is_match(&self, text: &str) -> bool {
172 self.inner.is_match(text.as_bytes())
173 }
174
175 pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
177 self.inner
178 .find(text.as_bytes())
179 .map(|(start, end)| Match { text, start, end })
180 }
181
182 pub fn find_iter<'a>(&'a self, text: &'a str) -> Matches<'a> {
184 Matches::new(self, text)
185 }
186
187 pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
189 self.inner.captures(text.as_bytes()).map(|slots| Captures {
190 text,
191 slots,
192 named_groups: Arc::clone(&self.named_groups),
193 })
194 }
195
196 pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CapturesIter<'r, 't> {
198 CapturesIter {
199 regex: self,
200 text,
201 last_end: 0,
202 }
203 }
204
205 pub fn replace<'t>(&self, text: &'t str, rep: &str) -> std::borrow::Cow<'t, str> {
207 match self.find(text) {
208 None => std::borrow::Cow::Borrowed(text),
209 Some(m) => {
210 let mut result = String::with_capacity(text.len() + rep.len());
211 result.push_str(&text[..m.start()]);
212 result.push_str(rep);
213 result.push_str(&text[m.end()..]);
214 std::borrow::Cow::Owned(result)
215 }
216 }
217 }
218
219 pub fn engine_name(&self) -> &'static str {
221 self.inner.engine_name()
222 }
223
224 pub fn replace_all<'t>(&self, text: &'t str, rep: &str) -> std::borrow::Cow<'t, str> {
226 let mut last_end = 0;
227 let mut result = String::new();
228 let mut had_match = false;
229
230 for m in self.find_iter(text) {
231 had_match = true;
232 result.push_str(&text[last_end..m.start()]);
233 result.push_str(rep);
234 last_end = m.end();
235 }
236
237 if !had_match {
238 std::borrow::Cow::Borrowed(text)
239 } else {
240 result.push_str(&text[last_end..]);
241 std::borrow::Cow::Owned(result)
242 }
243 }
244}
245
246#[derive(Debug, Clone, Copy)]
248pub struct Match<'t> {
249 text: &'t str,
250 start: usize,
251 end: usize,
252}
253
254impl<'t> Match<'t> {
255 pub fn start(&self) -> usize {
257 self.start
258 }
259
260 pub fn end(&self) -> usize {
262 self.end
263 }
264
265 pub fn as_str(&self) -> &'t str {
267 &self.text[self.start..self.end]
268 }
269
270 pub fn range(&self) -> std::ops::Range<usize> {
272 self.start..self.end
273 }
274
275 pub fn len(&self) -> usize {
277 self.end - self.start
278 }
279
280 pub fn is_empty(&self) -> bool {
282 self.start == self.end
283 }
284}
285
286pub struct Matches<'a> {
288 inner: MatchesInner<'a>,
289 text: &'a str,
290}
291
292impl<'a> std::fmt::Debug for Matches<'a> {
293 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294 f.debug_struct("Matches")
295 .field("text_len", &self.text.len())
296 .finish_non_exhaustive()
297 }
298}
299
300enum MatchesInner<'a> {
302 TeddyFull(literal::FullMatchIter<'a, 'a>),
304 Generic { regex: &'a Regex, last_end: usize },
306}
307
308impl<'a> Matches<'a> {
309 fn new(regex: &'a Regex, text: &'a str) -> Self {
311 let inner = if regex.inner.is_full_match_prefilter() {
312 MatchesInner::TeddyFull(regex.inner.find_full_matches(text.as_bytes()))
314 } else {
315 MatchesInner::Generic { regex, last_end: 0 }
317 };
318 Matches { inner, text }
319 }
320}
321
322impl<'a> Iterator for Matches<'a> {
323 type Item = Match<'a>;
324
325 fn next(&mut self) -> Option<Match<'a>> {
326 match &mut self.inner {
327 MatchesInner::TeddyFull(iter) => {
328 iter.next().map(|(start, end)| Match {
330 text: self.text,
331 start,
332 end,
333 })
334 }
335 MatchesInner::Generic { regex, last_end } => {
336 if *last_end > self.text.len() {
337 return None;
338 }
339
340 let search_text = &self.text[*last_end..];
341 match regex.inner.find(search_text.as_bytes()) {
342 None => None,
343 Some((start, end)) => {
344 let abs_start = *last_end + start;
345 let abs_end = *last_end + end;
346
347 *last_end = if abs_start == abs_end {
350 let remaining = &self.text[abs_end..];
352 let next_char_len =
353 remaining.chars().next().map(|c| c.len_utf8()).unwrap_or(1);
354 abs_end + next_char_len
355 } else {
356 abs_end
357 };
358
359 Some(Match {
360 text: self.text,
361 start: abs_start,
362 end: abs_end,
363 })
364 }
365 }
366 }
367 }
368 }
369}
370
371#[derive(Debug)]
373pub struct CapturesIter<'r, 't> {
374 regex: &'r Regex,
375 text: &'t str,
376 last_end: usize,
377}
378
379impl<'r, 't> Iterator for CapturesIter<'r, 't> {
380 type Item = Captures<'t>;
381
382 fn next(&mut self) -> Option<Captures<'t>> {
383 if self.last_end > self.text.len() {
384 return None;
385 }
386
387 let search_text = &self.text[self.last_end..];
388 match self.regex.inner.captures(search_text.as_bytes()) {
389 None => None,
390 Some(slots) => {
391 let (start, end) = slots.first().and_then(|s| *s)?;
393 let offset = self.last_end;
394
395 let abs_end = offset + end;
398 self.last_end = if start == end {
399 let remaining = &self.text[abs_end..];
400 let next_char_len = remaining.chars().next().map(|c| c.len_utf8()).unwrap_or(1);
401 abs_end + next_char_len
402 } else {
403 abs_end
404 };
405
406 let adjusted_slots: Vec<_> = slots
408 .into_iter()
409 .map(|slot| slot.map(|(s, e)| (offset + s, offset + e)))
410 .collect();
411
412 Some(Captures {
413 text: self.text,
414 slots: adjusted_slots,
415 named_groups: Arc::clone(&self.regex.named_groups),
416 })
417 }
418 }
419 }
420}
421
422#[derive(Debug, Clone)]
424pub struct Captures<'t> {
425 text: &'t str,
426 slots: Vec<Option<(usize, usize)>>,
427 named_groups: Arc<HashMap<String, u32>>,
428}
429
430impl<'t> Captures<'t> {
431 pub fn len(&self) -> usize {
433 self.slots.len()
434 }
435
436 pub fn is_empty(&self) -> bool {
438 self.slots.is_empty()
439 }
440
441 pub fn get(&self, i: usize) -> Option<Match<'t>> {
443 self.slots.get(i).and_then(|slot| {
444 slot.map(|(start, end)| Match {
445 text: self.text,
446 start,
447 end,
448 })
449 })
450 }
451
452 pub fn name(&self, name: &str) -> Option<Match<'t>> {
454 self.named_groups
455 .get(name)
456 .and_then(|&idx| self.get(idx as usize))
457 }
458}
459
460impl<'t> std::ops::Index<usize> for Captures<'t> {
461 type Output = str;
462
463 fn index(&self, i: usize) -> &str {
464 self.get(i)
465 .map(|m| m.as_str())
466 .unwrap_or_else(|| panic!("no capture group at index {}", i))
467 }
468}
469
470impl<'t> std::ops::Index<&str> for Captures<'t> {
471 type Output = str;
472
473 fn index(&self, name: &str) -> &str {
474 self.name(name)
475 .map(|m| m.as_str())
476 .unwrap_or_else(|| panic!("no capture group named '{}'", name))
477 }
478}