1#![deny(missing_docs)]
39
40pub(crate) mod accel;
41pub(crate) mod engine;
42pub(crate) mod prefix;
43pub(crate) mod simd;
44
45#[cfg(feature = "diag")]
46pub use engine::BDFA;
47#[cfg(feature = "diag")]
48pub use prefix::calc_potential_start;
49#[cfg(feature = "diag")]
50pub use prefix::calc_potential_start_prune;
51#[cfg(feature = "diag")]
52pub use prefix::calc_prefix_sets;
53#[cfg(feature = "diag")]
54pub use prefix::PrefixSets;
55#[cfg(feature = "diag")]
56pub use resharp_algebra::solver::TSetId;
57use resharp_algebra::Kind;
58
59const PREFIX_NONE: u8 = 0;
61const PREFIX_SEARCH: u8 = 1;
62const PREFIX_LITERAL: u8 = 2;
63
64pub use resharp_algebra::nulls::Nullability;
65pub use resharp_algebra::NodeId;
66pub use resharp_algebra::RegexBuilder;
67pub use resharp_parser::escape;
74pub use resharp_parser::escape_into;
76
77use std::sync::Mutex;
78
79#[derive(Debug)]
81#[non_exhaustive]
82pub enum Error {
83 Parse(Box<resharp_parser::ResharpError>),
85 Algebra(resharp_algebra::AlgebraError),
87 CapacityExceeded,
89 Serialize(String),
91}
92
93impl std::fmt::Display for Error {
94 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95 match self {
96 Error::Parse(e) => write!(f, "parse error: {}", e),
97 Error::Algebra(e) => write!(f, "{}", e),
98 Error::CapacityExceeded => write!(f, "DFA state capacity exceeded"),
99 Error::Serialize(ref s) => write!(f, "serialization error: {}", s),
100 }
101 }
102}
103
104impl std::error::Error for Error {
105 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
106 match self {
107 Error::Parse(e) => Some(e),
108 Error::Algebra(e) => Some(e),
109 Error::CapacityExceeded => None,
110 Error::Serialize(_) => None,
111 }
112 }
113}
114
115impl From<resharp_parser::ResharpError> for Error {
116 fn from(e: resharp_parser::ResharpError) -> Self {
117 Error::Parse(Box::new(e))
118 }
119}
120
121impl From<resharp_algebra::AlgebraError> for Error {
122 fn from(e: resharp_algebra::AlgebraError) -> Self {
123 Error::Algebra(e)
124 }
125}
126
127#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
142pub enum UnicodeMode {
143 Ascii,
145 #[default]
148 Unicode,
149 Full,
152}
153
154pub struct EngineOptions {
156 pub dfa_threshold: usize,
158 pub max_dfa_capacity: usize,
160 pub lookahead_context_max: u32,
162 pub unicode: UnicodeMode,
164 pub case_insensitive: bool,
166 pub dot_matches_new_line: bool,
168 pub ignore_whitespace: bool,
170 pub hardened: bool,
173}
174
175impl Default for EngineOptions {
176 fn default() -> Self {
177 Self {
178 dfa_threshold: 0,
179 max_dfa_capacity: u16::MAX as usize,
180 lookahead_context_max: 800,
181 unicode: UnicodeMode::Unicode,
182 case_insensitive: false,
183 dot_matches_new_line: false,
184 ignore_whitespace: false,
185 hardened: false,
186 }
187 }
188}
189
190impl EngineOptions {
191 pub fn unicode(mut self, mode: UnicodeMode) -> Self {
193 self.unicode = mode;
194 self
195 }
196 pub fn case_insensitive(mut self, yes: bool) -> Self {
198 self.case_insensitive = yes;
199 self
200 }
201 pub fn dot_matches_new_line(mut self, yes: bool) -> Self {
203 self.dot_matches_new_line = yes;
204 self
205 }
206 pub fn ignore_whitespace(mut self, yes: bool) -> Self {
208 self.ignore_whitespace = yes;
209 self
210 }
211 pub fn hardened(mut self, yes: bool) -> Self {
213 self.hardened = yes;
214 self
215 }
216}
217
218#[derive(Debug, Clone, Copy, PartialEq, Eq)]
220#[repr(C)]
221pub struct Match {
222 pub start: usize,
224 pub end: usize,
226}
227
228pub(crate) struct RegexInner {
229 pub(crate) b: RegexBuilder,
230 pub(crate) fwd: engine::LDFA,
231 pub(crate) rev_ts: engine::LDFA,
232 pub(crate) rev_bare: Option<engine::LDFA>,
233 pub(crate) nulls: Vec<usize>,
234 pub(crate) matches: Vec<Match>,
235 pub(crate) bounded: Option<engine::BDFA>,
236}
237
238pub struct Regex {
241 pub(crate) inner: Mutex<RegexInner>,
242 pub(crate) prefix: Option<prefix::PrefixKind>,
243 pub(crate) fixed_length: Option<u32>,
244 pub(crate) max_length: Option<u32>,
245 pub(crate) empty_nullable: bool,
246 pub(crate) fwd_end_nullable: bool,
247 pub(crate) hardened: bool,
248 pub(crate) has_bounded_prefix: bool,
249 pub(crate) has_bounded: bool,
250 pub(crate) lb_check_bytes: u8,
253 pub(crate) fwd_lb_begin_nullable: bool,
256}
257
258#[inline(never)]
259fn bdfa_inner<const PREFIX: u8>(
260 table: *const u32,
261 ml: *const u8,
262 data: *const u8,
263 mt_log: u32,
264 initial: u16,
265 match_end_off: *const u32,
266 mut state: u16,
267 mut pos: usize,
268 len: usize,
269 match_buf: *mut Match,
270 match_cap: usize,
271) -> (u16, usize, usize) {
272 let mut mc: usize = 0;
273 unsafe {
274 while pos < len {
275 if PREFIX != PREFIX_NONE && state == initial {
276 return (state, pos, mc);
277 }
278 let mt = *ml.add(*data.add(pos) as usize) as usize;
279 let delta = (state as usize) << mt_log | mt;
280 let entry = *table.add(delta);
281 if entry == 0 {
282 return (state, pos, mc);
283 }
284 let rel = entry >> 16;
285 state = (entry & 0xFFFF) as u16;
286 if rel > 0 {
287 if mc >= match_cap {
288 return (state, pos, mc);
289 }
290 let end_off = *match_end_off.add(state as usize);
291 *match_buf.add(mc) = Match {
292 start: pos + 1 - rel as usize,
293 end: pos + 1 - end_off as usize,
294 };
295 mc += 1;
296 state = initial;
297 continue;
298 }
299 pos += 1;
300 }
301 (state, pos, mc)
302 }
303}
304
305impl Regex {
306 pub fn new(pattern: &str) -> Result<Regex, Error> {
312 Self::with_options(pattern, EngineOptions::default())
313 }
314
315 pub fn with_options(pattern: &str, opts: EngineOptions) -> Result<Regex, Error> {
327 let mut b = RegexBuilder::new();
328 b.lookahead_context_max = opts.lookahead_context_max;
329 let pflags = resharp_parser::PatternFlags {
330 unicode: opts.unicode != UnicodeMode::Ascii,
331 full_unicode: opts.unicode == UnicodeMode::Full,
332 case_insensitive: opts.case_insensitive,
333 dot_matches_new_line: opts.dot_matches_new_line,
334 ignore_whitespace: opts.ignore_whitespace,
335 };
336 let node = resharp_parser::parse_ast_with(&mut b, pattern, &pflags)?;
337 Self::from_node_inner(b, node, opts, pattern.len())
338 }
339
340 pub fn from_node(b: RegexBuilder, node: NodeId, opts: EngineOptions) -> Result<Regex, Error> {
342 Self::from_node_inner(b, node, opts, 0)
343 }
344
345 fn from_node_inner(
346 mut b: RegexBuilder,
347 node: NodeId,
348 opts: EngineOptions,
349 pattern_len: usize,
350 ) -> Result<Regex, Error> {
351 let empty_nullable = b
352 .nullability_emptystring(node)
353 .has(Nullability::EMPTYSTRING);
354
355 let fwd_start = b.strip_lb(node)?;
356 let fwd_end_nullable = b.nullability(fwd_start).has(Nullability::END);
357 let rev_start = b.reverse(node)?;
358 let rev_start = b.normalize_rev(rev_start)?;
359 let ts_rev_start =
360 if b.get_kind(rev_start) == Kind::Concat && rev_start.left(&b) == NodeId::BEGIN {
361 rev_start
362 } else {
363 b.mk_concat(NodeId::TS, rev_start)
364 };
365
366 let fixed_length = b.get_fixed_length(node);
367 let (min_len, max_len) = b.get_min_max_length(node);
368 let max_length = if max_len != u32::MAX {
369 Some(max_len)
370 } else {
371 None
372 };
373 let has_look = b.contains_look(node);
374
375 let max_cap = opts.max_dfa_capacity.min(u16::MAX as usize);
376
377 let (selected, rev_skip) =
378 prefix::select_prefix(&mut b, node, rev_start, has_look, min_len)?;
379
380 let has_fwd_prefix = matches!(
381 selected,
382 Some(
383 prefix::PrefixKind::AnchoredFwd(_)
384 | prefix::PrefixKind::UnanchoredFwd(_)
385 | prefix::PrefixKind::AnchoredFwdLb(_)
386 )
387 );
388 let fwd_prefix_stripped = matches!(selected, Some(prefix::PrefixKind::UnanchoredFwd(_)));
389
390 let mut fwd = engine::LDFA::new(&mut b, fwd_start, max_cap)?;
391 let mut rev = engine::LDFA::new(&mut b, ts_rev_start, max_cap)?;
392 rev.prefix_skip = rev_skip;
393
394 let (fwd_lb_begin_nullable, lb_check_bytes) =
395 if matches!(selected, Some(prefix::PrefixKind::AnchoredFwdLb(_))) {
396 let lb = node.left(&b);
397 let lb_inner = b.get_lookbehind_inner(lb);
398 let lb_nonbegin = b.nonbegins(lb_inner);
399 let lb_stripped = b.strip_prefix_safe(lb_nonbegin);
400 let (_, lb_max) = b.get_min_max_length(lb_stripped);
401 let begin_nullable = b.nullability(lb_inner).has(Nullability::BEGIN);
402 (begin_nullable, lb_max.min(4) as u8)
403 } else {
404 (false, 0)
405 };
406
407 if opts.dfa_threshold > 0 {
408 fwd.precompile(&mut b, opts.dfa_threshold);
409 if !has_fwd_prefix {
410 rev.precompile(&mut b, opts.dfa_threshold);
411 }
412 }
413
414 let rev_bare = if fwd_prefix_stripped {
415 Some(engine::LDFA::new(&mut b, rev_start, max_cap)?)
416 } else {
417 None
418 };
419 if fwd_prefix_stripped {
420 fwd.compute_fwd_skip(&mut b);
421 } else if !opts.hardened && pattern_len <= 150 {
422 fwd.compute_fwd_skip_inner(&mut b, 10);
423 }
424
425 let use_bounded = !has_fwd_prefix
426 && max_length.is_some()
427 && max_len <= 100
428 && fixed_length.is_none()
429 && !has_look
430 && !b.contains_anchors(node)
431 && pattern_len <= 150;
432
433 if cfg!(feature = "debug-nulls") {
434 eprintln!(
435 " [bounded-check] max_length={:?} fixed_length={:?} has_look={} anchors={} fwd_prefix={} -> use={}",
436 max_length, fixed_length, has_look, b.contains_anchors(node), selected.is_some(), use_bounded
437 );
438 }
439
440 let bounded = if use_bounded {
441 Some(engine::BDFA::new(&mut b, fwd_start)?)
442 } else {
443 None
444 };
445
446 let has_bounded = bounded.is_some();
447 let has_bounded_prefix = bounded
448 .as_ref()
449 .is_some_and(|bd: &crate::engine::BDFA| bd.prefix.is_some());
450
451 let hardened = if opts.hardened && !has_bounded && fixed_length.is_none() && max_cap >= 64 {
452 fwd.has_nonnullable_cycle(&mut b, 256)
453 } else {
454 false
455 };
456
457 Ok(Regex {
458 inner: Mutex::new(RegexInner {
459 b,
460 fwd,
461 rev_ts: rev,
462 rev_bare,
463 nulls: Vec::new(),
464 matches: Vec::new(),
465 bounded,
466 }),
467 prefix: selected,
468 fixed_length,
469 max_length,
470 empty_nullable,
471 fwd_end_nullable,
472 hardened,
473 has_bounded_prefix,
474 has_bounded,
475 lb_check_bytes,
476 fwd_lb_begin_nullable,
477 })
478 }
479
480 #[cfg(feature = "diag")]
481 #[allow(missing_docs)]
482 pub fn node_count(&self) -> u32 {
483 self.inner.lock().unwrap().b.num_nodes()
484 }
485
486 #[cfg(feature = "diag")]
487 #[allow(missing_docs)]
488 pub fn dfa_stats(&self) -> (usize, usize) {
489 let inner = self.inner.lock().unwrap();
490 (inner.fwd.state_nodes.len(), inner.rev_ts.state_nodes.len())
491 }
492
493 pub fn is_hardened(&self) -> bool {
495 self.hardened
496 }
497
498 #[cfg(feature = "diag")]
499 #[allow(missing_docs)]
500 pub fn bdfa_stats(&self) -> Option<(usize, usize, usize)> {
501 let inner = self.inner.lock().unwrap();
502 inner
503 .bounded
504 .as_ref()
505 .map(|b| (b.states.len(), 1usize << b.mt_log, b.prefix_len))
506 }
507
508 #[cfg(feature = "diag")]
509 #[allow(missing_docs)]
510 pub fn dump_fwd_dfa(&self) {
511 let inner = self.inner.lock().unwrap();
512 let fwd = &inner.fwd;
513 let stride = 1usize << fwd.mt_log;
514 eprintln!(
515 "fwd: minterms={} states={}",
516 fwd.mt_num,
517 fwd.state_nodes.len()
518 );
519 eprintln!(
520 " mt['A']={} mt['a']={} mt['\\n']={} mt[0]={}",
521 fwd.mt_lookup[b'A' as usize],
522 fwd.mt_lookup[b'a' as usize],
523 fwd.mt_lookup[b'\n' as usize],
524 fwd.mt_lookup[0]
525 );
526 for sid in 2..fwd.state_nodes.len() {
527 let base = sid * stride;
528 if base + stride > fwd.center_table.len() {
529 continue;
530 }
531 let row: Vec<u16> = (0..fwd.mt_num as usize)
532 .map(|mt| fwd.center_table[base + mt])
533 .collect();
534 let nullable = sid < fwd.effects_id.len() && fwd.effects_id[sid] != 0;
535 let node = inner.b.pp(fwd.state_nodes[sid]);
536 let skip = fwd.skip_ids.get(sid).copied().unwrap_or(0);
537 eprintln!(
538 " s{}: null={} skip={} node={} tr={:?}",
539 sid, nullable, skip, node, row
540 );
541 }
542 }
543
544 #[cfg(feature = "diag")]
545 #[allow(missing_docs)]
546 pub fn prefix_kind_name(&self) -> Option<&'static str> {
547 match &self.prefix {
548 None => None,
549 Some(prefix::PrefixKind::AnchoredFwd(_)) => Some("AnchoredFwd"),
550 Some(prefix::PrefixKind::UnanchoredFwd(_)) => Some("UnanchoredFwd"),
551 Some(prefix::PrefixKind::AnchoredFwdLb(_)) => Some("AnchoredFwdLb"),
552 Some(prefix::PrefixKind::AnchoredRev) => Some("AnchoredRev"),
553 Some(prefix::PrefixKind::PotentialStart) => Some("PotentialStart"),
554 }
555 }
556
557 #[cfg(feature = "diag")]
558 #[allow(missing_docs)]
559 pub fn max_length(&self) -> Option<u32> {
560 self.max_length
561 }
562
563 #[cfg(feature = "diag")]
564 #[allow(missing_docs)]
565 pub fn fwd_prefix_kind(&self) -> Option<(&'static str, usize)> {
566 match &self.prefix {
567 Some(prefix::PrefixKind::AnchoredFwd(fp))
568 | Some(prefix::PrefixKind::UnanchoredFwd(fp))
569 | Some(prefix::PrefixKind::AnchoredFwdLb(fp)) => Some((fp.variant_name(), fp.len())),
570 _ => None,
571 }
572 }
573
574 #[cfg(feature = "diag")]
575 #[allow(missing_docs)]
576 pub fn has_accel(&self) -> (bool, bool) {
577 let inner = self.inner.lock().unwrap();
578 let fwd = self.prefix.as_ref().is_some_and(|p| p.is_fwd());
579 let rev = self.prefix.as_ref().is_some_and(|p| p.is_rev())
580 || inner.rev_ts.prefix_skip.is_some()
581 || inner.rev_ts.can_skip();
582 (fwd, rev)
583 }
584
585 pub fn find_all(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
594 #[cfg(all(feature = "debug-nulls", debug_assertions))]
595 {
596 }
598
599 if input.is_empty() {
600 return if self.empty_nullable {
601 Ok(vec![Match { start: 0, end: 0 }])
602 } else {
603 Ok(vec![])
604 };
605 }
606 if self.hardened {
607 if self.has_bounded_prefix || self.has_bounded {
608 return self.find_all_fwd_bounded(input);
609 }
610 return self.find_all_dfa(input);
611 }
612 if self.has_bounded_prefix {
613 return self.find_all_fwd_bounded(input);
614 }
615 match &self.prefix {
616 Some(prefix::PrefixKind::UnanchoredFwd(_)) => {
617 return self.find_all_fwd_prefix_stripped(input);
618 }
619 Some(prefix::PrefixKind::AnchoredFwd(_)) => {
620 return self.find_all_fwd_prefix(input);
621 }
622 Some(prefix::PrefixKind::AnchoredFwdLb(_)) => {
623 return self.find_all_fwd_lb_prefix(input);
624 }
625 _ => {}
626 }
627 if self.has_bounded {
628 return self.find_all_fwd_bounded(input);
629 }
630 self.find_all_dfa(input)
631 }
632
633 #[cfg(feature = "diag")]
634 #[allow(missing_docs)]
635 pub fn effects_debug(&self) -> String {
636 let inner = self.inner.lock().unwrap();
637 let rev = &inner.rev_ts;
638 let mut out = String::new();
639 for (i, &eid) in rev.effects_id.iter().enumerate() {
640 if eid != 0 {
641 let nulls: Vec<String> = rev.effects[eid as usize]
642 .iter()
643 .map(|n| format!("(mask={},rel={})", n.mask.0, n.rel))
644 .collect();
645 out += &format!(" state[{}] eid={} nulls=[{}]\n", i, eid, nulls.join(", "));
646 }
647 }
648 out
649 }
650
651 #[cfg(feature = "diag")]
652 #[allow(missing_docs)]
653 pub fn collect_rev_nulls_debug(&self, input: &[u8]) -> Vec<usize> {
654 let inner = &mut *self.inner.lock().unwrap();
655 inner.nulls.clear();
656 inner
657 .rev_ts
658 .collect_rev(&mut inner.b, input.len() - 1, input, &mut inner.nulls)
659 .unwrap();
660 inner.nulls.clone()
661 }
662
663 fn find_all_dfa(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
664 if self.fwd_end_nullable {
665 self.find_all_dfa_inner::<true>(input)
666 } else {
667 self.find_all_dfa_inner::<false>(input)
668 }
669 }
670
671 fn find_all_dfa_inner<const FWD_NULL: bool>(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
672 let inner = &mut *self.inner.lock().unwrap();
673 #[cfg(feature = "debug-nulls")]
674 {
675 eprintln!("find_all_dfa_inner:");
676 eprintln!(
677 "rev0: {}",
678 inner
679 .b
680 .pp(inner.rev.state_nodes[engine::DFA_INITIAL as usize])
681 );
682 }
683
684 let rev_initial_nullable = inner.rev_ts.effects_id[engine::DFA_INITIAL as usize] != 0;
685 if rev_initial_nullable {
686 inner.matches.clear();
687 Self::find_all_nullable_slow(&mut inner.fwd, &mut inner.b, input, &mut inner.matches)?;
688 return Ok(inner.matches.clone());
689 }
690
691 inner.nulls.clear();
692
693 if rev_initial_nullable {
694 inner.nulls.push(input.len());
695 }
696 inner
697 .rev_ts
698 .collect_rev(&mut inner.b, input.len() - 1, input, &mut inner.nulls)?;
699
700 inner.matches.clear();
704 let matches = &mut inner.matches;
705 #[cfg(feature = "debug-nulls")]
706 {
707 eprintln!("nulls_buf={:?}", inner.nulls);
708 }
709
710 if let Some(fl) = self.fixed_length {
711 let fl = fl as usize;
712 let mut last_end = 0;
713 for &start in inner.nulls.iter().rev() {
714 if start >= last_end && start + fl <= input.len() {
715 matches.push(Match {
716 start,
717 end: start + fl,
718 });
719 last_end = start + fl;
720 }
721 }
722 } else if self.hardened {
723 if cfg!(feature = "debug-nulls") {
724 eprintln!(" [dispatch] scan_fwd_ordered");
725 }
726 inner.fwd.scan_fwd_ordered(
727 &mut inner.b,
728 &inner.nulls,
729 input,
730 self.max_length,
731 matches,
732 )?;
733 } else {
734 inner
735 .fwd
736 .scan_fwd_all(&mut inner.b, &inner.nulls, input, self.max_length, matches)?;
737 }
738
739 if FWD_NULL
740 && inner.nulls.first() == Some(&input.len())
741 && matches.last().map_or(true, |m| m.end <= input.len())
742 {
743 matches.push(Match {
744 start: input.len(),
745 end: input.len(),
746 });
747 }
748
749 Ok(inner.matches.clone())
750 }
751
752 fn find_all_nullable_slow(
753 fwd: &mut engine::LDFA,
754 b: &mut RegexBuilder,
755 input: &[u8],
756 matches: &mut Vec<Match>,
757 ) -> Result<(), Error> {
758 let mut pos = 0;
759 fwd.build_skip_all(b);
760 while pos < input.len() {
761 let max_end = fwd.scan_fwd_slow(b, pos, input)?;
762 if max_end != engine::NO_MATCH && max_end > pos {
763 matches.push(Match {
764 start: pos,
765 end: max_end,
766 });
767 pos = max_end;
768 } else if max_end != engine::NO_MATCH {
769 matches.push(Match {
770 start: pos,
771 end: pos,
772 });
773 pos += 1;
774 } else {
775 pos += 1;
776 }
777 }
778 let end_null = engine::has_any_null(
780 &fwd.effects_id,
781 &fwd.effects,
782 engine::DFA_INITIAL as u32,
783 Nullability::END,
784 );
785 if end_null {
786 matches.push(Match {
787 start: input.len(),
788 end: input.len(),
789 });
790 }
791 Ok(())
792 }
793
794 fn find_all_fwd_bounded(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
795 let RegexInner {
796 b,
797 bounded,
798 matches: matches_buf,
799 ..
800 } = &mut *self.inner.lock().unwrap();
801 let bounded = bounded.as_mut().unwrap();
802 matches_buf.clear();
803 match &bounded.prefix {
804 Some(p) if p.is_literal() => {
805 Self::bdfa_scan::<{ PREFIX_LITERAL }, false>(bounded, b, input, matches_buf)?;
806 }
807 Some(_) => {
808 Self::bdfa_scan::<{ PREFIX_SEARCH }, false>(bounded, b, input, matches_buf)?;
809 }
810 None => {
811 Self::bdfa_scan::<{ PREFIX_NONE }, false>(bounded, b, input, matches_buf)?;
812 }
813 }
814 Ok(matches_buf.clone())
815 }
816
817 fn is_match_fwd_bounded(&self, input: &[u8]) -> Result<bool, Error> {
818 let RegexInner {
819 b,
820 bounded,
821 matches: matches_buf,
822 ..
823 } = &mut *self.inner.lock().unwrap();
824 let bounded = bounded.as_mut().unwrap();
825 matches_buf.clear();
826 let found = match &bounded.prefix {
827 Some(p) if p.is_literal() => {
828 Self::bdfa_scan::<{ PREFIX_LITERAL }, true>(bounded, b, input, matches_buf)?
829 }
830 Some(_) => Self::bdfa_scan::<{ PREFIX_SEARCH }, true>(bounded, b, input, matches_buf)?,
831 None => Self::bdfa_scan::<{ PREFIX_NONE }, true>(bounded, b, input, matches_buf)?,
832 };
833 Ok(found)
834 }
835
836 fn bdfa_scan<const PREFIX: u8, const ISMATCH: bool>(
837 bounded: &mut engine::BDFA,
838 b: &mut RegexBuilder,
839 input: &[u8],
840 matches: &mut Vec<Match>,
841 ) -> Result<bool, Error> {
842 let initial = bounded.initial;
843 let mt_log = bounded.mt_log;
844 let ml = bounded.minterms_lookup;
845 let len = input.len();
846 let mut state = initial;
847 let mut pos: usize = 0;
848
849 if PREFIX == PREFIX_NONE {
850 let data = input.as_ptr();
851 if !ISMATCH {
852 matches.reserve(2048);
853 }
854 loop {
855 if !ISMATCH && matches.len() == matches.capacity() {
856 matches.reserve(matches.capacity().max(256));
857 }
858 let spare = if ISMATCH {
859 1
860 } else {
861 matches.capacity() - matches.len()
862 };
863 let buf_ptr = unsafe { matches.as_mut_ptr().add(matches.len()) };
864 let table = bounded.table.as_ptr();
865 let meo = bounded.match_end_off.as_ptr();
866 let (s, p, mc) = bdfa_inner::<{ PREFIX_NONE }>(
867 table,
868 ml.as_ptr(),
869 data,
870 mt_log,
871 initial,
872 meo,
873 state,
874 pos,
875 len,
876 buf_ptr,
877 spare,
878 );
879 state = s;
880 pos = p;
881 if ISMATCH && mc > 0 {
882 return Ok(true);
883 }
884 unsafe { matches.set_len(matches.len() + mc) };
885 if pos >= len {
886 break;
887 }
888 let mt = ml[input[pos] as usize] as usize;
889 let entry = bounded.transition(b, state, mt)?;
890 state = (entry & 0xFFFF) as u16;
891 let rel = entry >> 16;
892 if rel > 0 {
893 if ISMATCH {
894 return Ok(true);
895 }
896 let end_off = bounded.match_end_off[state as usize];
897 matches.push(Match {
898 start: pos + 1 - rel as usize,
899 end: pos + 1 - end_off as usize,
900 });
901 state = initial;
902 } else {
903 pos += 1;
904 }
905 }
906 } else {
907 'main: loop {
909 if pos >= len {
910 break;
911 }
912
913 if state == initial {
914 let found = bounded.prefix.as_ref().unwrap().find_fwd(input, pos);
915 match found {
916 Some(p) => {
917 if PREFIX == PREFIX_LITERAL {
918 pos = p + bounded.prefix_len;
919 state = bounded.after_prefix;
920 } else {
921 pos = p;
922 for _ in 0..bounded.prefix_len {
923 if pos >= len {
924 break;
925 }
926 let mt = ml[input[pos] as usize] as usize;
927 let delta = (state as usize) << mt_log | mt;
928 let entry = bounded.table[delta];
929 let entry = if entry != 0 {
930 entry
931 } else {
932 bounded.transition(b, state, mt)?
933 };
934 state = (entry & 0xFFFF) as u16;
935 if state == initial {
936 break;
937 }
938 pos += 1;
939 }
940 }
941 let rel = bounded.match_rel[state as usize];
942 if rel > 0 {
943 if ISMATCH {
944 return Ok(true);
945 }
946 let end_off = bounded.match_end_off[state as usize];
947 matches.push(Match {
948 start: pos - rel as usize + 1,
949 end: pos - end_off as usize + 1,
950 });
951 state = initial;
952 }
953 continue 'main;
954 }
955 None => break 'main,
956 }
957 }
958
959 unsafe {
960 let table = bounded.table.as_ptr();
961 let data = input.as_ptr();
962 let ml_ptr = ml.as_ptr();
963 let meo = bounded.match_end_off.as_ptr();
964
965 while pos < len {
966 let mt = *ml_ptr.add(*data.add(pos) as usize) as usize;
967 let delta = (state as usize) << mt_log | mt;
968 let entry = *table.add(delta);
969 if entry == 0 {
970 break;
971 }
972 let rel = entry >> 16;
973 state = (entry & 0xFFFF) as u16;
974 if state == initial {
975 continue 'main;
976 }
977 if rel > 0 {
978 if ISMATCH {
979 return Ok(true);
980 }
981 let end_off = *meo.add(state as usize);
982 matches.push(Match {
983 start: pos + 1 - rel as usize,
984 end: pos + 1 - end_off as usize,
985 });
986 state = initial;
987 continue 'main;
988 }
989 pos += 1;
990 }
991 }
992
993 if pos >= len {
994 break;
995 }
996 let mt = ml[input[pos] as usize] as usize;
997 let entry = bounded.transition(b, state, mt)?;
998 state = (entry & 0xFFFF) as u16;
999 let rel = entry >> 16;
1000 if rel > 0 {
1001 if ISMATCH {
1002 return Ok(true);
1003 }
1004 let end_off = bounded.match_end_off[state as usize];
1005 matches.push(Match {
1006 start: pos + 1 - rel as usize,
1007 end: pos + 1 - end_off as usize,
1008 });
1009 state = initial;
1010 } else {
1011 pos += 1;
1012 }
1013 }
1014 }
1015
1016 if state != initial {
1017 let node = bounded.states[state as usize];
1018 if node != NodeId::MISSING {
1019 let mut best_val = 0u32;
1021 let mut best_step = 0u32;
1022 let mut cur = node;
1023 while cur.0 > NodeId::BOT.0 {
1024 let packed = b.get_extra(cur);
1025 let step = packed & 0xFFFF;
1026 let best = packed >> 16;
1027 if best > best_val {
1028 best_val = best;
1029 best_step = step;
1030 }
1031 cur = cur.right(b);
1032 }
1033 if best_val > 0 {
1034 if ISMATCH {
1035 return Ok(true);
1036 }
1037 matches.push(Match {
1038 start: len - best_step as usize,
1039 end: len - best_step as usize + best_val as usize,
1040 });
1041 }
1042 }
1043 }
1044
1045 Ok(false)
1046 }
1047
1048 fn find_all_fwd_prefix(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
1049 let fwd_prefix = self.prefix.as_ref().and_then(|p| p.fwd_search()).unwrap();
1050 let inner = &mut *self.inner.lock().unwrap();
1051 let matches = &mut inner.matches;
1052 matches.clear();
1053 let mut search_start = 0;
1054
1055 if self.fixed_length == Some(fwd_prefix.len() as u32)
1056 && fwd_prefix.find_all_literal(input, matches)
1057 {
1058 } else {
1059 let prefix_len = fwd_prefix.len();
1060 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
1061 let state = inner
1062 .fwd
1063 .walk_input(&mut inner.b, candidate, prefix_len, input)?;
1064 if state != 0 {
1065 let max_end = inner.fwd.scan_fwd_from(
1066 &mut inner.b,
1067 state,
1068 candidate + prefix_len,
1069 input,
1070 )?;
1071 if max_end != engine::NO_MATCH && max_end > candidate {
1072 matches.push(Match {
1073 start: candidate,
1074 end: max_end,
1075 });
1076 search_start = max_end;
1077 continue;
1078 }
1079 }
1080 search_start = candidate + 1;
1081 }
1082 #[cfg(feature = "debug-nulls")]
1083 eprintln!(
1084 " [debug-nulls] fwd_prefix candidates={} confirmed={} false_positives={}",
1085 n_candidates,
1086 n_confirmed,
1087 n_candidates.saturating_sub(n_confirmed)
1088 );
1089 }
1090
1091 Ok(matches.clone())
1092 }
1093
1094 fn find_all_fwd_prefix_stripped(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
1095 let fwd_prefix = self.prefix.as_ref().and_then(|p| p.fwd_search()).unwrap();
1096 let inner = &mut *self.inner.lock().unwrap();
1097 let prefix_len = fwd_prefix.len();
1098 inner.fwd.create_state(&mut inner.b, engine::DFA_INITIAL)?;
1099 inner.matches.clear();
1100 let mut search_start = 0;
1101
1102 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
1103 let mut state = engine::DFA_INITIAL;
1105 for i in 0..prefix_len {
1106 let mt = inner.fwd.mt_lookup[input[candidate + i] as usize] as u32;
1107 state = inner.fwd.lazy_transition(&mut inner.b, state, mt)?;
1108 if state == engine::DFA_DEAD {
1109 break;
1110 }
1111 }
1112 if state == engine::DFA_DEAD {
1113 search_start = candidate + 1;
1114 continue;
1115 }
1116 let max_end = inner.fwd.scan_fwd_from(
1117 &mut inner.b,
1118 state as u32,
1119 candidate + prefix_len,
1120 input,
1121 )?;
1122 if max_end == engine::NO_MATCH {
1123 search_start = candidate + 1;
1124 continue;
1125 }
1126
1127 let rev_bare = inner.rev_bare.as_mut().unwrap();
1129 let match_start = rev_bare.scan_rev_from(&mut inner.b, max_end, search_start, input)?;
1130 if match_start != engine::NO_MATCH {
1131 inner.matches.push(Match {
1132 start: match_start,
1133 end: max_end,
1134 });
1135 search_start = max_end;
1136 } else {
1137 search_start = candidate + 1;
1138 }
1139 }
1140
1141 Ok(inner.matches.clone())
1142 }
1143
1144 fn find_all_fwd_lb_prefix(&self, input: &[u8]) -> Result<Vec<Match>, Error> {
1145 let fwd_prefix = self.prefix.as_ref().and_then(|p| p.fwd_search()).unwrap();
1146 let inner = &mut *self.inner.lock().unwrap();
1147 inner.matches.clear();
1148 let lb_len = self.lb_check_bytes as usize;
1149 let mut search_start = 0usize;
1150
1151 if self.fwd_lb_begin_nullable && !input.is_empty() {
1154 let state = inner.fwd.walk_input(&mut inner.b, 0, 1, input)?;
1155 if state != 0 {
1156 let max_end = inner.fwd.scan_fwd_from(&mut inner.b, state, 1, input)?;
1157 if max_end != engine::NO_MATCH {
1158 inner.matches.push(Match {
1159 start: 0,
1160 end: max_end,
1161 });
1162 search_start = max_end;
1163 }
1164 }
1165 }
1166
1167 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
1170 let body_start = candidate + lb_len;
1171 let max_end = inner.fwd.scan_fwd_from(
1172 &mut inner.b,
1173 engine::DFA_INITIAL as u32,
1174 body_start,
1175 input,
1176 )?;
1177 if max_end != engine::NO_MATCH {
1178 inner.matches.push(Match {
1179 start: body_start,
1180 end: max_end,
1181 });
1182 search_start = max_end;
1183 } else {
1184 search_start = body_start;
1185 }
1186 }
1187
1188 Ok(inner.matches.clone())
1189 }
1190
1191 pub fn find_anchored(&self, input: &[u8]) -> Result<Option<Match>, Error> {
1195 if input.is_empty() {
1196 return if self.empty_nullable {
1197 Ok(Some(Match { start: 0, end: 0 }))
1198 } else {
1199 Ok(None)
1200 };
1201 }
1202 let inner = &mut *self.inner.lock().unwrap();
1203 let max_end = inner.fwd.scan_fwd_slow(&mut inner.b, 0, input)?;
1204 if max_end != engine::NO_MATCH {
1205 Ok(Some(Match {
1206 start: 0,
1207 end: max_end,
1208 }))
1209 } else {
1210 Ok(None)
1211 }
1212 }
1213
1214 pub fn is_match(&self, input: &[u8]) -> Result<bool, Error> {
1218 if input.is_empty() {
1219 return Ok(self.empty_nullable);
1220 }
1221 if self.has_bounded {
1222 return self.is_match_fwd_bounded(input);
1223 }
1224 if let Some(fwd_prefix) = self.prefix.as_ref().and_then(|p| p.fwd_search()) {
1226 if let Some(fl) = self.fixed_length {
1227 if fl as usize == fwd_prefix.len() {
1228 if let Some(candidate) = fwd_prefix.find_fwd(input, 0) {
1229 return Ok(candidate + fl as usize <= input.len());
1230 }
1231 return Ok(false);
1232 }
1233 }
1234 let inner = &mut *self.inner.lock().unwrap();
1235 let prefix_len = fwd_prefix.len();
1236 let mut search_start = 0;
1237 while let Some(candidate) = fwd_prefix.find_fwd(input, search_start) {
1238 let state = inner
1239 .fwd
1240 .walk_input(&mut inner.b, candidate, prefix_len, input)?;
1241 if state != 0 {
1242 let max_end = inner.fwd.scan_fwd_from(
1243 &mut inner.b,
1244 state,
1245 candidate + prefix_len,
1246 input,
1247 )?;
1248 if max_end != engine::NO_MATCH && max_end > candidate {
1249 return Ok(true);
1250 }
1251 }
1252 search_start = candidate + 1;
1253 }
1254 return Ok(false);
1255 }
1256 let inner = &mut *self.inner.lock().unwrap();
1257 if inner.rev_ts.effects_id[engine::DFA_INITIAL as usize] != 0 {
1258 return Ok(true);
1259 }
1260 inner.nulls.clear();
1261 inner
1262 .rev_ts
1263 .collect_rev_first(&mut inner.b, input.len() - 1, input, &mut inner.nulls)?;
1264 Ok(!inner.nulls.is_empty())
1265 }
1266}