1pub fn is_match(pattern: &str, flags: &str, haystack: &[u8]) -> bool {
55 let opts = Flags::parse(flags);
56 let nfa = match compile(pattern, &opts) {
57 Ok(nfa) => nfa,
58 Err(_) => return false,
59 };
60 nfa_search(&nfa, haystack, &opts).is_some()
61}
62
63pub fn find(pattern: &str, flags: &str, haystack: &[u8]) -> Option<(usize, usize)> {
65 let opts = Flags::parse(flags);
66 let nfa = compile(pattern, &opts).ok()?;
67 nfa_search(&nfa, haystack, &opts)
68}
69
70pub fn find_all(pattern: &str, flags: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
72 let opts = Flags::parse(flags);
73 let nfa = match compile(pattern, &opts) {
74 Ok(nfa) => nfa,
75 Err(_) => return Vec::new(),
76 };
77 let mut results = Vec::new();
78 let mut start = 0;
79 while start <= haystack.len() {
80 if let Some((ms, me)) = nfa_search_from(&nfa, haystack, &opts, start) {
81 results.push((ms, me));
82 start = if me == ms { me + 1 } else { me };
83 } else {
84 break;
85 }
86 }
87 results
88}
89
90pub fn split(pattern: &str, flags: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
92 let matches = find_all(pattern, flags, haystack);
93 let mut segments = Vec::new();
94 let mut pos = 0;
95 for (ms, me) in &matches {
96 segments.push((pos, *ms));
97 pos = *me;
98 }
99 segments.push((pos, haystack.len()));
100 segments
101}
102
103#[derive(Clone, Debug)]
108struct Flags {
109 case_insensitive: bool,
110 multiline: bool,
111 dotall: bool,
112 extended: bool,
113}
114
115impl Flags {
116 fn parse(s: &str) -> Self {
117 Self {
118 case_insensitive: s.contains('i'),
119 multiline: s.contains('m'),
120 dotall: s.contains('s'),
121 extended: s.contains('x'),
122 }
123 }
124}
125
126#[derive(Clone, Debug)]
131enum NfaNode {
132 Byte(u8),
133 Class(ByteClass),
134 AnyByte,
135 AnyByteNoNl,
136 Split(usize, usize),
137 Epsilon(usize),
138 Accept,
139 AnchorStart,
140 AnchorEnd,
141 WordBoundary,
142}
143
144#[derive(Clone, Debug)]
145struct ByteClass {
146 bits: [u64; 4], }
148
149impl ByteClass {
150 fn new() -> Self {
151 Self { bits: [0; 4] }
152 }
153
154 fn set(&mut self, b: u8) {
155 let idx = (b / 64) as usize;
156 let bit = b % 64;
157 self.bits[idx] |= 1u64 << bit;
158 }
159
160 fn contains(&self, b: u8) -> bool {
161 let idx = (b / 64) as usize;
162 let bit = b % 64;
163 (self.bits[idx] >> bit) & 1 == 1
164 }
165
166 fn negate(&mut self) {
167 for b in &mut self.bits {
168 *b = !*b;
169 }
170 }
171
172 fn set_range(&mut self, lo: u8, hi: u8) {
173 for b in lo..=hi {
174 self.set(b);
175 }
176 }
177}
178
179struct Nfa {
180 nodes: Vec<NfaNode>,
181 start: usize,
182 has_lazy: bool,
183}
184
185struct Compiler<'a> {
190 pattern: &'a [u8],
191 pos: usize,
192 nodes: Vec<NfaNode>,
193 flags: Flags,
194 has_lazy: bool,
195}
196
197impl<'a> Compiler<'a> {
198 fn new(pattern: &'a str, flags: &Flags) -> Self {
199 Self {
200 pattern: pattern.as_bytes(),
201 pos: 0,
202 nodes: Vec::new(),
203 flags: flags.clone(),
204 has_lazy: false,
205 }
206 }
207
208 fn peek(&self) -> Option<u8> {
209 if self.pos < self.pattern.len() {
210 Some(self.pattern[self.pos])
211 } else {
212 None
213 }
214 }
215
216 fn advance(&mut self) -> Option<u8> {
217 let ch = self.peek()?;
218 self.pos += 1;
219 Some(ch)
220 }
221
222 fn emit(&mut self, node: NfaNode) -> usize {
223 let id = self.nodes.len();
224 self.nodes.push(node);
225 id
226 }
227
228 fn compile(mut self) -> Result<Nfa, String> {
229 let (start, end) = self.parse_alternation()?;
230 let accept = self.emit(NfaNode::Accept);
231 self.patch(end, accept);
232 Ok(Nfa {
233 nodes: self.nodes,
234 start,
235 has_lazy: self.has_lazy,
236 })
237 }
238
239 fn patch(&mut self, placeholder: usize, target: usize) {
240 match &mut self.nodes[placeholder] {
241 NfaNode::Epsilon(ref mut t) if *t == usize::MAX => *t = target,
242 NfaNode::Split(ref mut a, ref mut b) => {
243 if *a == usize::MAX {
244 *a = target;
245 }
246 if *b == usize::MAX {
247 *b = target;
248 }
249 }
250 _ => {
251 let _next = self.nodes.len();
252 self.nodes.push(NfaNode::Epsilon(target));
253 }
254 }
255 }
256
257 fn parse_alternation(&mut self) -> Result<(usize, usize), String> {
258 let (mut start, mut end) = self.parse_sequence()?;
259 while self.peek() == Some(b'|') {
260 self.advance();
261 let (s2, e2) = self.parse_sequence()?;
262 let split = self.emit(NfaNode::Split(start, s2));
263 let join = self.emit(NfaNode::Epsilon(usize::MAX));
264 self.patch(end, join);
265 self.patch(e2, join);
266 start = split;
267 end = join;
268 }
269 Ok((start, end))
270 }
271
272 fn parse_sequence(&mut self) -> Result<(usize, usize), String> {
273 let mut fragments: Vec<(usize, usize)> = Vec::new();
274 loop {
275 match self.peek() {
276 None | Some(b'|') | Some(b')') => break,
277 _ => {
278 let frag = self.parse_quantified()?;
279 fragments.push(frag);
280 }
281 }
282 }
283 if fragments.is_empty() {
284 let e = self.emit(NfaNode::Epsilon(usize::MAX));
285 return Ok((e, e));
286 }
287 for i in 0..fragments.len() - 1 {
288 let next_start = fragments[i + 1].0;
289 self.patch(fragments[i].1, next_start);
290 }
291 Ok((fragments[0].0, fragments[fragments.len() - 1].1))
292 }
293
294 fn parse_quantified(&mut self) -> Result<(usize, usize), String> {
295 let (s, e) = self.parse_atom()?;
296 match self.peek() {
297 Some(b'*') => {
298 self.advance();
299 let lazy = self.peek() == Some(b'?');
300 if lazy {
301 self.advance();
302 self.has_lazy = true;
303 }
304 let split = self.emit(NfaNode::Split(s, usize::MAX));
305 self.patch(e, split);
306 let out = self.emit(NfaNode::Epsilon(usize::MAX));
307 if lazy {
308 if let NfaNode::Split(ref mut a, ref mut b) = self.nodes[split] {
309 *a = out;
310 *b = s;
311 }
312 } else if let NfaNode::Split(_, ref mut b) = self.nodes[split] {
313 *b = out;
314 }
315 Ok((split, out))
316 }
317 Some(b'+') => {
318 self.advance();
319 let lazy = self.peek() == Some(b'?');
320 if lazy {
321 self.advance();
322 self.has_lazy = true;
323 }
324 let split = self.emit(NfaNode::Split(s, usize::MAX));
325 self.patch(e, split);
326 let out = self.emit(NfaNode::Epsilon(usize::MAX));
327 if lazy {
328 if let NfaNode::Split(ref mut a, ref mut b) = self.nodes[split] {
329 *a = out;
330 *b = s;
331 }
332 } else if let NfaNode::Split(_, ref mut b) = self.nodes[split] {
333 *b = out;
334 }
335 Ok((s, out))
336 }
337 Some(b'?') => {
338 self.advance();
339 let lazy = self.peek() == Some(b'?');
340 if lazy {
341 self.advance();
342 self.has_lazy = true;
343 }
344 let out = self.emit(NfaNode::Epsilon(usize::MAX));
345 self.patch(e, out);
346 let split = if lazy {
347 self.emit(NfaNode::Split(out, s))
348 } else {
349 self.emit(NfaNode::Split(s, out))
350 };
351 Ok((split, out))
352 }
353 _ => Ok((s, e)),
354 }
355 }
356
357 fn parse_atom(&mut self) -> Result<(usize, usize), String> {
358 if self.flags.extended {
359 self.skip_extended_ws();
360 }
361
362 match self.peek() {
363 Some(b'(') => {
364 self.advance();
365 let inner = self.parse_alternation()?;
366 if self.peek() == Some(b')') {
367 self.advance();
368 } else {
369 return Err("unclosed group `(`".into());
370 }
371 Ok(inner)
372 }
373 Some(b'[') => self.parse_char_class(),
374 Some(b'.') => {
375 self.advance();
376 let node = if self.flags.dotall {
377 NfaNode::AnyByte
378 } else {
379 NfaNode::AnyByteNoNl
380 };
381 let s = self.emit(node);
382 let e = self.emit(NfaNode::Epsilon(usize::MAX));
383 Ok((s, e))
384 }
385 Some(b'^') => {
386 self.advance();
387 let s = self.emit(NfaNode::AnchorStart);
388 let e = self.emit(NfaNode::Epsilon(usize::MAX));
389 Ok((s, e))
390 }
391 Some(b'$') => {
392 self.advance();
393 let s = self.emit(NfaNode::AnchorEnd);
394 let e = self.emit(NfaNode::Epsilon(usize::MAX));
395 Ok((s, e))
396 }
397 Some(b'\\') => {
398 self.advance();
399 let (class_or_byte, negate) = self.parse_escape()?;
400 match class_or_byte {
401 EscapeResult::Byte(b) => {
402 let actual_byte = if self.flags.case_insensitive {
403 b.to_ascii_lowercase()
404 } else {
405 b
406 };
407 let s =
408 if self.flags.case_insensitive && actual_byte.is_ascii_alphabetic() {
409 let mut cls = ByteClass::new();
410 cls.set(actual_byte.to_ascii_lowercase());
411 cls.set(actual_byte.to_ascii_uppercase());
412 self.emit(NfaNode::Class(cls))
413 } else {
414 self.emit(NfaNode::Byte(actual_byte))
415 };
416 let e = self.emit(NfaNode::Epsilon(usize::MAX));
417 Ok((s, e))
418 }
419 EscapeResult::Class(mut cls) => {
420 if negate {
421 cls.negate();
422 }
423 let s = self.emit(NfaNode::Class(cls));
424 let e = self.emit(NfaNode::Epsilon(usize::MAX));
425 Ok((s, e))
426 }
427 EscapeResult::WordBoundary => {
428 let s = self.emit(NfaNode::WordBoundary);
429 let e = self.emit(NfaNode::Epsilon(usize::MAX));
430 Ok((s, e))
431 }
432 }
433 }
434 Some(ch) if ch != b'|' && ch != b')' && ch != b'*' && ch != b'+' && ch != b'?' => {
435 self.advance();
436 let s = if self.flags.case_insensitive && ch.is_ascii_alphabetic() {
437 let mut cls = ByteClass::new();
438 cls.set(ch.to_ascii_lowercase());
439 cls.set(ch.to_ascii_uppercase());
440 self.emit(NfaNode::Class(cls))
441 } else {
442 self.emit(NfaNode::Byte(ch))
443 };
444 let e = self.emit(NfaNode::Epsilon(usize::MAX));
445 Ok((s, e))
446 }
447 _ => Err(format!(
448 "unexpected character in regex at position {}",
449 self.pos
450 )),
451 }
452 }
453
454 fn parse_char_class(&mut self) -> Result<(usize, usize), String> {
455 self.advance(); let negated = self.peek() == Some(b'^');
457 if negated {
458 self.advance();
459 }
460 let mut cls = ByteClass::new();
461 let mut first = true;
462 loop {
463 match self.peek() {
464 None => return Err("unclosed character class `[`".into()),
465 Some(b']') if !first => {
466 self.advance();
467 break;
468 }
469 _ => {
470 first = false;
471 let lo = self.parse_class_atom()?;
472 if self.peek() == Some(b'-')
473 && self.pos + 1 < self.pattern.len()
474 && self.pattern[self.pos + 1] != b']'
475 {
476 self.advance(); let hi = self.parse_class_atom()?;
478 if self.flags.case_insensitive {
479 for b in lo..=hi {
480 cls.set(b.to_ascii_lowercase());
481 cls.set(b.to_ascii_uppercase());
482 }
483 } else {
484 cls.set_range(lo, hi);
485 }
486 } else if self.flags.case_insensitive && lo.is_ascii_alphabetic() {
487 cls.set(lo.to_ascii_lowercase());
488 cls.set(lo.to_ascii_uppercase());
489 } else {
490 cls.set(lo);
491 }
492 }
493 }
494 }
495 if negated {
496 cls.negate();
497 }
498 let s = self.emit(NfaNode::Class(cls));
499 let e = self.emit(NfaNode::Epsilon(usize::MAX));
500 Ok((s, e))
501 }
502
503 fn parse_class_atom(&mut self) -> Result<u8, String> {
504 match self.advance() {
505 Some(b'\\') => match self.advance() {
506 Some(b'n') => Ok(b'\n'),
507 Some(b't') => Ok(b'\t'),
508 Some(b'r') => Ok(b'\r'),
509 Some(b'\\') => Ok(b'\\'),
510 Some(b']') => Ok(b']'),
511 Some(b'-') => Ok(b'-'),
512 Some(b'x') => self.parse_hex_byte(),
513 Some(ch) => Ok(ch),
514 None => Err("unexpected end of escape in character class".into()),
515 },
516 Some(ch) => Ok(ch),
517 None => Err("unexpected end of character class".into()),
518 }
519 }
520
521 fn parse_hex_byte(&mut self) -> Result<u8, String> {
522 let hi = self.advance().ok_or("incomplete hex escape")?;
523 let lo = self.advance().ok_or("incomplete hex escape")?;
524 let h = hex_val(hi).ok_or_else(|| format!("invalid hex digit `{}`", hi as char))?;
525 let l = hex_val(lo).ok_or_else(|| format!("invalid hex digit `{}`", lo as char))?;
526 Ok(h * 16 + l)
527 }
528
529 fn parse_escape(&mut self) -> Result<(EscapeResult, bool), String> {
530 match self.advance() {
531 Some(b'd') => {
532 let mut cls = ByteClass::new();
533 cls.set_range(b'0', b'9');
534 Ok((EscapeResult::Class(cls), false))
535 }
536 Some(b'D') => {
537 let mut cls = ByteClass::new();
538 cls.set_range(b'0', b'9');
539 Ok((EscapeResult::Class(cls), true))
540 }
541 Some(b'w') => {
542 let mut cls = ByteClass::new();
543 cls.set_range(b'a', b'z');
544 cls.set_range(b'A', b'Z');
545 cls.set_range(b'0', b'9');
546 cls.set(b'_');
547 Ok((EscapeResult::Class(cls), false))
548 }
549 Some(b'W') => {
550 let mut cls = ByteClass::new();
551 cls.set_range(b'a', b'z');
552 cls.set_range(b'A', b'Z');
553 cls.set_range(b'0', b'9');
554 cls.set(b'_');
555 Ok((EscapeResult::Class(cls), true))
556 }
557 Some(b's') => {
558 let mut cls = ByteClass::new();
559 cls.set(b' ');
560 cls.set(b'\t');
561 cls.set(b'\n');
562 cls.set(b'\r');
563 cls.set(0x0C);
564 Ok((EscapeResult::Class(cls), false))
565 }
566 Some(b'S') => {
567 let mut cls = ByteClass::new();
568 cls.set(b' ');
569 cls.set(b'\t');
570 cls.set(b'\n');
571 cls.set(b'\r');
572 cls.set(0x0C);
573 Ok((EscapeResult::Class(cls), true))
574 }
575 Some(b'b') => Ok((EscapeResult::WordBoundary, false)),
576 Some(b'n') => Ok((EscapeResult::Byte(b'\n'), false)),
577 Some(b't') => Ok((EscapeResult::Byte(b'\t'), false)),
578 Some(b'r') => Ok((EscapeResult::Byte(b'\r'), false)),
579 Some(b'0') => Ok((EscapeResult::Byte(0), false)),
580 Some(b'x') => {
581 let b = self.parse_hex_byte()?;
582 Ok((EscapeResult::Byte(b), false))
583 }
584 Some(ch) => Ok((EscapeResult::Byte(ch), false)),
585 None => Err("unexpected end of escape sequence".into()),
586 }
587 }
588
589 fn skip_extended_ws(&mut self) {
590 while let Some(ch) = self.peek() {
591 if ch == b' ' || ch == b'\t' || ch == b'\n' || ch == b'\r' {
592 self.advance();
593 } else if ch == b'#' {
594 while let Some(c) = self.peek() {
595 self.advance();
596 if c == b'\n' {
597 break;
598 }
599 }
600 } else {
601 break;
602 }
603 }
604 }
605}
606
607enum EscapeResult {
608 Byte(u8),
609 Class(ByteClass),
610 WordBoundary,
611}
612
613fn compile(pattern: &str, flags: &Flags) -> Result<Nfa, String> {
614 Compiler::new(pattern, flags).compile()
615}
616
617fn hex_val(b: u8) -> Option<u8> {
618 match b {
619 b'0'..=b'9' => Some(b - b'0'),
620 b'a'..=b'f' => Some(b - b'a' + 10),
621 b'A'..=b'F' => Some(b - b'A' + 10),
622 _ => None,
623 }
624}
625
626fn nfa_search(nfa: &Nfa, haystack: &[u8], opts: &Flags) -> Option<(usize, usize)> {
631 for start in 0..=haystack.len() {
632 if let Some(end) = nfa_match_at(nfa, haystack, opts, start) {
633 return Some((start, end));
634 }
635 }
636 None
637}
638
639fn nfa_search_from(
640 nfa: &Nfa,
641 haystack: &[u8],
642 opts: &Flags,
643 from: usize,
644) -> Option<(usize, usize)> {
645 for start in from..=haystack.len() {
646 if let Some(end) = nfa_match_at(nfa, haystack, opts, start) {
647 return Some((start, end));
648 }
649 }
650 None
651}
652
653fn nfa_match_at(nfa: &Nfa, haystack: &[u8], opts: &Flags, start: usize) -> Option<usize> {
655 let mut current = Vec::new();
656 let mut next = Vec::new();
657 let mut in_current = vec![false; nfa.nodes.len()];
658 let mut in_next = vec![false; nfa.nodes.len()];
659 let mut last_match: Option<usize> = None;
660
661 add_state(
662 nfa,
663 nfa.start,
664 &mut current,
665 &mut in_current,
666 haystack,
667 opts,
668 start,
669 );
670
671 for &state in ¤t {
672 if matches!(nfa.nodes[state], NfaNode::Accept) {
673 last_match = Some(start);
674 if nfa.has_lazy {
675 return last_match;
676 }
677 }
678 }
679
680 let mut pos = start;
681 while pos < haystack.len() && !current.is_empty() {
682 let byte = haystack[pos];
683
684 for &state in ¤t {
685 match &nfa.nodes[state] {
686 NfaNode::Byte(b) => {
687 if byte == *b {
688 add_state(nfa, state + 1, &mut next, &mut in_next, haystack, opts, pos + 1);
689 }
690 }
691 NfaNode::Class(cls) => {
692 if cls.contains(byte) {
693 add_state(nfa, state + 1, &mut next, &mut in_next, haystack, opts, pos + 1);
694 }
695 }
696 NfaNode::AnyByte => {
697 add_state(nfa, state + 1, &mut next, &mut in_next, haystack, opts, pos + 1);
698 }
699 NfaNode::AnyByteNoNl => {
700 if byte != b'\n' {
701 add_state(nfa, state + 1, &mut next, &mut in_next, haystack, opts, pos + 1);
702 }
703 }
704 _ => {}
705 }
706 }
707
708 for &state in &next {
709 if matches!(nfa.nodes[state], NfaNode::Accept) {
710 last_match = Some(pos + 1);
711 if nfa.has_lazy {
712 return last_match;
713 }
714 }
715 }
716
717 std::mem::swap(&mut current, &mut next);
718 std::mem::swap(&mut in_current, &mut in_next);
719 next.clear();
720 for b in in_next.iter_mut() {
721 *b = false;
722 }
723
724 pos += 1;
725 }
726
727 last_match
728}
729
730fn add_state(
732 nfa: &Nfa,
733 state: usize,
734 set: &mut Vec<usize>,
735 in_set: &mut [bool],
736 haystack: &[u8],
737 opts: &Flags,
738 pos: usize,
739) {
740 if state >= nfa.nodes.len() || in_set[state] {
741 return;
742 }
743 in_set[state] = true;
744
745 match &nfa.nodes[state] {
746 NfaNode::Epsilon(target) => {
747 if *target != usize::MAX {
748 add_state(nfa, *target, set, in_set, haystack, opts, pos);
749 }
750 }
751 NfaNode::Split(a, b) => {
752 add_state(nfa, *a, set, in_set, haystack, opts, pos);
753 add_state(nfa, *b, set, in_set, haystack, opts, pos);
754 }
755 NfaNode::AnchorStart => {
756 let ok = if opts.multiline {
757 pos == 0 || (pos > 0 && haystack[pos - 1] == b'\n')
758 } else {
759 pos == 0
760 };
761 if ok {
762 add_state(nfa, state + 1, set, in_set, haystack, opts, pos);
763 }
764 }
765 NfaNode::AnchorEnd => {
766 let ok = if opts.multiline {
767 pos == haystack.len() || haystack[pos] == b'\n'
768 } else {
769 pos == haystack.len()
770 };
771 if ok {
772 add_state(nfa, state + 1, set, in_set, haystack, opts, pos);
773 }
774 }
775 NfaNode::WordBoundary => {
776 let before_word = pos > 0 && is_word_byte(haystack[pos - 1]);
777 let after_word = pos < haystack.len() && is_word_byte(haystack[pos]);
778 if before_word != after_word {
779 add_state(nfa, state + 1, set, in_set, haystack, opts, pos);
780 }
781 }
782 _ => {
783 set.push(state);
784 }
785 }
786}
787
788fn is_word_byte(b: u8) -> bool {
789 b.is_ascii_alphanumeric() || b == b'_'
790}
791
792#[cfg(test)]
797mod tests {
798 use super::*;
799
800 #[test]
801 fn test_simple_literal() {
802 assert!(is_match("hello", "", b"say hello world"));
803 assert!(!is_match("hello", "", b"say helo world"));
804 }
805
806 #[test]
807 fn test_dot() {
808 assert!(is_match("h.llo", "", b"hello"));
809 assert!(is_match("h.llo", "", b"hxllo"));
810 assert!(!is_match("h.llo", "", b"hlo"));
811 }
812
813 #[test]
814 fn test_star() {
815 assert!(is_match("ab*c", "", b"ac"));
816 assert!(is_match("ab*c", "", b"abc"));
817 assert!(is_match("ab*c", "", b"abbc"));
818 assert!(is_match("ab*c", "", b"abbbbc"));
819 }
820
821 #[test]
822 fn test_plus() {
823 assert!(!is_match("ab+c", "", b"ac"));
824 assert!(is_match("ab+c", "", b"abc"));
825 assert!(is_match("ab+c", "", b"abbc"));
826 }
827
828 #[test]
829 fn test_question() {
830 assert!(is_match("ab?c", "", b"ac"));
831 assert!(is_match("ab?c", "", b"abc"));
832 assert!(!is_match("ab?c", "", b"abbc"));
833 }
834
835 #[test]
836 fn test_alternation() {
837 assert!(is_match("cat|dog", "", b"I have a cat"));
838 assert!(is_match("cat|dog", "", b"I have a dog"));
839 assert!(!is_match("cat|dog", "", b"I have a bird"));
840 }
841
842 #[test]
843 fn test_char_class() {
844 assert!(is_match("[abc]", "", b"a"));
845 assert!(is_match("[abc]", "", b"b"));
846 assert!(!is_match("[abc]", "", b"d"));
847 }
848
849 #[test]
850 fn test_char_class_range() {
851 assert!(is_match("[a-z]", "", b"m"));
852 assert!(!is_match("[a-z]", "", b"M"));
853 }
854
855 #[test]
856 fn test_negated_class() {
857 assert!(!is_match("[^abc]", "", b"a"));
858 assert!(is_match("[^abc]", "", b"d"));
859 }
860
861 #[test]
862 fn test_digit_class() {
863 assert!(is_match("\\d+", "", b"abc123def"));
864 assert!(!is_match("^\\d+$", "", b"abc123def"));
865 assert!(is_match("^\\d+$", "", b"123"));
866 }
867
868 #[test]
869 fn test_word_class() {
870 assert!(is_match("\\w+", "", b"hello_world"));
871 assert!(!is_match("^\\w+$", "", b"hello world"));
872 }
873
874 #[test]
875 fn test_anchors() {
876 assert!(is_match("^hello", "", b"hello world"));
877 assert!(!is_match("^hello", "", b"say hello"));
878 assert!(is_match("world$", "", b"hello world"));
879 assert!(!is_match("world$", "", b"world hello"));
880 }
881
882 #[test]
883 fn test_case_insensitive() {
884 assert!(is_match("hello", "i", b"HELLO"));
885 assert!(is_match("hello", "i", b"HeLLo"));
886 }
887
888 #[test]
889 fn test_dotall() {
890 assert!(!is_match("a.b", "", b"a\nb"));
891 assert!(is_match("a.b", "s", b"a\nb"));
892 }
893
894 #[test]
895 fn test_multiline() {
896 assert!(is_match("^world", "m", b"hello\nworld"));
897 assert!(!is_match("^world", "", b"hello\nworld"));
898 }
899
900 #[test]
901 fn test_groups() {
902 assert!(is_match("(abc)+", "", b"abcabc"));
903 assert!(!is_match("(abc)+", "", b"abab"));
904 }
905
906 #[test]
907 fn test_hex_escape() {
908 assert!(is_match("\\x41\\x42", "", b"AB"));
909 }
910
911 #[test]
912 fn test_word_boundary() {
913 assert!(is_match("\\bhello\\b", "", b"say hello world"));
914 assert!(!is_match("\\bhello\\b", "", b"sayhelloworld"));
915 }
916
917 #[test]
918 fn test_find_span() {
919 let span = find("\\d+", "", b"abc123def");
920 assert_eq!(span, Some((3, 6)));
921 }
922
923 #[test]
924 fn test_find_all() {
925 let spans = find_all("\\d+", "", b"a1b22c333");
926 assert_eq!(spans, vec![(1, 2), (3, 5), (6, 9)]);
927 }
928
929 #[test]
930 fn test_split() {
931 let segs = split(",", "", b"a,b,c");
932 assert_eq!(segs, vec![(0, 1), (2, 3), (4, 5)]);
933 }
934
935 #[test]
936 fn test_split_whitespace() {
937 let segs = split("\\s+", "", b"hello world");
938 assert_eq!(segs, vec![(0, 5), (7, 12)]);
939 }
940
941 #[test]
942 fn test_empty_pattern() {
943 assert!(is_match("", "", b"anything"));
944 }
945
946 #[test]
947 fn test_empty_haystack() {
948 assert!(!is_match("a", "", b""));
949 assert!(is_match("", "", b""));
950 assert!(is_match("a*", "", b""));
951 }
952
953 #[test]
954 fn test_determinism() {
955 for _ in 0..10 {
956 let r = find_all("\\d+", "", b"x1y22z333");
957 assert_eq!(r, vec![(1, 2), (3, 5), (6, 9)]);
958 }
959 }
960
961 #[test]
962 fn test_whitespace_class() {
963 assert!(is_match("\\s", "", b" "));
964 assert!(is_match("\\s", "", b"\t"));
965 assert!(is_match("\\s", "", b"\n"));
966 assert!(!is_match("\\s", "", b"a"));
967 }
968
969 #[test]
970 fn test_negated_digit() {
971 assert!(is_match("\\D", "", b"a"));
972 assert!(!is_match("\\D", "", b"5"));
973 }
974
975 #[test]
976 fn test_lazy_star() {
977 let greedy = find("a.*b", "", b"aXbYb");
978 assert_eq!(greedy, Some((0, 5)));
979 let lazy = find("a.*?b", "", b"aXbYb");
980 assert_eq!(lazy, Some((0, 3)));
981 }
982
983 #[test]
984 fn test_lazy_plus() {
985 let greedy = find("a.+b", "", b"aXYZb");
986 assert_eq!(greedy, Some((0, 5)));
987 let lazy = find("a.+?b", "", b"aXbYb");
988 assert_eq!(lazy, Some((0, 3)));
989 }
990
991 #[test]
992 fn test_lazy_star_find_all() {
993 let greedy = find_all("<.+>", "", b"<a><b>");
994 assert_eq!(greedy, vec![(0, 6)]);
995 let lazy = find_all("<.+?>", "", b"<a><b>");
996 assert_eq!(lazy, vec![(0, 3), (3, 6)]);
997 }
998
999 #[test]
1000 fn test_lazy_star_zero_length() {
1001 let lazy = find("a*?", "", b"aaa");
1002 assert_eq!(lazy, Some((0, 0)));
1003 }
1004
1005 #[test]
1006 fn test_lazy_plus_minimum() {
1007 let lazy = find("a+?", "", b"aaa");
1008 assert_eq!(lazy, Some((0, 1)));
1009 }
1010
1011 #[test]
1012 fn test_greedy_unchanged() {
1013 let greedy = find("a+", "", b"aaa");
1014 assert_eq!(greedy, Some((0, 3)));
1015 let greedy2 = find("a*", "", b"aaa");
1016 assert_eq!(greedy2, Some((0, 3)));
1017 let greedy3 = find("<.+>", "", b"<a><b>");
1018 assert_eq!(greedy3, Some((0, 6)));
1019 }
1020}