1use super::charclass::{is_line_terminator, is_word_char};
2use super::opcode::*;
3use super::pool::BacktrackPool;
4
/// Branch-prediction marker for conditions expected to be true.
///
/// Currently an identity function: it returns `b` unchanged and exists only
/// to annotate hot-path conditions at the call site.
#[inline(always)]
fn likely(b: bool) -> bool {
    b
}
9
/// Branch-prediction marker for conditions expected to be false.
///
/// Currently an identity function: it returns `b` unchanged and exists only
/// to annotate cold-path conditions at the call site.
#[inline(always)]
fn unlikely(b: bool) -> bool {
    b
}
14
/// A successful match produced by the executor.
///
/// All positions are **character indices** into the searched input (the
/// executor advances its position register one character at a time), not
/// byte offsets.
#[derive(Debug, Clone)]
pub struct Match {
    /// Character index of the first matched character.
    pub start: usize,

    /// Character index one past the last matched character.
    pub end: usize,

    /// `(start, end)` character indices for each capture slot; a side is
    /// `None` when that boundary was never recorded.
    pub captures: Vec<(Option<usize>, Option<usize>)>,
}

impl Match {
    /// Returns the matched text as a slice of `input`.
    ///
    /// `start` and `end` are character indices, so they are translated to
    /// byte offsets before slicing. Slicing with them directly would return
    /// the wrong text, or panic on a non-boundary, whenever `input` contains
    /// multi-byte characters. Indices past the end of `input` are clamped to
    /// `input.len()`.
    ///
    /// # Panics
    ///
    /// Panics if `start` is greater than `end`.
    pub fn as_str<'a>(&self, input: &'a str) -> &'a str {
        // Byte offset of the `char_idx`-th character, or the input length
        // when the index is at or beyond the end.
        let byte_offset = |char_idx: usize| {
            input
                .char_indices()
                .nth(char_idx)
                .map_or(input.len(), |(offset, _)| offset)
        };

        let start = byte_offset(self.start);
        let end = byte_offset(self.end);
        &input[start..end]
    }
}
29
30#[derive(Debug, Clone, Copy)]
31struct RegisterFile {
32 r: [usize; REG_COUNT],
33}
34
35impl Default for RegisterFile {
36 fn default() -> Self {
37 Self { r: [0; REG_COUNT] }
38 }
39}
40
/// Per-search execution state for running a compiled program against one
/// input string.
pub struct ExecContext<'a> {
    /// The text being searched.
    input: &'a str,

    /// `input` viewed as raw bytes.
    input_bytes: &'a [u8],

    /// Byte offset of each character of `input`; left empty when the input
    /// is pure ASCII (character index == byte offset in that case).
    char_positions: Vec<usize>,

    /// Number of characters in `input` (equals the byte length for ASCII).
    char_len: usize,

    /// True when every byte of `input` is below 0x80.
    is_ascii: bool,

    /// Program bytecode with the first `HEADER_LEN` bytes skipped.
    bytecode: &'a [u8],

    /// Number of capture slots allocated for each match attempt.
    capture_count: usize,

    /// Stack of saved states that failed paths resume from.
    backtrack_pool: BacktrackPool,

    /// True when `FLAG_UNICODE` or `FLAG_UNICODE_SETS` is set on the program.
    is_unicode: bool,

    /// Character-class table, indexed by the operand of the class opcodes.
    char_ranges: &'a [super::charclass::CharRange],

    /// True when `FLAG_MULTI_LINE` is set: line anchors also match next to
    /// line terminators.
    multi_line: bool,

    /// True when `FLAG_STICKY` is set: only the starting position is tried.
    sticky: bool,
}
66
67pub fn execute(prog: &super::compiler::Program, input: &str, start_pos: usize) -> Option<Match> {
68 let ctx = ExecContext::new(prog, input);
69 ctx.execute(start_pos)
70}
71
72pub fn find_all(prog: &super::compiler::Program, input: &str) -> Vec<Match> {
73 let ctx = ExecContext::new(prog, input);
74 ctx.find_all()
75}
76
77impl<'a> ExecContext<'a> {
78 fn new(prog: &'a super::compiler::Program, input: &'a str) -> Self {
79 let input_bytes = input.as_bytes();
80 let flags = prog.flags;
81
82 let is_ascii = input_bytes.iter().all(|&b| b < 0x80);
83
84 let (char_positions, char_len) = if is_ascii {
85 (Vec::new(), input_bytes.len())
86 } else {
87 let positions: Vec<usize> = input.char_indices().map(|(i, _)| i).collect();
88 let len = positions.len();
89 (positions, len)
90 };
91
92 Self {
93 input,
94 input_bytes,
95 char_positions,
96 char_len,
97 is_ascii,
98 bytecode: &prog.bytecode[HEADER_LEN..],
99 capture_count: prog.capture_count,
100 backtrack_pool: BacktrackPool::new(),
101 is_unicode: (flags & FLAG_UNICODE) != 0 || (flags & FLAG_UNICODE_SETS) != 0,
102 multi_line: (flags & FLAG_MULTI_LINE) != 0,
103 sticky: (flags & FLAG_STICKY) != 0,
104 char_ranges: &prog.char_ranges,
105 }
106 }
107
108 #[inline(always)]
109 fn char_to_byte_pos(&self, char_pos: usize) -> usize {
110 if self.is_ascii {
111 char_pos.min(self.input_bytes.len())
112 } else {
113 self.char_positions
114 .get(char_pos)
115 .copied()
116 .unwrap_or(self.input_bytes.len())
117 }
118 }
119
120 fn execute(mut self, start_pos: usize) -> Option<Match> {
121 let is_sticky = self.sticky;
122
123 let char_len = self.char_len;
124 let mut pos = start_pos;
125
126 let starts_with_anchor = self.pattern_starts_with_start_anchor();
127
128 if starts_with_anchor && !is_sticky && start_pos == 0 {
129 let mut regs = RegisterFile::default();
130 regs.r[REG_POS] = 0;
131 let mut captures = vec![(None, None); self.capture_count];
132
133 if let Some(end_pos) = self.run(&mut regs, 0, &mut captures) {
134 return Some(Match {
135 start: 0,
136 end: end_pos,
137 captures,
138 });
139 }
140 return None;
141 }
142
143 let ends_with_anchor = self.pattern_ends_with_end_anchor();
144
145 if ends_with_anchor && !is_sticky && char_len > 0 {
146 return self.execute_end_anchor(start_pos);
147 }
148
149 let mut captures: Vec<(Option<usize>, Option<usize>)> =
150 vec![(None, None); self.capture_count];
151
152 while pos < char_len {
153 let mut regs = RegisterFile::default();
154 regs.r[REG_POS] = pos;
155
156 for c in captures.iter_mut() {
157 *c = (None, None);
158 }
159
160 if let Some(end_pos) = self.run(&mut regs, 0, &mut captures) {
161 return Some(Match {
162 start: pos,
163 end: end_pos,
164 captures,
165 });
166 }
167
168 if is_sticky {
169 break;
170 }
171 pos += 1;
172 }
173
174 None
175 }
176
177 fn pattern_starts_with_start_anchor(&self) -> bool {
178 let code = self.bytecode;
179 if code.is_empty() {
180 return false;
181 }
182
183 let mut i = 0;
184 while i < code.len() && i < 20 {
185 let op = code[i];
186 if op == OpCode::CheckLineStart as u8 {
187 return true;
188 }
189
190 match op {
191 27 | 28 => i += 2,
192 1 | 2 => i += 4,
193 9..=14 => i += 1,
194 _ => {
195 break;
196 }
197 }
198 }
199 false
200 }
201
    /// Reports whether the program appears to end with an end-of-line
    /// anchor; delegates to `check_bytecode_has_end_anchor`.
    fn pattern_ends_with_end_anchor(&self) -> bool {
        self.check_bytecode_has_end_anchor()
    }
205
206 fn check_bytecode_has_end_anchor(&self) -> bool {
207 let code = self.bytecode;
208 let len = code.len();
209
210 if len < 2 {
211 return false;
212 }
213
214 for &op in code.iter().rev().take(50) {
215 if op == OpCode::CheckLineEnd as u8 {
216 return true;
217 }
218 }
219
220 false
221 }
222
223 fn execute_end_anchor(mut self, start_pos: usize) -> Option<Match> {
224 let char_len = self.char_len;
225
226 if self.multi_line {
227 let end_positions = self.find_all_line_ends(start_pos);
228 for end_pos in end_positions {
229 if let Some(m) = self.try_match_ending_at(end_pos, start_pos) {
230 return Some(m);
231 }
232 }
233 } else {
234 if let Some(m) = self.try_match_ending_at(char_len, start_pos) {
235 return Some(m);
236 }
237 }
238
239 None
240 }
241
242 fn try_match_ending_at(&mut self, end_pos: usize, start_pos: usize) -> Option<Match> {
243 let max_lookback = 100;
244 let search_start = start_pos.max(end_pos.saturating_sub(max_lookback));
245
246 for pos in search_start..=end_pos {
247 let mut regs = RegisterFile::default();
248 regs.r[REG_POS] = pos;
249 let mut captures = vec![(None, None); self.capture_count];
250
251 if let Some(match_end) = self.run(&mut regs, 0, &mut captures) {
252 if match_end == end_pos {
253 return Some(Match {
254 start: pos,
255 end: match_end,
256 captures,
257 });
258 }
259 }
260 }
261
262 None
263 }
264
265 fn find_all_line_ends(&self, start_pos: usize) -> Vec<usize> {
266 let mut ends = vec![self.char_len];
267
268 if !self.multi_line {
269 return ends;
270 }
271
272 for (i, c) in self.input.char_indices() {
273 if is_line_terminator(c as u32) {
274 let char_pos = self.byte_to_char_pos(i);
275 if char_pos >= start_pos && char_pos < self.char_len && !ends.contains(&char_pos) {
276 ends.push(char_pos);
277 }
278 }
279 }
280
281 ends.sort_unstable();
282 ends
283 }
284
285 fn byte_to_char_pos(&self, byte_pos: usize) -> usize {
286 if self.is_ascii {
287 byte_pos
288 } else {
289 match self.char_positions.binary_search(&byte_pos) {
290 Ok(i) => i,
291 Err(i) => i.saturating_sub(1),
292 }
293 }
294 }
295
296 fn find_all(mut self) -> Vec<Match> {
297 let mut matches = Vec::new();
298 let mut pos = 0;
299
300 let char_len = self.char_len;
301
302 let mut captures: Vec<(Option<usize>, Option<usize>)> =
303 vec![(None, None); self.capture_count];
304
305 while pos < char_len {
306 let mut regs = RegisterFile::default();
307 regs.r[REG_POS] = pos;
308
309 self.backtrack_pool.clear();
310
311 for c in captures.iter_mut() {
312 *c = (None, None);
313 }
314
315 if let Some(end_pos) = self.run(&mut regs, 0, &mut captures) {
316 let match_start = pos;
317 let match_end = end_pos;
318
319 matches.push(Match {
320 start: match_start,
321 end: match_end,
322 captures: captures.clone(),
323 });
324
325 if match_end <= match_start {
326 pos += 1;
327 } else {
328 pos = match_end;
329 }
330 } else {
331 pos += 1;
332 }
333 }
334
335 matches
336 }
337
    /// Interprets the bytecode starting at `pc`.
    ///
    /// `regs` holds the machine registers (`REG_POS` is the current
    /// character index in the input) and `captures` receives capture
    /// boundaries as they are recorded.
    ///
    /// Returns `Some(position)` — the value of `REG_POS` — when a `Success`
    /// instruction is reached. When an instruction fails, control passes to
    /// `fail_or_backtrack`, which resumes from saved states and yields
    /// `None` once none are left. `Halt`, or an opcode byte greater than
    /// `OpCode::Halt`, returns `None` immediately without backtracking.
    ///
    /// # Panics
    ///
    /// Panics on `OpCode::Invalid`, and on out-of-range indexing if the
    /// bytecode is truncated or names a register outside the register file.
    fn run(
        &mut self,
        regs: &mut RegisterFile,
        mut pc: usize,
        captures: &mut [(Option<usize>, Option<usize>)],
    ) -> Option<usize> {
        loop {
            // Running off the end of the program counts as a failure.
            if unlikely(pc >= self.bytecode.len()) {
                return self.fail_or_backtrack(regs, pc, captures);
            }

            let opcode_byte = self.bytecode[pc];

            // SAFETY: NOTE(review) — this relies on `OpCode` having a `u8`
            // representation in which every value in `0..=Halt` is a valid
            // variant. The enum is defined elsewhere; confirm that holds.
            let opcode = if likely(opcode_byte <= OpCode::Halt as u8) {
                unsafe { std::mem::transmute(opcode_byte) }
            } else {
                return None;
            };

            match opcode {
                // Match complete: report the position reached.
                OpCode::Success => {
                    return Some(regs.r[REG_POS]);
                }

                OpCode::Fail => {
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Stops immediately; saved backtrack states are not tried.
                OpCode::Halt => {
                    return None;
                }

                // 4-byte instruction; the expected code unit is a
                // little-endian u16 at pc+2.
                OpCode::MatchChar => {
                    let expected =
                        ((self.bytecode[pc + 3] as u16) << 8 | self.bytecode[pc + 2] as u16) as u32;
                    let pos = regs.r[REG_POS];

                    if likely(pos < self.char_len) {
                        let byte_pos = if self.is_ascii {
                            pos
                        } else {
                            self.char_positions[pos]
                        };
                        // Fast path: compare a single ASCII byte directly.
                        let b = self.input_bytes[byte_pos];
                        if likely(b < 0x80 && b as u32 == expected) {
                            regs.r[REG_POS] = pos + 1;
                            pc += 4;
                            continue;
                        }

                        // Slow path: decode the full character.
                        if let Some(c) = self.get_char_fast(pos) {
                            if c as u32 == expected {
                                regs.r[REG_POS] = pos + 1;
                                pc += 4;
                                continue;
                            }
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Case-insensitive variant: the input character is
                // canonicalized before comparing with the operand.
                OpCode::MatchCharI => {
                    let expected = self.read_u16(pc + 2) as u32;
                    let pos = regs.r[REG_POS];

                    if likely(pos < self.char_len) {
                        if let Some(c) = self.get_char_fast(pos) {
                            if canonicalize(c as u32, self.is_unicode) == expected {
                                regs.r[REG_POS] = pos + 1;
                                pc += opcode.size();
                                continue;
                            }
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // 32-bit operand at pc+2; the position is read from and
                // written to the register named at pc+1 rather than a fixed
                // `REG_POS`.
                OpCode::MatchChar32 => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let expected = self.read_u32(pc + 2);
                    let pos = regs.r[reg];

                    if let Some(c) = self.get_char(pos) {
                        if c as u32 == expected {
                            regs.r[reg] = pos + 1;
                            pc += opcode.size();
                            continue;
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                OpCode::MatchChar32I => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let expected = self.read_u32(pc + 2);
                    let pos = regs.r[reg];

                    if let Some(c) = self.get_char(pos) {
                        if canonicalize(c as u32, self.is_unicode) == expected {
                            regs.r[reg] = pos + 1;
                            pc += opcode.size();
                            continue;
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Any character except a line terminator.
                OpCode::MatchDot => {
                    let pos = regs.r[REG_POS];
                    if let Some(c) = self.get_char(pos) {
                        if !is_line_terminator(c as u32) {
                            regs.r[REG_POS] = pos + 1;
                            pc += 1;
                            continue;
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Any character at all; fails only at end of input.
                OpCode::MatchAny => {
                    let pos = regs.r[REG_POS];
                    if self.get_char(pos).is_some() {
                        regs.r[REG_POS] = pos + 1;
                        pc += 1;
                        continue;
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Character class: u16 index into `char_ranges` at pc+2. An
                // out-of-range index is treated as a failed match.
                OpCode::MatchClass => {
                    let range_idx = self.read_u16(pc + 2) as usize;
                    let pos = regs.r[REG_POS];
                    if let Some(c) = self.get_char(pos) {
                        if let Some(range) = self.char_ranges.get(range_idx) {
                            if range.contains(c as u32) {
                                regs.r[REG_POS] = pos + 1;
                                pc += 4;
                                continue;
                            }
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Case-insensitive class: accepts the character itself or
                // its canonical form.
                OpCode::MatchClassI => {
                    let range_idx = self.read_u16(pc + 2) as usize;
                    let pos = regs.r[REG_POS];
                    if let Some(c) = self.get_char(pos) {
                        let c_upper = canonicalize(c as u32, self.is_unicode);
                        if let Some(range) = self.char_ranges.get(range_idx) {
                            if range.contains(c as u32) || range.contains(c_upper) {
                                regs.r[REG_POS] = pos + 1;
                                pc += 4;
                                continue;
                            }
                        }
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Holds at position 0, or (multi-line only) right after a
                // line terminator.
                OpCode::CheckLineStart => {
                    let pos = regs.r[REG_POS];
                    let at_start = pos == 0;
                    let after_newline = if self.multi_line {
                        pos > 0 && is_line_terminator(self.get_char(pos - 1).unwrap_or('\0') as u32)
                    } else {
                        false
                    };

                    if at_start || after_newline {
                        pc += 1;
                        continue;
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Holds at end of input, or (multi-line only) right before a
                // line terminator.
                OpCode::CheckLineEnd => {
                    let pos = regs.r[REG_POS];
                    let at_end = pos >= self.char_len;
                    let before_newline = if self.multi_line {
                        self.get_char(pos)
                            .map_or(false, |c| is_line_terminator(c as u32))
                    } else {
                        false
                    };

                    if at_end || before_newline {
                        pc += 1;
                        continue;
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                OpCode::CheckWordBoundary | OpCode::CheckWordBoundaryI => {
                    let ignore_case = opcode == OpCode::CheckWordBoundaryI;
                    if self.check_word_boundary(regs.r[REG_POS], ignore_case) {
                        pc += 1;
                        continue;
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                OpCode::CheckNotWordBoundary | OpCode::CheckNotWordBoundaryI => {
                    let ignore_case = opcode == OpCode::CheckNotWordBoundaryI;
                    if !self.check_word_boundary(regs.r[REG_POS], ignore_case) {
                        pc += 1;
                        continue;
                    }
                    return self.fail_or_backtrack(regs, pc, captures);
                }

                // Unconditional jump: signed 32-bit little-endian offset,
                // relative to the end of this 5-byte instruction.
                OpCode::Jmp => {
                    let offset = i32::from_le_bytes([
                        self.bytecode[pc + 1],
                        self.bytecode[pc + 2],
                        self.bytecode[pc + 3],
                        self.bytecode[pc + 4],
                    ]);
                    pc = (pc as i32 + 5 + offset) as usize;
                    continue;
                }

                // NOTE(review): handled exactly like `Jmp` (always taken) —
                // confirm that is the intended meaning of this opcode.
                OpCode::JmpMatch => {
                    let offset = i32::from_le_bytes([
                        self.bytecode[pc + 1],
                        self.bytecode[pc + 2],
                        self.bytecode[pc + 3],
                        self.bytecode[pc + 4],
                    ]);
                    pc = (pc as i32 + 5 + offset) as usize;
                    continue;
                }

                // NOTE(review): handled exactly like `Jmp` (always taken) —
                // confirm that is the intended meaning of this opcode.
                OpCode::JmpFail => {
                    let offset = i32::from_le_bytes([
                        self.bytecode[pc + 1],
                        self.bytecode[pc + 2],
                        self.bytecode[pc + 3],
                        self.bytecode[pc + 4],
                    ]);
                    pc = (pc as i32 + 5 + offset) as usize;
                    continue;
                }

                // Conditional jumps, 10 bytes each: register at pc+1, u32
                // immediate at pc+2, i32 offset at pc+6 (relative to the end
                // of the instruction).
                OpCode::JmpEq => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let imm = self.read_u32(pc + 2) as usize;
                    let offset = self.read_i32(pc + 6);

                    if regs.r[reg] == imm {
                        pc = (pc as i32 + 10 + offset) as usize;
                    } else {
                        pc += 10;
                    }
                    continue;
                }

                OpCode::JmpNe => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let imm = self.read_u32(pc + 2) as usize;
                    let offset = self.read_i32(pc + 6);

                    if regs.r[reg] != imm {
                        pc = (pc as i32 + 10 + offset) as usize;
                    } else {
                        pc += 10;
                    }
                    continue;
                }

                OpCode::JmpLt => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let imm = self.read_u32(pc + 2) as usize;
                    let offset = self.read_i32(pc + 6);

                    if regs.r[reg] < imm {
                        pc = (pc as i32 + 10 + offset) as usize;
                    } else {
                        pc += 10;
                    }
                    continue;
                }

                // Register arithmetic and moves.
                OpCode::MovImm => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let imm = self.read_u32(pc + 2);
                    regs.r[reg] = imm as usize;
                    pc += 6;
                    continue;
                }

                OpCode::MovReg => {
                    let dst = self.bytecode[pc + 1] as usize;
                    let src = self.bytecode[pc + 2] as usize;
                    regs.r[dst] = regs.r[src];
                    pc += 3;
                    continue;
                }

                OpCode::Inc => {
                    let reg = self.bytecode[pc + 1] as usize;
                    regs.r[reg] = regs.r[reg].wrapping_add(1);
                    pc += 2;
                    continue;
                }

                OpCode::Dec => {
                    let reg = self.bytecode[pc + 1] as usize;
                    regs.r[reg] = regs.r[reg].wrapping_sub(1);
                    pc += 2;
                    continue;
                }

                OpCode::AddImm => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let imm = self.read_u32(pc + 2);
                    regs.r[reg] = regs.r[reg].wrapping_add(imm as usize);
                    pc += 6;
                    continue;
                }

                // Record the current position as the start of capture `idx`;
                // indices beyond the capture vector are ignored.
                OpCode::SaveStart => {
                    let idx = self.bytecode[pc + 1] as usize;
                    if idx < captures.len() {
                        captures[idx].0 = Some(regs.r[REG_POS]);
                    }
                    pc += 2;
                    continue;
                }

                // Record the current position as the end of capture `idx`.
                OpCode::SaveEnd => {
                    let idx = self.bytecode[pc + 1] as usize;
                    if idx < captures.len() {
                        captures[idx].1 = Some(regs.r[REG_POS]);
                    }
                    pc += 2;
                    continue;
                }

                // Clear captures in the inclusive range start..=end.
                OpCode::ResetCaptures => {
                    let start = self.bytecode[pc + 1] as usize;
                    let end = self.bytecode[pc + 2] as usize;
                    for i in start..=end {
                        if i < captures.len() {
                            captures[i] = (None, None);
                        }
                    }
                    pc += 3;
                    continue;
                }

                // Save a resume point: target pc, current position and
                // counter, plus the boundaries of capture 0 only
                // (`u32::MAX` stands for "unset"). Values are narrowed to
                // u32 when stored.
                OpCode::PushBacktrack => {
                    let offset = self.read_i32(pc + 1);
                    let fail_target = (pc as i32 + 5 + offset) as usize;

                    self.backtrack_pool.push(super::pool::BacktrackState {
                        pc: fail_target as u32,
                        pos: regs.r[REG_POS] as u32,
                        counter: regs.r[REG_COUNTER] as u32,
                        capture_start: captures
                            .get(0)
                            .and_then(|c| c.0)
                            .unwrap_or(u32::MAX as usize)
                            as u32,
                        capture_end: captures
                            .get(0)
                            .and_then(|c| c.1)
                            .unwrap_or(u32::MAX as usize)
                            as u32,
                    });

                    pc += 5;
                    continue;
                }

                // Discard the most recently saved resume point.
                OpCode::PopBacktrack => {
                    self.backtrack_pool.pop();
                    pc += 1;
                    continue;
                }

                // Set up a loop counter: `reg` is zeroed, and min/max are
                // stored in the next two registers when they exist.
                OpCode::InitCounter => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let min = self.read_u32(pc + 2);
                    let max = self.read_u32(pc + 6);

                    regs.r[reg] = 0;

                    if reg + 1 < REG_COUNT {
                        regs.r[reg + 1] = min as usize;
                    }
                    if reg + 2 < REG_COUNT {
                        regs.r[reg + 2] = max as usize;
                    }

                    pc += 10;
                    continue;
                }

                // Jump by `fail_offset` once the counter has reached its
                // max; otherwise increment it and fall through.
                OpCode::CheckCounter => {
                    let reg = self.bytecode[pc + 1] as usize;
                    let fail_offset = self.read_i32(pc + 2);

                    let count = regs.r[reg];
                    let max = if reg + 2 < REG_COUNT {
                        regs.r[reg + 2]
                    } else {
                        usize::MAX
                    };

                    if count >= max {
                        pc = (pc as i32 + 6 + fail_offset) as usize;
                    } else {
                        regs.r[reg] = count + 1;
                        pc += 6;
                    }
                    continue;
                }

                OpCode::Invalid => {
                    panic!("Invalid opcode at pc={}", pc);
                }

                // Any opcode without a handler above is treated as a failed
                // match step.
                _ => {
                    return self.fail_or_backtrack(regs, pc, captures);
                }
            }
        }
    }
767
768 fn fail_or_backtrack(
769 &mut self,
770 regs: &mut RegisterFile,
771 _pc: usize,
772 captures: &mut [(Option<usize>, Option<usize>)],
773 ) -> Option<usize> {
774 while let Some(state) = self.backtrack_pool.pop() {
775 regs.r[REG_POS] = state.pos as usize;
776 regs.r[REG_COUNTER] = state.counter as usize;
777
778 if state.capture_start != u32::MAX && state.capture_end != u32::MAX {
779 if !captures.is_empty() {
780 captures[0] = (
781 Some(state.capture_start as usize),
782 Some(state.capture_end as usize),
783 );
784 }
785 }
786
787 if let Some(result) = self.run(regs, state.pc as usize, captures) {
788 return Some(result);
789 }
790 }
791 None
792 }
793
794 #[inline(always)]
795 fn get_char_fast(&self, pos: usize) -> Option<char> {
796 if unlikely(pos >= self.char_len) {
797 return None;
798 }
799
800 let byte_pos = self.char_to_byte_pos(pos);
801 let b = self.input_bytes[byte_pos];
802
803 if likely(b < 0x80) {
804 Some(b as char)
805 } else {
806 self.get_char_utf8(pos)
807 }
808 }
809
810 #[inline(never)]
811 fn get_char_utf8(&self, pos: usize) -> Option<char> {
812 if pos >= self.char_len {
813 return None;
814 }
815 let byte_pos = self.char_to_byte_pos(pos);
816 let bytes = &self.input_bytes[byte_pos..];
817 let first = *bytes.first()?;
818
819 let len = if first < 0xE0 {
820 2
821 } else if first < 0xF0 {
822 3
823 } else {
824 4
825 };
826
827 if bytes.len() < len {
828 return None;
829 }
830
831 std::str::from_utf8(&bytes[..len]).ok()?.chars().next()
832 }
833
834 fn get_char(&self, pos: usize) -> Option<char> {
835 if pos >= self.char_len {
836 return None;
837 }
838 self.get_char_fast(pos)
839 }
840
841 #[inline(always)]
842 fn read_u16(&self, pos: usize) -> u16 {
843 let bytes = &self.bytecode[pos..pos + 2];
844 u16::from_le_bytes([bytes[0], bytes[1]])
845 }
846
847 #[inline(always)]
848 fn read_u32(&self, pos: usize) -> u32 {
849 let bytes = &self.bytecode[pos..pos + 4];
850 u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
851 }
852
853 #[inline(always)]
854 fn read_i32(&self, pos: usize) -> i32 {
855 self.read_u32(pos) as i32
856 }
857
858 fn check_word_boundary(&self, pos: usize, ignore_case: bool) -> bool {
859 let prev_is_word = if pos == 0 {
860 false
861 } else {
862 self.get_char(pos - 1).map_or(false, |c| {
863 let cp = if ignore_case {
864 canonicalize(c as u32, self.is_unicode)
865 } else {
866 c as u32
867 };
868 is_word_char(cp)
869 })
870 };
871
872 let next_is_word = self.get_char(pos).map_or(false, |c| {
873 let cp = if ignore_case {
874 canonicalize(c as u32, self.is_unicode)
875 } else {
876 c as u32
877 };
878 is_word_char(cp)
879 });
880
881 prev_is_word != next_is_word
882 }
883}
884
/// Maps a code point to the canonical form used for case-insensitive
/// comparison.
///
/// Only ASCII letters are affected: in Unicode mode `A..=Z` is folded to
/// lowercase, otherwise `a..=z` is folded to uppercase. Every other code
/// point, including all non-ASCII ones, is returned unchanged.
#[inline(always)]
fn canonicalize(c: u32, is_unicode: bool) -> u32 {
    if c >= 128 {
        return c;
    }

    // `c` is below 128 here, so the narrowing cast is lossless.
    let ascii = c as u8;
    let folded = if is_unicode {
        ascii.to_ascii_lowercase()
    } else {
        ascii.to_ascii_uppercase()
    };
    u32::from(folded)
}
905
// Smoke tests that parse, compile and execute simple literal patterns.
#[cfg(test)]
mod tests {
    use super::super::compiler::compile;
    use super::super::parser::parse;
    use super::*;

    // A literal pattern matches an identical input in full.
    #[test]
    fn test_execute_simple() {
        let ast = parse("abc", 0).unwrap();
        let prog = compile(&ast, 0).unwrap();

        let m = execute(&prog, "abc", 0).unwrap();
        assert_eq!(m.start, 0);
        assert_eq!(m.end, 3);
    }

    // A literal pattern matches as a prefix of a longer input.
    #[test]
    fn test_execute_literal() {
        let ast = parse("hello", 0).unwrap();
        let prog = compile(&ast, 0).unwrap();

        let m = execute(&prog, "hello world", 0).unwrap();
        assert_eq!(m.start, 0);
        assert_eq!(m.end, 5);
    }

    // A literal absent from the input yields no match.
    #[test]
    fn test_no_match() {
        let ast = parse("xyz", 0).unwrap();
        let prog = compile(&ast, 0).unwrap();

        assert!(execute(&prog, "abc", 0).is_none());
    }

    // The search scans forward to find a match later in an ASCII input.
    #[test]
    fn test_ascii_fast_path() {
        let ast = parse("test", 0).unwrap();
        let prog = compile(&ast, 0).unwrap();

        let m = execute(&prog, "this is a test", 0).unwrap();
        assert_eq!(m.start, 10);
        assert_eq!(m.end, 14);
    }
}