1use crate::string::{
4 is_digit, is_linebreak, is_loc_word, is_space, is_uni_digit, is_uni_linebreak, is_uni_space,
5 is_uni_word, is_word, lower_ascii, lower_locate, lower_unicode, upper_locate, upper_unicode,
6};
7
8use super::{SreAtCode, SreCatCode, SreInfo, SreOpcode, StrDrive, StringCursor, MAXREPEAT};
9use optional::Optioned;
10use std::{convert::TryFrom, ptr::null};
11
/// Parameters of one match or search invocation over `string`.
#[derive(Debug, Clone, Copy)]
pub struct Request<'a, S> {
    /// The subject being matched.
    pub string: S,
    /// Position at which matching starts.
    pub start: usize,
    /// Position treated as the end of the subject.
    pub end: usize,
    /// Compiled pattern as a flat sequence of `u32` code words.
    pub pattern_codes: &'a [u32],
    /// When set, a top-level match only succeeds if it stops exactly at `end`.
    pub match_all: bool,
    /// When set, a top-level match is rejected if it stops at `start`.
    pub must_advance: bool,
}
21
22impl<'a, S: StrDrive> Request<'a, S> {
23 pub fn new(
24 string: S,
25 start: usize,
26 end: usize,
27 pattern_codes: &'a [u32],
28 match_all: bool,
29 ) -> Self {
30 let end = std::cmp::min(end, string.count());
31 let start = std::cmp::min(start, end);
32
33 Self {
34 string,
35 start,
36 end,
37 pattern_codes,
38 match_all,
39 must_advance: false,
40 }
41 }
42}
43
/// Mark positions recorded while matching, plus a stack of saved snapshots
/// used to restore them when backtracking.
#[derive(Debug)]
pub struct Marks {
    /// Updated whenever an odd-numbered mark is set; `-1` when nothing was set.
    last_index: isize,
    /// Mark positions; group `g` is read from slots `2 * g` and `2 * g + 1`.
    marks: Vec<Optioned<usize>>,
    /// Saved `(marks, last_index)` snapshots.
    marks_stack: Vec<(Vec<Optioned<usize>>, isize)>,
}
50
51impl Default for Marks {
52 fn default() -> Self {
53 Self {
54 last_index: -1,
55 marks: Vec::new(),
56 marks_stack: Vec::new(),
57 }
58 }
59}
60
61impl Marks {
62 pub fn get(&self, group_index: usize) -> (Optioned<usize>, Optioned<usize>) {
63 let marks_index = 2 * group_index;
64 if marks_index + 1 < self.marks.len() {
65 (self.marks[marks_index], self.marks[marks_index + 1])
66 } else {
67 (Optioned::none(), Optioned::none())
68 }
69 }
70
71 pub fn last_index(&self) -> isize {
72 self.last_index
73 }
74
75 pub fn raw(&self) -> &[Optioned<usize>] {
76 self.marks.as_slice()
77 }
78
79 fn set(&mut self, mark_nr: usize, position: usize) {
80 if mark_nr & 1 != 0 {
81 self.last_index = mark_nr as isize / 2 + 1;
82 }
83 if mark_nr >= self.marks.len() {
84 self.marks.resize(mark_nr + 1, Optioned::none());
85 }
86 self.marks[mark_nr] = Optioned::some(position);
87 }
88
89 fn push(&mut self) {
90 self.marks_stack.push((self.marks.clone(), self.last_index));
91 }
92
93 fn pop(&mut self) {
94 let (marks, last_index) = self.marks_stack.pop().unwrap();
95 self.marks = marks;
96 self.last_index = last_index;
97 }
98
99 fn pop_keep(&mut self) {
100 let (marks, last_index) = self.marks_stack.last().unwrap().clone();
101 self.marks = marks;
102 self.last_index = last_index;
103 }
104
105 fn pop_discard(&mut self) {
106 self.marks_stack.pop();
107 }
108
109 fn clear(&mut self) {
110 self.last_index = -1;
111 self.marks.clear();
112 self.marks_stack.clear();
113 }
114}
115
/// Mutable matching state shared by the match and search entry points.
#[derive(Debug, Default)]
pub struct State {
    /// Position at which the current attempt started.
    pub start: usize,
    /// Marks recorded so far.
    pub marks: Marks,
    /// Cursor into the subject; on success it holds the position where the
    /// match stopped.
    pub cursor: StringCursor,
    /// Stack of active repeat contexts (pushed by `REPEAT`).
    repeat_stack: Vec<RepeatContext>,
}
123
124impl State {
125 pub fn reset<S: StrDrive>(&mut self, req: &Request<S>, start: usize) {
126 self.marks.clear();
127 self.repeat_stack.clear();
128 self.start = start;
129 req.string.adjust_cursor(&mut self.cursor, start);
130 }
131
132 pub fn pymatch<S: StrDrive>(&mut self, req: &Request<S>) -> bool {
133 self.start = req.start;
134 req.string.adjust_cursor(&mut self.cursor, self.start);
135
136 let ctx = MatchContext {
137 cursor: self.cursor,
138 code_position: 0,
139 toplevel: true,
140 jump: Jump::OpCode,
141 repeat_ctx_id: usize::MAX,
142 count: -1,
143 };
144 _match(req, self, ctx)
145 }
146
147 pub fn search<S: StrDrive>(&mut self, mut req: Request<S>) -> bool {
148 self.start = req.start;
149 req.string.adjust_cursor(&mut self.cursor, self.start);
150
151 if req.start > req.end {
152 return false;
153 }
154
155 let mut end = req.end;
156
157 let mut ctx = MatchContext {
158 cursor: self.cursor,
159 code_position: 0,
160 toplevel: true,
161 jump: Jump::OpCode,
162 repeat_ctx_id: usize::MAX,
163 count: -1,
164 };
165
166 if ctx.peek_code(&req, 0) == SreOpcode::INFO as u32 {
167 let min = ctx.peek_code(&req, 3) as usize;
170
171 if ctx.remaining_chars(&req) < min {
172 return false;
173 }
174
175 if min > 1 {
176 end -= min - 1;
180
181 if end < ctx.cursor.position {
183 let skip = end - self.cursor.position;
184 S::skip(&mut self.cursor, skip);
185 }
186 }
187
188 let flags = SreInfo::from_bits_truncate(ctx.peek_code(&req, 2));
189
190 if flags.contains(SreInfo::PREFIX) {
191 if flags.contains(SreInfo::LITERAL) {
192 return search_info_literal::<true, S>(&mut req, self, ctx);
193 } else {
194 return search_info_literal::<false, S>(&mut req, self, ctx);
195 }
196 } else if flags.contains(SreInfo::CHARSET) {
197 return search_info_charset(&mut req, self, ctx);
198 }
199 ctx.skip_code_from(&req, 1);
202 }
203
204 if _match(&req, self, ctx) {
205 return true;
206 }
207
208 if ctx.try_peek_code_as::<SreOpcode, _>(&req, 0).unwrap() == SreOpcode::AT
209 && (ctx.try_peek_code_as::<SreAtCode, _>(&req, 1).unwrap() == SreAtCode::BEGINNING
210 || ctx.try_peek_code_as::<SreAtCode, _>(&req, 1).unwrap()
211 == SreAtCode::BEGINNING_STRING)
212 {
213 self.cursor.position = req.end;
214 self.cursor.ptr = null();
215 return false;
217 }
218
219 req.must_advance = false;
220 ctx.toplevel = false;
221 while req.start < end {
222 req.start += 1;
223 self.reset(&req, req.start);
224 ctx.cursor = self.cursor;
225
226 if _match(&req, self, ctx) {
227 return true;
228 }
229 }
230 false
231 }
232}
233
/// Iterator that repeatedly searches the same request; each `next` call
/// resumes from where the previous match stopped.
pub struct SearchIter<'a, S: StrDrive> {
    /// The request; `start` and `must_advance` are updated after each match.
    pub req: Request<'a, S>,
    /// Matching state; after `next` returns `Some`, it describes the match.
    pub state: State,
}
238
239impl<'a, S: StrDrive> Iterator for SearchIter<'a, S> {
240 type Item = ();
241
242 fn next(&mut self) -> Option<Self::Item> {
243 if self.req.start > self.req.end {
244 return None;
245 }
246
247 self.state.reset(&self.req, self.req.start);
248 if !self.state.search(self.req) {
249 return None;
250 }
251
252 self.req.must_advance = self.state.cursor.position == self.state.start;
253 self.req.start = self.state.cursor.position;
254
255 Some(())
256 }
257}
258
/// Resume points of the `_match` state machine.
///
/// A context's `jump` tells `_match` where to continue when that context is
/// (re)entered, typically after a child context has produced its result.
#[derive(Debug, Clone, Copy)]
enum Jump {
    /// Plain opcode dispatch; nothing to resume.
    OpCode,
    /// After the body of `ASSERT`.
    Assert1,
    /// After the body of `ASSERT_NOT`.
    AssertNot1,
    /// Try the next alternative of a `BRANCH`.
    Branch1,
    /// After one `BRANCH` alternative was tried.
    Branch2,
    /// After the body following `REPEAT`; pops the repeat context.
    Repeat1,
    /// After a repeat-body attempt started by `MAX_UNTIL`/`MIN_UNTIL`.
    UntilBacktrace,
    /// After a further repeat-body attempt from `MAX_UNTIL`.
    MaxUntil2,
    /// After the tail attempt from `MAX_UNTIL`.
    MaxUntil3,
    /// After the tail attempt from `MIN_UNTIL`.
    MinUntil1,
    /// `REPEAT_ONE`: pick the next candidate position and try the tail.
    RepeatOne1,
    /// `REPEAT_ONE`: after a tail attempt.
    RepeatOne2,
    /// `MIN_REPEAT_ONE`: try the tail at the current count.
    MinRepeatOne1,
    /// `MIN_REPEAT_ONE`: after a tail attempt.
    MinRepeatOne2,
    /// After the body of `ATOMIC_GROUP`.
    AtomicGroup1,
    /// `POSSESSIVE_REPEAT`: mandatory repetitions.
    PossessiveRepeat1,
    /// `POSSESSIVE_REPEAT`: after a mandatory repetition.
    PossessiveRepeat2,
    /// `POSSESSIVE_REPEAT`: optional repetitions.
    PossessiveRepeat3,
    /// `POSSESSIVE_REPEAT`: after an optional repetition.
    PossessiveRepeat4,
}
281
/// Core matcher: runs the pattern from `ctx.code_position` at `ctx.cursor`.
///
/// Instead of recursing, sub-matches are driven through an explicit
/// `context_stack`: a context that needs a child result stores a resume point
/// in its `jump` field, is pushed on the stack, and the child context takes
/// over. When a context finishes, its boolean result becomes `popped_result`
/// and the parent (if any) is popped and resumed at its `jump`.
///
/// Returns `true` when the outermost context succeeds; on success
/// `state.cursor` holds the position where the match stopped.
fn _match<S: StrDrive>(req: &Request<S>, state: &mut State, mut ctx: MatchContext) -> bool {
    let mut context_stack = vec![];
    let mut popped_result = false;

    // NOTE: the 'result loop never iterates by itself; it only serves as a
    // labelled break target carrying the context's result.
    #[allow(clippy::never_loop)]
    'coro: loop {
        popped_result = 'result: loop {
            // Breaking out of 'context yields a child context to run next.
            let yielded = 'context: loop {
                // Resume the current context at its recorded jump point.
                match ctx.jump {
                    Jump::OpCode => {}
                    Jump::Assert1 => {
                        if popped_result {
                            ctx.skip_code_from(req, 1);
                        } else {
                            break 'result false;
                        }
                    }
                    Jump::AssertNot1 => {
                        if popped_result {
                            break 'result false;
                        }
                        state.marks.pop();
                        ctx.skip_code_from(req, 1);
                    }
                    Jump::Branch1 => {
                        // `ctx.count` is the offset of the current
                        // alternative's length word; zero ends the list.
                        let branch_offset = ctx.count as usize;
                        let next_length = ctx.peek_code(req, branch_offset) as isize;
                        if next_length == 0 {
                            state.marks.pop_discard();
                            break 'result false;
                        }
                        state.cursor = ctx.cursor;
                        let next_ctx = ctx.next_offset(branch_offset + 1, Jump::Branch2);
                        ctx.count += next_length;
                        break 'context next_ctx;
                    }
                    Jump::Branch2 => {
                        if popped_result {
                            break 'result true;
                        }
                        state.marks.pop_keep();
                        ctx.jump = Jump::Branch1;
                        continue 'context;
                    }
                    Jump::Repeat1 => {
                        state.repeat_stack.pop();
                        break 'result popped_result;
                    }
                    Jump::UntilBacktrace => {
                        if !popped_result {
                            state.repeat_stack[ctx.repeat_ctx_id].count -= 1;
                            state.cursor = ctx.cursor;
                        }
                        break 'result popped_result;
                    }
                    Jump::MaxUntil2 => {
                        // `ctx.count` carried the previous `last_position`.
                        let save_last_position = ctx.count as usize;
                        let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
                        repeat_ctx.last_position = save_last_position;

                        if popped_result {
                            state.marks.pop_discard();
                            break 'result true;
                        }

                        state.marks.pop();
                        repeat_ctx.count -= 1;
                        state.cursor = ctx.cursor;

                        // No further repetition matched here; try the tail
                        // under the enclosing repeat context.
                        let mut next_ctx = ctx.next_offset(1, Jump::MaxUntil3);
                        next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
                        break 'context next_ctx;
                    }
                    Jump::MaxUntil3 => {
                        if !popped_result {
                            state.cursor = ctx.cursor;
                        }
                        break 'result popped_result;
                    }
                    Jump::MinUntil1 => {
                        if popped_result {
                            break 'result true;
                        }
                        // `ctx.count` carried this context's repeat id.
                        ctx.repeat_ctx_id = ctx.count as usize;
                        let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
                        state.cursor = ctx.cursor;
                        state.marks.pop();

                        // Give up when the maximum is reached or the cursor
                        // did not move since the last repetition.
                        if repeat_ctx.count as usize >= repeat_ctx.max_count
                            && repeat_ctx.max_count != MAXREPEAT
                            || state.cursor.position == repeat_ctx.last_position
                        {
                            repeat_ctx.count -= 1;
                            break 'result false;
                        }

                        repeat_ctx.last_position = state.cursor.position;

                        break 'context ctx
                            .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
                    }
                    Jump::RepeatOne1 => {
                        let min_count = ctx.peek_code(req, 2) as isize;
                        let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
                        if next_code == SreOpcode::LITERAL as u32 {
                            // The tail starts with a literal: back up until
                            // the current character equals that literal.
                            let c = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 2);
                            while ctx.at_end(req) || ctx.peek_char::<S>() != c {
                                if ctx.count <= min_count {
                                    state.marks.pop_discard();
                                    break 'result false;
                                }
                                ctx.back_advance_char::<S>();
                                ctx.count -= 1;
                            }
                        }

                        state.cursor = ctx.cursor;
                        // Try the tail at the current position.
                        break 'context ctx.next_peek_from(1, req, Jump::RepeatOne2);
                    }
                    Jump::RepeatOne2 => {
                        if popped_result {
                            break 'result true;
                        }

                        let min_count = ctx.peek_code(req, 2) as isize;
                        if ctx.count <= min_count {
                            state.marks.pop_discard();
                            break 'result false;
                        }

                        // Give back one repetition and retry.
                        ctx.back_advance_char::<S>();
                        ctx.count -= 1;

                        state.marks.pop_keep();
                        ctx.jump = Jump::RepeatOne1;
                        continue 'context;
                    }
                    Jump::MinRepeatOne1 => {
                        let max_count = ctx.peek_code(req, 3) as usize;
                        if max_count == MAXREPEAT || ctx.count as usize <= max_count {
                            state.cursor = ctx.cursor;
                            break 'context ctx.next_peek_from(1, req, Jump::MinRepeatOne2);
                        } else {
                            state.marks.pop_discard();
                            break 'result false;
                        }
                    }
                    Jump::MinRepeatOne2 => {
                        if popped_result {
                            break 'result true;
                        }

                        state.cursor = ctx.cursor;

                        // Consume one more repetition, then retry the tail.
                        let mut count_ctx = ctx;
                        count_ctx.skip_code(4);
                        if _count(req, state, &mut count_ctx, 1) == 0 {
                            state.marks.pop_discard();
                            break 'result false;
                        }

                        ctx.advance_char::<S>();
                        ctx.count += 1;
                        state.marks.pop_keep();
                        ctx.jump = Jump::MinRepeatOne1;
                        continue 'context;
                    }
                    Jump::AtomicGroup1 => {
                        if popped_result {
                            // Continue after the group from where it stopped.
                            ctx.skip_code_from(req, 1);
                            ctx.cursor = state.cursor;
                        } else {
                            state.cursor = ctx.cursor;
                            break 'result false;
                        }
                    }
                    Jump::PossessiveRepeat1 => {
                        let min_count = ctx.peek_code(req, 2) as isize;
                        if ctx.count < min_count {
                            break 'context ctx.next_offset(4, Jump::PossessiveRepeat2);
                        }
                        // Sentinel so the first "cursor did not move" check in
                        // PossessiveRepeat3 cannot trigger.
                        ctx.cursor.position = usize::MAX;
                        ctx.jump = Jump::PossessiveRepeat3;
                        continue 'context;
                    }
                    Jump::PossessiveRepeat2 => {
                        if popped_result {
                            ctx.count += 1;
                            ctx.jump = Jump::PossessiveRepeat1;
                            continue 'context;
                        } else {
                            state.cursor = ctx.cursor;
                            break 'result false;
                        }
                    }
                    Jump::PossessiveRepeat3 => {
                        let max_count = ctx.peek_code(req, 3) as usize;
                        if ((ctx.count as usize) < max_count || max_count == MAXREPEAT)
                            && ctx.cursor.position != state.cursor.position
                        {
                            state.marks.push();
                            ctx.cursor = state.cursor;
                            break 'context ctx.next_offset(4, Jump::PossessiveRepeat4);
                        }
                        // Done repeating: continue after the repeat block.
                        ctx.cursor = state.cursor;
                        ctx.skip_code_from(req, 1);
                        ctx.skip_code(1);
                    }
                    Jump::PossessiveRepeat4 => {
                        if popped_result {
                            state.marks.pop_discard();
                            ctx.count += 1;
                            ctx.jump = Jump::PossessiveRepeat3;
                            continue 'context;
                        }
                        state.marks.pop();
                        state.cursor = ctx.cursor;
                        ctx.skip_code_from(req, 1);
                        ctx.skip_code(1);
                    }
                }
                ctx.jump = Jump::OpCode;

                // Main opcode dispatch loop for the current context.
                loop {
                    // Single-character opcode with one argument word:
                    // `$f(code, char)` decides whether the character matches.
                    macro_rules! general_op_literal {
                        ($f:expr) => {{
                            #[allow(clippy::redundant_closure_call)]
                            if ctx.at_end(req) || !$f(ctx.peek_code(req, 1), ctx.peek_char::<S>()) {
                                break 'result false;
                            }
                            ctx.skip_code(2);
                            ctx.advance_char::<S>();
                        }};
                    }

                    // Set-membership opcode: `$f(set, char)` tests the set
                    // that starts two words after the opcode.
                    macro_rules! general_op_in {
                        ($f:expr) => {{
                            #[allow(clippy::redundant_closure_call)]
                            if ctx.at_end(req) || !$f(&ctx.pattern(req)[2..], ctx.peek_char::<S>())
                            {
                                break 'result false;
                            }
                            ctx.skip_code_from(req, 1);
                            ctx.advance_char::<S>();
                        }};
                    }

                    // Back-reference: compares the subject against the text
                    // of an earlier group, mapping both sides through `$f`.
                    macro_rules! general_op_groupref {
                        ($f:expr) => {{
                            let (group_start, group_end) =
                                state.marks.get(ctx.peek_code(req, 1) as usize);
                            let (group_start, group_end) = if group_start.is_some()
                                && group_end.is_some()
                                && group_start.unpack() <= group_end.unpack()
                            {
                                (group_start.unpack(), group_end.unpack())
                            } else {
                                break 'result false;
                            };

                            let mut gctx = MatchContext {
                                cursor: req.string.create_cursor(group_start),
                                ..ctx
                            };

                            for _ in group_start..group_end {
                                #[allow(clippy::redundant_closure_call)]
                                if ctx.at_end(req)
                                    || $f(ctx.peek_char::<S>()) != $f(gctx.peek_char::<S>())
                                {
                                    break 'result false;
                                }
                                ctx.advance_char::<S>();
                                gctx.advance_char::<S>();
                            }

                            ctx.skip_code(2);
                        }};
                    }

                    if ctx.remaining_codes(req) == 0 {
                        break 'result false;
                    }
                    let opcode = ctx.peek_code(req, 0);
                    let opcode = SreOpcode::try_from(opcode).unwrap();

                    match opcode {
                        SreOpcode::FAILURE => break 'result false,
                        SreOpcode::SUCCESS => {
                            if ctx.can_success(req) {
                                state.cursor = ctx.cursor;
                                break 'result true;
                            }
                            break 'result false;
                        }
                        SreOpcode::ANY => {
                            if ctx.at_end(req) || ctx.at_linebreak(req) {
                                break 'result false;
                            }
                            ctx.skip_code(1);
                            ctx.advance_char::<S>();
                        }
                        SreOpcode::ANY_ALL => {
                            if ctx.at_end(req) {
                                break 'result false;
                            }
                            ctx.skip_code(1);
                            ctx.advance_char::<S>();
                        }
                        SreOpcode::ASSERT => {
                            // Word 2 is how far to step back before running
                            // the asserted sub-pattern.
                            let back = ctx.peek_code(req, 2) as usize;
                            if ctx.cursor.position < back {
                                break 'result false;
                            }

                            let mut next_ctx = ctx.next_offset(3, Jump::Assert1);
                            next_ctx.toplevel = false;
                            next_ctx.back_skip_char::<S>(back);
                            state.cursor = next_ctx.cursor;
                            break 'context next_ctx;
                        }
                        SreOpcode::ASSERT_NOT => {
                            let back = ctx.peek_code(req, 2) as usize;
                            if ctx.cursor.position < back {
                                // Cannot step back that far, so the negative
                                // assertion holds trivially.
                                ctx.skip_code_from(req, 1);
                                continue;
                            }
                            state.marks.push();

                            let mut next_ctx = ctx.next_offset(3, Jump::AssertNot1);
                            next_ctx.toplevel = false;
                            next_ctx.back_skip_char::<S>(back);
                            state.cursor = next_ctx.cursor;
                            break 'context next_ctx;
                        }
                        SreOpcode::AT => {
                            let atcode = SreAtCode::try_from(ctx.peek_code(req, 1)).unwrap();
                            if at(req, &ctx, atcode) {
                                ctx.skip_code(2);
                            } else {
                                break 'result false;
                            }
                        }
                        SreOpcode::BRANCH => {
                            // Alternatives are walked in Jump::Branch1,
                            // starting at offset 1.
                            state.marks.push();
                            ctx.count = 1;
                            ctx.jump = Jump::Branch1;
                            continue 'context;
                        }
                        SreOpcode::CATEGORY => {
                            let catcode = SreCatCode::try_from(ctx.peek_code(req, 1)).unwrap();
                            if ctx.at_end(req) || !category(catcode, ctx.peek_char::<S>()) {
                                break 'result false;
                            }
                            ctx.skip_code(2);
                            ctx.advance_char::<S>();
                        }
                        SreOpcode::IN => general_op_in!(charset),
                        SreOpcode::IN_IGNORE => {
                            general_op_in!(|set, c| charset(set, lower_ascii(c)))
                        }
                        SreOpcode::IN_UNI_IGNORE => {
                            general_op_in!(|set, c| charset(set, lower_unicode(c)))
                        }
                        SreOpcode::IN_LOC_IGNORE => general_op_in!(charset_loc_ignore),
                        SreOpcode::MARK => {
                            state
                                .marks
                                .set(ctx.peek_code(req, 1) as usize, ctx.cursor.position);
                            ctx.skip_code(2);
                        }
                        SreOpcode::INFO | SreOpcode::JUMP => ctx.skip_code_from(req, 1),
                        SreOpcode::REPEAT => {
                            // Words 2 and 3 are the minimum and maximum
                            // repetition counts.
                            let repeat_ctx = RepeatContext {
                                count: -1,
                                min_count: ctx.peek_code(req, 2) as usize,
                                max_count: ctx.peek_code(req, 3) as usize,
                                code_position: ctx.code_position,
                                last_position: usize::MAX,
                                prev_id: ctx.repeat_ctx_id,
                            };
                            state.repeat_stack.push(repeat_ctx);
                            let repeat_ctx_id = state.repeat_stack.len() - 1;
                            state.cursor = ctx.cursor;
                            let mut next_ctx = ctx.next_peek_from(1, req, Jump::Repeat1);
                            next_ctx.repeat_ctx_id = repeat_ctx_id;
                            break 'context next_ctx;
                        }
                        SreOpcode::MAX_UNTIL => {
                            let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
                            state.cursor = ctx.cursor;
                            repeat_ctx.count += 1;

                            if (repeat_ctx.count as usize) < repeat_ctx.min_count {
                                // Minimum not reached yet: repeat the body.
                                break 'context ctx
                                    .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
                            }

                            if ((repeat_ctx.count as usize) < repeat_ctx.max_count
                                || repeat_ctx.max_count == MAXREPEAT)
                                && state.cursor.position != repeat_ctx.last_position
                            {
                                // Try one more repetition; stash the old
                                // `last_position` in `ctx.count` so
                                // Jump::MaxUntil2 can restore it.
                                state.marks.push();
                                ctx.count = repeat_ctx.last_position as isize;
                                repeat_ctx.last_position = state.cursor.position;

                                break 'context ctx
                                    .next_at(repeat_ctx.code_position + 4, Jump::MaxUntil2);
                            }

                            // No more repetitions: try the tail under the
                            // enclosing repeat context.
                            let mut next_ctx = ctx.next_offset(1, Jump::MaxUntil3);
                            next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
                            break 'context next_ctx;
                        }
                        SreOpcode::MIN_UNTIL => {
                            let repeat_ctx = state.repeat_stack.last_mut().unwrap();
                            state.cursor = ctx.cursor;
                            repeat_ctx.count += 1;

                            if (repeat_ctx.count as usize) < repeat_ctx.min_count {
                                // Minimum not reached yet: repeat the body.
                                break 'context ctx
                                    .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
                            }

                            // Try the tail first; stash this context's repeat
                            // id in `ctx.count` for Jump::MinUntil1.
                            state.marks.push();
                            ctx.count = ctx.repeat_ctx_id as isize;
                            let mut next_ctx = ctx.next_offset(1, Jump::MinUntil1);
                            next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
                            break 'context next_ctx;
                        }
                        SreOpcode::REPEAT_ONE => {
                            // Words 2 and 3 are min/max; the repeated item
                            // starts at word 4.
                            let min_count = ctx.peek_code(req, 2) as usize;
                            let max_count = ctx.peek_code(req, 3) as usize;

                            if ctx.remaining_chars(req) < min_count {
                                break 'result false;
                            }

                            state.cursor = ctx.cursor;

                            let mut count_ctx = ctx;
                            count_ctx.skip_code(4);
                            let count = _count(req, state, &mut count_ctx, max_count);
                            if count < min_count {
                                break 'result false;
                            }
                            ctx.cursor = count_ctx.cursor;

                            let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
                            if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
                                // The tail is just SUCCESS: done.
                                state.cursor = ctx.cursor;
                                break 'result true;
                            }

                            state.marks.push();
                            ctx.count = count as isize;
                            ctx.jump = Jump::RepeatOne1;
                            continue 'context;
                        }
                        SreOpcode::MIN_REPEAT_ONE => {
                            let min_count = ctx.peek_code(req, 2) as usize;
                            if ctx.remaining_chars(req) < min_count {
                                break 'result false;
                            }

                            state.cursor = ctx.cursor;
                            ctx.count = if min_count == 0 {
                                0
                            } else {
                                // Consume the mandatory repetitions first.
                                let mut count_ctx = ctx;
                                count_ctx.skip_code(4);
                                let count = _count(req, state, &mut count_ctx, min_count);
                                if count < min_count {
                                    break 'result false;
                                }
                                ctx.cursor = count_ctx.cursor;
                                count as isize
                            };

                            let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
                            if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
                                // The tail is just SUCCESS: done.
                                state.cursor = ctx.cursor;
                                break 'result true;
                            }

                            state.marks.push();
                            ctx.jump = Jump::MinRepeatOne1;
                            continue 'context;
                        }
                        SreOpcode::LITERAL => general_op_literal!(|code, c| code == c),
                        SreOpcode::NOT_LITERAL => general_op_literal!(|code, c| code != c),
                        SreOpcode::LITERAL_IGNORE => {
                            general_op_literal!(|code, c| code == lower_ascii(c))
                        }
                        SreOpcode::NOT_LITERAL_IGNORE => {
                            general_op_literal!(|code, c| code != lower_ascii(c))
                        }
                        SreOpcode::LITERAL_UNI_IGNORE => {
                            general_op_literal!(|code, c| code == lower_unicode(c))
                        }
                        SreOpcode::NOT_LITERAL_UNI_IGNORE => {
                            general_op_literal!(|code, c| code != lower_unicode(c))
                        }
                        SreOpcode::LITERAL_LOC_IGNORE => general_op_literal!(char_loc_ignore),
                        SreOpcode::NOT_LITERAL_LOC_IGNORE => {
                            general_op_literal!(|code, c| !char_loc_ignore(code, c))
                        }
                        SreOpcode::GROUPREF => general_op_groupref!(|x| x),
                        SreOpcode::GROUPREF_IGNORE => general_op_groupref!(lower_ascii),
                        SreOpcode::GROUPREF_LOC_IGNORE => general_op_groupref!(lower_locate),
                        SreOpcode::GROUPREF_UNI_IGNORE => general_op_groupref!(lower_unicode),
                        SreOpcode::GROUPREF_EXISTS => {
                            // Group set: continue right after this opcode;
                            // otherwise jump by the offset stored in word 2.
                            let (group_start, group_end) =
                                state.marks.get(ctx.peek_code(req, 1) as usize);
                            if group_start.is_some()
                                && group_end.is_some()
                                && group_start.unpack() <= group_end.unpack()
                            {
                                ctx.skip_code(3);
                            } else {
                                ctx.skip_code_from(req, 2)
                            }
                        }
                        SreOpcode::ATOMIC_GROUP => {
                            state.cursor = ctx.cursor;
                            break 'context ctx.next_offset(2, Jump::AtomicGroup1);
                        }
                        SreOpcode::POSSESSIVE_REPEAT => {
                            state.cursor = ctx.cursor;
                            ctx.count = 0;
                            ctx.jump = Jump::PossessiveRepeat1;
                            continue 'context;
                        }
                        SreOpcode::POSSESSIVE_REPEAT_ONE => {
                            // Like REPEAT_ONE, but the consumed repetitions
                            // are never given back.
                            let min_count = ctx.peek_code(req, 2) as usize;
                            let max_count = ctx.peek_code(req, 3) as usize;
                            if ctx.remaining_chars(req) < min_count {
                                break 'result false;
                            }
                            state.cursor = ctx.cursor;
                            let mut count_ctx = ctx;
                            count_ctx.skip_code(4);
                            let count = _count(req, state, &mut count_ctx, max_count);
                            if count < min_count {
                                break 'result false;
                            }
                            ctx.cursor = count_ctx.cursor;
                            ctx.skip_code_from(req, 1);
                        }
                        SreOpcode::CHARSET
                        | SreOpcode::BIGCHARSET
                        | SreOpcode::NEGATE
                        | SreOpcode::RANGE
                        | SreOpcode::RANGE_UNI_IGNORE
                        | SreOpcode::SUBPATTERN => {
                            unreachable!("unexpected opcode on main dispatch")
                        }
                    }
                }
            };
            // Suspend the current context and run the yielded child.
            context_stack.push(ctx);
            ctx = yielded;
            continue 'coro;
        };
        // The current context finished: resume its parent, or stop when the
        // outermost context is done.
        if let Some(popped_ctx) = context_stack.pop() {
            ctx = popped_ctx;
        } else {
            break;
        }
    }
    popped_result
}
883
/// Search shortcut for patterns whose INFO block carries a literal prefix.
///
/// Scans the subject for the prefix and, once it is found, either reports
/// success right away (`LITERAL == true`) or runs `_match` on the rest of the
/// pattern. On success `req.start`/`state.start` hold the position where the
/// prefix begins.
fn search_info_literal<const LITERAL: bool, S: StrDrive>(
    req: &mut Request<S>,
    state: &mut State,
    mut ctx: MatchContext,
) -> bool {
    // INFO words used here: 5 = prefix length, 6 = skip, followed by the
    // prefix characters and the overlap table.
    let len = ctx.peek_code(req, 5) as usize;
    let skip = ctx.peek_code(req, 6) as usize;
    let prefix = &ctx.pattern(req)[7..7 + len];
    // Starts one word early so that `overlap[i]` can be indexed with the
    // same `i` as `prefix[i]`.
    let overlap = &ctx.pattern(req)[7 + len - 1..7 + len * 2];

    // Position the code pointer for the tail match: step over the INFO block
    // and then over `2 * skip` further code words.
    ctx.skip_code_from(req, 1);
    ctx.skip_code(2 * skip);

    req.must_advance = false;

    if len == 1 {
        // Single-character prefix.
        let c = prefix[0];

        while !ctx.at_end(req) {
            // Find the next occurrence of the literal.
            while ctx.peek_char::<S>() != c {
                ctx.advance_char::<S>();
                if ctx.at_end(req) {
                    return false;
                }
            }

            req.start = ctx.cursor.position;
            state.start = req.start;
            state.cursor = ctx.cursor;
            S::skip(&mut state.cursor, skip);

            // The pattern is the literal only: nothing else to match.
            if LITERAL {
                return true;
            }

            let mut next_ctx = ctx;
            next_ctx.skip_char::<S>(skip);

            if _match(req, state, next_ctx) {
                return true;
            }

            ctx.advance_char::<S>();
            state.marks.clear();
        }
    } else {
        // Multi-character prefix: scan using the overlap table to decide
        // where to resume after a mismatch.
        while !ctx.at_end(req) {
            let c = prefix[0];
            while ctx.peek_char::<S>() != c {
                ctx.advance_char::<S>();
                if ctx.at_end(req) {
                    return false;
                }
            }
            ctx.advance_char::<S>();
            if ctx.at_end(req) {
                return false;
            }

            // `i` is the number of prefix characters matched so far.
            let mut i = 1;
            loop {
                if ctx.peek_char::<S>() == prefix[i] {
                    i += 1;
                    if i != len {
                        ctx.advance_char::<S>();
                        if ctx.at_end(req) {
                            return false;
                        }
                        continue;
                    }

                    // Whole prefix found; the cursor sits on its last
                    // character.
                    req.start = ctx.cursor.position - (len - 1);
                    state.reset(req, req.start);
                    S::skip(&mut state.cursor, skip);
                    // The pattern is the literal only: nothing else to match.
                    if LITERAL {
                        return true;
                    }

                    let mut next_ctx = ctx;
                    if skip != 0 {
                        next_ctx.advance_char::<S>();
                    } else {
                        next_ctx.cursor = state.cursor;
                    }

                    if _match(req, state, next_ctx) {
                        return true;
                    }

                    ctx.advance_char::<S>();
                    if ctx.at_end(req) {
                        return false;
                    }
                    state.marks.clear();
                }

                // Mismatch (or failed tail): fall back per the overlap table.
                i = overlap[i] as usize;
                if i == 0 {
                    break;
                }
            }
        }
    }
    false
}
999
1000fn search_info_charset<S: StrDrive>(
1001 req: &mut Request<S>,
1002 state: &mut State,
1003 mut ctx: MatchContext,
1004) -> bool {
1005 let set = &ctx.pattern(req)[5..];
1006
1007 ctx.skip_code_from(req, 1);
1008
1009 req.must_advance = false;
1010
1011 loop {
1012 while !ctx.at_end(req) && !charset(set, ctx.peek_char::<S>()) {
1013 ctx.advance_char::<S>();
1014 }
1015 if ctx.at_end(req) {
1016 return false;
1017 }
1018
1019 req.start = ctx.cursor.position;
1020 state.start = ctx.cursor.position;
1021 state.cursor = ctx.cursor;
1022
1023 if _match(req, state, ctx) {
1024 return true;
1025 }
1026
1027 ctx.advance_char::<S>();
1028 state.marks.clear();
1029 }
1030}
1031
/// Bookkeeping for one active `REPEAT` block.
#[derive(Debug, Clone, Copy)]
struct RepeatContext {
    /// Repetition counter; starts at -1 when the context is created.
    count: isize,
    /// Minimum number of repetitions (word 2 of `REPEAT`).
    min_count: usize,
    /// Maximum number of repetitions (word 3 of `REPEAT`); compared against
    /// `MAXREPEAT` to detect "unbounded".
    max_count: usize,
    /// Code position of the `REPEAT` opcode; the body is entered at
    /// `code_position + 4`.
    code_position: usize,
    /// Cursor position recorded at the last repetition, used to detect
    /// repetitions that did not move the cursor; starts as `usize::MAX`.
    last_position: usize,
    /// Repeat id of the enclosing context (`usize::MAX` at the top level).
    prev_id: usize,
}
1041
/// One frame of the `_match` state machine.
#[derive(Clone, Copy)]
struct MatchContext {
    /// Current position in the subject.
    cursor: StringCursor,
    /// Index of the current code word in `Request::pattern_codes`.
    code_position: usize,
    /// Whether `match_all`/`must_advance` apply when this frame succeeds.
    toplevel: bool,
    /// Where `_match` resumes this frame.
    jump: Jump,
    /// Index into `State::repeat_stack` of the governing repeat context
    /// (`usize::MAX` when there is none).
    repeat_ctx_id: usize,
    /// Scratch value whose meaning depends on the current `jump`
    /// (for example a repetition count or a branch offset).
    count: isize,
}
1051
1052impl MatchContext {
1053 fn pattern<'a, S>(&self, req: &Request<'a, S>) -> &'a [u32] {
1054 &req.pattern_codes[self.code_position..]
1055 }
1056
1057 fn remaining_codes<S>(&self, req: &Request<S>) -> usize {
1058 req.pattern_codes.len() - self.code_position
1059 }
1060
1061 fn remaining_chars<S>(&self, req: &Request<S>) -> usize {
1062 req.end - self.cursor.position
1063 }
1064
1065 fn peek_char<S: StrDrive>(&self) -> u32 {
1066 S::peek(&self.cursor)
1067 }
1068
1069 fn skip_char<S: StrDrive>(&mut self, skip: usize) {
1070 S::skip(&mut self.cursor, skip);
1071 }
1072
1073 fn advance_char<S: StrDrive>(&mut self) -> u32 {
1074 S::advance(&mut self.cursor)
1075 }
1076
1077 fn back_peek_char<S: StrDrive>(&self) -> u32 {
1078 S::back_peek(&self.cursor)
1079 }
1080
1081 fn back_skip_char<S: StrDrive>(&mut self, skip: usize) {
1082 S::back_skip(&mut self.cursor, skip);
1083 }
1084
1085 fn back_advance_char<S: StrDrive>(&mut self) -> u32 {
1086 S::back_advance(&mut self.cursor)
1087 }
1088
1089 fn peek_code<S>(&self, req: &Request<S>, peek: usize) -> u32 {
1090 req.pattern_codes[self.code_position + peek]
1091 }
1092
1093 fn try_peek_code_as<T, S>(&self, req: &Request<S>, peek: usize) -> Result<T, T::Error>
1094 where
1095 T: TryFrom<u32>,
1096 {
1097 self.peek_code(req, peek).try_into()
1098 }
1099
1100 fn skip_code(&mut self, skip: usize) {
1101 self.code_position += skip;
1102 }
1103
1104 fn skip_code_from<S>(&mut self, req: &Request<S>, peek: usize) {
1105 self.skip_code(self.peek_code(req, peek) as usize + 1);
1106 }
1107
1108 fn at_beginning(&self) -> bool {
1109 self.cursor.position == 0
1111 }
1112
1113 fn at_end<S>(&self, req: &Request<S>) -> bool {
1114 self.cursor.position == req.end
1115 }
1116
1117 fn at_linebreak<S: StrDrive>(&self, req: &Request<S>) -> bool {
1118 !self.at_end(req) && is_linebreak(self.peek_char::<S>())
1119 }
1120
1121 fn at_boundary<S: StrDrive, F: FnMut(u32) -> bool>(
1122 &self,
1123 req: &Request<S>,
1124 mut word_checker: F,
1125 ) -> bool {
1126 if self.at_beginning() && self.at_end(req) {
1127 return false;
1128 }
1129 let that = !self.at_beginning() && word_checker(self.back_peek_char::<S>());
1130 let this = !self.at_end(req) && word_checker(self.peek_char::<S>());
1131 this != that
1132 }
1133
1134 fn at_non_boundary<S: StrDrive, F: FnMut(u32) -> bool>(
1135 &self,
1136 req: &Request<S>,
1137 mut word_checker: F,
1138 ) -> bool {
1139 if self.at_beginning() && self.at_end(req) {
1140 return false;
1141 }
1142 let that = !self.at_beginning() && word_checker(self.back_peek_char::<S>());
1143 let this = !self.at_end(req) && word_checker(self.peek_char::<S>());
1144 this == that
1145 }
1146
1147 fn can_success<S>(&self, req: &Request<S>) -> bool {
1148 if !self.toplevel {
1149 return true;
1150 }
1151 if req.match_all && !self.at_end(req) {
1152 return false;
1153 }
1154 if req.must_advance && self.cursor.position == req.start {
1155 return false;
1156 }
1157 true
1158 }
1159
1160 #[must_use]
1161 fn next_peek_from<S>(&mut self, peek: usize, req: &Request<S>, jump: Jump) -> Self {
1162 self.next_offset(self.peek_code(req, peek) as usize + 1, jump)
1163 }
1164
1165 #[must_use]
1166 fn next_offset(&mut self, offset: usize, jump: Jump) -> Self {
1167 self.next_at(self.code_position + offset, jump)
1168 }
1169
1170 #[must_use]
1171 fn next_at(&mut self, code_position: usize, jump: Jump) -> Self {
1172 self.jump = jump;
1173 MatchContext {
1174 code_position,
1175 jump: Jump::OpCode,
1176 count: -1,
1177 ..*self
1178 }
1179 }
1180}
1181
1182fn at<S: StrDrive>(req: &Request<S>, ctx: &MatchContext, atcode: SreAtCode) -> bool {
1183 match atcode {
1184 SreAtCode::BEGINNING | SreAtCode::BEGINNING_STRING => ctx.at_beginning(),
1185 SreAtCode::BEGINNING_LINE => ctx.at_beginning() || is_linebreak(ctx.back_peek_char::<S>()),
1186 SreAtCode::BOUNDARY => ctx.at_boundary(req, is_word),
1187 SreAtCode::NON_BOUNDARY => ctx.at_non_boundary(req, is_word),
1188 SreAtCode::END => {
1189 (ctx.remaining_chars(req) == 1 && ctx.at_linebreak(req)) || ctx.at_end(req)
1190 }
1191 SreAtCode::END_LINE => ctx.at_linebreak(req) || ctx.at_end(req),
1192 SreAtCode::END_STRING => ctx.at_end(req),
1193 SreAtCode::LOC_BOUNDARY => ctx.at_boundary(req, is_loc_word),
1194 SreAtCode::LOC_NON_BOUNDARY => ctx.at_non_boundary(req, is_loc_word),
1195 SreAtCode::UNI_BOUNDARY => ctx.at_boundary(req, is_uni_word),
1196 SreAtCode::UNI_NON_BOUNDARY => ctx.at_non_boundary(req, is_uni_word),
1197 }
1198}
1199
1200fn char_loc_ignore(code: u32, c: u32) -> bool {
1201 code == c || code == lower_locate(c) || code == upper_locate(c)
1202}
1203
1204fn charset_loc_ignore(set: &[u32], c: u32) -> bool {
1205 let lo = lower_locate(c);
1206 if charset(set, c) {
1207 return true;
1208 }
1209 let up = upper_locate(c);
1210 up != lo && charset(set, up)
1211}
1212
1213fn category(catcode: SreCatCode, c: u32) -> bool {
1214 match catcode {
1215 SreCatCode::DIGIT => is_digit(c),
1216 SreCatCode::NOT_DIGIT => !is_digit(c),
1217 SreCatCode::SPACE => is_space(c),
1218 SreCatCode::NOT_SPACE => !is_space(c),
1219 SreCatCode::WORD => is_word(c),
1220 SreCatCode::NOT_WORD => !is_word(c),
1221 SreCatCode::LINEBREAK => is_linebreak(c),
1222 SreCatCode::NOT_LINEBREAK => !is_linebreak(c),
1223 SreCatCode::LOC_WORD => is_loc_word(c),
1224 SreCatCode::LOC_NOT_WORD => !is_loc_word(c),
1225 SreCatCode::UNI_DIGIT => is_uni_digit(c),
1226 SreCatCode::UNI_NOT_DIGIT => !is_uni_digit(c),
1227 SreCatCode::UNI_SPACE => is_uni_space(c),
1228 SreCatCode::UNI_NOT_SPACE => !is_uni_space(c),
1229 SreCatCode::UNI_WORD => is_uni_word(c),
1230 SreCatCode::UNI_NOT_WORD => !is_uni_word(c),
1231 SreCatCode::UNI_LINEBREAK => is_uni_linebreak(c),
1232 SreCatCode::UNI_NOT_LINEBREAK => !is_uni_linebreak(c),
1233 }
1234}
1235
/// Checks whether character `ch` is a member of the encoded character `set`.
///
/// `set` is a sequence of set items (`CATEGORY`, `CHARSET`, `BIGCHARSET`,
/// `LITERAL`, `RANGE`, `RANGE_UNI_IGNORE`), optionally preceded by `NEGATE`
/// and terminated by `FAILURE`. The first item that contains `ch` decides the
/// result; `NEGATE` inverts it. Running off the end of `set`, or meeting an
/// unknown code, yields `false`.
fn charset(set: &[u32], ch: u32) -> bool {
    // Result to report on a hit; flipped by NEGATE.
    let mut ok = true;
    let mut i = 0;
    while i < set.len() {
        let opcode = match SreOpcode::try_from(set[i]) {
            Ok(code) => code,
            Err(_) => {
                break;
            }
        };
        match opcode {
            SreOpcode::FAILURE => {
                // End of the set without a hit.
                return !ok;
            }
            SreOpcode::CATEGORY => {
                // <CATEGORY> <code>
                let catcode = match SreCatCode::try_from(set[i + 1]) {
                    Ok(code) => code,
                    Err(_) => {
                        break;
                    }
                };
                if category(catcode, ch) {
                    return ok;
                }
                i += 2;
            }
            SreOpcode::CHARSET => {
                // <CHARSET> <8-word bitmap covering characters 0..256>
                let set = &set[i + 1..];
                if ch < 256 && ((set[(ch >> 5) as usize] & (1u32 << (ch & 31))) != 0) {
                    return ok;
                }
                i += 1 + 8;
            }
            SreOpcode::BIGCHARSET => {
                // <BIGCHARSET> <block count> <64 words of block indices> <blocks>
                let count = set[i + 1] as usize;
                if ch < 0x10000 {
                    let set = &set[i + 2..];
                    let block_index = ch >> 8;
                    // SAFETY: reinterpreting `u32` words as bytes is always
                    // valid; `u8` has alignment 1 and no invalid bit
                    // patterns. The byte order seen is the platform's native
                    // one.
                    let (_, blockindices, _) = unsafe { set.align_to::<u8>() };
                    let blocks = &set[64..];
                    let block = blockindices[block_index as usize];
                    if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize]
                        & (1u32 << (ch & 31))
                        != 0
                    {
                        return ok;
                    }
                }
                i += 2 + 64 + count * 8;
            }
            SreOpcode::LITERAL => {
                // <LITERAL> <code>
                if ch == set[i + 1] {
                    return ok;
                }
                i += 2;
            }
            SreOpcode::NEGATE => {
                ok = !ok;
                i += 1;
            }
            SreOpcode::RANGE => {
                // <RANGE> <lower> <upper>, both bounds inclusive
                if set[i + 1] <= ch && ch <= set[i + 2] {
                    return ok;
                }
                i += 3;
            }
            SreOpcode::RANGE_UNI_IGNORE => {
                // <RANGE_UNI_IGNORE> <lower> <upper>: also tries the
                // `upper_unicode` form of `ch`.
                if set[i + 1] <= ch && ch <= set[i + 2] {
                    return ok;
                }
                let ch = upper_unicode(ch);
                if set[i + 1] <= ch && ch <= set[i + 2] {
                    return ok;
                }
                i += 3;
            }
            _ => {
                // Not a set item.
                break;
            }
        }
    }
    // Malformed set: report "no match".
    false
}
1328
1329fn _count<S: StrDrive>(
1330 req: &Request<S>,
1331 state: &mut State,
1332 ctx: &mut MatchContext,
1333 max_count: usize,
1334) -> usize {
1335 let max_count = std::cmp::min(max_count, ctx.remaining_chars(req));
1336 let end = ctx.cursor.position + max_count;
1337 let opcode = SreOpcode::try_from(ctx.peek_code(req, 0)).unwrap();
1338
1339 match opcode {
1340 SreOpcode::ANY => {
1341 while ctx.cursor.position < end && !ctx.at_linebreak(req) {
1342 ctx.advance_char::<S>();
1343 }
1344 }
1345 SreOpcode::ANY_ALL => {
1346 ctx.skip_char::<S>(max_count);
1347 }
1348 SreOpcode::IN => {
1349 while ctx.cursor.position < end && charset(&ctx.pattern(req)[2..], ctx.peek_char::<S>())
1350 {
1351 ctx.advance_char::<S>();
1352 }
1353 }
1354 SreOpcode::LITERAL => {
1355 general_count_literal(req, ctx, end, |code, c| code == c);
1356 }
1357 SreOpcode::NOT_LITERAL => {
1358 general_count_literal(req, ctx, end, |code, c| code != c);
1359 }
1360 SreOpcode::LITERAL_IGNORE => {
1361 general_count_literal(req, ctx, end, |code, c| code == lower_ascii(c));
1362 }
1363 SreOpcode::NOT_LITERAL_IGNORE => {
1364 general_count_literal(req, ctx, end, |code, c| code != lower_ascii(c));
1365 }
1366 SreOpcode::LITERAL_LOC_IGNORE => {
1367 general_count_literal(req, ctx, end, char_loc_ignore);
1368 }
1369 SreOpcode::NOT_LITERAL_LOC_IGNORE => {
1370 general_count_literal(req, ctx, end, |code, c| !char_loc_ignore(code, c));
1371 }
1372 SreOpcode::LITERAL_UNI_IGNORE => {
1373 general_count_literal(req, ctx, end, |code, c| code == lower_unicode(c));
1374 }
1375 SreOpcode::NOT_LITERAL_UNI_IGNORE => {
1376 general_count_literal(req, ctx, end, |code, c| code != lower_unicode(c));
1377 }
1378 _ => {
1379 ctx.toplevel = false;
1381 ctx.jump = Jump::OpCode;
1382 ctx.repeat_ctx_id = usize::MAX;
1383 ctx.count = -1;
1384
1385 let mut sub_state = State {
1386 marks: Marks::default(),
1387 repeat_stack: vec![],
1388 ..*state
1389 };
1390
1391 while ctx.cursor.position < end && _match(req, &mut sub_state, *ctx) {
1392 ctx.advance_char::<S>();
1393 }
1394 }
1395 }
1396
1397 ctx.cursor.position - state.cursor.position
1399}
1400
1401fn general_count_literal<S: StrDrive, F: FnMut(u32, u32) -> bool>(
1402 req: &Request<S>,
1403 ctx: &mut MatchContext,
1404 end: usize,
1405 mut f: F,
1406) {
1407 let ch = ctx.peek_code(req, 1);
1408 while ctx.cursor.position < end && f(ch, ctx.peek_char::<S>()) {
1409 ctx.advance_char::<S>();
1410 }
1411}