1use crate::string::{
4 is_digit, is_linebreak, is_loc_word, is_space, is_uni_digit, is_uni_linebreak, is_uni_space,
5 is_uni_word, is_word, lower_ascii, lower_locate, lower_unicode, upper_locate, upper_unicode,
6};
7
8use super::{MAXREPEAT, SreAtCode, SreCatCode, SreInfo, SreOpcode, StrDrive, StringCursor};
9use alloc::{vec, vec::Vec};
10use core::{convert::TryFrom, ptr::null};
11use optional::Optioned;
12
13#[derive(Debug, Clone, Copy)]
14pub struct Request<'a, S> {
15 pub string: S,
16 pub start: usize,
17 pub end: usize,
18 pub pattern_codes: &'a [u32],
19 pub match_all: bool,
20 pub must_advance: bool,
21}
22
23impl<'a, S: StrDrive> Request<'a, S> {
24 pub fn new(
25 string: S,
26 start: usize,
27 end: usize,
28 pattern_codes: &'a [u32],
29 match_all: bool,
30 ) -> Self {
31 let end = core::cmp::min(end, string.count());
32 let start = core::cmp::min(start, end);
33
34 Self {
35 string,
36 start,
37 end,
38 pattern_codes,
39 match_all,
40 must_advance: false,
41 }
42 }
43}
44
45#[derive(Debug)]
46pub struct Marks {
47 last_index: isize,
48 marks: Vec<Optioned<usize>>,
49 marks_stack: Vec<(Vec<Optioned<usize>>, isize)>,
50}
51
52impl Default for Marks {
53 fn default() -> Self {
54 Self {
55 last_index: -1,
56 marks: Vec::new(),
57 marks_stack: Vec::new(),
58 }
59 }
60}
61
62impl Marks {
63 pub fn get(&self, group_index: usize) -> (Optioned<usize>, Optioned<usize>) {
64 let marks_index = 2 * group_index;
65 if marks_index + 1 < self.marks.len() {
66 (self.marks[marks_index], self.marks[marks_index + 1])
67 } else {
68 (Optioned::none(), Optioned::none())
69 }
70 }
71
72 pub const fn last_index(&self) -> isize {
73 self.last_index
74 }
75
76 pub fn raw(&self) -> &[Optioned<usize>] {
77 self.marks.as_slice()
78 }
79
80 fn set(&mut self, mark_nr: usize, position: usize) {
81 if mark_nr & 1 != 0 {
82 self.last_index = mark_nr as isize / 2 + 1;
83 }
84 if mark_nr >= self.marks.len() {
85 self.marks.resize(mark_nr + 1, Optioned::none());
86 }
87 self.marks[mark_nr] = Optioned::some(position);
88 }
89
90 fn push(&mut self) {
91 self.marks_stack.push((self.marks.clone(), self.last_index));
92 }
93
94 fn pop(&mut self) {
95 let (marks, last_index) = self.marks_stack.pop().unwrap();
96 self.marks = marks;
97 self.last_index = last_index;
98 }
99
100 fn pop_keep(&mut self) {
101 let (marks, last_index) = self.marks_stack.last().unwrap().clone();
102 self.marks = marks;
103 self.last_index = last_index;
104 }
105
106 fn pop_discard(&mut self) {
107 self.marks_stack.pop();
108 }
109
110 fn clear(&mut self) {
111 self.last_index = -1;
112 self.marks.clear();
113 self.marks_stack.clear();
114 }
115}
116
117#[derive(Debug, Default)]
118pub struct State {
119 pub start: usize,
120 pub marks: Marks,
121 pub cursor: StringCursor,
122 repeat_stack: Vec<RepeatContext>,
123}
124
125impl State {
126 pub fn reset<S: StrDrive>(&mut self, req: &Request<'_, S>, start: usize) {
127 self.marks.clear();
128 self.repeat_stack.clear();
129 self.start = start;
130 req.string.adjust_cursor(&mut self.cursor, start);
131 }
132
133 pub fn py_match<S: StrDrive>(&mut self, req: &Request<'_, S>) -> bool {
134 self.start = req.start;
135 req.string.adjust_cursor(&mut self.cursor, self.start);
136
137 let ctx = MatchContext {
138 cursor: self.cursor,
139 code_position: 0,
140 toplevel: true,
141 jump: Jump::OpCode,
142 repeat_ctx_id: usize::MAX,
143 count: -1,
144 };
145 _match(req, self, ctx)
146 }
147
148 pub fn search<S: StrDrive>(&mut self, mut req: Request<'_, S>) -> bool {
149 self.start = req.start;
150 req.string.adjust_cursor(&mut self.cursor, self.start);
151
152 if req.start > req.end {
153 return false;
154 }
155
156 let mut end = req.end;
157
158 let mut ctx = MatchContext {
159 cursor: self.cursor,
160 code_position: 0,
161 toplevel: true,
162 jump: Jump::OpCode,
163 repeat_ctx_id: usize::MAX,
164 count: -1,
165 };
166
167 if ctx.peek_code(&req, 0) == SreOpcode::INFO as u32 {
168 let min = ctx.peek_code(&req, 3) as usize;
171
172 if ctx.remaining_chars(&req) < min {
173 return false;
174 }
175
176 if min > 1 {
177 end -= min - 1;
181
182 if end < ctx.cursor.position {
184 let skip = end - self.cursor.position;
185 S::skip(&mut self.cursor, skip);
186 }
187 }
188
189 let flags = SreInfo::from_bits_truncate(ctx.peek_code(&req, 2));
190
191 if flags.contains(SreInfo::PREFIX) {
192 if flags.contains(SreInfo::LITERAL) {
193 return search_info_literal::<true, S>(&mut req, self, ctx);
194 } else {
195 return search_info_literal::<false, S>(&mut req, self, ctx);
196 }
197 } else if flags.contains(SreInfo::CHARSET) {
198 return search_info_charset(&mut req, self, ctx);
199 }
200 ctx.skip_code_from(&req, 1);
203 }
204
205 if _match(&req, self, ctx) {
206 return true;
207 }
208
209 if ctx.try_peek_code_as::<SreOpcode, _>(&req, 0).unwrap() == SreOpcode::AT
210 && (ctx.try_peek_code_as::<SreAtCode, _>(&req, 1).unwrap() == SreAtCode::BEGINNING
211 || ctx.try_peek_code_as::<SreAtCode, _>(&req, 1).unwrap()
212 == SreAtCode::BEGINNING_STRING)
213 {
214 self.cursor.position = req.end;
215 self.cursor.ptr = null();
216 return false;
218 }
219
220 req.must_advance = false;
221 ctx.toplevel = false;
222 while req.start < end {
223 req.start += 1;
224 self.reset(&req, req.start);
225 ctx.cursor = self.cursor;
226
227 if _match(&req, self, ctx) {
228 return true;
229 }
230 }
231 false
232 }
233}
234
235pub struct SearchIter<'a, S: StrDrive> {
236 pub req: Request<'a, S>,
237 pub state: State,
238}
239
240impl<S: StrDrive> Iterator for SearchIter<'_, S> {
241 type Item = ();
242
243 fn next(&mut self) -> Option<Self::Item> {
244 if self.req.start > self.req.end {
245 return None;
246 }
247
248 self.state.reset(&self.req, self.req.start);
249 if !self.state.search(self.req) {
250 return None;
251 }
252
253 self.req.must_advance = self.state.cursor.position == self.state.start;
254 self.req.start = self.state.cursor.position;
255
256 Some(())
257 }
258}
259
260#[derive(Debug, Clone, Copy)]
261enum Jump {
262 OpCode,
263 Assert1,
264 AssertNot1,
265 Branch1,
266 Branch2,
267 Repeat1,
268 UntilBacktrace,
269 MaxUntil2,
270 MaxUntil3,
271 MinUntil1,
272 RepeatOne1,
273 RepeatOne2,
274 MinRepeatOne1,
275 MinRepeatOne2,
276 AtomicGroup1,
277 PossessiveRepeat1,
278 PossessiveRepeat2,
279 PossessiveRepeat3,
280 PossessiveRepeat4,
281}
282
283fn _match<S: StrDrive>(req: &Request<'_, S>, state: &mut State, mut ctx: MatchContext) -> bool {
284 let mut context_stack = vec![];
285 let mut popped_result = false;
286
287 #[allow(
288 clippy::never_loop,
289 reason = "'result loop is not an actual loop but break label"
290 )]
291 'coro: loop {
292 popped_result = 'result: loop {
293 let yielded = 'context: loop {
294 match ctx.jump {
295 Jump::OpCode => {}
296 Jump::Assert1 => {
297 if popped_result {
298 ctx.skip_code_from(req, 1);
299 } else {
300 break 'result false;
301 }
302 }
303 Jump::AssertNot1 => {
304 if popped_result {
305 break 'result false;
306 }
307 state.marks.pop();
308 ctx.skip_code_from(req, 1);
309 }
310 Jump::Branch1 => {
311 let branch_offset = ctx.count as usize;
312 let next_length = ctx.peek_code(req, branch_offset) as isize;
313 if next_length == 0 {
314 state.marks.pop_discard();
315 break 'result false;
316 }
317 state.cursor = ctx.cursor;
318 let next_ctx = ctx.next_offset(branch_offset + 1, Jump::Branch2);
319 ctx.count += next_length;
320 break 'context next_ctx;
321 }
322 Jump::Branch2 => {
323 if popped_result {
324 break 'result true;
325 }
326 state.marks.pop_keep();
327 ctx.jump = Jump::Branch1;
328 continue 'context;
329 }
330 Jump::Repeat1 => {
331 state.repeat_stack.pop();
332 break 'result popped_result;
333 }
334 Jump::UntilBacktrace => {
335 if !popped_result {
336 state.repeat_stack[ctx.repeat_ctx_id].count -= 1;
337 state.cursor = ctx.cursor;
338 }
339 break 'result popped_result;
340 }
341 Jump::MaxUntil2 => {
342 let save_last_position = ctx.count as usize;
343 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
344 repeat_ctx.last_position = save_last_position;
345
346 if popped_result {
347 state.marks.pop_discard();
348 break 'result true;
349 }
350
351 state.marks.pop();
352 repeat_ctx.count -= 1;
353 state.cursor = ctx.cursor;
354
355 let mut next_ctx = ctx.next_offset(1, Jump::MaxUntil3);
358 next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
359 break 'context next_ctx;
360 }
361 Jump::MaxUntil3 => {
362 if !popped_result {
363 state.cursor = ctx.cursor;
364 }
365 break 'result popped_result;
366 }
367 Jump::MinUntil1 => {
368 if popped_result {
369 break 'result true;
370 }
371 ctx.repeat_ctx_id = ctx.count as usize;
372 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
373 state.cursor = ctx.cursor;
374 state.marks.pop();
375
376 if repeat_ctx.count as usize >= repeat_ctx.max_count
378 && repeat_ctx.max_count != MAXREPEAT
379 || state.cursor.position == repeat_ctx.last_position
380 {
381 repeat_ctx.count -= 1;
382 break 'result false;
383 }
384
385 repeat_ctx.last_position = state.cursor.position;
387
388 break 'context ctx
389 .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
390 }
391 Jump::RepeatOne1 => {
392 let min_count = ctx.peek_code(req, 2) as isize;
393 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
394 if next_code == SreOpcode::LITERAL as u32 {
395 let c = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 2);
398 while ctx.at_end(req) || ctx.peek_char::<S>() != c {
399 if ctx.count <= min_count {
400 state.marks.pop_discard();
401 break 'result false;
402 }
403 ctx.back_advance_char::<S>();
404 ctx.count -= 1;
405 }
406 }
407
408 state.cursor = ctx.cursor;
409 break 'context ctx.next_peek_from(1, req, Jump::RepeatOne2);
411 }
412 Jump::RepeatOne2 => {
413 if popped_result {
414 break 'result true;
415 }
416
417 let min_count = ctx.peek_code(req, 2) as isize;
418 if ctx.count <= min_count {
419 state.marks.pop_discard();
420 break 'result false;
421 }
422
423 ctx.back_advance_char::<S>();
424 ctx.count -= 1;
425
426 state.marks.pop_keep();
427 ctx.jump = Jump::RepeatOne1;
428 continue 'context;
429 }
430 Jump::MinRepeatOne1 => {
431 let max_count = ctx.peek_code(req, 3) as usize;
432 if max_count == MAXREPEAT || ctx.count as usize <= max_count {
433 state.cursor = ctx.cursor;
434 break 'context ctx.next_peek_from(1, req, Jump::MinRepeatOne2);
435 } else {
436 state.marks.pop_discard();
437 break 'result false;
438 }
439 }
440 Jump::MinRepeatOne2 => {
441 if popped_result {
442 break 'result true;
443 }
444
445 state.cursor = ctx.cursor;
446
447 let mut count_ctx = ctx;
448 count_ctx.skip_code(4);
449 if _count(req, state, &mut count_ctx, 1) == 0 {
450 state.marks.pop_discard();
451 break 'result false;
452 }
453
454 ctx.advance_char::<S>();
455 ctx.count += 1;
456 state.marks.pop_keep();
457 ctx.jump = Jump::MinRepeatOne1;
458 continue 'context;
459 }
460 Jump::AtomicGroup1 => {
461 if popped_result {
462 ctx.skip_code_from(req, 1);
463 ctx.cursor = state.cursor;
464 } else {
466 state.cursor = ctx.cursor;
467 break 'result false;
468 }
469 }
470 Jump::PossessiveRepeat1 => {
471 let min_count = ctx.peek_code(req, 2) as isize;
472 if ctx.count < min_count {
473 let mut next = ctx.next_offset(4, Jump::PossessiveRepeat2);
475 next.toplevel = false;
476 break 'context next;
477 }
478 ctx.cursor.position = usize::MAX;
480 ctx.jump = Jump::PossessiveRepeat3;
481 continue 'context;
482 }
483 Jump::PossessiveRepeat2 => {
484 if popped_result {
485 ctx.count += 1;
486 ctx.jump = Jump::PossessiveRepeat1;
487 continue 'context;
488 } else {
489 state.cursor = ctx.cursor;
490 break 'result false;
491 }
492 }
493 Jump::PossessiveRepeat3 => {
494 let max_count = ctx.peek_code(req, 3) as usize;
495 if ((ctx.count as usize) < max_count || max_count == MAXREPEAT)
496 && ctx.cursor.position != state.cursor.position
497 {
498 state.marks.push();
499 ctx.cursor = state.cursor;
500 let mut next = ctx.next_offset(4, Jump::PossessiveRepeat4);
501 next.toplevel = false; break 'context next;
503 }
504 ctx.cursor = state.cursor;
505 ctx.skip_code_from(req, 1);
506 ctx.skip_code(1);
507 }
508 Jump::PossessiveRepeat4 => {
509 if popped_result {
510 state.marks.pop_discard();
511 ctx.count += 1;
512 ctx.jump = Jump::PossessiveRepeat3;
513 continue 'context;
514 }
515 state.marks.pop();
516 state.cursor = ctx.cursor;
517 ctx.skip_code_from(req, 1);
518 ctx.skip_code(1);
519 }
520 }
521 ctx.jump = Jump::OpCode;
522
523 loop {
524 macro_rules! general_op_literal {
525 ($f:expr) => {{
526 #[allow(clippy::redundant_closure_call)]
527 if ctx.at_end(req) || !$f(ctx.peek_code(req, 1), ctx.peek_char::<S>()) {
528 break 'result false;
529 }
530 ctx.skip_code(2);
531 ctx.advance_char::<S>();
532 }};
533 }
534
535 macro_rules! general_op_in {
536 ($f:expr) => {{
537 #[allow(clippy::redundant_closure_call)]
538 if ctx.at_end(req) || !$f(&ctx.pattern(req)[2..], ctx.peek_char::<S>())
539 {
540 break 'result false;
541 }
542 ctx.skip_code_from(req, 1);
543 ctx.advance_char::<S>();
544 }};
545 }
546
547 macro_rules! general_op_groupref {
548 ($f:expr) => {{
549 let (group_start, group_end) =
550 state.marks.get(ctx.peek_code(req, 1) as usize);
551 let (group_start, group_end) = if group_start.is_some()
552 && group_end.is_some()
553 && group_start.unpack() <= group_end.unpack()
554 {
555 (group_start.unpack(), group_end.unpack())
556 } else {
557 break 'result false;
558 };
559
560 let mut g_ctx = MatchContext {
561 cursor: req.string.create_cursor(group_start),
562 ..ctx
563 };
564
565 for _ in group_start..group_end {
566 #[allow(clippy::redundant_closure_call)]
567 if ctx.at_end(req)
568 || $f(ctx.peek_char::<S>()) != $f(g_ctx.peek_char::<S>())
569 {
570 break 'result false;
571 }
572 ctx.advance_char::<S>();
573 g_ctx.advance_char::<S>();
574 }
575
576 ctx.skip_code(2);
577 }};
578 }
579
580 if ctx.remaining_codes(req) == 0 {
581 break 'result false;
582 }
583 let opcode = ctx.peek_code(req, 0);
584 let opcode = SreOpcode::try_from(opcode).unwrap();
585
586 match opcode {
587 SreOpcode::FAILURE => break 'result false,
588 SreOpcode::SUCCESS => {
589 if ctx.can_success(req) {
590 state.cursor = ctx.cursor;
591 break 'result true;
592 }
593 break 'result false;
594 }
595 SreOpcode::ANY => {
596 if ctx.at_end(req) || ctx.at_linebreak(req) {
597 break 'result false;
598 }
599 ctx.skip_code(1);
600 ctx.advance_char::<S>();
601 }
602 SreOpcode::ANY_ALL => {
603 if ctx.at_end(req) {
604 break 'result false;
605 }
606 ctx.skip_code(1);
607 ctx.advance_char::<S>();
608 }
609 SreOpcode::ASSERT => {
611 let back = ctx.peek_code(req, 2) as usize;
612 if ctx.cursor.position < back {
613 break 'result false;
614 }
615
616 let mut next_ctx = ctx.next_offset(3, Jump::Assert1);
617 next_ctx.toplevel = false;
618 next_ctx.back_skip_char::<S>(back);
619 state.cursor = next_ctx.cursor;
620 break 'context next_ctx;
621 }
622 SreOpcode::ASSERT_NOT => {
624 let back = ctx.peek_code(req, 2) as usize;
625 if ctx.cursor.position < back {
626 ctx.skip_code_from(req, 1);
627 continue;
628 }
629 state.marks.push();
630
631 let mut next_ctx = ctx.next_offset(3, Jump::AssertNot1);
632 next_ctx.toplevel = false;
633 next_ctx.back_skip_char::<S>(back);
634 state.cursor = next_ctx.cursor;
635 break 'context next_ctx;
636 }
637 SreOpcode::AT => {
638 let at_code = SreAtCode::try_from(ctx.peek_code(req, 1)).unwrap();
639 if at(req, &ctx, at_code) {
640 ctx.skip_code(2);
641 } else {
642 break 'result false;
643 }
644 }
645 SreOpcode::BRANCH => {
647 state.marks.push();
648 ctx.count = 1;
649 ctx.jump = Jump::Branch1;
650 continue 'context;
651 }
652 SreOpcode::CATEGORY => {
653 let cat_code = SreCatCode::try_from(ctx.peek_code(req, 1)).unwrap();
654 if ctx.at_end(req) || !category(cat_code, ctx.peek_char::<S>()) {
655 break 'result false;
656 }
657 ctx.skip_code(2);
658 ctx.advance_char::<S>();
659 }
660 SreOpcode::IN => general_op_in!(charset),
661 SreOpcode::IN_IGNORE => {
662 general_op_in!(|set, c| charset(set, lower_ascii(c)))
663 }
664 SreOpcode::IN_UNI_IGNORE => {
665 general_op_in!(|set, c| charset(set, lower_unicode(c)))
666 }
667 SreOpcode::IN_LOC_IGNORE => general_op_in!(charset_loc_ignore),
668 SreOpcode::MARK => {
669 state
670 .marks
671 .set(ctx.peek_code(req, 1) as usize, ctx.cursor.position);
672 ctx.skip_code(2);
673 }
674 SreOpcode::INFO | SreOpcode::JUMP => ctx.skip_code_from(req, 1),
675 SreOpcode::REPEAT => {
677 let repeat_ctx = RepeatContext {
678 count: -1,
679 min_count: ctx.peek_code(req, 2) as usize,
680 max_count: ctx.peek_code(req, 3) as usize,
681 code_position: ctx.code_position,
682 last_position: usize::MAX,
683 prev_id: ctx.repeat_ctx_id,
684 };
685 state.repeat_stack.push(repeat_ctx);
686 let repeat_ctx_id = state.repeat_stack.len() - 1;
687 state.cursor = ctx.cursor;
688 let mut next_ctx = ctx.next_peek_from(1, req, Jump::Repeat1);
689 next_ctx.repeat_ctx_id = repeat_ctx_id;
690 break 'context next_ctx;
691 }
692 SreOpcode::MAX_UNTIL => {
693 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
694 state.cursor = ctx.cursor;
695 repeat_ctx.count += 1;
696
697 if (repeat_ctx.count as usize) < repeat_ctx.min_count {
698 break 'context ctx
700 .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
701 }
702
703 if ((repeat_ctx.count as usize) < repeat_ctx.max_count
704 || repeat_ctx.max_count == MAXREPEAT)
705 && state.cursor.position != repeat_ctx.last_position
706 {
707 state.marks.push();
710 ctx.count = repeat_ctx.last_position as isize;
711 repeat_ctx.last_position = state.cursor.position;
712
713 break 'context ctx
714 .next_at(repeat_ctx.code_position + 4, Jump::MaxUntil2);
715 }
716
717 let mut next_ctx = ctx.next_offset(1, Jump::MaxUntil3);
720 next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
721 break 'context next_ctx;
722 }
723 SreOpcode::MIN_UNTIL => {
724 let repeat_ctx = state.repeat_stack.last_mut().unwrap();
725 state.cursor = ctx.cursor;
726 repeat_ctx.count += 1;
727
728 if (repeat_ctx.count as usize) < repeat_ctx.min_count {
729 break 'context ctx
731 .next_at(repeat_ctx.code_position + 4, Jump::UntilBacktrace);
732 }
733
734 state.marks.push();
735 ctx.count = ctx.repeat_ctx_id as isize;
736 let mut next_ctx = ctx.next_offset(1, Jump::MinUntil1);
737 next_ctx.repeat_ctx_id = repeat_ctx.prev_id;
738 break 'context next_ctx;
739 }
740 SreOpcode::REPEAT_ONE => {
742 let min_count = ctx.peek_code(req, 2) as usize;
743 let max_count = ctx.peek_code(req, 3) as usize;
744
745 if ctx.remaining_chars(req) < min_count {
746 break 'result false;
747 }
748
749 state.cursor = ctx.cursor;
750
751 let mut count_ctx = ctx;
752 count_ctx.skip_code(4);
753 let count = _count(req, state, &mut count_ctx, max_count);
754 if count < min_count {
755 break 'result false;
756 }
757 ctx.cursor = count_ctx.cursor;
758
759 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
760 if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
761 state.cursor = ctx.cursor;
763 break 'result true;
764 }
765
766 state.marks.push();
767 ctx.count = count as isize;
768 ctx.jump = Jump::RepeatOne1;
769 continue 'context;
770 }
771 SreOpcode::MIN_REPEAT_ONE => {
773 let min_count = ctx.peek_code(req, 2) as usize;
774 if ctx.remaining_chars(req) < min_count {
775 break 'result false;
776 }
777
778 state.cursor = ctx.cursor;
779 ctx.count = if min_count == 0 {
780 0
781 } else {
782 let mut count_ctx = ctx;
783 count_ctx.skip_code(4);
784 let count = _count(req, state, &mut count_ctx, min_count);
785 if count < min_count {
786 break 'result false;
787 }
788 ctx.cursor = count_ctx.cursor;
789 count as isize
790 };
791
792 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
793 if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
794 state.cursor = ctx.cursor;
796 break 'result true;
797 }
798
799 state.marks.push();
800 ctx.jump = Jump::MinRepeatOne1;
801 continue 'context;
802 }
803 SreOpcode::LITERAL => general_op_literal!(|code, c| code == c),
804 SreOpcode::NOT_LITERAL => general_op_literal!(|code, c| code != c),
805 SreOpcode::LITERAL_IGNORE => {
806 general_op_literal!(|code, c| code == lower_ascii(c))
807 }
808 SreOpcode::NOT_LITERAL_IGNORE => {
809 general_op_literal!(|code, c| code != lower_ascii(c))
810 }
811 SreOpcode::LITERAL_UNI_IGNORE => {
812 general_op_literal!(|code, c| code == lower_unicode(c))
813 }
814 SreOpcode::NOT_LITERAL_UNI_IGNORE => {
815 general_op_literal!(|code, c| code != lower_unicode(c))
816 }
817 SreOpcode::LITERAL_LOC_IGNORE => general_op_literal!(char_loc_ignore),
818 SreOpcode::NOT_LITERAL_LOC_IGNORE => {
819 general_op_literal!(|code, c| !char_loc_ignore(code, c))
820 }
821 SreOpcode::GROUPREF => general_op_groupref!(|x| x),
822 SreOpcode::GROUPREF_IGNORE => general_op_groupref!(lower_ascii),
823 SreOpcode::GROUPREF_LOC_IGNORE => general_op_groupref!(lower_locate),
824 SreOpcode::GROUPREF_UNI_IGNORE => general_op_groupref!(lower_unicode),
825 SreOpcode::GROUPREF_EXISTS => {
826 let (group_start, group_end) =
827 state.marks.get(ctx.peek_code(req, 1) as usize);
828 if group_start.is_some()
829 && group_end.is_some()
830 && group_start.unpack() <= group_end.unpack()
831 {
832 ctx.skip_code(3);
833 } else {
834 ctx.skip_code_from(req, 2)
835 }
836 }
837 SreOpcode::ATOMIC_GROUP => {
839 state.cursor = ctx.cursor;
840 let mut next_ctx = ctx.next_offset(2, Jump::AtomicGroup1);
841 next_ctx.toplevel = false; break 'context next_ctx;
843 }
844 SreOpcode::POSSESSIVE_REPEAT => {
847 state.cursor = ctx.cursor;
848 ctx.count = 0;
849 ctx.jump = Jump::PossessiveRepeat1;
850 continue 'context;
851 }
852 SreOpcode::POSSESSIVE_REPEAT_ONE => {
855 let min_count = ctx.peek_code(req, 2) as usize;
856 let max_count = ctx.peek_code(req, 3) as usize;
857 if ctx.remaining_chars(req) < min_count {
858 break 'result false;
859 }
860 state.cursor = ctx.cursor;
861 let mut count_ctx = ctx;
862 count_ctx.skip_code(4);
863 let count = _count(req, state, &mut count_ctx, max_count);
864 if count < min_count {
865 break 'result false;
866 }
867 ctx.cursor = count_ctx.cursor;
868 ctx.skip_code_from(req, 1);
869 }
870 SreOpcode::CHARSET
871 | SreOpcode::BIGCHARSET
872 | SreOpcode::NEGATE
873 | SreOpcode::RANGE
874 | SreOpcode::RANGE_UNI_IGNORE
875 | SreOpcode::SUBPATTERN => {
876 unreachable!("unexpected opcode on main dispatch")
877 }
878 }
879 }
880 };
881 context_stack.push(ctx);
882 ctx = yielded;
883 continue 'coro;
884 };
885 if let Some(popped_ctx) = context_stack.pop() {
886 ctx = popped_ctx;
887 } else {
888 break;
889 }
890 }
891 popped_result
892}
893
894fn search_info_literal<const LITERAL: bool, S: StrDrive>(
895 req: &mut Request<'_, S>,
896 state: &mut State,
897 mut ctx: MatchContext,
898) -> bool {
899 let len = ctx.peek_code(req, 5) as usize;
902 let skip = ctx.peek_code(req, 6) as usize;
903 let prefix = &ctx.pattern(req)[7..7 + len];
904 let overlap = &ctx.pattern(req)[7 + len - 1..7 + len * 2];
905
906 ctx.skip_code_from(req, 1);
908 ctx.skip_code(2 * skip);
909
910 req.must_advance = false;
911
912 if len == 1 {
913 let c = prefix[0];
915
916 while !ctx.at_end(req) {
917 while ctx.peek_char::<S>() != c {
919 ctx.advance_char::<S>();
920 if ctx.at_end(req) {
921 return false;
922 }
923 }
924
925 req.start = ctx.cursor.position;
926 state.start = req.start;
927 state.cursor = ctx.cursor;
928 S::skip(&mut state.cursor, skip);
929
930 if LITERAL {
932 return true;
933 }
934
935 let mut next_ctx = ctx;
936 next_ctx.skip_char::<S>(skip);
937
938 if _match(req, state, next_ctx) {
939 return true;
940 }
941
942 ctx.advance_char::<S>();
943 state.marks.clear();
944 }
945 } else {
946 while !ctx.at_end(req) {
947 let c = prefix[0];
948 while ctx.peek_char::<S>() != c {
949 ctx.advance_char::<S>();
950 if ctx.at_end(req) {
951 return false;
952 }
953 }
954 ctx.advance_char::<S>();
955 if ctx.at_end(req) {
956 return false;
957 }
958
959 let mut i = 1;
960 loop {
961 if ctx.peek_char::<S>() == prefix[i] {
962 i += 1;
963 if i != len {
964 ctx.advance_char::<S>();
965 if ctx.at_end(req) {
966 return false;
967 }
968 continue;
969 }
970
971 req.start = ctx.cursor.position - (len - 1);
972 state.reset(req, req.start);
973 S::skip(&mut state.cursor, skip);
974 if LITERAL {
979 return true;
980 }
981
982 let mut next_ctx = ctx;
983 if skip != 0 {
984 next_ctx.advance_char::<S>();
985 } else {
986 next_ctx.cursor = state.cursor;
987 }
988
989 if _match(req, state, next_ctx) {
990 return true;
991 }
992
993 ctx.advance_char::<S>();
994 if ctx.at_end(req) {
995 return false;
996 }
997 state.marks.clear();
998 }
999
1000 i = overlap[i] as usize;
1001 if i == 0 {
1002 break;
1003 }
1004 }
1005 }
1006 }
1007 false
1008}
1009
1010fn search_info_charset<S: StrDrive>(
1011 req: &mut Request<'_, S>,
1012 state: &mut State,
1013 mut ctx: MatchContext,
1014) -> bool {
1015 let set = &ctx.pattern(req)[5..];
1016
1017 ctx.skip_code_from(req, 1);
1018
1019 req.must_advance = false;
1020
1021 loop {
1022 while !ctx.at_end(req) && !charset(set, ctx.peek_char::<S>()) {
1023 ctx.advance_char::<S>();
1024 }
1025 if ctx.at_end(req) {
1026 return false;
1027 }
1028
1029 req.start = ctx.cursor.position;
1030 state.start = ctx.cursor.position;
1031 state.cursor = ctx.cursor;
1032
1033 if _match(req, state, ctx) {
1034 return true;
1035 }
1036
1037 ctx.advance_char::<S>();
1038 state.marks.clear();
1039 }
1040}
1041
1042#[derive(Debug, Clone, Copy)]
1043struct RepeatContext {
1044 count: isize,
1045 min_count: usize,
1046 max_count: usize,
1047 code_position: usize,
1048 last_position: usize,
1049 prev_id: usize,
1050}
1051
1052#[derive(Clone, Copy)]
1053struct MatchContext {
1054 cursor: StringCursor,
1055 code_position: usize,
1056 toplevel: bool,
1057 jump: Jump,
1058 repeat_ctx_id: usize,
1059 count: isize,
1060}
1061
1062impl MatchContext {
1063 fn pattern<'a, S>(&self, req: &Request<'a, S>) -> &'a [u32] {
1064 &req.pattern_codes[self.code_position..]
1065 }
1066
1067 const fn remaining_codes<S>(&self, req: &Request<'_, S>) -> usize {
1068 req.pattern_codes.len() - self.code_position
1069 }
1070
1071 const fn remaining_chars<S>(&self, req: &Request<'_, S>) -> usize {
1072 req.end - self.cursor.position
1073 }
1074
1075 fn peek_char<S: StrDrive>(&self) -> u32 {
1076 S::peek(&self.cursor)
1077 }
1078
1079 fn skip_char<S: StrDrive>(&mut self, skip: usize) {
1080 S::skip(&mut self.cursor, skip);
1081 }
1082
1083 fn advance_char<S: StrDrive>(&mut self) -> u32 {
1084 S::advance(&mut self.cursor)
1085 }
1086
1087 fn back_peek_char<S: StrDrive>(&self) -> u32 {
1088 S::back_peek(&self.cursor)
1089 }
1090
1091 fn back_skip_char<S: StrDrive>(&mut self, skip: usize) {
1092 S::back_skip(&mut self.cursor, skip);
1093 }
1094
1095 fn back_advance_char<S: StrDrive>(&mut self) -> u32 {
1096 S::back_advance(&mut self.cursor)
1097 }
1098
1099 fn peek_code<S>(&self, req: &Request<'_, S>, peek: usize) -> u32 {
1100 req.pattern_codes[self.code_position + peek]
1101 }
1102
1103 fn try_peek_code_as<T, S>(&self, req: &Request<'_, S>, peek: usize) -> Result<T, T::Error>
1104 where
1105 T: TryFrom<u32>,
1106 {
1107 self.peek_code(req, peek).try_into()
1108 }
1109
1110 const fn skip_code(&mut self, skip: usize) {
1111 self.code_position += skip;
1112 }
1113
1114 fn skip_code_from<S>(&mut self, req: &Request<'_, S>, peek: usize) {
1115 self.skip_code(self.peek_code(req, peek) as usize + 1);
1116 }
1117
1118 const fn at_beginning(&self) -> bool {
1119 self.cursor.position == 0
1121 }
1122
1123 const fn at_end<S>(&self, req: &Request<'_, S>) -> bool {
1124 self.cursor.position == req.end
1125 }
1126
1127 fn at_linebreak<S: StrDrive>(&self, req: &Request<'_, S>) -> bool {
1128 !self.at_end(req) && is_linebreak(self.peek_char::<S>())
1129 }
1130
1131 fn at_boundary<S: StrDrive, F: FnMut(u32) -> bool>(
1132 &self,
1133 req: &Request<'_, S>,
1134 mut word_checker: F,
1135 ) -> bool {
1136 if self.at_beginning() && self.at_end(req) {
1137 return false;
1138 }
1139 let that = !self.at_beginning() && word_checker(self.back_peek_char::<S>());
1140 let this = !self.at_end(req) && word_checker(self.peek_char::<S>());
1141 this != that
1142 }
1143
1144 fn at_non_boundary<S: StrDrive, F: FnMut(u32) -> bool>(
1145 &self,
1146 req: &Request<'_, S>,
1147 mut word_checker: F,
1148 ) -> bool {
1149 if self.at_beginning() && self.at_end(req) {
1150 return false;
1151 }
1152 let that = !self.at_beginning() && word_checker(self.back_peek_char::<S>());
1153 let this = !self.at_end(req) && word_checker(self.peek_char::<S>());
1154 this == that
1155 }
1156
1157 const fn can_success<S>(&self, req: &Request<'_, S>) -> bool {
1158 if !self.toplevel {
1159 return true;
1160 }
1161 if req.match_all && !self.at_end(req) {
1162 return false;
1163 }
1164 if req.must_advance && self.cursor.position == req.start {
1165 return false;
1166 }
1167 true
1168 }
1169
1170 #[must_use]
1171 fn next_peek_from<S>(&mut self, peek: usize, req: &Request<'_, S>, jump: Jump) -> Self {
1172 self.next_offset(self.peek_code(req, peek) as usize + 1, jump)
1173 }
1174
1175 #[must_use]
1176 const fn next_offset(&mut self, offset: usize, jump: Jump) -> Self {
1177 self.next_at(self.code_position + offset, jump)
1178 }
1179
1180 #[must_use]
1181 const fn next_at(&mut self, code_position: usize, jump: Jump) -> Self {
1182 self.jump = jump;
1183 Self {
1184 code_position,
1185 jump: Jump::OpCode,
1186 count: -1,
1187 ..*self
1188 }
1189 }
1190}
1191
1192fn at<S: StrDrive>(req: &Request<'_, S>, ctx: &MatchContext, at_code: SreAtCode) -> bool {
1193 match at_code {
1194 SreAtCode::BEGINNING | SreAtCode::BEGINNING_STRING => ctx.at_beginning(),
1195 SreAtCode::BEGINNING_LINE => ctx.at_beginning() || is_linebreak(ctx.back_peek_char::<S>()),
1196 SreAtCode::BOUNDARY => ctx.at_boundary(req, is_word),
1197 SreAtCode::NON_BOUNDARY => ctx.at_non_boundary(req, is_word),
1198 SreAtCode::END => {
1199 (ctx.remaining_chars(req) == 1 && ctx.at_linebreak(req)) || ctx.at_end(req)
1200 }
1201 SreAtCode::END_LINE => ctx.at_linebreak(req) || ctx.at_end(req),
1202 SreAtCode::END_STRING => ctx.at_end(req),
1203 SreAtCode::LOC_BOUNDARY => ctx.at_boundary(req, is_loc_word),
1204 SreAtCode::LOC_NON_BOUNDARY => ctx.at_non_boundary(req, is_loc_word),
1205 SreAtCode::UNI_BOUNDARY => ctx.at_boundary(req, is_uni_word),
1206 SreAtCode::UNI_NON_BOUNDARY => ctx.at_non_boundary(req, is_uni_word),
1207 }
1208}
1209
1210fn char_loc_ignore(code: u32, c: u32) -> bool {
1211 code == c || code == lower_locate(c) || code == upper_locate(c)
1212}
1213
1214fn charset_loc_ignore(set: &[u32], c: u32) -> bool {
1215 let lo = lower_locate(c);
1216 if charset(set, c) {
1217 return true;
1218 }
1219 let up = upper_locate(c);
1220 up != lo && charset(set, up)
1221}
1222
1223fn category(cat_code: SreCatCode, c: u32) -> bool {
1224 match cat_code {
1225 SreCatCode::DIGIT => is_digit(c),
1226 SreCatCode::NOT_DIGIT => !is_digit(c),
1227 SreCatCode::SPACE => is_space(c),
1228 SreCatCode::NOT_SPACE => !is_space(c),
1229 SreCatCode::WORD => is_word(c),
1230 SreCatCode::NOT_WORD => !is_word(c),
1231 SreCatCode::LINEBREAK => is_linebreak(c),
1232 SreCatCode::NOT_LINEBREAK => !is_linebreak(c),
1233 SreCatCode::LOC_WORD => is_loc_word(c),
1234 SreCatCode::LOC_NOT_WORD => !is_loc_word(c),
1235 SreCatCode::UNI_DIGIT => is_uni_digit(c),
1236 SreCatCode::UNI_NOT_DIGIT => !is_uni_digit(c),
1237 SreCatCode::UNI_SPACE => is_uni_space(c),
1238 SreCatCode::UNI_NOT_SPACE => !is_uni_space(c),
1239 SreCatCode::UNI_WORD => is_uni_word(c),
1240 SreCatCode::UNI_NOT_WORD => !is_uni_word(c),
1241 SreCatCode::UNI_LINEBREAK => is_uni_linebreak(c),
1242 SreCatCode::UNI_NOT_LINEBREAK => !is_uni_linebreak(c),
1243 }
1244}
1245
1246fn charset(set: &[u32], ch: u32) -> bool {
1247 let mut ok = true;
1249 let mut i = 0;
1250 while i < set.len() {
1251 let opcode = match SreOpcode::try_from(set[i]) {
1252 Ok(code) => code,
1253 Err(_) => {
1254 break;
1255 }
1256 };
1257 match opcode {
1258 SreOpcode::FAILURE => {
1259 return !ok;
1260 }
1261 SreOpcode::CATEGORY => {
1262 let cat_code = match SreCatCode::try_from(set[i + 1]) {
1264 Ok(code) => code,
1265 Err(_) => {
1266 break;
1267 }
1268 };
1269 if category(cat_code, ch) {
1270 return ok;
1271 }
1272 i += 2;
1273 }
1274 SreOpcode::CHARSET => {
1275 let set = &set[i + 1..];
1277 if ch < 256 && ((set[(ch >> 5) as usize] & (1u32 << (ch & 31))) != 0) {
1278 return ok;
1279 }
1280 i += 1 + 8;
1281 }
1282 SreOpcode::BIGCHARSET => {
1283 let count = set[i + 1] as usize;
1285 if ch < 0x10000 {
1286 let set = &set[i + 2..];
1287 let block_index = ch >> 8;
1288 let (_, block_indices, _) = unsafe { set.align_to::<u8>() };
1289 let blocks = &set[64..];
1290 let block = block_indices[block_index as usize];
1291 if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize]
1292 & (1u32 << (ch & 31))
1293 != 0
1294 {
1295 return ok;
1296 }
1297 }
1298 i += 2 + 64 + count * 8;
1299 }
1300 SreOpcode::LITERAL => {
1301 if ch == set[i + 1] {
1303 return ok;
1304 }
1305 i += 2;
1306 }
1307 SreOpcode::NEGATE => {
1308 ok = !ok;
1309 i += 1;
1310 }
1311 SreOpcode::RANGE => {
1312 if set[i + 1] <= ch && ch <= set[i + 2] {
1314 return ok;
1315 }
1316 i += 3;
1317 }
1318 SreOpcode::RANGE_UNI_IGNORE => {
1319 if set[i + 1] <= ch && ch <= set[i + 2] {
1321 return ok;
1322 }
1323 let ch = upper_unicode(ch);
1324 if set[i + 1] <= ch && ch <= set[i + 2] {
1325 return ok;
1326 }
1327 i += 3;
1328 }
1329 _ => {
1330 break;
1331 }
1332 }
1333 }
1334 false
1337}
1338
1339fn _count<S: StrDrive>(
1340 req: &Request<'_, S>,
1341 state: &mut State,
1342 ctx: &mut MatchContext,
1343 max_count: usize,
1344) -> usize {
1345 let max_count = core::cmp::min(max_count, ctx.remaining_chars(req));
1346 let end = ctx.cursor.position + max_count;
1347 let opcode = SreOpcode::try_from(ctx.peek_code(req, 0)).unwrap();
1348
1349 match opcode {
1350 SreOpcode::ANY => {
1351 while ctx.cursor.position < end && !ctx.at_linebreak(req) {
1352 ctx.advance_char::<S>();
1353 }
1354 }
1355 SreOpcode::ANY_ALL => {
1356 ctx.skip_char::<S>(max_count);
1357 }
1358 SreOpcode::IN => {
1359 while ctx.cursor.position < end && charset(&ctx.pattern(req)[2..], ctx.peek_char::<S>())
1360 {
1361 ctx.advance_char::<S>();
1362 }
1363 }
1364 SreOpcode::LITERAL => {
1365 general_count_literal(req, ctx, end, |code, c| code == c);
1366 }
1367 SreOpcode::NOT_LITERAL => {
1368 general_count_literal(req, ctx, end, |code, c| code != c);
1369 }
1370 SreOpcode::LITERAL_IGNORE => {
1371 general_count_literal(req, ctx, end, |code, c| code == lower_ascii(c));
1372 }
1373 SreOpcode::NOT_LITERAL_IGNORE => {
1374 general_count_literal(req, ctx, end, |code, c| code != lower_ascii(c));
1375 }
1376 SreOpcode::LITERAL_LOC_IGNORE => {
1377 general_count_literal(req, ctx, end, char_loc_ignore);
1378 }
1379 SreOpcode::NOT_LITERAL_LOC_IGNORE => {
1380 general_count_literal(req, ctx, end, |code, c| !char_loc_ignore(code, c));
1381 }
1382 SreOpcode::LITERAL_UNI_IGNORE => {
1383 general_count_literal(req, ctx, end, |code, c| code == lower_unicode(c));
1384 }
1385 SreOpcode::NOT_LITERAL_UNI_IGNORE => {
1386 general_count_literal(req, ctx, end, |code, c| code != lower_unicode(c));
1387 }
1388 _ => {
1389 ctx.toplevel = false;
1391 ctx.jump = Jump::OpCode;
1392 ctx.repeat_ctx_id = usize::MAX;
1393 ctx.count = -1;
1394
1395 let mut sub_state = State {
1396 marks: Marks::default(),
1397 repeat_stack: vec![],
1398 ..*state
1399 };
1400
1401 while ctx.cursor.position < end && _match(req, &mut sub_state, *ctx) {
1402 ctx.advance_char::<S>();
1403 }
1404 }
1405 }
1406
1407 ctx.cursor.position - state.cursor.position
1409}
1410
1411fn general_count_literal<S: StrDrive, F: FnMut(u32, u32) -> bool>(
1412 req: &Request<'_, S>,
1413 ctx: &mut MatchContext,
1414 end: usize,
1415 mut f: F,
1416) {
1417 let ch = ctx.peek_code(req, 1);
1418 while ctx.cursor.position < end && f(ch, ctx.peek_char::<S>()) {
1419 ctx.advance_char::<S>();
1420 }
1421}