1use crate::constants::SreInfo;
4
5use super::constants::{SreAtCode, SreCatCode, SreOpcode};
6use super::MAXREPEAT;
7use optional::Optioned;
8use std::convert::TryFrom;
9use std::ops::Deref;
10
11const fn is_py_ascii_whitespace(b: u8) -> bool {
12 matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | b'\x0B')
13}
14
15#[derive(Debug, Clone, Copy)]
16pub struct Request<'a, S: StrDrive> {
17 pub string: S,
18 pub start: usize,
19 pub end: usize,
20 pub pattern_codes: &'a [u32],
21 pub match_all: bool,
22 pub must_advance: bool,
23}
24
25impl<'a, S: StrDrive> Request<'a, S> {
26 pub fn new(
27 string: S,
28 start: usize,
29 end: usize,
30 pattern_codes: &'a [u32],
31 match_all: bool,
32 ) -> Self {
33 let end = std::cmp::min(end, string.count());
34 let start = std::cmp::min(start, end);
35
36 Self {
37 string,
38 start,
39 end,
40 pattern_codes,
41 match_all,
42 must_advance: false,
43 }
44 }
45}
46
47#[derive(Debug)]
48pub struct Marks {
49 last_index: isize,
50 marks: Vec<Optioned<usize>>,
51 marks_stack: Vec<(Vec<Optioned<usize>>, isize)>,
52}
53
54impl Default for Marks {
55 fn default() -> Self {
56 Self {
57 last_index: -1,
58 marks: Vec::new(),
59 marks_stack: Vec::new(),
60 }
61 }
62}
63
64impl Deref for Marks {
65 type Target = Vec<Optioned<usize>>;
66
67 fn deref(&self) -> &Self::Target {
68 &self.marks
69 }
70}
71
72impl Marks {
73 pub fn get(&self, group_index: usize) -> (Optioned<usize>, Optioned<usize>) {
74 let marks_index = 2 * group_index;
75 if marks_index + 1 < self.marks.len() {
76 (self.marks[marks_index], self.marks[marks_index + 1])
77 } else {
78 (Optioned::none(), Optioned::none())
79 }
80 }
81
82 pub fn last_index(&self) -> isize {
83 self.last_index
84 }
85
86 fn set(&mut self, mark_nr: usize, position: usize) {
87 if mark_nr & 1 != 0 {
88 self.last_index = mark_nr as isize / 2 + 1;
89 }
90 if mark_nr >= self.marks.len() {
91 self.marks.resize(mark_nr + 1, Optioned::none());
92 }
93 self.marks[mark_nr] = Optioned::some(position);
94 }
95
96 fn push(&mut self) {
97 self.marks_stack.push((self.marks.clone(), self.last_index));
98 }
99
100 fn pop(&mut self) {
101 let (marks, last_index) = self.marks_stack.pop().unwrap();
102 self.marks = marks;
103 self.last_index = last_index;
104 }
105
106 fn pop_keep(&mut self) {
107 let (marks, last_index) = self.marks_stack.last().unwrap().clone();
108 self.marks = marks;
109 self.last_index = last_index;
110 }
111
112 fn pop_discard(&mut self) {
113 self.marks_stack.pop();
114 }
115
116 fn clear(&mut self) {
117 self.last_index = -1;
118 self.marks.clear();
119 self.marks_stack.clear();
120 }
121}
122
123#[derive(Debug)]
124pub struct State<S: StrDrive> {
125 pub marks: Marks,
126 context_stack: Vec<MatchContext<S>>,
127 repeat_stack: Vec<RepeatContext>,
128 pub start: usize,
129 pub string_position: usize,
130 next_context: Option<MatchContext<S>>,
131 popped_has_matched: bool,
132 pub has_matched: bool,
133}
134
135impl<S: StrDrive> Default for State<S> {
136 fn default() -> Self {
137 Self {
138 marks: Marks::default(),
139 context_stack: Vec::new(),
140 repeat_stack: Vec::new(),
141 start: 0,
142 string_position: 0,
143 next_context: None,
144 popped_has_matched: false,
145 has_matched: false,
146 }
147 }
148}
149
150impl<S: StrDrive> State<S> {
151 pub fn reset(&mut self, start: usize) {
152 self.marks.clear();
153 self.context_stack.clear();
154 self.repeat_stack.clear();
155 self.start = start;
156 self.string_position = start;
157 self.next_context = None;
158 self.popped_has_matched = false;
159 self.has_matched = false;
160 }
161
162 fn _match(&mut self, req: &mut Request<S>) {
163 while let Some(mut ctx) = self.context_stack.pop() {
164 if let Some(handler) = ctx.handler.take() {
165 handler(req, self, &mut ctx);
166 } else if ctx.remaining_codes(req) > 0 {
167 let code = ctx.peek_code(req, 0);
168 let code = SreOpcode::try_from(code).unwrap();
169 dispatch(req, self, &mut ctx, code);
170 } else {
171 ctx.failure();
172 }
173
174 if let Some(has_matched) = ctx.has_matched {
175 self.popped_has_matched = has_matched;
176 } else {
177 self.context_stack.push(ctx);
178 if let Some(next_ctx) = self.next_context.take() {
179 self.context_stack.push(next_ctx);
180 }
181 }
182 }
183 self.has_matched = self.popped_has_matched;
184 }
185
186 pub fn pymatch(&mut self, mut req: Request<S>) {
187 self.start = req.start;
188 self.string_position = req.start;
189
190 let ctx = MatchContext {
191 string_position: req.start,
192 string_offset: req.string.offset(0, req.start),
193 code_position: 0,
194 has_matched: None,
195 toplevel: true,
196 handler: None,
197 repeat_ctx_id: usize::MAX,
198 count: -1,
199 };
200 self.context_stack.push(ctx);
201
202 self._match(&mut req);
203 }
204
205 pub fn search(&mut self, mut req: Request<S>) {
206 self.start = req.start;
207 self.string_position = req.start;
208
209 if req.start > req.end {
211 return;
212 }
213
214 let mut end = req.end;
215
216 let mut start_offset = req.string.offset(0, req.start);
217
218 let mut ctx = MatchContext {
219 string_position: req.start,
220 string_offset: start_offset,
221 code_position: 0,
222 has_matched: None,
223 toplevel: true,
224 handler: None,
225 repeat_ctx_id: usize::MAX,
226 count: -1,
227 };
228
229 if ctx.peek_code(&req, 0) == SreOpcode::INFO as u32 {
230 let req = &mut req;
233 let min = ctx.peek_code(req, 3) as usize;
234
235 if ctx.remaining_chars(req) < min {
236 return;
237 }
238
239 if min > 1 {
240 end -= min - 1;
244
245 if end < ctx.string_position {
247 ctx.string_position = end;
248 ctx.string_offset = req.string.offset(0, ctx.string_position);
249 }
250 }
251
252 let flags = SreInfo::from_bits_truncate(ctx.peek_code(req, 2));
253
254 if flags.contains(SreInfo::PREFIX) {
255 if flags.contains(SreInfo::LITERAL) {
256 search_info_literal::<true, S>(req, self, ctx);
257 } else {
258 search_info_literal::<false, S>(req, self, ctx);
259 }
260 return;
261 } else if flags.contains(SreInfo::CHARSET) {
262 return search_info_charset(req, self, ctx);
263 }
264 }
266
267 self.context_stack.push(ctx);
268 self._match(&mut req);
269
270 req.must_advance = false;
271 ctx.toplevel = false;
272 while !self.has_matched && req.start < end {
273 req.start += 1;
274 start_offset = req.string.offset(start_offset, 1);
275 self.reset(req.start);
276 ctx.string_position = req.start;
277 ctx.string_offset = start_offset;
278
279 self.context_stack.push(ctx);
280 self._match(&mut req);
281 }
282 }
283}
284
285pub struct SearchIter<'a, S: StrDrive> {
286 pub req: Request<'a, S>,
287 pub state: State<S>,
288}
289
290impl<'a, S: StrDrive> Iterator for SearchIter<'a, S> {
291 type Item = ();
292
293 fn next(&mut self) -> Option<Self::Item> {
294 if self.req.start > self.req.end {
295 return None;
296 }
297
298 self.state.reset(self.req.start);
299 self.state.search(self.req);
300 if !self.state.has_matched {
301 return None;
302 }
303
304 self.req.must_advance = self.state.string_position == self.state.start;
305 self.req.start = self.state.string_position;
306
307 Some(())
308 }
309}
310
311fn dispatch<S: StrDrive>(
312 req: &Request<S>,
313 state: &mut State<S>,
314 ctx: &mut MatchContext<S>,
315 opcode: SreOpcode,
316) {
317 match opcode {
318 SreOpcode::FAILURE => {
319 ctx.failure();
320 }
321 SreOpcode::SUCCESS => {
322 if ctx.can_success(req) {
323 state.string_position = ctx.string_position;
324 ctx.success();
325 } else {
326 ctx.failure();
327 }
328 }
329 SreOpcode::ANY => {
330 if ctx.at_end(req) || ctx.at_linebreak(req) {
331 ctx.failure();
332 } else {
333 ctx.skip_code(1);
334 ctx.skip_char(req, 1);
335 }
336 }
337 SreOpcode::ANY_ALL => {
338 if ctx.at_end(req) {
339 ctx.failure();
340 } else {
341 ctx.skip_code(1);
342 ctx.skip_char(req, 1);
343 }
344 }
345 SreOpcode::ASSERT => op_assert(req, state, ctx),
346 SreOpcode::ASSERT_NOT => op_assert_not(req, state, ctx),
347 SreOpcode::AT => {
348 let atcode = SreAtCode::try_from(ctx.peek_code(req, 1)).unwrap();
349 if at(req, ctx, atcode) {
350 ctx.skip_code(2);
351 } else {
352 ctx.failure();
353 }
354 }
355 SreOpcode::BRANCH => op_branch(req, state, ctx),
356 SreOpcode::CATEGORY => {
357 let catcode = SreCatCode::try_from(ctx.peek_code(req, 1)).unwrap();
358 if ctx.at_end(req) || !category(catcode, ctx.peek_char(req)) {
359 ctx.failure();
360 } else {
361 ctx.skip_code(2);
362 ctx.skip_char(req, 1);
363 }
364 }
365 SreOpcode::IN => general_op_in(req, ctx, charset),
366 SreOpcode::IN_IGNORE => general_op_in(req, ctx, |set, c| charset(set, lower_ascii(c))),
367 SreOpcode::IN_UNI_IGNORE => {
368 general_op_in(req, ctx, |set, c| charset(set, lower_unicode(c)))
369 }
370 SreOpcode::IN_LOC_IGNORE => general_op_in(req, ctx, charset_loc_ignore),
371 SreOpcode::INFO => {
372 let min = ctx.peek_code(req, 3) as usize;
373 if ctx.remaining_chars(req) < min {
374 ctx.failure();
375 } else {
376 ctx.skip_code_from(req, 1);
377 }
378 }
379 SreOpcode::JUMP => ctx.skip_code_from(req, 1),
380 SreOpcode::LITERAL => general_op_literal(req, ctx, |code, c| code == c),
381 SreOpcode::NOT_LITERAL => general_op_literal(req, ctx, |code, c| code != c),
382 SreOpcode::LITERAL_IGNORE => general_op_literal(req, ctx, |code, c| code == lower_ascii(c)),
383 SreOpcode::NOT_LITERAL_IGNORE => {
384 general_op_literal(req, ctx, |code, c| code != lower_ascii(c))
385 }
386 SreOpcode::LITERAL_UNI_IGNORE => {
387 general_op_literal(req, ctx, |code, c| code == lower_unicode(c))
388 }
389 SreOpcode::NOT_LITERAL_UNI_IGNORE => {
390 general_op_literal(req, ctx, |code, c| code != lower_unicode(c))
391 }
392 SreOpcode::LITERAL_LOC_IGNORE => general_op_literal(req, ctx, char_loc_ignore),
393 SreOpcode::NOT_LITERAL_LOC_IGNORE => {
394 general_op_literal(req, ctx, |code, c| !char_loc_ignore(code, c))
395 }
396 SreOpcode::MARK => {
397 state
398 .marks
399 .set(ctx.peek_code(req, 1) as usize, ctx.string_position);
400 ctx.skip_code(2);
401 }
402 SreOpcode::MAX_UNTIL => op_max_until(state, ctx),
403 SreOpcode::MIN_UNTIL => op_min_until(state, ctx),
404 SreOpcode::REPEAT => op_repeat(req, state, ctx),
405 SreOpcode::REPEAT_ONE => op_repeat_one(req, state, ctx),
406 SreOpcode::MIN_REPEAT_ONE => op_min_repeat_one(req, state, ctx),
407 SreOpcode::GROUPREF => general_op_groupref(req, state, ctx, |x| x),
408 SreOpcode::GROUPREF_IGNORE => general_op_groupref(req, state, ctx, lower_ascii),
409 SreOpcode::GROUPREF_LOC_IGNORE => general_op_groupref(req, state, ctx, lower_locate),
410 SreOpcode::GROUPREF_UNI_IGNORE => general_op_groupref(req, state, ctx, lower_unicode),
411 SreOpcode::GROUPREF_EXISTS => {
412 let (group_start, group_end) = state.marks.get(ctx.peek_code(req, 1) as usize);
413 if group_start.is_some()
414 && group_end.is_some()
415 && group_start.unpack() <= group_end.unpack()
416 {
417 ctx.skip_code(3);
418 } else {
419 ctx.skip_code_from(req, 2)
420 }
421 }
422 _ => unreachable!("unexpected opcode"),
423 }
424}
425
426fn search_info_literal<const LITERAL: bool, S: StrDrive>(
427 req: &mut Request<S>,
428 state: &mut State<S>,
429 mut ctx: MatchContext<S>,
430) {
431 let len = ctx.peek_code(req, 5) as usize;
434 let skip = ctx.peek_code(req, 6) as usize;
435 let prefix = &ctx.pattern(req)[7..7 + len];
436 let overlap = &ctx.pattern(req)[7 + len - 1..7 + len * 2];
437
438 ctx.skip_code_from(req, 1);
440 ctx.skip_code(2 * skip);
441
442 req.must_advance = false;
443
444 if len == 1 {
445 let c = prefix[0];
447
448 while !ctx.at_end(req) {
449 while ctx.peek_char(req) != c {
451 ctx.skip_char(req, 1);
452 if ctx.at_end(req) {
453 return;
454 }
455 }
456
457 req.start = ctx.string_position;
458 state.start = ctx.string_position;
459 state.string_position = ctx.string_position + skip;
460
461 if LITERAL {
463 state.has_matched = true;
464 return;
465 }
466
467 let mut next_ctx = ctx;
468 next_ctx.skip_char(req, skip);
469
470 state.context_stack.push(next_ctx);
471 state._match(req);
472
473 if state.has_matched {
474 return;
475 }
476
477 ctx.skip_char(req, 1);
478 state.marks.clear();
479 }
480 } else {
481 while !ctx.at_end(req) {
482 let c = prefix[0];
483 while ctx.peek_char(req) != c {
484 ctx.skip_char(req, 1);
485 if ctx.at_end(req) {
486 return;
487 }
488 }
489 ctx.skip_char(req, 1);
490 if ctx.at_end(req) {
491 return;
492 }
493
494 let mut i = 1;
495 loop {
496 if ctx.peek_char(req) == prefix[i] {
497 i += 1;
498 if i != len {
499 ctx.skip_char(req, 1);
500 if ctx.at_end(req) {
501 return;
502 }
503 continue;
504 }
505
506 req.start = ctx.string_position - (len - 1);
507 state.start = req.start;
508 state.string_position = state.start + skip;
509
510 if LITERAL {
512 state.has_matched = true;
513 return;
514 }
515
516 let mut next_ctx = ctx;
517 if skip != 0 {
518 next_ctx.skip_char(req, 1);
519 } else {
520 next_ctx.string_position = state.string_position;
521 next_ctx.string_offset = req.string.offset(0, state.string_position);
522 }
523
524 state.context_stack.push(next_ctx);
525 state._match(req);
526
527 if state.has_matched {
528 return;
529 }
530
531 ctx.skip_char(req, 1);
532 if ctx.at_end(req) {
533 return;
534 }
535 state.marks.clear();
536 }
537
538 i = overlap[i] as usize;
539 if i == 0 {
540 break;
541 }
542 }
543 }
544 }
545}
546
547fn search_info_charset<S: StrDrive>(
548 req: &mut Request<S>,
549 state: &mut State<S>,
550 mut ctx: MatchContext<S>,
551) {
552 let set = &ctx.pattern(req)[5..];
553
554 ctx.skip_code_from(req, 1);
555
556 req.must_advance = false;
557
558 loop {
559 while !ctx.at_end(req) && !charset(set, ctx.peek_char(req)) {
560 ctx.skip_char(req, 1);
561 }
562 if ctx.at_end(req) {
563 return;
564 }
565
566 req.start = ctx.string_position;
567 state.start = ctx.string_position;
568 state.string_position = ctx.string_position;
569
570 state.context_stack.push(ctx);
571 state._match(req);
572
573 if state.has_matched {
574 return;
575 }
576
577 ctx.skip_char(req, 1);
578 state.marks.clear();
579 }
580}
581
582fn op_assert<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
585 let back = ctx.peek_code(req, 2) as usize;
586 if ctx.string_position < back {
587 return ctx.failure();
588 }
589
590 let next_ctx = ctx.next_offset(3, state, |req, state, ctx| {
591 if state.popped_has_matched {
592 ctx.skip_code_from(req, 1);
593 } else {
594 ctx.failure();
595 }
596 });
597 next_ctx.toplevel = false;
598 next_ctx.back_skip_char(req, back);
599 state.string_position = next_ctx.string_position;
600}
601
602fn op_assert_not<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
605 let back = ctx.peek_code(req, 2) as usize;
606
607 if ctx.string_position < back {
608 return ctx.skip_code_from(req, 1);
609 }
610
611 let next_ctx = ctx.next_offset(3, state, |req, state, ctx| {
612 if state.popped_has_matched {
613 ctx.failure();
614 } else {
615 ctx.skip_code_from(req, 1);
616 }
617 });
618 next_ctx.toplevel = false;
619 next_ctx.back_skip_char(req, back);
620 state.string_position = next_ctx.string_position;
621}
622
623fn op_branch<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
626 state.marks.push();
627
628 ctx.count = 1;
629 create_context(req, state, ctx);
630
631 fn create_context<S: StrDrive>(
632 req: &Request<S>,
633 state: &mut State<S>,
634 ctx: &mut MatchContext<S>,
635 ) {
636 let branch_offset = ctx.count as usize;
637 let next_length = ctx.peek_code(req, branch_offset) as isize;
638 if next_length == 0 {
639 state.marks.pop_discard();
640 return ctx.failure();
641 }
642
643 state.string_position = ctx.string_position;
644
645 ctx.count += next_length;
646 ctx.next_offset(branch_offset + 1, state, callback);
647 }
648
649 fn callback<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
650 if state.popped_has_matched {
651 return ctx.success();
652 }
653 state.marks.pop_keep();
654 create_context(req, state, ctx);
655 }
656}
657
658fn op_min_repeat_one<S: StrDrive>(
660 req: &Request<S>,
661 state: &mut State<S>,
662 ctx: &mut MatchContext<S>,
663) {
664 let min_count = ctx.peek_code(req, 2) as usize;
665
666 if ctx.remaining_chars(req) < min_count {
667 return ctx.failure();
668 }
669
670 state.string_position = ctx.string_position;
671
672 ctx.count = if min_count == 0 {
673 0
674 } else {
675 let count = _count(req, state, ctx, min_count);
676 if count < min_count {
677 return ctx.failure();
678 }
679 ctx.skip_char(req, count);
680 count as isize
681 };
682
683 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
684 if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
685 state.string_position = ctx.string_position;
687 return ctx.success();
688 }
689
690 state.marks.push();
691 create_context(req, state, ctx);
692
693 fn create_context<S: StrDrive>(
694 req: &Request<S>,
695 state: &mut State<S>,
696 ctx: &mut MatchContext<S>,
697 ) {
698 let max_count = ctx.peek_code(req, 3) as usize;
699
700 if max_count == MAXREPEAT || ctx.count as usize <= max_count {
701 state.string_position = ctx.string_position;
702 ctx.next_from(1, req, state, callback);
703 } else {
704 state.marks.pop_discard();
705 ctx.failure();
706 }
707 }
708
709 fn callback<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
710 if state.popped_has_matched {
711 return ctx.success();
712 }
713
714 state.string_position = ctx.string_position;
715
716 if _count(req, state, ctx, 1) == 0 {
717 state.marks.pop_discard();
718 return ctx.failure();
719 }
720
721 ctx.skip_char(req, 1);
722 ctx.count += 1;
723 state.marks.pop_keep();
724 create_context(req, state, ctx);
725 }
726}
727
728fn op_repeat_one<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
735 let min_count = ctx.peek_code(req, 2) as usize;
736 let max_count = ctx.peek_code(req, 3) as usize;
737
738 if ctx.remaining_chars(req) < min_count {
739 return ctx.failure();
740 }
741
742 state.string_position = ctx.string_position;
743
744 let count = _count(req, state, ctx, max_count);
745 ctx.skip_char(req, count);
746 if count < min_count {
747 return ctx.failure();
748 }
749
750 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
751 if next_code == SreOpcode::SUCCESS as u32 && ctx.can_success(req) {
752 state.string_position = ctx.string_position;
754 return ctx.success();
755 }
756
757 state.marks.push();
758 ctx.count = count as isize;
759 create_context(req, state, ctx);
760
761 fn create_context<S: StrDrive>(
762 req: &Request<S>,
763 state: &mut State<S>,
764 ctx: &mut MatchContext<S>,
765 ) {
766 let min_count = ctx.peek_code(req, 2) as isize;
767 let next_code = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 1);
768 if next_code == SreOpcode::LITERAL as u32 {
769 let c = ctx.peek_code(req, ctx.peek_code(req, 1) as usize + 2);
772 while ctx.at_end(req) || ctx.peek_char(req) != c {
773 if ctx.count <= min_count {
774 state.marks.pop_discard();
775 return ctx.failure();
776 }
777 ctx.back_skip_char(req, 1);
778 ctx.count -= 1;
779 }
780 }
781
782 state.string_position = ctx.string_position;
783
784 ctx.next_from(1, req, state, callback);
786 }
787
788 fn callback<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
789 if state.popped_has_matched {
790 return ctx.success();
791 }
792
793 let min_count = ctx.peek_code(req, 2) as isize;
794
795 if ctx.count <= min_count {
796 state.marks.pop_discard();
797 return ctx.failure();
798 }
799
800 ctx.back_skip_char(req, 1);
801 ctx.count -= 1;
802
803 state.marks.pop_keep();
804 create_context(req, state, ctx);
805 }
806}
807
808#[derive(Debug, Clone, Copy)]
809struct RepeatContext {
810 count: isize,
811 min_count: usize,
812 max_count: usize,
813 code_position: usize,
814 last_position: usize,
815 prev_id: usize,
816}
817
818fn op_repeat<S: StrDrive>(req: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
822 let repeat_ctx = RepeatContext {
823 count: -1,
824 min_count: ctx.peek_code(req, 2) as usize,
825 max_count: ctx.peek_code(req, 3) as usize,
826 code_position: ctx.code_position,
827 last_position: std::usize::MAX,
828 prev_id: ctx.repeat_ctx_id,
829 };
830
831 state.repeat_stack.push(repeat_ctx);
832
833 state.string_position = ctx.string_position;
834
835 let repeat_ctx_id = state.repeat_stack.len() - 1;
836
837 let next_ctx = ctx.next_from(1, req, state, |_, state, ctx| {
838 ctx.has_matched = Some(state.popped_has_matched);
839 state.repeat_stack.pop();
840 });
841 next_ctx.repeat_ctx_id = repeat_ctx_id;
842}
843
844fn op_min_until<S: StrDrive>(state: &mut State<S>, ctx: &mut MatchContext<S>) {
846 let repeat_ctx = state.repeat_stack.last_mut().unwrap();
847
848 state.string_position = ctx.string_position;
849
850 repeat_ctx.count += 1;
851
852 if (repeat_ctx.count as usize) < repeat_ctx.min_count {
853 ctx.next_at(repeat_ctx.code_position + 4, state, |_, state, ctx| {
855 if state.popped_has_matched {
856 ctx.success();
857 } else {
858 state.repeat_stack[ctx.repeat_ctx_id].count -= 1;
859 state.string_position = ctx.string_position;
860 ctx.failure();
861 }
862 });
863 return;
864 }
865
866 state.marks.push();
867
868 ctx.count = ctx.repeat_ctx_id as isize;
869
870 let repeat_ctx_prev_id = repeat_ctx.prev_id;
871
872 let next_ctx = ctx.next_offset(1, state, |_, state, ctx| {
874 if state.popped_has_matched {
875 return ctx.success();
876 }
877
878 ctx.repeat_ctx_id = ctx.count as usize;
879
880 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
881
882 state.string_position = ctx.string_position;
883
884 state.marks.pop();
885
886 if repeat_ctx.count as usize >= repeat_ctx.max_count && repeat_ctx.max_count != MAXREPEAT
889 || state.string_position == repeat_ctx.last_position
890 {
891 repeat_ctx.count -= 1;
892 return ctx.failure();
893 }
894
895 repeat_ctx.last_position = state.string_position;
897
898 ctx.next_at(repeat_ctx.code_position + 4, state, |_, state, ctx| {
899 if state.popped_has_matched {
900 ctx.success();
901 } else {
902 state.repeat_stack[ctx.repeat_ctx_id].count -= 1;
903 state.string_position = ctx.string_position;
904 ctx.failure();
905 }
906 });
907 });
908 next_ctx.repeat_ctx_id = repeat_ctx_prev_id;
909}
910
911fn op_max_until<S: StrDrive>(state: &mut State<S>, ctx: &mut MatchContext<S>) {
913 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
914
915 state.string_position = ctx.string_position;
916
917 repeat_ctx.count += 1;
918
919 if (repeat_ctx.count as usize) < repeat_ctx.min_count {
920 ctx.next_at(repeat_ctx.code_position + 4, state, |_, state, ctx| {
922 if state.popped_has_matched {
923 ctx.success();
924 } else {
925 state.repeat_stack[ctx.repeat_ctx_id].count -= 1;
926 state.string_position = ctx.string_position;
927 ctx.failure();
928 }
929 });
930 return;
931 }
932
933 if ((repeat_ctx.count as usize) < repeat_ctx.max_count || repeat_ctx.max_count == MAXREPEAT)
934 && state.string_position != repeat_ctx.last_position
935 {
936 state.marks.push();
939
940 ctx.count = repeat_ctx.last_position as isize;
941 repeat_ctx.last_position = state.string_position;
942
943 ctx.next_at(repeat_ctx.code_position + 4, state, |_, state, ctx| {
944 let save_last_position = ctx.count as usize;
945 let repeat_ctx = &mut state.repeat_stack[ctx.repeat_ctx_id];
946 repeat_ctx.last_position = save_last_position;
947
948 if state.popped_has_matched {
949 state.marks.pop_discard();
950 return ctx.success();
951 }
952
953 state.marks.pop();
954 repeat_ctx.count -= 1;
955
956 state.string_position = ctx.string_position;
957
958 let repeat_ctx_prev_id = repeat_ctx.prev_id;
961 let next_ctx = ctx.next_offset(1, state, tail_callback);
962 next_ctx.repeat_ctx_id = repeat_ctx_prev_id;
963 });
964 return;
965 }
966
967 let repeat_ctx_prev_id = repeat_ctx.prev_id;
970 let next_ctx = ctx.next_offset(1, state, tail_callback);
971 next_ctx.repeat_ctx_id = repeat_ctx_prev_id;
972
973 fn tail_callback<S: StrDrive>(_: &Request<S>, state: &mut State<S>, ctx: &mut MatchContext<S>) {
974 if state.popped_has_matched {
975 ctx.success();
976 } else {
977 state.string_position = ctx.string_position;
978 ctx.failure();
979 }
980 }
981}
982
983pub trait StrDrive: Copy {
984 fn offset(&self, offset: usize, skip: usize) -> usize;
985 fn count(&self) -> usize;
986 fn peek(&self, offset: usize) -> u32;
987 fn back_peek(&self, offset: usize) -> u32;
988 fn back_offset(&self, offset: usize, skip: usize) -> usize;
989}
990
991impl StrDrive for &str {
992 fn offset(&self, offset: usize, skip: usize) -> usize {
993 self.get(offset..)
994 .and_then(|s| s.char_indices().nth(skip).map(|x| x.0 + offset))
995 .unwrap_or(self.len())
996 }
997
998 fn count(&self) -> usize {
999 self.chars().count()
1000 }
1001
1002 fn peek(&self, offset: usize) -> u32 {
1003 unsafe { self.get_unchecked(offset..) }
1004 .chars()
1005 .next()
1006 .unwrap() as u32
1007 }
1008
1009 fn back_peek(&self, offset: usize) -> u32 {
1010 let bytes = self.as_bytes();
1011 let back_offset = utf8_back_peek_offset(bytes, offset);
1012 match offset - back_offset {
1013 1 => u32::from_be_bytes([0, 0, 0, bytes[offset - 1]]),
1014 2 => u32::from_be_bytes([0, 0, bytes[offset - 2], bytes[offset - 1]]),
1015 3 => u32::from_be_bytes([0, bytes[offset - 3], bytes[offset - 2], bytes[offset - 1]]),
1016 4 => u32::from_be_bytes([
1017 bytes[offset - 4],
1018 bytes[offset - 3],
1019 bytes[offset - 2],
1020 bytes[offset - 1],
1021 ]),
1022 _ => unreachable!(),
1023 }
1024 }
1025
1026 fn back_offset(&self, offset: usize, skip: usize) -> usize {
1027 let bytes = self.as_bytes();
1028 let mut back_offset = offset;
1029 for _ in 0..skip {
1030 back_offset = utf8_back_peek_offset(bytes, back_offset);
1031 }
1032 back_offset
1033 }
1034}
1035
1036impl<'a> StrDrive for &'a [u8] {
1037 fn offset(&self, offset: usize, skip: usize) -> usize {
1038 offset + skip
1039 }
1040
1041 fn count(&self) -> usize {
1042 self.len()
1043 }
1044
1045 fn peek(&self, offset: usize) -> u32 {
1046 self[offset] as u32
1047 }
1048
1049 fn back_peek(&self, offset: usize) -> u32 {
1050 self[offset - 1] as u32
1051 }
1052
1053 fn back_offset(&self, offset: usize, skip: usize) -> usize {
1054 offset - skip
1055 }
1056}
1057
1058type OpFunc<S> = for<'a> fn(&Request<'a, S>, &mut State<S>, &mut MatchContext<S>);
1059
1060#[derive(Clone, Copy)]
1061struct MatchContext<S: StrDrive> {
1062 string_position: usize,
1063 string_offset: usize,
1064 code_position: usize,
1065 has_matched: Option<bool>,
1066 toplevel: bool,
1067 handler: Option<OpFunc<S>>,
1068 repeat_ctx_id: usize,
1069 count: isize,
1070}
1071
1072impl<S: StrDrive> std::fmt::Debug for MatchContext<S> {
1073 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1074 f.debug_struct("MatchContext")
1075 .field("string_position", &self.string_position)
1076 .field("string_offset", &self.string_offset)
1077 .field("code_position", &self.code_position)
1078 .field("has_matched", &self.has_matched)
1079 .field("toplevel", &self.toplevel)
1080 .field("handler", &self.handler.map(|x| x as usize))
1081 .field("repeat_ctx_id", &self.repeat_ctx_id)
1082 .field("count", &self.count)
1083 .finish()
1084 }
1085}
1086
1087impl<S: StrDrive> MatchContext<S> {
1088 fn pattern<'a>(&self, req: &Request<'a, S>) -> &'a [u32] {
1089 &req.pattern_codes[self.code_position..]
1090 }
1091
1092 fn remaining_codes(&self, req: &Request<S>) -> usize {
1093 req.pattern_codes.len() - self.code_position
1094 }
1095
1096 fn remaining_chars(&self, req: &Request<S>) -> usize {
1097 req.end - self.string_position
1098 }
1099
1100 fn peek_char(&self, req: &Request<S>) -> u32 {
1101 req.string.peek(self.string_offset)
1102 }
1103
1104 fn skip_char(&mut self, req: &Request<S>, skip: usize) {
1105 self.string_position += skip;
1106 self.string_offset = req.string.offset(self.string_offset, skip);
1107 }
1108
1109 fn back_peek_char(&self, req: &Request<S>) -> u32 {
1110 req.string.back_peek(self.string_offset)
1111 }
1112
1113 fn back_skip_char(&mut self, req: &Request<S>, skip: usize) {
1114 self.string_position -= skip;
1115 self.string_offset = req.string.back_offset(self.string_offset, skip);
1116 }
1117
1118 fn peek_code(&self, req: &Request<S>, peek: usize) -> u32 {
1119 req.pattern_codes[self.code_position + peek]
1120 }
1121
1122 fn skip_code(&mut self, skip: usize) {
1123 self.code_position += skip;
1124 }
1125
1126 fn skip_code_from(&mut self, req: &Request<S>, peek: usize) {
1127 self.skip_code(self.peek_code(req, peek) as usize + 1);
1128 }
1129
1130 fn at_beginning(&self) -> bool {
1131 self.string_position == 0
1133 }
1134
1135 fn at_end(&self, req: &Request<S>) -> bool {
1136 self.string_position == req.end
1137 }
1138
1139 fn at_linebreak(&self, req: &Request<S>) -> bool {
1140 !self.at_end(req) && is_linebreak(self.peek_char(req))
1141 }
1142
1143 fn at_boundary<F: FnMut(u32) -> bool>(&self, req: &Request<S>, mut word_checker: F) -> bool {
1144 if self.at_beginning() && self.at_end(req) {
1145 return false;
1146 }
1147 let that = !self.at_beginning() && word_checker(self.back_peek_char(req));
1148 let this = !self.at_end(req) && word_checker(self.peek_char(req));
1149 this != that
1150 }
1151
1152 fn at_non_boundary<F: FnMut(u32) -> bool>(
1153 &self,
1154 req: &Request<S>,
1155 mut word_checker: F,
1156 ) -> bool {
1157 if self.at_beginning() && self.at_end(req) {
1158 return false;
1159 }
1160 let that = !self.at_beginning() && word_checker(self.back_peek_char(req));
1161 let this = !self.at_end(req) && word_checker(self.peek_char(req));
1162 this == that
1163 }
1164
1165 fn can_success(&self, req: &Request<S>) -> bool {
1166 if !self.toplevel {
1167 return true;
1168 }
1169 if req.match_all && !self.at_end(req) {
1170 return false;
1171 }
1172 if req.must_advance && self.string_position == req.start {
1173 return false;
1174 }
1175 true
1176 }
1177
1178 fn success(&mut self) {
1179 self.has_matched = Some(true);
1180 }
1181
1182 fn failure(&mut self) {
1183 self.has_matched = Some(false);
1184 }
1185
1186 fn next_from<'b>(
1187 &mut self,
1188 peek: usize,
1189 req: &Request<S>,
1190 state: &'b mut State<S>,
1191 f: OpFunc<S>,
1192 ) -> &'b mut Self {
1193 self.next_offset(self.peek_code(req, peek) as usize + 1, state, f)
1194 }
1195
1196 fn next_offset<'b>(
1197 &mut self,
1198 offset: usize,
1199 state: &'b mut State<S>,
1200 f: OpFunc<S>,
1201 ) -> &'b mut Self {
1202 self.next_at(self.code_position + offset, state, f)
1203 }
1204
1205 fn next_at<'b>(
1206 &mut self,
1207 code_position: usize,
1208 state: &'b mut State<S>,
1209 f: OpFunc<S>,
1210 ) -> &'b mut Self {
1211 self.handler = Some(f);
1212 state.next_context.insert(MatchContext {
1213 code_position,
1214 has_matched: None,
1215 handler: None,
1216 count: -1,
1217 ..*self
1218 })
1219 }
1220}
1221
1222fn at<S: StrDrive>(req: &Request<S>, ctx: &MatchContext<S>, atcode: SreAtCode) -> bool {
1223 match atcode {
1224 SreAtCode::BEGINNING | SreAtCode::BEGINNING_STRING => ctx.at_beginning(),
1225 SreAtCode::BEGINNING_LINE => ctx.at_beginning() || is_linebreak(ctx.back_peek_char(req)),
1226 SreAtCode::BOUNDARY => ctx.at_boundary(req, is_word),
1227 SreAtCode::NON_BOUNDARY => ctx.at_non_boundary(req, is_word),
1228 SreAtCode::END => {
1229 (ctx.remaining_chars(req) == 1 && ctx.at_linebreak(req)) || ctx.at_end(req)
1230 }
1231 SreAtCode::END_LINE => ctx.at_linebreak(req) || ctx.at_end(req),
1232 SreAtCode::END_STRING => ctx.at_end(req),
1233 SreAtCode::LOC_BOUNDARY => ctx.at_boundary(req, is_loc_word),
1234 SreAtCode::LOC_NON_BOUNDARY => ctx.at_non_boundary(req, is_loc_word),
1235 SreAtCode::UNI_BOUNDARY => ctx.at_boundary(req, is_uni_word),
1236 SreAtCode::UNI_NON_BOUNDARY => ctx.at_non_boundary(req, is_uni_word),
1237 }
1238}
1239
1240fn general_op_literal<S: StrDrive, F: FnOnce(u32, u32) -> bool>(
1241 req: &Request<S>,
1242 ctx: &mut MatchContext<S>,
1243 f: F,
1244) {
1245 if ctx.at_end(req) || !f(ctx.peek_code(req, 1), ctx.peek_char(req)) {
1246 ctx.failure();
1247 } else {
1248 ctx.skip_code(2);
1249 ctx.skip_char(req, 1);
1250 }
1251}
1252
1253fn general_op_in<S: StrDrive, F: FnOnce(&[u32], u32) -> bool>(
1254 req: &Request<S>,
1255 ctx: &mut MatchContext<S>,
1256 f: F,
1257) {
1258 if ctx.at_end(req) || !f(&ctx.pattern(req)[2..], ctx.peek_char(req)) {
1259 ctx.failure();
1260 } else {
1261 ctx.skip_code_from(req, 1);
1262 ctx.skip_char(req, 1);
1263 }
1264}
1265
1266fn general_op_groupref<S: StrDrive, F: FnMut(u32) -> u32>(
1267 req: &Request<S>,
1268 state: &State<S>,
1269 ctx: &mut MatchContext<S>,
1270 mut f: F,
1271) {
1272 let (group_start, group_end) = state.marks.get(ctx.peek_code(req, 1) as usize);
1273 let (group_start, group_end) = if group_start.is_some()
1274 && group_end.is_some()
1275 && group_start.unpack() <= group_end.unpack()
1276 {
1277 (group_start.unpack(), group_end.unpack())
1278 } else {
1279 return ctx.failure();
1280 };
1281
1282 let mut gctx = MatchContext {
1283 string_position: group_start,
1284 string_offset: req.string.offset(0, group_start),
1285 ..*ctx
1286 };
1287
1288 for _ in group_start..group_end {
1289 if ctx.at_end(req) || f(ctx.peek_char(req)) != f(gctx.peek_char(req)) {
1290 return ctx.failure();
1291 }
1292 ctx.skip_char(req, 1);
1293 gctx.skip_char(req, 1);
1294 }
1295
1296 ctx.skip_code(2);
1297}
1298
1299fn char_loc_ignore(code: u32, c: u32) -> bool {
1300 code == c || code == lower_locate(c) || code == upper_locate(c)
1301}
1302
1303fn charset_loc_ignore(set: &[u32], c: u32) -> bool {
1304 let lo = lower_locate(c);
1305 if charset(set, c) {
1306 return true;
1307 }
1308 let up = upper_locate(c);
1309 up != lo && charset(set, up)
1310}
1311
1312fn category(catcode: SreCatCode, c: u32) -> bool {
1313 match catcode {
1314 SreCatCode::DIGIT => is_digit(c),
1315 SreCatCode::NOT_DIGIT => !is_digit(c),
1316 SreCatCode::SPACE => is_space(c),
1317 SreCatCode::NOT_SPACE => !is_space(c),
1318 SreCatCode::WORD => is_word(c),
1319 SreCatCode::NOT_WORD => !is_word(c),
1320 SreCatCode::LINEBREAK => is_linebreak(c),
1321 SreCatCode::NOT_LINEBREAK => !is_linebreak(c),
1322 SreCatCode::LOC_WORD => is_loc_word(c),
1323 SreCatCode::LOC_NOT_WORD => !is_loc_word(c),
1324 SreCatCode::UNI_DIGIT => is_uni_digit(c),
1325 SreCatCode::UNI_NOT_DIGIT => !is_uni_digit(c),
1326 SreCatCode::UNI_SPACE => is_uni_space(c),
1327 SreCatCode::UNI_NOT_SPACE => !is_uni_space(c),
1328 SreCatCode::UNI_WORD => is_uni_word(c),
1329 SreCatCode::UNI_NOT_WORD => !is_uni_word(c),
1330 SreCatCode::UNI_LINEBREAK => is_uni_linebreak(c),
1331 SreCatCode::UNI_NOT_LINEBREAK => !is_uni_linebreak(c),
1332 }
1333}
1334
1335fn charset(set: &[u32], ch: u32) -> bool {
1336 let mut ok = true;
1338 let mut i = 0;
1339 while i < set.len() {
1340 let opcode = match SreOpcode::try_from(set[i]) {
1341 Ok(code) => code,
1342 Err(_) => {
1343 break;
1344 }
1345 };
1346 match opcode {
1347 SreOpcode::FAILURE => {
1348 return !ok;
1349 }
1350 SreOpcode::CATEGORY => {
1351 let catcode = match SreCatCode::try_from(set[i + 1]) {
1353 Ok(code) => code,
1354 Err(_) => {
1355 break;
1356 }
1357 };
1358 if category(catcode, ch) {
1359 return ok;
1360 }
1361 i += 2;
1362 }
1363 SreOpcode::CHARSET => {
1364 let set = &set[i + 1..];
1366 if ch < 256 && ((set[(ch >> 5) as usize] & (1u32 << (ch & 31))) != 0) {
1367 return ok;
1368 }
1369 i += 1 + 8;
1370 }
1371 SreOpcode::BIGCHARSET => {
1372 let count = set[i + 1] as usize;
1374 if ch < 0x10000 {
1375 let set = &set[i + 2..];
1376 let block_index = ch >> 8;
1377 let (_, blockindices, _) = unsafe { set.align_to::<u8>() };
1378 let blocks = &set[64..];
1379 let block = blockindices[block_index as usize];
1380 if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize]
1381 & (1u32 << (ch & 31))
1382 != 0
1383 {
1384 return ok;
1385 }
1386 }
1387 i += 2 + 64 + count * 8;
1388 }
1389 SreOpcode::LITERAL => {
1390 if ch == set[i + 1] {
1392 return ok;
1393 }
1394 i += 2;
1395 }
1396 SreOpcode::NEGATE => {
1397 ok = !ok;
1398 i += 1;
1399 }
1400 SreOpcode::RANGE => {
1401 if set[i + 1] <= ch && ch <= set[i + 2] {
1403 return ok;
1404 }
1405 i += 3;
1406 }
1407 SreOpcode::RANGE_UNI_IGNORE => {
1408 if set[i + 1] <= ch && ch <= set[i + 2] {
1410 return ok;
1411 }
1412 let ch = upper_unicode(ch);
1413 if set[i + 1] <= ch && ch <= set[i + 2] {
1414 return ok;
1415 }
1416 i += 3;
1417 }
1418 _ => {
1419 break;
1420 }
1421 }
1422 }
1423 false
1426}
1427
1428fn _count<S: StrDrive>(
1429 req: &Request<S>,
1430 state: &mut State<S>,
1431 ctx: &MatchContext<S>,
1432 max_count: usize,
1433) -> usize {
1434 let mut ctx = *ctx;
1435 let max_count = std::cmp::min(max_count, ctx.remaining_chars(req));
1436 let end = ctx.string_position + max_count;
1437 let opcode = SreOpcode::try_from(ctx.peek_code(req, 0)).unwrap();
1438
1439 match opcode {
1440 SreOpcode::ANY => {
1441 while !ctx.string_position < end && !ctx.at_linebreak(req) {
1442 ctx.skip_char(req, 1);
1443 }
1444 }
1445 SreOpcode::ANY_ALL => {
1446 ctx.skip_char(req, max_count);
1447 }
1448 SreOpcode::IN => {
1449 while !ctx.string_position < end && charset(&ctx.pattern(req)[2..], ctx.peek_char(req))
1450 {
1451 ctx.skip_char(req, 1);
1452 }
1453 }
1454 SreOpcode::LITERAL => {
1455 general_count_literal(req, &mut ctx, end, |code, c| code == c as u32);
1456 }
1457 SreOpcode::NOT_LITERAL => {
1458 general_count_literal(req, &mut ctx, end, |code, c| code != c as u32);
1459 }
1460 SreOpcode::LITERAL_IGNORE => {
1461 general_count_literal(req, &mut ctx, end, |code, c| code == lower_ascii(c) as u32);
1462 }
1463 SreOpcode::NOT_LITERAL_IGNORE => {
1464 general_count_literal(req, &mut ctx, end, |code, c| code != lower_ascii(c) as u32);
1465 }
1466 SreOpcode::LITERAL_LOC_IGNORE => {
1467 general_count_literal(req, &mut ctx, end, char_loc_ignore);
1468 }
1469 SreOpcode::NOT_LITERAL_LOC_IGNORE => {
1470 general_count_literal(req, &mut ctx, end, |code, c| !char_loc_ignore(code, c));
1471 }
1472 SreOpcode::LITERAL_UNI_IGNORE => {
1473 general_count_literal(req, &mut ctx, end, |code, c| {
1474 code == lower_unicode(c) as u32
1475 });
1476 }
1477 SreOpcode::NOT_LITERAL_UNI_IGNORE => {
1478 general_count_literal(req, &mut ctx, end, |code, c| {
1479 code != lower_unicode(c) as u32
1480 });
1481 }
1482 _ => {
1483 let mut count = 0;
1485
1486 ctx.skip_code(4);
1487 let reset_position = ctx.code_position;
1488
1489 while count < max_count {
1490 ctx.code_position = reset_position;
1491 let code = ctx.peek_code(req, 0);
1492 let code = SreOpcode::try_from(code).unwrap();
1493 dispatch(req, state, &mut ctx, code);
1494 if ctx.has_matched == Some(false) {
1495 break;
1496 }
1497 count += 1;
1498 }
1499 return count;
1500 }
1501 }
1502
1503 ctx.string_position - state.string_position
1505}
1506
1507fn general_count_literal<S: StrDrive, F: FnMut(u32, u32) -> bool>(
1508 req: &Request<S>,
1509 ctx: &mut MatchContext<S>,
1510 end: usize,
1511 mut f: F,
1512) {
1513 let ch = ctx.peek_code(req, 1);
1514 while !ctx.string_position < end && f(ch, ctx.peek_char(req)) {
1515 ctx.skip_char(req, 1);
1516 }
1517}
1518
1519fn is_word(ch: u32) -> bool {
1520 ch == '_' as u32
1521 || u8::try_from(ch)
1522 .map(|x| x.is_ascii_alphanumeric())
1523 .unwrap_or(false)
1524}
1525fn is_space(ch: u32) -> bool {
1526 u8::try_from(ch)
1527 .map(is_py_ascii_whitespace)
1528 .unwrap_or(false)
1529}
1530fn is_digit(ch: u32) -> bool {
1531 u8::try_from(ch)
1532 .map(|x| x.is_ascii_digit())
1533 .unwrap_or(false)
1534}
1535fn is_loc_alnum(ch: u32) -> bool {
1536 u8::try_from(ch)
1538 .map(|x| x.is_ascii_alphanumeric())
1539 .unwrap_or(false)
1540}
1541fn is_loc_word(ch: u32) -> bool {
1542 ch == '_' as u32 || is_loc_alnum(ch)
1543}
1544fn is_linebreak(ch: u32) -> bool {
1545 ch == '\n' as u32
1546}
1547pub fn lower_ascii(ch: u32) -> u32 {
1548 u8::try_from(ch)
1549 .map(|x| x.to_ascii_lowercase() as u32)
1550 .unwrap_or(ch)
1551}
1552fn lower_locate(ch: u32) -> u32 {
1553 lower_ascii(ch)
1555}
1556fn upper_locate(ch: u32) -> u32 {
1557 u8::try_from(ch)
1559 .map(|x| x.to_ascii_uppercase() as u32)
1560 .unwrap_or(ch)
1561}
1562fn is_uni_digit(ch: u32) -> bool {
1563 char::try_from(ch)
1565 .map(|x| x.is_ascii_digit())
1566 .unwrap_or(false)
1567}
1568fn is_uni_space(ch: u32) -> bool {
1569 is_space(ch)
1571 || matches!(
1572 ch,
1573 0x0009
1574 | 0x000A
1575 | 0x000B
1576 | 0x000C
1577 | 0x000D
1578 | 0x001C
1579 | 0x001D
1580 | 0x001E
1581 | 0x001F
1582 | 0x0020
1583 | 0x0085
1584 | 0x00A0
1585 | 0x1680
1586 | 0x2000
1587 | 0x2001
1588 | 0x2002
1589 | 0x2003
1590 | 0x2004
1591 | 0x2005
1592 | 0x2006
1593 | 0x2007
1594 | 0x2008
1595 | 0x2009
1596 | 0x200A
1597 | 0x2028
1598 | 0x2029
1599 | 0x202F
1600 | 0x205F
1601 | 0x3000
1602 )
1603}
1604fn is_uni_linebreak(ch: u32) -> bool {
1605 matches!(
1606 ch,
1607 0x000A | 0x000B | 0x000C | 0x000D | 0x001C | 0x001D | 0x001E | 0x0085 | 0x2028 | 0x2029
1608 )
1609}
1610fn is_uni_alnum(ch: u32) -> bool {
1611 char::try_from(ch)
1613 .map(|x| x.is_alphanumeric())
1614 .unwrap_or(false)
1615}
1616fn is_uni_word(ch: u32) -> bool {
1617 ch == '_' as u32 || is_uni_alnum(ch)
1618}
1619pub fn lower_unicode(ch: u32) -> u32 {
1620 char::try_from(ch)
1622 .map(|x| x.to_lowercase().next().unwrap() as u32)
1623 .unwrap_or(ch)
1624}
1625pub fn upper_unicode(ch: u32) -> u32 {
1626 char::try_from(ch)
1628 .map(|x| x.to_uppercase().next().unwrap() as u32)
1629 .unwrap_or(ch)
1630}
1631
1632fn is_utf8_first_byte(b: u8) -> bool {
1633 (b & 0b10000000 == 0) || (b & 0b11000000 == 0b11000000)
1638}
1639
1640fn utf8_back_peek_offset(bytes: &[u8], offset: usize) -> usize {
1641 let mut offset = offset - 1;
1642 if !is_utf8_first_byte(bytes[offset]) {
1643 offset -= 1;
1644 if !is_utf8_first_byte(bytes[offset]) {
1645 offset -= 1;
1646 if !is_utf8_first_byte(bytes[offset]) {
1647 offset -= 1;
1648 if !is_utf8_first_byte(bytes[offset]) {
1649 panic!("not utf-8 code point");
1650 }
1651 }
1652 }
1653 }
1654 offset
1655}