resharp_algebra/unicode_classes/
mod.rs1mod classes;
2
3use crate::{NodeId, RegexBuilder};
4
5pub use classes::{
6 build_digit_class, build_digit_class_full, build_space_class, build_word_class,
7 build_word_class_full,
8};
9
10pub(crate) fn utf8_char(b: &mut RegexBuilder) -> NodeId {
11 let ascii = b.mk_range_u8(0, 127);
12 let cont = b.mk_range_u8(0x80, 0xBF);
13 let c2 = b.mk_range_u8(0xC0, 0xDF);
14 let c2s = b.mk_concat(c2, cont);
15 let e0 = b.mk_range_u8(0xE0, 0xEF);
16 let e0s = b.mk_concats([e0, cont, cont].into_iter());
17 let f0 = b.mk_range_u8(0xF0, 0xF7);
18 let f0s = b.mk_concats([f0, cont, cont, cont].into_iter());
19 b.mk_unions([ascii, c2s, e0s, f0s].into_iter())
20}
21
22fn neg_class(b: &mut RegexBuilder, positive: NodeId) -> NodeId {
23 let neg = b.mk_compl(positive);
24 let uc = utf8_char(b);
25 b.mk_inters([neg, uc].into_iter())
26}
27
28#[derive(Clone, Debug)]
29pub struct UnicodeClassCache {
30 pub word: NodeId,
31 pub non_word: NodeId,
32 pub digit: NodeId,
33 pub non_digit: NodeId,
34 pub space: NodeId,
35 pub non_space: NodeId,
36}
37
38impl Default for UnicodeClassCache {
39 fn default() -> Self {
40 UnicodeClassCache {
41 word: NodeId::MISSING,
42 non_word: NodeId::MISSING,
43 digit: NodeId::MISSING,
44 non_digit: NodeId::MISSING,
45 space: NodeId::MISSING,
46 non_space: NodeId::MISSING,
47 }
48 }
49}
50
51impl UnicodeClassCache {
52 pub fn ensure_word(&mut self, b: &mut RegexBuilder) {
53 if self.word == NodeId::MISSING {
54 self.word = build_word_class(b);
55 self.non_word = neg_class(b, self.word);
56 }
57 }
58
59 pub fn ensure_word_full(&mut self, b: &mut RegexBuilder) {
60 if self.word == NodeId::MISSING {
61 self.word = build_word_class_full(b);
62 self.non_word = neg_class(b, self.word);
63 }
64 }
65
66 pub fn ensure_digit(&mut self, b: &mut RegexBuilder) {
67 if self.digit == NodeId::MISSING {
68 self.digit = build_digit_class(b);
69 self.non_digit = neg_class(b, self.digit);
70 }
71 }
72
73 pub fn ensure_digit_full(&mut self, b: &mut RegexBuilder) {
74 if self.digit == NodeId::MISSING {
75 self.digit = build_digit_class_full(b);
76 self.non_digit = neg_class(b, self.digit);
77 }
78 }
79
80 pub fn ensure_space(&mut self, b: &mut RegexBuilder) {
81 if self.space == NodeId::MISSING {
82 self.space = build_space_class(b);
83 self.non_space = neg_class(b, self.space);
84 }
85 }
86}