resharp_algebra/unicode_classes/
mod.rs1mod classes;
2
3use crate::{NodeId, RegexBuilder};
4
5pub use classes::*;
6
7fn utf8_char(b: &mut RegexBuilder) -> NodeId {
8 let ascii = b.mk_range_u8(0, 127);
9 let cont = b.mk_range_u8(0x80, 0xBF);
10 let c2 = b.mk_range_u8(0xC0, 0xDF);
11 let c2s = b.mk_concat(c2, cont);
12 let e0 = b.mk_range_u8(0xE0, 0xEF);
13 let e0s = b.mk_concats([e0, cont, cont].into_iter());
14 let f0 = b.mk_range_u8(0xF0, 0xF7);
15 let f0s = b.mk_concats([f0, cont, cont, cont].into_iter());
16 b.mk_unions([ascii, c2s, e0s, f0s].into_iter())
17}
18
19fn neg_class(b: &mut RegexBuilder, positive: NodeId) -> NodeId {
20 let neg = b.mk_compl(positive);
21 let uc = utf8_char(b);
22 b.mk_inters([neg, uc].into_iter())
23}
24
25#[derive(Clone, Debug)]
26pub struct UnicodeClassCache {
27 pub word: NodeId,
28 pub non_word: NodeId,
29 pub digit: NodeId,
30 pub non_digit: NodeId,
31 pub space: NodeId,
32 pub non_space: NodeId,
33}
34
35impl Default for UnicodeClassCache {
36 fn default() -> Self {
37 UnicodeClassCache {
38 word: NodeId::MISSING,
39 non_word: NodeId::MISSING,
40 digit: NodeId::MISSING,
41 non_digit: NodeId::MISSING,
42 space: NodeId::MISSING,
43 non_space: NodeId::MISSING,
44 }
45 }
46}
47
48impl UnicodeClassCache {
49 pub fn ensure_word(&mut self, b: &mut RegexBuilder) {
50 if self.word == NodeId::MISSING {
51 self.word = build_word_class(b);
52 self.non_word = neg_class(b, self.word);
53 }
54 }
55
56 pub fn ensure_digit(&mut self, b: &mut RegexBuilder) {
57 if self.digit == NodeId::MISSING {
58 self.digit = build_digit_class(b);
59 self.non_digit = neg_class(b, self.digit);
60 }
61 }
62
63 pub fn ensure_space(&mut self, b: &mut RegexBuilder) {
64 if self.space == NodeId::MISSING {
65 self.space = build_space_class(b);
66 self.non_space = neg_class(b, self.space);
67 }
68 }
69}